From c079420d7c55d8972db716a2695a5ddd606d11cd Mon Sep 17 00:00:00 2001 From: Cheng Lian Date: Sun, 13 Mar 2016 12:02:52 +0800 Subject: [SPARK-13841][SQL] Removes Dataset.collectRows()/takeRows() ## What changes were proposed in this pull request? This PR removes two methods, `collectRows()` and `takeRows()`, from `Dataset[T]`. These methods were added in PR #11443, and were later considered not useful. ## How was this patch tested? Existing tests should do the work. Author: Cheng Lian Closes #11678 from liancheng/remove-collect-rows-and-take-rows. --- .../main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java | 2 +- .../java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java | 2 +- .../java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java | 2 +- .../spark/examples/ml/JavaEstimatorTransformerParamExample.java | 3 ++- .../examples/ml/JavaModelSelectionViaCrossValidationExample.java | 2 +- .../src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java | 2 +- .../main/java/org/apache/spark/examples/ml/JavaPipelineExample.java | 2 +- .../org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java | 5 +++-- .../java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java | 3 ++- .../spark/examples/ml/JavaSimpleTextClassificationPipeline.java | 2 +- .../src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java | 2 +- .../main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java | 2 +- .../main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java | 2 +- 13 files changed, 17 insertions(+), 14 deletions(-) (limited to 'examples') diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java index d554377975..0a6e9c2a1f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizerExample.java @@ -58,7 +58,7 @@ public class JavaBinarizerExample { .setThreshold(0.5); Dataset binarizedDataFrame = binarizer.transform(continuousDataFrame); Dataset binarizedFeatures = binarizedDataFrame.select("binarized_feature"); - for (Row r : binarizedFeatures.collectRows()) { + for (Row r : binarizedFeatures.collectAsList()) { Double binarized_value = r.getDouble(0); System.out.println(binarized_value); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java index 90bc94c45b..07edeb3e52 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaCrossValidatorExample.java @@ -117,7 +117,7 @@ public class JavaCrossValidatorExample { // Make predictions on test documents. cvModel uses the best model found (lrModel). Dataset predictions = cvModel.transform(test); - for (Row r: predictions.select("id", "text", "probability", "prediction").collectRows()) { + for (Row r: predictions.select("id", "text", "probability", "prediction").collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java index e8ae100d68..8a10dd48aa 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java @@ -85,7 +85,7 @@ public class JavaDeveloperApiExample { // Make predictions on test documents. cvModel uses the best model found (lrModel). Dataset results = model.transform(test); double sumPredictions = 0; - for (Row r : results.select("features", "label", "prediction").collectRows()) { + for (Row r : results.select("features", "label", "prediction").collectAsList()) { sumPredictions += r.getDouble(2); } if (sumPredictions != 0.0) { diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java index f13698ae5e..604b193dd4 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaEstimatorTransformerParamExample.java @@ -100,7 +100,8 @@ public class JavaEstimatorTransformerParamExample { // Note that model2.transform() outputs a 'myProbability' column instead of the usual // 'probability' column since we renamed the lr.probabilityCol parameter previously. Dataset results = model2.transform(test); - for (Row r : results.select("features", "label", "myProbability", "prediction").collectRows()) { + Dataset rows = results.select("features", "label", "myProbability", "prediction"); + for (Row r: rows.collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java index e394605db7..c4122d1247 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaModelSelectionViaCrossValidationExample.java @@ -111,7 +111,7 @@ public class JavaModelSelectionViaCrossValidationExample { // Make predictions on test documents. cvModel uses the best model found (lrModel). Dataset predictions = cvModel.transform(test); - for (Row r : predictions.select("id", "text", "probability", "prediction").collectRows()) { + for (Row r : predictions.select("id", "text", "probability", "prediction").collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java index 0305f737ca..608bd80285 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGramExample.java @@ -60,7 +60,7 @@ public class JavaNGramExample { Dataset ngramDataFrame = ngramTransformer.transform(wordDataFrame); - for (Row r : ngramDataFrame.select("ngrams", "label").takeRows(3)) { + for (Row r : ngramDataFrame.select("ngrams", "label").takeAsList(3)) { java.util.List ngrams = r.getList(0); for (String ngram : ngrams) System.out.print(ngram + " --- "); System.out.println(); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java index 6ae418d564..305420f208 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPipelineExample.java @@ -80,7 +80,7 @@ public class JavaPipelineExample { // Make predictions on test documents. Dataset predictions = model.transform(test); - for (Row r : predictions.select("id", "text", "probability", "prediction").collectRows()) { + for (Row r : predictions.select("id", "text", "probability", "prediction").collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java index 5a4064c604..48fc3c8acb 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansionExample.java @@ -23,6 +23,7 @@ import org.apache.spark.sql.SQLContext; // $example on$ import java.util.Arrays; +import java.util.List; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.ml.feature.PolynomialExpansion; @@ -61,8 +62,8 @@ public class JavaPolynomialExpansionExample { Dataset df = jsql.createDataFrame(data, schema); Dataset polyDF = polyExpansion.transform(df); - Row[] row = polyDF.select("polyFeatures").takeRows(3); - for (Row r : row) { + List rows = polyDF.select("polyFeatures").takeAsList(3); + for (Row r : rows) { System.out.println(r.get(0)); } // $example off$ diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java index 52bb4ec050..cb911ef5ef 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleParamsExample.java @@ -103,7 +103,8 @@ public class JavaSimpleParamsExample { // Note that model2.transform() outputs a 'myProbability' column instead of the usual // 'probability' column since we renamed the lr.probabilityCol parameter previously. Dataset results = model2.transform(test); - for (Row r: results.select("features", "label", "myProbability", "prediction").collectRows()) { + Dataset rows = results.select("features", "label", "myProbability", "prediction"); + for (Row r: rows.collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") -> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java index 9bd543c44f..a18a60f448 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaSimpleTextClassificationPipeline.java @@ -84,7 +84,7 @@ public class JavaSimpleTextClassificationPipeline { // Make predictions on test documents. Dataset predictions = model.transform(test); - for (Row r: predictions.select("id", "text", "probability", "prediction").collectRows()) { + for (Row r: predictions.select("id", "text", "probability", "prediction").collectAsList()) { System.out.println("(" + r.get(0) + ", " + r.get(1) + ") --> prob=" + r.get(2) + ", prediction=" + r.get(3)); } diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java index fd1ce424bf..37a3d0d84d 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTfIdfExample.java @@ -66,7 +66,7 @@ public class JavaTfIdfExample { IDF idf = new IDF().setInputCol("rawFeatures").setOutputCol("features"); IDFModel idfModel = idf.fit(featurizedData); Dataset rescaledData = idfModel.transform(featurizedData); - for (Row r : rescaledData.select("features", "label").takeRows(3)) { + for (Row r : rescaledData.select("features", "label").takeAsList(3)) { Vector features = r.getAs(0); Double label = r.getDouble(1); System.out.println(features); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java index a2f8c436e3..9225fe2262 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizerExample.java @@ -59,7 +59,7 @@ public class JavaTokenizerExample { Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words"); Dataset wordsDataFrame = tokenizer.transform(sentenceDataFrame); - for (Row r : wordsDataFrame.select("words", "label").takeRows(3)) { + for (Row r : wordsDataFrame.select("words", "label").takeAsList(3)) { java.util.List words = r.getList(0); for (String word : words) System.out.print(word + " "); System.out.println(); diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java index 2dce8c2168..c5bb1eaaa3 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaWord2VecExample.java @@ -59,7 +59,7 @@ public class JavaWord2VecExample { .setMinCount(0); Word2VecModel model = word2Vec.fit(documentDF); Dataset result = model.transform(documentDF); - for (Row r : result.select("result").takeRows(3)) { + for (Row r : result.select("result").takeAsList(3)) { System.out.println(r); } // $example off$ -- cgit v1.2.3