From e2ab79d5ea00af45c083cc9a6607d2f0905f9908 Mon Sep 17 00:00:00 2001 From: Wenchen Fan Date: Sun, 12 Jun 2016 21:36:41 -0700 Subject: [SPARK-15898][SQL] DataFrameReader.text should return DataFrame ## What changes were proposed in this pull request? We want to maintain API compatibility for DataFrameReader.text, and will introduce a new API called DataFrameReader.textFile which returns Dataset[String]. affected PRs: https://github.com/apache/spark/pull/11731 https://github.com/apache/spark/pull/13104 https://github.com/apache/spark/pull/13184 ## How was this patch tested? N/A Author: Wenchen Fan Closes #13604 from cloud-fan/revert. --- examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java | 2 +- examples/src/main/java/org/apache/spark/examples/JavaPageRank.java | 2 +- examples/src/main/java/org/apache/spark/examples/JavaWordCount.java | 2 +- examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java | 2 +- examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java | 2 +- examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala | 2 +- examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala | 2 +- examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 2 +- examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala | 2 +- .../scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) (limited to 'examples') diff --git a/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java index ded442096c..362bd4435e 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaHdfsLR.java @@ -126,7 +126,7 @@ public final class JavaHdfsLR { .appName("JavaHdfsLR") .getOrCreate(); - JavaRDD<String> lines = spark.read().text(args[0]).javaRDD(); + JavaRDD<String> lines = 
spark.read().textFile(args[0]).javaRDD(); JavaRDD<DataPoint> points = lines.map(new ParsePoint()).cache(); int ITERATIONS = Integer.parseInt(args[1]); diff --git a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java index 128b5ab17c..ed0bb87657 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaPageRank.java @@ -82,7 +82,7 @@ public final class JavaPageRank { // URL neighbor URL // URL neighbor URL // ... - JavaRDD<String> lines = spark.read().text(args[0]).javaRDD(); + JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD(); // Loads all URLs from input file and initialize their neighbors. JavaPairRDD<String, Iterable<String>> links = lines.mapToPair( diff --git a/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java index 1caee60e34..8f18604c07 100644 --- a/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/JavaWordCount.java @@ -46,7 +46,7 @@ public final class JavaWordCount { .appName("JavaWordCount") .getOrCreate(); - JavaRDD<String> lines = spark.read().text(args[0]).javaRDD(); + JavaRDD<String> lines = spark.read().textFile(args[0]).javaRDD(); JavaRDD<String> words = lines.flatMap(new FlatMapFunction<String, String>() { @Override diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java index 7f568f4e0d..739558e81f 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaALSExample.java @@ -87,7 +87,7 @@ public class JavaALSExample { // $example on$ JavaRDD<Rating> ratingsRDD = spark - .read().text("data/mllib/als/sample_movielens_ratings.txt").javaRDD() + .read().textFile("data/mllib/als/sample_movielens_ratings.txt").javaRDD() .map(new 
Function<String, Rating>() { public Rating call(String str) { return Rating.parseRating(str); diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index 55e591d0ce..e512979ac7 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -59,7 +59,7 @@ public class JavaSparkSQL { System.out.println("=== Data source: RDD ==="); // Load a text file and convert each line to a Java Bean. String file = "examples/src/main/resources/people.txt"; - JavaRDD<Person> people = spark.read().text(file).javaRDD().map( + JavaRDD<Person> people = spark.read().textFile(file).javaRDD().map( new Function<String, Person>() { @Override public Person call(String line) { diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala index 84f133e011..05ac6cbcb3 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkHdfsLR.scala @@ -72,7 +72,7 @@ object SparkHdfsLR { .getOrCreate() val inputPath = args(0) - val lines = spark.read.text(inputPath).rdd + val lines = spark.read.textFile(inputPath).rdd val points = lines.map(parsePoint).cache() val ITERATIONS = args(1).toInt diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala index aa93c93c44..fec3160e9f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkKMeans.scala @@ -71,7 +71,7 @@ object SparkKMeans { .appName("SparkKMeans") .getOrCreate() - val lines = spark.read.text(args(0)).rdd + val lines = spark.read.textFile(args(0)).rdd val data = lines.map(parseVector _).cache() val K = args(1).toInt val convergeDist = args(2).toDouble 
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala index b7c363c7d4..d0b874c48d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala @@ -56,7 +56,7 @@ object SparkPageRank { .getOrCreate() val iters = if (args.length > 1) args(1).toInt else 10 - val lines = spark.read.text(args(0)).rdd + val lines = spark.read.textFile(args(0)).rdd val links = lines.map{ s => val parts = s.split("\\s+") (parts(0), parts(1)) diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala index da19ea9f10..bb5d163608 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala @@ -50,7 +50,7 @@ object ALSExample { import spark.implicits._ // $example on$ - val ratings = spark.read.text("data/mllib/als/sample_movielens_ratings.txt") + val ratings = spark.read.textFile("data/mllib/als/sample_movielens_ratings.txt") .map(parseRating) .toDF() val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2)) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala index 781a934df6..d514891da7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/RankingMetricsExample.scala @@ -33,7 +33,7 @@ object RankingMetricsExample { import spark.implicits._ // $example on$ // Read in the ratings data - val ratings = spark.read.text("data/mllib/sample_movielens_data.txt").rdd.map { line => + val ratings = spark.read.textFile("data/mllib/sample_movielens_data.txt").rdd.map { line => 
val fields = line.split("::") Rating(fields(0).toInt, fields(1).toInt, fields(2).toDouble - 2.5) }.cache() -- cgit v1.2.3