From 25b315e6cad7c27b62dcaa2c194293c1115fdfb3 Mon Sep 17 00:00:00 2001
From: Sean Zhong
Date: Wed, 18 May 2016 09:01:59 +0800
Subject: [SPARK-15171][SQL] Remove the references to deprecated method
 dataset.registerTempTable

## What changes were proposed in this pull request?

Update the unit test code, examples, and documents to remove calls to the deprecated method `dataset.registerTempTable`.

## How was this patch tested?

This PR only changes unit test code, examples, and comments, so it should be safe.

This is a follow-up to PR https://github.com/apache/spark/pull/12945, which has been merged.

Author: Sean Zhong

Closes #13098 from clockfly/spark-15171-remove-deprecation.
---
 .../src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java | 8 ++++----
 .../apache/spark/examples/streaming/JavaSqlNetworkWordCount.java  | 2 +-
 examples/src/main/python/sql.py                                   | 2 +-
 examples/src/main/python/streaming/sql_network_wordcount.py       | 2 +-
 .../main/scala/org/apache/spark/examples/sql/RDDRelation.scala    | 6 +++---
 .../scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala  | 4 ++--
 .../org/apache/spark/examples/streaming/SqlNetworkWordCount.scala | 2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)
(limited to 'examples/src/main')

diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
index cf0167f13a..55e591d0ce 100644
--- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
+++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java
@@ -73,11 +73,11 @@ public class JavaSparkSQL {
       }
     });

-    // Apply a schema to an RDD of Java Beans and register it as a table.
+    // Apply a schema to an RDD of Java Beans and create a temporary view.
     Dataset<Row> schemaPeople = spark.createDataFrame(people, Person.class);
     schemaPeople.createOrReplaceTempView("people");

-    // SQL can be run over RDDs that have been registered as tables.
+    // SQL can be run over RDDs that back a temporary view.
     Dataset<Row> teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19");

     // The results of SQL queries are DataFrames and support all the normal RDD operations.
@@ -101,7 +101,7 @@ public class JavaSparkSQL {
     // The result of loading a parquet file is also a DataFrame.
     Dataset<Row> parquetFile = spark.read().parquet("people.parquet");

-    //Parquet files can also be registered as tables and then used in SQL statements.
+    // A temporary view can be created from Parquet files and then used in SQL statements.
     parquetFile.createOrReplaceTempView("parquetFile");
     Dataset<Row> teenagers2 = spark.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19");
@@ -130,7 +130,7 @@ public class JavaSparkSQL {
     // |-- age: IntegerType
     // |-- name: StringType

-    // Register this DataFrame as a table.
+    // Creates a temporary view using the DataFrame.
     peopleFromJsonFile.createOrReplaceTempView("people");

     // SQL statements can be run by using the sql methods provided by `spark`

diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
index 5130522770..b8e9e125ba 100644
--- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
+++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java
@@ -94,7 +94,7 @@ public final class JavaSqlNetworkWordCount {
     });
     Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class);

-    // Register as table
+    // Creates a temporary view using the DataFrame.
     wordsDataFrame.createOrReplaceTempView("words");

     // Do word count on table using SQL and print it

diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py
index 234024063f..ac7246938d 100644
--- a/examples/src/main/python/sql.py
+++ b/examples/src/main/python/sql.py
@@ -66,7 +66,7 @@ if __name__ == "__main__":
     # |-- age: long (nullable = true)
     # |-- name: string (nullable = true)

-    # Register this DataFrame as a temporary table.
+    # Creates a temporary view using the DataFrame.
     people.createOrReplaceTempView("people")

     # SQL statements can be run by using the sql methods provided by `spark`

diff --git a/examples/src/main/python/streaming/sql_network_wordcount.py b/examples/src/main/python/streaming/sql_network_wordcount.py
index 25e821534e..398ac8d2d8 100644
--- a/examples/src/main/python/streaming/sql_network_wordcount.py
+++ b/examples/src/main/python/streaming/sql_network_wordcount.py
@@ -70,7 +70,7 @@ if __name__ == "__main__":
         rowRdd = rdd.map(lambda w: Row(word=w))
         wordsDataFrame = spark.createDataFrame(rowRdd)

-        # Register as table
+        # Creates a temporary view using the DataFrame.
         wordsDataFrame.createOrReplaceTempView("words")

         # Do word count on table using SQL and print it

diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
index d1bda0ff84..1b019fbb51 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala
@@ -35,8 +35,8 @@ object RDDRelation {
     import spark.implicits._

     val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))
-    // Any RDD containing case classes can be registered as a table. The schema of the table is
-    // automatically inferred using scala reflection.
+    // Any RDD containing case classes can be used to create a temporary view. The schema of the
+    // view is automatically inferred using Scala reflection.
     df.createOrReplaceTempView("records")

     // Once tables have been registered, you can run SQL queries over them.
@@ -66,7 +66,7 @@ object RDDRelation {
     // Queries can be run using the DSL on parquet files just like the original RDD.
     parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println)

-    // These files can also be registered as tables.
+    // These files can also be used to create a temporary view.
     parquetFile.createOrReplaceTempView("parquetFile")
     spark.sql("SELECT * FROM parquetFile").collect().foreach(println)

diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index a15cf5ded0..7293cb51b2 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -70,9 +70,9 @@ object HiveFromSpark {
       case Row(key: Int, value: String) => s"Key: $key, Value: $value"
     }

-    // You can also register RDDs as temporary tables within a HiveContext.
+    // You can also use RDDs to create temporary views within a HiveContext.
     val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i")))
-    rdd.toDF().registerTempTable("records")
+    rdd.toDF().createOrReplaceTempView("records")

     // Queries can then join RDD data with data stored in Hive.
     println("Result of SELECT *:")

diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
index 688c5b23c2..787bbec73b 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala
@@ -66,7 +66,7 @@ object SqlNetworkWordCount {
       // Convert RDD[String] to RDD[case class] to DataFrame
       val wordsDataFrame = rdd.map(w => Record(w)).toDF()

-      // Register as table
+      // Creates a temporary view using the DataFrame.
       wordsDataFrame.createOrReplaceTempView("words")

       // Do word count on table using SQL and print it
--
cgit v1.2.3
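
For readers updating their own code, the one-line migration this patch performs looks like the following. This is a minimal, self-contained sketch assuming Spark 2.x; the object name, the app name, and the COUNT(*) query are illustrative and not part of the patch, while the Record case class and createDataFrame call mirror the RDDRelation example above.

    import org.apache.spark.sql.SparkSession

    // Mirrors the Record case class used by the RDDRelation example.
    case class Record(key: Int, value: String)

    object TempViewMigration {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("TempViewMigration").getOrCreate()

        // Build a DataFrame from a local sequence of case class instances;
        // the schema is inferred via Scala reflection.
        val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i")))

        // Before (deprecated since Spark 2.0, removed by this patch):
        //   df.registerTempTable("records")
        // After: same effect -- registers the DataFrame under a name that
        // SQL statements in this session can reference.
        df.createOrReplaceTempView("records")

        spark.sql("SELECT COUNT(*) FROM records").show()
        spark.stop()
      }
    }

Both calls register a session-scoped temporary view, so this is a drop-in rename; createOrReplaceTempView simply makes the replace-if-exists semantics explicit in the name.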