diff options
author | Sean Zhong <seanzhong@databricks.com> | 2016-05-18 09:01:59 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-05-18 09:01:59 +0800 |
commit | 25b315e6cad7c27b62dcaa2c194293c1115fdfb3 (patch) | |
tree | cfeebcaf553d78ca80a70f7139a765e7759f0410 /examples/src/main | |
parent | b674e67c22bf663334e537e35787c00533adbb04 (diff) | |
download | spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.tar.gz spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.tar.bz2 spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.zip |
[SPARK-15171][SQL] Remove the references to deprecated method dataset.registerTempTable
## What changes were proposed in this pull request?
Update the unit test code, examples, and documents to remove calls to deprecated method `dataset.registerTempTable`.
## How was this patch tested?
This PR only changes the unit test code, examples, and comments. It should be safe.
This is a follow up of PR https://github.com/apache/spark/pull/12945 which was merged.
Author: Sean Zhong <seanzhong@databricks.com>
Closes #13098 from clockfly/spark-15171-remove-deprecation.
Diffstat (limited to 'examples/src/main')
7 files changed, 13 insertions, 13 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java index cf0167f13a..55e591d0ce 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSparkSQL.java @@ -73,11 +73,11 @@ public class JavaSparkSQL { } }); - // Apply a schema to an RDD of Java Beans and register it as a table. + // Apply a schema to an RDD of Java Beans and create a temporary view Dataset<Row> schemaPeople = spark.createDataFrame(people, Person.class); schemaPeople.createOrReplaceTempView("people"); - // SQL can be run over RDDs that have been registered as tables. + // SQL can be run over RDDs which back a temporary view. Dataset<Row> teenagers = spark.sql("SELECT name FROM people WHERE age >= 13 AND age <= 19"); // The results of SQL queries are DataFrames and support all the normal RDD operations. @@ -101,7 +101,7 @@ public class JavaSparkSQL { // The result of loading a parquet file is also a DataFrame. Dataset<Row> parquetFile = spark.read().parquet("people.parquet"); - //Parquet files can also be registered as tables and then used in SQL statements. + // A temporary view can be created by using Parquet files and then used in SQL statements. parquetFile.createOrReplaceTempView("parquetFile"); Dataset<Row> teenagers2 = spark.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19"); @@ -130,7 +130,7 @@ public class JavaSparkSQL { // |-- age: IntegerType // |-- name: StringType - // Register this DataFrame as a table. 
+ // Creates a temporary view using the DataFrame peopleFromJsonFile.createOrReplaceTempView("people"); // SQL statements can be run by using the sql methods provided by `spark` diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java index 5130522770..b8e9e125ba 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaSqlNetworkWordCount.java @@ -94,7 +94,7 @@ public final class JavaSqlNetworkWordCount { }); Dataset<Row> wordsDataFrame = spark.createDataFrame(rowRDD, JavaRecord.class); - // Register as table + // Creates a temporary view using the DataFrame wordsDataFrame.createOrReplaceTempView("words"); // Do word count on table using SQL and print it diff --git a/examples/src/main/python/sql.py b/examples/src/main/python/sql.py index 234024063f..ac7246938d 100644 --- a/examples/src/main/python/sql.py +++ b/examples/src/main/python/sql.py @@ -66,7 +66,7 @@ if __name__ == "__main__": # |-- age: long (nullable = true) # |-- name: string (nullable = true) - # Register this DataFrame as a temporary table. + # Creates a temporary view using the DataFrame. people.createOrReplaceTempView("people") # SQL statements can be run by using the sql methods provided by `spark` diff --git a/examples/src/main/python/streaming/sql_network_wordcount.py b/examples/src/main/python/streaming/sql_network_wordcount.py index 25e821534e..398ac8d2d8 100644 --- a/examples/src/main/python/streaming/sql_network_wordcount.py +++ b/examples/src/main/python/streaming/sql_network_wordcount.py @@ -70,7 +70,7 @@ if __name__ == "__main__": rowRdd = rdd.map(lambda w: Row(word=w)) wordsDataFrame = spark.createDataFrame(rowRdd) - # Register as table + # Creates a temporary view using the DataFrame. 
wordsDataFrame.createOrReplaceTempView("words") # Do word count on table using SQL and print it diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala index d1bda0ff84..1b019fbb51 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/RDDRelation.scala @@ -35,8 +35,8 @@ object RDDRelation { import spark.implicits._ val df = spark.createDataFrame((1 to 100).map(i => Record(i, s"val_$i"))) - // Any RDD containing case classes can be registered as a table. The schema of the table is - // automatically inferred using scala reflection. + // Any RDD containing case classes can be used to create a temporary view. The schema of the + // view is automatically inferred using scala reflection. df.createOrReplaceTempView("records") // Once tables have been registered, you can run SQL queries over them. @@ -66,7 +66,7 @@ object RDDRelation { // Queries can be run using the DSL on parquet files just like the original RDD. parquetFile.where($"key" === 1).select($"value".as("a")).collect().foreach(println) - // These files can also be registered as tables. + // These files can also be used to create a temporary view. parquetFile.createOrReplaceTempView("parquetFile") spark.sql("SELECT * FROM parquetFile").collect().foreach(println) diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala index a15cf5ded0..7293cb51b2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala @@ -70,9 +70,9 @@ object HiveFromSpark { case Row(key: Int, value: String) => s"Key: $key, Value: $value" } - // You can also register RDDs as temporary tables within a HiveContext. 
+ // You can also use RDDs to create temporary views within a HiveContext. val rdd = sc.parallelize((1 to 100).map(i => Record(i, s"val_$i"))) - rdd.toDF().registerTempTable("records") + rdd.toDF().createOrReplaceTempView("records") // Queries can then join RDD data with data stored in Hive. println("Result of SELECT *:") diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala index 688c5b23c2..787bbec73b 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/SqlNetworkWordCount.scala @@ -66,7 +66,7 @@ object SqlNetworkWordCount { // Convert RDD[String] to RDD[case class] to DataFrame val wordsDataFrame = rdd.map(w => Record(w)).toDF() - // Register as table + // Creates a temporary view using the DataFrame wordsDataFrame.createOrReplaceTempView("words") // Do word count on table using SQL and print it |