From 8a5a58506c35f35f41cd1366ee693abec2916153 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 27 Feb 2017 14:33:02 -0800 Subject: [SPARK-15615][SQL][BUILD][FOLLOW-UP] Replace deprecated usage of json(RDD[String]) API ## What changes were proposed in this pull request? This PR proposes to replace the deprecated `json(RDD[String])` usage with `json(Dataset[String])`. The deprecated usage currently produces many warnings. ## How was this patch tested? Fixed tests. Author: hyukjinkwon Closes #17071 from HyukjinKwon/SPARK-15615-followup. --- .../apache/spark/examples/sql/JavaSQLDataSourceExample.java | 9 +++------ .../org/apache/spark/examples/sql/SQLDataSourceExample.scala | 10 +++++++--- 2 files changed, 10 insertions(+), 9 deletions(-) (limited to 'examples') diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index adb96dd8bf..82bb284ea3 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -25,8 +25,6 @@ import java.util.List; import java.util.Properties; // $example on:basic_parquet_example$ -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; // $example on:schema_merging$ @@ -217,12 +215,11 @@ public class JavaSQLDataSourceExample { // +------+ // Alternatively, a DataFrame can be created for a JSON dataset represented by - // an RDD[String] storing one JSON object per string. + // a Dataset[String] storing one JSON object per string. 
List jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); - JavaRDD anotherPeopleRDD = - new JavaSparkContext(spark.sparkContext()).parallelize(jsonData); - Dataset anotherPeople = spark.read().json(anotherPeopleRDD); + Dataset anotherPeopleDataset = spark.createDataset(jsonData, Encoders.STRING()); + Dataset anotherPeople = spark.read().json(anotherPeopleDataset); anotherPeople.show(); // +---------------+----+ // | address|name| diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala index 66f7cb1b53..381e69cda8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala @@ -111,6 +111,10 @@ object SQLDataSourceExample { private def runJsonDatasetExample(spark: SparkSession): Unit = { // $example on:json_dataset$ + // Primitive types (Int, String, etc) and Product types (case classes) encoders are + // supported by importing this when creating a Dataset. + import spark.implicits._ + // A JSON dataset is pointed to by path. // The path can be either a single text file or a directory storing text files val path = "examples/src/main/resources/people.json" @@ -135,10 +139,10 @@ object SQLDataSourceExample { // +------+ // Alternatively, a DataFrame can be created for a JSON dataset represented by - // an RDD[String] storing one JSON object per string - val otherPeopleRDD = spark.sparkContext.makeRDD( + // a Dataset[String] storing one JSON object per string + val otherPeopleDataset = spark.createDataset( """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil) - val otherPeople = spark.read.json(otherPeopleRDD) + val otherPeople = spark.read.json(otherPeopleDataset) otherPeople.show() // +---------------+----+ // | address|name| -- cgit v1.2.3