diff options
Diffstat (limited to 'examples/src')
-rw-r--r-- | examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java | 9
-rw-r--r-- | examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala | 10
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java index adb96dd8bf..82bb284ea3 100644 --- a/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java +++ b/examples/src/main/java/org/apache/spark/examples/sql/JavaSQLDataSourceExample.java @@ -25,8 +25,6 @@ import java.util.List; import java.util.Properties; // $example on:basic_parquet_example$ -import org.apache.spark.api.java.JavaRDD; -import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.MapFunction; import org.apache.spark.sql.Encoders; // $example on:schema_merging$ @@ -217,12 +215,11 @@ public class JavaSQLDataSourceExample { // +------+ // Alternatively, a DataFrame can be created for a JSON dataset represented by - // an RDD[String] storing one JSON object per string. + // a Dataset[String] storing one JSON object per string. List<String> jsonData = Arrays.asList( "{\"name\":\"Yin\",\"address\":{\"city\":\"Columbus\",\"state\":\"Ohio\"}}"); - JavaRDD<String> anotherPeopleRDD = - new JavaSparkContext(spark.sparkContext()).parallelize(jsonData); - Dataset<String> anotherPeopleDataset = spark.createDataset(jsonData, Encoders.STRING()); + Dataset<Row> anotherPeople = spark.read().json(anotherPeopleDataset); anotherPeople.show(); // +---------------+----+ // | address|name| diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala index 66f7cb1b53..381e69cda8 100644 --- a/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/sql/SQLDataSourceExample.scala @@ -111,6 +111,10 @@ object SQLDataSourceExample { private def runJsonDatasetExample(spark: 
SparkSession): Unit = { // $example on:json_dataset$ + // Primitive types (Int, String, etc) and Product types (case classes) encoders are + // supported by importing this when creating a Dataset. + import spark.implicits._ + // A JSON dataset is pointed to by path. // The path can be either a single text file or a directory storing text files val path = "examples/src/main/resources/people.json" @@ -135,10 +139,10 @@ object SQLDataSourceExample { // +------+ // Alternatively, a DataFrame can be created for a JSON dataset represented by - // an RDD[String] storing one JSON object per string - val otherPeopleRDD = spark.sparkContext.makeRDD( + // a Dataset[String] storing one JSON object per string + val otherPeopleDataset = spark.createDataset( """{"name":"Yin","address":{"city":"Columbus","state":"Ohio"}}""" :: Nil) - val otherPeople = spark.read.json(otherPeopleRDD) + val otherPeople = spark.read.json(otherPeopleDataset) otherPeople.show() // +---------------+----+ // | address|name|