about summary refs log tree commit diff
diff options
context:
space:
mode:
authorYin Huai <yhuai@databricks.com>2015-03-05 14:49:44 -0800
committerMichael Armbrust <michael@databricks.com>2015-03-05 14:49:44 -0800
commit1b4bb25c10d72132d7f4f3835ef9a3b94b2349e0 (patch)
treea33e2517ce7f78fd5114b29fb0be6a3bf367f61f
parent5873c713cc47af311f517ea33a6110993a410377 (diff)
downloadspark-1b4bb25c10d72132d7f4f3835ef9a3b94b2349e0.tar.gz
spark-1b4bb25c10d72132d7f4f3835ef9a3b94b2349e0.tar.bz2
spark-1b4bb25c10d72132d7f4f3835ef9a3b94b2349e0.zip
[SPARK-6163][SQL] jsonFile should be backed by the data source API
jira: https://issues.apache.org/jira/browse/SPARK-6163 Author: Yin Huai <yhuai@databricks.com> Closes #4896 from yhuai/SPARK-6163 and squashes the following commits: 45e023e [Yin Huai] Address @chenghao-intel's comment. 2e8734e [Yin Huai] Use JSON data source for jsonFile. 92a4a33 [Yin Huai] Test.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala12
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala28
2 files changed, 32 insertions, 8 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index ce800e0754..9c49e84bf9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -542,20 +542,16 @@ class SQLContext(@transient val sparkContext: SparkContext)
* @group specificdata
*/
@Experimental
- def jsonFile(path: String, schema: StructType): DataFrame = {
- val json = sparkContext.textFile(path)
- jsonRDD(json, schema)
- }
+ def jsonFile(path: String, schema: StructType): DataFrame =
+ load("json", schema, Map("path" -> path))
/**
* :: Experimental ::
* @group specificdata
*/
@Experimental
- def jsonFile(path: String, samplingRatio: Double): DataFrame = {
- val json = sparkContext.textFile(path)
- jsonRDD(json, samplingRatio)
- }
+ def jsonFile(path: String, samplingRatio: Double): DataFrame =
+ load("json", Map("path" -> path, "samplingRatio" -> samplingRatio.toString))
/**
* Loads an RDD[String] storing JSON objects (one object per record), returning the result as a
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index 9d94d3406a..0c21f725f0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -19,6 +19,8 @@ package org.apache.spark.sql.json
import java.sql.{Date, Timestamp}
+import org.scalactic.Tolerance._
+
import org.apache.spark.sql.TestData._
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.functions._
@@ -551,6 +553,32 @@ class JsonSuite extends QueryTest {
jsonDF.registerTempTable("jsonTable")
}
+ test("jsonFile should be based on JSONRelation") {
+ val file = getTempFilePath("json")
+ val path = file.toString
+ sparkContext.parallelize(1 to 100).map(i => s"""{"a": 1, "b": "str$i"}""").saveAsTextFile(path)
+ val jsonDF = jsonFile(path, 0.49)
+
+ val analyzed = jsonDF.queryExecution.analyzed
+ assert(
+ analyzed.isInstanceOf[LogicalRelation],
+ "The DataFrame returned by jsonFile should be based on JSONRelation.")
+ val relation = analyzed.asInstanceOf[LogicalRelation].relation
+ assert(
+ relation.isInstanceOf[JSONRelation],
+ "The DataFrame returned by jsonFile should be based on JSONRelation.")
+ assert(relation.asInstanceOf[JSONRelation].path === path)
+ assert(relation.asInstanceOf[JSONRelation].samplingRatio === (0.49 +- 0.001))
+
+ val schema = StructType(StructField("a", LongType, true) :: Nil)
+ val logicalRelation =
+ jsonFile(path, schema).queryExecution.analyzed.asInstanceOf[LogicalRelation]
+ val relationWithSchema = logicalRelation.relation.asInstanceOf[JSONRelation]
+ assert(relationWithSchema.path === path)
+ assert(relationWithSchema.schema === schema)
+ assert(relationWithSchema.samplingRatio > 0.99)
+ }
+
test("Loading a JSON dataset from a text file") {
val file = getTempFilePath("json")
val path = file.toString