author     Davies Liu <davies@databricks.com>   2016-04-12 17:26:37 -0700
committer  Davies Liu <davies.liu@gmail.com>    2016-04-12 17:26:37 -0700
commit     372baf0479840695388515170e6eae0b3fc4125e
tree       3f050e5ac95bc52a267f84a658b0caa47b3214f7
parent     d187e7dea9540d26b7800de4eb79863ef5f574bf
[SPARK-14578] [SQL] Fix codegen for CreateExternalRow with nested wide schema
## What changes were proposed in this pull request?

With a wide schema, the expressions for the fields get split into multiple functions, but the variables for loopVar cannot be accessed from those split functions, so this PR turns them into class members.

## How was this patch tested?

Added a regression test.

Author: Davies Liu <davies@databricks.com>

Closes #12338 from davies/nested_row.
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala     |  8 +++++---
 sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala | 15 +++++++++++++++
 2 files changed, 20 insertions(+), 3 deletions(-)
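For context: with a very wide schema, the generated evaluation code is split into multiple functions so that no single generated method grows too large, and a local variable declared in one of those functions is not visible from the others. Promoting the loop variable to a member of the generated class is what makes it reachable from every split-out function. A minimal Scala sketch of that idea (illustration only, not Spark code; the class and method names are made up):

```scala
// Sketch: why a value shared across split-out functions must be a field,
// not a local. Names are hypothetical; this is not Spark's codegen output.

// Everything in one method: a local variable works fine.
class SingleMethod {
  def sum(values: Array[Int]): Int = {
    var loopValue = 0                       // local, visible to all code below
    var i = 0
    while (i < values.length) { loopValue += values(i); i += 1 }
    loopValue
  }
}

// Per-element logic split into a separate method: the shared value has to
// become a class member (analogous to what ctx.addMutableState does for
// loopVar in this patch).
class SplitMethods {
  private var loopValue: Int = 0            // class member, visible everywhere

  private def consume(v: Int): Unit = { loopValue += v }   // split-out function

  def sum(values: Array[Int]): Int = {
    loopValue = 0
    var i = 0
    while (i < values.length) { consume(values(i)); i += 1 }
    loopValue
  }
}
```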
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 28b6b2adf8..26b1ff39b3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -446,6 +446,8 @@ case class MapObjects private(
   override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
     val javaType = ctx.javaType(dataType)
     val elementJavaType = ctx.javaType(loopVar.dataType)
+    ctx.addMutableState("boolean", loopVar.isNull, "")
+    ctx.addMutableState(elementJavaType, loopVar.value, "")
     val genInputData = inputData.gen(ctx)
     val genFunction = lambdaFunction.gen(ctx)
     val dataLength = ctx.freshName("dataLength")
@@ -466,9 +468,9 @@ case class MapObjects private(
     }
 
     val loopNullCheck = if (primitiveElement) {
-      s"boolean ${loopVar.isNull} = ${genInputData.value}.isNullAt($loopIndex);"
+      s"${loopVar.isNull} = ${genInputData.value}.isNullAt($loopIndex);"
     } else {
-      s"boolean ${loopVar.isNull} = ${genInputData.isNull} || ${loopVar.value} == null;"
+      s"${loopVar.isNull} = ${genInputData.isNull} || ${loopVar.value} == null;"
     }
 
     s"""
@@ -484,7 +486,7 @@ case class MapObjects private(
 
       int $loopIndex = 0;
       while ($loopIndex < $dataLength) {
-        $elementJavaType ${loopVar.value} =
+        ${loopVar.value} =
           ($elementJavaType)${genInputData.value}${itemAccessor(loopIndex)};
         $loopNullCheck
 
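The two ctx.addMutableState calls above register loopVar.isNull and loopVar.value as mutable state of the generated class, and the loop now assigns to them instead of declaring fresh locals, so expression code that gets split out into its own function can still read them. Roughly, the generated class ends up shaped like the Scala sketch below (the real output is Java produced by the code above; all names here are illustrative):

```scala
// Illustrative shape only; not actual Spark codegen output.
// loopIsNull / loopValue stand in for loopVar.isNull and loopVar.value,
// which the patch turns into fields via ctx.addMutableState.
class GeneratedIteratorSketch {
  private var loopIsNull: Boolean = false   // was: a local boolean per loop body
  private var loopValue: AnyRef = null      // was: a local of the element's type

  // One of possibly many split-out expression functions for a wide schema;
  // it still sees the loop variable because it is a field.
  private def evalNestedField(): AnyRef =
    if (loopIsNull) null else loopValue

  def processElements(input: Array[AnyRef]): Unit = {
    var i = 0
    while (i < input.length) {
      loopValue = input(i)                  // assignment only, no declaration
      loopIsNull = loopValue == null
      evalNestedField()
      i += 1
    }
  }
}
```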
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 2a18acb95b..e17340c70b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -1664,4 +1664,19 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
       )
     }
   }
+
+  test("wide nested json table") {
+    val nested = (1 to 100).map { i =>
+      s"""
+         |"c$i": $i
+       """.stripMargin
+    }.mkString(", ")
+    val json = s"""
+       |{"a": [{$nested}], "b": [{$nested}]}
+     """.stripMargin
+    val rdd = sqlContext.sparkContext.makeRDD(Seq(json))
+    val df = sqlContext.read.json(rdd)
+    assert(df.schema.size === 2)
+    df.collect()
+  }
 }