[SPARK-12321][SQL] JSON format for TreeNode (use reflection)

An alternative solution for https://github.com/apache/spark/pull/10295 , instead of implementing json format for all logical/physical plans and expressions, use reflection to implement it in `TreeNode`. Here I use pre-order traversal to flattern a plan tree to a plan list, and add an extra field `num-children` to each plan node, so that we can reconstruct the tree from the list. example json: logical plan tree: ``` [ { "class" : "org.apache.spark.sql.catalyst.plans.logical.Sort", "num-children" : 1, "order" : [ [ { "class" : "org.apache.spark.sql.catalyst.expressions.SortOrder", "num-children" : 1, "child" : 0, "direction" : "Ascending" }, { "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference", "num-children" : 0, "name" : "i", "dataType" : "integer", "nullable" : true, "metadata" : { }, "exprId" : { "id" : 10, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] } ] ], "global" : false, "child" : 0 }, { "class" : "org.apache.spark.sql.catalyst.plans.logical.Project", "num-children" : 1, "projectList" : [ [ { "class" : "org.apache.spark.sql.catalyst.expressions.Alias", "num-children" : 1, "child" : 0, "name" : "i", "exprId" : { "id" : 10, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] }, { "class" : "org.apache.spark.sql.catalyst.expressions.Add", "num-children" : 2, "left" : 0, "right" : 1 }, { "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference", "num-children" : 0, "name" : "a", "dataType" : "integer", "nullable" : true, "metadata" : { }, "exprId" : { "id" : 0, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] }, { "class" : "org.apache.spark.sql.catalyst.expressions.Literal", "num-children" : 0, "value" : "1", "dataType" : "integer" } ], [ { "class" : "org.apache.spark.sql.catalyst.expressions.Alias", "num-children" : 1, "child" : 0, "name" : "j", "exprId" : { "id" : 11, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] }, { "class" : "org.apache.spark.sql.catalyst.expressions.Multiply", "num-children" : 2, "left" : 0, "right" : 1 }, { "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference", "num-children" : 0, "name" : "a", "dataType" : "integer", "nullable" : true, "metadata" : { }, "exprId" : { "id" : 0, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] }, { "class" : "org.apache.spark.sql.catalyst.expressions.Literal", "num-children" : 0, "value" : "2", "dataType" : "integer" } ] ], "child" : 0 }, { "class" : "org.apache.spark.sql.catalyst.plans.logical.LocalRelation", "num-children" : 0, "output" : [ [ { "class" : "org.apache.spark.sql.catalyst.expressions.AttributeReference", "num-children" : 0, "name" : "a", "dataType" : "integer", "nullable" : true, "metadata" : { }, "exprId" : { "id" : 0, "jvmId" : "cd1313c7-3f66-4ed7-a320-7d91e4633ac6" }, "qualifiers" : [ ] } ] ], "data" : [ ] } ] ``` Author: Wenchen Fan <wenchen@databricks.com> Closes #10311 from cloud-fan/toJson-reflection.
author: Wenchen Fan <wenchen@databricks.com> 2015-12-21 12:47:07 -0800
committer: Michael Armbrust <michael@databricks.com> 2015-12-21 12:47:07 -0800
commit: 7634fe9511e1a8fb94979624b1b617b495b48ad3 (patch)
tree: df63fbdc4c50a5675540f33e4f1bdc3b00e4d629 /sql/hive
parent: 474eb21a30f7ee898f76a625a5470c8245af1d22 (diff)
download: spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.tar.gz
spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.tar.bz2
spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.zip
2 files changed, 3 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
index 08b291e088..f099e146d1 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
@@ -728,6 +728,8 @@ private[hive] case class MetastoreRelation
     Objects.hashCode(databaseName, tableName, alias, output)
   }
 
+  override protected def otherCopyArgs: Seq[AnyRef] = table :: sqlContext :: Nil
+
   @transient val hiveQlTable: Table = {
     // We start by constructing an API table as Hive performs several important transformations
     // internally when converting an API table to a QL table.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
index b30117f0de..d9b9ba4bfd 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala
@@ -58,7 +58,7 @@ case class ScriptTransformation(
     ioschema: HiveScriptIOSchema)(@transient private val sc: HiveContext)
   extends UnaryNode {
 
-  override def otherCopyArgs: Seq[HiveContext] = sc :: Nil
+  override protected def otherCopyArgs: Seq[HiveContext] = sc :: Nil
 
   private val serializedHiveConf = new SerializableConfiguration(sc.hiveconf)
author	Wenchen Fan <wenchen@databricks.com>	2015-12-21 12:47:07 -0800
committer	Michael Armbrust <michael@databricks.com>	2015-12-21 12:47:07 -0800
commit	7634fe9511e1a8fb94979624b1b617b495b48ad3 (patch)
tree	df63fbdc4c50a5675540f33e4f1bdc3b00e4d629 /sql/hive
parent	474eb21a30f7ee898f76a625a5470c8245af1d22 (diff)
download	spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.tar.gz spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.tar.bz2 spark-7634fe9511e1a8fb94979624b1b617b495b48ad3.zip