diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-03-25 20:19:04 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-03-25 20:19:04 +0800 |
commit | e9b6e7d8577cd721a433130f29e8b112d98768b9 (patch) | |
tree | ebafcac42412b9411271d36088cf4e3c1313ada5 /sql/catalyst/src | |
parent | 70a6f0bb57ca2248444157e2707fbcc3cb04e3bc (diff) | |
download | spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.gz spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.bz2 spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.zip |
[SPARK-13456][SQL][FOLLOW-UP] lazily generate the outer pointer for case class defined in REPL
## What changes were proposed in this pull request?
In https://github.com/apache/spark/pull/11410, we missed a corner case: define the inner class and use it in `Dataset` at the same time by using paste mode. For this case, the inner class and the `Dataset` are inside same line object, when we build the `Dataset`, we try to get outer pointer from line object, and it will fail because the line object is not initialized yet.
https://issues.apache.org/jira/browse/SPARK-13456?focusedCommentId=15209174&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15209174 is an example for this corner case.
This PR make the process of getting outer pointer from line object lazy, so that we can successfully build the `Dataset` and finish initializing the line object.
## How was this patch tested?
new test in repl suite.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #11931 from cloud-fan/repl.
Diffstat (limited to 'sql/catalyst/src')
3 files changed, 33 insertions, 20 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b344e041a5..89b18af9a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -605,7 +605,7 @@ class Analyzer( "access to the scope that this class was defined in.\n" + "Try moving this class out of its parent class.") } - n.copy(outerPointer = Some(Literal.fromObject(outer))) + n.copy(outerPointer = Some(outer)) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala index c047e96463..a1f0312bd8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala @@ -42,7 +42,12 @@ object OuterScopes { outerScopes.putIfAbsent(outer.getClass.getName, outer) } - def getOuterScope(innerCls: Class[_]): AnyRef = { + /** + * Returns a function which can get the outer scope for the given inner class. By using function + * as return type, we can delay the process of getting outer pointer to execution time, which is + * useful for inner class defined in REPL. + */ + def getOuterScope(innerCls: Class[_]): () => AnyRef = { assert(innerCls.isMemberClass) val outerClassName = innerCls.getDeclaringClass.getName val outer = outerScopes.get(outerClassName) @@ -53,24 +58,30 @@ object OuterScopes { // `INSTANCE()` method to get the single instance of class `$read`. Then call `$iw()` // method multiply times to get the single instance of the inner most `$iw` class. case REPLClass(baseClassName) => - val objClass = Utils.classForName(baseClassName + "$") - val objInstance = objClass.getField("MODULE$").get(null) - val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance) - val baseClass = Utils.classForName(baseClassName) + () => { + val objClass = Utils.classForName(baseClassName + "$") + val objInstance = objClass.getField("MODULE$").get(null) + val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance) + val baseClass = Utils.classForName(baseClassName) - var getter = iwGetter(baseClass) - var obj = baseInstance - while (getter != null) { - obj = getter.invoke(obj) - getter = iwGetter(getter.getReturnType) - } + var getter = iwGetter(baseClass) + var obj = baseInstance + while (getter != null) { + obj = getter.invoke(obj) + getter = iwGetter(getter.getReturnType) + } - outerScopes.putIfAbsent(outerClassName, obj) - obj + if (obj == null) { + throw new RuntimeException(s"Failed to get outer pointer for ${innerCls.getName}") + } + + outerScopes.putIfAbsent(outerClassName, obj) + obj + } case _ => null } } else { - outer + () => outer } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index 7eba617fcd..07b67a0240 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -197,15 +197,17 @@ object NewInstance { * @param dataType The type of object being constructed, as a Spark SQL datatype. This allows you * to manually specify the type when the object in question is a valid internal * representation (i.e. ArrayData) instead of an object. - * @param outerPointer If the object being constructed is an inner class the outerPointer must - * for the containing class must be specified. + * @param outerPointer If the object being constructed is an inner class, the outerPointer for the + * containing class must be specified. This parameter is defined as an optional + * function, which allows us to get the outer pointer lazily,and it's useful if + * the inner class is defined in REPL. */ case class NewInstance( cls: Class[_], arguments: Seq[Expression], propagateNull: Boolean, dataType: DataType, - outerPointer: Option[Literal]) extends Expression with NonSQLExpression { + outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { private val className = cls.getName override def nullable: Boolean = propagateNull @@ -220,12 +222,12 @@ case class NewInstance( val argGen = arguments.map(_.gen(ctx)) val argString = argGen.map(_.value).mkString(", ") - val outer = outerPointer.map(_.gen(ctx)) + val outer = outerPointer.map(func => Literal.fromObject(func()).gen(ctx)) val setup = s""" ${argGen.map(_.code).mkString("\n")} - ${outer.map(_.code.mkString("")).getOrElse("")} + ${outer.map(_.code).getOrElse("")} """.stripMargin val constructorCall = outer.map { gen => |