aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-03-25 20:19:04 +0800
committerCheng Lian <lian@databricks.com>2016-03-25 20:19:04 +0800
commite9b6e7d8577cd721a433130f29e8b112d98768b9 (patch)
treeebafcac42412b9411271d36088cf4e3c1313ada5
parent70a6f0bb57ca2248444157e2707fbcc3cb04e3bc (diff)
downloadspark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.gz
spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.bz2
spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.zip
[SPARK-13456][SQL][FOLLOW-UP] lazily generate the outer pointer for case class defined in REPL
## What changes were proposed in this pull request? In https://github.com/apache/spark/pull/11410, we missed a corner case: define the inner class and use it in `Dataset` at the same time by using paste mode. For this case, the inner class and the `Dataset` are inside the same line object; when we build the `Dataset`, we try to get the outer pointer from the line object, and it will fail because the line object is not initialized yet. https://issues.apache.org/jira/browse/SPARK-13456?focusedCommentId=15209174&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15209174 is an example of this corner case. This PR makes the process of getting the outer pointer from the line object lazy, so that we can successfully build the `Dataset` and finish initializing the line object. ## How was this patch tested? new test in repl suite. Author: Wenchen Fan <wenchen@databricks.com> Closes #11931 from cloud-fan/repl.
-rw-r--r--repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala15
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala39
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala12
4 files changed, 48 insertions, 20 deletions
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index f148a6df47..dbfacba346 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -59,6 +59,10 @@ class ReplSuite extends SparkFunSuite {
return out.toString
}
+ // Simulate the paste mode in Scala REPL.
+ def runInterpreterInPasteMode(master: String, input: String): String =
+ runInterpreter(master, ":paste\n" + input + 4.toChar) // 4 is the ASCII code of CTRL + D
+
def assertContains(message: String, output: String) {
val isContain = output.contains(message)
assert(isContain,
@@ -381,4 +385,15 @@ class ReplSuite extends SparkFunSuite {
assertDoesNotContain("error:", output)
assertDoesNotContain("Exception", output)
}
+
+ test("define case class and create Dataset together with paste mode") {
+ val output = runInterpreterInPasteMode("local-cluster[1,1,1024]",
+ """
+ |import sqlContext.implicits._
+ |case class TestClass(value: Int)
+ |Seq(TestClass(1)).toDS()
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index b344e041a5..89b18af9a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -605,7 +605,7 @@ class Analyzer(
"access to the scope that this class was defined in.\n" +
"Try moving this class out of its parent class.")
}
- n.copy(outerPointer = Some(Literal.fromObject(outer)))
+ n.copy(outerPointer = Some(outer))
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
index c047e96463..a1f0312bd8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
@@ -42,7 +42,12 @@ object OuterScopes {
outerScopes.putIfAbsent(outer.getClass.getName, outer)
}
- def getOuterScope(innerCls: Class[_]): AnyRef = {
+ /**
+ * Returns a function which can get the outer scope for the given inner class. By using function
+ * as return type, we can delay the process of getting outer pointer to execution time, which is
+ * useful for inner class defined in REPL.
+ */
+ def getOuterScope(innerCls: Class[_]): () => AnyRef = {
assert(innerCls.isMemberClass)
val outerClassName = innerCls.getDeclaringClass.getName
val outer = outerScopes.get(outerClassName)
@@ -53,24 +58,30 @@ object OuterScopes {
// `INSTANCE()` method to get the single instance of class `$read`. Then call `$iw()`
// method multiply times to get the single instance of the inner most `$iw` class.
case REPLClass(baseClassName) =>
- val objClass = Utils.classForName(baseClassName + "$")
- val objInstance = objClass.getField("MODULE$").get(null)
- val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
- val baseClass = Utils.classForName(baseClassName)
+ () => {
+ val objClass = Utils.classForName(baseClassName + "$")
+ val objInstance = objClass.getField("MODULE$").get(null)
+ val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
+ val baseClass = Utils.classForName(baseClassName)
- var getter = iwGetter(baseClass)
- var obj = baseInstance
- while (getter != null) {
- obj = getter.invoke(obj)
- getter = iwGetter(getter.getReturnType)
- }
+ var getter = iwGetter(baseClass)
+ var obj = baseInstance
+ while (getter != null) {
+ obj = getter.invoke(obj)
+ getter = iwGetter(getter.getReturnType)
+ }
- outerScopes.putIfAbsent(outerClassName, obj)
- obj
+ if (obj == null) {
+ throw new RuntimeException(s"Failed to get outer pointer for ${innerCls.getName}")
+ }
+
+ outerScopes.putIfAbsent(outerClassName, obj)
+ obj
+ }
case _ => null
}
} else {
- outer
+ () => outer
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 7eba617fcd..07b67a0240 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -197,15 +197,17 @@ object NewInstance {
* @param dataType The type of object being constructed, as a Spark SQL datatype. This allows you
* to manually specify the type when the object in question is a valid internal
* representation (i.e. ArrayData) instead of an object.
- * @param outerPointer If the object being constructed is an inner class the outerPointer must
- * for the containing class must be specified.
+ * @param outerPointer If the object being constructed is an inner class, the outerPointer for the
+ * containing class must be specified. This parameter is defined as an optional
+ * function, which allows us to get the outer pointer lazily, and it's useful if
+ * the inner class is defined in REPL.
*/
case class NewInstance(
cls: Class[_],
arguments: Seq[Expression],
propagateNull: Boolean,
dataType: DataType,
- outerPointer: Option[Literal]) extends Expression with NonSQLExpression {
+ outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression {
private val className = cls.getName
override def nullable: Boolean = propagateNull
@@ -220,12 +222,12 @@ case class NewInstance(
val argGen = arguments.map(_.gen(ctx))
val argString = argGen.map(_.value).mkString(", ")
- val outer = outerPointer.map(_.gen(ctx))
+ val outer = outerPointer.map(func => Literal.fromObject(func()).gen(ctx))
val setup =
s"""
${argGen.map(_.code).mkString("\n")}
- ${outer.map(_.code.mkString("")).getOrElse("")}
+ ${outer.map(_.code).getOrElse("")}
""".stripMargin
val constructorCall = outer.map { gen =>