aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-03-25 20:19:04 +0800
committerCheng Lian <lian@databricks.com>2016-03-25 20:19:04 +0800
commite9b6e7d8577cd721a433130f29e8b112d98768b9 (patch)
treeebafcac42412b9411271d36088cf4e3c1313ada5
parent70a6f0bb57ca2248444157e2707fbcc3cb04e3bc (diff)
downloadspark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.gz
spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.tar.bz2
spark-e9b6e7d8577cd721a433130f29e8b112d98768b9.zip
[SPARK-13456][SQL][FOLLOW-UP] lazily generate the outer pointer for case class defined in REPL
## What changes were proposed in this pull request? In https://github.com/apache/spark/pull/11410, we missed a corner case: define the inner class and use it in `Dataset` at the same time by using paste mode. For this case, the inner class and the `Dataset` are inside the same line object; when we build the `Dataset`, we try to get the outer pointer from the line object, and it will fail because the line object is not initialized yet. https://issues.apache.org/jira/browse/SPARK-13456?focusedCommentId=15209174&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15209174 is an example of this corner case. This PR makes the process of getting the outer pointer from the line object lazy, so that we can successfully build the `Dataset` and finish initializing the line object. ## How was this patch tested? new test in repl suite. Author: Wenchen Fan <wenchen@databricks.com> Closes #11931 from cloud-fan/repl.
-rw-r--r--repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala15
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala39
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala12
4 files changed, 48 insertions, 20 deletions
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index f148a6df47..dbfacba346 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -59,6 +59,10 @@ class ReplSuite extends SparkFunSuite {
return out.toString
}
+ // Simulate the paste mode in Scala REPL.
+ def runInterpreterInPasteMode(master: String, input: String): String =
+ runInterpreter(master, ":paste\n" + input + 4.toChar) // 4 is the ASCII code of CTRL + D
+
def assertContains(message: String, output: String) {
val isContain = output.contains(message)
assert(isContain,
@@ -381,4 +385,15 @@ class ReplSuite extends SparkFunSuite {
assertDoesNotContain("error:", output)
assertDoesNotContain("Exception", output)
}
+
+ test("define case class and create Dataset together with paste mode") {
+ val output = runInterpreterInPasteMode("local-cluster[1,1,1024]",
+ """
+ |import sqlContext.implicits._
+ |case class TestClass(value: Int)
+ |Seq(TestClass(1)).toDS()
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ }
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index b344e041a5..89b18af9a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -605,7 +605,7 @@ class Analyzer(
"access to the scope that this class was defined in.\n" +
"Try moving this class out of its parent class.")
}
- n.copy(outerPointer = Some(Literal.fromObject(outer)))
+ n.copy(outerPointer = Some(outer))
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
index c047e96463..a1f0312bd8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
@@ -42,7 +42,12 @@ object OuterScopes {
outerScopes.putIfAbsent(outer.getClass.getName, outer)
}
- def getOuterScope(innerCls: Class[_]): AnyRef = {
+ /**
+ * Returns a function which can get the outer scope for the given inner class. By using function
+ * as return type, we can delay the process of getting outer pointer to execution time, which is
+ * useful for inner class defined in REPL.
+ */
+ def getOuterScope(innerCls: Class[_]): () => AnyRef = {
assert(innerCls.isMemberClass)
val outerClassName = innerCls.getDeclaringClass.getName
val outer = outerScopes.get(outerClassName)
@@ -53,24 +58,30 @@ object OuterScopes {
// `INSTANCE()` method to get the single instance of class `$read`. Then call `$iw()`
// method multiply times to get the single instance of the inner most `$iw` class.
case REPLClass(baseClassName) =>
- val objClass = Utils.classForName(baseClassName + "$")
- val objInstance = objClass.getField("MODULE$").get(null)
- val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
- val baseClass = Utils.classForName(baseClassName)
+ () => {
+ val objClass = Utils.classForName(baseClassName + "$")
+ val objInstance = objClass.getField("MODULE$").get(null)
+ val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
+ val baseClass = Utils.classForName(baseClassName)
- var getter = iwGetter(baseClass)
- var obj = baseInstance
- while (getter != null) {
- obj = getter.invoke(obj)
- getter = iwGetter(getter.getReturnType)
- }
+ var getter = iwGetter(baseClass)
+ var obj = baseInstance
+ while (getter != null) {
+ obj = getter.invoke(obj)
+ getter = iwGetter(getter.getReturnType)
+ }
- outerScopes.putIfAbsent(outerClassName, obj)
- obj
+ if (obj == null) {
+ throw new RuntimeException(s"Failed to get outer pointer for ${innerCls.getName}")
+ }
+
+ outerScopes.putIfAbsent(outerClassName, obj)
+ obj
+ }
case _ => null
}
} else {
- outer
+ () => outer
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 7eba617fcd..07b67a0240 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -197,15 +197,17 @@ object NewInstance {
* @param dataType The type of object being constructed, as a Spark SQL datatype. This allows you
* to manually specify the type when the object in question is a valid internal
* representation (i.e. ArrayData) instead of an object.
- * @param outerPointer If the object being constructed is an inner class the outerPointer must
- * for the containing class must be specified.
+ * @param outerPointer If the object being constructed is an inner class, the outerPointer for the
+ * containing class must be specified. This parameter is defined as an optional
+ * function, which allows us to get the outer pointer lazily, and it's useful if
+ * the inner class is defined in REPL.
*/
case class NewInstance(
cls: Class[_],
arguments: Seq[Expression],
propagateNull: Boolean,
dataType: DataType,
- outerPointer: Option[Literal]) extends Expression with NonSQLExpression {
+ outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression {
private val className = cls.getName
override def nullable: Boolean = propagateNull
@@ -220,12 +222,12 @@ case class NewInstance(
val argGen = arguments.map(_.gen(ctx))
val argString = argGen.map(_.value).mkString(", ")
- val outer = outerPointer.map(_.gen(ctx))
+ val outer = outerPointer.map(func => Literal.fromObject(func()).gen(ctx))
val setup =
s"""
${argGen.map(_.code).mkString("\n")}
- ${outer.map(_.code.mkString("")).getOrElse("")}
+ ${outer.map(_.code).getOrElse("")}
""".stripMargin
val constructorCall = outer.map { gen =>