aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-06-19 11:40:04 -0700
committerDavies Liu <davies@databricks.com>2015-06-19 11:40:04 -0700
commite41e2fd6c61076f870de03b85c5da6c12b8da038 (patch)
tree2d4fcf15fe1009d2800d4b622b4e35cd84a3cee1
parent4a462c282c72c47eeecf35b4ab227c1bc71908e5 (diff)
downloadspark-e41e2fd6c61076f870de03b85c5da6c12b8da038.tar.gz
spark-e41e2fd6c61076f870de03b85c5da6c12b8da038.tar.bz2
spark-e41e2fd6c61076f870de03b85c5da6c12b8da038.zip
[SPARK-8461] [SQL] fix codegen with REPL class loader
The REPL's ExecutorClassLoader causes Janino to fail to find classes in java.lang, so switch Janino to use the default class loader, which also helps performance. cc liancheng yhuai Author: Davies Liu <davies@databricks.com> Closes #6898 from davies/fix_class_loader and squashes the following commits: 24276d4 [Davies Liu] add regression test 4ff0457 [Davies Liu] address comment, refactor 7f5ffbe [Davies Liu] fix REPL class loader with codegen
-rw-r--r--repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala11
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala22
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala8
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala7
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala8
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala7
6 files changed, 29 insertions, 34 deletions
diff --git a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index 50fd43a418..f150fec7db 100644
--- a/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.10/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -267,6 +267,17 @@ class ReplSuite extends SparkFunSuite {
assertDoesNotContain("Exception", output)
}
+ test("SPARK-8461 SQL with codegen") {
+ val output = runInterpreter("local",
+ """
+ |val sqlContext = new org.apache.spark.sql.SQLContext(sc)
+ |sqlContext.setConf("spark.sql.codegen", "true")
+ |sqlContext.range(0, 100).filter('id > 50).count()
+ """.stripMargin)
+ assertContains("Long = 49", output)
+ assertDoesNotContain("java.lang.ClassNotFoundException", output)
+ }
+
test("SPARK-2632 importing a method from non serializable class and not using it.") {
val output = runInterpreter("local",
"""
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ab850d17a6..bd5475d206 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -203,6 +203,11 @@ class CodeGenContext {
def isPrimitiveType(dt: DataType): Boolean = primitiveTypes.contains(dt)
}
+
+abstract class GeneratedClass {
+ def generate(expressions: Array[Expression]): Any
+}
+
/**
* A base class for generators of byte code to perform expression evaluation. Includes a set of
* helpers for referring to Catalyst types and building trees that perform evaluation of individual
@@ -215,11 +220,6 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
protected val genericMutableRowType: String = classOf[GenericMutableRow].getName
/**
- * Can be flipped on manually in the console to add (expensive) expression evaluation trace code.
- */
- var debugLogging = false
-
- /**
* Generates a class for a given input expression. Called when there is not cached code
* already available.
*/
@@ -239,10 +239,14 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
*
* It will track the time used to compile
*/
- protected def compile(code: String): Class[_] = {
+ protected def compile(code: String): GeneratedClass = {
val startTime = System.nanoTime()
- val clazz = try {
- new ClassBodyEvaluator(code).getClazz()
+ val evaluator = new ClassBodyEvaluator()
+ evaluator.setParentClassLoader(getClass.getClassLoader)
+ evaluator.setDefaultImports(Array("org.apache.spark.sql.catalyst.InternalRow"))
+ evaluator.setExtendedClass(classOf[GeneratedClass])
+ try {
+ evaluator.cook(code)
} catch {
case e: Exception =>
logError(s"failed to compile:\n $code", e)
@@ -251,7 +255,7 @@ abstract class CodeGenerator[InType <: AnyRef, OutType <: AnyRef] extends Loggin
val endTime = System.nanoTime()
def timeMs: Double = (endTime - startTime).toDouble / 1000000
logDebug(s"Code (${code.size} bytes) compiled in $timeMs ms")
- clazz
+ evaluator.getClazz().newInstance().asInstanceOf[GeneratedClass]
}
/**
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
index 573a9ea0a5..e75e82d380 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateMutableProjection.scala
@@ -47,9 +47,7 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
"""
}.mkString("\n")
val code = s"""
- import org.apache.spark.sql.catalyst.InternalRow;
-
- public SpecificProjection generate($exprType[] expr) {
+ public Object generate($exprType[] expr) {
return new SpecificProjection(expr);
}
@@ -85,10 +83,8 @@ object GenerateMutableProjection extends CodeGenerator[Seq[Expression], () => Mu
logDebug(s"code for ${expressions.mkString(",")}:\n$code")
val c = compile(code)
- // fetch the only one method `generate(Expression[])`
- val m = c.getDeclaredMethods()(0)
() => {
- m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[BaseMutableProjection]
+ c.generate(ctx.references.toArray).asInstanceOf[MutableProjection]
}
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 3e9ee60f33..7ed2c5adde 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -76,8 +76,6 @@ object GenerateOrdering
}.mkString("\n")
val code = s"""
- import org.apache.spark.sql.catalyst.InternalRow;
-
public SpecificOrdering generate($exprType[] expr) {
return new SpecificOrdering(expr);
}
@@ -100,9 +98,6 @@ object GenerateOrdering
logDebug(s"Generated Ordering: $code")
- val c = compile(code)
- // fetch the only one method `generate(Expression[])`
- val m = c.getDeclaredMethods()(0)
- m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[BaseOrdering]
+ compile(code).generate(ctx.references.toArray).asInstanceOf[BaseOrdering]
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
index dad4364bdd..3ebc2c1475 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratePredicate.scala
@@ -17,7 +17,6 @@
package org.apache.spark.sql.catalyst.expressions.codegen
-import org.apache.spark.sql.catalyst
import org.apache.spark.sql.catalyst.expressions._
/**
@@ -41,8 +40,6 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
val ctx = newCodeGenContext()
val eval = predicate.gen(ctx)
val code = s"""
- import org.apache.spark.sql.catalyst.InternalRow;
-
public SpecificPredicate generate($exprType[] expr) {
return new SpecificPredicate(expr);
}
@@ -62,10 +59,7 @@ object GeneratePredicate extends CodeGenerator[Expression, (InternalRow) => Bool
logDebug(s"Generated predicate '$predicate':\n$code")
- val c = compile(code)
- // fetch the only one method `generate(Expression[])`
- val m = c.getDeclaredMethods()(0)
- val p = m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[Predicate]
+ val p = compile(code).generate(ctx.references.toArray).asInstanceOf[Predicate]
(r: InternalRow) => p.eval(r)
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
index 8b5dc194be..2e20eda1a3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateProjection.scala
@@ -147,8 +147,6 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
}.mkString("\n")
val code = s"""
- import org.apache.spark.sql.catalyst.InternalRow;
-
public SpecificProjection generate($exprType[] expr) {
return new SpecificProjection(expr);
}
@@ -220,9 +218,6 @@ object GenerateProjection extends CodeGenerator[Seq[Expression], Projection] {
logDebug(s"MutableRow, initExprs: ${expressions.mkString(",")} code:\n${code}")
- val c = compile(code)
- // fetch the only one method `generate(Expression[])`
- val m = c.getDeclaredMethods()(0)
- m.invoke(c.newInstance(), ctx.references.toArray).asInstanceOf[Projection]
+ compile(code).generate(ctx.references.toArray).asInstanceOf[Projection]
}
}