aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src/main
diff options
context:
space:
mode:
author: Eric Liang <ekl@databricks.com> 2016-06-29 15:07:32 -0700
committer: Reynold Xin <rxin@databricks.com> 2016-06-29 15:07:32 -0700
commit: 23c58653f900bfb71ef2b3186a95ad2562c33969 (patch)
tree: cb33af47a0654ffa60f22f727951c48b6777e98b /sql/catalyst/src/main
parent: 9b1b3ae771babf127f64898d5dc110721597a760 (diff)
download: spark-23c58653f900bfb71ef2b3186a95ad2562c33969.tar.gz
spark-23c58653f900bfb71ef2b3186a95ad2562c33969.tar.bz2
spark-23c58653f900bfb71ef2b3186a95ad2562c33969.zip
[SPARK-16238] Metrics for generated method and class bytecode size
## What changes were proposed in this pull request?

This extends SPARK-15860 to include metrics for the actual bytecode size of Janino-generated classes and methods. They can be accessed in the same way as any other Codahale metric, e.g.

```
scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.getSnapshot().getValues()
res7: Array[Long] = Array(532, 532, 532, 542, 1479, 2670, 3585, 3585)

scala> org.apache.spark.metrics.source.CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.getSnapshot().getValues()
res8: Array[Long] = Array(5, 5, 5, 5, 10, 10, 10, 10, 15, 15, 15, 38, 63, 79, 88, 94, 94, 94, 132, 132, 165, 165, 220, 220)
```

## How was this patch tested?

Small unit test; also verified manually that the performance impact is minimal (<10%).

hvanhovell

Author: Eric Liang <ekl@databricks.com>

Closes #13934 from ericl/spark-16238.
Diffstat (limited to 'sql/catalyst/src/main')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 40
1 file changed, 39 insertions, 1 deletion
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 6392ff42d7..16fb1f6837 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -17,11 +17,16 @@
package org.apache.spark.sql.catalyst.expressions.codegen
+import java.io.ByteArrayInputStream
+import java.util.{Map => JavaMap}
+
+import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
import com.google.common.cache.{CacheBuilder, CacheLoader}
-import org.codehaus.janino.ClassBodyEvaluator
+import org.codehaus.janino.{ByteArrayClassLoader, ClassBodyEvaluator, SimpleCompiler}
+import org.codehaus.janino.util.ClassFile
import scala.language.existentials
import org.apache.spark.SparkEnv
@@ -876,6 +881,7 @@ object CodeGenerator extends Logging {
try {
evaluator.cook("generated.java", code.body)
+ recordCompilationStats(evaluator)
} catch {
case e: Exception =>
val msg = s"failed to compile: $e\n$formatted"
@@ -886,6 +892,38 @@ object CodeGenerator extends Logging {
}
/**
+ * Records the generated class and method bytecode sizes by inspecting janino private fields.
+ *
+ * For every class produced by `evaluator`, the size of its class file is reported to
+ * `CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE`, and the length of each method's
+ * `Code` attribute is reported to `CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE`.
+ *
+ * Collection is best-effort. It relies on reflection over janino internals, which may be
+ * renamed or retyped between janino versions. Because this method is invoked right after
+ * `cook` inside the compilation `try` block, a failure here must not surface as a
+ * "failed to compile" error, so non-fatal errors are logged and swallowed.
+ *
+ * @param evaluator the evaluator whose source has already been cooked (compiled)
+ */
+ private def recordCompilationStats(evaluator: ClassBodyEvaluator): Unit = {
+   import scala.util.control.NonFatal
+
+   try {
+     // First retrieve the generated classes (class name -> class file bytes).
+     val classes = {
+       val resultField = classOf[SimpleCompiler].getDeclaredField("result")
+       resultField.setAccessible(true)
+       val loader = resultField.get(evaluator).asInstanceOf[ByteArrayClassLoader]
+       // Look the field up on the declaring class: getDeclaredField does not search
+       // superclasses, so using loader.getClass would break for any subclass.
+       val classesField = classOf[ByteArrayClassLoader].getDeclaredField("classes")
+       classesField.setAccessible(true)
+       classesField.get(loader).asInstanceOf[JavaMap[String, Array[Byte]]].asScala
+     }
+
+     // Then walk the classes to get at the method bytecode. CodeAttribute is not publicly
+     // accessible, so it is resolved by name and its `code` field is read reflectively.
+     val codeAttr = Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
+     val codeAttrField = codeAttr.getDeclaredField("code")
+     codeAttrField.setAccessible(true)
+     classes.foreach { case (_, classBytes) =>
+       CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classBytes.length)
+       val cf = new ClassFile(new ByteArrayInputStream(classBytes))
+       cf.methodInfos.asScala.foreach { method =>
+         method.getAttributes().foreach { a =>
+           if (a.getClass.getName == codeAttr.getName) {
+             CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE.update(
+               codeAttrField.get(a).asInstanceOf[Array[Byte]].length)
+           }
+         }
+       }
+     }
+   } catch {
+     // Metrics are diagnostic only; never let them fail an otherwise successful compile.
+     case NonFatal(e) =>
+       logWarning("Error calculating stats of compiled class.", e)
+   }
+ }
+
+ /**
* A cache of generated classes.
*
* From the Guava Docs: A Cache is similar to ConcurrentMap, but not quite the same. The most