[MINOR] [SQL] Support mutable expression unit test with codegen projection

This actually contains 3 minor issues: 1) Enable the unit test (codegen) for mutable expressions (FormatNumber, Regexp_Replace/Regexp_Extract) 2) Use `PlatformDependent.copyMemory` instead of `System.arraycopy` Author: Cheng Hao <hao.cheng@intel.com> Closes #7566 from chenghao-intel/codegen_ut and squashes the following commits: 24f43ea [Cheng Hao] enable codegen for mutable expression & UTF8String performance
author: Cheng Hao <hao.cheng@intel.com> 2015-07-27 23:02:23 -0700
committer: Davies Liu <davies.liu@gmail.com> 2015-07-27 23:02:23 -0700
commit: 9c5612f4e197dec82a5eac9542896d6216a866b7 (patch)
tree: 197cef432df57b209d0b9a2eb247ea839a175f15 /sql
parent: 60f08c7c8775c0462b74bc65b41397be6eb24b6d (diff)
download: spark-9c5612f4e197dec82a5eac9542896d6216a866b7.tar.gz
spark-9c5612f4e197dec82a5eac9542896d6216a866b7.tar.bz2
spark-9c5612f4e197dec82a5eac9542896d6216a866b7.zip
2 files changed, 26 insertions, 9 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
index 38b0fb37de..edfffbc01c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala
@@ -777,7 +777,6 @@ case class Levenshtein(left: Expression, right: Expression) extends BinaryExpres
   override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)
 
   override def dataType: DataType = IntegerType
-
   protected override def nullSafeEval(leftValue: Any, rightValue: Any): Any =
     leftValue.asInstanceOf[UTF8String].levenshteinDistance(rightValue.asInstanceOf[UTF8String])
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index 0f9c986f64..8e0ea76d15 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -57,19 +57,27 @@ class StringFunctionsSuite extends QueryTest {
   }
 
   test("string regex_replace / regex_extract") {
-    val df = Seq(("100-200", "")).toDF("a", "b")
+    val df = Seq(
+      ("100-200", "(\\d+)-(\\d+)", "300"),
+      ("100-200", "(\\d+)-(\\d+)", "400"),
+      ("100-200", "(\\d+)", "400")).toDF("a", "b", "c")
 
     checkAnswer(
       df.select(
         regexp_replace($"a", "(\\d+)", "num"),
         regexp_extract($"a", "(\\d+)-(\\d+)", 1)),
-      Row("num-num", "100"))
-
-    checkAnswer(
-      df.selectExpr(
-        "regexp_replace(a, '(\\d+)', 'num')",
-        "regexp_extract(a, '(\\d+)-(\\d+)', 2)"),
-      Row("num-num", "200"))
+      Row("num-num", "100") :: Row("num-num", "100") :: Row("num-num", "100") :: Nil)
+
+    // Test the mutable state of the expression under codegen.
+    // This is a hack to force codegen: although codegen is enabled by default,
+    // the interpreted projection is still used if the projection directly follows
+    // a LocalRelation, hence we add a filter operator.
+    // See the optimizer rule `ConvertToLocalRelation`.
+    checkAnswer(
+      df.filter("isnotnull(a)").selectExpr(
+        "regexp_replace(a, b, c)",
+        "regexp_extract(a, b, 1)"),
+      Row("300", "100") :: Row("400", "100") :: Row("400-400", "100") :: Nil)
   }
 
   test("string ascii function") {
@@ -290,5 +298,15 @@ class StringFunctionsSuite extends QueryTest {
         df.selectExpr("format_number(e, g)"), // decimal type of the 2nd argument is unacceptable
         Row("5.0000"))
     }
+
+    // Test the mutable state of the expression under codegen.
+    // This is a hack to force codegen: although codegen is enabled by default,
+    // the interpreted projection is still used if the projection directly follows
+    // a LocalRelation, hence we add a filter operator.
+    // See the optimizer rule `ConvertToLocalRelation`.
+    val df2 = Seq((5L, 4), (4L, 3), (3L, 2)).toDF("a", "b")
+    checkAnswer(
+      df2.filter("b>0").selectExpr("format_number(a, b)"),
+      Row("5.0000") :: Row("4.000") :: Row("3.00") :: Nil)
   }
 }
author	Cheng Hao <hao.cheng@intel.com>	2015-07-27 23:02:23 -0700
committer	Davies Liu <davies.liu@gmail.com>	2015-07-27 23:02:23 -0700
commit	9c5612f4e197dec82a5eac9542896d6216a866b7 (patch)
tree	197cef432df57b209d0b9a2eb247ea839a175f15 /sql
parent	60f08c7c8775c0462b74bc65b41397be6eb24b6d (diff)
download	spark-9c5612f4e197dec82a5eac9542896d6216a866b7.tar.gz spark-9c5612f4e197dec82a5eac9542896d6216a866b7.tar.bz2 spark-9c5612f4e197dec82a5eac9542896d6216a866b7.zip