about summary refs log tree commit diff
path: root/sql/catalyst
diff options
context:
space:
mode:
author Reynold Xin <rxin@databricks.com> 2016-07-16 23:42:28 -0700
committer Reynold Xin <rxin@databricks.com> 2016-07-16 23:42:28 -0700
commit 7b84758034b9bceca1168438ef5d0beefd5b5273 (patch)
tree 793b02a86aeac59304eb4ac5fb9628b7214089fb /sql/catalyst
parent c33e4b0d96d424568963c7e716c20f02949c72d1 (diff)
download spark-7b84758034b9bceca1168438ef5d0beefd5b5273.tar.gz
spark-7b84758034b9bceca1168438ef5d0beefd5b5273.tar.bz2
spark-7b84758034b9bceca1168438ef5d0beefd5b5273.zip
[SPARK-16584][SQL] Move regexp unit tests to RegexpExpressionsSuite
## What changes were proposed in this pull request?

This patch moves regexp-related unit tests from StringExpressionsSuite to RegexpExpressionsSuite to match the file name for regexp expressions.

## How was this patch tested?

This is a test-only change.

Author: Reynold Xin <rxin@databricks.com>

Closes #14230 from rxin/SPARK-16584.
Diffstat (limited to 'sql/catalyst')
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala 194
-rw-r--r-- sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala 164
2 files changed, 194 insertions, 164 deletions
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
new file mode 100644
index 0000000000..5299549e7b
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RegexpExpressionsSuite.scala
@@ -0,0 +1,194 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.expressions
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.catalyst.dsl.expressions._
+import org.apache.spark.sql.types.StringType
+
+/**
+ * Unit tests for regexp SQL expressions: LIKE, RLIKE, RegExpReplace, RegExpExtract, StringSplit.
+ */
+class RegexpExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
+
+ test("LIKE literal Regular Expression") { // LIKE with patterns given directly as literal expressions
+ checkEvaluation(Literal.create(null, StringType).like("a"), null) // null string operand: null expected
+ checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, StringType)), null) // null pattern: null expected
+ checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, StringType)), null) // both operands null
+ checkEvaluation( // the next four checks use a NonFoldableLiteral as the pattern
+ Literal.create("a", StringType).like(NonFoldableLiteral.create("a", StringType)), true)
+ checkEvaluation(
+ Literal.create("a", StringType).like(NonFoldableLiteral.create(null, StringType)), null)
+ checkEvaluation(
+ Literal.create(null, StringType).like(NonFoldableLiteral.create("a", StringType)), null)
+ checkEvaluation(
+ Literal.create(null, StringType).like(NonFoldableLiteral.create(null, StringType)), null)
+
+ checkEvaluation("abdef" like "abdef", true) // pattern without wildcards equal to the input
+ checkEvaluation("a_%b" like "a\\__b", true) // escaped underscore followed by an unescaped underscore
+ checkEvaluation("addb" like "a_%b", true) // unescaped underscore and percent both act as wildcards
+ checkEvaluation("addb" like "a\\__b", false) // the escaped underscore is expected not to match the letter d
+ checkEvaluation("addb" like "a%\\%b", false) // the escaped percent is expected not to match the letter d
+ checkEvaluation("a_%b" like "a%\\%b", true)
+ checkEvaluation("addb" like "a%", true)
+ checkEvaluation("addb" like "**", false) // regex metacharacters in a LIKE pattern: false expected, not an error
+ checkEvaluation("abc" like "a%", true)
+ checkEvaluation("abc" like "b%", false) // expected false: the input does not start with b
+ checkEvaluation("abc" like "bc%", false)
+ checkEvaluation("a\nb" like "a_b", true) // underscore is expected to match a newline character
+ checkEvaluation("ab" like "a%b", true) // percent is expected to match an empty run
+ checkEvaluation("a\nb" like "a%b", true) // percent is expected to match a newline character
+ }
+
+ test("LIKE Non-literal Regular Expression") { // same LIKE cases, with the pattern supplied through the input row
+ val regEx = 'a.string.at(0) // presumably a string column bound to ordinal 0 of the row - TODO confirm against the catalyst DSL
+ checkEvaluation("abcd" like regEx, null, create_row(null)) // null pattern in the row: null expected
+ checkEvaluation("abdef" like regEx, true, create_row("abdef"))
+ checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
+ checkEvaluation("addb" like regEx, true, create_row("a_%b"))
+ checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
+ checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
+ checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
+ checkEvaluation("addb" like regEx, true, create_row("a%"))
+ checkEvaluation("addb" like regEx, false, create_row("**")) // false expected, not an error
+ checkEvaluation("abc" like regEx, true, create_row("a%"))
+ checkEvaluation("abc" like regEx, false, create_row("b%"))
+ checkEvaluation("abc" like regEx, false, create_row("bc%"))
+ checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
+ checkEvaluation("ab" like regEx, true, create_row("a%b"))
+ checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
+
+ checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%")) // null string operand with a non-null row pattern
+ }
+
+ test("RLIKE literal Regular Expression") { // RLIKE with patterns given directly as literal expressions
+ checkEvaluation(Literal.create(null, StringType) rlike "abdef", null) // null string operand: null expected
+ checkEvaluation("abdef" rlike Literal.create(null, StringType), null) // null pattern: null expected
+ checkEvaluation(Literal.create(null, StringType) rlike Literal.create(null, StringType), null)
+ checkEvaluation("abdef" rlike NonFoldableLiteral.create("abdef", StringType), true) // NonFoldableLiteral pattern from here to the blank line
+ checkEvaluation("abdef" rlike NonFoldableLiteral.create(null, StringType), null)
+ checkEvaluation(
+ Literal.create(null, StringType) rlike NonFoldableLiteral.create("abdef", StringType), null)
+ checkEvaluation(
+ Literal.create(null, StringType) rlike NonFoldableLiteral.create(null, StringType), null)
+
+ checkEvaluation("abdef" rlike "abdef", true)
+ checkEvaluation("abbbbc" rlike "a.*c", true)
+
+ checkEvaluation("fofo" rlike "^fo", true) // a match on a prefix of the input is expected to be enough
+ checkEvaluation("fo\no" rlike "^fo\no$", true) // pattern and input both contain a newline character
+ checkEvaluation("Bn" rlike "^Ba*n", true)
+ checkEvaluation("afofo" rlike "fo", true) // unanchored pattern: a match inside the input is expected to be enough
+ checkEvaluation("afofo" rlike "^fo", false)
+ checkEvaluation("Baan" rlike "^Ba?n", false)
+ checkEvaluation("axe" rlike "pi|apa", false)
+ checkEvaluation("pip" rlike "^(pi)*$", false)
+
+ checkEvaluation("abc" rlike "^ab", true)
+ checkEvaluation("abc" rlike "^bc", false)
+ checkEvaluation("abc" rlike "^ab", true) // NOTE(review): repeats the assertion two lines above
+ checkEvaluation("abc" rlike "^bc", false) // NOTE(review): repeats the assertion two lines above
+
+ intercept[java.util.regex.PatternSyntaxException] { // an invalid regex is expected to throw instead of returning a value
+ evaluate("abbbbc" rlike "**")
+ }
+ }
+
+ test("RLIKE Non-literal Regular Expression") { // RLIKE with the pattern supplied through the input row
+ val regEx = 'a.string.at(0) // presumably a string column bound to ordinal 0 of the row - TODO confirm against the catalyst DSL
+ checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
+ checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
+ checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
+ checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
+ checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
+
+ intercept[java.util.regex.PatternSyntaxException] { // an invalid regex taken from the row is also expected to throw
+ evaluate("abbbbc" rlike regEx, create_row("**"))
+ }
+ }
+
+
+ test("RegexReplace") { // rows hold (subject, pattern, replacement) in columns 0, 1 and 2
+ val row1 = create_row("100-200", "(\\d+)", "num")
+ val row2 = create_row("100-200", "(\\d+)", "###")
+ val row3 = create_row("100-200", "(-)", "###")
+ val row4 = create_row(null, "(\\d+)", "###") // null subject
+ val row5 = create_row("100-200", null, "###") // null pattern
+ val row6 = create_row("100-200", "(-)", null) // null replacement
+
+ val s = 's.string.at(0)
+ val p = 'p.string.at(1)
+ val r = 'r.string.at(2)
+
+ val expr = RegExpReplace(s, p, r)
+ checkEvaluation(expr, "num-num", row1) // both digit runs are expected to be replaced
+ checkEvaluation(expr, "###-###", row2)
+ checkEvaluation(expr, "100###200", row3)
+ checkEvaluation(expr, null, row4) // a null in any of the three columns: null expected
+ checkEvaluation(expr, null, row5)
+ checkEvaluation(expr, null, row6)
+
+ val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num")) // all three arguments are literals
+ checkEvaluation(nonNullExpr, "num-num", row1) // row1 is passed, but the expression has no column references
+ }
+
+ test("RegexExtract") { // rows hold (subject, pattern, group index) in columns 0, 1 and 2
+ val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1)
+ val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2)
+ val row3 = create_row("100-200", "(\\d+).*", 1)
+ val row4 = create_row("100-200", "([a-z])", 1) // pattern with no match in the subject
+ val row5 = create_row(null, "([a-z])", 1) // null subject
+ val row6 = create_row("100-200", null, 1) // null pattern
+ val row7 = create_row("100-200", "([a-z])", null) // null group index
+
+ val s = 's.string.at(0)
+ val p = 'p.string.at(1)
+ val r = 'r.int.at(2)
+
+ val expr = RegExpExtract(s, p, r)
+ checkEvaluation(expr, "100", row1)
+ checkEvaluation(expr, "200", row2)
+ checkEvaluation(expr, "100", row3)
+ checkEvaluation(expr, "", row4) // the pattern matches nothing, so an empty string is expected
+ checkEvaluation(expr, null, row5) // a null in any of the three columns: null expected
+ checkEvaluation(expr, null, row6)
+ checkEvaluation(expr, null, row7)
+
+ val expr1 = new RegExpExtract(s, p) // two-argument constructor, no explicit group index
+ checkEvaluation(expr1, "100", row1) // same result as group 1 above; presumably the default index - TODO confirm
+
+ val nonNullExpr = RegExpExtract(Literal("100-200"), Literal("(\\d+)-(\\d+)"), Literal(1)) // all three arguments are literals
+ checkEvaluation(nonNullExpr, "100", row1)
+ }
+
+ test("SPLIT") { // rows hold (string, regex delimiter) in columns 0 and 1
+ val s1 = 'a.string.at(0)
+ val s2 = 'b.string.at(1)
+ val row1 = create_row("aa2bb3cc", "[1-9]+")
+ val row2 = create_row(null, "[1-9]+") // null string
+ val row3 = create_row("aa2bb3cc", null) // null delimiter
+
+ checkEvaluation( // literal arguments
+ StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", "cc"), row1)
+ checkEvaluation( // same split with both arguments read from the row
+ StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
+ checkEvaluation(StringSplit(s1, s2), null, row2) // null in either column: null expected
+ checkEvaluation(StringSplit(s1, s2), null, row3)
+ }
+
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 5b9ed83a4e..fdb9fa31f0 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -254,102 +254,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
SubstringIndex(Literal("www||apache||org"), Literal( "||"), Literal(2)), "www||apache")
}
- test("LIKE literal Regular Expression") {
- checkEvaluation(Literal.create(null, StringType).like("a"), null)
- checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, StringType)), null)
- checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, StringType)), null)
- checkEvaluation(
- Literal.create("a", StringType).like(NonFoldableLiteral.create("a", StringType)), true)
- checkEvaluation(
- Literal.create("a", StringType).like(NonFoldableLiteral.create(null, StringType)), null)
- checkEvaluation(
- Literal.create(null, StringType).like(NonFoldableLiteral.create("a", StringType)), null)
- checkEvaluation(
- Literal.create(null, StringType).like(NonFoldableLiteral.create(null, StringType)), null)
-
- checkEvaluation("abdef" like "abdef", true)
- checkEvaluation("a_%b" like "a\\__b", true)
- checkEvaluation("addb" like "a_%b", true)
- checkEvaluation("addb" like "a\\__b", false)
- checkEvaluation("addb" like "a%\\%b", false)
- checkEvaluation("a_%b" like "a%\\%b", true)
- checkEvaluation("addb" like "a%", true)
- checkEvaluation("addb" like "**", false)
- checkEvaluation("abc" like "a%", true)
- checkEvaluation("abc" like "b%", false)
- checkEvaluation("abc" like "bc%", false)
- checkEvaluation("a\nb" like "a_b", true)
- checkEvaluation("ab" like "a%b", true)
- checkEvaluation("a\nb" like "a%b", true)
- }
-
- test("LIKE Non-literal Regular Expression") {
- val regEx = 'a.string.at(0)
- checkEvaluation("abcd" like regEx, null, create_row(null))
- checkEvaluation("abdef" like regEx, true, create_row("abdef"))
- checkEvaluation("a_%b" like regEx, true, create_row("a\\__b"))
- checkEvaluation("addb" like regEx, true, create_row("a_%b"))
- checkEvaluation("addb" like regEx, false, create_row("a\\__b"))
- checkEvaluation("addb" like regEx, false, create_row("a%\\%b"))
- checkEvaluation("a_%b" like regEx, true, create_row("a%\\%b"))
- checkEvaluation("addb" like regEx, true, create_row("a%"))
- checkEvaluation("addb" like regEx, false, create_row("**"))
- checkEvaluation("abc" like regEx, true, create_row("a%"))
- checkEvaluation("abc" like regEx, false, create_row("b%"))
- checkEvaluation("abc" like regEx, false, create_row("bc%"))
- checkEvaluation("a\nb" like regEx, true, create_row("a_b"))
- checkEvaluation("ab" like regEx, true, create_row("a%b"))
- checkEvaluation("a\nb" like regEx, true, create_row("a%b"))
-
- checkEvaluation(Literal.create(null, StringType) like regEx, null, create_row("bc%"))
- }
-
- test("RLIKE literal Regular Expression") {
- checkEvaluation(Literal.create(null, StringType) rlike "abdef", null)
- checkEvaluation("abdef" rlike Literal.create(null, StringType), null)
- checkEvaluation(Literal.create(null, StringType) rlike Literal.create(null, StringType), null)
- checkEvaluation("abdef" rlike NonFoldableLiteral.create("abdef", StringType), true)
- checkEvaluation("abdef" rlike NonFoldableLiteral.create(null, StringType), null)
- checkEvaluation(
- Literal.create(null, StringType) rlike NonFoldableLiteral.create("abdef", StringType), null)
- checkEvaluation(
- Literal.create(null, StringType) rlike NonFoldableLiteral.create(null, StringType), null)
-
- checkEvaluation("abdef" rlike "abdef", true)
- checkEvaluation("abbbbc" rlike "a.*c", true)
-
- checkEvaluation("fofo" rlike "^fo", true)
- checkEvaluation("fo\no" rlike "^fo\no$", true)
- checkEvaluation("Bn" rlike "^Ba*n", true)
- checkEvaluation("afofo" rlike "fo", true)
- checkEvaluation("afofo" rlike "^fo", false)
- checkEvaluation("Baan" rlike "^Ba?n", false)
- checkEvaluation("axe" rlike "pi|apa", false)
- checkEvaluation("pip" rlike "^(pi)*$", false)
-
- checkEvaluation("abc" rlike "^ab", true)
- checkEvaluation("abc" rlike "^bc", false)
- checkEvaluation("abc" rlike "^ab", true)
- checkEvaluation("abc" rlike "^bc", false)
-
- intercept[java.util.regex.PatternSyntaxException] {
- evaluate("abbbbc" rlike "**")
- }
- }
-
- test("RLIKE Non-literal Regular Expression") {
- val regEx = 'a.string.at(0)
- checkEvaluation("abdef" rlike regEx, true, create_row("abdef"))
- checkEvaluation("abbbbc" rlike regEx, true, create_row("a.*c"))
- checkEvaluation("fofo" rlike regEx, true, create_row("^fo"))
- checkEvaluation("fo\no" rlike regEx, true, create_row("^fo\no$"))
- checkEvaluation("Bn" rlike regEx, true, create_row("^Ba*n"))
-
- intercept[java.util.regex.PatternSyntaxException] {
- evaluate("abbbbc" rlike regEx, create_row("**"))
- }
- }
-
test("ascii for string") {
val a = 'a.string.at(0)
checkEvaluation(Ascii(Literal("efg")), 101, create_row("abdef"))
@@ -612,74 +516,6 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(StringSpace(s1), null, row2)
}
- test("RegexReplace") {
- val row1 = create_row("100-200", "(\\d+)", "num")
- val row2 = create_row("100-200", "(\\d+)", "###")
- val row3 = create_row("100-200", "(-)", "###")
- val row4 = create_row(null, "(\\d+)", "###")
- val row5 = create_row("100-200", null, "###")
- val row6 = create_row("100-200", "(-)", null)
-
- val s = 's.string.at(0)
- val p = 'p.string.at(1)
- val r = 'r.string.at(2)
-
- val expr = RegExpReplace(s, p, r)
- checkEvaluation(expr, "num-num", row1)
- checkEvaluation(expr, "###-###", row2)
- checkEvaluation(expr, "100###200", row3)
- checkEvaluation(expr, null, row4)
- checkEvaluation(expr, null, row5)
- checkEvaluation(expr, null, row6)
-
- val nonNullExpr = RegExpReplace(Literal("100-200"), Literal("(\\d+)"), Literal("num"))
- checkEvaluation(nonNullExpr, "num-num", row1)
- }
-
- test("RegexExtract") {
- val row1 = create_row("100-200", "(\\d+)-(\\d+)", 1)
- val row2 = create_row("100-200", "(\\d+)-(\\d+)", 2)
- val row3 = create_row("100-200", "(\\d+).*", 1)
- val row4 = create_row("100-200", "([a-z])", 1)
- val row5 = create_row(null, "([a-z])", 1)
- val row6 = create_row("100-200", null, 1)
- val row7 = create_row("100-200", "([a-z])", null)
-
- val s = 's.string.at(0)
- val p = 'p.string.at(1)
- val r = 'r.int.at(2)
-
- val expr = RegExpExtract(s, p, r)
- checkEvaluation(expr, "100", row1)
- checkEvaluation(expr, "200", row2)
- checkEvaluation(expr, "100", row3)
- checkEvaluation(expr, "", row4) // will not match anything, empty string get
- checkEvaluation(expr, null, row5)
- checkEvaluation(expr, null, row6)
- checkEvaluation(expr, null, row7)
-
- val expr1 = new RegExpExtract(s, p)
- checkEvaluation(expr1, "100", row1)
-
- val nonNullExpr = RegExpExtract(Literal("100-200"), Literal("(\\d+)-(\\d+)"), Literal(1))
- checkEvaluation(nonNullExpr, "100", row1)
- }
-
- test("SPLIT") {
- val s1 = 'a.string.at(0)
- val s2 = 'b.string.at(1)
- val row1 = create_row("aa2bb3cc", "[1-9]+")
- val row2 = create_row(null, "[1-9]+")
- val row3 = create_row("aa2bb3cc", null)
-
- checkEvaluation(
- StringSplit(Literal("aa2bb3cc"), Literal("[1-9]+")), Seq("aa", "bb", "cc"), row1)
- checkEvaluation(
- StringSplit(s1, s2), Seq("aa", "bb", "cc"), row1)
- checkEvaluation(StringSplit(s1, s2), null, row2)
- checkEvaluation(StringSplit(s1, s2), null, row3)
- }
-
test("length for string / binary") {
val a = 'a.string.at(0)
val b = 'b.binary.at(0)