aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-07-05 22:11:40 -0700
committerReynold Xin <rxin@databricks.com>2016-07-05 22:11:40 -0700
commitec79183ac5b842e49baa40ea1c9a72ce8f108fe5 (patch)
treedc38ac34ff39a4601cd3dccc79fe1a5dbab19c5e /sql
parentec18cd0af497d170bdcec345d845d925fb2880cf (diff)
downloadspark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.tar.gz
spark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.tar.bz2
spark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.zip
[SPARK-16340][SQL] Support column arguments for `regexp_replace` Dataset operation
## What changes were proposed in this pull request? Currently, the `regexp_replace` function supports `Column` arguments in a query. This PR supports that in a `Dataset` operation, too. ## How was this patch tested? Pass the Jenkins tests with an updated test case. Author: Dongjoon Hyun <dongjoon@apache.org> Closes #14060 from dongjoon-hyun/SPARK-16340.
Diffstat (limited to 'sql')
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/functions.scala10
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala4
2 files changed, 13 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c8782df146..45d5d05d9f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2194,6 +2194,16 @@ object functions {
}
/**
+ * Replace all substrings of `e` that match the regexp `pattern` with `replacement`.
+ *
+ * @group string_funcs
+ * @since 2.1.0
+ */
+ def regexp_replace(e: Column, pattern: Column, replacement: Column): Column = withExpr {
+ RegExpReplace(e.expr, pattern.expr, replacement.expr)
+ }
+
+ /**
* Decodes a BASE64 encoded string column and returns it as a binary column.
* This is the reverse of base64.
*
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index dff4226051..3edd988496 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -77,8 +77,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext {
checkAnswer(
df.select(
regexp_replace($"a", "(\\d+)", "num"),
+ regexp_replace($"a", $"b", $"c"),
regexp_extract($"a", "(\\d+)-(\\d+)", 1)),
- Row("num-num", "100") :: Row("num-num", "100") :: Row("num-num", "100") :: Nil)
+ Row("num-num", "300", "100") :: Row("num-num", "400", "100") ::
+ Row("num-num", "400-400", "100") :: Nil)
// for testing the mutable state of the expression in code gen.
// This is a hack way to enable the codegen, thus the codegen is enable by default,