diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-07-05 22:11:40 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-07-05 22:11:40 -0700 |
commit | ec79183ac5b842e49baa40ea1c9a72ce8f108fe5 (patch) | |
tree | dc38ac34ff39a4601cd3dccc79fe1a5dbab19c5e /sql/core | |
parent | ec18cd0af497d170bdcec345d845d925fb2880cf (diff) | |
download | spark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.tar.gz spark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.tar.bz2 spark-ec79183ac5b842e49baa40ea1c9a72ce8f108fe5.zip |
[SPARK-16340][SQL] Support column arguments for `regexp_replace` Dataset operation
## What changes were proposed in this pull request?
Currently, the `regexp_replace` function supports `Column` arguments in a query. This PR adds the same support to the `Dataset` operation, too.
## How was this patch tested?
Pass the Jenkins tests with an updated test case.
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #14060 from dongjoon-hyun/SPARK-16340.
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 10 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala | 4 |
2 files changed, 13 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index c8782df146..45d5d05d9f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2194,6 +2194,16 @@ object functions {
   }
 
   /**
+   * Replace all substrings of the specified string value that match regexp with rep.
+   *
+   * @group string_funcs
+   * @since 2.1.0
+   */
+  def regexp_replace(e: Column, pattern: Column, replacement: Column): Column = withExpr {
+    RegExpReplace(e.expr, pattern.expr, replacement.expr)
+  }
+
+  /**
    * Decodes a BASE64 encoded string column and returns it as a binary column.
    * This is the reverse of base64.
    *
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
index dff4226051..3edd988496 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala
@@ -77,8 +77,10 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext {
     checkAnswer(
       df.select(
         regexp_replace($"a", "(\\d+)", "num"),
+        regexp_replace($"a", $"b", $"c"),
         regexp_extract($"a", "(\\d+)-(\\d+)", 1)),
-      Row("num-num", "100") :: Row("num-num", "100") :: Row("num-num", "100") :: Nil)
+      Row("num-num", "300", "100") :: Row("num-num", "400", "100") ::
+        Row("num-num", "400-400", "100") :: Nil)
 
     // for testing the mutable state of the expression in code gen.
     // This is a hack way to enable the codegen, thus the codegen is enable by default,