diff options
author | Dongjoon Hyun <dongjoon@apache.org> | 2016-04-05 13:31:00 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-04-05 13:31:00 -0700 |
commit | c59abad052b7beec4ef550049413e95578e545be (patch) | |
tree | b3415705f58670b8bb78f485d8a5e8b90e7d5174 | |
parent | 9ee5c257176d5c7989031d260e74e3eca530c120 (diff) | |
download | spark-c59abad052b7beec4ef550049413e95578e545be.tar.gz spark-c59abad052b7beec4ef550049413e95578e545be.tar.bz2 spark-c59abad052b7beec4ef550049413e95578e545be.zip |
[SPARK-14402][SQL] initcap UDF doesn't match Hive/Oracle behavior in lowercasing rest of string
## What changes were proposed in this pull request?
Current, SparkSQL `initCap` is using `toTitleCase` function. However, `UTF8String.toTitleCase` implementation changes only the first letter and just copy the other letters: e.g. sParK --> SParK. This is the correct implementation `toTitleCase`.
```
hive> select initcap('sParK');
Spark
```
```
scala> sql("select initcap('sParK')").head
res0: org.apache.spark.sql.Row = [SParK]
```
This PR updates the implementation of `initcap` using `toLowerCase` and `toTitleCase`.
## How was this patch tested?
Pass the Jenkins tests (including new testcase).
Author: Dongjoon Hyun <dongjoon@apache.org>
Closes #12175 from dongjoon-hyun/SPARK-14402.
3 files changed, 12 insertions, 6 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 3ee19cc4ad..b6ea03cd5c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -618,19 +618,24 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC } /** - * Returns string, with the first letter of each word in uppercase. + * Returns string, with the first letter of each word in uppercase, all other letters in lowercase. * Words are delimited by whitespace. */ +@ExpressionDescription( + usage = "_FUNC_(str) - " + + "Returns str, with the first letter of each word in uppercase, all other letters in " + + "lowercase. Words are delimited by white space.", + extended = "> SELECT initcap('sPark sql');\n 'Spark Sql'") case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { override def inputTypes: Seq[DataType] = Seq(StringType) override def dataType: DataType = StringType override def nullSafeEval(string: Any): Any = { - string.asInstanceOf[UTF8String].toTitleCase + string.asInstanceOf[UTF8String].toLowerCase.toTitleCase } override def genCode(ctx: CodegenContext, ev: ExprCode): String = { - defineCodeGen(ctx, ev, str => s"$str.toTitleCase()") + defineCodeGen(ctx, ev, str => s"$str.toLowerCase().toTitleCase()") } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 99e3b13ce8..2cf8ca7000 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -382,6 +382,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(InitCap(Literal("a b")), "A B") checkEvaluation(InitCap(Literal(" a")), " A") checkEvaluation(InitCap(Literal("the test")), "The Test") + checkEvaluation(InitCap(Literal("sParK")), "Spark") // scalastyle:off // non ascii characters are not allowed in the code, so we disable the scalastyle here. checkEvaluation(InitCap(Literal("世界")), "世界") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index e2090b0a83..6809f26968 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -272,12 +272,12 @@ class StringFunctionsSuite extends QueryTest with SharedSQLContext { } test("initcap function") { - val df = Seq(("ab", "a B")).toDF("l", "r") + val df = Seq(("ab", "a B", "sParK")).toDF("x", "y", "z") checkAnswer( - df.select(initcap($"l"), initcap($"r")), Row("Ab", "A B")) + df.select(initcap($"x"), initcap($"y"), initcap($"z")), Row("Ab", "A B", "Spark")) checkAnswer( - df.selectExpr("InitCap(l)", "InitCap(r)"), Row("Ab", "A B")) + df.selectExpr("InitCap(x)", "InitCap(y)", "InitCap(z)"), Row("Ab", "A B", "Spark")) } test("number format function") { |