aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorDongjoon Hyun <dongjoon@apache.org>2016-04-05 13:31:00 -0700
committerMichael Armbrust <michael@databricks.com>2016-04-05 13:31:00 -0700
commitc59abad052b7beec4ef550049413e95578e545be (patch)
treeb3415705f58670b8bb78f485d8a5e8b90e7d5174 /sql/catalyst
parent9ee5c257176d5c7989031d260e74e3eca530c120 (diff)
downloadspark-c59abad052b7beec4ef550049413e95578e545be.tar.gz
spark-c59abad052b7beec4ef550049413e95578e545be.tar.bz2
spark-c59abad052b7beec4ef550049413e95578e545be.zip
[SPARK-14402][SQL] initcap UDF doesn't match Hive/Oracle behavior in lowercasing rest of string
## What changes were proposed in this pull request? Currently, SparkSQL `initCap` uses the `toTitleCase` function. However, the `UTF8String.toTitleCase` implementation changes only the first letter of each word and just copies the other letters unchanged: e.g. sParK --> SParK. This is the correct behavior for `toTitleCase`, but it does not match the behavior of Hive's `initcap`, which also lowercases the rest of each word. ``` hive> select initcap('sParK'); Spark ``` ``` scala> sql("select initcap('sParK')").head res0: org.apache.spark.sql.Row = [SParK] ``` This PR updates the implementation of `initcap` to use `toLowerCase` followed by `toTitleCase`. ## How was this patch tested? Pass the Jenkins tests (including a new test case). Author: Dongjoon Hyun <dongjoon@apache.org> Closes #12175 from dongjoon-hyun/SPARK-14402.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala11
-rw-r--r--sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala1
2 files changed, 9 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 3ee19cc4ad..b6ea03cd5c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -618,19 +618,24 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC
}
/**
- * Returns string, with the first letter of each word in uppercase.
+ * Returns string, with the first letter of each word in uppercase, all other letters in lowercase.
* Words are delimited by whitespace.
*/
+@ExpressionDescription(
+ usage = "_FUNC_(str) - " +
+ "Returns str, with the first letter of each word in uppercase, all other letters in " +
+ "lowercase. Words are delimited by white space.",
+ extended = "> SELECT initcap('sPark sql');\n 'Spark Sql'")
case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes {
override def inputTypes: Seq[DataType] = Seq(StringType)
override def dataType: DataType = StringType
override def nullSafeEval(string: Any): Any = {
- string.asInstanceOf[UTF8String].toTitleCase
+ string.asInstanceOf[UTF8String].toLowerCase.toTitleCase
}
override def genCode(ctx: CodegenContext, ev: ExprCode): String = {
- defineCodeGen(ctx, ev, str => s"$str.toTitleCase()")
+ defineCodeGen(ctx, ev, str => s"$str.toLowerCase().toTitleCase()")
}
}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 99e3b13ce8..2cf8ca7000 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -382,6 +382,7 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(InitCap(Literal("a b")), "A B")
checkEvaluation(InitCap(Literal(" a")), " A")
checkEvaluation(InitCap(Literal("the test")), "The Test")
+ checkEvaluation(InitCap(Literal("sParK")), "Spark")
// scalastyle:off
// non ascii characters are not allowed in the code, so we disable the scalastyle here.
checkEvaluation(InitCap(Literal("世界")), "世界")