diff options
author | HuJiayin <jiayin.hu@intel.com> | 2015-08-01 21:44:57 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2015-08-01 21:44:57 -0700 |
commit | 00cd92f32f17ca57d47aa2dcc716eb707aaee799 (patch) | |
tree | 87fae8a2daea19abc7dee69b551c5c0e6f54bf4b /sql/catalyst/src | |
parent | 5d9e33d9a2633e45082ac395a64646364f22f4c4 (diff) | |
download | spark-00cd92f32f17ca57d47aa2dcc716eb707aaee799.tar.gz spark-00cd92f32f17ca57d47aa2dcc716eb707aaee799.tar.bz2 spark-00cd92f32f17ca57d47aa2dcc716eb707aaee799.zip |
[SPARK-8269] [SQL] string function: initcap
This PR is based on #7208, thanks to HuJiayin
Closes #7208
Author: HuJiayin <jiayin.hu@intel.com>
Author: Davies Liu <davies@databricks.com>
Closes #7850 from davies/initcap and squashes the following commits:
54472e9 [Davies Liu] fix python test
17ffe51 [Davies Liu] Merge branch 'master' of github.com:apache/spark into initcap
ca46390 [Davies Liu] Merge branch 'master' of github.com:apache/spark into initcap
3a906e4 [Davies Liu] implement title case in UTF8String
8b2506a [HuJiayin] Update functions.py
2cd43e5 [HuJiayin] fix python style check
b616c0e [HuJiayin] add python api
1f5a0ef [HuJiayin] add codegen
7e0c604 [HuJiayin] Merge branch 'master' of https://github.com/apache/spark into initcap
6a0b958 [HuJiayin] add column
c79482d [HuJiayin] support soundex
7ce416b [HuJiayin] support initcap rebase code
Diffstat (limited to 'sql/catalyst/src')
3 files changed, 30 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 6e144518bb..8fafd7778a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -178,6 +178,7 @@ object FunctionRegistry { expression[Encode]("encode"), expression[Decode]("decode"), expression[FormatNumber]("format_number"), + expression[InitCap]("initcap"), expression[Lower]("lcase"), expression[Lower]("lower"), expression[Length]("length"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 4d78c55497..80c64e5689 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -598,6 +598,23 @@ case class FormatString(children: Expression*) extends Expression with ImplicitC } /** + * Returns string, with the first letter of each word in uppercase. + * Words are delimited by whitespace. + */ +case class InitCap(child: Expression) extends UnaryExpression with ImplicitCastInputTypes { + + override def inputTypes: Seq[DataType] = Seq(StringType) + override def dataType: DataType = StringType + + override def nullSafeEval(string: Any): Any = { + string.asInstanceOf[UTF8String].toTitleCase + } + override def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + defineCodeGen(ctx, ev, str => s"$str.toTitleCase()") + } +} + +/** * Returns the string which repeat the given string value n times. 
*/ case class StringRepeat(str: Expression, times: Expression) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 89c1e33420..906be701be 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -377,6 +377,18 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Decode(b, Literal.create(null, StringType)), null, create_row(null)) } + test("initcap unit test") { + checkEvaluation(InitCap(Literal.create(null, StringType)), null) + checkEvaluation(InitCap(Literal("a b")), "A B") + checkEvaluation(InitCap(Literal(" a")), " A") + checkEvaluation(InitCap(Literal("the test")), "The Test") + // scalastyle:off + // non ascii characters are not allowed in the code, so we disable the scalastyle here. + checkEvaluation(InitCap(Literal("世界")), "世界") + // scalastyle:on + } + + test("Levenshtein distance") { checkEvaluation(Levenshtein(Literal.create(null, StringType), Literal("")), null) checkEvaluation(Levenshtein(Literal(""), Literal.create(null, StringType)), null) |