diff options
author | petermaxlee <petermaxlee@gmail.com> | 2016-07-01 07:57:48 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2016-07-01 07:57:48 +0800 |
commit | 85f2303ecadd9bf6d9694a2743dda075654c5ccf (patch) | |
tree | 09125fdca897a2995d33cf4378f3bfc2ac018ad1 /sql/catalyst/src/main | |
parent | 3d75a5b2a76eba0855d73476dc2fd579c612d521 (diff) | |
download | spark-85f2303ecadd9bf6d9694a2743dda075654c5ccf.tar.gz spark-85f2303ecadd9bf6d9694a2743dda075654c5ccf.tar.bz2 spark-85f2303ecadd9bf6d9694a2743dda075654c5ccf.zip |
[SPARK-16276][SQL] Implement elt SQL function
## What changes were proposed in this pull request?
This patch implements the elt function, as it is implemented in Hive.
## How was this patch tested?
Added expression unit test in StringExpressionsSuite and end-to-end test in StringFunctionsSuite.
Author: petermaxlee <petermaxlee@gmail.com>
Closes #13966 from petermaxlee/SPARK-16276.
Diffstat (limited to 'sql/catalyst/src/main')
3 files changed, 44 insertions, 1 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 3fbdb2ab57..26b0c30db4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -267,6 +267,7 @@ object FunctionRegistry { expression[Concat]("concat"), expression[ConcatWs]("concat_ws"), expression[Decode]("decode"), + expression[Elt]("elt"), expression[Encode]("encode"), expression[FindInSet]("find_in_set"), expression[FormatNumber]("format_number"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala index c15a2df508..98f25a9ad7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpectsInputTypes.scala @@ -57,7 +57,8 @@ trait ExpectsInputTypes extends Expression { /** - * A mixin for the analyzer to perform implicit type casting using [[ImplicitTypeCasts]]. + * A mixin for the analyzer to perform implicit type casting using + * [[org.apache.spark.sql.catalyst.analysis.TypeCoercion.ImplicitTypeCasts]]. */ trait ImplicitCastInputTypes extends ExpectsInputTypes { // No other methods diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 44ff7fda8e..b0df957637 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -21,6 +21,7 @@ import java.text.{DecimalFormat, DecimalFormatSymbols} import java.util.{HashMap, Locale, Map => JMap} import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.util.ArrayData import org.apache.spark.sql.types._ @@ -162,6 +163,46 @@ case class ConcatWs(children: Seq[Expression]) } } +@ExpressionDescription( + usage = "_FUNC_(n, str1, str2, ...) - returns the n-th string, e.g. returns str2 when n is 2", + extended = "> SELECT _FUNC_(1, 'scala', 'java') FROM src LIMIT 1;\n" + "'scala'") +case class Elt(children: Seq[Expression]) + extends Expression with ImplicitCastInputTypes with CodegenFallback { + + private lazy val indexExpr = children.head + private lazy val stringExprs = children.tail.toArray + + /** This expression is always nullable because it returns null if index is out of range. */ + override def nullable: Boolean = true + + override def dataType: DataType = StringType + + override def inputTypes: Seq[DataType] = IntegerType +: Seq.fill(children.size - 1)(StringType) + + override def checkInputDataTypes(): TypeCheckResult = { + if (children.size < 2) { + TypeCheckResult.TypeCheckFailure("elt function requires at least two arguments") + } else { + super[ImplicitCastInputTypes].checkInputDataTypes() + } + } + + override def eval(input: InternalRow): Any = { + val indexObj = indexExpr.eval(input) + if (indexObj == null) { + null + } else { + val index = indexObj.asInstanceOf[Int] + if (index <= 0 || index > stringExprs.length) { + null + } else { + stringExprs(index - 1).eval(input) + } + } + } +} + + trait String2StringExpression extends ImplicitCastInputTypes { self: UnaryExpression => |