diff options
author | Takuya UESHIN <ueshin@happy-camper.st> | 2014-07-15 22:43:48 -0700 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-07-15 22:43:48 -0700 |
commit | 632fb3d9a9ebb3d2218385403145d5b89c41c025 (patch) | |
tree | c51d888662c05ea757c402d3eea23c33d1af5208 | |
parent | 9b38b7c71352bb5e6d359515111ad9ca33299127 (diff) | |
download | spark-632fb3d9a9ebb3d2218385403145d5b89c41c025.tar.gz spark-632fb3d9a9ebb3d2218385403145d5b89c41c025.tar.bz2 spark-632fb3d9a9ebb3d2218385403145d5b89c41c025.zip |
[SPARK-2504][SQL] Fix nullability of Substring expression.
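This is a follow-up to #1359 that narrows nullability: `Substring` is now reported as nullable only when at least one of its `str`, `pos`, or `len` child expressions is nullable, instead of always being nullable.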
Author: Takuya UESHIN <ueshin@happy-camper.st>
Closes #1426 from ueshin/issues/SPARK-2504 and squashes the following commits:
5157832 [Takuya UESHIN] Remove unnecessary white spaces.
80958ac [Takuya UESHIN] Fix nullability of Substring expression.
2 files changed, 22 insertions, 16 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 4bd7bf5a0c..f1b27c3cb5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -215,19 +215,19 @@ case class EndsWith(left: Expression, right: Expression) case class Substring(str: Expression, pos: Expression, len: Expression) extends Expression { type EvaluatedType = Any - - def nullable: Boolean = true + + def nullable: Boolean = str.nullable || pos.nullable || len.nullable def dataType: DataType = { if (!resolved) { throw new UnresolvedException(this, s"Cannot resolve since $children are not resolved") } if (str.dataType == BinaryType) str.dataType else StringType } - + def references = children.flatMap(_.references).toSet - + override def children = str :: pos :: len :: Nil - + @inline def slice[T, C <: Any](str: C, startPos: Int, sliceLen: Int) (implicit ev: (C=>IndexedSeqOptimized[T,_])): Any = { @@ -237,40 +237,40 @@ case class Substring(str: Expression, pos: Expression, len: Expression) extends // refers to element i-1 in the sequence. If a start index i is less than 0, it refers // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. 
- + val start = startPos match { case pos if pos > 0 => pos - 1 case neg if neg < 0 => len + neg case _ => 0 } - + val end = sliceLen match { case max if max == Integer.MAX_VALUE => max case x => start + x } - + str.slice(start, end) } - + override def eval(input: Row): Any = { val string = str.eval(input) val po = pos.eval(input) val ln = len.eval(input) - + if ((string == null) || (po == null) || (ln == null)) { null } else { val start = po.asInstanceOf[Int] val length = ln.asInstanceOf[Int] - + string match { case ba: Array[Byte] => slice(ba, start, length) case other => slice(other.toString, start, length) } } } - + override def toString = len match { case max if max == Integer.MAX_VALUE => s"SUBSTR($str, $pos)" case _ => s"SUBSTR($str, $pos, $len)" diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index f1d7aedcc2..143330bd64 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -469,9 +469,9 @@ class ExpressionEvaluationSuite extends FunSuite { test("Substring") { val row = new GenericRow(Array[Any]("example", "example".toArray.map(_.toByte))) - + val s = 'a.string.at(0) - + // substring from zero position with less-than-full length checkEvaluation(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)), "ex", row) checkEvaluation(Substring(s, Literal(1, IntegerType), Literal(2, IntegerType)), "ex", row) @@ -501,7 +501,7 @@ class ExpressionEvaluationSuite extends FunSuite { // substring(null, _, _) -> null checkEvaluation(Substring(s, Literal(100, IntegerType), Literal(4, IntegerType)), null, new GenericRow(Array[Any](null))) - + // substring(_, null, _) -> null checkEvaluation(Substring(s, Literal(null, IntegerType), 
Literal(4, IntegerType)), null, row) @@ -514,6 +514,12 @@ class ExpressionEvaluationSuite extends FunSuite { // 2-arg substring from nonzero position checkEvaluation(Substring(s, Literal(2, IntegerType), Literal(Integer.MAX_VALUE, IntegerType)), "xample", row) + + val s_notNull = 'a.string.notNull.at(0) + + assert(Substring(s, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === true) + assert(Substring(s_notNull, Literal(0, IntegerType), Literal(2, IntegerType)).nullable === false) + assert(Substring(s_notNull, Literal(null, IntegerType), Literal(2, IntegerType)).nullable === true) + assert(Substring(s_notNull, Literal(0, IntegerType), Literal(null, IntegerType)).nullable === true) } } - |