author    Jia Li <jiali@us.ibm.com>    2015-10-27 10:57:08 +0100
committer Michael Armbrust <michael@databricks.com>    2015-10-27 10:57:08 +0100
commit    958a0ec8fa58ff091f595db2b574a7aa3ff41253 (patch)
tree      70be62d93e62dd3b2a07c3117ea1d0faf9bce41a /sql
parent    17f499920776e0e995434cfa300ff2ff38658fa8 (diff)
[SPARK-11277][SQL] sort_array throws exception scala.MatchError
I'm new to Spark. I was trying out the sort_array function and hit this exception. Looking into the Spark source code, I found the root cause: sort_array does not check for an array of NULLs, and it's not meaningful to sort an array made up entirely of NULLs anyway.

I'm adding a check on the input array's element type to SortArray: if the array consists entirely of NULLs, there is no need to sort it. I have also added a test case for this.

Please help to review my fix. Thanks!

Author: Jia Li <jiali@us.ibm.com>

Closes #9247 from jliwork/SPARK-11277.
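As a rough illustration of the failure (a minimal sketch against the Catalyst expression API, mirroring the test added in this patch rather than being part of it), evaluating SortArray on an all-NULL array forces the lt/gt comparators, whose pattern match had no case for ArrayType(NullType, _) and therefore threw scala.MatchError:

import org.apache.spark.sql.catalyst.expressions.{Literal, SortArray}
import org.apache.spark.sql.types.{ArrayType, NullType}

// An array literal whose element type is NullType, as in the new test case.
val allNulls = Literal.create(Seq(null, null), ArrayType(NullType))

// Before this patch, java.util.Arrays.sort forced the comparator, whose match
// had no NullType case, so evaluation threw scala.MatchError. With the patch,
// the NullType guard skips sorting and returns the data as-is.
val result = new SortArray(allNulls).eval()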
Diffstat (limited to 'sql')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala    6
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala  7
2 files changed, 12 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 89d87726ac..2cf19b939f 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -68,6 +68,7 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
   private lazy val lt: Comparator[Any] = {
     val ordering = base.dataType match {
       case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]]
+      case _ @ ArrayType(s: StructType, _) => s.interpretedOrdering.asInstanceOf[Ordering[Any]]
     }
 
     new Comparator[Any]() {
@@ -89,6 +90,7 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
   private lazy val gt: Comparator[Any] = {
     val ordering = base.dataType match {
       case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]]
+      case _ @ ArrayType(s: StructType, _) => s.interpretedOrdering.asInstanceOf[Ordering[Any]]
     }
 
     new Comparator[Any]() {
@@ -109,7 +111,9 @@ case class SortArray(base: Expression, ascendingOrder: Expression)
   override def nullSafeEval(array: Any, ascending: Any): Any = {
     val elementType = base.dataType.asInstanceOf[ArrayType].elementType
     val data = array.asInstanceOf[ArrayData].toArray[AnyRef](elementType)
-    java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt)
+    if (elementType != NullType) {
+      java.util.Arrays.sort(data, if (ascending.asInstanceOf[Boolean]) lt else gt)
+    }
     new GenericArrayData(data.asInstanceOf[Array[Any]])
   }
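Besides the NullType guard, the two new StructType cases above let SortArray order arrays of structs via the struct type's interpreted ordering. A hedged standalone sketch of that behaviour (mirroring the test added below, with InternalRow standing in for the suite's create_row helper):

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Literal, SortArray}
import org.apache.spark.sql.types.{ArrayType, IntegerType, StructField, StructType}

// An array<struct<a:int>> literal holding two rows, deliberately out of order.
val structArrayType = ArrayType(StructType(StructField("a", IntegerType) :: Nil))
val rows = Seq(InternalRow(2), InternalRow(1))

// With the ArrayType(StructType, _) case, the comparator uses the struct's
// interpreted ordering, so the row with a = 1 sorts before the row with a = 2.
val sorted = new SortArray(Literal.create(rows, structArrayType)).eval()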
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
index a3e81888df..1aae4678d6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
@@ -49,6 +49,7 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val a1 = Literal.create(Seq[Integer](), ArrayType(IntegerType))
     val a2 = Literal.create(Seq("b", "a"), ArrayType(StringType))
     val a3 = Literal.create(Seq("b", null, "a"), ArrayType(StringType))
+    val a4 = Literal.create(Seq(null, null), ArrayType(NullType))
 
     checkEvaluation(new SortArray(a0), Seq(1, 2, 3))
     checkEvaluation(new SortArray(a1), Seq[Integer]())
@@ -64,6 +65,12 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     checkEvaluation(new SortArray(a3, Literal(false)), Seq("b", "a", null))
 
     checkEvaluation(Literal.create(null, ArrayType(StringType)), null)
+    checkEvaluation(new SortArray(a4), Seq(null, null))
+
+    val typeAS = ArrayType(StructType(StructField("a", IntegerType) :: Nil))
+    val arrayStruct = Literal.create(Seq(create_row(2), create_row(1)), typeAS)
+
+    checkEvaluation(new SortArray(arrayStruct), Seq(create_row(1), create_row(2)))
   }
 
   test("Array contains") {