aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorCheng Hao <hao.cheng@intel.com>2015-07-31 23:11:22 -0700
committerDavies Liu <davies.liu@gmail.com>2015-07-31 23:11:22 -0700
commit67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf (patch)
treede2371bb74fec1fa6b93391809e30bbad202ca3f /sql/catalyst
parent3320b0ba262159c0c7209ce39b353c93c597077d (diff)
downloadspark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.tar.gz
spark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.tar.bz2
spark-67ad4e21fc68336b0ad6f9a363fb5ebb51f592bf.zip
[SPARK-8232] [SQL] Add sort_array support
Add expression `sort_array` support. Author: Cheng Hao <hao.cheng@intel.com> This patch had conflicts when merged, resolved by Committer: Davies Liu <davies.liu@gmail.com> Closes #7581 from chenghao-intel/sort_array and squashes the following commits: 664c960 [Cheng Hao] update the sort_array by using the ArrayData 276d2d5 [Cheng Hao] add empty line 0edab9c [Cheng Hao] Add ascending/descending support for sort_array 80fc0f8 [Cheng Hao] Add type checking a42b678 [Cheng Hao] Add sort_array support
Diffstat (limited to 'sql/catalyst')
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala           |  1
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala    | 80
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala | 22
3 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index ee44cbcba6..6e144518bb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -233,6 +233,7 @@ object FunctionRegistry {
// collection functions
expression[Size]("size"),
+ expression[SortArray]("sort_array"),
// misc functions
expression[Crc32]("crc32"),
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index 1a00dbc254..0a530596a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -16,7 +16,10 @@
*/
package org.apache.spark.sql.catalyst.expressions
-import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, GeneratedExpressionCode}
+import java.util.Comparator
+
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenFallback, CodeGenContext, GeneratedExpressionCode}
import org.apache.spark.sql.types._
/**
@@ -39,3 +42,78 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
nullSafeCodeGen(ctx, ev, c => s"${ev.primitive} = ($c).$sizeCall;")
}
}
+
+/**
+ * Sorts the input array in ascending / descending order according to the natural ordering of
+ * the array elements and returns it.
+ *
+ * In ascending order, null elements are placed at the beginning of the returned array;
+ * in descending order, they are placed at the end (see the `lt` / `gt` comparators below).
+ */
+case class SortArray(base: Expression, ascendingOrder: Expression)
+ extends BinaryExpression with ExpectsInputTypes with CodegenFallback {
+
+ // Convenience constructor: the sort direction defaults to ascending.
+ def this(e: Expression) = this(e, Literal(true))
+
+ override def left: Expression = base
+ override def right: Expression = ascendingOrder
+ // The result array has the same type as the input array.
+ override def dataType: DataType = base.dataType
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, BooleanType)
+
+ // Only arrays whose element type is an AtomicType have a natural ordering;
+ // anything else is rejected at analysis time.
+ override def checkInputDataTypes(): TypeCheckResult = base.dataType match {
+ case _ @ ArrayType(n: AtomicType, _) => TypeCheckResult.TypeCheckSuccess
+ case _ @ ArrayType(n, _) => TypeCheckResult.TypeCheckFailure(
+ s"Type $n is not the AtomicType, we can not perform the ordering operations")
+ case other =>
+ TypeCheckResult.TypeCheckFailure(s"ArrayType(AtomicType) is expected, but we got $other")
+ }
+
+ // Ascending comparator: nulls compare as smaller than any value, so they
+ // end up at the front of the sorted array. @transient because Comparator
+ // is not serializable; it is rebuilt lazily on each executor.
+ @transient
+ private lazy val lt = {
+ val ordering = base.dataType match {
+ case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]]
+ }
+
+ new Comparator[Any]() {
+ override def compare(o1: Any, o2: Any): Int = {
+ if (o1 == null && o2 == null) {
+ 0
+ } else if (o1 == null) {
+ -1
+ } else if (o2 == null) {
+ 1
+ } else {
+ ordering.compare(o1, o2)
+ }
+ }
+ }
+ }
+
+ // Descending comparator: negates the natural ordering and inverts the null
+ // handling, so nulls end up at the back of the sorted array.
+ @transient
+ private lazy val gt = {
+ val ordering = base.dataType match {
+ case _ @ ArrayType(n: AtomicType, _) => n.ordering.asInstanceOf[Ordering[Any]]
+ }
+
+ new Comparator[Any]() {
+ override def compare(o1: Any, o2: Any): Int = {
+ if (o1 == null && o2 == null) {
+ 0
+ } else if (o1 == null) {
+ 1
+ } else if (o2 == null) {
+ -1
+ } else {
+ -ordering.compare(o1, o2)
+ }
+ }
+ }
+ }
+
+ // Copies the ArrayData elements into an Object array, sorts it in place with
+ // java.util.Arrays.sort using the comparator selected by `ascending`, and
+ // wraps the result in a new GenericArrayData.
+ override def nullSafeEval(array: Any, ascending: Any): Any = {
+ val data = array.asInstanceOf[ArrayData].toArray().asInstanceOf[Array[AnyRef]]
+ java.util.Arrays.sort(
+ data,
+ if (ascending.asInstanceOf[Boolean]) lt else gt)
+ new GenericArrayData(data.asInstanceOf[Array[Any]])
+ }
+
+ override def prettyName: String = "sort_array"
+}
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
index 28c41b5716..2c7e85c446 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionFunctionsSuite.scala
@@ -43,4 +43,26 @@ class CollectionFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(Literal.create(null, MapType(StringType, StringType)), null)
checkEvaluation(Literal.create(null, ArrayType(StringType)), null)
}
+
+ test("Sort Array") {
+ // Fixtures: unsorted ints, an empty array, unsorted strings, and strings
+ // containing a null element.
+ val a0 = Literal.create(Seq(2, 1, 3), ArrayType(IntegerType))
+ val a1 = Literal.create(Seq[Integer](), ArrayType(IntegerType))
+ val a2 = Literal.create(Seq("b", "a"), ArrayType(StringType))
+ val a3 = Literal.create(Seq("b", null, "a"), ArrayType(StringType))
+
+ // Single-arg constructor defaults to ascending; nulls sort first.
+ checkEvaluation(new SortArray(a0), Seq(1, 2, 3))
+ checkEvaluation(new SortArray(a1), Seq[Integer]())
+ checkEvaluation(new SortArray(a2), Seq("a", "b"))
+ checkEvaluation(new SortArray(a3), Seq(null, "a", "b"))
+ // Explicit ascending flag behaves identically.
+ checkEvaluation(SortArray(a0, Literal(true)), Seq(1, 2, 3))
+ checkEvaluation(SortArray(a1, Literal(true)), Seq[Integer]())
+ checkEvaluation(SortArray(a2, Literal(true)), Seq("a", "b"))
+ // NOTE(review): `new SortArray(a3, ...)` with two args invokes the primary
+ // constructor directly — equivalent to the companion apply used above, but
+ // inconsistent with the other two-argument calls in this test.
+ checkEvaluation(new SortArray(a3, Literal(true)), Seq(null, "a", "b"))
+ // Descending order reverses the comparison and places nulls last.
+ checkEvaluation(SortArray(a0, Literal(false)), Seq(3, 2, 1))
+ checkEvaluation(SortArray(a1, Literal(false)), Seq[Integer]())
+ checkEvaluation(SortArray(a2, Literal(false)), Seq("b", "a"))
+ checkEvaluation(new SortArray(a3, Literal(false)), Seq("b", "a", null))
+
+ // A null array literal evaluates to null regardless of sorting.
+ checkEvaluation(Literal.create(null, ArrayType(StringType)), null)
+ }
}