diff options
author | xinyunh <xinyun.huang@huawei.com> | 2014-09-09 16:55:39 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-09-09 16:55:39 -0700 |
commit | 07ee4a28c3a502121770f301316cb2256e8f0ce2 (patch) | |
tree | e0baa385cb9c9ec344bbd012b5b8931189ec773c | |
parent | 02b5ac7191c66a866ffedde313eb10f2adfc9b58 (diff) | |
download | spark-07ee4a28c3a502121770f301316cb2256e8f0ce2.tar.gz spark-07ee4a28c3a502121770f301316cb2256e8f0ce2.tar.bz2 spark-07ee4a28c3a502121770f301316cb2256e8f0ce2.zip |
[SPARK-3176] Implement 'ABS and 'LAST' for sql
Add support for the mathematical function"ABS" and the analytic function "last" to return a subset of the rows satisfying a query within spark sql. Test-cases included.
Author: xinyunh <xinyun.huang@huawei.com>
Author: bomeng <golf8lover>
Closes #2099 from xinyunh/sqlTest and squashes the following commits:
71d15e7 [xinyunh] remove POWER part
8843643 [xinyunh] fix the code style issue
39f0309 [bomeng] Modify the code of POWER and ABS. Move them to the file arithmetic
ff8e51e [bomeng] add abs() function support
7f6980a [xinyunh] fix the bug in 'Last' component
b3df91b [xinyunh] add 'Last' component
-rwxr-xr-x[-rw-r--r--] | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala | 4 | ||||
-rwxr-xr-x[-rw-r--r--] | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala | 1 | ||||
-rwxr-xr-x[-rw-r--r--] | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala | 28 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 15 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 23 |
5 files changed, 69 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index bfc197cf7a..a04b4a938d 100644..100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -82,6 +82,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val DISTINCT = Keyword("DISTINCT") protected val FALSE = Keyword("FALSE") protected val FIRST = Keyword("FIRST") + protected val LAST = Keyword("LAST") protected val FROM = Keyword("FROM") protected val FULL = Keyword("FULL") protected val GROUP = Keyword("GROUP") @@ -125,6 +126,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { protected val SUBSTR = Keyword("SUBSTR") protected val SUBSTRING = Keyword("SUBSTRING") protected val SQRT = Keyword("SQRT") + protected val ABS = Keyword("ABS") // Use reflection to find the reserved words defined in this class. protected val reservedWords = @@ -315,6 +317,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { case s ~ _ ~ _ ~ _ ~ _ ~ e => ApproxCountDistinct(e, s.toDouble) } | FIRST ~> "(" ~> expression <~ ")" ^^ { case exp => First(exp) } | + LAST ~> "(" ~> expression <~ ")" ^^ { case exp => Last(exp) } | AVG ~> "(" ~> expression <~ ")" ^^ { case exp => Average(exp) } | MIN ~> "(" ~> expression <~ ")" ^^ { case exp => Min(exp) } | MAX ~> "(" ~> expression <~ ")" ^^ { case exp => Max(exp) } | @@ -330,6 +333,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers { case s ~ "," ~ p ~ "," ~ l => Substring(s,p,l) } | SQRT ~> "(" ~> expression <~ ")" ^^ { case exp => Sqrt(exp) } | + ABS ~> "(" ~> expression <~ ")" ^^ { case exp => Abs(exp) } | ident ~ "(" ~ repsep(expression, ",") <~ ")" ^^ { case udfName ~ _ ~ exprs => UnresolvedFunction(udfName, exprs) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index f44521d638..deb622c39f 100644..100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -132,6 +132,7 @@ package object dsl { def approxCountDistinct(e: Expression, rsd: Double = 0.05) = ApproxCountDistinct(e, rsd) def avg(e: Expression) = Average(e) def first(e: Expression) = First(e) + def last(e: Expression) = Last(e) def min(e: Expression) = Min(e) def max(e: Expression) = Max(e) def upper(e: Expression) = Upper(e) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 15560a2a93..1b4d892625 100644..100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -344,6 +344,21 @@ case class First(child: Expression) extends PartialAggregate with trees.UnaryNod override def newInstance() = new FirstFunction(child, this) } +case class Last(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { + override def references = child.references + override def nullable = true + override def dataType = child.dataType + override def toString = s"LAST($child)" + + override def asPartial: SplitEvaluation = { + val partialLast = Alias(Last(child), "PartialLast")() + SplitEvaluation( + Last(partialLast.toAttribute), + partialLast :: Nil) + } + override def newInstance() = new LastFunction(child, this) +} + case class AverageFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { @@ -489,3 +504,16 @@ case class FirstFunction(expr: Expression, base: AggregateExpression) extends Ag override def eval(input: Row): Any = result } + +case class LastFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { + def this() = this(null, null) // Required for serialization. + + var result: Any = null + + override def update(input: Row): Unit = { + result = input + } + + override def eval(input: Row): Any = if (result != null) expr.eval(result.asInstanceOf[Row]) + else null +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index f988fb010b..fe825fdcda 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.types._ +import scala.math.pow case class UnaryMinus(child: Expression) extends UnaryExpression { type EvaluatedType = Any @@ -129,3 +130,17 @@ case class MaxOf(left: Expression, right: Expression) extends Expression { override def toString = s"MaxOf($left, $right)" } + +/** + * A function that get the absolute value of the numeric value. + */ +case class Abs(child: Expression) extends UnaryExpression { + type EvaluatedType = Any + + def dataType = child.dataType + override def foldable = child.foldable + def nullable = child.nullable + override def toString = s"Abs($child)" + + override def eval(input: Row): Any = n1(child, input, _.abs(_)) +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 739c12f338..514ac543df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -41,6 +41,25 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { } + test("SPARK-3176 Added Parser of SQL ABS()") { + checkAnswer( + sql("SELECT ABS(-1.3)"), + 1.3) + checkAnswer( + sql("SELECT ABS(0.0)"), + 0.0) + checkAnswer( + sql("SELECT ABS(2.5)"), + 2.5) + } + + test("SPARK-3176 Added Parser of SQL LAST()") { + checkAnswer( + sql("SELECT LAST(n) FROM lowerCaseData"), + 4) + } + + test("SPARK-2041 column name equals tablename") { checkAnswer( sql("SELECT tableName FROM tableName"), @@ -53,14 +72,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { (1 to 100).map(x => Row(math.sqrt(x.toDouble))).toSeq ) } - + test("SQRT with automatic string casts") { checkAnswer( sql("SELECT SQRT(CAST(key AS STRING)) FROM testData"), (1 to 100).map(x => Row(math.sqrt(x.toDouble))).toSeq ) } - + test("SPARK-2407 Added Parser of SQL SUBSTR()") { checkAnswer( sql("SELECT substr(tableName, 1, 2) FROM tableName"), |