From e75e04f980281389b881df76f59ba1adc6338629 Mon Sep 17 00:00:00 2001
From: Kousuke Saruta
Date: Tue, 2 Dec 2014 12:07:52 -0800
Subject: [SPARK-4536][SQL] Add sqrt and abs to Spark SQL DSL

Spark SQL has embedded sqrt and abs, but the DSL doesn't support those functions.

Author: Kousuke Saruta

Closes #3401 from sarutak/dsl-missing-operator and squashes the following commits:

07700cf [Kousuke Saruta] Modified Literal(null, NullType) to Literal(null) in DslQuerySuite
8f366f8 [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into dsl-missing-operator
1b88e2e [Kousuke Saruta] Merge branch 'master' of git://git.apache.org/spark into dsl-missing-operator
0396f89 [Kousuke Saruta] Added sqrt and abs to Spark SQL DSL
---
 .../apache/spark/sql/catalyst/dsl/package.scala    |  2 +
 .../sql/catalyst/expressions/arithmetic.scala      |  1 -
 .../scala/org/apache/spark/sql/DslQuerySuite.scala | 68 ++++++++++++++++++++++
 .../test/scala/org/apache/spark/sql/TestData.scala |  4 ++
 4 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index 31dc5a58e6..70dabc4e6c 100755
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -147,6 +147,8 @@ package object dsl {
     def max(e: Expression) = Max(e)
     def upper(e: Expression) = Upper(e)
     def lower(e: Expression) = Lower(e)
+    def sqrt(e: Expression) = Sqrt(e)
+    def abs(e: Expression) = Abs(e)

     implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s = sym.name }
     // TODO more implicit class for literal?

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index d17c9553ac..900b7586ad 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -19,7 +19,6 @@ package org.apache.spark.sql.catalyst.expressions

 import org.apache.spark.sql.catalyst.analysis.UnresolvedException
 import org.apache.spark.sql.catalyst.types._
-import scala.math.pow

 case class UnaryMinus(child: Expression) extends UnaryExpression {
   type EvaluatedType = Any

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
index 94bd97758f..1a330a2bb6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala
@@ -282,4 +282,72 @@ class DslQuerySuite extends QueryTest {
       (1, "1", "11") :: (2, "2", "22") :: (3, "3", "33") :: Nil
     )
   }
+
+  test("sqrt") {
+    checkAnswer(
+      testData.select(sqrt('key)).orderBy('key asc),
+      (1 to 100).map(n => Seq(math.sqrt(n)))
+    )
+
+    checkAnswer(
+      testData.select(sqrt('value), 'key).orderBy('key asc, 'value asc),
+      (1 to 100).map(n => Seq(math.sqrt(n), n))
+    )
+
+    checkAnswer(
+      testData.select(sqrt(Literal(null))),
+      (1 to 100).map(_ => Seq(null))
+    )
+  }
+
+  test("abs") {
+    checkAnswer(
+      testData.select(abs('key)).orderBy('key asc),
+      (1 to 100).map(n => Seq(n))
+    )
+
+    checkAnswer(
+      negativeData.select(abs('key)).orderBy('key desc),
+      (1 to 100).map(n => Seq(n))
+    )
+
+    checkAnswer(
+      testData.select(abs(Literal(null))),
+      (1 to 100).map(_ => Seq(null))
+    )
+  }
+
+  test("upper") {
+    checkAnswer(
+      lowerCaseData.select(upper('l)),
+      ('a' to 'd').map(c => Seq(c.toString.toUpperCase()))
+    )
+
+    checkAnswer(
+      testData.select(upper('value), 'key),
+      (1 to 100).map(n => Seq(n.toString, n))
+    )
+
+    checkAnswer(
+      testData.select(upper(Literal(null))),
+      (1 to 100).map(n => Seq(null))
+    )
+  }
+
+  test("lower") {
+    checkAnswer(
+      upperCaseData.select(lower('L)),
+      ('A' to 'F').map(c => Seq(c.toString.toLowerCase()))
+    )
+
+    checkAnswer(
+      testData.select(lower('value), 'key),
+      (1 to 100).map(n => Seq(n.toString, n))
+    )
+
+    checkAnswer(
+      testData.select(lower(Literal(null))),
+      (1 to 100).map(n => Seq(null))
+    )
+  }
 }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
index 933e027436..bb553a0a1e 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala
@@ -32,6 +32,10 @@ object TestData {
     (1 to 100).map(i => TestData(i, i.toString))).toSchemaRDD
   testData.registerTempTable("testData")

+  val negativeData = TestSQLContext.sparkContext.parallelize(
+    (1 to 100).map(i => TestData(-i, (-i).toString))).toSchemaRDD
+  negativeData.registerTempTable("negativeData")
+
   case class LargeAndSmallInts(a: Int, b: Int)
   val largeAndSmallInts = TestSQLContext.sparkContext.parallelize(
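
For anyone trying the new operators by hand, here is a minimal usage sketch, not part of the patch. It assumes the fixtures added above (testData and negativeData from TestData.scala) and the implicits brought in by importing from TestSQLContext, through which SQLContext picks up the DSL's ExpressionConversions, as DslQuerySuite does. The vals roots and magnitudes are illustrative names only.

    import org.apache.spark.sql.test.TestSQLContext._
    import org.apache.spark.sql.TestData._

    // sqrt('key) builds a Sqrt expression over the key column;
    // abs('key) likewise wraps the column in Abs.
    val roots = testData.select(sqrt('key)).orderBy('key asc)
    val magnitudes = negativeData.select(abs('key))

    roots.collect()       // rows roughly: 1.0, 1.4142..., ..., 10.0
    magnitudes.collect()  // keys are -1..-100, so every value comes back positive

Null inputs propagate as usual: sqrt(Literal(null)) and abs(Literal(null)) evaluate to null for every row, which is exactly what the new DslQuerySuite cases assert.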