aboutsummaryrefslogtreecommitdiff
path: root/sql/core/src
diff options
context:
space:
mode:
author: JihongMa <linlin200605@gmail.com> 2015-11-12 13:47:34 -0800
committer: Xiangrui Meng <meng@databricks.com> 2015-11-12 13:47:34 -0800
commit: d292f74831de7e69c852ed26d9c15df85b4fb568 (patch)
tree: c29b1aed84f549f5295cc0e3cb3b6a508e3360a9 /sql/core/src
parent: f5a9526fec284cccd0755d190c91e8d9999f7877 (diff)
download: spark-d292f74831de7e69c852ed26d9c15df85b4fb568.tar.gz
spark-d292f74831de7e69c852ed26d9c15df85b4fb568.tar.bz2
spark-d292f74831de7e69c852ed26d9c15df85b4fb568.zip
[SPARK-11420] Updating Stddev support via Imperative Aggregate
Switched stddev support from DeclarativeAggregate to ImperativeAggregate.

Author: JihongMa <linlin200605@gmail.com>

Closes #9380 from JihongMA/SPARK-11420.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/functions.scala | 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala | 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 2
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 11
4 files changed, 6 insertions, 13 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index b6330e230a..53cc6e0cda 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -397,7 +397,7 @@ object functions extends LegacyFunctions {
def stddev(e: Column): Column = withAggregateFunction { StddevSamp(e.expr) }
/**
- * Aggregate function: returns the unbiased sample standard deviation of
+ * Aggregate function: returns the sample standard deviation of
* the expression in a group.
*
* @group agg_funcs
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
index eb1ee266c5..432e8d1762 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala
@@ -195,7 +195,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
}
test("stddev") {
- val testData2ADev = math.sqrt(4 / 5.0)
+ val testData2ADev = math.sqrt(4.0 / 5.0)
checkAnswer(
testData2.agg(stddev('a), stddev_pop('a), stddev_samp('a)),
Row(testData2ADev, math.sqrt(4 / 6.0), testData2ADev))
@@ -205,7 +205,7 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext {
val emptyTableData = Seq.empty[(Int, Int)].toDF("a", "b")
checkAnswer(
emptyTableData.agg(stddev('a), stddev_pop('a), stddev_samp('a)),
- Row(null, null, null))
+ Row(Double.NaN, Double.NaN, Double.NaN))
}
test("zero sum") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index e4f23fe17b..35cdab50bd 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -459,7 +459,7 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
val emptyDescribeResult = Seq(
Row("count", "0", "0"),
Row("mean", null, null),
- Row("stddev", null, null),
+ Row("stddev", "NaN", "NaN"),
Row("min", null, null),
Row("max", null, null))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 52a561d2e5..167aea87de 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -314,13 +314,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
testCodeGen(
"SELECT min(key) FROM testData3x",
Row(1) :: Nil)
- // STDDEV
- testCodeGen(
- "SELECT a, stddev(b), stddev_pop(b) FROM testData2 GROUP BY a",
- (1 to 3).map(i => Row(i, math.sqrt(0.5), math.sqrt(0.25))))
- testCodeGen(
- "SELECT stddev(b), stddev_pop(b), stddev_samp(b) FROM testData2",
- Row(math.sqrt(1.5 / 5), math.sqrt(1.5 / 6), math.sqrt(1.5 / 5)) :: Nil)
// Some combinations.
testCodeGen(
"""
@@ -341,8 +334,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
Row(100, 1, 50.5, 300, 100) :: Nil)
// Aggregate with Code generation handling all null values
testCodeGen(
- "SELECT sum('a'), avg('a'), stddev('a'), count(null) FROM testData",
- Row(null, null, null, 0) :: Nil)
+ "SELECT sum('a'), avg('a'), count(null) FROM testData",
+ Row(null, null, 0) :: Nil)
} finally {
sqlContext.dropTempTable("testData3x")
}