diff options
author | Takuya UESHIN <ueshin@happy-camper.st> | 2014-05-27 22:17:50 -0700 |
---|---|---|
committer | Reynold Xin <rxin@apache.org> | 2014-05-27 22:18:16 -0700 |
commit | 24a1cac4ef10cb77ed39a385f4a9e76c39afeb1d (patch) | |
tree | ebefe984024d0637d9012d987db88322680ea937 /sql | |
parent | 5d6382566719043cf07adde36ffe76abb576e7da (diff) | |
download | spark-24a1cac4ef10cb77ed39a385f4a9e76c39afeb1d.tar.gz spark-24a1cac4ef10cb77ed39a385f4a9e76c39afeb1d.tar.bz2 spark-24a1cac4ef10cb77ed39a385f4a9e76c39afeb1d.zip |
[SPARK-1938] [SQL] ApproxCountDistinctMergeFunction should return Int value.
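`ApproxCountDistinctMergeFunction` should return an `Int` value because the `dataType` of `ApproxCountDistinct` is `IntegerType`. (As merged, this commit instead changes the `dataType` of `CountDistinct`, `ApproxCountDistinctMerge` and `ApproxCountDistinct` to `LongType`, and makes `CountDistinctFunction` return a `Long`.)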
Author: Takuya UESHIN <ueshin@happy-camper.st>
Closes #893 from ueshin/issues/SPARK-1938 and squashes the following commits:
3970e88 [Takuya UESHIN] Remove a superfluous line.
5ad7ec1 [Takuya UESHIN] Make dataType for each of CountDistinct, ApproxCountDistinctMerge and ApproxCountDistinct LongType.
cbe7c71 [Takuya UESHIN] Revert a change.
fc3ac0f [Takuya UESHIN] Fix evaluated value type of ApproxCountDistinctMergeFunction to Int.
(cherry picked from commit 9df86835b60ce587c8b9bd4ad7410eebf59a179d)
Signed-off-by: Reynold Xin <rxin@apache.org>
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala | 9 |
1 file changed, 4 insertions, 5 deletions
```diff
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
index c902433688..01947273b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala
@@ -82,7 +82,6 @@ abstract class AggregateFunction
   override def dataType = base.dataType
 
   def update(input: Row): Unit
-  override def eval(input: Row): Any
 
   // Do we really need this?
   override def newInstance() = makeCopy(productIterator.map { case a: AnyRef => a }.toArray)
@@ -166,7 +165,7 @@ case class CountDistinct(expressions: Seq[Expression]) extends AggregateExpressi
   override def children = expressions
   override def references = expressions.flatMap(_.references).toSet
   override def nullable = false
-  override def dataType = IntegerType
+  override def dataType = LongType
   override def toString = s"COUNT(DISTINCT ${expressions.mkString(",")})"
   override def newInstance() = new CountDistinctFunction(expressions, this)
 }
@@ -184,7 +183,7 @@ case class ApproxCountDistinctMerge(child: Expression, relativeSD: Double)
   extends AggregateExpression with trees.UnaryNode[Expression] {
   override def references = child.references
   override def nullable = false
-  override def dataType = IntegerType
+  override def dataType = LongType
   override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
   override def newInstance() = new ApproxCountDistinctMergeFunction(child, this, relativeSD)
 }
@@ -193,7 +192,7 @@ case class ApproxCountDistinct(child: Expression, relativeSD: Double = 0.05)
   extends PartialAggregate with trees.UnaryNode[Expression] {
   override def references = child.references
   override def nullable = false
-  override def dataType = IntegerType
+  override def dataType = LongType
   override def toString = s"APPROXIMATE COUNT(DISTINCT $child)"
 
   override def asPartial: SplitEvaluation = {
@@ -394,7 +393,7 @@ case class CountDistinctFunction(expr: Seq[Expression], base: AggregateExpressio
     }
   }
 
-  override def eval(input: Row): Any = seen.size
+  override def eval(input: Row): Any = seen.size.toLong
 }
 
 case class FirstFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction {
```