diff options
Diffstat (limited to 'sql')
4 files changed, 19 insertions, 7 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 5dbaaa3b0c..1bcd4e2276 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -151,7 +151,7 @@ case class MaxFunction(expr: Expression, base: AggregateExpression) extends Aggr case class Count(child: Expression) extends PartialAggregate with trees.UnaryNode[Expression] { override def references = child.references override def nullable = false - override def dataType = IntegerType + override def dataType = LongType override def toString = s"COUNT($child)" override def asPartial: SplitEvaluation = { @@ -295,12 +295,12 @@ case class AverageFunction(expr: Expression, base: AggregateExpression) case class CountFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { def this() = this(null, null) // Required for serialization. - var count: Int = _ + var count: Long = _ override def update(input: Row): Unit = { val evaluatedExpr = expr.map(_.eval(input)) if (evaluatedExpr.map(_ != null).reduceLeft(_ || _)) { - count += 1 + count += 1L } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala index 2569815ebb..452da3d023 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala @@ -276,6 +276,15 @@ class SchemaRDD( /** * :: Experimental :: + * Overriding base RDD implementation to leverage query optimizer + */ + @Experimental + override def count(): Long = { + groupBy()(Count(Literal(1))).collect().head.getLong(0) + } + + /** + * :: Experimental :: * Applies the given Generator, or table generating function, to this relation. * * @param generator A table generating function. The API for such functions is likely to change diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala index f43e98d614..233132a2fe 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DslQuerySuite.scala @@ -108,10 +108,7 @@ class DslQuerySuite extends QueryTest { } test("count") { - checkAnswer( - testData2.groupBy()(Count(1)), - testData2.count() - ) + assert(testData2.count() === testData2.map(_ => 1).count()) } test("null count") { @@ -126,6 +123,10 @@ class DslQuerySuite extends QueryTest { ) } + test("zero count") { + assert(testData4.count() === 0) + } + test("inner join where, one match per row") { checkAnswer( upperCaseData.join(lowerCaseData, Inner).where('n === 'N), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala index 1aca387252..b1eecb4dd3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/TestData.scala @@ -47,6 +47,8 @@ object TestData { (1, null) :: (2, 2) :: Nil) + val testData4 = logical.LocalRelation('a.int, 'b.int) + case class UpperCaseData(N: Int, L: String) val upperCaseData = TestSQLContext.sparkContext.parallelize( |