aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorLiang-Chi Hsieh <viirya@appier.com>2015-12-01 07:42:37 -0800
committerYin Huai <yhuai@databricks.com>2015-12-01 07:44:22 -0800
commitc87531b765f8934a9a6c0f673617e0abfa5e5f0e (patch)
treeb4529982253e45edbea7a67c62b543299586a15e /sql/catalyst
parenta0af0e351e45a8be47a6f65efd132eaa4a00c9e4 (diff)
downloadspark-c87531b765f8934a9a6c0f673617e0abfa5e5f0e.tar.gz
spark-c87531b765f8934a9a6c0f673617e0abfa5e5f0e.tar.bz2
spark-c87531b765f8934a9a6c0f673617e0abfa5e5f0e.zip
[SPARK-11949][SQL] Set field nullable property for GroupingSets to get correct results for null values
JIRA: https://issues.apache.org/jira/browse/SPARK-11949 The result of a cube plan uses an incorrect schema. The schema of the cube result should set the nullable property of the grouping expressions to true, because the grouping expressions will have null values. Author: Liang-Chi Hsieh <viirya@appier.com> Closes #10038 from viirya/fix-cube.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala10
1 files changed, 8 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 94ffbbb2e5..b8f212fca7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -223,6 +223,11 @@ class Analyzer(
case other => Alias(other, other.toString)()
}
+ // TODO: We need to use bitmasks to determine which grouping expressions need to be
+ // set as nullable. For example, if we have GROUPING SETS ((a,b), a), we do not need
+ // to change the nullability of a.
+ val attributeMap = groupByAliases.map(a => (a -> a.toAttribute.withNullability(true))).toMap
+
val aggregations: Seq[NamedExpression] = x.aggregations.map {
// If an expression is an aggregate (contains a AggregateExpression) then we dont change
// it so that the aggregation is computed on the unmodified value of its argument
@@ -231,12 +236,13 @@ class Analyzer(
// If not then its a grouping expression and we need to use the modified (with nulls from
// Expand) value of the expression.
case expr => expr.transformDown {
- case e => groupByAliases.find(_.child.semanticEquals(e)).map(_.toAttribute).getOrElse(e)
+ case e =>
+ groupByAliases.find(_.child.semanticEquals(e)).map(attributeMap(_)).getOrElse(e)
}.asInstanceOf[NamedExpression]
}
val child = Project(x.child.output ++ groupByAliases, x.child)
- val groupByAttributes = groupByAliases.map(_.toAttribute)
+ val groupByAttributes = groupByAliases.map(attributeMap(_))
Aggregate(
groupByAttributes :+ VirtualColumn.groupingIdAttribute,