aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2017-03-22 09:58:46 -0700
committerXiao Li <gatorsmile@gmail.com>2017-03-22 09:58:46 -0700
commit80fd070389a9c8ffa342d7b11f1ab2ea92e0f562 (patch)
tree7bc05f19d92a8a3c5fe46e14c31c6a17815f735b /sql/catalyst
parent465818389aab1217c9de5c685cfaee3ffaec91bb (diff)
downloadspark-80fd070389a9c8ffa342d7b11f1ab2ea92e0f562.tar.gz
spark-80fd070389a9c8ffa342d7b11f1ab2ea92e0f562.tar.bz2
spark-80fd070389a9c8ffa342d7b11f1ab2ea92e0f562.zip
[SPARK-20018][SQL] Pivot with timestamp and count should not print internal representation
## What changes were proposed in this pull request? Currently, when we perform count with timestamp types, it prints the internal representation as the column name as below: ```scala Seq(new java.sql.Timestamp(1)).toDF("a").groupBy("a").pivot("a").count().show() ``` ``` +--------------------+----+ | a|1000| +--------------------+----+ |1969-12-31 16:00:...| 1| +--------------------+----+ ``` This PR proposes to use external Scala value instead of the internal representation in the column names as below: ``` +--------------------+-----------------------+ | a|1969-12-31 16:00:00.001| +--------------------+-----------------------+ |1969-12-31 16:00:...| 1| +--------------------+-----------------------+ ``` ## How was this patch tested? Unit test in `DataFramePivotSuite` and manual tests. Author: hyukjinkwon <gurwls223@gmail.com> Closes #17348 from HyukjinKwon/SPARK-20018.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala6
1 file changed, 4 insertions, 2 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 574f91b099..036ed060d9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -486,14 +486,16 @@ class Analyzer(
case Pivot(groupByExprs, pivotColumn, pivotValues, aggregates, child) =>
val singleAgg = aggregates.size == 1
def outputName(value: Literal, aggregate: Expression): String = {
+ val utf8Value = Cast(value, StringType, Some(conf.sessionLocalTimeZone)).eval(EmptyRow)
+ val stringValue: String = Option(utf8Value).map(_.toString).getOrElse("null")
if (singleAgg) {
- value.toString
+ stringValue
} else {
val suffix = aggregate match {
case n: NamedExpression => n.name
case _ => toPrettySQL(aggregate)
}
- value + "_" + suffix
+ stringValue + "_" + suffix
}
}
if (aggregates.forall(a => PivotFirst.supportsDataType(a.dataType))) {