about summary refs log tree commit diff
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@databricks.com>2015-07-08 15:33:14 -0700
committerJosh Rosen <joshrosen@databricks.com>2015-07-08 15:33:14 -0700
commit3f6e6e0e2668832af1a54f5cb95e5a4537c7bc5a (patch)
tree77d7b608a3ec1000e612f3ab37bde4984b1aa562
parent4df0f1b1bbc994c4e538e22c4580c62f4fed9c45 (diff)
downloadspark-3f6e6e0e2668832af1a54f5cb95e5a4537c7bc5a.tar.gz
spark-3f6e6e0e2668832af1a54f5cb95e5a4537c7bc5a.tar.bz2
spark-3f6e6e0e2668832af1a54f5cb95e5a4537c7bc5a.zip
[SPARK-8903] Fix bug in cherry-pick of SPARK-8803
This fixes a bug introduced in the cherry-pick of #7201 which led to a NullPointerException when cross-tabulating a data set that contains null values.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #7295 from JoshRosen/SPARK-8903 and squashes the following commits:

5489948 [Josh Rosen] [SPARK-8903] Fix bug in cherry-pick of SPARK-8803
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala6
1 file changed, 3 insertions, 3 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
index 5a0c9a66b8..3c680282a1 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala
@@ -113,7 +113,7 @@ private[sql] object StatFunctions extends Logging {
if (element == null) "null" else element.toString
}
// get the distinct values of column 2, so that we can make them the column names
- val distinctCol2: Map[Any, Int] =
+ val distinctCol2: Map[String, Int] =
counts.map(e => cleanElement(e.get(1))).distinct.zipWithIndex.toMap
val columnSize = distinctCol2.size
require(columnSize < 1e4, s"The number of distinct values for $col2, can't " +
@@ -128,7 +128,7 @@ private[sql] object StatFunctions extends Logging {
countsRow.setLong(columnIndex + 1, row.getLong(2))
}
// the value of col1 is the first value, the rest are the counts
- countsRow.setString(0, cleanElement(col1Item.toString))
+ countsRow.setString(0, cleanElement(col1Item))
countsRow
}.toSeq
// Back ticks can't exist in DataFrame column names, therefore drop them. To be able to accept
@@ -139,7 +139,7 @@ private[sql] object StatFunctions extends Logging {
// In the map, the column names (._1) are not ordered by the index (._2). This was the bug in
// SPARK-8681. We need to explicitly sort by the column index and assign the column names.
val headerNames = distinctCol2.toSeq.sortBy(_._2).map { r =>
- StructField(cleanColumnName(r._1.toString), LongType)
+ StructField(cleanColumnName(r._1), LongType)
}
val schema = StructType(StructField(tableName, StringType) +: headerNames)