aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
author Davies Liu <davies@databricks.com> 2015-09-28 14:40:40 -0700
committer Yin Huai <yhuai@databricks.com> 2015-09-28 14:40:40 -0700
commit ea02e5513a8f9853094d5612c962fc8c1a340f50 (patch)
tree 778c4a8e2e56af12b5ff25910d620cf14e2e332c /sql
parent 14978b785a43e0c13c8bdfd52d20cc8984984ba3 (diff)
download spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.tar.gz
spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.tar.bz2
spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.zip
[SPARK-10859] [SQL] fix stats of StringType in columnar cache
The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in Stats. cc yhuai Author: Davies Liu <davies@databricks.com> Closes #8929 from davies/pushdown_string.
Diffstat (limited to 'sql')
-rw-r--r-- sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala 4
-rw-r--r-- sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala 7
2 files changed, 9 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
index 5cbd52bc05..fbd51b7c34 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala
@@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats {
super.gatherStats(row, ordinal)
if (!row.isNullAt(ordinal)) {
val value = row.getUTF8String(ordinal)
- if (upper == null || value.compareTo(upper) > 0) upper = value
- if (lower == null || value.compareTo(lower) < 0) lower = value
+ if (upper == null || value.compareTo(upper) > 0) upper = value.clone()
+ if (lower == null || value.compareTo(lower) < 0) lower = value.clone()
sizeInBytes += STRING.actualSize(row, ordinal)
}
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
index cd3644eb9c..ea5dd2be33 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala
@@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext {
// Drop the cache.
cached.unpersist()
}
+
+ test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") {
+ val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s")
+ data.cache()
+ assert(data.count() === 10)
+ assert(data.filter($"s" === "3").count() === 1)
+ }
}