diff options
author | Davies Liu <davies@databricks.com> | 2015-09-28 14:40:40 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-09-28 14:40:40 -0700 |
commit | ea02e5513a8f9853094d5612c962fc8c1a340f50 (patch) | |
tree | 778c4a8e2e56af12b5ff25910d620cf14e2e332c | |
parent | 14978b785a43e0c13c8bdfd52d20cc8984984ba3 (diff) | |
download | spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.tar.gz spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.tar.bz2 spark-ea02e5513a8f9853094d5612c962fc8c1a340f50.zip |
[SPARK-10859] [SQL] fix stats of StringType in columnar cache
The UTF8String may come from UnsafeRow, then underline buffer of it is not copied, so we should clone it in order to hold it in Stats.
cc yhuai
Author: Davies Liu <davies@databricks.com>
Closes #8929 from davies/pushdown_string.
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala | 4 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala | 7 |
2 files changed, 9 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala index 5cbd52bc05..fbd51b7c34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala @@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats { super.gatherStats(row, ordinal) if (!row.isNullAt(ordinal)) { val value = row.getUTF8String(ordinal) - if (upper == null || value.compareTo(upper) > 0) upper = value - if (lower == null || value.compareTo(lower) < 0) lower = value + if (upper == null || value.compareTo(upper) > 0) upper = value.clone() + if (lower == null || value.compareTo(lower) < 0) lower = value.clone() sizeInBytes += STRING.actualSize(row, ordinal) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index cd3644eb9c..ea5dd2be33 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { // Drop the cache. cached.unpersist() } + + test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") { + val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s") + data.cache() + assert(data.count() === 10) + assert(data.filter($"s" === "3").count() === 1) + } } |