From ea02e5513a8f9853094d5612c962fc8c1a340f50 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Mon, 28 Sep 2015 14:40:40 -0700 Subject: [SPARK-10859] [SQL] fix stats of StringType in columnar cache The UTF8String may come from an UnsafeRow, in which case its underlying buffer is not copied, so we should clone it in order to hold it in Stats. cc yhuai Author: Davies Liu Closes #8929 from davies/pushdown_string. --- .../src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala | 4 ++-- .../org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'sql') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala index 5cbd52bc05..fbd51b7c34 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/ColumnStats.scala @@ -213,8 +213,8 @@ private[sql] class StringColumnStats extends ColumnStats { super.gatherStats(row, ordinal) if (!row.isNullAt(ordinal)) { val value = row.getUTF8String(ordinal) - if (upper == null || value.compareTo(upper) > 0) upper = value - if (lower == null || value.compareTo(lower) < 0) lower = value + if (upper == null || value.compareTo(upper) > 0) upper = value.clone() + if (lower == null || value.compareTo(lower) < 0) lower = value.clone() sizeInBytes += STRING.actualSize(row, ordinal) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index cd3644eb9c..ea5dd2be33 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -212,4 +212,11 @@ class InMemoryColumnarQuerySuite extends QueryTest with SharedSQLContext { // 
Drop the cache. cached.unpersist() } + + test("SPARK-10859: Predicates pushed to InMemoryColumnarTableScan are not evaluated correctly") { + val data = sqlContext.range(10).selectExpr("id", "cast(id as string) as s") + data.cache() + assert(data.count() === 10) + assert(data.filter($"s" === "3").count() === 1) + } } -- cgit v1.2.3