aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDilip Biswal <dbiswal@us.ibm.com>2015-11-26 21:04:40 -0800
committerDavies Liu <davies.liu@gmail.com>2015-11-26 21:04:40 -0800
commita374e20b5492c775f20d32e8fbddadbd8098a655 (patch)
treedd9ac693e8f42cf1d39f9c32f61bb834c543c5cb
parent10e315c28c933b967674ae51e1b2f24160c2e8a5 (diff)
downloadspark-a374e20b5492c775f20d32e8fbddadbd8098a655.tar.gz
spark-a374e20b5492c775f20d32e8fbddadbd8098a655.tar.bz2
spark-a374e20b5492c775f20d32e8fbddadbd8098a655.zip
[SPARK-11997] [SQL] NPE when save a DataFrame as parquet and partitioned by long column
Check for partition column null-ability while building the partition spec. Author: Dilip Biswal <dbiswal@us.ibm.com> Closes #10001 from dilipbiswal/spark-11997.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala2
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala13
2 files changed, 14 insertions, 1 deletion
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
index f9465157c9..9ace25dc7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala
@@ -607,7 +607,7 @@ abstract class HadoopFsRelation private[sql](
def castPartitionValuesToUserSchema(row: InternalRow) = {
InternalRow((0 until row.numFields).map { i =>
Cast(
- Literal.create(row.getString(i), StringType),
+ Literal.create(row.getUTF8String(i), StringType),
userProvidedSchema.fields(i).dataType).eval()
}: _*)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
index 70fae32b7e..f777e97305 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
@@ -252,6 +252,19 @@ class ParquetQuerySuite extends QueryTest with ParquetTest with SharedSQLContext
}
}
+ test("SPARK-11997 parquet with null partition values") {
+ withTempPath { dir =>
+ val path = dir.getCanonicalPath
+ sqlContext.range(1, 3)
+ .selectExpr("if(id % 2 = 0, null, id) AS n", "id")
+ .write.partitionBy("n").parquet(path)
+
+ checkAnswer(
+ sqlContext.read.parquet(path).filter("n is null"),
+ Row(2, null))
+ }
+ }
+
// This test case is ignored because of parquet-mr bug PARQUET-370
ignore("SPARK-10301 requested schema clipping - schemas with disjoint sets of fields") {
withTempPath { dir =>