aboutsummaryrefslogtreecommitdiff
path: root/sql/hive/src/test/scala/org/apache
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2017-03-15 08:24:41 +0800
committerWenchen Fan <wenchen@databricks.com>2017-03-15 08:24:41 +0800
commitdacc382f0c918f1ca808228484305ce0e21c705e (patch)
treefa222f88241a07e53f87695625d5c2c1fc9350d3 /sql/hive/src/test/scala/org/apache
parent7ded39c223429265b23940ca8244660dbee8320c (diff)
downloadspark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.gz
spark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.bz2
spark-dacc382f0c918f1ca808228484305ce0e21c705e.zip
[SPARK-19887][SQL] dynamic partition keys can be null or empty string
## What changes were proposed in this pull request? When dynamic partition value is null or empty string, we should write the data to a directory like `a=__HIVE_DEFAULT_PARTITION__`, when we read the data back, we should respect this special directory name and treat it as null. This is the same behavior of impala, see https://issues.apache.org/jira/browse/IMPALA-252 ## How was this patch tested? new regression test Author: Wenchen Fan <wenchen@databricks.com> Closes #17277 from cloud-fan/partition.
Diffstat (limited to 'sql/hive/src/test/scala/org/apache')
-rw-r--r--sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala24
1 file changed, 23 insertions, 1 deletion
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
index 96385961c9..9440a17677 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala
@@ -22,7 +22,7 @@ import java.io.File
import org.apache.hadoop.fs.Path
import org.apache.spark.metrics.source.HiveCatalogMetrics
-import org.apache.spark.sql.{AnalysisException, QueryTest}
+import org.apache.spark.sql.{AnalysisException, QueryTest, Row}
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.hive.test.TestHiveSingleton
import org.apache.spark.sql.internal.SQLConf
@@ -316,6 +316,28 @@ class PartitionProviderCompatibilitySuite
}
}
}
+
+ test(s"SPARK-19887 partition value is null - partition management $enabled") {
+ withTable("test") {
+ Seq((1, "p", 1), (2, null, 2)).toDF("a", "b", "c")
+ .write.partitionBy("b", "c").saveAsTable("test")
+ checkAnswer(spark.table("test"),
+ Row(1, "p", 1) :: Row(2, null, 2) :: Nil)
+
+ Seq((3, null: String, 3)).toDF("a", "b", "c")
+ .write.mode("append").partitionBy("b", "c").saveAsTable("test")
+ checkAnswer(spark.table("test"),
+ Row(1, "p", 1) :: Row(2, null, 2) :: Row(3, null, 3) :: Nil)
+ // make sure partition pruning also works.
+ checkAnswer(spark.table("test").filter($"b".isNotNull), Row(1, "p", 1))
+
+ // empty string is an invalid partition value and we treat it as null when read back.
+ Seq((4, "", 4)).toDF("a", "b", "c")
+ .write.mode("append").partitionBy("b", "c").saveAsTable("test")
+ checkAnswer(spark.table("test"),
+ Row(1, "p", 1) :: Row(2, null, 2) :: Row(3, null, 3) :: Row(4, null, 4) :: Nil)
+ }
+ }
}
/**