diff options
author | Wenchen Fan <wenchen@databricks.com> | 2017-03-15 08:24:41 +0800 |
---|---|---|
committer | Wenchen Fan <wenchen@databricks.com> | 2017-03-15 08:24:41 +0800 |
commit | dacc382f0c918f1ca808228484305ce0e21c705e (patch) | |
tree | fa222f88241a07e53f87695625d5c2c1fc9350d3 /sql/hive/src/test/scala/org | |
parent | 7ded39c223429265b23940ca8244660dbee8320c (diff) | |
download | spark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.gz spark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.bz2 spark-dacc382f0c918f1ca808228484305ce0e21c705e.zip |
[SPARK-19887][SQL] dynamic partition keys can be null or empty string
## What changes were proposed in this pull request?
When a dynamic partition value is null or an empty string, we should write the data to a directory like `a=__HIVE_DEFAULT_PARTITION__`. When we read the data back, we should respect this special directory name and treat it as null.
This is the same behavior as Impala; see https://issues.apache.org/jira/browse/IMPALA-252
## How was this patch tested?
new regression test
Author: Wenchen Fan <wenchen@databricks.com>
Closes #17277 from cloud-fan/partition.
Diffstat (limited to 'sql/hive/src/test/scala/org')
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala | 24 |
1 file changed, 23 insertions, 1 deletion
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala index 96385961c9..9440a17677 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/PartitionProviderCompatibilitySuite.scala @@ -22,7 +22,7 @@ import java.io.File import org.apache.hadoop.fs.Path import org.apache.spark.metrics.source.HiveCatalogMetrics -import org.apache.spark.sql.{AnalysisException, QueryTest} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.internal.SQLConf @@ -316,6 +316,28 @@ class PartitionProviderCompatibilitySuite } } } + + test(s"SPARK-19887 partition value is null - partition management $enabled") { + withTable("test") { + Seq((1, "p", 1), (2, null, 2)).toDF("a", "b", "c") + .write.partitionBy("b", "c").saveAsTable("test") + checkAnswer(spark.table("test"), + Row(1, "p", 1) :: Row(2, null, 2) :: Nil) + + Seq((3, null: String, 3)).toDF("a", "b", "c") + .write.mode("append").partitionBy("b", "c").saveAsTable("test") + checkAnswer(spark.table("test"), + Row(1, "p", 1) :: Row(2, null, 2) :: Row(3, null, 3) :: Nil) + // make sure partition pruning also works. + checkAnswer(spark.table("test").filter($"b".isNotNull), Row(1, "p", 1)) + + // empty string is an invalid partition value and we treat it as null when read back. + Seq((4, "", 4)).toDF("a", "b", "c") + .write.mode("append").partitionBy("b", "c").saveAsTable("test") + checkAnswer(spark.table("test"), + Row(1, "p", 1) :: Row(2, null, 2) :: Row(3, null, 3) :: Row(4, null, 4) :: Nil) + } + } } /** |