aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2017-03-15 08:24:41 +0800
committerWenchen Fan <wenchen@databricks.com>2017-03-15 08:24:41 +0800
commitdacc382f0c918f1ca808228484305ce0e21c705e (patch)
treefa222f88241a07e53f87695625d5c2c1fc9350d3 /sql/catalyst
parent7ded39c223429265b23940ca8244660dbee8320c (diff)
downloadspark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.gz
spark-dacc382f0c918f1ca808228484305ce0e21c705e.tar.bz2
spark-dacc382f0c918f1ca808228484305ce0e21c705e.zip
[SPARK-19887][SQL] dynamic partition keys can be null or empty string
## What changes were proposed in this pull request? When dynamic partition value is null or empty string, we should write the data to a directory like `a=__HIVE_DEFAULT_PARTITION__`, when we read the data back, we should respect this special directory name and treat it as null. This is the same behavior of impala, see https://issues.apache.org/jira/browse/IMPALA-252 ## How was this patch tested? new regression test Author: Wenchen Fan <wenchen@databricks.com> Closes #17277 from cloud-fan/partition.
Diffstat (limited to 'sql/catalyst')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala2
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala9
2 files changed, 8 insertions, 3 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala
index a418edc302..a8693dcca5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogUtils.scala
@@ -118,7 +118,7 @@ object ExternalCatalogUtils {
}
def getPartitionPathString(col: String, value: String): String = {
- val partitionString = if (value == null) {
+ val partitionString = if (value == null || value.isEmpty) {
DEFAULT_PARTITION_NAME
} else {
escapePathName(value)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index b862deaf36..70ed44e025 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -116,7 +116,12 @@ case class CatalogTablePartition(
val timeZoneId = caseInsensitiveProperties.getOrElse(
DateTimeUtils.TIMEZONE_OPTION, defaultTimeZondId)
InternalRow.fromSeq(partitionSchema.map { field =>
- Cast(Literal(spec(field.name)), field.dataType, Option(timeZoneId)).eval()
+ val partValue = if (spec(field.name) == ExternalCatalogUtils.DEFAULT_PARTITION_NAME) {
+ null
+ } else {
+ spec(field.name)
+ }
+ Cast(Literal(partValue), field.dataType, Option(timeZoneId)).eval()
})
}
}
@@ -164,7 +169,7 @@ case class BucketSpec(
* @param tracksPartitionsInCatalog whether this table's partition metadata is stored in the
* catalog. If false, it is inferred automatically based on file
* structure.
- * @param schemaPresevesCase Whether or not the schema resolved for this table is case-sensitive.
+ * @param schemaPreservesCase Whether or not the schema resolved for this table is case-sensitive.
* When using a Hive Metastore, this flag is set to false if a case-
* sensitive schema was unable to be read from the table properties.
* Used to trigger case-sensitive schema inference at query time, when