diff options
author | Liang-Chi Hsieh <viirya@appier.com> | 2015-11-04 10:56:32 -0800 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2015-11-04 10:56:32 -0800 |
commit | de289bf279e14e47859b5fbcd70e97b9d0759f14 (patch) | |
tree | dfe89d28139e29428269e106fcab291da2f72764 /sql/core/src/main | |
parent | 987df4bfcafeca3633453c2d2f8e14d221fcef33 (diff) | |
download | spark-de289bf279e14e47859b5fbcd70e97b9d0759f14.tar.gz spark-de289bf279e14e47859b5fbcd70e97b9d0759f14.tar.bz2 spark-de289bf279e14e47859b5fbcd70e97b9d0759f14.zip |
[SPARK-10304][SQL] Following up checking valid dir structure for partition discovery
This patch follows up #8840.
Author: Liang-Chi Hsieh <viirya@appier.com>
Closes #9459 from viirya/detect_invalid_part_dir_following.
Diffstat (limited to 'sql/core/src/main')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala | 14 |
1 files changed, 13 insertions, 1 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala index 16dc23661c..86bc3a1b6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala @@ -81,6 +81,8 @@ private[sql] object PartitioningUtils { parsePartition(path, defaultPartitionName, typeInference) }.unzip + // We create pairs of (path -> path's partition value) here + // If the corresponding partition value is None, the pair will be skiped val pathsWithPartitionValues = paths.zip(partitionValues).flatMap(x => x._2.map(x._1 -> _)) if (pathsWithPartitionValues.isEmpty) { @@ -89,11 +91,21 @@ private[sql] object PartitioningUtils { } else { // This dataset is partitioned. We need to check whether all partitions have the same // partition columns and resolve potential type conflicts. + + // Check if there is conflicting directory structure. + // For the paths such as: + // var paths = Seq( + // "hdfs://host:9000/invalidPath", + // "hdfs://host:9000/path/a=10/b=20", + // "hdfs://host:9000/path/a=10.5/b=hello") + // It will be recognised as conflicting directory structure: + // "hdfs://host:9000/invalidPath" + // "hdfs://host:9000/path" val basePaths = optBasePaths.flatMap(x => x) assert( basePaths.distinct.size == 1, "Conflicting directory structures detected. Suspicious paths:\b" + - basePaths.mkString("\n\t", "\n\t", "\n\n")) + basePaths.distinct.mkString("\n\t", "\n\t", "\n\n")) val resolvedPartitionValues = resolvePartitions(pathsWithPartitionValues) |