aboutsummaryrefslogtreecommitdiff
path: root/sql/hive
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-01-19 10:44:51 -0800
committerYin Huai <yhuai@databricks.com>2016-01-19 10:44:51 -0800
commite14817b528ccab4b4685b45a95e2325630b5fc53 (patch)
tree5347fe30325c3c8f4f7154d15c1625a94f78a839 /sql/hive
parent0ddba6d88ff093a96b4931f71bd0a599afbbca78 (diff)
downloadspark-e14817b528ccab4b4685b45a95e2325630b5fc53.tar.gz
spark-e14817b528ccab4b4685b45a95e2325630b5fc53.tar.bz2
spark-e14817b528ccab4b4685b45a95e2325630b5fc53.zip
[SPARK-12870][SQL] better format bucket id in file name
For a normal parquet file without buckets, its file name ends with a jobUUID, which may consist entirely of digits and be mistakenly regarded as a bucket id. This PR improves the format of the bucket id in the file name by using a different separator, `_`, so that the regex is more robust. Author: Wenchen Fan <wenchen@databricks.com> Closes #10799 from cloud-fan/fix-bucket.
Diffstat (limited to 'sql/hive')
-rw-r--r--sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala2
1 files changed, 1 insertions, 1 deletions
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
index 40409169b0..800823feba 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
@@ -103,7 +103,7 @@ private[orc] class OrcOutputWriter(
val uniqueWriteJobId = conf.get("spark.sql.sources.writeJobUUID")
val taskAttemptId = context.getTaskAttemptID
val partition = taskAttemptId.getTaskID.getId
- val bucketString = bucketId.map(id => f"-$id%05d").getOrElse("")
+ val bucketString = bucketId.map(BucketingUtils.bucketIdToString).getOrElse("")
val filename = f"part-r-$partition%05d-$uniqueWriteJobId$bucketString.orc"
new OrcOutputFormat().getRecordWriter(