author	Cheng Lian <lian@databricks.com>	2016-06-02 16:16:27 -0700
committer	Cheng Lian <lian@databricks.com>	2016-06-02 16:16:27 -0700
commit	431542765785304edb76a19885fbc5f9b8ae7d64 (patch)
tree	ef19e4c29d63eb81484fef8a65ccad5905315758 /sql/hive
parent	72353311d3a37cb523c5bdd8072ffdff99af9749 (diff)
download	spark-431542765785304edb76a19885fbc5f9b8ae7d64.tar.gz
	spark-431542765785304edb76a19885fbc5f9b8ae7d64.tar.bz2
	spark-431542765785304edb76a19885fbc5f9b8ae7d64.zip
[SPARK-15719][SQL] Disables writing Parquet summary files by default
## What changes were proposed in this pull request?

This PR disables writing Parquet summary files by default (i.e., when the Hadoop configuration "parquet.enable.summary-metadata" is not set). Please refer to [SPARK-15719][1] for more details.

## How was this patch tested?

New test case added in `ParquetQuerySuite` to check that no summary files are written by default.

[1]: https://issues.apache.org/jira/browse/SPARK-15719

Author: Cheng Lian <lian@databricks.com>

Closes #13455 from liancheng/spark-15719-disable-parquet-summary-files.
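For context, a minimal sketch (not part of this patch) of how a user might opt back in to summary files after this change. It assumes a running `SparkSession` and a hypothetical output path; `ParquetOutputFormat.ENABLE_JOB_SUMMARY` is the parquet-hadoop constant for "parquet.enable.summary-metadata":

```scala
import org.apache.parquet.hadoop.ParquetOutputFormat
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().appName("summary-files-demo").getOrCreate()

// Assumed usage: with this patch, summary files are only written when the
// Hadoop configuration "parquet.enable.summary-metadata" is set explicitly.
// ParquetOutputFormat.ENABLE_JOB_SUMMARY resolves to that key.
spark.sparkContext.hadoopConfiguration
  .setBoolean(ParquetOutputFormat.ENABLE_JOB_SUMMARY, true)

// With the flag set, the write produces _metadata and _common_metadata
// alongside the data files; without it (the new default), it does not.
// The output path below is hypothetical.
spark.range(0, 5).toDF().write.parquet("/tmp/parquet-with-summary")
```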
Diffstat (limited to 'sql/hive')
-rw-r--r--	sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala	29
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
index f9a1d16d90..8aa018d0a9 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/ParquetHadoopFsRelationSuite.scala
@@ -21,6 +21,7 @@ import java.io.File
 
 import com.google.common.io.Files
 import org.apache.hadoop.fs.Path
+import org.apache.parquet.hadoop.ParquetOutputFormat
 
 import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.sql._
@@ -124,23 +125,25 @@ class ParquetHadoopFsRelationSuite extends HadoopFsRelationTest {
   }
 
   test("SPARK-8604: Parquet data source should write summary file while doing appending") {
-    withTempPath { dir =>
-      val path = dir.getCanonicalPath
-      val df = spark.range(0, 5).toDF()
-      df.write.mode(SaveMode.Overwrite).parquet(path)
+    withSQLConf(ParquetOutputFormat.ENABLE_JOB_SUMMARY -> "true") {
+      withTempPath { dir =>
+        val path = dir.getCanonicalPath
+        val df = spark.range(0, 5).toDF()
+        df.write.mode(SaveMode.Overwrite).parquet(path)
 
-      val summaryPath = new Path(path, "_metadata")
-      val commonSummaryPath = new Path(path, "_common_metadata")
+        val summaryPath = new Path(path, "_metadata")
+        val commonSummaryPath = new Path(path, "_common_metadata")
 
-      val fs = summaryPath.getFileSystem(spark.sessionState.newHadoopConf())
-      fs.delete(summaryPath, true)
-      fs.delete(commonSummaryPath, true)
+        val fs = summaryPath.getFileSystem(spark.sessionState.newHadoopConf())
+        fs.delete(summaryPath, true)
+        fs.delete(commonSummaryPath, true)
 
-      df.write.mode(SaveMode.Append).parquet(path)
-      checkAnswer(spark.read.parquet(path), df.union(df))
+        df.write.mode(SaveMode.Append).parquet(path)
+        checkAnswer(spark.read.parquet(path), df.union(df))
 
-      assert(fs.exists(summaryPath))
-      assert(fs.exists(commonSummaryPath))
+        assert(fs.exists(summaryPath))
+        assert(fs.exists(commonSummaryPath))
+      }
     }
   }