author     Reynold Xin <rxin@databricks.com>    2016-04-07 00:51:45 -0700
committer  Reynold Xin <rxin@databricks.com>    2016-04-07 00:51:45 -0700
commit     9ca0760d6769199f164a661655912f028234eb1c (patch)
tree       9077fffc4e74921b25bc15b2e41150f98ac7000c /sql/hive
parent     e11aa9ec5c3cdcd8ca08d2486a7208840ad77bf8 (diff)
download   spark-9ca0760d6769199f164a661655912f028234eb1c.tar.gz
           spark-9ca0760d6769199f164a661655912f028234eb1c.tar.bz2
           spark-9ca0760d6769199f164a661655912f028234eb1c.zip
[SPARK-10063][SQL] Remove DirectParquetOutputCommitter
## What changes were proposed in this pull request?

This patch removes DirectParquetOutputCommitter. This committer was initially created by Databricks as a faster way to write Parquet data to S3. However, given how the underlying S3 Hadoop implementation works, it only works correctly when there are no failures. If there are multiple attempts of the same task (e.g. speculation, task failures, or node failures), the output data can be corrupted. I don't think this performance optimization outweighs the correctness issue.

## How was this patch tested?

Removed the related tests as well.

Author: Reynold Xin <rxin@databricks.com>

Closes #12229 from rxin/SPARK-10063.
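As background, a minimal sketch of how the direct committer was typically enabled before this patch; the configuration key and class name below are recalled from Spark 1.x usage and are assumptions, not taken from this diff. Once the class is removed, the same write goes through Hadoop's default FileOutputCommitter, which stages each task attempt under a _temporary directory and only promotes it at commit time, so a failed or speculative attempt cannot leave partial data in the final location.

```scala
// Hypothetical pre-patch setup (key and class name are assumptions from the
// Spark 1.x era, not part of this commit): route Parquet writes through the
// direct committer, which writes straight to the destination and skips the
// _temporary staging that the default FileOutputCommitter performs.
sqlContext.sparkContext.hadoopConfiguration.set(
  "spark.sql.parquet.output.committer.class",
  "org.apache.spark.sql.execution.datasources.parquet.DirectParquetOutputCommitter")

// After this patch the class no longer exists; the write below uses the default
// committer, so each task attempt stays isolated until it commits successfully.
sqlContext.range(10).write.mode("overwrite").parquet("s3a://bucket/table")  // hypothetical path
```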
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala | 34
1 file changed, 0 insertions(+), 34 deletions(-)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
index ea7e905742..10eeb30242 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/hadoopFsRelationSuites.scala
@@ -668,40 +668,6 @@ abstract class HadoopFsRelationTest extends QueryTest with SQLTestUtils with Tes
df.write.format(dataSourceName).partitionBy("c", "d", "e").saveAsTable("t")
}
}
-
- test("SPARK-9899 Disable customized output committer when speculation is on") {
- val clonedConf = new Configuration(hadoopConfiguration)
- val speculationEnabled =
- sqlContext.sparkContext.conf.getBoolean("spark.speculation", defaultValue = false)
-
- try {
- withTempPath { dir =>
- // Enables task speculation
- sqlContext.sparkContext.conf.set("spark.speculation", "true")
-
- // Uses a customized output committer which always fails
- hadoopConfiguration.set(
- SQLConf.OUTPUT_COMMITTER_CLASS.key,
- classOf[AlwaysFailOutputCommitter].getName)
-
- // Code below shouldn't throw since customized output committer should be disabled.
- val df = sqlContext.range(10).toDF().coalesce(1)
- df.write.format(dataSourceName).save(dir.getCanonicalPath)
- checkAnswer(
- sqlContext
- .read
- .format(dataSourceName)
- .option("dataSchema", df.schema.json)
- .load(dir.getCanonicalPath),
- df)
- }
- } finally {
- // Hadoop 1 doesn't have `Configuration.unset`
- hadoopConfiguration.clear()
- clonedConf.asScala.foreach(entry => hadoopConfiguration.set(entry.getKey, entry.getValue))
- sqlContext.sparkContext.conf.set("spark.speculation", speculationEnabled.toString)
- }
- }
}
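The AlwaysFailOutputCommitter referenced by the removed test is defined further down in hadoopFsRelationSuites.scala and is not shown in this hunk. A hedged sketch of such a committer follows; which commit method it overrides and the error message are assumptions.

```scala
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter

// A committer that fails the commit step unconditionally. If Spark honored the
// SQLConf.OUTPUT_COMMITTER_CLASS setting while speculation is enabled, the write
// in the removed test would throw; the test passed only because Spark falls back
// to the default committer in that case.
class AlwaysFailOutputCommitter(outputPath: Path, context: TaskAttemptContext)
  extends FileOutputCommitter(outputPath, context) {

  // Whether the real class overrides commitJob or commitTask is an assumption here.
  override def commitJob(context: JobContext): Unit = {
    sys.error("Intentional commit failure for testing purposes.")
  }
}
```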
// This class is used to test SPARK-8578. We should not use any custom output committer when