diff options
author | Chia-Yung Su <chiayung@appier.com> | 2014-08-25 18:20:19 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-08-25 18:20:19 -0700 |
commit | 4243bb6634aca5b9ddf6d42778aa7b4866ce6256 (patch) | |
tree | 8ce489b6ea783e1d41b7df9274bd46807fd907f7 | |
parent | 507a1b520063ad3e10b909767d9e3fd72d24415b (diff) | |
download | spark-4243bb6634aca5b9ddf6d42778aa7b4866ce6256.tar.gz spark-4243bb6634aca5b9ddf6d42778aa7b4866ce6256.tar.bz2 spark-4243bb6634aca5b9ddf6d42778aa7b4866ce6256.zip |
[SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile
Fix the compile error on Hadoop 0.23 for pull request #1924.
Author: Chia-Yung Su <chiayung@appier.com>
Closes #1959 from joesu/bugfix-spark3011 and squashes the following commits:
be30793 [Chia-Yung Su] remove .* and _* except _metadata
8fe2398 [Chia-Yung Su] add note to explain
40ea9bd [Chia-Yung Su] fix hadoop-0.23 compile error
c7e44f2 [Chia-Yung Su] match syntax
f8fc32a [Chia-Yung Su] filter out tmp dir
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 2 |
1 file changed, 1 insertion, 1 deletion
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
index c79a9ac2da..af8cd0a73b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
@@ -378,7 +378,7 @@ private[parquet] object ParquetTypesConverter extends Logging {
 
     val children = fs.listStatus(path).filterNot { status =>
       val name = status.getPath.getName
-      name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME
+      (name(0) == '.' || name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE
     }
 
     // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row