diff options
author | chutium <teng.qiu@gmail.com> | 2014-08-08 13:31:08 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-08-08 13:31:08 -0700 |
commit | b7c89a7f0ca73153dce36e0f01b81a3947ee1189 (patch) | |
tree | 926a85f9c0242df10e882f435cf7aca89d73d4ce | |
parent | 45d8f4deab50ae069ecde2201bd486d464a4501e (diff) | |
download | spark-b7c89a7f0ca73153dce36e0f01b81a3947ee1189.tar.gz spark-b7c89a7f0ca73153dce36e0f01b81a3947ee1189.tar.bz2 spark-b7c89a7f0ca73153dce36e0f01b81a3947ee1189.zip |
[SPARK-2700] [SQL] Hidden files (such as .impala_insert_staging) should be filtered out by sqlContext.parquetFile
Author: chutium <teng.qiu@gmail.com>
Closes #1691 from chutium/SPARK-2700 and squashes the following commits:
b76ae8c [chutium] [SPARK-2700] [SQL] fixed styling issue
d75a8bd [chutium] [SPARK-2700] [SQL] Hidden files (such as .impala_insert_staging) should be filtered out by sqlContext.parquetFile
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
index aaef1a1d47..2867dc0a8b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala
@@ -373,8 +373,9 @@ private[parquet] object ParquetTypesConverter extends Logging {
     }
     ParquetRelation.enableLogForwarding()
 
-    val children = fs.listStatus(path).filterNot {
-      _.getPath.getName == FileOutputCommitter.SUCCEEDED_FILE_NAME
+    val children = fs.listStatus(path).filterNot { status =>
+      val name = status.getPath.getName
+      name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME
     }
 
     // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row