diff options
author | ravipesala <ravindra.pesala@huawei.com> | 2014-09-23 11:52:13 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-09-23 11:52:13 -0700 |
commit | 3b8eefa9b843c7f1e0e8dda6023272bc9f011c5c (patch) | |
tree | 8fb35e597bf7bee7060855ae41394596c17a1e4a /sql/core | |
parent | 116016b481cecbd8ad6e9717d92f977a164a6653 (diff) | |
download | spark-3b8eefa9b843c7f1e0e8dda6023272bc9f011c5c.tar.gz spark-3b8eefa9b843c7f1e0e8dda6023272bc9f011c5c.tar.bz2 spark-3b8eefa9b843c7f1e0e8dda6023272bc9f011c5c.zip |
[SPARK-3536][SQL] SELECT on empty parquet table throws exception
It returns null metadata from parquet if querying on empty parquet file while calculating splits.So added null check and returns the empty splits.
Author : ravipesala ravindra.pesalahuawei.com
Author: ravipesala <ravindra.pesala@huawei.com>
Closes #2456 from ravipesala/SPARK-3536 and squashes the following commits:
1e81a50 [ravipesala] Fixed the issue when querying on empty parquet file.
Diffstat (limited to 'sql/core')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala | 7 | ||||
-rw-r--r-- | sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala | 9 |
2 files changed, 14 insertions, 2 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala index a5a5d139a6..d39e31a7fa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala @@ -427,11 +427,15 @@ private[parquet] class FilteringParquetRowInputFormat s"maxSplitSize or minSplitSie should not be negative: maxSplitSize = $maxSplitSize;" + s" minSplitSize = $minSplitSize") } - + val splits = mutable.ArrayBuffer.empty[ParquetInputSplit] val getGlobalMetaData = classOf[ParquetFileWriter].getDeclaredMethod("getGlobalMetaData", classOf[JList[Footer]]) getGlobalMetaData.setAccessible(true) val globalMetaData = getGlobalMetaData.invoke(null, footers).asInstanceOf[GlobalMetaData] + // if parquet file is empty, return empty splits. + if (globalMetaData == null) { + return splits + } val readContext = getReadSupport(configuration).init( new InitContext(configuration, @@ -442,7 +446,6 @@ private[parquet] class FilteringParquetRowInputFormat classOf[ParquetInputFormat[_]].getDeclaredMethods.find(_.getName == "generateSplits").get generateSplits.setAccessible(true) - val splits = mutable.ArrayBuffer.empty[ParquetInputSplit] for (footer <- footers) { val fs = footer.getFile.getFileSystem(configuration) val file = footer.getFile diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala index 08f7358446..07adf73140 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala @@ -789,4 +789,13 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA assert(result3(0)(1) === "the answer") Utils.deleteRecursively(tmpdir) } + + test("Querying on empty parquet throws exception (SPARK-3536)") { + val tmpdir = Utils.createTempDir() + Utils.deleteRecursively(tmpdir) + createParquetFile[TestRDDEntry](tmpdir.toString()).registerTempTable("tmpemptytable") + val result1 = sql("SELECT * FROM tmpemptytable").collect() + assert(result1.size === 0) + Utils.deleteRecursively(tmpdir) + } } |