author    ravipesala <ravindra.pesala@huawei.com>  2014-09-23 11:52:13 -0700
committer Michael Armbrust <michael@databricks.com>  2014-09-23 11:52:13 -0700
commit    3b8eefa9b843c7f1e0e8dda6023272bc9f011c5c (patch)
tree      8fb35e597bf7bee7060855ae41394596c17a1e4a /sql
parent    116016b481cecbd8ad6e9717d92f977a164a6653 (diff)
[SPARK-3536][SQL] SELECT on empty parquet table throws exception
Parquet returns null global metadata when calculating splits for an empty parquet file, so a null check was added to return empty splits.

Author: ravipesala <ravindra.pesala@huawei.com>

Closes #2456 from ravipesala/SPARK-3536 and squashes the following commits:

1e81a50 [ravipesala] Fixed the issue when querying on empty parquet file.
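The fix boils down to an early return when the global metadata is absent. A minimal standalone sketch of that pattern (the types and names below are illustrative stand-ins, not Spark's actual classes):

import scala.collection.mutable

// Illustrative stand-ins for parquet's split and metadata types.
case class ParquetSplit(path: String)
class GlobalMeta

def computeSplits(meta: GlobalMeta): Seq[ParquetSplit] = {
  val splits = mutable.ArrayBuffer.empty[ParquetSplit]
  // An empty parquet table yields null metadata; return the empty
  // buffer instead of dereferencing null further down the method.
  if (meta == null) {
    return splits.toSeq
  }
  splits += ParquetSplit("part-r-00001.parquet")
  splits.toSeq
}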
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala | 7
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala | 9
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
index a5a5d139a6..d39e31a7fa 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTableOperations.scala
@@ -427,11 +427,15 @@ private[parquet] class FilteringParquetRowInputFormat
s"maxSplitSize or minSplitSie should not be negative: maxSplitSize = $maxSplitSize;" +
s" minSplitSize = $minSplitSize")
}
-
+ val splits = mutable.ArrayBuffer.empty[ParquetInputSplit]
val getGlobalMetaData =
classOf[ParquetFileWriter].getDeclaredMethod("getGlobalMetaData", classOf[JList[Footer]])
getGlobalMetaData.setAccessible(true)
val globalMetaData = getGlobalMetaData.invoke(null, footers).asInstanceOf[GlobalMetaData]
+ // if parquet file is empty, return empty splits.
+ if (globalMetaData == null) {
+ return splits
+ }
val readContext = getReadSupport(configuration).init(
new InitContext(configuration,
@@ -442,7 +446,6 @@ private[parquet] class FilteringParquetRowInputFormat
classOf[ParquetInputFormat[_]].getDeclaredMethods.find(_.getName == "generateSplits").get
generateSplits.setAccessible(true)
- val splits = mutable.ArrayBuffer.empty[ParquetInputSplit]
for (footer <- footers) {
val fs = footer.getFile.getFileSystem(configuration)
val file = footer.getFile
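For context, the surrounding code reaches parquet's non-public getGlobalMetaData through Java reflection. A generic sketch of that pattern (the Target object and its hidden method are placeholders, not a parquet API):

object ReflectionSketch extends App {
  object Target {
    private def hidden(n: Int): Int = n + 1
  }
  // Look up the non-public method by name and parameter types, force
  // accessibility, then invoke it; for a true Java static method the
  // receiver passed to invoke would be null, as in the diff above.
  val m = Target.getClass.getDeclaredMethod("hidden", classOf[Int])
  m.setAccessible(true)
  val out = m.invoke(Target, Int.box(41)).asInstanceOf[Int]
  assert(out == 42)
}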
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
index 08f7358446..07adf73140 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala
@@ -789,4 +789,13 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterAll
assert(result3(0)(1) === "the answer")
Utils.deleteRecursively(tmpdir)
}
+
+ test("Querying on empty parquet throws exception (SPARK-3536)") {
+ val tmpdir = Utils.createTempDir()
+ Utils.deleteRecursively(tmpdir)
+ createParquetFile[TestRDDEntry](tmpdir.toString()).registerTempTable("tmpemptytable")
+ val result1 = sql("SELECT * FROM tmpemptytable").collect()
+ assert(result1.size === 0)
+ Utils.deleteRecursively(tmpdir)
+ }
}
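Run end to end, the new regression test corresponds roughly to the following against the 1.x public API (the local SparkContext setup and the on-disk path are assumptions for illustration; createParquetFile, registerTempTable, and sql are the same calls the test uses):

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

case class Entry(key: Int, value: String)

object EmptyParquetCheck extends App {
  val sc = new SparkContext(new SparkConf().setAppName("SPARK-3536").setMaster("local"))
  val sqlContext = new SQLContext(sc)

  // Create a parquet table that has a schema but no rows; before this
  // fix, the SELECT below failed with a NullPointerException while
  // calculating splits.
  sqlContext.createParquetFile[Entry]("/tmp/empty-parquet").registerTempTable("tmpemptytable")
  val rows = sqlContext.sql("SELECT * FROM tmpemptytable").collect()
  assert(rows.isEmpty)
  sc.stop()
}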