Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala  29 +++++++++++++++++++++++++++++
1 file changed, 29 insertions(+), 0 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala
index 88fcfce0ec..c7541889f2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetEncodingSuite.scala
@@ -16,6 +16,10 @@
  */
 package org.apache.spark.sql.execution.datasources.parquet
 
+import scala.collection.JavaConverters._
+
+import org.apache.parquet.hadoop.ParquetOutputFormat
+
 import org.apache.spark.sql.test.SharedSQLContext
 
 // TODO: this needs a lot more testing but it's currently not easy to test with the parquet
@@ -78,4 +82,29 @@ class ParquetEncodingSuite extends ParquetCompatibilityTest with SharedSQLContex
       }}
     }
   }
+
+  test("Read row group containing both dictionary and plain encoded pages") {
+    withSQLConf(ParquetOutputFormat.DICTIONARY_PAGE_SIZE -> "2048",
+      ParquetOutputFormat.PAGE_SIZE -> "4096") {
+      withTempPath { dir =>
+        // In order to explicitly test for SPARK-14217, we set the parquet dictionary and page size
+        // such that the following data spans across 3 pages (within a single row group) where the
+        // first page is dictionary encoded and the remaining two are plain encoded.
+        val data = (0 until 512).flatMap(i => Seq.fill(3)(i.toString))
+        data.toDF("f").coalesce(1).write.parquet(dir.getCanonicalPath)
+        val file = SpecificParquetRecordReaderBase.listDirectory(dir).asScala.head
+
+        val reader = new VectorizedParquetRecordReader
+        reader.initialize(file, null /* set columns to null to project all columns */)
+        val column = reader.resultBatch().column(0)
+        assert(reader.nextBatch())
+
+        (0 until 512).foreach { i =>
+          assert(column.getUTF8String(3 * i).toString == i.toString)
+          assert(column.getUTF8String(3 * i + 1).toString == i.toString)
+          assert(column.getUTF8String(3 * i + 2).toString == i.toString)
+        }
+      }
+    }
+  }
 }
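
Note: the test above relies on Parquet's writer falling back from dictionary encoding to plain encoding once the dictionary page overflows its 2048-byte budget, which is what leaves a single row group containing both page kinds. For readers who want to confirm that the generated file really exhibits this mix, the sketch below is one way to inspect the encodings recorded in the file footer. It is illustrative only, not part of the commit: it assumes the parquet-hadoop API of this era (ParquetFileReader.readFooter, deprecated in later releases), and `parquetFile` is a hypothetical path to the file the test writes.

    // Illustrative sketch (not part of this commit): print the encodings that
    // ended up in each column chunk of the file written by the test above.
    // Assumes parquet-hadoop's ParquetFileReader.readFooter (Parquet 1.7/1.8 era);
    // `parquetFile` is a hypothetical path string to the generated parquet file.
    import scala.collection.JavaConverters._

    import org.apache.hadoop.conf.Configuration
    import org.apache.hadoop.fs.Path
    import org.apache.parquet.hadoop.ParquetFileReader

    val footer = ParquetFileReader.readFooter(new Configuration(), new Path(parquetFile))
    for (block <- footer.getBlocks.asScala; chunk <- block.getColumns.asScala) {
      // A column chunk whose dictionary spilled mid-row-group reports PLAIN
      // alongside the dictionary encoding (PLAIN_DICTIONARY in the v1 format).
      println(s"column ${chunk.getPath}: encodings = ${chunk.getEncodings.asScala.mkString(", ")}")
    }

If the dictionary and page sizes are doing their job, the chunk for column "f" should list both a dictionary encoding and PLAIN, matching the mixed-page row group the test asserts against.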