aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2016-09-30 09:59:12 -0700
committerDavies Liu <davies.liu@gmail.com>2016-09-30 09:59:12 -0700
commitf327e16863371076dbd2a7f22c8895ae07f8274b (patch)
tree2bc96f7cab165e6311be596286697e2da37c4124
parent8e491af52930886cbe0c54e7d67add3796ddb15f (diff)
downloadspark-f327e16863371076dbd2a7f22c8895ae07f8274b.tar.gz
spark-f327e16863371076dbd2a7f22c8895ae07f8274b.tar.bz2
spark-f327e16863371076dbd2a7f22c8895ae07f8274b.zip
[SPARK-17738] [SQL] fix ARRAY/MAP in columnar cache
## What changes were proposed in this pull request? The actualSize() of array and map is different from the actual size, the header is Int, rather than Long. ## How was this patch tested? The flaky test should be fixed. Author: Davies Liu <davies@databricks.com> Closes #15305 from davies/fix_MAP.
-rw-r--r--sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala8
-rw-r--r--sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala6
2 files changed, 7 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index fa9619eb07..d27d8c362d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -589,7 +589,7 @@ private[columnar] case class STRUCT(dataType: StructType)
private[columnar] case class ARRAY(dataType: ArrayType)
extends ColumnType[UnsafeArrayData] with DirectCopyColumnType[UnsafeArrayData] {
- override def defaultSize: Int = 16
+ override def defaultSize: Int = 28
override def setField(row: MutableRow, ordinal: Int, value: UnsafeArrayData): Unit = {
row.update(ordinal, value)
@@ -601,7 +601,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
override def actualSize(row: InternalRow, ordinal: Int): Int = {
val unsafeArray = getField(row, ordinal)
- 8 + unsafeArray.getSizeInBytes
+ 4 + unsafeArray.getSizeInBytes
}
override def append(value: UnsafeArrayData, buffer: ByteBuffer): Unit = {
@@ -628,7 +628,7 @@ private[columnar] case class ARRAY(dataType: ArrayType)
private[columnar] case class MAP(dataType: MapType)
extends ColumnType[UnsafeMapData] with DirectCopyColumnType[UnsafeMapData] {
- override def defaultSize: Int = 32
+ override def defaultSize: Int = 68
override def setField(row: MutableRow, ordinal: Int, value: UnsafeMapData): Unit = {
row.update(ordinal, value)
@@ -640,7 +640,7 @@ private[columnar] case class MAP(dataType: MapType)
override def actualSize(row: InternalRow, ordinal: Int): Int = {
val unsafeMap = getField(row, ordinal)
- 8 + unsafeMap.getSizeInBytes
+ 4 + unsafeMap.getSizeInBytes
}
override def append(value: UnsafeMapData, buffer: ByteBuffer): Unit = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
index 0b93c633b2..805b566728 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/columnar/ColumnTypeSuite.scala
@@ -38,7 +38,7 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
val checks = Map(
NULL -> 0, BOOLEAN -> 1, BYTE -> 1, SHORT -> 2, INT -> 4, LONG -> 8,
FLOAT -> 4, DOUBLE -> 8, COMPACT_DECIMAL(15, 10) -> 8, LARGE_DECIMAL(20, 10) -> 12,
- STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 16, MAP_TYPE -> 32)
+ STRING -> 8, BINARY -> 16, STRUCT_TYPE -> 20, ARRAY_TYPE -> 28, MAP_TYPE -> 68)
checks.foreach { case (columnType, expectedSize) =>
assertResult(expectedSize, s"Wrong defaultSize for $columnType") {
@@ -73,8 +73,8 @@ class ColumnTypeSuite extends SparkFunSuite with Logging {
checkActualSize(BINARY, Array.fill[Byte](4)(0.toByte), 4 + 4)
checkActualSize(COMPACT_DECIMAL(15, 10), Decimal(0, 15, 10), 8)
checkActualSize(LARGE_DECIMAL(20, 10), Decimal(0, 20, 10), 5)
- checkActualSize(ARRAY_TYPE, Array[Any](1), 8 + 8 + 8 + 8)
- checkActualSize(MAP_TYPE, Map(1 -> "a"), 8 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
+ checkActualSize(ARRAY_TYPE, Array[Any](1), 4 + 8 + 8 + 8)
+ checkActualSize(MAP_TYPE, Map(1 -> "a"), 4 + (8 + 8 + 8 + 8) + (8 + 8 + 8 + 8))
checkActualSize(STRUCT_TYPE, Row("hello"), 28)
}