diff options
author | Sameer Agarwal <sameer@databricks.com> | 2016-04-21 21:31:01 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-04-21 21:31:01 -0700 |
commit | b29bc3f51518806ef7827b35df7c8aada329f961 (patch) | |
tree | e32fafa897aa0cad1aa152063cec30f7a39306da /sql/catalyst/src | |
parent | f1fdb23821b89623b592bfb3ef73d61afbe93b0a (diff) | |
download | spark-b29bc3f51518806ef7827b35df7c8aada329f961.tar.gz spark-b29bc3f51518806ef7827b35df7c8aada329f961.tar.bz2 spark-b29bc3f51518806ef7827b35df7c8aada329f961.zip |
[SPARK-14680] [SQL] Support all datatypes to use VectorizedHashmap in TungstenAggregate
## What changes were proposed in this pull request?
This PR adds support for all primitive data types, decimal types, and string types in the VectorizedHashmap during aggregation.
## How was this patch tested?
Existing tests for group-by aggregates should already cover all of these data types. Additionally, the generated code was manually inspected for all supported data types (details below).
Author: Sameer Agarwal <sameer@databricks.com>
Closes #12440 from sameeragarwal/all-datatypes.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d29c27c14b..fa09f821fc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -271,6 +271,63 @@ class CodegenContext { } /** + * Returns the specialized code to set a given value in a column vector for a given `DataType`. + */ + def setValue(batch: String, row: String, dataType: DataType, ordinal: Int, + value: String): String = { + val jt = javaType(dataType) + dataType match { + case _ if isPrimitiveType(jt) => + s"$batch.column($ordinal).put${primitiveTypeName(jt)}($row, $value);" + case t: DecimalType => s"$batch.column($ordinal).putDecimal($row, $value, ${t.precision});" + case t: StringType => s"$batch.column($ordinal).putByteArray($row, $value.getBytes());" + case _ => + throw new IllegalArgumentException(s"cannot generate code for unsupported type: $dataType") + } + } + + /** + * Returns the specialized code to set a given value in a column vector for a given `DataType` + * that could potentially be nullable. + */ + def updateColumn( + batch: String, + row: String, + dataType: DataType, + ordinal: Int, + ev: ExprCode, + nullable: Boolean): String = { + if (nullable) { + s""" + if (!${ev.isNull}) { + ${setValue(batch, row, dataType, ordinal, ev.value)} + } else { + $batch.column($ordinal).putNull($row); + } + """ + } else { + s"""${setValue(batch, row, dataType, ordinal, ev.value)};""" + } + } + + /** + * Returns the specialized code to access a value from a column vector for a given `DataType`. 
+ */ + def getValue(batch: String, row: String, dataType: DataType, ordinal: Int): String = { + val jt = javaType(dataType) + dataType match { + case _ if isPrimitiveType(jt) => + s"$batch.column($ordinal).get${primitiveTypeName(jt)}($row)" + case t: DecimalType => + s"$batch.column($ordinal).getDecimal($row, ${t.precision}, ${t.scale})" + case StringType => + s"$batch.column($ordinal).getUTF8String($row)" + case _ => + throw new IllegalArgumentException(s"cannot generate code for unsupported type: $dataType") + } + } + + /** * Returns the name used in accessor and setter for a Java primitive type. */ def primitiveTypeName(jt: String): String = jt match { |