aboutsummaryrefslogtreecommitdiff
path: root/sql/catalyst/src
diff options
context:
space:
mode:
author: Sameer Agarwal <sameer@databricks.com> 2016-04-21 21:31:01 -0700
committer: Davies Liu <davies.liu@gmail.com> 2016-04-21 21:31:01 -0700
commit: b29bc3f51518806ef7827b35df7c8aada329f961 (patch)
tree: e32fafa897aa0cad1aa152063cec30f7a39306da /sql/catalyst/src
parent: f1fdb23821b89623b592bfb3ef73d61afbe93b0a (diff)
download: spark-b29bc3f51518806ef7827b35df7c8aada329f961.tar.gz
spark-b29bc3f51518806ef7827b35df7c8aada329f961.tar.bz2
spark-b29bc3f51518806ef7827b35df7c8aada329f961.zip
[SPARK-14680] [SQL] Support all datatypes to use VectorizedHashmap in TungstenAggregate
## What changes were proposed in this pull request? This PR adds support for all primitive datatypes, decimal types and string types in the VectorizedHashmap during aggregation. ## How was this patch tested? Existing tests for group-by aggregates should already test for all these datatypes. Additionally, manually inspected the generated code for all supported datatypes (details below). Author: Sameer Agarwal <sameer@databricks.com> Closes #12440 from sameeragarwal/all-datatypes.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 57
1 files changed, 57 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d29c27c14b..fa09f821fc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -271,6 +271,63 @@ class CodegenContext {
}
/**
+   * Generates the Java statement that writes `value` into one cell of a column vector.
+   *
+   * The emitted statement addresses `batch.column(ordinal)` at position `row` and already
+   * carries its terminating semicolon.
+   *
+   * @param batch    code referring to the column batch
+   * @param row      code for the row index to write to
+   * @param dataType type of the column; selects which `put*` call is emitted
+   * @param ordinal  index of the column within the batch
+   * @param value    code for the value to store
+   * @return a single Java statement as a string
+   * @throws IllegalArgumentException if `dataType` is not backed by a Java primitive and is
+   *                                  neither a `DecimalType` nor a `StringType`
+   */
+  def setValue(batch: String, row: String, dataType: DataType, ordinal: Int,
+      value: String): String = {
+    val column = s"$batch.column($ordinal)"
+    val javaTypeName = javaType(dataType)
+    if (isPrimitiveType(javaTypeName)) {
+      // Any type whose Java representation is a primitive uses the matching put<Primitive> call.
+      s"$column.put${primitiveTypeName(javaTypeName)}($row, $value);"
+    } else {
+      dataType match {
+        case decimal: DecimalType =>
+          s"$column.putDecimal($row, $value, ${decimal.precision});"
+        case _: StringType =>
+          s"$column.putByteArray($row, $value.getBytes());"
+        case _ =>
+          throw new IllegalArgumentException(
+            s"cannot generate code for unsupported type: $dataType")
+      }
+    }
+  }
+
+  /**
+   * Generates the Java code that stores the result of an evaluated expression into one cell
+   * of a column vector, optionally handling a null result.
+   *
+   * When `nullable` is true the emitted code checks `ev.isNull` and either writes `ev.value`
+   * or marks the cell as null via `putNull`. Otherwise it emits the plain write statement
+   * produced by `setValue`.
+   *
+   * @param batch    code referring to the column batch
+   * @param row      code for the row index to write to
+   * @param dataType type of the column; forwarded to `setValue`
+   * @param ordinal  index of the column within the batch
+   * @param ev       evaluated expression supplying the `isNull` flag and the `value` code
+   * @param nullable whether a null check has to be emitted around the write
+   * @return the generated Java code as a string
+   * @throws IllegalArgumentException if `setValue` does not support `dataType`
+   */
+  def updateColumn(
+      batch: String,
+      row: String,
+      dataType: DataType,
+      ordinal: Int,
+      ev: ExprCode,
+      nullable: Boolean): String = {
+    // `setValue` already terminates its statement with a semicolon, so nothing is appended
+    // here; appending another one would emit a stray empty statement (`...;;`).
+    val store = setValue(batch, row, dataType, ordinal, ev.value)
+    if (nullable) {
+      s"""
+        if (!${ev.isNull}) {
+          $store
+        } else {
+          $batch.column($ordinal).putNull($row);
+        }
+      """
+    } else {
+      store
+    }
+  }
+
+  /**
+   * Generates the Java expression that reads one cell of a column vector.
+   *
+   * The emitted expression addresses `batch.column(ordinal)` at position `row`; it has no
+   * trailing semicolon.
+   *
+   * @param batch    code referring to the column batch
+   * @param row      code for the row index to read from
+   * @param dataType type of the column; selects which `get*` call is emitted
+   * @param ordinal  index of the column within the batch
+   * @return a Java expression as a string
+   * @throws IllegalArgumentException if `dataType` is not backed by a Java primitive and is
+   *                                  neither a `DecimalType` nor `StringType`
+   */
+  def getValue(batch: String, row: String, dataType: DataType, ordinal: Int): String = {
+    val column = s"$batch.column($ordinal)"
+    val javaTypeName = javaType(dataType)
+    if (isPrimitiveType(javaTypeName)) {
+      // Any type whose Java representation is a primitive uses the matching get<Primitive> call.
+      s"$column.get${primitiveTypeName(javaTypeName)}($row)"
+    } else {
+      dataType match {
+        case decimal: DecimalType =>
+          s"$column.getDecimal($row, ${decimal.precision}, ${decimal.scale})"
+        case StringType =>
+          s"$column.getUTF8String($row)"
+        case _ =>
+          throw new IllegalArgumentException(
+            s"cannot generate code for unsupported type: $dataType")
+      }
+    }
+  }
+
+ /**
* Returns the name used in accessor and setter for a Java primitive type.
*/
def primitiveTypeName(jt: String): String = jt match {