author    Nong Li <nong@databricks.com>        2016-03-10 13:31:19 -0800
committer Davies Liu <davies.liu@gmail.com>    2016-03-10 13:31:19 -0800
commit    747d2f5381b3ef0b2663ca7def42d3dc43ee13d7 (patch)
tree      e3716aaa02f9f89e7fe09b6db2743f6f3c4218b8
parent    19f4ac6dc766a3d33b04cd071b414c3bea88e97d (diff)
[SPARK-13790] Speed up ColumnVector's getDecimal
## What changes were proposed in this pull request?

We should reuse an object, as the other non-primitive type getters already do. For a query that computes averages over decimal columns, this shows a 10% speedup on overall query times.

## How was this patch tested?

Existing tests and this benchmark:

```
TPCDS Snappy:                     Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)
--------------------------------------------------------------------------------
q27-agg (master)                      10627 / 11057         10.8         92.3
q27-agg (this patch)                    9722 / 9832         11.8         84.4
```

Author: Nong Li <nong@databricks.com>

Closes #11624 from nongli/spark-13790.
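An editor's note on where the cost goes: the diff below swaps the checked factory `Decimal.apply(unscaled, precision, scale)` for `createUnsafe`, which writes the fields directly without validating bounds. The sketch below illustrates that difference; `SimpleDecimal`, `POW_10`, and both factory methods are simplified assumptions made for this note, not Spark's actual internals.

```scala
// Sketch only: a checked construction path (approximating Decimal.apply)
// versus an unchecked one (approximating Decimal.createUnsafe).
object DecimalPaths {
  final class SimpleDecimal(var longVal: Long, var precision: Int, var scale: Int)

  // Powers of ten up to 10^18, the largest that fits in a Long.
  private val POW_10: Array[Long] = Array.iterate(1L, 19)(_ * 10)

  // Checked: reject unscaled values with more digits than `precision` allows.
  def checked(unscaled: Long, precision: Int, scale: Int): SimpleDecimal = {
    if (precision < POW_10.length && math.abs(unscaled) >= POW_10(precision)) {
      throw new ArithmeticException(s"$unscaled does not fit in precision $precision")
    }
    new SimpleDecimal(unscaled, precision, scale)
  }

  // Unchecked: the caller guarantees the bounds, so the branch and the
  // math.abs disappear from the per-row hot path entirely.
  def unchecked(unscaled: Long, precision: Int, scale: Int): SimpleDecimal =
    new SimpleDecimal(unscaled, precision, scale)
}
```

In a per-row hot path like `getDecimal`, removing that validation is the kind of win the benchmark above measures.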
-rw-r--r--  sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java    2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala                  11
-rw-r--r--  sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java     4
3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
index a88bcbfdb7..dd2f39eb81 100644
--- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
+++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java
@@ -400,7 +400,7 @@ public final class UnsafeRow extends MutableRow implements Externalizable, KryoS
       return null;
     }
     if (precision <= Decimal.MAX_LONG_DIGITS()) {
-      return Decimal.apply(getLong(ordinal), precision, scale);
+      return Decimal.createUnsafe(getLong(ordinal), precision, scale);
     } else {
       byte[] bytes = getBinary(ordinal);
       BigInteger bigInteger = new BigInteger(bytes);
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 6a59e9728a..f0e535bb45 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -376,6 +376,17 @@ object Decimal {
   def apply(value: String): Decimal = new Decimal().set(BigDecimal(value))
 
+  /**
+   * Creates a decimal from unscaled, precision and scale without checking the bounds.
+   */
+  def createUnsafe(unscaled: Long, precision: Int, scale: Int): Decimal = {
+    val dec = new Decimal()
+    dec.longVal = unscaled
+    dec._precision = precision
+    dec._scale = scale
+    dec
+  }
+
   // Evidence parameters for Decimal considered either as Fractional or Integral. We provide two
   // parameters inheriting from a common trait since both traits define mkNumericOps.
   // See scala.math's Numeric.scala for examples for Scala's built-in types.
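A short usage sketch for the new factory (the values here are illustrative, not from the patch): `createUnsafe` trusts its caller, so it is only safe when the unscaled value is already known to fit the declared precision, as column data is once it has been written.

```scala
import org.apache.spark.sql.types.Decimal

// Safe: 12345 has 5 digits, within the declared precision of 10.
val price = Decimal.createUnsafe(12345L, precision = 10, scale = 2) // 123.45

// Dangerous: 12345 has more digits than precision 3 allows. The checked
// Decimal.apply path would reject this; createUnsafe silently builds an
// out-of-bounds value.
val bogus = Decimal.createUnsafe(12345L, precision = 3, scale = 2)
```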
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
index bb0247c2fb..ffcc9c2ace 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
@@ -536,9 +536,9 @@ public abstract class ColumnVector {
    */
   public final Decimal getDecimal(int rowId, int precision, int scale) {
     if (precision <= Decimal.MAX_INT_DIGITS()) {
-      return Decimal.apply(getInt(rowId), precision, scale);
+      return Decimal.createUnsafe(getInt(rowId), precision, scale);
     } else if (precision <= Decimal.MAX_LONG_DIGITS()) {
-      return Decimal.apply(getLong(rowId), precision, scale);
+      return Decimal.createUnsafe(getLong(rowId), precision, scale);
     } else {
       // TODO: best perf?
       byte[] bytes = getBinary(rowId);
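For reference, the three branches above follow how vectorized columns store decimals physically. The sketch below restates that dispatch, assuming the standard thresholds `Decimal.MAX_INT_DIGITS() == 9` and `Decimal.MAX_LONG_DIGITS() == 18`:

```scala
// Illustration of getDecimal's storage dispatch: small decimals live in Int
// columns, medium ones in Long columns, and anything wider falls back to a
// byte-array (arbitrary-precision) representation.
sealed trait DecimalStorage
case object IntBacked    extends DecimalStorage // unscaled value fits an Int
case object LongBacked   extends DecimalStorage // unscaled value fits a Long
case object BinaryBacked extends DecimalStorage // needs BigInteger bytes

def storageFor(precision: Int): DecimalStorage =
  if (precision <= 9) IntBacked          // Decimal.MAX_INT_DIGITS()
  else if (precision <= 18) LongBacked   // Decimal.MAX_LONG_DIGITS()
  else BinaryBacked
```

Only the first two branches gain from `createUnsafe`; the widest decimals still pay for a per-row `BigInteger`, which the `// TODO: best perf?` comment in the hunk flags.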