diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-01-29 10:24:23 -0800 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-01-29 10:24:23 -0800 |
commit | c5f745ede01831b59c57effa7de88c648b82c13d (patch) | |
tree | b8119933725897c711ac94da671a4dad0522b517 /external | |
parent | e4c1162b6b3dbc8fc95cfe75c6e0bc2915575fb2 (diff) | |
download | spark-c5f745ede01831b59c57effa7de88c648b82c13d.tar.gz spark-c5f745ede01831b59c57effa7de88c648b82c13d.tar.bz2 spark-c5f745ede01831b59c57effa7de88c648b82c13d.zip |
[SPARK-13072] [SQL] simplify and improve murmur3 hash expression codegen
Simplify the generated code of the hash expression by removing several unnecessary local variables, and skip the null check whenever the input is known to be non-nullable.
Generated code comparison for `hash(int, double, string, array<int>)`:
**before:**
```
public UnsafeRow apply(InternalRow i) {
/* hash(input[0, int],input[1, double],input[2, string],input[3, array<int>],42) */
int value1 = 42;
/* input[0, int] */
int value3 = i.getInt(0);
if (!false) {
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashInt(value3, value1);
}
/* input[1, double] */
double value5 = i.getDouble(1);
if (!false) {
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashLong(Double.doubleToLongBits(value5), value1);
}
/* input[2, string] */
boolean isNull6 = i.isNullAt(2);
UTF8String value7 = isNull6 ? null : (i.getUTF8String(2));
if (!isNull6) {
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value7.getBaseObject(), value7.getBaseOffset(), value7.numBytes(), value1);
}
/* input[3, array<int>] */
boolean isNull8 = i.isNullAt(3);
ArrayData value9 = isNull8 ? null : (i.getArray(3));
if (!isNull8) {
int result10 = value1;
for (int index11 = 0; index11 < value9.numElements(); index11++) {
if (!value9.isNullAt(index11)) {
final int element12 = value9.getInt(index11);
result10 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashInt(element12, result10);
}
}
value1 = result10;
}
}
```
**after:**
```
public UnsafeRow apply(InternalRow i) {
/* hash(input[0, int],input[1, double],input[2, string],input[3, array<int>],42) */
int value1 = 42;
/* input[0, int] */
int value3 = i.getInt(0);
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashInt(value3, value1);
/* input[1, double] */
double value5 = i.getDouble(1);
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashLong(Double.doubleToLongBits(value5), value1);
/* input[2, string] */
boolean isNull6 = i.isNullAt(2);
UTF8String value7 = isNull6 ? null : (i.getUTF8String(2));
if (!isNull6) {
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value7.getBaseObject(), value7.getBaseOffset(), value7.numBytes(), value1);
}
/* input[3, array<int>] */
boolean isNull8 = i.isNullAt(3);
ArrayData value9 = isNull8 ? null : (i.getArray(3));
if (!isNull8) {
for (int index10 = 0; index10 < value9.numElements(); index10++) {
final int element11 = value9.getInt(index10);
value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashInt(element11, value1);
}
}
rowWriter14.write(0, value1);
return result12;
}
```
Author: Wenchen Fan <wenchen@databricks.com>
Closes #10974 from cloud-fan/codegen.
Diffstat (limited to 'external')
0 files changed, 0 insertions, 0 deletions