about summary refs log tree commit diff
path: root/dev
diff options
context:
space:
mode:
author Kazuaki Ishizaki <ishizaki@jp.ibm.com> 2016-11-08 12:01:54 +0100
committer Herman van Hovell <hvanhovell@databricks.com> 2016-11-08 12:01:54 +0100
commit 47731e1865fa1e3a8881a1f4420017bdc026e455 (patch)
tree e2c121e3bc62f5740ce85e5ed995384cd02e51c3 /dev
parent 6f3697136aa68dc39d3ce42f43a7af554d2a3bf9 (diff)
download spark-47731e1865fa1e3a8881a1f4420017bdc026e455.tar.gz
spark-47731e1865fa1e3a8881a1f4420017bdc026e455.tar.bz2
spark-47731e1865fa1e3a8881a1f4420017bdc026e455.zip
[SPARK-18207][SQL] Fix a compilation error due to HashExpression.doGenCode
## What changes were proposed in this pull request? This PR avoids a compilation error caused by a generated method whose Java bytecode exceeds 64 KB. The error occurs because the Java code generated for computing the hash value of a row is too big. This PR fixes the compilation error by splitting the big code chunk into multiple methods, by calling `CodegenContext.splitExpressions` in `HashExpression.doGenCode`. The test case requires computing the hash code of a row that includes 1000 String fields. `HashExpression.doGenCode` generates a lot of Java code for this computation in a single function. As a result, the size of the corresponding Java bytecode is more than 64 KB. Generated code without this PR: ````java /* 027 */ public UnsafeRow apply(InternalRow i) { /* 028 */ boolean isNull = false; /* 029 */ /* 030 */ int value1 = 42; /* 031 */ /* 032 */ boolean isNull2 = i.isNullAt(0); /* 033 */ UTF8String value2 = isNull2 ? null : (i.getUTF8String(0)); /* 034 */ if (!isNull2) { /* 035 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(), value2.getBaseOffset(), value2.numBytes(), value1); /* 036 */ } /* 037 */ /* 038 */ /* 039 */ boolean isNull3 = i.isNullAt(1); /* 040 */ UTF8String value3 = isNull3 ? null : (i.getUTF8String(1)); /* 041 */ if (!isNull3) { /* 042 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(), value3.getBaseOffset(), value3.numBytes(), value1); /* 043 */ } /* 044 */ /* 045 */ ... /* 7024 */ /* 7025 */ boolean isNull1001 = i.isNullAt(999); /* 7026 */ UTF8String value1001 = isNull1001 ? null : (i.getUTF8String(999)); /* 7027 */ if (!isNull1001) { /* 7028 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(), value1001.getBaseOffset(), value1001.numBytes(), value1); /* 7029 */ } /* 7030 */ /* 7031 */ /* 7032 */ boolean isNull1002 = i.isNullAt(1000); /* 7033 */ UTF8String value1002 = isNull1002 ? 
null : (i.getUTF8String(1000)); /* 7034 */ if (!isNull1002) { /* 7035 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1002.getBaseObject(), value1002.getBaseOffset(), value1002.numBytes(), value1); /* 7036 */ } ```` Generated code with this PR ````java /* 3807 */ private void apply_249(InternalRow i) { /* 3808 */ /* 3809 */ boolean isNull998 = i.isNullAt(996); /* 3810 */ UTF8String value998 = isNull998 ? null : (i.getUTF8String(996)); /* 3811 */ if (!isNull998) { /* 3812 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value998.getBaseObject(), value998.getBaseOffset(), value998.numBytes(), value1); /* 3813 */ } /* 3814 */ /* 3815 */ boolean isNull999 = i.isNullAt(997); /* 3816 */ UTF8String value999 = isNull999 ? null : (i.getUTF8String(997)); /* 3817 */ if (!isNull999) { /* 3818 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value999.getBaseObject(), value999.getBaseOffset(), value999.numBytes(), value1); /* 3819 */ } /* 3820 */ /* 3821 */ boolean isNull1000 = i.isNullAt(998); /* 3822 */ UTF8String value1000 = isNull1000 ? null : (i.getUTF8String(998)); /* 3823 */ if (!isNull1000) { /* 3824 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1000.getBaseObject(), value1000.getBaseOffset(), value1000.numBytes(), value1); /* 3825 */ } /* 3826 */ /* 3827 */ boolean isNull1001 = i.isNullAt(999); /* 3828 */ UTF8String value1001 = isNull1001 ? null : (i.getUTF8String(999)); /* 3829 */ if (!isNull1001) { /* 3830 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(), value1001.getBaseOffset(), value1001.numBytes(), value1); /* 3831 */ } /* 3832 */ /* 3833 */ } /* 3834 */ ... /* 4532 */ private void apply_0(InternalRow i) { /* 4533 */ /* 4534 */ boolean isNull2 = i.isNullAt(0); /* 4535 */ UTF8String value2 = isNull2 ? 
null : (i.getUTF8String(0)); /* 4536 */ if (!isNull2) { /* 4537 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(), value2.getBaseOffset(), value2.numBytes(), value1); /* 4538 */ } /* 4539 */ /* 4540 */ boolean isNull3 = i.isNullAt(1); /* 4541 */ UTF8String value3 = isNull3 ? null : (i.getUTF8String(1)); /* 4542 */ if (!isNull3) { /* 4543 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(), value3.getBaseOffset(), value3.numBytes(), value1); /* 4544 */ } /* 4545 */ /* 4546 */ boolean isNull4 = i.isNullAt(2); /* 4547 */ UTF8String value4 = isNull4 ? null : (i.getUTF8String(2)); /* 4548 */ if (!isNull4) { /* 4549 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value4.getBaseObject(), value4.getBaseOffset(), value4.numBytes(), value1); /* 4550 */ } /* 4551 */ /* 4552 */ boolean isNull5 = i.isNullAt(3); /* 4553 */ UTF8String value5 = isNull5 ? null : (i.getUTF8String(3)); /* 4554 */ if (!isNull5) { /* 4555 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value5.getBaseObject(), value5.getBaseOffset(), value5.numBytes(), value1); /* 4556 */ } /* 4557 */ /* 4558 */ } ... /* 7344 */ public UnsafeRow apply(InternalRow i) { /* 7345 */ boolean isNull = false; /* 7346 */ /* 7347 */ value1 = 42; /* 7348 */ apply_0(i); /* 7349 */ apply_1(i); ... /* 7596 */ apply_248(i); /* 7597 */ apply_249(i); /* 7598 */ apply_250(i); /* 7599 */ apply_251(i); ... ```` ## How was this patch tested? Add a new test in `DataFrameSuite` Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com> Closes #15745 from kiszk/SPARK-18207.
Diffstat (limited to 'dev')
0 files changed, 0 insertions, 0 deletions