diff options
author | Kazuaki Ishizaki <ishizaki@jp.ibm.com> | 2016-11-08 12:01:54 +0100 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-11-08 12:01:54 +0100 |
commit | 47731e1865fa1e3a8881a1f4420017bdc026e455 (patch) | |
tree | e2c121e3bc62f5740ce85e5ed995384cd02e51c3 /R/log4j.properties | |
parent | 6f3697136aa68dc39d3ce42f43a7af554d2a3bf9 (diff) | |
download | spark-47731e1865fa1e3a8881a1f4420017bdc026e455.tar.gz spark-47731e1865fa1e3a8881a1f4420017bdc026e455.tar.bz2 spark-47731e1865fa1e3a8881a1f4420017bdc026e455.zip |
[SPARK-18207][SQL] Fix a compilation error due to HashExpression.doGenCode
## What changes were proposed in this pull request?
This PR avoids a compilation error caused by a generated method exceeding the JVM's 64KB bytecode size limit. The error occurs because the Java code generated for computing the hash value of a row is too big to fit in a single method. This PR fixes the compilation error by splitting the big code chunk into multiple methods, calling `CodegenContext.splitExpressions` in `HashExpression.doGenCode`.
The test case requires calculating the hash code of a row that includes 1000 String fields. `HashExpression.doGenCode` generates a lot of Java code for this computation and places all of it in a single method. As a result, the size of the corresponding Java bytecode exceeds 64 KB.
Generated code without this PR
````java
/* 027 */ public UnsafeRow apply(InternalRow i) {
/* 028 */ boolean isNull = false;
/* 029 */
/* 030 */ int value1 = 42;
/* 031 */
/* 032 */ boolean isNull2 = i.isNullAt(0);
/* 033 */ UTF8String value2 = isNull2 ? null : (i.getUTF8String(0));
/* 034 */ if (!isNull2) {
/* 035 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(), value2.getBaseOffset(), value2.numBytes(), value1);
/* 036 */ }
/* 037 */
/* 038 */
/* 039 */ boolean isNull3 = i.isNullAt(1);
/* 040 */ UTF8String value3 = isNull3 ? null : (i.getUTF8String(1));
/* 041 */ if (!isNull3) {
/* 042 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(), value3.getBaseOffset(), value3.numBytes(), value1);
/* 043 */ }
/* 044 */
/* 045 */
...
/* 7024 */
/* 7025 */ boolean isNull1001 = i.isNullAt(999);
/* 7026 */ UTF8String value1001 = isNull1001 ? null : (i.getUTF8String(999));
/* 7027 */ if (!isNull1001) {
/* 7028 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(), value1001.getBaseOffset(), value1001.numBytes(), value1);
/* 7029 */ }
/* 7030 */
/* 7031 */
/* 7032 */ boolean isNull1002 = i.isNullAt(1000);
/* 7033 */ UTF8String value1002 = isNull1002 ? null : (i.getUTF8String(1000));
/* 7034 */ if (!isNull1002) {
/* 7035 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1002.getBaseObject(), value1002.getBaseOffset(), value1002.numBytes(), value1);
/* 7036 */ }
````
Generated code with this PR
````java
/* 3807 */ private void apply_249(InternalRow i) {
/* 3808 */
/* 3809 */ boolean isNull998 = i.isNullAt(996);
/* 3810 */ UTF8String value998 = isNull998 ? null : (i.getUTF8String(996));
/* 3811 */ if (!isNull998) {
/* 3812 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value998.getBaseObject(), value998.getBaseOffset(), value998.numBytes(), value1);
/* 3813 */ }
/* 3814 */
/* 3815 */ boolean isNull999 = i.isNullAt(997);
/* 3816 */ UTF8String value999 = isNull999 ? null : (i.getUTF8String(997));
/* 3817 */ if (!isNull999) {
/* 3818 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value999.getBaseObject(), value999.getBaseOffset(), value999.numBytes(), value1);
/* 3819 */ }
/* 3820 */
/* 3821 */ boolean isNull1000 = i.isNullAt(998);
/* 3822 */ UTF8String value1000 = isNull1000 ? null : (i.getUTF8String(998));
/* 3823 */ if (!isNull1000) {
/* 3824 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1000.getBaseObject(), value1000.getBaseOffset(), value1000.numBytes(), value1);
/* 3825 */ }
/* 3826 */
/* 3827 */ boolean isNull1001 = i.isNullAt(999);
/* 3828 */ UTF8String value1001 = isNull1001 ? null : (i.getUTF8String(999));
/* 3829 */ if (!isNull1001) {
/* 3830 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value1001.getBaseObject(), value1001.getBaseOffset(), value1001.numBytes(), value1);
/* 3831 */ }
/* 3832 */
/* 3833 */ }
/* 3834 */
...
/* 4532 */ private void apply_0(InternalRow i) {
/* 4533 */
/* 4534 */ boolean isNull2 = i.isNullAt(0);
/* 4535 */ UTF8String value2 = isNull2 ? null : (i.getUTF8String(0));
/* 4536 */ if (!isNull2) {
/* 4537 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value2.getBaseObject(), value2.getBaseOffset(), value2.numBytes(), value1);
/* 4538 */ }
/* 4539 */
/* 4540 */ boolean isNull3 = i.isNullAt(1);
/* 4541 */ UTF8String value3 = isNull3 ? null : (i.getUTF8String(1));
/* 4542 */ if (!isNull3) {
/* 4543 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value3.getBaseObject(), value3.getBaseOffset(), value3.numBytes(), value1);
/* 4544 */ }
/* 4545 */
/* 4546 */ boolean isNull4 = i.isNullAt(2);
/* 4547 */ UTF8String value4 = isNull4 ? null : (i.getUTF8String(2));
/* 4548 */ if (!isNull4) {
/* 4549 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value4.getBaseObject(), value4.getBaseOffset(), value4.numBytes(), value1);
/* 4550 */ }
/* 4551 */
/* 4552 */ boolean isNull5 = i.isNullAt(3);
/* 4553 */ UTF8String value5 = isNull5 ? null : (i.getUTF8String(3));
/* 4554 */ if (!isNull5) {
/* 4555 */ value1 = org.apache.spark.unsafe.hash.Murmur3_x86_32.hashUnsafeBytes(value5.getBaseObject(), value5.getBaseOffset(), value5.numBytes(), value1);
/* 4556 */ }
/* 4557 */
/* 4558 */ }
...
/* 7344 */ public UnsafeRow apply(InternalRow i) {
/* 7345 */ boolean isNull = false;
/* 7346 */
/* 7347 */ value1 = 42;
/* 7348 */ apply_0(i);
/* 7349 */ apply_1(i);
...
/* 7596 */ apply_248(i);
/* 7597 */ apply_249(i);
/* 7598 */ apply_250(i);
/* 7599 */ apply_251(i);
...
````
## How was this patch tested?
Added a new test to `DataFrameSuite`.
Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Closes #15745 from kiszk/SPARK-18207.
Diffstat (limited to 'R/log4j.properties')
0 files changed, 0 insertions, 0 deletions