diff options
author | Kazuaki Ishizaki <ishizaki@jp.ibm.com> | 2016-03-28 10:35:48 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-03-28 10:35:48 -0700 |
commit | 4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f (patch) | |
tree | 7efef508691c879b4b0fd6d8b835794d862f7e4f /sql | |
parent | e5a1b301fbe191f1a9627a1083d960c98f543d13 (diff) | |
download | spark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.tar.gz spark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.tar.bz2 spark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.zip |
[SPARK-13844] [SQL] Generate better code for filters with a non-nullable column
## What changes were proposed in this pull request?
This PR simplifies generated code with a non-nullable column. This PR addresses three items:
1. Generate simplified code for and / or
2. Generate better code for divide and remainder with non-zero dividend
3. Pass nullable information into BoundReference at WholeStageCodegen
I have attached the generated code with and without this PR
## How was this patch tested?
Tested by existing test suites in sql/core
Here is a motivating example
````
(0 to 6).map(i => (i.toString, i.toInt)).toDF("k", "v")
.filter("v % 2 == 0").filter("v <= 4").filter("v > 1").show()
````
Generated code without this PR
````java
/* 032 */ protected void processNext() throws java.io.IOException {
/* 033 */ /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 034 */
/* 035 */ /*** PRODUCE: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 0)) && ((_2#1 <= 4) && (_2#1 > 1))) */
/* 036 */
/* 037 */ /*** PRODUCE: INPUT */
/* 038 */
/* 039 */ while (!shouldStop() && inputadapter_input.hasNext()) {
/* 040 */ InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
/* 041 */ /*** CONSUME: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 0)) && ((_2#1 <= 4) && (_2#1 > 1))) */
/* 042 */ /* input[1, int] */
/* 043 */ int filter_value1 = inputadapter_row.getInt(1);
/* 044 */
/* 045 */ /* isnotnull((input[1, int] % 2)) */
/* 046 */ /* (input[1, int] % 2) */
/* 047 */ boolean filter_isNull3 = false;
/* 048 */ int filter_value3 = -1;
/* 049 */ if (false || 2 == 0) {
/* 050 */ filter_isNull3 = true;
/* 051 */ } else {
/* 052 */ if (false) {
/* 053 */ filter_isNull3 = true;
/* 054 */ } else {
/* 055 */ filter_value3 = (int)(filter_value1 % 2);
/* 056 */ }
/* 057 */ }
/* 058 */ if (!(!(filter_isNull3))) continue;
/* 059 */
/* 060 */ /* ((input[1, int] % 2) = 0) */
/* 061 */ boolean filter_isNull6 = true;
/* 062 */ boolean filter_value6 = false;
/* 063 */ /* (input[1, int] % 2) */
/* 064 */ boolean filter_isNull7 = false;
/* 065 */ int filter_value7 = -1;
/* 066 */ if (false || 2 == 0) {
/* 067 */ filter_isNull7 = true;
/* 068 */ } else {
/* 069 */ if (false) {
/* 070 */ filter_isNull7 = true;
/* 071 */ } else {
/* 072 */ filter_value7 = (int)(filter_value1 % 2);
/* 073 */ }
/* 074 */ }
/* 075 */ if (!filter_isNull7) {
/* 076 */ filter_isNull6 = false; // resultCode could change nullability.
/* 077 */ filter_value6 = filter_value7 == 0;
/* 078 */
/* 079 */ }
/* 080 */ if (filter_isNull6 || !filter_value6) continue;
/* 081 */
/* 082 */ /* (input[1, int] <= 4) */
/* 083 */ boolean filter_value11 = false;
/* 084 */ filter_value11 = filter_value1 <= 4;
/* 085 */ if (!filter_value11) continue;
/* 086 */
/* 087 */ /* (input[1, int] > 1) */
/* 088 */ boolean filter_value14 = false;
/* 089 */ filter_value14 = filter_value1 > 1;
/* 090 */ if (!filter_value14) continue;
/* 091 */
/* 092 */ filter_metricValue.add(1);
/* 093 */
/* 094 */ /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 095 */
/* 096 */ /* input[0, string] */
/* 097 */ /* input[0, string] */
/* 098 */ boolean filter_isNull = inputadapter_row.isNullAt(0);
/* 099 */ UTF8String filter_value = filter_isNull ? null : (inputadapter_row.getUTF8String(0));
/* 100 */ project_holder.reset();
/* 101 */
/* 102 */ project_rowWriter.zeroOutNullBytes();
/* 103 */
/* 104 */ if (filter_isNull) {
/* 105 */ project_rowWriter.setNullAt(0);
/* 106 */ } else {
/* 107 */ project_rowWriter.write(0, filter_value);
/* 108 */ }
/* 109 */
/* 110 */ project_rowWriter.write(1, filter_value1);
/* 111 */ project_result.setTotalSize(project_holder.totalSize());
/* 112 */ append(project_result.copy());
/* 113 */ }
/* 114 */ }
/* 115 */ }
````
Generated code with this PR
````java
/* 032 */ protected void processNext() throws java.io.IOException {
/* 033 */ /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 034 */
/* 035 */ /*** PRODUCE: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 > 1))) */
/* 036 */
/* 037 */ /*** PRODUCE: INPUT */
/* 038 */
/* 039 */ while (!shouldStop() && inputadapter_input.hasNext()) {
/* 040 */ InternalRow inputadapter_row = (InternalRow) inputadapter_input.next();
/* 041 */ /*** CONSUME: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 > 1))) */
/* 042 */ /* input[1, int] */
/* 043 */ int filter_value1 = inputadapter_row.getInt(1);
/* 044 */
/* 045 */ /* ((input[1, int] % 2) = 0) */
/* 046 */ /* (input[1, int] % 2) */
/* 047 */ int filter_value3 = (int)(filter_value1 % 2);
/* 048 */
/* 049 */ boolean filter_value2 = false;
/* 050 */ filter_value2 = filter_value3 == 0;
/* 051 */ if (!filter_value2) continue;
/* 052 */
/* 053 */ /* (input[1, int] <= 5) */
/* 054 */ boolean filter_value7 = false;
/* 055 */ filter_value7 = filter_value1 <= 5;
/* 056 */ if (!filter_value7) continue;
/* 057 */
/* 058 */ /* (input[1, int] > 1) */
/* 059 */ boolean filter_value10 = false;
/* 060 */ filter_value10 = filter_value1 > 1;
/* 061 */ if (!filter_value10) continue;
/* 062 */
/* 063 */ filter_metricValue.add(1);
/* 064 */
/* 065 */ /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */
/* 066 */
/* 067 */ /* input[0, string] */
/* 068 */ /* input[0, string] */
/* 069 */ boolean filter_isNull = inputadapter_row.isNullAt(0);
/* 070 */ UTF8String filter_value = filter_isNull ? null : (inputadapter_row.getUTF8String(0));
/* 071 */ project_holder.reset();
/* 072 */
/* 073 */ project_rowWriter.zeroOutNullBytes();
/* 074 */
/* 075 */ if (filter_isNull) {
/* 076 */ project_rowWriter.setNullAt(0);
/* 077 */ } else {
/* 078 */ project_rowWriter.write(0, filter_value);
/* 079 */ }
/* 080 */
/* 081 */ project_rowWriter.write(1, filter_value1);
/* 082 */ project_result.setTotalSize(project_holder.totalSize());
/* 083 */ append(project_result.copy());
/* 084 */ }
/* 085 */ }
/* 086 */ }
````
Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com>
Closes #11684 from kiszk/SPARK-13844.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 72 | ||||
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala | 78 |
2 files changed, 102 insertions, 48 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index ed812e0679..1e9c971800 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -237,21 +237,35 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic } else { s"($javaType)(${eval1.value} $symbol ${eval2.value})" } - s""" - ${eval2.code} - boolean ${ev.isNull} = false; - $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; - if (${eval2.isNull} || $isZero) { - ${ev.isNull} = true; - } else { - ${eval1.code} - if (${eval1.isNull}) { + if (!left.nullable && !right.nullable) { + s""" + ${eval2.code} + boolean ${ev.isNull} = false; + $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + if ($isZero) { ${ev.isNull} = true; } else { + ${eval1.code} ${ev.value} = $divide; } - } - """ + """ + } else { + s""" + ${eval2.code} + boolean ${ev.isNull} = false; + $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + if (${eval2.isNull} || $isZero) { + ${ev.isNull} = true; + } else { + ${eval1.code} + if (${eval1.isNull}) { + ${ev.isNull} = true; + } else { + ${ev.value} = $divide; + } + } + """ + } } } @@ -299,21 +313,35 @@ case class Remainder(left: Expression, right: Expression) extends BinaryArithmet } else { s"($javaType)(${eval1.value} $symbol ${eval2.value})" } - s""" - ${eval2.code} - boolean ${ev.isNull} = false; - $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; - if (${eval2.isNull} || $isZero) { - ${ev.isNull} = true; - } else { - ${eval1.code} - if (${eval1.isNull}) { + if (!left.nullable && !right.nullable) { + s""" + ${eval2.code} + boolean ${ev.isNull} = false; + $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + if ($isZero) { ${ev.isNull} = true; } else { + ${eval1.code} ${ev.value} = $remainder; } - } - """ + """ + } else { + s""" + ${eval2.code} + boolean ${ev.isNull} = false; + $javaType ${ev.value} = ${ctx.defaultValue(javaType)}; + if (${eval2.isNull} || $isZero) { + ${ev.isNull} = true; + } else { + ${eval1.code} + if (${eval1.isNull}) { + ${ev.isNull} = true; + } else { + ${ev.value} = $remainder; + } + } + """ + } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 20818bfb1a..e23ad5596b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -274,22 +274,35 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with val eval2 = right.gen(ctx) // The result should be `false`, if any of them is `false` whenever the other is null or not. - s""" - ${eval1.code} - boolean ${ev.isNull} = false; - boolean ${ev.value} = false; + if (!left.nullable && !right.nullable) { + ev.isNull = "false" + s""" + ${eval1.code} + boolean ${ev.value} = false; - if (!${eval1.isNull} && !${eval1.value}) { - } else { - ${eval2.code} - if (!${eval2.isNull} && !${eval2.value}) { - } else if (!${eval1.isNull} && !${eval2.isNull}) { - ${ev.value} = true; + if (${eval1.value}) { + ${eval2.code} + ${ev.value} = ${eval2.value}; + } + """ + } else { + s""" + ${eval1.code} + boolean ${ev.isNull} = false; + boolean ${ev.value} = false; + + if (!${eval1.isNull} && !${eval1.value}) { } else { - ${ev.isNull} = true; + ${eval2.code} + if (!${eval2.isNull} && !${eval2.value}) { + } else if (!${eval1.isNull} && !${eval2.isNull}) { + ${ev.value} = true; + } else { + ${ev.isNull} = true; + } } - } - """ + """ + } } } @@ -325,22 +338,35 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P val eval2 = right.gen(ctx) // The result should be `true`, if any of them is `true` whenever the other is null or not. - s""" - ${eval1.code} - boolean ${ev.isNull} = false; - boolean ${ev.value} = true; + if (!left.nullable && !right.nullable) { + ev.isNull = "false" + s""" + ${eval1.code} + boolean ${ev.value} = true; - if (!${eval1.isNull} && ${eval1.value}) { - } else { - ${eval2.code} - if (!${eval2.isNull} && ${eval2.value}) { - } else if (!${eval1.isNull} && !${eval2.isNull}) { - ${ev.value} = false; + if (!${eval1.value}) { + ${eval2.code} + ${ev.value} = ${eval2.value}; + } + """ + } else { + s""" + ${eval1.code} + boolean ${ev.isNull} = false; + boolean ${ev.value} = true; + + if (!${eval1.isNull} && ${eval1.value}) { } else { - ${ev.isNull} = true; + ${eval2.code} + if (!${eval2.isNull} && ${eval2.value}) { + } else if (!${eval1.isNull} && !${eval2.isNull}) { + ${ev.value} = false; + } else { + ${ev.isNull} = true; + } } - } - """ + """ + } } } |