aboutsummaryrefslogtreecommitdiff
path: root/sql
diff options
context:
space:
mode:
authorKazuaki Ishizaki <ishizaki@jp.ibm.com>2016-03-28 10:35:48 -0700
committerDavies Liu <davies.liu@gmail.com>2016-03-28 10:35:48 -0700
commit4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f (patch)
tree7efef508691c879b4b0fd6d8b835794d862f7e4f /sql
parente5a1b301fbe191f1a9627a1083d960c98f543d13 (diff)
downloadspark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.tar.gz
spark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.tar.bz2
spark-4a7636f2da2121ee8c6fb7e6614820aaf3db8e0f.zip
[SPARK-13844] [SQL] Generate better code for filters with a non-nullable column
## What changes were proposed in this pull request? This PR simplifies generated code with a non-nullable column. This PR addresses three items: 1. Generate simplified code for and / or 2. Generate better code for divide and remainder with non-zero dividend 3. Pass nullable information into BoundReference at WholeStageCodegen I have attached the generated code with and without this PR ## How was this patch tested? Tested by existing test suites in sql/core Here is a motivating example ```` (0 to 6).map(i => (i.toString, i.toInt)).toDF("k", "v") .filter("v % 2 == 0").filter("v <= 4").filter("v > 1").show() ```` Generated code without this PR ````java /* 032 */ protected void processNext() throws java.io.IOException { /* 033 */ /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */ /* 034 */ /* 035 */ /*** PRODUCE: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 0)) && ((_2#1 <= 4) && (_2#1 > 1))) */ /* 036 */ /* 037 */ /*** PRODUCE: INPUT */ /* 038 */ /* 039 */ while (!shouldStop() && inputadapter_input.hasNext()) { /* 040 */ InternalRow inputadapter_row = (InternalRow) inputadapter_input.next(); /* 041 */ /*** CONSUME: Filter ((isnotnull((_2#1 % 2)) && ((_2#1 % 2) = 0)) && ((_2#1 <= 4) && (_2#1 > 1))) */ /* 042 */ /* input[1, int] */ /* 043 */ int filter_value1 = inputadapter_row.getInt(1); /* 044 */ /* 045 */ /* isnotnull((input[1, int] % 2)) */ /* 046 */ /* (input[1, int] % 2) */ /* 047 */ boolean filter_isNull3 = false; /* 048 */ int filter_value3 = -1; /* 049 */ if (false || 2 == 0) { /* 050 */ filter_isNull3 = true; /* 051 */ } else { /* 052 */ if (false) { /* 053 */ filter_isNull3 = true; /* 054 */ } else { /* 055 */ filter_value3 = (int)(filter_value1 % 2); /* 056 */ } /* 057 */ } /* 058 */ if (!(!(filter_isNull3))) continue; /* 059 */ /* 060 */ /* ((input[1, int] % 2) = 0) */ /* 061 */ boolean filter_isNull6 = true; /* 062 */ boolean filter_value6 = false; /* 063 */ /* (input[1, int] % 2) */ /* 064 */ boolean filter_isNull7 = false; /* 065 */ int filter_value7 = -1; /* 066 */ if (false || 2 == 0) { /* 067 */ filter_isNull7 = true; /* 068 */ } else { /* 069 */ if (false) { /* 070 */ filter_isNull7 = true; /* 071 */ } else { /* 072 */ filter_value7 = (int)(filter_value1 % 2); /* 073 */ } /* 074 */ } /* 075 */ if (!filter_isNull7) { /* 076 */ filter_isNull6 = false; // resultCode could change nullability. /* 077 */ filter_value6 = filter_value7 == 0; /* 078 */ /* 079 */ } /* 080 */ if (filter_isNull6 || !filter_value6) continue; /* 081 */ /* 082 */ /* (input[1, int] <= 4) */ /* 083 */ boolean filter_value11 = false; /* 084 */ filter_value11 = filter_value1 <= 4; /* 085 */ if (!filter_value11) continue; /* 086 */ /* 087 */ /* (input[1, int] > 1) */ /* 088 */ boolean filter_value14 = false; /* 089 */ filter_value14 = filter_value1 > 1; /* 090 */ if (!filter_value14) continue; /* 091 */ /* 092 */ filter_metricValue.add(1); /* 093 */ /* 094 */ /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */ /* 095 */ /* 096 */ /* input[0, string] */ /* 097 */ /* input[0, string] */ /* 098 */ boolean filter_isNull = inputadapter_row.isNullAt(0); /* 099 */ UTF8String filter_value = filter_isNull ? null : (inputadapter_row.getUTF8String(0)); /* 100 */ project_holder.reset(); /* 101 */ /* 102 */ project_rowWriter.zeroOutNullBytes(); /* 103 */ /* 104 */ if (filter_isNull) { /* 105 */ project_rowWriter.setNullAt(0); /* 106 */ } else { /* 107 */ project_rowWriter.write(0, filter_value); /* 108 */ } /* 109 */ /* 110 */ project_rowWriter.write(1, filter_value1); /* 111 */ project_result.setTotalSize(project_holder.totalSize()); /* 112 */ append(project_result.copy()); /* 113 */ } /* 114 */ } /* 115 */ } ```` Generated code with this PR ````java /* 032 */ protected void processNext() throws java.io.IOException { /* 033 */ /*** PRODUCE: Project [_1#0 AS k#3,_2#1 AS v#4] */ /* 034 */ /* 035 */ /*** PRODUCE: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 > 1))) */ /* 036 */ /* 037 */ /*** PRODUCE: INPUT */ /* 038 */ /* 039 */ while (!shouldStop() && inputadapter_input.hasNext()) { /* 040 */ InternalRow inputadapter_row = (InternalRow) inputadapter_input.next(); /* 041 */ /*** CONSUME: Filter (((_2#1 % 2) = 0) && ((_2#1 <= 5) && (_2#1 > 1))) */ /* 042 */ /* input[1, int] */ /* 043 */ int filter_value1 = inputadapter_row.getInt(1); /* 044 */ /* 045 */ /* ((input[1, int] % 2) = 0) */ /* 046 */ /* (input[1, int] % 2) */ /* 047 */ int filter_value3 = (int)(filter_value1 % 2); /* 048 */ /* 049 */ boolean filter_value2 = false; /* 050 */ filter_value2 = filter_value3 == 0; /* 051 */ if (!filter_value2) continue; /* 052 */ /* 053 */ /* (input[1, int] <= 5) */ /* 054 */ boolean filter_value7 = false; /* 055 */ filter_value7 = filter_value1 <= 5; /* 056 */ if (!filter_value7) continue; /* 057 */ /* 058 */ /* (input[1, int] > 1) */ /* 059 */ boolean filter_value10 = false; /* 060 */ filter_value10 = filter_value1 > 1; /* 061 */ if (!filter_value10) continue; /* 062 */ /* 063 */ filter_metricValue.add(1); /* 064 */ /* 065 */ /*** CONSUME: Project [_1#0 AS k#3,_2#1 AS v#4] */ /* 066 */ /* 067 */ /* input[0, string] */ /* 068 */ /* input[0, string] */ /* 069 */ boolean filter_isNull = inputadapter_row.isNullAt(0); /* 070 */ UTF8String filter_value = filter_isNull ? null : (inputadapter_row.getUTF8String(0)); /* 071 */ project_holder.reset(); /* 072 */ /* 073 */ project_rowWriter.zeroOutNullBytes(); /* 074 */ /* 075 */ if (filter_isNull) { /* 076 */ project_rowWriter.setNullAt(0); /* 077 */ } else { /* 078 */ project_rowWriter.write(0, filter_value); /* 079 */ } /* 080 */ /* 081 */ project_rowWriter.write(1, filter_value1); /* 082 */ project_result.setTotalSize(project_holder.totalSize()); /* 083 */ append(project_result.copy()); /* 084 */ } /* 085 */ } /* 086 */ } ```` Author: Kazuaki Ishizaki <ishizaki@jp.ibm.com> Closes #11684 from kiszk/SPARK-13844.
Diffstat (limited to 'sql')
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala72
-rw-r--r--sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala78
2 files changed, 102 insertions, 48 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
index ed812e0679..1e9c971800 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala
@@ -237,21 +237,35 @@ case class Divide(left: Expression, right: Expression) extends BinaryArithmetic
} else {
s"($javaType)(${eval1.value} $symbol ${eval2.value})"
}
- s"""
- ${eval2.code}
- boolean ${ev.isNull} = false;
- $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
- if (${eval2.isNull} || $isZero) {
- ${ev.isNull} = true;
- } else {
- ${eval1.code}
- if (${eval1.isNull}) {
+ if (!left.nullable && !right.nullable) {
+ s"""
+ ${eval2.code}
+ boolean ${ev.isNull} = false;
+ $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+ if ($isZero) {
${ev.isNull} = true;
} else {
+ ${eval1.code}
${ev.value} = $divide;
}
- }
- """
+ """
+ } else {
+ s"""
+ ${eval2.code}
+ boolean ${ev.isNull} = false;
+ $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+ if (${eval2.isNull} || $isZero) {
+ ${ev.isNull} = true;
+ } else {
+ ${eval1.code}
+ if (${eval1.isNull}) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.value} = $divide;
+ }
+ }
+ """
+ }
}
}
@@ -299,21 +313,35 @@ case class Remainder(left: Expression, right: Expression) extends BinaryArithmet
} else {
s"($javaType)(${eval1.value} $symbol ${eval2.value})"
}
- s"""
- ${eval2.code}
- boolean ${ev.isNull} = false;
- $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
- if (${eval2.isNull} || $isZero) {
- ${ev.isNull} = true;
- } else {
- ${eval1.code}
- if (${eval1.isNull}) {
+ if (!left.nullable && !right.nullable) {
+ s"""
+ ${eval2.code}
+ boolean ${ev.isNull} = false;
+ $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+ if ($isZero) {
${ev.isNull} = true;
} else {
+ ${eval1.code}
${ev.value} = $remainder;
}
- }
- """
+ """
+ } else {
+ s"""
+ ${eval2.code}
+ boolean ${ev.isNull} = false;
+ $javaType ${ev.value} = ${ctx.defaultValue(javaType)};
+ if (${eval2.isNull} || $isZero) {
+ ${ev.isNull} = true;
+ } else {
+ ${eval1.code}
+ if (${eval1.isNull}) {
+ ${ev.isNull} = true;
+ } else {
+ ${ev.value} = $remainder;
+ }
+ }
+ """
+ }
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
index 20818bfb1a..e23ad5596b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala
@@ -274,22 +274,35 @@ case class And(left: Expression, right: Expression) extends BinaryOperator with
val eval2 = right.gen(ctx)
// The result should be `false`, if any of them is `false` whenever the other is null or not.
- s"""
- ${eval1.code}
- boolean ${ev.isNull} = false;
- boolean ${ev.value} = false;
+ if (!left.nullable && !right.nullable) {
+ ev.isNull = "false"
+ s"""
+ ${eval1.code}
+ boolean ${ev.value} = false;
- if (!${eval1.isNull} && !${eval1.value}) {
- } else {
- ${eval2.code}
- if (!${eval2.isNull} && !${eval2.value}) {
- } else if (!${eval1.isNull} && !${eval2.isNull}) {
- ${ev.value} = true;
+ if (${eval1.value}) {
+ ${eval2.code}
+ ${ev.value} = ${eval2.value};
+ }
+ """
+ } else {
+ s"""
+ ${eval1.code}
+ boolean ${ev.isNull} = false;
+ boolean ${ev.value} = false;
+
+ if (!${eval1.isNull} && !${eval1.value}) {
} else {
- ${ev.isNull} = true;
+ ${eval2.code}
+ if (!${eval2.isNull} && !${eval2.value}) {
+ } else if (!${eval1.isNull} && !${eval2.isNull}) {
+ ${ev.value} = true;
+ } else {
+ ${ev.isNull} = true;
+ }
}
- }
- """
+ """
+ }
}
}
@@ -325,22 +338,35 @@ case class Or(left: Expression, right: Expression) extends BinaryOperator with P
val eval2 = right.gen(ctx)
// The result should be `true`, if any of them is `true` whenever the other is null or not.
- s"""
- ${eval1.code}
- boolean ${ev.isNull} = false;
- boolean ${ev.value} = true;
+ if (!left.nullable && !right.nullable) {
+ ev.isNull = "false"
+ s"""
+ ${eval1.code}
+ boolean ${ev.value} = true;
- if (!${eval1.isNull} && ${eval1.value}) {
- } else {
- ${eval2.code}
- if (!${eval2.isNull} && ${eval2.value}) {
- } else if (!${eval1.isNull} && !${eval2.isNull}) {
- ${ev.value} = false;
+ if (!${eval1.value}) {
+ ${eval2.code}
+ ${ev.value} = ${eval2.value};
+ }
+ """
+ } else {
+ s"""
+ ${eval1.code}
+ boolean ${ev.isNull} = false;
+ boolean ${ev.value} = true;
+
+ if (!${eval1.isNull} && ${eval1.value}) {
} else {
- ${ev.isNull} = true;
+ ${eval2.code}
+ if (!${eval2.isNull} && ${eval2.value}) {
+ } else if (!${eval1.isNull} && !${eval2.isNull}) {
+ ${ev.value} = false;
+ } else {
+ ${ev.isNull} = true;
+ }
}
- }
- """
+ """
+ }
}
}