diff options
author | Herman van Hovell <hvanhovell@databricks.com> | 2016-06-10 14:29:05 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-06-10 14:29:05 -0700 |
commit | e05a2feebe928df691d5a8f42f22e088c6263dcf (patch) | |
tree | d4e31d19e13de1ef0047d0d4c6546c6be8e3ca90 | |
parent | 026eb90644be7685971dacaabae67a293edd0133 (diff) | |
download | spark-e05a2feebe928df691d5a8f42f22e088c6263dcf.tar.gz spark-e05a2feebe928df691d5a8f42f22e088c6263dcf.tar.bz2 spark-e05a2feebe928df691d5a8f42f22e088c6263dcf.zip |
[SPARK-15825] [SQL] Fix SMJ invalid results
## What changes were proposed in this pull request?
Code-generated `SortMergeJoin` returned wrong results when structs were used as join keys. This was (eventually) traced back to the generated struct comparison using the wrong row reference. The fix sets `INPUT_ROW` to `"i"` before the struct comparisons are generated.
## How was this patch tested?
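Added a regression test to `InnerJoinSuite` ("SPARK-15822 - test structs as keys") that inner-joins two DataFrames on a struct key and checks the joined rows.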
Author: Herman van Hovell <hvanhovell@databricks.com>
Closes #13589 from hvanhovell/SPARK-15822.
2 files changed, 16 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9657f26402..ca202928e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -490,6 +490,7 @@ class CodegenContext { addNewFunction(compareFunc, funcCode) s"this.$compareFunc($c1, $c2)" case schema: StructType => + INPUT_ROW = "i" val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") val funcCode: String = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala index 27f6abcd95..35dab63672 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/InnerJoinSuite.scala @@ -271,4 +271,19 @@ class InnerJoinSuite extends SparkPlanTest with SharedSQLContext { ) ) } + + { + def df: DataFrame = spark.range(3).selectExpr("struct(id, id) as key", "id as value") + lazy val left = df.selectExpr("key", "concat('L', value) as value").alias("left") + lazy val right = df.selectExpr("key", "concat('R', value) as value").alias("right") + testInnerJoin( + "SPARK-15822 - test structs as keys", + left, + right, + () => (left.col("key") === right.col("key")).expr, + Seq( + (Row(0, 0), "L0", Row(0, 0), "R0"), + (Row(1, 1), "L1", Row(1, 1), "R1"), + (Row(2, 2), "L2", Row(2, 2), "R2"))) + } } |