diff options
author | Nattavut Sutyanyong <nsy.can@gmail.com> | 2016-08-08 12:14:11 +0200 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2016-08-08 12:14:11 +0200 |
commit | 06f5dc841517e7156f5f445655d97ba541ebbd7e (patch) | |
tree | 2cafb3241c4845131956ed0880cd29cd0dc95d56 /sql/catalyst/src | |
parent | e10ca8de49206087b336c6db0c40868fa271b989 (diff) | |
download | spark-06f5dc841517e7156f5f445655d97ba541ebbd7e.tar.gz spark-06f5dc841517e7156f5f445655d97ba541ebbd7e.tar.bz2 spark-06f5dc841517e7156f5f445655d97ba541ebbd7e.zip |
[SPARK-16804][SQL] Correlated subqueries containing non-deterministic operations return incorrect results
## What changes were proposed in this pull request?
This patch fixes the incorrect results in the rule ResolveSubquery in Catalyst's Analysis phase by returning an error message when the LIMIT is found in the path from the parent table to the correlated predicate in the subquery.
## How was this patch tested?
./dev/run-tests
a new unit test on the problematic pattern.
Author: Nattavut Sutyanyong <nsy.can@gmail.com>
Closes #14411 from nsyca/master.
Diffstat (limited to 'sql/catalyst/src')
-rw-r--r-- | sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 13 | ||||
-rw-r--r-- | sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 17 |
2 files changed, 30 insertions, 0 deletions
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 660f523698..25202b521a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -1021,6 +1021,19 @@ class Analyzer( case e: Expand => failOnOuterReferenceInSubTree(e, "an EXPAND") e + case l : LocalLimit => + failOnOuterReferenceInSubTree(l, "a LIMIT") + l + // Since LIMIT <n> is represented as GlobalLimit(<n>, (LocalLimit (<n>, child)) + // and we are walking bottom up, we will fail on LocalLimit before + // reaching GlobalLimit. + // The code below is just a safety net. + case g : GlobalLimit => + failOnOuterReferenceInSubTree(g, "a LIMIT") + g + case s : Sample => + failOnOuterReferenceInSubTree(s, "a TABLESAMPLE") + s case p => failOnOuterReference(p) p diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 8363a1b1cd..13bf034f83 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -548,5 +548,22 @@ class AnalysisErrorSuite extends AnalysisTest { Exists(Union(LocalRelation(b), Filter(EqualTo(OuterReference(a), c), LocalRelation(c)))), LocalRelation(a)) assertAnalysisError(plan3, "Accessing outer query column is not allowed in" :: Nil) + + val plan4 = Filter( + Exists( + Limit(1, + Filter(EqualTo(OuterReference(a), b), LocalRelation(b))) + ), + LocalRelation(a)) + assertAnalysisError(plan4, "Accessing outer query column is not allowed in a LIMIT" :: Nil) + + val plan5 = Filter( + Exists( + Sample(0.0, 0.5, false, 1L, + Filter(EqualTo(OuterReference(a), b), LocalRelation(b)))().select('b) + ), + LocalRelation(a)) + assertAnalysisError(plan5, + "Accessing outer query column is not allowed in a TABLESAMPLE" :: Nil) } } |