diff options
author | Nattavut Sutyanyong <nsy.can@gmail.com> | 2017-01-25 17:04:36 +0100 |
---|---|---|
committer | Herman van Hovell <hvanhovell@databricks.com> | 2017-01-25 17:04:36 +0100 |
commit | f1ddca5fcc1e914b9efb8a634ea7c89407358ea6 (patch) | |
tree | b4eba1093824cd4d0a34d5b6bb946df9cfbed87d /sql/core/src/test/resources/sql-tests/results | |
parent | 0e821ec6fa98f4b0aa6e2eb6fecd18cc1ee6f3f2 (diff) | |
download | spark-f1ddca5fcc1e914b9efb8a634ea7c89407358ea6.tar.gz spark-f1ddca5fcc1e914b9efb8a634ea7c89407358ea6.tar.bz2 spark-f1ddca5fcc1e914b9efb8a634ea7c89407358ea6.zip |
[SPARK-18863][SQL] Output non-aggregate expressions without GROUP BY in a subquery does not yield an error
## What changes were proposed in this pull request?
This PR will report proper error messages when a subquery expression contain an invalid plan. This problem is fixed by calling CheckAnalysis for the plan inside a subquery.
## How was this patch tested?
Existing tests and two new test cases on 2 forms of subquery, namely, scalar subquery and in/exists subquery.
````
-- TC 01.01
-- The column t2b in the SELECT of the subquery is invalid
-- because it is neither an aggregate function nor a GROUP BY column.
select t1a, t2b
from t1, t2
where t1b = t2c
and t2b = (select max(avg)
from (select t2b, avg(t2b) avg
from t2
where t2a = t1.t1b
)
)
;
-- TC 01.02
-- Invalid due to the column t2b not part of the output from table t2.
select *
from t1
where t1a in (select min(t2a)
from t2
group by t2c
having t2c in (select max(t3c)
from t3
group by t3b
having t3b > t2b ))
;
````
Author: Nattavut Sutyanyong <nsy.can@gmail.com>
Closes #16572 from nsyca/18863.
Diffstat (limited to 'sql/core/src/test/resources/sql-tests/results')
-rw-r--r-- | sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out new file mode 100644 index 0000000000..50ae01e181 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/negative-cases/invalid-correlation.sql.out @@ -0,0 +1,66 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 5 + + +-- !query 0 +create temporary view t1 as select * from values + (1, 2, 3) +as t1(t1a, t1b, t1c) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +create temporary view t2 as select * from values + (1, 0, 1) +as t2(t2a, t2b, t2c) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +create temporary view t3 as select * from values + (3, 1, 2) +as t3(t3a, t3b, t3c) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +select t1a, t2b +from t1, t2 +where t1b = t2c +and t2b = (select max(avg) + from (select t2b, avg(t2b) avg + from t2 + where t2a = t1.t1b + ) + ) +-- !query 3 schema +struct<> +-- !query 3 output +org.apache.spark.sql.AnalysisException +expression 't2.`t2b`' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() (or first_value) if you don't care which value you get.; + + +-- !query 4 +select * +from t1 +where t1a in (select min(t2a) + from t2 + group by t2c + having t2c in (select max(t3c) + from t3 + group by t3b + having t3b > t2b )) +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +resolved attribute(s) t2b#x missing from min(t2a)#x,t2c#x in operator !Filter predicate-subquery#x [(t2c#x = max(t3c)#x) && (t3b#x > t2b#x)]; |