diff options
author | Dilip Biswal <dbiswal@us.ibm.com> | 2017-01-29 12:51:59 -0800 |
---|---|---|
committer | gatorsmile <gatorsmile@gmail.com> | 2017-01-29 12:51:59 -0800 |
commit | e2e7b12ce8fdf9d0bf0b7fce9283018c7d805988 (patch) | |
tree | 8d59b62afbaa821de8402b1fb52ffbd1b45f0666 | |
parent | f7c07db852f22d694ca49792e4ceae04d45b71ef (diff) | |
download | spark-e2e7b12ce8fdf9d0bf0b7fce9283018c7d805988.tar.gz spark-e2e7b12ce8fdf9d0bf0b7fce9283018c7d805988.tar.bz2 spark-e2e7b12ce8fdf9d0bf0b7fce9283018c7d805988.zip |
[SPARK-18872][SQL][TESTS] New test cases for EXISTS subquery
## What changes were proposed in this pull request?
This PR adds the first set of tests for EXISTS subquery.
File name | Brief description
------------------------| -----------------
exists-basic.sql |Tests EXISTS and NOT EXISTS subqueries with both correlated and local predicates.
exists-within-and-or.sql|Tests EXISTS and NOT EXISTS subqueries embedded in AND or OR expression.
DB2 results are attached here as reference :
[exists-basic-db2.txt](https://github.com/apache/spark/files/733031/exists-basic-db2.txt)
[exists-and-or-db2.txt](https://github.com/apache/spark/files/733030/exists-and-or-db2.txt)
## How was this patch tested?
This patch is adding tests.
Author: Dilip Biswal <dbiswal@us.ibm.com>
Closes #16710 from dilipbiswal/exist-basic.
4 files changed, 589 insertions, 0 deletions
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql new file mode 100644 index 0000000000..332e858800 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-basic.sql @@ -0,0 +1,123 @@ +-- Tests EXISTS subquery support. Tests basic form +-- of EXISTS subquery (both EXISTS and NOT EXISTS) + +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id); + +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state); + +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt); + +-- uncorrelated exist query +-- TC.01.01 +SELECT * +FROM emp +WHERE EXISTS (SELECT 1 + FROM dept + WHERE dept.dept_id > 10 + AND dept.dept_id < 30); + +-- simple correlated predicate in exist subquery +-- TC.01.02 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id); + +-- correlated outer isnull predicate +-- TC.01.03 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id + OR emp.dept_id IS NULL); + +-- Simple correlation with a local predicate in outer query +-- TC.01.04 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id) + AND emp.id > 200; + +-- Outer references (emp.id) should not be pruned from outer plan +-- TC.01.05 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id) + AND emp.id > 200; + +-- not exists with correlated predicate +-- TC.01.06 +SELECT * +FROM dept +WHERE NOT EXISTS (SELECT emp_name + FROM emp + WHERE emp.dept_id = dept.dept_id); + +-- not exists with correlated predicate + local predicate +-- TC.01.07 +SELECT * +FROM dept +WHERE NOT EXISTS (SELECT emp_name + FROM emp + WHERE emp.dept_id = dept.dept_id + OR state = 'NJ'); + +-- not exist both equal and greaterthan predicate +-- TC.01.08 +SELECT * +FROM bonus +WHERE NOT EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary); + +-- select employees who have not received any bonus +-- TC 01.09 +SELECT emp.* +FROM emp +WHERE NOT EXISTS (SELECT NULL + FROM bonus + WHERE bonus.emp_name = emp.emp_name); + +-- Nested exists +-- TC.01.10 +SELECT * +FROM bonus +WHERE EXISTS (SELECT emp_name + FROM emp + WHERE bonus.emp_name = emp.emp_name + AND EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)); diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql new file mode 100644 index 0000000000..7743b5241d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-within-and-or.sql @@ -0,0 +1,96 @@ +-- Tests EXISTS subquery support. Tests EXISTS +-- subquery within a AND or OR expression. + +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id); + +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state); + +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt); + + +-- Or used in conjunction with exists - ExistenceJoin +-- TC.02.01 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id) + OR emp.id > 200; + +-- all records from emp including the null dept_id +-- TC.02.02 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id) + OR emp.dept_id IS NULL; + +-- EXISTS subquery in both LHS and RHS of OR. +-- TC.02.03 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id + AND dept.dept_id = 20) + OR EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id + AND dept.dept_id = 30); +; + +-- not exists and exists predicate within OR +-- TC.02.04 +SELECT * +FROM bonus +WHERE ( NOT EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary) + OR EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + OR bonus_amt < emp.salary) ); + +-- not exists and in predicate within AND +-- TC.02.05 +SELECT * FROM bonus WHERE NOT EXISTS +( + SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary) +AND +emp_name IN +( + SELECT emp_name + FROM emp + WHERE bonus_amt < emp.salary); + diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out new file mode 100644 index 0000000000..900e4d573b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-basic.sql.out @@ -0,0 +1,214 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 13 + + +-- !query 0 +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +SELECT * +FROM emp +WHERE EXISTS (SELECT 1 + FROM dept + WHERE dept.dept_id > 10 + AND dept.dept_id < 30) +-- !query 3 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 3 output +100 emp 1 2005-01-01 100.0 10 +100 emp 1 2005-01-01 100.0 10 +200 emp 2 2003-01-01 200.0 10 +300 emp 3 2002-01-01 300.0 20 +400 emp 4 2005-01-01 400.0 30 +500 emp 5 2001-01-01 400.0 NULL +600 emp 6 - no dept 2001-01-01 400.0 100 +700 emp 7 2010-01-01 400.0 100 +800 emp 8 2016-01-01 150.0 70 + + +-- !query 4 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id) +-- !query 4 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 4 output +100 emp 1 2005-01-01 100.0 10 +100 emp 1 2005-01-01 100.0 10 +200 emp 2 2003-01-01 200.0 10 +300 emp 3 2002-01-01 300.0 20 +400 emp 4 2005-01-01 400.0 30 +800 emp 8 2016-01-01 150.0 70 + + +-- !query 5 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id + OR emp.dept_id IS NULL) +-- !query 5 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 5 output +100 emp 1 2005-01-01 100.0 10 +100 emp 1 2005-01-01 100.0 10 +200 emp 2 2003-01-01 200.0 10 +300 emp 3 2002-01-01 300.0 20 +400 emp 4 2005-01-01 400.0 30 +500 emp 5 2001-01-01 400.0 NULL +800 emp 8 2016-01-01 150.0 70 + + +-- !query 6 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id) + AND emp.id > 200 +-- !query 6 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 6 output +300 emp 3 2002-01-01 300.0 20 +400 emp 4 2005-01-01 400.0 30 +800 emp 8 2016-01-01 150.0 70 + + +-- !query 7 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id) + AND emp.id > 200 +-- !query 7 schema +struct<emp_name:string> +-- !query 7 output +emp 3 +emp 4 +emp 8 + + +-- !query 8 +SELECT * +FROM dept +WHERE NOT EXISTS (SELECT emp_name + FROM emp + WHERE emp.dept_id = dept.dept_id) +-- !query 8 schema +struct<dept_id:int,dept_name:string,state:string> +-- !query 8 output +40 dept 4 - unassigned OR +50 dept 5 - unassigned NJ + + +-- !query 9 +SELECT * +FROM dept +WHERE NOT EXISTS (SELECT emp_name + FROM emp + WHERE emp.dept_id = dept.dept_id + OR state = 'NJ') +-- !query 9 schema +struct<dept_id:int,dept_name:string,state:string> +-- !query 9 output +40 dept 4 - unassigned OR + + +-- !query 10 +SELECT * +FROM bonus +WHERE NOT EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary) +-- !query 10 schema +struct<emp_name:string,bonus_amt:double> +-- !query 10 output +emp 1 10.0 +emp 1 20.0 +emp 2 100.0 +emp 4 100.0 + + +-- !query 11 +SELECT emp.* +FROM emp +WHERE NOT EXISTS (SELECT NULL + FROM bonus + WHERE bonus.emp_name = emp.emp_name) +-- !query 11 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 11 output +700 emp 7 2010-01-01 400.0 100 +800 emp 8 2016-01-01 150.0 70 + + +-- !query 12 +SELECT * +FROM bonus +WHERE EXISTS (SELECT emp_name + FROM emp + WHERE bonus.emp_name = emp.emp_name + AND EXISTS (SELECT state + FROM dept + WHERE dept.dept_id = emp.dept_id)) +-- !query 12 schema +struct<emp_name:string,bonus_amt:double> +-- !query 12 output +emp 1 10.0 +emp 1 20.0 +emp 2 100.0 +emp 2 300.0 +emp 3 300.0 +emp 4 100.0 diff --git a/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out new file mode 100644 index 0000000000..865e4ed14e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/subquery/exists-subquery/exists-within-and-or.sql.out @@ -0,0 +1,156 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 8 + + +-- !query 0 +CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (100, "emp 1", date "2005-01-01", 100.00D, 10), + (200, "emp 2", date "2003-01-01", 200.00D, 10), + (300, "emp 3", date "2002-01-01", 300.00D, 20), + (400, "emp 4", date "2005-01-01", 400.00D, 30), + (500, "emp 5", date "2001-01-01", 400.00D, NULL), + (600, "emp 6 - no dept", date "2001-01-01", 400.00D, 100), + (700, "emp 7", date "2010-01-01", 400.00D, 100), + (800, "emp 8", date "2016-01-01", 150.00D, 70) +AS EMP(id, emp_name, hiredate, salary, dept_id) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +CREATE TEMPORARY VIEW DEPT AS SELECT * FROM VALUES + (10, "dept 1", "CA"), + (20, "dept 2", "NY"), + (30, "dept 3", "TX"), + (40, "dept 4 - unassigned", "OR"), + (50, "dept 5 - unassigned", "NJ"), + (70, "dept 7", "FL") +AS DEPT(dept_id, dept_name, state) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +CREATE TEMPORARY VIEW BONUS AS SELECT * FROM VALUES + ("emp 1", 10.00D), + ("emp 1", 20.00D), + ("emp 2", 300.00D), + ("emp 2", 100.00D), + ("emp 3", 300.00D), + ("emp 4", 100.00D), + ("emp 5", 1000.00D), + ("emp 6 - no dept", 500.00D) +AS BONUS(emp_name, bonus_amt) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id) + OR emp.id > 200 +-- !query 3 schema +struct<emp_name:string> +-- !query 3 output +emp 1 +emp 1 +emp 2 +emp 3 +emp 4 +emp 5 +emp 6 - no dept +emp 7 +emp 8 + + +-- !query 4 +SELECT * +FROM emp +WHERE EXISTS (SELECT dept.dept_name + FROM dept + WHERE emp.dept_id = dept.dept_id) + OR emp.dept_id IS NULL +-- !query 4 schema +struct<id:int,emp_name:string,hiredate:date,salary:double,dept_id:int> +-- !query 4 output +100 emp 1 2005-01-01 100.0 10 +100 emp 1 2005-01-01 100.0 10 +200 emp 2 2003-01-01 200.0 10 +300 emp 3 2002-01-01 300.0 20 +400 emp 4 2005-01-01 400.0 30 +500 emp 5 2001-01-01 400.0 NULL +800 emp 8 2016-01-01 150.0 70 + + +-- !query 5 +SELECT emp.emp_name +FROM emp +WHERE EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id + AND dept.dept_id = 20) + OR EXISTS (SELECT dept.state + FROM dept + WHERE emp.dept_id = dept.dept_id + AND dept.dept_id = 30) +-- !query 5 schema +struct<emp_name:string> +-- !query 5 output +emp 3 +emp 4 + + +-- !query 6 +SELECT * +FROM bonus +WHERE ( NOT EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary) + OR EXISTS (SELECT * + FROM emp + WHERE emp.emp_name = emp_name + OR bonus_amt < emp.salary) ) +-- !query 6 schema +struct<emp_name:string,bonus_amt:double> +-- !query 6 output +emp 1 10.0 +emp 1 20.0 +emp 2 100.0 +emp 2 300.0 +emp 3 300.0 +emp 4 100.0 +emp 5 1000.0 +emp 6 - no dept 500.0 + + +-- !query 7 +SELECT * FROM bonus WHERE NOT EXISTS +( + SELECT * + FROM emp + WHERE emp.emp_name = emp_name + AND bonus_amt > emp.salary) +AND +emp_name IN +( + SELECT emp_name + FROM emp + WHERE bonus_amt < emp.salary) +-- !query 7 schema +struct<emp_name:string,bonus_amt:double> +-- !query 7 output +emp 1 10.0 +emp 1 20.0 +emp 2 100.0 +emp 4 100.0 |