From 2a105134e9a3efd46b761fab5e563ddebb26575d Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 12 Aug 2016 19:07:34 +0200 Subject: [SPARK-16771][SQL] WITH clause should not fall into infinite loop. ## What changes were proposed in this pull request? This PR changes the CTE resolving rule to use only **forward-declared** tables in order to prevent infinite loops. More specifically, the new logic is as follows. * Resolve CTEs in `WITH` clauses first before replacing the main SQL body. * When resolving CTEs, only forward-declared CTEs or base tables are referenced. - Self-referencing is not allowed any more. - Cross-referencing is not allowed any more. **Reported Error Scenarios** ```scala scala> sql("WITH t AS (SELECT 1 FROM t) SELECT * FROM t") java.lang.StackOverflowError ... scala> sql("WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1, t2") java.lang.StackOverflowError ... ``` Note that `t`, `t1`, and `t2` are not declared in the database. Spark falls into infinite loops before resolving table names. ## How was this patch tested? Pass the Jenkins tests with two new test cases. Author: Dongjoon Hyun Closes #14397 from dongjoon-hyun/SPARK-16771-TREENODE. 
--- .../src/test/resources/sql-tests/inputs/cte.sql | 14 ++++++ .../test/resources/sql-tests/results/cte.sql.out | 57 ++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/cte.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/cte.sql.out (limited to 'sql/core/src/test') diff --git a/sql/core/src/test/resources/sql-tests/inputs/cte.sql b/sql/core/src/test/resources/sql-tests/inputs/cte.sql new file mode 100644 index 0000000000..10d34deff4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/cte.sql @@ -0,0 +1,14 @@ +create temporary view t as select * from values 0, 1, 2 as t(id); +create temporary view t2 as select * from values 0, 1 as t(id); + +-- WITH clause should not fall into infinite loop by referencing self +WITH s AS (SELECT 1 FROM s) SELECT * FROM s; + +-- WITH clause should reference the base table +WITH t AS (SELECT 1 FROM t) SELECT * FROM t; + +-- WITH clause should not allow cross reference +WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2; + +-- WITH clause should reference the previous CTE +WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1, t2; diff --git a/sql/core/src/test/resources/sql-tests/results/cte.sql.out b/sql/core/src/test/resources/sql-tests/results/cte.sql.out new file mode 100644 index 0000000000..ddee5bf2d4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/cte.sql.out @@ -0,0 +1,57 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 6 + + +-- !query 0 +create temporary view t as select * from values 0, 1, 2 as t(id) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +create temporary view t2 as select * from values 0, 1 as t(id) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +WITH s AS (SELECT 1 FROM s) SELECT * FROM s +-- !query 2 schema +struct<> +-- !query 2 output +org.apache.spark.sql.AnalysisException 
+Table or view not found: s; line 1 pos 25 + + +-- !query 3 +WITH t AS (SELECT 1 FROM t) SELECT * FROM t +-- !query 3 schema +struct<1:int> +-- !query 3 output +1 +1 +1 + + +-- !query 4 +WITH s1 AS (SELECT 1 FROM s2), s2 AS (SELECT 1 FROM s1) SELECT * FROM s1, s2 +-- !query 4 schema +struct<> +-- !query 4 output +org.apache.spark.sql.AnalysisException +Table or view not found: s2; line 1 pos 26 + + +-- !query 5 +WITH t1 AS (SELECT * FROM t2), t2 AS (SELECT 2 FROM t1) SELECT * FROM t1, t2 +-- !query 5 schema +struct<id:int,2:int> +-- !query 5 output +0 2 +0 2 +1 2 +1 2 -- cgit v1.2.3