diff options
author | Sameer Agarwal <sameer@databricks.com> | 2016-05-22 23:32:39 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-05-22 23:32:39 -0700 |
commit | dafcb05c2ef8e09f45edfb7eabf58116c23975a0 (patch) | |
tree | 7c37771c4144b61cd31831e7de4671b0e6b42e12 /sql/hive/compatibility | |
parent | fc44b694bf5162b3a044768da4627b9969909829 (diff) | |
download | spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.gz spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.bz2 spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.zip |
[SPARK-15425][SQL] Disallow cross joins by default
## What changes were proposed in this pull request?
To prevent users from inadvertently writing queries with cartesian joins, this patch introduces a new conf, `spark.sql.crossJoin.enabled`, which is `false` by default. Unless it is set to `true`, any query that contains one or more cartesian products fails with a `SparkException`.
## How was this patch tested?
Added a test to verify the new behavior in `JoinSuite`. Additionally, `SQLQuerySuite` and `SQLMetricsSuite` were modified to explicitly enable cartesian products.
Author: Sameer Agarwal <sameer@databricks.com>
Closes #13209 from sameeragarwal/disallow-cartesian.
Diffstat (limited to 'sql/hive/compatibility')
-rw-r--r-- | sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala | 4 |
1 file changed, 4 insertions, 0 deletions
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 54fb440b33..a8645f7cd3 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -40,6 +40,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
   private val originalColumnBatchSize = TestHive.conf.columnBatchSize
   private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning
   private val originalConvertMetastoreOrc = TestHive.sessionState.convertMetastoreOrc
+  private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled
 
   def testCases: Seq[(String, File)] = {
     hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
@@ -61,6 +62,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // Ensures that the plans generation use metastore relation and not OrcRelation
     // Was done because SqlBuilder does not work with plans having logical relation
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
+    // Ensures that cross joins are enabled so that we can test them
+    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true)
     RuleExecutor.resetTime()
   }
 
@@ -72,6 +75,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, originalColumnBatchSize)
     TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
+    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
     TestHive.sessionState.functionRegistry.restore()
 
     // For debugging dump some statistics about how much time was spent in various optimizer rules