[SPARK-15425][SQL] Disallow cross joins by default

## What changes were proposed in this pull request?

In order to prevent users from inadvertently writing queries with cartesian joins, this patch introduces a new conf `spark.sql.crossJoin.enabled` (set to `false` by default). Unless this conf is set to `true`, a query that contains one or more cartesian products fails with a `SparkException`.

## How was this patch tested?

Added a test to verify the new behavior in `JoinSuite`. Additionally, `SQLQuerySuite` and `SQLMetricsSuite` were modified to explicitly enable cartesian products.

Author: Sameer Agarwal <sameer@databricks.com>

Closes #13209 from sameeragarwal/disallow-cartesian.
author: Sameer Agarwal <sameer@databricks.com> 2016-05-22 23:32:39 -0700
committer: Reynold Xin <rxin@databricks.com> 2016-05-22 23:32:39 -0700
commit: dafcb05c2ef8e09f45edfb7eabf58116c23975a0 (patch)
tree: 7c37771c4144b61cd31831e7de4671b0e6b42e12 /sql/hive/compatibility
parent: fc44b694bf5162b3a044768da4627b9969909829 (diff)
download: spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.gz
spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.bz2
spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.zip
1 files changed, 4 insertions, 0 deletions
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index 54fb440b33..a8645f7cd3 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -40,6 +40,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
   private val originalColumnBatchSize = TestHive.conf.columnBatchSize
   private val originalInMemoryPartitionPruning = TestHive.conf.inMemoryPartitionPruning
   private val originalConvertMetastoreOrc = TestHive.sessionState.convertMetastoreOrc
+  private val originalCrossJoinEnabled = TestHive.conf.crossJoinEnabled
 
   def testCases: Seq[(String, File)] = {
     hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
@@ -61,6 +62,8 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
     // Ensures that the plans generation use metastore relation and not OrcRelation
     // Was done because SqlBuilder does not work with plans having logical relation
     TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, false)
+    // Ensures that cross joins are enabled so that we can test them
+    TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, true)
     RuleExecutor.resetTime()
   }
 
@@ -72,6 +75,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
       TestHive.setConf(SQLConf.COLUMN_BATCH_SIZE, originalColumnBatchSize)
       TestHive.setConf(SQLConf.IN_MEMORY_PARTITION_PRUNING, originalInMemoryPartitionPruning)
       TestHive.setConf(HiveUtils.CONVERT_METASTORE_ORC, originalConvertMetastoreOrc)
+      TestHive.setConf(SQLConf.CROSS_JOINS_ENABLED, originalCrossJoinEnabled)
       TestHive.sessionState.functionRegistry.restore()
 
       // For debugging dump some statistics about how much time was spent in various optimizer rules
author	Sameer Agarwal <sameer@databricks.com>	2016-05-22 23:32:39 -0700
committer	Reynold Xin <rxin@databricks.com>	2016-05-22 23:32:39 -0700
commit	dafcb05c2ef8e09f45edfb7eabf58116c23975a0 (patch)
tree	7c37771c4144b61cd31831e7de4671b0e6b42e12 /sql/hive/compatibility
parent	fc44b694bf5162b3a044768da4627b9969909829 (diff)
download	spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.gz spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.tar.bz2 spark-dafcb05c2ef8e09f45edfb7eabf58116c23975a0.zip