author     Reynold Xin <rxin@databricks.com>    2016-04-15 20:28:09 -0700
committer  Yin Huai <yhuai@databricks.com>      2016-04-15 20:28:09 -0700
commit     f4be0946af219379fb2476e6f80b2e50463adeb2 (patch)
tree       d4085adef6750a5315daa6fabef7f9c0218fca20 /sql/core/src
parent     b2dfa849599843269a43e6e0f2ab8c539dfc32b6 (diff)
[SPARK-14677][SQL] Make the max number of iterations configurable for Catalyst
## What changes were proposed in this pull request?

We currently hard code the max number of optimizer/analyzer iterations to 100. This patch makes it configurable. While I'm at it, I also added the SessionCatalog to the optimizer, so we can use information there in optimization.

## How was this patch tested?

Updated unit tests to reflect the change.

Author: Reynold Xin <rxin@databricks.com>

Closes #12434 from rxin/SPARK-14677.
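Since the new key behaves like any other SQL conf, the cap can be raised or lowered per session. A minimal sketch, assuming a build that includes this patch; the local master and app name are illustrative, and only the conf key comes from the change below:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Illustrative local setup; only the conf key is defined by this patch.
val sc = new SparkContext(new SparkConf().setMaster("local[2]").setAppName("opt-iterations"))
val sqlContext = new SQLContext(sc)

// Raise the optimizer/analyzer iteration cap from its default of 100.
sqlContext.setConf("spark.sql.optimizer.maxIterations", "200")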
Diffstat (limited to 'sql/core/src')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala | 12
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 7
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala | 2
3 files changed, 15 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
index 8dfbba779d..08b2d7fcd4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala
@@ -18,14 +18,16 @@
 package org.apache.spark.sql.execution
 
 import org.apache.spark.sql.ExperimentalMethods
-import org.apache.spark.sql.catalyst.CatalystConf
 import org.apache.spark.sql.catalyst.catalog.SessionCatalog
 import org.apache.spark.sql.catalyst.optimizer.Optimizer
+import org.apache.spark.sql.internal.SQLConf
 
 class SparkOptimizer(
-    conf: CatalystConf,
-    sessionCatalog: SessionCatalog,
-    experimentalMethods: ExperimentalMethods) extends Optimizer(conf, sessionCatalog) {
+    catalog: SessionCatalog,
+    conf: SQLConf,
+    experimentalMethods: ExperimentalMethods)
+  extends Optimizer(catalog, conf) {
+
   override def batches: Seq[Batch] = super.batches :+ Batch(
-    "User Provided Optimizers", FixedPoint(100), experimentalMethods.extraOptimizations: _*)
+    "User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*)
 }
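The fixedPoint used by the new batch is inherited from the Catalyst Optimizer, which is outside this diff. A hedged sketch of what that inheritance buys (the exact definition lives in catalyst's Optimizer.scala, not here):

// Roughly how the parent Optimizer derives its strategy from the conf:
// the user-provided batch now shares the configurable cap with the
// built-in batches instead of carrying its own hard-coded FixedPoint(100).
protected val fixedPoint = FixedPoint(conf.optimizerMaxIterations)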
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 20d9a28548..e58b7178e9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -51,6 +51,11 @@ object SQLConf {
   }
 
+  val OPTIMIZER_MAX_ITERATIONS = SQLConfigBuilder("spark.sql.optimizer.maxIterations")
+    .doc("The max number of iterations the optimizer and analyzer runs")
+    .intConf
+    .createWithDefault(100)
+
   val ALLOW_MULTIPLE_CONTEXTS = SQLConfigBuilder("spark.sql.allowMultipleContexts")
     .doc("When set to true, creating multiple SQLContexts/HiveContexts is allowed. " +
       "When set to false, only one SQLContext/HiveContext is allowed to be created " +
@@ -473,6 +478,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging {
 
   /** ************************ Spark SQL Params/Hints ******************* */
 
+  def optimizerMaxIterations: Int = getConf(OPTIMIZER_MAX_ITERATIONS)
+
   def checkpointLocation: String = getConf(CHECKPOINT_LOCATION)
 
   def filesMaxPartitionBytes: Long = getConf(FILES_MAX_PARTITION_BYTES)
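Since SQLConf is private[sql], the new accessor is exercised from code inside org.apache.spark.sql, such as the updated unit tests. A minimal sketch of reading the knob back, assuming it compiles within that package:

import org.apache.spark.sql.internal.SQLConf

val conf = new SQLConf  // constructible only inside org.apache.spark.sql
assert(conf.optimizerMaxIterations == 100)  // the createWithDefault(100) above
conf.setConfString("spark.sql.optimizer.maxIterations", "50")
assert(conf.optimizerMaxIterations == 50)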
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 10497e4fdf..c30f879ded 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -80,7 +80,7 @@ private[sql] class SessionState(ctx: SQLContext) {
   /**
    * Logical query plan optimizer.
    */
-  lazy val optimizer: Optimizer = new SparkOptimizer(conf, catalog, experimentalMethods)
+  lazy val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods)
 
   /**
    * Parser that extracts expressions, plans, table identifiers etc. from SQL texts.
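End to end, rules registered through ExperimentalMethods now run under the configurable cap. A hedged sketch reusing the sqlContext from the first sketch above; MyRule is hypothetical:

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

// Hypothetical no-op rule, purely for illustration.
object MyRule extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan
}

// Runs in the "User Provided Optimizers" batch, now bounded by
// spark.sql.optimizer.maxIterations rather than a hard-coded 100.
sqlContext.experimental.extraOptimizations = Seq(MyRule)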