diff options
| field     | value                                                                 | date                      |
|-----------|-----------------------------------------------------------------------|---------------------------|
| author    | Reynold Xin <rxin@databricks.com>                                     | 2016-04-15 20:28:09 -0700 |
| committer | Yin Huai <yhuai@databricks.com>                                       | 2016-04-15 20:28:09 -0700 |
| commit    | f4be0946af219379fb2476e6f80b2e50463adeb2 (patch)                      |                           |
| tree      | d4085adef6750a5315daa6fabef7f9c0218fca20 /sql/core/src                |                           |
| parent    | b2dfa849599843269a43e6e0f2ab8c539dfc32b6 (diff)                       |                           |
| download  | spark-f4be0946af219379fb2476e6f80b2e50463adeb2.tar.gz spark-f4be0946af219379fb2476e6f80b2e50463adeb2.tar.bz2 spark-f4be0946af219379fb2476e6f80b2e50463adeb2.zip | |
[SPARK-14677][SQL] Make the max number of iterations configurable for Catalyst
## What changes were proposed in this pull request?
We currently hard code the max number of optimizer/analyzer iterations to 100. This patch makes it configurable. While I'm at it, I also added the SessionCatalog to the optimizer, so we can use information there in optimization.
## How was this patch tested?
Updated unit tests to reflect the change.
Author: Reynold Xin <rxin@databricks.com>
Closes #12434 from rxin/SPARK-14677.
Diffstat (limited to 'sql/core/src')
3 files changed, 15 insertions, 6 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 8dfbba779d..08b2d7fcd4 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -18,14 +18,16 @@ package org.apache.spark.sql.execution import org.apache.spark.sql.ExperimentalMethods -import org.apache.spark.sql.catalyst.CatalystConf import org.apache.spark.sql.catalyst.catalog.SessionCatalog import org.apache.spark.sql.catalyst.optimizer.Optimizer +import org.apache.spark.sql.internal.SQLConf class SparkOptimizer( - conf: CatalystConf, - sessionCatalog: SessionCatalog, - experimentalMethods: ExperimentalMethods) extends Optimizer(conf, sessionCatalog) { + catalog: SessionCatalog, + conf: SQLConf, + experimentalMethods: ExperimentalMethods) + extends Optimizer(catalog, conf) { + override def batches: Seq[Batch] = super.batches :+ Batch( - "User Provided Optimizers", FixedPoint(100), experimentalMethods.extraOptimizations: _*) + "User Provided Optimizers", fixedPoint, experimentalMethods.extraOptimizations: _*) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 20d9a28548..e58b7178e9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -51,6 +51,11 @@ object SQLConf { } + val OPTIMIZER_MAX_ITERATIONS = SQLConfigBuilder("spark.sql.optimizer.maxIterations") + .doc("The max number of iterations the optimizer and analyzer runs") + .intConf + .createWithDefault(100) + val ALLOW_MULTIPLE_CONTEXTS = SQLConfigBuilder("spark.sql.allowMultipleContexts") .doc("When set to true, creating multiple SQLContexts/HiveContexts is allowed. 
" + "When set to false, only one SQLContext/HiveContext is allowed to be created " + @@ -473,6 +478,8 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging { /** ************************ Spark SQL Params/Hints ******************* */ + def optimizerMaxIterations: Int = getConf(OPTIMIZER_MAX_ITERATIONS) + def checkpointLocation: String = getConf(CHECKPOINT_LOCATION) def filesMaxPartitionBytes: Long = getConf(FILES_MAX_PARTITION_BYTES) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 10497e4fdf..c30f879ded 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -80,7 +80,7 @@ private[sql] class SessionState(ctx: SQLContext) { /** * Logical query plan optimizer. */ - lazy val optimizer: Optimizer = new SparkOptimizer(conf, catalog, experimentalMethods) + lazy val optimizer: Optimizer = new SparkOptimizer(catalog, conf, experimentalMethods) /** * Parser that extracts expressions, plans, table identifiers etc. from SQL texts. |