diff options
author | Yin Huai <yhuai@databricks.com> | 2016-06-16 17:06:24 -0700 |
---|---|---|
committer | Shixiong Zhu <shixiong@databricks.com> | 2016-06-16 17:06:24 -0700 |
commit | d9c6628c47de547dc537310e3c775c7f3e0e4a12 (patch) | |
tree | 44d897bb65296bb52bdee0a3f306bbf224740c53 /sql | |
parent | 62d2fa5e996d428caaea005041b17ec115473762 (diff) | |
download | spark-d9c6628c47de547dc537310e3c775c7f3e0e4a12.tar.gz spark-d9c6628c47de547dc537310e3c775c7f3e0e4a12.tar.bz2 spark-d9c6628c47de547dc537310e3c775c7f3e0e4a12.zip |
[SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState
## What changes were proposed in this pull request?
Before this patch, once a SparkSession had been created, Hadoop configuration values set directly on SparkContext.hadoopConfiguration would not affect the Hadoop conf created by SessionState. This patch changes the behavior to always use SparkContext.hadoopConfiguration as the base.
This patch also changes the behavior of hive-site.xml support added in https://github.com/apache/spark/pull/12689/. With this patch, we will load hive-site.xml to SparkContext.hadoopConfiguration.
## How was this patch tested?
New test in SparkSessionBuilderSuite.
Author: Yin Huai <yhuai@databricks.com>
Closes #13711 from yhuai/SPARK-15991.
Diffstat (limited to 'sql')
5 files changed, 28 insertions, 17 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala index 59efa81275..dc95123d00 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala @@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) { lazy val conf: SQLConf = new SQLConf def newHadoopConf(): Configuration = { - val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf) + val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration) conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) } hadoopConf } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index bc349b4f28..6c43fe3177 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -43,23 +43,17 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { */ val listener: SQLListener = createListenerAndUI(sparkContext) - /** - * The base hadoop configuration which is shared among all spark sessions. It is based on the - * default hadoop configuration of Spark, with custom configurations inside `hive-site.xml`. - */ - val hadoopConf: Configuration = { - val conf = new Configuration(sparkContext.hadoopConfiguration) + { val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml") if (configFile != null) { - conf.addResource(configFile) + sparkContext.hadoopConfiguration.addResource(configFile) } - conf } /** * A catalog that interacts with external systems. 
*/ - lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf) + lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(sparkContext.hadoopConfiguration) /** * A classloader used to load all user-added jar. @@ -71,7 +65,7 @@ private[sql] class SharedState(val sparkContext: SparkContext) extends Logging { // Set the Hive metastore warehouse path to the one we use val tempConf = new SQLConf sparkContext.conf.getAll.foreach { case (k, v) => tempConf.setConfString(k, v) } - val hiveWarehouseDir = hadoopConf.get("hive.metastore.warehouse.dir") + val hiveWarehouseDir = sparkContext.hadoopConfiguration.get("hive.metastore.warehouse.dir") if (hiveWarehouseDir != null && !tempConf.contains(SQLConf.WAREHOUSE_PATH.key)) { // If hive.metastore.warehouse.dir is set and spark.sql.warehouse.dir is not set, // we will respect the value of hive.metastore.warehouse.dir. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 545c1776b7..bbe821b768 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2870,8 +2870,4 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { sql(s"SELECT '$literal' AS DUMMY"), Row(s"$expected") :: Nil) } - - test("SPARK-15887: hive-site.xml should be loaded") { - assert(spark.sessionState.newHadoopConf().get("hive.in.test") == "true") - } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala index 786956df8a..418345b9ee 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SparkSessionBuilderSuite.scala @@ -102,4 +102,24 @@ class SparkSessionBuilderSuite extends SparkFunSuite { assert(session.sparkContext.conf.get("key2") == 
"value2") session.stop() } + + test("SPARK-15887: hive-site.xml should be loaded") { + val session = SparkSession.builder().master("local").getOrCreate() + assert(session.sessionState.newHadoopConf().get("hive.in.test") == "true") + assert(session.sparkContext.hadoopConfiguration.get("hive.in.test") == "true") + session.stop() + } + + test("SPARK-15991: Set global Hadoop conf") { + val session = SparkSession.builder().master("local").getOrCreate() + val mySpecialKey = "my.special.key.15991" + val mySpecialValue = "msv" + try { + session.sparkContext.hadoopConfiguration.set(mySpecialKey, mySpecialValue) + assert(session.sessionState.newHadoopConf().get(mySpecialKey) == mySpecialValue) + } finally { + session.sparkContext.hadoopConfiguration.unset(mySpecialKey) + session.stop() + } + } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala index 6b7a333f2d..12b4962fba 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala @@ -36,11 +36,12 @@ private[hive] class HiveSharedState(override val sparkContext: SparkContext) */ // This needs to be a lazy val at here because TestHiveSharedState is overriding it. lazy val metadataHive: HiveClient = { - HiveUtils.newClientForMetadata(sparkContext.conf, hadoopConf) + HiveUtils.newClientForMetadata(sparkContext.conf, sparkContext.hadoopConfiguration) } /** * A catalog that interacts with the Hive metastore. */ - override lazy val externalCatalog = new HiveExternalCatalog(metadataHive, hadoopConf) + override lazy val externalCatalog = + new HiveExternalCatalog(metadataHive, sparkContext.hadoopConfiguration) } |