author    Wenchen Fan <wenchen@databricks.com>    2016-06-13 14:57:35 -0700
committer Yin Huai <yhuai@databricks.com>         2016-06-13 14:57:35 -0700
commit    c4b1ad020962c42be804d3a1a55171d9b51b01e7 (patch)
tree      567f143a02a0b657ddc29deb37c24701b7c59fcc
parent    c654ae2140bc184adb407fd02072b653c5359ee5 (diff)
[SPARK-15887][SQL] Bring back the hive-site.xml support for Spark 2.0
## What changes were proposed in this pull request?

Right now, Spark 2.0 does not load hive-site.xml. Based on users' feedback, it seems to make sense to still load this conf file. This PR adds a `hadoopConf` API in `SharedState`, which is `sparkContext.hadoopConfiguration` by default. When users are under a Hive context, `SharedState.hadoopConf` will load hive-site.xml and append its configs to `sparkContext.hadoopConfiguration`. When we need to read a Hadoop config in Spark SQL, we should call `SessionState.newHadoopConf`, which combines `sparkContext.hadoopConfiguration`, hive-site.xml, and the SQL configs.

## How was this patch tested?

A new test in `HiveDataFrameSuite`.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #13611 from cloud-fan/hive-site.
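For readers new to the layering this commit introduces, here is a minimal sketch (not part of the patch) of how the three configuration levels stack from a user's point of view. `sessionState` is package-private in Spark 2.0, so like the tests added in this patch the sketch assumes it lives in the `org.apache.spark.sql` package; the object name is hypothetical.

```scala
package org.apache.spark.sql

object HiveSiteLayeringSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("hive-site-layering")
      .getOrCreate()

    // Layering after this patch:
    //   1. sparkContext.hadoopConfiguration  -- the base Hadoop conf
    //   2. SharedState.hadoopConf            -- base + hive-site.xml from the classpath
    //   3. SessionState.newHadoopConf()      -- shared conf + per-session SQL confs
    val hadoopConf = spark.sessionState.newHadoopConf()

    // Non-null only if a hive-site.xml defining this key is on the classpath.
    println(hadoopConf.get("hive.in.test"))

    spark.stop()
  }
}
```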
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala                           |  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala                            | 19
-rw-r--r--  sql/core/src/test/resources/hive-site.xml                                                          | 26
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala                                   |  4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala                            |  5
-rw-r--r--  sql/hive/src/test/resources/hive-site.xml                                                          | 26
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameSuite.scala                         |  5
8 files changed, 82 insertions(+), 7 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index 2d4bef3f18..71c16008be 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -405,7 +405,7 @@ private[sql] class ParquetFileFormat
new ParquetOutputWriterFactory(
sqlContext.conf,
dataSchema,
- sqlContext.sparkContext.hadoopConfiguration,
+ sqlContext.sessionState.newHadoopConf(),
options)
}
}
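This one-line swap matters because `newHadoopConf()` also folds per-session SQL confs into the Hadoop configuration handed to the Parquet writer factory, which the shared `sparkContext.hadoopConfiguration` never sees. A hedged sketch of the difference, under the same `org.apache.spark.sql` package assumption as above; the key is hypothetical:

```scala
package org.apache.spark.sql

object ParquetConfVisibilitySketch {
  def demo(spark: SparkSession): Unit = {
    // Hypothetical key, set only on this session's SQLConf.
    spark.conf.set("my.custom.output.key", "my-value")

    // Invisible in the shared SparkContext configuration...
    assert(spark.sparkContext.hadoopConfiguration.get("my.custom.output.key") == null)
    // ...but visible in the conf the Parquet write path now receives.
    assert(spark.sessionState.newHadoopConf().get("my.custom.output.key") == "my-value")
  }
}
```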
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index b2db377ec7..b43095041b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) {
lazy val conf: SQLConf = new SQLConf
def newHadoopConf(): Configuration = {
- val hadoopConf = new Configuration(sparkSession.sparkContext.hadoopConfiguration)
+ val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf)
conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, v) }
hadoopConf
}
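Because `newHadoopConf()` starts from `sharedState.hadoopConf` (which already contains hive-site.xml) and copies the SQL confs in afterwards, a per-session setting overrides the same key coming from the file. A small sketch of that precedence, under the same package assumption, using the `hive.in.test` marker defined by the test resources below:

```scala
package org.apache.spark.sql

object ConfPrecedenceSketch {
  def demo(spark: SparkSession): Unit = {
    // From the hive-site.xml on the (test) classpath.
    assert(spark.sessionState.newHadoopConf().get("hive.in.test") == "true")

    // SQL confs are applied last in newHadoopConf(), so a session-level
    // setting wins over the same key in hive-site.xml.
    spark.conf.set("hive.in.test", "false")
    assert(spark.sessionState.newHadoopConf().get("hive.in.test") == "false")
  }
}
```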
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index 0d6f98416b..c37f7f12ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -17,12 +17,14 @@
package org.apache.spark.sql.internal
+import org.apache.hadoop.conf.Configuration
+
import org.apache.spark.SparkContext
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.catalyst.catalog.{ExternalCatalog, InMemoryCatalog}
import org.apache.spark.sql.execution.CacheManager
import org.apache.spark.sql.execution.ui.{SQLListener, SQLTab}
-import org.apache.spark.util.MutableURLClassLoader
+import org.apache.spark.util.{MutableURLClassLoader, Utils}
/**
@@ -41,9 +43,22 @@ private[sql] class SharedState(val sparkContext: SparkContext) {
val listener: SQLListener = createListenerAndUI(sparkContext)
/**
+ * The base hadoop configuration which is shared among all spark sessions. It is based on the
+ * default hadoop configuration of Spark, with custom configurations inside `hive-site.xml`.
+ */
+ lazy val hadoopConf: Configuration = {
+ val conf = new Configuration(sparkContext.hadoopConfiguration)
+ val configFile = Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
+ if (configFile != null) {
+ conf.addResource(configFile)
+ }
+ conf
+ }
+
+ /**
* A catalog that interacts with external systems.
*/
- lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(sparkContext.hadoopConfiguration)
+ lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf)
/**
* A classloader used to load all user-added jar.
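The lookup in `SharedState.hadoopConf` above is cheap when the file is absent: `getResource` returns null and the copied base configuration is returned unchanged. A standalone sketch of the same pattern, with the Spark-internal `Utils.getContextOrSparkClassLoader` approximated by the thread's context classloader:

```scala
import org.apache.hadoop.conf.Configuration

object HiveSiteLookupSketch {
  // Copy the base conf and overlay hive-site.xml if one is on the classpath.
  def withHiveSite(base: Configuration): Configuration = {
    val conf = new Configuration(base) // copy; the base conf stays untouched
    val loader = Option(Thread.currentThread().getContextClassLoader)
      .getOrElse(getClass.getClassLoader)
    // Skip addResource entirely when no hive-site.xml is found.
    Option(loader.getResource("hive-site.xml")).foreach(url => conf.addResource(url))
    conf
  }
}
```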
diff --git a/sql/core/src/test/resources/hive-site.xml b/sql/core/src/test/resources/hive-site.xml
new file mode 100644
index 0000000000..17297b3e22
--- /dev/null
+++ b/sql/core/src/test/resources/hive-site.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+ <property>
+ <name>hive.in.test</name>
+ <value>true</value>
+ <description>Internal marker for test.</description>
+ </property>
+</configuration>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 90465b65bd..89f8685099 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2843,4 +2843,8 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
sql(s"SELECT '$literal' AS DUMMY"),
Row(s"$expected") :: Nil)
}
+
+ test("SPARK-15887: hive-site.xml should be loaded") {
+ assert(spark.sessionState.newHadoopConf().get("hive.in.test") == "true")
+ }
}
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
index a0106ee882..78b1ecbbea 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSharedState.scala
@@ -45,12 +45,11 @@ private[hive] class HiveSharedState(override val sparkContext: SparkContext)
*/
// This needs to be a lazy val at here because TestHiveSharedState is overriding it.
lazy val metadataHive: HiveClient = {
- HiveUtils.newClientForMetadata(sparkContext.conf, sparkContext.hadoopConfiguration)
+ HiveUtils.newClientForMetadata(sparkContext.conf, hadoopConf)
}
/**
* A catalog that interacts with the Hive metastore.
*/
- override lazy val externalCatalog =
- new HiveExternalCatalog(metadataHive, sparkContext.hadoopConfiguration)
+ override lazy val externalCatalog = new HiveExternalCatalog(metadataHive, hadoopConf)
}
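With `metadataHive` now built from `hadoopConf`, anything declared in hive-site.xml (a metastore URI, or the `hive.in.test` marker used by the tests) reaches the Hive client. A hedged sketch mirroring the test added below; `sharedState` and `HiveSharedState` are package-private, so it assumes the `org.apache.spark.sql.hive` package:

```scala
package org.apache.spark.sql.hive

import org.apache.spark.sql.SparkSession

object HiveClientConfSketch {
  def demo(spark: SparkSession): Unit = {
    val hiveClient = spark.sharedState.asInstanceOf[HiveSharedState].metadataHive
    // Returns "" when no hive-site.xml on the classpath defines the key.
    println(hiveClient.getConf("hive.in.test", ""))
  }
}
```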
diff --git a/sql/hive/src/test/resources/hive-site.xml b/sql/hive/src/test/resources/hive-site.xml
new file mode 100644
index 0000000000..17297b3e22
--- /dev/null
+++ b/sql/hive/src/test/resources/hive-site.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<configuration>
+ <property>
+ <name>hive.in.test</name>
+ <value>true</value>
+ <description>Internal marker for test.</description>
+ </property>
+</configuration>
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameSuite.scala
index 1b31caa76d..23798431e6 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveDataFrameSuite.scala
@@ -29,4 +29,9 @@ class HiveDataFrameSuite extends QueryTest with TestHiveSingleton {
spark.sql("drop table usrdb.test")
spark.sql("drop schema usrdb")
}
+
+ test("SPARK-15887: hive-site.xml should be loaded") {
+ val hiveClient = spark.sharedState.asInstanceOf[HiveSharedState].metadataHive
+ assert(hiveClient.getConf("hive.in.test", "") == "true")
+ }
}