author    Yin Huai <yhuai@databricks.com>    2016-08-05 15:52:02 -0700
committer Yin Huai <yhuai@databricks.com>    2016-08-05 15:52:02 -0700
commit    e679bc3c1cd418ef0025d2ecbc547c9660cac433 (patch)
tree      01bf9449afc2b035eeb6d79b1feed3d32340aabf /sql/hive/src/main
parent    6cbde337a539e5bb170d0eb81f715a95ee9c9af3 (diff)
[SPARK-16901] Hive settings in hive-site.xml may be overridden by Hive's default values
## What changes were proposed in this pull request?

When we create the HiveConf for the metastore client, we use a Hadoop Conf as the base, which may contain Hive settings from hive-site.xml (https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala#L49). However, HiveConf's initialize function basically ignores the base Hadoop Conf and always uses its default values (i.e. settings with non-null default values) as the base (https://github.com/apache/hive/blob/release-1.2.1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java#L2687). So, even if a user puts javax.jdo.option.ConnectionURL in hive-site.xml, it is not used and Hive falls back to its default, which is jdbc:derby:;databaseName=metastore_db;create=true. This issue only shows up when `spark.sql.hive.metastore.jars` is not set to builtin.

## How was this patch tested?

New test in HiveSparkSubmitSuite.

Author: Yin Huai <yhuai@databricks.com>

Closes #14497 from yhuai/SPARK-16901.
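For reference, a minimal standalone sketch of the constructor behavior described above; this is not the Spark code path itself, `buildHiveConf` is a hypothetical helper, and only the Hive/Hadoop classes and their constructors are the real APIs. Starting from HiveConf's defaults and copying the already-loaded Hadoop/Hive entries on top is what keeps hive-site.xml values such as javax.jdo.option.ConnectionURL from being clobbered:

```scala
// Minimal sketch, assuming Hive 1.2.x and Hadoop client jars on the classpath.
// buildHiveConf is a hypothetical helper, not part of Spark.
import scala.collection.JavaConverters._

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.ql.session.SessionState

object HiveConfSketch {
  def buildHiveConf(hadoopConf: Configuration): HiveConf = {
    // new HiveConf(hadoopConf, classOf[SessionState]) would let initialize()
    // reset every key that has a non-null default, dropping the hive-site.xml
    // values loaded into hadoopConf. Start from the defaults instead and copy
    // the user settings on top so they win.
    val hiveConf = new HiveConf(classOf[SessionState])
    hadoopConf.iterator().asScala.foreach { entry =>
      hiveConf.set(entry.getKey, entry.getValue)
    }
    hiveConf
  }
}
```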
Diffstat (limited to 'sql/hive/src/main')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 24
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index ef69ac76f2..3bf4ed5ab4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -141,14 +141,32 @@ private[hive] class HiveClientImpl(
// so we should keep `conf` and reuse the existing instance of `CliSessionState`.
originalState
} else {
- val hiveConf = new HiveConf(hadoopConf, classOf[SessionState])
+ val hiveConf = new HiveConf(classOf[SessionState])
+ // 1: we set all confs in the hadoopConf to this hiveConf.
+ // This hadoopConf contains user settings in Hadoop's core-site.xml file
+ // and Hive's hive-site.xml file. Note, we load hive-site.xml file manually in
+ // SharedState and put settings in this hadoopConf instead of relying on HiveConf
+ // to load user settings. Otherwise, HiveConf's initialize method will override
+ // settings in the hadoopConf. This issue only shows up when spark.sql.hive.metastore.jars
+ // is not set to builtin. When spark.sql.hive.metastore.jars is builtin, the classpath
+ // has hive-site.xml. So, HiveConf will use that to override its default values.
+ hadoopConf.iterator().asScala.foreach { entry =>
+ val key = entry.getKey
+ val value = entry.getValue
+ if (key.toLowerCase.contains("password")) {
+ logDebug(s"Applying Hadoop and Hive config to Hive Conf: $key=xxx")
+ } else {
+ logDebug(s"Applying Hadoop and Hive config to Hive Conf: $key=$value")
+ }
+ hiveConf.set(key, value)
+ }
// HiveConf is a Hadoop Configuration, which has a field of classLoader and
// the initial value will be the current thread's context class loader
// (i.e. initClassLoader at here).
// We call initialConf.setClassLoader(initClassLoader) at here to make
// this action explicit.
hiveConf.setClassLoader(initClassLoader)
- // First, we set all spark confs to this hiveConf.
+ // 2: we set all spark confs to this hiveConf.
sparkConf.getAll.foreach { case (k, v) =>
if (k.toLowerCase.contains("password")) {
logDebug(s"Applying Spark config to Hive Conf: $k=xxx")
@@ -157,7 +175,7 @@ private[hive] class HiveClientImpl(
}
hiveConf.set(k, v)
}
- // Second, we set all entries in config to this hiveConf.
+ // 3: we set all entries in config to this hiveConf.
extraConfig.foreach { case (k, v) =>
if (k.toLowerCase.contains("password")) {
logDebug(s"Applying extra config to HiveConf: $k=xxx")