author     Yin Huai <yhuai@databricks.com>      2015-06-01 21:33:57 -0700
committer  Reynold Xin <rxin@databricks.com>    2015-06-01 21:33:57 -0700
commit     91f6be87bc5cff41ca7a9cca9fdcc4678a4e7086 (patch)
tree       c85c919d338a39a803fc00e8a62a64a908d82dbb /sql
parent     4c868b9943a2d86107d1f15f8df9830aac36fb75 (diff)
[SPARK-8020] Spark SQL in spark-defaults.conf make metadataHive get constructed too early
https://issues.apache.org/jira/browse/SPARK-8020

Author: Yin Huai <yhuai@databricks.com>

Closes #6563 from yhuai/SPARK-8020 and squashes the following commits:

4e5addc [Yin Huai] style
bf766c6 [Yin Huai] Failed test.
0398f5b [Yin Huai] First populate the SQLConf and then construct executionHive and metadataHive.
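The commit message describes an initialization-order bug: in HiveContext, setConf has side effects that can force the lazily constructed executionHive and metadataHive before all spark.sql.* settings have been copied over from the SparkContext's conf. What follows is a minimal sketch of that failure mode in plain Scala, using hypothetical stand-ins (Conf, Context, Repro) rather than the real Spark classes; the only behavior carried over from the commit is a setConf whose side effect forces a lazy val that reads the conf at construction time.

// Minimal sketch of the SPARK-8020 ordering bug, with hypothetical
// stand-ins (Conf, Context) instead of the real Spark classes.
import scala.collection.mutable

class Conf {
  private val settings = mutable.Map.empty[String, String]
  def set(key: String, value: String): Unit = settings(key) = value
  def get(key: String, default: String): String = settings.getOrElse(key, default)
}

class Context(external: Seq[(String, String)]) {
  val conf = new Conf

  // Plays the role of HiveContext.metadataHive: a lazy val that reads the
  // conf at the moment it is first forced.
  lazy val metadataHive: String =
    "metastore client for version " + conf.get("spark.sql.hive.metastore.version", "builtin")

  // Plays the role of the side-effecting setConf: touching metadataHive
  // here forces its construction.
  def setConf(key: String, value: String): Unit = {
    conf.set(key, value)
    metadataHive // side effect: forces construction, possibly too early
  }

  // Pre-fix behavior: copy external settings one at a time through setConf.
  external.foreach { case (k, v) => setConf(k, v) }
}

object Repro extends App {
  // If any other spark.sql key is enumerated before the metastore key,
  // metadataHive is forced while the conf is still incomplete.
  val ctx = new Context(Seq(
    "spark.sql.shuffle.partitions" -> "10",
    "spark.sql.hive.metastore.version" -> "0.12"))
  println(ctx.metadataHive) // prints "... version builtin" instead of 0.12
}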
Diffstat (limited to 'sql')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala                 | 25
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala  | 45
2 files changed, 66 insertions(+), 4 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
index 7384b24c50..91e6385dec 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala
@@ -182,9 +182,28 @@ class SQLContext(@transient val sparkContext: SparkContext)
conf.dialect
}
- sparkContext.getConf.getAll.foreach {
- case (key, value) if key.startsWith("spark.sql") => setConf(key, value)
- case _ =>
+ {
+ // We extract Spark SQL settings from the SparkContext's conf and copy them into
+ // Spark SQL's conf.
+ // First, we populate the SQLConf (conf), so that anything that reads these settings
+ // during its construction sees the correct values.
+ // For example, metadataHive in HiveContext may need both spark.sql.hive.metastore.version
+ // and spark.sql.hive.metastore.jars to be constructed correctly.
+ val properties = new Properties
+ sparkContext.getConf.getAll.foreach {
+ case (key, value) if key.startsWith("spark.sql") => properties.setProperty(key, value)
+ case _ =>
+ }
+ // We put these settings into conf directly to avoid calling setConf, which may have
+ // side effects. For example, in HiveContext, setConf may cause executionHive and metadataHive
+ // to be constructed. If we called setConf here, the constructed metadataHive might have
+ // the wrong settings, or its construction might fail.
+ conf.setConf(properties)
+ // After we have populated SQLConf, we call setConf to populate other confs in the subclass
+ // (e.g. hiveconf in HiveContext).
+ properties.foreach {
+ case (key, value) => setConf(key, value)
+ }
}
@transient
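In the same stand-in terms as the sketch above, the fix in this hunk is a two-phase population: write the settings into the conf directly (no side effects), then replay setConf so that subclass hooks such as hiveconf still run. A sketch of that ordering, reusing the hypothetical Conf class from the earlier example:

// Sketch of the fixed two-phase ordering, reusing the hypothetical Conf
// class from the earlier sketch.
class FixedContext(external: Seq[(String, String)]) {
  val conf = new Conf

  lazy val metadataHive: String =
    "metastore client for version " + conf.get("spark.sql.hive.metastore.version", "builtin")

  def setConf(key: String, value: String): Unit = {
    conf.set(key, value)
    metadataHive // same side effect as before
  }

  private val sqlSettings = external.filter { case (k, _) => k.startsWith("spark.sql") }

  // Phase 1: populate the conf directly, with no side effects, so anything
  // constructed later sees the complete settings.
  sqlSettings.foreach { case (k, v) => conf.set(k, v) }

  // Phase 2: replay setConf so subclass hooks still run; if metadataHive is
  // forced now, the conf is already fully populated.
  sqlSettings.foreach { case (k, v) => setConf(k, v) }
}

With the same input as the Repro object above, metadataHive now reports version 0.12, because the conf is complete before anything can force it.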
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 7eb4842726..deceb67d2b 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -17,7 +17,8 @@
package org.apache.spark.sql.hive.client
-import org.apache.spark.{Logging, SparkFunSuite}
+import org.apache.spark.sql.hive.HiveContext
+import org.apache.spark.{Logging, SparkConf, SparkContext, SparkFunSuite}
import org.apache.spark.sql.catalyst.util.quietly
import org.apache.spark.util.Utils
@@ -37,6 +38,48 @@ class VersionsSuite extends SparkFunSuite with Logging {
"hive.metastore.warehouse.dir" -> warehousePath.toString)
}
+ test("SPARK-8020: successfully create a HiveContext with metastore settings in Spark conf.") {
+ val sparkConf =
+ new SparkConf() {
+ // We do not actually clone it; we need to keep the custom getAll.
+ override def clone: SparkConf = this
+
+ override def getAll: Array[(String, String)] = {
+ val allSettings = super.getAll
+ val metastoreVersion = get("spark.sql.hive.metastore.version")
+ val metastoreJars = get("spark.sql.hive.metastore.jars")
+
+ val others = allSettings.filterNot { case (key, _) =>
+ key == "spark.sql.hive.metastore.version" || key == "spark.sql.hive.metastore.jars"
+ }
+
+ // Put metastore.version first; this ordering is needed to trigger the exception
+ // caused by SPARK-8020. Other problems triggered by SPARK-8020
+ // (e.g. using Hive 0.13.1's metastore client to connect to a 0.12 metastore)
+ // are not easy to test.
+ Array(
+ ("spark.sql.hive.metastore.version" -> metastoreVersion),
+ ("spark.sql.hive.metastore.jars" -> metastoreJars)) ++ others
+ }
+ }
+ sparkConf
+ .set("spark.sql.hive.metastore.version", "12")
+ .set("spark.sql.hive.metastore.jars", "maven")
+
+ val hiveContext = new HiveContext(
+ new SparkContext(
+ "local[2]",
+ "TestSQLContextInVersionsSuite",
+ sparkConf)) {
+
+ protected override def configure(): Map[String, String] = buildConf
+
+ }
+
+ // Make sure all metastore related lazy vals got created.
+ hiveContext.tables()
+ }
+
test("success sanity check") {
val badClient = IsolatedClientLoader.forVersion("13", buildConf()).client
val db = new HiveDatabase("default", "")
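The test above relies on an anonymous SparkConf subclass whose overridden getAll returns the metastore keys first, which is the ordering needed to hit the SPARK-8020 code path. A standalone sketch of the same reordering trick, with a hypothetical OrderedConf class standing in for the anonymous SparkConf subclass:

// Sketch of the test's reordering trick: override the method that
// enumerates settings so specific keys are always returned first.
// OrderedConf is a hypothetical stand-in, not a Spark class.
class OrderedConf(settings: Map[String, String], firstKeys: Seq[String]) {
  def getAll: Array[(String, String)] = {
    val first = firstKeys.flatMap(k => settings.get(k).map(k -> _))
    val rest  = settings.toSeq.filterNot { case (k, _) => firstKeys.contains(k) }
    (first ++ rest).toArray
  }
}

object OrderedConfDemo extends App {
  val conf = new OrderedConf(
    Map(
      "spark.sql.shuffle.partitions"     -> "10",
      "spark.sql.hive.metastore.version" -> "0.12"),
    firstKeys = Seq("spark.sql.hive.metastore.version"))
  conf.getAll.foreach(println)
  // The metastore key prints first, which is exactly the ordering the test
  // relies on to exercise the SPARK-8020 code path.
}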