authorYin Huai <yhuai@databricks.com>2015-12-10 12:04:20 -0800
committerYin Huai <yhuai@databricks.com>2015-12-10 12:04:20 -0800
commitec5f9ed5de2218938dba52152475daafd4dc4786 (patch)
tree7f91343a7b237e373fad5d78f93884de0e78638d
parentbc5f56aa60a430244ffa0cacd81c0b1ecbf8d68f (diff)
[SPARK-12228][SQL] Try to run execution hive's derby in memory.

This PR tries to make execution Hive's Derby run in memory, since it is a fake metastore and every time we create a HiveContext we switch to a new one. It may reduce the flakiness of tests that need to create a HiveContext (e.g. HiveSparkSubmitSuite). I will test it more.

https://issues.apache.org/jira/browse/SPARK-12228

Author: Yin Huai <yhuai@databricks.com>

Closes #10204 from yhuai/derbyInMemory.
-rw-r--r-- sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala                                 | 8
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala                               | 2
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala                        | 2
4 files changed, 9 insertions(+), 5 deletions(-)
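
As a quick orientation before the diff, a minimal sketch (not part of the patch) of the two call patterns it introduces: the execution Hive client asks for an in-memory Derby metastore, while SparkSQLCLIDriver and TestHiveContext keep the on-disk temporary metastore.

    // Minimal sketch, not part of the patch: how callers use the new flag.
    val execClientConfig: Map[String, String] =
      HiveContext.newTemporaryConfiguration(useInMemoryDerby = true)   // execution Hive client
    val cliDriverConfig: Map[String, String] =
      HiveContext.newTemporaryConfiguration(useInMemoryDerby = false)  // SparkSQLCLIDriver / TestHive
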
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
index 4b928e600b..03bb2c2225 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala
@@ -83,7 +83,7 @@ private[hive] object SparkSQLCLIDriver extends Logging {
val cliConf = new HiveConf(classOf[SessionState])
// Override the location of the metastore since this is only used for local execution.
- HiveContext.newTemporaryConfiguration().foreach {
+ HiveContext.newTemporaryConfiguration(useInMemoryDerby = false).foreach {
case (key, value) => cliConf.set(key, value)
}
val sessionState = new CliSessionState(cliConf)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index e83941c2ec..5958777b0d 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -212,7 +212,7 @@ class HiveContext private[hive](
val loader = new IsolatedClientLoader(
version = IsolatedClientLoader.hiveVersion(hiveExecutionVersion),
execJars = Seq(),
- config = newTemporaryConfiguration(),
+ config = newTemporaryConfiguration(useInMemoryDerby = true),
isolationOn = false,
baseClassLoader = Utils.getContextOrSparkClassLoader)
loader.createClient().asInstanceOf[ClientWrapper]
@@ -721,7 +721,9 @@ private[hive] object HiveContext {
doc = "TODO")
/** Constructs a configuration for hive, where the metastore is located in a temp directory. */
- def newTemporaryConfiguration(): Map[String, String] = {
+ def newTemporaryConfiguration(useInMemoryDerby: Boolean): Map[String, String] = {
+ val withInMemoryMode = if (useInMemoryDerby) "memory:" else ""
+
val tempDir = Utils.createTempDir()
val localMetastore = new File(tempDir, "metastore")
val propMap: HashMap[String, String] = HashMap()
@@ -735,7 +737,7 @@ private[hive] object HiveContext {
}
propMap.put(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, localMetastore.toURI.toString)
propMap.put(HiveConf.ConfVars.METASTORECONNECTURLKEY.varname,
- s"jdbc:derby:;databaseName=${localMetastore.getAbsolutePath};create=true")
+ s"jdbc:derby:${withInMemoryMode};databaseName=${localMetastore.getAbsolutePath};create=true")
propMap.put("datanucleus.rdbms.datastoreAdapterClassName",
"org.datanucleus.store.rdbms.adapter.DerbyAdapter")
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
index 2e2d201bf2..97792549bb 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala
@@ -87,7 +87,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
dir
}
- private lazy val temporaryConfig = newTemporaryConfiguration()
+ private lazy val temporaryConfig = newTemporaryConfiguration(useInMemoryDerby = false)
/** Sets up the system initially or after a RESET command */
protected override def configure(): Map[String, String] = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
index 9296219331..53185fd775 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala
@@ -80,6 +80,8 @@ class HiveSparkSubmitSuite
"--master", "local-cluster[2,1,1024]",
"--conf", "spark.ui.enabled=false",
"--conf", "spark.master.rest.enabled=false",
+ "--conf", "spark.sql.hive.metastore.version=0.12",
+ "--conf", "spark.sql.hive.metastore.jars=maven",
"--driver-java-options", "-Dderby.system.durability=test",
unusedJar.toString)
runSparkSubmit(args)
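
The two new --conf flags pin the test to a Hive 0.12 metastore client with jars resolved from Maven. A minimal sketch (not part of the patch) of the same settings expressed through SparkConf:

    // Minimal sketch, not part of the patch: the new spark-submit flags above,
    // expressed as SparkConf settings.
    import org.apache.spark.SparkConf

    val conf = new SparkConf()
      .set("spark.sql.hive.metastore.version", "0.12") // use a Hive 0.12 metastore client
      .set("spark.sql.hive.metastore.jars", "maven")   // resolve matching Hive jars from Maven
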