author     xin Wu <xinwu@us.ibm.com>            2016-05-31 17:42:47 -0700
committer  Andrew Or <andrew@databricks.com>    2016-05-31 17:42:47 -0700
commit     04f925ede851fc77add9ef1cacb79fb3a617f650 (patch)
tree       3d7717eb9240f315c6b0eae7d51effd7fc258db3 /repl
parent     85d6b0db9f5bd425c36482ffcb1c3b9fd0fcdb31 (diff)
[SPARK-15236][SQL][SPARK SHELL] Add spark-defaults property to switch to use InMemoryCatalog
## What changes were proposed in this pull request?

This PR changes REPL/Main to check the property `spark.sql.catalogImplementation` to decide whether `enableHiveSupport` should be called. If `spark.sql.catalogImplementation` is set to `hive` and the Hive classes are built, Spark will use Hive support. Otherwise, Spark will create a SparkSession with in-memory catalog support.

## How was this patch tested?

Ran the REPL component test.

Author: xin Wu <xinwu@us.ibm.com>
Author: Xin Wu <xinwu@us.ibm.com>

Closes #13088 from xwu0226/SPARK-15236.
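In practice the switch is set before the shell starts, e.g. by adding `spark.sql.catalogImplementation in-memory` to `conf/spark-defaults.conf` or by passing `--conf spark.sql.catalogImplementation=in-memory` to `bin/spark-shell`. Outside the REPL, the same choice can be made programmatically; a minimal sketch (the property key is the one this patch reads; the rest is the standard SparkSession builder API, not part of this change):

```scala
import org.apache.spark.sql.SparkSession

// Build a session that uses the in-memory catalog instead of Hive.
val spark = SparkSession.builder()
  .master("local")
  .appName("catalog-demo")
  .config("spark.sql.catalogImplementation", "in-memory")
  .getOrCreate()
```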
Diffstat (limited to 'repl')
-rw-r--r--  repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala      | 20
-rw-r--r--  repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala | 50
2 files changed, 66 insertions, 4 deletions
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
index 005edda2be..771670fa55 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
@@ -22,6 +22,7 @@ import java.io.File
import scala.tools.nsc.GenericRunnerSettings
import org.apache.spark._
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.Utils
@@ -88,10 +89,23 @@ object Main extends Logging {
}
val builder = SparkSession.builder.config(conf)
- if (SparkSession.hiveClassesArePresent) {
- sparkSession = builder.enableHiveSupport().getOrCreate()
- logInfo("Created Spark session with Hive support")
+ if (conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
+ if (SparkSession.hiveClassesArePresent) {
+ // In the case that the property is not set at all, builder's config
+ // does not have this value set to 'hive' yet. The original default
+ // behavior is that when there are hive classes, we use hive catalog.
+ sparkSession = builder.enableHiveSupport().getOrCreate()
+ logInfo("Created Spark session with Hive support")
+ } else {
+ // Need to change it back to 'in-memory' if no hive classes are found
+ // in the case that the property is set to hive in spark-defaults.conf
+ builder.config(CATALOG_IMPLEMENTATION.key, "in-memory")
+ sparkSession = builder.getOrCreate()
+ logInfo("Created Spark session")
+ }
} else {
+ // In the case that the property is set but not to 'hive', the internal
+ // default is 'in-memory'. So the sparkSession will use in-memory catalog.
sparkSession = builder.getOrCreate()
logInfo("Created Spark session")
}
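Condensed, the selection logic added in this hunk amounts to the following (a simplified sketch, not a drop-in replacement: it inlines the `CATALOG_IMPLEMENTATION` key and collapses the two in-memory branches into one):

```scala
// Sketch of the catalog selection in REPL Main, assuming the surrounding
// `conf`, `builder`, and `sparkSession` from the hunk above.
val wantHive =
  conf.get("spark.sql.catalogImplementation", "hive").toLowerCase == "hive"

sparkSession = if (wantHive && SparkSession.hiveClassesArePresent) {
  // Default behavior preserved: Hive catalog whenever the classes are built.
  builder.enableHiveSupport().getOrCreate()
} else {
  // In-memory catalog, overriding a "hive" setting that cannot be honored.
  builder.config("spark.sql.catalogImplementation", "in-memory").getOrCreate()
}
```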
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index af82e7a111..125686030c 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -21,9 +21,11 @@ import java.io._
import java.net.URLClassLoader
import scala.collection.mutable.ArrayBuffer
-
import org.apache.commons.lang3.StringEscapeUtils
+import org.apache.log4j.{Level, LogManager}
import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.internal.config._
+import org.apache.spark.sql.SparkSession
import org.apache.spark.util.Utils
class ReplSuite extends SparkFunSuite {
@@ -99,6 +101,52 @@ class ReplSuite extends SparkFunSuite {
System.clearProperty("spark.driver.port")
}
+ test("SPARK-15236: use Hive catalog") {
+ // Raise the log level to INFO so that, when the Hive catalog is used,
+ // the "HiveMetaStore" INFO entries show up in the REPL output
+ val rootLogger = LogManager.getRootLogger()
+ val logLevel = rootLogger.getLevel
+ rootLogger.setLevel(Level.INFO)
+ try {
+ Main.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
+ val output = runInterpreter("local",
+ """
+ |spark.sql("drop table if exists t_15236")
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ // Only when the config is set to "hive" and the Hive classes are built
+ // will the Hive catalog be used, in which case the INFO log will
+ // contain "HiveMetaStore" entries
+ if (SparkSession.hiveClassesArePresent) {
+ assertContains("HiveMetaStore", output)
+ } else {
+ // If hive classes are not built, in-memory catalog will be used
+ assertDoesNotContain("HiveMetaStore", output)
+ }
+ } finally {
+ rootLogger.setLevel(logLevel)
+ }
+ }
+
+ test("SPARK-15236: use in-memory catalog") {
+ val rootLogger = LogManager.getRootLogger()
+ val logLevel = rootLogger.getLevel
+ rootLogger.setLevel(Level.INFO)
+ try {
+ Main.conf.set(CATALOG_IMPLEMENTATION.key, "in-memory")
+ val output = runInterpreter("local",
+ """
+ |spark.sql("drop table if exists t_16236")
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ assertDoesNotContain("HiveMetaStore", output)
+ } finally {
+ rootLogger.setLevel(logLevel)
+ }
+ }
+
test("simple foreach with accumulator") {
val output = runInterpreter("local",
"""