author     xin Wu <xinwu@us.ibm.com>            2016-05-31 17:42:47 -0700
committer  Andrew Or <andrew@databricks.com>    2016-05-31 17:42:47 -0700
commit     04f925ede851fc77add9ef1cacb79fb3a617f650 (patch)
tree       3d7717eb9240f315c6b0eae7d51effd7fc258db3 /repl
parent     85d6b0db9f5bd425c36482ffcb1c3b9fd0fcdb31 (diff)
[SPARK-15236][SQL][SPARK SHELL] Add spark-defaults property to switch to use InMemoryCatalog
## What changes were proposed in this pull request?

This PR changes REPL/Main to check the property `spark.sql.catalogImplementation` to decide whether `enableHiveSupport` should be called. If `spark.sql.catalogImplementation` is set to `hive` and the Hive classes are built, Spark will use Hive support. Otherwise, Spark will create a SparkSession with in-memory catalog support.

## How was this patch tested?

Ran the REPL component test.

Author: xin Wu <xinwu@us.ibm.com>
Author: Xin Wu <xinwu@us.ibm.com>

Closes #13088 from xwu0226/SPARK-15236.
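In practice the switch is set before the shell starts, e.g. by adding `spark.sql.catalogImplementation in-memory` to `conf/spark-defaults.conf` or by passing `--conf spark.sql.catalogImplementation=in-memory` to `bin/spark-shell`. Outside the REPL, the same choice can be made programmatically; a minimal sketch (the property key is the one this patch reads; the rest is the standard SparkSession builder API, not part of this change):

```scala
import org.apache.spark.sql.SparkSession

// Build a session that uses the in-memory catalog instead of Hive.
val spark = SparkSession.builder()
  .master("local")
  .appName("catalog-demo")
  .config("spark.sql.catalogImplementation", "in-memory")
  .getOrCreate()
```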
Diffstat (limited to 'repl')
-rw-r--r--  repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala      | 20
-rw-r--r--  repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala | 50
2 files changed, 66 insertions, 4 deletions
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
index 005edda2be..771670fa55 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
@@ -22,6 +22,7 @@ import java.io.File
import scala.tools.nsc.GenericRunnerSettings
import org.apache.spark._
+import org.apache.spark.internal.config.CATALOG_IMPLEMENTATION
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
import org.apache.spark.util.Utils
@@ -88,10 +89,23 @@ object Main extends Logging {
}
val builder = SparkSession.builder.config(conf)
- if (SparkSession.hiveClassesArePresent) {
- sparkSession = builder.enableHiveSupport().getOrCreate()
- logInfo("Created Spark session with Hive support")
+ if (conf.get(CATALOG_IMPLEMENTATION.key, "hive").toLowerCase == "hive") {
+ if (SparkSession.hiveClassesArePresent) {
+ // In the case that the property is not set at all, builder's config
+ // does not have this value set to 'hive' yet. The original default
+ // behavior is that when there are hive classes, we use hive catalog.
+ sparkSession = builder.enableHiveSupport().getOrCreate()
+ logInfo("Created Spark session with Hive support")
+ } else {
+ // Need to change it back to 'in-memory' if no hive classes are found
+ // in the case that the property is set to hive in spark-defaults.conf
+ builder.config(CATALOG_IMPLEMENTATION.key, "in-memory")
+ sparkSession = builder.getOrCreate()
+ logInfo("Created Spark session")
+ }
} else {
+ // In the case that the property is set but not to 'hive', the internal
+ // default is 'in-memory'. So the sparkSession will use in-memory catalog.
sparkSession = builder.getOrCreate()
logInfo("Created Spark session")
}
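Condensed, the selection logic added in this hunk amounts to the following (a simplified sketch, not a drop-in replacement: it inlines the `CATALOG_IMPLEMENTATION` key and collapses the two in-memory branches into one):

```scala
// Sketch of the catalog selection in REPL Main, assuming the surrounding
// `conf`, `builder`, and `sparkSession` from the hunk above.
val wantHive =
  conf.get("spark.sql.catalogImplementation", "hive").toLowerCase == "hive"

sparkSession = if (wantHive && SparkSession.hiveClassesArePresent) {
  // Default behavior preserved: Hive catalog whenever the classes are built.
  builder.enableHiveSupport().getOrCreate()
} else {
  // In-memory catalog, overriding a "hive" setting that cannot be honored.
  builder.config("spark.sql.catalogImplementation", "in-memory").getOrCreate()
}
```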
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index af82e7a111..125686030c 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -21,9 +21,11 @@ import java.io._
import java.net.URLClassLoader
import scala.collection.mutable.ArrayBuffer
-
import org.apache.commons.lang3.StringEscapeUtils
+import org.apache.log4j.{Level, LogManager}
import org.apache.spark.{SparkContext, SparkFunSuite}
+import org.apache.spark.internal.config._
+import org.apache.spark.sql.SparkSession
import org.apache.spark.util.Utils
class ReplSuite extends SparkFunSuite {
@@ -99,6 +101,52 @@ class ReplSuite extends SparkFunSuite {
System.clearProperty("spark.driver.port")
}
+ test("SPARK-15236: use Hive catalog") {
+ // Raise the log level to INFO so that, when the Hive catalog is used,
+ // the "HiveMetaStore" INFO entries show up in the REPL output
+ val rootLogger = LogManager.getRootLogger()
+ val logLevel = rootLogger.getLevel
+ rootLogger.setLevel(Level.INFO)
+ try {
+ Main.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
+ val output = runInterpreter("local",
+ """
+ |spark.sql("drop table if exists t_15236")
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ // Only when the config is set to "hive" and the Hive classes are built
+ // will the Hive catalog be used, in which case the INFO log will
+ // contain "HiveMetaStore" entries
+ if (SparkSession.hiveClassesArePresent) {
+ assertContains("HiveMetaStore", output)
+ } else {
+ // If hive classes are not built, in-memory catalog will be used
+ assertDoesNotContain("HiveMetaStore", output)
+ }
+ } finally {
+ rootLogger.setLevel(logLevel)
+ }
+ }
+
+ test("SPARK-15236: use in-memory catalog") {
+ val rootLogger = LogManager.getRootLogger()
+ val logLevel = rootLogger.getLevel
+ rootLogger.setLevel(Level.INFO)
+ try {
+ Main.conf.set(CATALOG_IMPLEMENTATION.key, "in-memory")
+ val output = runInterpreter("local",
+ """
+ |spark.sql("drop table if exists t_16236")
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ assertDoesNotContain("HiveMetaStore", output)
+ } finally {
+ rootLogger.setLevel(logLevel)
+ }
+ }
+
test("simple foreach with accumulator") {
val output = runInterpreter("local",
"""