From 34336b6250d99bcf009b082cbf83f326d6b00074 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Mon, 25 Apr 2016 15:30:18 -0700
Subject: [SPARK-14828][SQL] Start SparkSession in REPL instead of SQLContext

## What changes were proposed in this pull request?

```
Spark context available as 'sc' (master = local[*], app id = local-1461283768192).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.0.0-SNAPSHOT
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_51)
Type in expressions to have them evaluated.
Type :help for more information.

scala> sql("SHOW TABLES").collect()
16/04/21 17:09:39 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
16/04/21 17:09:39 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
res0: Array[org.apache.spark.sql.Row] = Array([src,false])

scala> sql("SHOW TABLES").collect()
res1: Array[org.apache.spark.sql.Row] = Array([src,false])

scala> spark.createDataFrame(Seq((1, 1), (2, 2), (3, 3)))
res2: org.apache.spark.sql.DataFrame = [_1: int, _2: int]
```

Hive things are loaded lazily.

## How was this patch tested?

Manual.

Author: Andrew Or

Closes #12589 from andrewor14/spark-session-repl.
---
 .../scala/org/apache/spark/sql/SparkSession.scala | 34 ++++++++++++++++++----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index 131f28f98b..3561765642 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -907,16 +907,19 @@ class SparkSession private(
 
 object SparkSession {
 
+  private val HIVE_SHARED_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSharedState"
+  private val HIVE_SESSION_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSessionState"
+
   private def sharedStateClassName(conf: SparkConf): String = {
     conf.get(CATALOG_IMPLEMENTATION) match {
-      case "hive" => "org.apache.spark.sql.hive.HiveSharedState"
+      case "hive" => HIVE_SHARED_STATE_CLASS_NAME
       case "in-memory" => classOf[SharedState].getCanonicalName
     }
   }
 
   private def sessionStateClassName(conf: SparkConf): String = {
     conf.get(CATALOG_IMPLEMENTATION) match {
-      case "hive" => "org.apache.spark.sql.hive.HiveSessionState"
+      case "hive" => HIVE_SESSION_STATE_CLASS_NAME
       case "in-memory" => classOf[SessionState].getCanonicalName
     }
   }
@@ -938,10 +941,31 @@ object SparkSession {
     }
   }
 
-  // TODO: do we want to expose this?
+  /**
+   * Return true if Hive classes can be loaded, otherwise false.
+   */
+  private[spark] def hiveClassesArePresent: Boolean = {
+    try {
+      Utils.classForName(HIVE_SESSION_STATE_CLASS_NAME)
+      Utils.classForName(HIVE_SHARED_STATE_CLASS_NAME)
+      Utils.classForName("org.apache.hadoop.hive.conf.HiveConf")
+      true
+    } catch {
+      case _: ClassNotFoundException | _: NoClassDefFoundError => false
+    }
+  }
+
+  /**
+   * Create a new [[SparkSession]] with a catalog backed by Hive.
+   */
   def withHiveSupport(sc: SparkContext): SparkSession = {
-    sc.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
-    new SparkSession(sc)
+    if (hiveClassesArePresent) {
+      sc.conf.set(CATALOG_IMPLEMENTATION.key, "hive")
+      new SparkSession(sc)
+    } else {
+      throw new IllegalArgumentException(
+        "Unable to instantiate SparkSession with Hive support because " +
+          "Hive classes are not found.")
+    }
   }
 }
--
cgit v1.2.3
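For readers without the Spark sources at hand, the sketch below isolates the reflection-based probe that `hiveClassesArePresent` performs. It is not part of the patch: plain `Class.forName` stands in for Spark's `Utils.classForName` (which additionally resolves classes through Spark's class loader), and `com.example.NotThere` is a hypothetical class name used only to demonstrate the negative case.

```
// Standalone sketch of the optional-dependency probe used by
// SparkSession.hiveClassesArePresent. Class.forName stands in for
// Spark's Utils.classForName; "com.example.NotThere" is hypothetical.
object ClassPresenceProbe {

  /** Returns true only if every named class can be loaded from the current classpath. */
  def classesArePresent(classNames: String*): Boolean = {
    try {
      classNames.foreach(name => Class.forName(name))
      true
    } catch {
      // ClassNotFoundException: the named class itself is absent.
      // NoClassDefFoundError: the class exists but a transitive dependency
      // is missing, which is typical when optional jars are incomplete.
      case _: ClassNotFoundException | _: NoClassDefFoundError => false
    }
  }

  def main(args: Array[String]): Unit = {
    println(classesArePresent("java.lang.String"))     // true: always on the classpath
    println(classesArePresent("com.example.NotThere")) // false: hypothetical missing class
  }
}
```

Catching `NoClassDefFoundError` alongside `ClassNotFoundException` matters here: with optional dependencies such as the Hive jars, a class can be present while one of its own dependencies is not, and loading it then fails with the error rather than the exception. Probing up front is what lets `withHiveSupport` fail fast with a clear `IllegalArgumentException` instead of surfacing an opaque linkage error later.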