diff options
author | Andrew Or <andrew@databricks.com> | 2016-04-25 15:30:18 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-04-25 15:30:18 -0700 |
commit | 34336b6250d99bcf009b082cbf83f326d6b00074 (patch) | |
tree | b24149ee9336a699aecd2264f5010f9ecb67ba72 /sql/core/src | |
parent | 9cb3ba1013a7eae11be8a00fa4a9c5308bb20195 (diff) | |
download | spark-34336b6250d99bcf009b082cbf83f326d6b00074.tar.gz spark-34336b6250d99bcf009b082cbf83f326d6b00074.tar.bz2 spark-34336b6250d99bcf009b082cbf83f326d6b00074.zip |
[SPARK-14828][SQL] Start SparkSession in REPL instead of SQLContext
## What changes were proposed in this pull request?
```
Spark context available as 'sc' (master = local[*], app id = local-1461283768192).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.0.0-SNAPSHOT
      /_/
Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_51)
Type in expressions to have them evaluated.
Type :help for more information.
scala> sql("SHOW TABLES").collect()
16/04/21 17:09:39 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
16/04/21 17:09:39 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
res0: Array[org.apache.spark.sql.Row] = Array([src,false])
scala> sql("SHOW TABLES").collect()
res1: Array[org.apache.spark.sql.Row] = Array([src,false])
scala> spark.createDataFrame(Seq((1, 1), (2, 2), (3, 3)))
res2: org.apache.spark.sql.DataFrame = [_1: int, _2: int]
```
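Hive-related state is loaded lazily: as the transcript above shows, the metastore warnings appear only on the first `sql(...)` call, not at REPL startup.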
## How was this patch tested?
Manual.
Author: Andrew Or <andrew@databricks.com>
Closes #12589 from andrewor14/spark-session-repl.
Diffstat (limited to 'sql/core/src')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 34 |
1 file changed, 29 insertions, 5 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index 131f28f98b..3561765642 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -907,16 +907,19 @@ class SparkSession private( object SparkSession { + private val HIVE_SHARED_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSharedState" + private val HIVE_SESSION_STATE_CLASS_NAME = "org.apache.spark.sql.hive.HiveSessionState" + private def sharedStateClassName(conf: SparkConf): String = { conf.get(CATALOG_IMPLEMENTATION) match { - case "hive" => "org.apache.spark.sql.hive.HiveSharedState" + case "hive" => HIVE_SHARED_STATE_CLASS_NAME case "in-memory" => classOf[SharedState].getCanonicalName } } private def sessionStateClassName(conf: SparkConf): String = { conf.get(CATALOG_IMPLEMENTATION) match { - case "hive" => "org.apache.spark.sql.hive.HiveSessionState" + case "hive" => HIVE_SESSION_STATE_CLASS_NAME case "in-memory" => classOf[SessionState].getCanonicalName } } @@ -938,10 +941,31 @@ object SparkSession { } } - // TODO: do we want to expose this? + /** + * Return true if Hive classes can be loaded, otherwise false. + */ + private[spark] def hiveClassesArePresent: Boolean = { + try { + Utils.classForName(HIVE_SESSION_STATE_CLASS_NAME) + Utils.classForName(HIVE_SHARED_STATE_CLASS_NAME) + Utils.classForName("org.apache.hadoop.hive.conf.HiveConf") + true + } catch { + case _: ClassNotFoundException | _: NoClassDefFoundError => false + } + } + + /** + * Create a new [[SparkSession]] with a catalog backed by Hive. 
+ */ def withHiveSupport(sc: SparkContext): SparkSession = { - sc.conf.set(CATALOG_IMPLEMENTATION.key, "hive") - new SparkSession(sc) + if (hiveClassesArePresent) { + sc.conf.set(CATALOG_IMPLEMENTATION.key, "hive") + new SparkSession(sc) + } else { + throw new IllegalArgumentException( + "Unable to instantiate SparkSession with Hive support because Hive classes are not found.") + } } } |