From 34336b6250d99bcf009b082cbf83f326d6b00074 Mon Sep 17 00:00:00 2001
From: Andrew Or
Date: Mon, 25 Apr 2016 15:30:18 -0700
Subject: [SPARK-14828][SQL] Start SparkSession in REPL instead of SQLContext

## What changes were proposed in this pull request?

```
Spark context available as 'sc' (master = local[*], app id = local-1461283768192).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.0.0-SNAPSHOT
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.7.0_51)
Type in expressions to have them evaluated.
Type :help for more information.

scala> sql("SHOW TABLES").collect()
16/04/21 17:09:39 WARN ObjectStore: Version information not found in metastore. hive.metastore.schema.verification is not enabled so recording the schema version 1.2.0
16/04/21 17:09:39 WARN ObjectStore: Failed to get database default, returning NoSuchObjectException
res0: Array[org.apache.spark.sql.Row] = Array([src,false])

scala> sql("SHOW TABLES").collect()
res1: Array[org.apache.spark.sql.Row] = Array([src,false])

scala> spark.createDataFrame(Seq((1, 1), (2, 2), (3, 3)))
res2: org.apache.spark.sql.DataFrame = [_1: int, _2: int]
```

Hive support is initialized lazily: as the transcript shows, the metastore is only set up on the first `sql` call, not when the session is created.
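For anyone migrating shell sessions from `sqlContext`, the sketch below shows the same idioms against the new `spark` value. It is illustrative only (the table and case class are borrowed from the transcript above and from `ReplSuite`, not new API), and it assumes the REPL has already defined `sc` and `spark` and run the `import spark.implicits._` / `import spark.sql` lines added by this patch:

```
// Entered in the new REPL, where `sc` and `spark` are predefined and
// `spark.implicits._` and `spark.sql` have already been imported.

// 1.x: sqlContext.sql("SHOW TABLES")  ->  2.0: spark.sql(...), or just sql(...) via the import
val tables = sql("SHOW TABLES").collect()   // the first Hive access is what initializes the metastore

// DataFrames and Datasets come from the session rather than from sqlContext.
val df = spark.createDataFrame(Seq((1, 1), (2, 2), (3, 3)))

case class TestCaseClass(value: Int)        // illustrative case class, as in ReplSuite
val fromRdd = sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF()  // relies on spark.implicits._
```

The session itself is produced by the new `createSparkSession()` in the diff below: it returns `SparkSession.withHiveSupport(sparkContext)` when `SparkSession.hiveClassesArePresent`, and otherwise falls back to a plain `new SparkSession(sparkContext)`.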
## How was this patch tested?

Manual.

Author: Andrew Or

Closes #12589 from andrewor14/spark-session-repl.
---
 .../scala/org/apache/spark/repl/SparkILoop.scala   | 24 +++++++++---------------
 .../org/apache/spark/repl/SparkILoopInit.scala     | 14 +++++++-------
 .../main/scala/org/apache/spark/repl/Main.scala    | 24 ++++++++++--------------
 .../scala/org/apache/spark/repl/SparkILoop.scala   | 14 +++++++-------
 .../scala/org/apache/spark/repl/ReplSuite.scala    |  5 +++--
 5 files changed, 36 insertions(+), 45 deletions(-)
(limited to 'repl')

diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 8e7fc46e32..6a811adcf9 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -43,7 +43,7 @@ import org.apache.spark.SparkConf
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.internal.Logging
-import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils
 
 /** The Scala interactive shell.  It provides a read-eval-print loop
@@ -129,7 +129,6 @@ class SparkILoop(
   // NOTE: Must be public for visibility
   @DeveloperApi
   var sparkContext: SparkContext = _
-  var sqlContext: SQLContext = _
 
   override def echoCommandMessage(msg: String) {
     intp.reporter printMessage msg
@@ -1027,20 +1026,15 @@ class SparkILoop(
   }
 
   @DeveloperApi
-  def createSQLContext(): SQLContext = {
-    val name = "org.apache.spark.sql.hive.HiveContext"
-    val loader = Utils.getContextOrSparkClassLoader
-    try {
-      sqlContext = loader.loadClass(name).getConstructor(classOf[SparkContext])
-        .newInstance(sparkContext).asInstanceOf[SQLContext]
-      logInfo("Created sql context (with Hive support)..")
+  // TODO: don't duplicate this code
+  def createSparkSession(): SparkSession = {
+    if (SparkSession.hiveClassesArePresent) {
+      logInfo("Creating Spark session with Hive support")
+      SparkSession.withHiveSupport(sparkContext)
+    } else {
+      logInfo("Creating Spark session")
+      new SparkSession(sparkContext)
     }
-    catch {
-      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError =>
-        sqlContext = new SQLContext(sparkContext)
-        logInfo("Created sql context..")
-    }
-    sqlContext
   }
 
   private def getMaster(): String = {
diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
index 99e1e1df33..4ce776e17d 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoopInit.scala
@@ -125,21 +125,21 @@ private[repl] trait SparkILoopInit {
       command("""
          @transient val sc = {
            val _sc = org.apache.spark.repl.Main.interp.createSparkContext()
-           println("Spark context available as sc " +
+           println("Spark context available as 'sc' " +
              s"(master = ${_sc.master}, app id = ${_sc.applicationId}).")
            _sc
          }
         """)
       command("""
-         @transient val sqlContext = {
-           val _sqlContext = org.apache.spark.repl.Main.interp.createSQLContext()
-           println("SQL context available as sqlContext.")
-           _sqlContext
+         @transient val spark = {
+           val _session = org.apache.spark.repl.Main.interp.createSparkSession()
+           println("Spark session available as 'spark'.")
+           _session
          }
         """)
       command("import org.apache.spark.SparkContext._")
-      command("import sqlContext.implicits._")
-      command("import sqlContext.sql")
+      command("import spark.implicits._")
+      command("import spark.sql")
       command("import org.apache.spark.sql.functions._")
     }
   }
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
index bd853f1522..8e381ff6ae 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
@@ -23,8 +23,8 @@ import scala.tools.nsc.GenericRunnerSettings
 
 import org.apache.spark._
 import org.apache.spark.internal.Logging
+import org.apache.spark.sql.SparkSession
 import org.apache.spark.util.Utils
-import org.apache.spark.sql.SQLContext
 
 object Main extends Logging {
 
@@ -35,7 +35,7 @@ object Main extends Logging {
   val outputDir = Utils.createTempDir(root = rootDir, namePrefix = "repl")
 
   var sparkContext: SparkContext = _
-  var sqlContext: SQLContext = _
+  var sparkSession: SparkSession = _
   // this is a public var because tests reset it.
   var interp: SparkILoop = _
 
@@ -92,19 +92,15 @@ object Main extends Logging {
     sparkContext
   }
 
-  def createSQLContext(): SQLContext = {
-    val name = "org.apache.spark.sql.hive.HiveContext"
-    val loader = Utils.getContextOrSparkClassLoader
-    try {
-      sqlContext = loader.loadClass(name).getConstructor(classOf[SparkContext])
-        .newInstance(sparkContext).asInstanceOf[SQLContext]
-      logInfo("Created sql context (with Hive support)..")
-    } catch {
-      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError =>
-        sqlContext = new SQLContext(sparkContext)
-        logInfo("Created sql context..")
+  def createSparkSession(): SparkSession = {
+    if (SparkSession.hiveClassesArePresent) {
+      sparkSession = SparkSession.withHiveSupport(sparkContext)
+      logInfo("Created Spark session with Hive support")
+    } else {
+      sparkSession = new SparkSession(sparkContext)
+      logInfo("Created Spark session")
     }
-    sqlContext
+    sparkSession
   }
 }
diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index db09d6ace1..d029659fed 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -38,21 +38,21 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter)
     processLine("""
        @transient val sc = {
          val _sc = org.apache.spark.repl.Main.createSparkContext()
-         println("Spark context available as sc " +
+         println("Spark context available as 'sc' " +
            s"(master = ${_sc.master}, app id = ${_sc.applicationId}).")
          _sc
        }
        """)
     processLine("""
-       @transient val sqlContext = {
-         val _sqlContext = org.apache.spark.repl.Main.createSQLContext()
-         println("SQL context available as sqlContext.")
-         _sqlContext
+       @transient val spark = {
+         val _session = org.apache.spark.repl.Main.createSparkSession()
+         println("Spark session available as 'spark'.")
+         _session
        }
        """)
     processLine("import org.apache.spark.SparkContext._")
-    processLine("import sqlContext.implicits._")
-    processLine("import sqlContext.sql")
+    processLine("import spark.implicits._")
+    processLine("import spark.sql")
     processLine("import org.apache.spark.sql.functions._")
   }
 }
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index d3dafe9c42..af82e7a111 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -249,10 +249,11 @@ class ReplSuite extends SparkFunSuite {
     assertDoesNotContain("Exception", output)
   }
 
-  test("SPARK-2576 importing SQLContext.createDataFrame.") {
+  test("SPARK-2576 importing implicits") {
     // We need to use local-cluster to test this case.
     val output = runInterpreter("local-cluster[1,1,1024]",
       """
+        |import spark.implicits._
         |case class TestCaseClass(value: Int)
         |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toDF().collect()
         |
@@ -366,7 +367,7 @@ class ReplSuite extends SparkFunSuite {
   test("define case class and create Dataset together with paste mode") {
     val output = runInterpreterInPasteMode("local-cluster[1,1,1024]",
       """
-        |import sqlContext.implicits._
+        |import spark.implicits._
         |case class TestClass(value: Int)
         |Seq(TestClass(1)).toDS()
       """.stripMargin)
--
cgit v1.2.3