author     Andrew Or <andrew@databricks.com>    2016-05-19 23:44:10 -0700
committer  Andrew Or <andrew@databricks.com>    2016-05-19 23:44:10 -0700
commit     c32b1b162e7e5ecc5c823f79ba9f23cbd1407dbf (patch)
tree       534f6d61bc843adf1ebd4682e5df27d45b6dba72 /python
parent     257375019266ab9e3c320e33026318cc31f58ada (diff)
[SPARK-15417][SQL][PYTHON] PySpark shell always uses in-memory catalog
## What changes were proposed in this pull request?
There is no way to use the Hive catalog in `pyspark-shell`. The shell used to create a `SparkContext` before calling `SparkSession.builder.enableHiveSupport().getOrCreate()`, so `getOrCreate()` simply reused the existing `SparkContext` instead of creating a new one with Hive support. As a result, `spark.sql.catalogImplementation` was never propagated to the context and the shell always fell back to the in-memory catalog.
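A minimal sketch of the failure mode described above (illustrative, not taken from the patch; it uses only public PySpark 2.0 APIs):

```python
from pyspark import SparkContext
from pyspark.sql import SparkSession

# The old shell.py created the context eagerly...
sc = SparkContext()

# ...so by the time the builder ran, getOrCreate() found the existing
# SparkContext and reused it. enableHiveSupport() sets
# spark.sql.catalogImplementation=hive, but the setting never reached
# the already-running context.
spark = SparkSession.builder.enableHiveSupport().getOrCreate()

# The session is therefore backed by the in-memory catalog:
print(spark.conf.get("spark.sql.catalogImplementation"))  # "in-memory"
```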
## How was this patch tested?
Manual.
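One way to verify this manually (a suggested check, not steps recorded in the PR): start `bin/pyspark` on a Hive-enabled build and inspect the catalog setting of the shell's `spark` session.

```python
# Inside the pyspark shell, after this patch:
spark.conf.get("spark.sql.catalogImplementation")  # expected: "hive"

# The shell's `sc` is now derived from the session rather than the
# other way around:
sc is spark.sparkContext  # expected: True
```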
Author: Andrew Or <andrew@databricks.com>
Closes #13203 from andrewor14/fix-pyspark-shell.
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/shell.py        | 8
-rw-r--r--  python/pyspark/sql/session.py  | 6
2 files changed, 11 insertions(+), 3 deletions(-)
diff --git a/python/pyspark/shell.py b/python/pyspark/shell.py
index adaa3b5a79..ef46d3065e 100644
--- a/python/pyspark/shell.py
+++ b/python/pyspark/shell.py
@@ -35,12 +35,11 @@ from pyspark.storagelevel import StorageLevel
 if os.environ.get("SPARK_EXECUTOR_URI"):
     SparkContext.setSystemProperty("spark.executor.uri", os.environ["SPARK_EXECUTOR_URI"])
 
-sc = SparkContext()
-atexit.register(lambda: sc.stop())
+SparkContext._ensure_initialized()
 
 try:
     # Try to access HiveConf, it will raise exception if Hive is not added
-    sc._jvm.org.apache.hadoop.hive.conf.HiveConf()
+    SparkContext._jvm.org.apache.hadoop.hive.conf.HiveConf()
     spark = SparkSession.builder\
         .enableHiveSupport()\
         .getOrCreate()
@@ -49,6 +48,9 @@ except py4j.protocol.Py4JError:
 except TypeError:
     spark = SparkSession(sc)
 
+sc = spark.sparkContext
+atexit.register(lambda: sc.stop())
+
 # for compatibility
 sqlContext = spark._wrapped
 sqlCtx = sqlContext
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 0e04b88265..241947537f 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -204,6 +204,12 @@ class SparkSession(object):
 
     @property
     @since(2.0)
+    def sparkContext(self):
+        """Returns the underlying :class:`SparkContext`."""
+        return self._sc
+
+    @property
+    @since(2.0)
     def conf(self):
         """Runtime configuration interface for Spark.
 
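For reference, a short usage sketch of the `sparkContext` property this patch adds to `SparkSession` (the property itself is from the patch; the local-mode setup around it is illustrative):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("demo").getOrCreate()

# New in this patch: recover the underlying SparkContext from the
# session; this is how shell.py now binds `sc` after the session exists.
sc = spark.sparkContext
print(sc.parallelize([1, 2, 3]).sum())  # 6

spark.stop()
```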