diff options
author | Davies Liu <davies@databricks.com> | 2015-10-19 16:18:20 -0700 |
---|---|---|
committer | Andrew Or <andrew@databricks.com> | 2015-10-19 16:18:20 -0700 |
commit | 232d7f8d42950431f1d9be2a6bb3591fb6ea20d6 (patch) | |
tree | 38abfac2a3d362fb4f8e62cc91d57791c6370ad3 /python/pyspark/sql | |
parent | a1413b3662250dd5e980e8b1f7c3dc4585ab4766 (diff) | |
download | spark-232d7f8d42950431f1d9be2a6bb3591fb6ea20d6.tar.gz spark-232d7f8d42950431f1d9be2a6bb3591fb6ea20d6.tar.bz2 spark-232d7f8d42950431f1d9be2a6bb3591fb6ea20d6.zip |
[SPARK-11114][PYSPARK] add getOrCreate for SparkContext/SQLContext in Python
Also added SQLContext.newSession()
Author: Davies Liu <davies@databricks.com>
Closes #9122 from davies/py_create.
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/context.py | 27 | ||||
-rw-r--r-- | python/pyspark/sql/tests.py | 14 |
2 files changed, 41 insertions, 0 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 89c8c6e0d9..79453658a1 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -75,6 +75,8 @@ class SQLContext(object): SQLContext in the JVM, instead we make all calls to this object. """ + _instantiatedContext = None + @ignore_unicode_prefix def __init__(self, sparkContext, sqlContext=None): """Creates a new SQLContext. @@ -99,6 +101,8 @@ class SQLContext(object): self._scala_SQLContext = sqlContext _monkey_patch_RDD(self) install_exception_handler() + if SQLContext._instantiatedContext is None: + SQLContext._instantiatedContext = self @property def _ssql_ctx(self): @@ -111,6 +115,29 @@ class SQLContext(object): self._scala_SQLContext = self._jvm.SQLContext(self._jsc.sc()) return self._scala_SQLContext + @classmethod + @since(1.6) + def getOrCreate(cls, sc): + """ + Get the existing SQLContext or create a new one with given SparkContext. + + :param sc: SparkContext + """ + if cls._instantiatedContext is None: + jsqlContext = sc._jvm.SQLContext.getOrCreate(sc._jsc.sc()) + cls(sc, jsqlContext) + return cls._instantiatedContext + + @since(1.6) + def newSession(self): + """ + Returns a new SQLContext as new session, that has separate SQLConf, + registered temporary tables and UDFs, but shared SparkContext and + table cache. + """ + jsqlContext = self._ssql_ctx.newSession() + return self.__class__(self._sc, jsqlContext) + @since(1.3) def setConf(self, key, value): """Sets the given Spark SQL configuration property. diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 645133b2b2..f465e1fa20 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -174,6 +174,20 @@ class DataTypeTests(unittest.TestCase): self.assertEqual(dt.fromInternal(0), datetime.date(1970, 1, 1)) +class SQLContextTests(ReusedPySparkTestCase): + def test_get_or_create(self): + sqlCtx = SQLContext.getOrCreate(self.sc) + self.assertTrue(SQLContext.getOrCreate(self.sc) is sqlCtx) + + def test_new_session(self): + sqlCtx = SQLContext.getOrCreate(self.sc) + sqlCtx.setConf("test_key", "a") + sqlCtx2 = sqlCtx.newSession() + sqlCtx2.setConf("test_key", "b") + self.assertEqual(sqlCtx.getConf("test_key", ""), "a") + self.assertEqual(sqlCtx2.getConf("test_key", ""), "b") + + class SQLTests(ReusedPySparkTestCase): @classmethod |