diff options
author | Sean Zhong <seanzhong@databricks.com> | 2016-05-12 15:51:53 +0800 |
---|---|---|
committer | Cheng Lian <lian@databricks.com> | 2016-05-12 15:51:53 +0800 |
commit | 33c6eb5218ce3c31cc9f632a67fd2c7057569683 (patch) | |
tree | b8c84c24107bf1ece596450ef3a3eec26df1f21d /python/pyspark/sql | |
parent | 5207a005cc86618907b8f467abc03eacef485ecd (diff) | |
download | spark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.tar.gz spark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.tar.bz2 spark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.zip |
[SPARK-15171][SQL] Deprecate registerTempTable and add dataset.createTempView
## What changes were proposed in this pull request?
Deprecates registerTempTable and adds dataset.createTempView and dataset.createOrReplaceTempView.
## How was this patch tested?
Unit tests.
Author: Sean Zhong <seanzhong@databricks.com>
Closes #12945 from clockfly/spark-15171.
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/catalog.py | 26 | ||||
-rw-r--r-- | python/pyspark/sql/context.py | 4 | ||||
-rw-r--r-- | python/pyspark/sql/dataframe.py | 51 | ||||
-rw-r--r-- | python/pyspark/sql/session.py | 6 |
4 files changed, 59 insertions, 28 deletions
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 9cfdd0a99f..812dbba59e 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -166,34 +166,20 @@ class Catalog(object): return DataFrame(df, self._sparkSession._wrapped) @since(2.0) - def dropTempTable(self, tableName): - """Drops the temporary table with the given table name in the catalog. - If the table has been cached before, then it will also be uncached. + def dropTempView(self, viewName): + """Drops the temporary view with the given view name in the catalog. + If the view has been cached before, then it will also be uncached. - >>> spark.createDataFrame([(1, 1)]).registerTempTable("my_table") + >>> spark.createDataFrame([(1, 1)]).createTempView("my_table") >>> spark.table("my_table").collect() [Row(_1=1, _2=1)] - >>> spark.catalog.dropTempTable("my_table") + >>> spark.catalog.dropTempView("my_table") >>> spark.table("my_table") # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... AnalysisException: ... """ - self._jcatalog.dropTempTable(tableName) - - @since(2.0) - def registerTable(self, df, tableName): - """Registers the given :class:`DataFrame` as a temporary table in the catalog. 
- - >>> df = spark.createDataFrame([(2, 1), (3, 1)]) - >>> spark.catalog.registerTable(df, "my_cool_table") - >>> spark.table("my_cool_table").collect() - [Row(_1=2, _2=1), Row(_1=3, _2=1)] - """ - if isinstance(df, DataFrame): - self._jsparkSession.registerTable(df._jdf, tableName) - else: - raise ValueError("Can only register DataFrame as table") + self._jcatalog.dropTempView(viewName) @ignore_unicode_prefix @since(2.0) diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index 02e742c124..ca111ae9bb 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -302,7 +302,7 @@ class SQLContext(object): >>> sqlContext.registerDataFrameAsTable(df, "table1") """ - self.sparkSession.catalog.registerTable(df, tableName) + df.createOrReplaceTempView(tableName) @since(1.6) def dropTempTable(self, tableName): @@ -311,7 +311,7 @@ class SQLContext(object): >>> sqlContext.registerDataFrameAsTable(df, "table1") >>> sqlContext.dropTempTable("table1") """ - self.sparkSession.catalog.dropTempTable(tableName) + self.sparkSession.catalog.dropTempView(tableName) @since(1.3) def createExternalTable(self, tableName, path=None, source=None, schema=None, **options): diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 49b4818bcc..a0264ce1ac 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -119,11 +119,55 @@ class DataFrame(object): that was used to create this :class:`DataFrame`. >>> df.registerTempTable("people") - >>> df2 = sqlContext.sql("select * from people") + >>> df2 = spark.sql("select * from people") >>> sorted(df.collect()) == sorted(df2.collect()) True + >>> spark.catalog.dropTempView("people") + + .. note:: Deprecated in 2.0, use createOrReplaceTempView instead. + """ + self._jdf.createOrReplaceTempView(name) + + @since(2.0) + def createTempView(self, name): + """Creates a temporary view with this DataFrame. 
+ + The lifetime of this temporary table is tied to the :class:`SparkSession` + that was used to create this :class:`DataFrame`. + throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the + catalog. + + >>> df.createTempView("people") + >>> df2 = spark.sql("select * from people") + >>> sorted(df.collect()) == sorted(df2.collect()) + True + >>> df.createTempView("people") # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + ... + Py4JJavaError: ... + : org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException... + >>> spark.catalog.dropTempView("people") + + """ + self._jdf.createTempView(name) + + @since(2.0) + def createOrReplaceTempView(self, name): + """Creates or replaces a temporary view with this DataFrame. + + The lifetime of this temporary table is tied to the :class:`SparkSession` + that was used to create this :class:`DataFrame`. + + >>> df.createOrReplaceTempView("people") + >>> df2 = df.filter(df.age > 3) + >>> df2.createOrReplaceTempView("people") + >>> df3 = spark.sql("select * from people") + >>> sorted(df3.collect()) == sorted(df2.collect()) + True + >>> spark.catalog.dropTempView("people") + """ - self._jdf.registerTempTable(name) + self._jdf.createOrReplaceTempView(name) @property @since(1.4) @@ -1479,12 +1523,13 @@ class DataFrameStatFunctions(object): def _test(): import doctest from pyspark.context import SparkContext - from pyspark.sql import Row, SQLContext + from pyspark.sql import Row, SQLContext, SparkSession import pyspark.sql.dataframe globs = pyspark.sql.dataframe.__dict__.copy() sc = SparkContext('local[4]', 'PythonTest') globs['sc'] = sc globs['sqlContext'] = SQLContext(sc) + globs['spark'] = SparkSession(sc) globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')])\ .toDF(StructType([StructField('age', IntegerType()), StructField('name', StringType())])) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 4ee9ab8ab2..ae314359d5 100644 --- 
a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -160,7 +160,7 @@ class SparkSession(object): ... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1), ... time=datetime(2014, 8, 1, 14, 1, 5))]) >>> df = allTypes.toDF() - >>> df.registerTempTable("allTypes") + >>> df.createOrReplaceTempView("allTypes") >>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a ' ... 'from allTypes where b and i > 0').collect() [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \ @@ -484,7 +484,7 @@ class SparkSession(object): :return: :class:`DataFrame` - >>> spark.catalog.registerTable(df, "table1") + >>> df.createOrReplaceTempView("table1") >>> df2 = spark.sql("SELECT field1 AS f1, field2 as f2 from table1") >>> df2.collect() [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')] @@ -497,7 +497,7 @@ class SparkSession(object): :return: :class:`DataFrame` - >>> spark.catalog.registerTable(df, "table1") + >>> df.createOrReplaceTempView("table1") >>> df2 = spark.table("table1") >>> sorted(df.collect()) == sorted(df2.collect()) True |