aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorSean Zhong <seanzhong@databricks.com>2016-05-12 15:51:53 +0800
committerCheng Lian <lian@databricks.com>2016-05-12 15:51:53 +0800
commit33c6eb5218ce3c31cc9f632a67fd2c7057569683 (patch)
treeb8c84c24107bf1ece596450ef3a3eec26df1f21d /python
parent5207a005cc86618907b8f467abc03eacef485ecd (diff)
downloadspark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.tar.gz
spark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.tar.bz2
spark-33c6eb5218ce3c31cc9f632a67fd2c7057569683.zip
[SPARK-15171][SQL] Deprecate registerTempTable and add dataset.createTempView
## What changes were proposed in this pull request? Deprecates registerTempTable and adds dataset.createTempView and dataset.createOrReplaceTempView. ## How was this patch tested? Unit tests. Author: Sean Zhong <seanzhong@databricks.com> Closes #12945 from clockfly/spark-15171.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/catalog.py26
-rw-r--r--python/pyspark/sql/context.py4
-rw-r--r--python/pyspark/sql/dataframe.py51
-rw-r--r--python/pyspark/sql/session.py6
4 files changed, 59 insertions, 28 deletions
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 9cfdd0a99f..812dbba59e 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -166,34 +166,20 @@ class Catalog(object):
return DataFrame(df, self._sparkSession._wrapped)
@since(2.0)
- def dropTempTable(self, tableName):
- """Drops the temporary table with the given table name in the catalog.
- If the table has been cached before, then it will also be uncached.
+ def dropTempView(self, viewName):
+ """Drops the temporary view with the given view name in the catalog.
+ If the view has been cached before, then it will also be uncached.
- >>> spark.createDataFrame([(1, 1)]).registerTempTable("my_table")
+ >>> spark.createDataFrame([(1, 1)]).createTempView("my_table")
>>> spark.table("my_table").collect()
[Row(_1=1, _2=1)]
- >>> spark.catalog.dropTempTable("my_table")
+ >>> spark.catalog.dropTempView("my_table")
>>> spark.table("my_table") # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
...
AnalysisException: ...
"""
- self._jcatalog.dropTempTable(tableName)
-
- @since(2.0)
- def registerTable(self, df, tableName):
- """Registers the given :class:`DataFrame` as a temporary table in the catalog.
-
- >>> df = spark.createDataFrame([(2, 1), (3, 1)])
- >>> spark.catalog.registerTable(df, "my_cool_table")
- >>> spark.table("my_cool_table").collect()
- [Row(_1=2, _2=1), Row(_1=3, _2=1)]
- """
- if isinstance(df, DataFrame):
- self._jsparkSession.registerTable(df._jdf, tableName)
- else:
- raise ValueError("Can only register DataFrame as table")
+ self._jcatalog.dropTempView(viewName)
@ignore_unicode_prefix
@since(2.0)
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index 02e742c124..ca111ae9bb 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -302,7 +302,7 @@ class SQLContext(object):
>>> sqlContext.registerDataFrameAsTable(df, "table1")
"""
- self.sparkSession.catalog.registerTable(df, tableName)
+ df.createOrReplaceTempView(tableName)
@since(1.6)
def dropTempTable(self, tableName):
@@ -311,7 +311,7 @@ class SQLContext(object):
>>> sqlContext.registerDataFrameAsTable(df, "table1")
>>> sqlContext.dropTempTable("table1")
"""
- self.sparkSession.catalog.dropTempTable(tableName)
+ self.sparkSession.catalog.dropTempView(tableName)
@since(1.3)
def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 49b4818bcc..a0264ce1ac 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -119,11 +119,55 @@ class DataFrame(object):
that was used to create this :class:`DataFrame`.
>>> df.registerTempTable("people")
- >>> df2 = sqlContext.sql("select * from people")
+ >>> df2 = spark.sql("select * from people")
>>> sorted(df.collect()) == sorted(df2.collect())
True
+ >>> spark.catalog.dropTempView("people")
+
+ .. note:: Deprecated in 2.0, use createOrReplaceTempView instead.
+ """
+ self._jdf.createOrReplaceTempView(name)
+
+ @since(2.0)
+ def createTempView(self, name):
+ """Creates a temporary view with this DataFrame.
+
+ The lifetime of this temporary table is tied to the :class:`SparkSession`
+ that was used to create this :class:`DataFrame`.
+ throws :class:`TempTableAlreadyExistsException`, if the view name already exists in the
+ catalog.
+
+ >>> df.createTempView("people")
+ >>> df2 = spark.sql("select * from people")
+ >>> sorted(df.collect()) == sorted(df2.collect())
+ True
+ >>> df.createTempView("people") # doctest: +IGNORE_EXCEPTION_DETAIL
+ Traceback (most recent call last):
+ ...
+ Py4JJavaError: ...
+ : org.apache.spark.sql.catalyst.analysis.TempTableAlreadyExistsException...
+ >>> spark.catalog.dropTempView("people")
+
+ """
+ self._jdf.createTempView(name)
+
+ @since(2.0)
+ def createOrReplaceTempView(self, name):
+ """Creates or replaces a temporary view with this DataFrame.
+
+ The lifetime of this temporary table is tied to the :class:`SparkSession`
+ that was used to create this :class:`DataFrame`.
+
+ >>> df.createOrReplaceTempView("people")
+ >>> df2 = df.filter(df.age > 3)
+ >>> df2.createOrReplaceTempView("people")
+ >>> df3 = spark.sql("select * from people")
+ >>> sorted(df3.collect()) == sorted(df2.collect())
+ True
+ >>> spark.catalog.dropTempView("people")
+
"""
- self._jdf.registerTempTable(name)
+ self._jdf.createOrReplaceTempView(name)
@property
@since(1.4)
@@ -1479,12 +1523,13 @@ class DataFrameStatFunctions(object):
def _test():
import doctest
from pyspark.context import SparkContext
- from pyspark.sql import Row, SQLContext
+ from pyspark.sql import Row, SQLContext, SparkSession
import pyspark.sql.dataframe
globs = pyspark.sql.dataframe.__dict__.copy()
sc = SparkContext('local[4]', 'PythonTest')
globs['sc'] = sc
globs['sqlContext'] = SQLContext(sc)
+ globs['spark'] = SparkSession(sc)
globs['df'] = sc.parallelize([(2, 'Alice'), (5, 'Bob')])\
.toDF(StructType([StructField('age', IntegerType()),
StructField('name', StringType())]))
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 4ee9ab8ab2..ae314359d5 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -160,7 +160,7 @@ class SparkSession(object):
... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
... time=datetime(2014, 8, 1, 14, 1, 5))])
>>> df = allTypes.toDF()
- >>> df.registerTempTable("allTypes")
+ >>> df.createOrReplaceTempView("allTypes")
>>> spark.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
... 'from allTypes where b and i > 0').collect()
[Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
@@ -484,7 +484,7 @@ class SparkSession(object):
:return: :class:`DataFrame`
- >>> spark.catalog.registerTable(df, "table1")
+ >>> df.createOrReplaceTempView("table1")
>>> df2 = spark.sql("SELECT field1 AS f1, field2 as f2 from table1")
>>> df2.collect()
[Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
@@ -497,7 +497,7 @@ class SparkSession(object):
:return: :class:`DataFrame`
- >>> spark.catalog.registerTable(df, "table1")
+ >>> df.createOrReplaceTempView("table1")
>>> df2 = spark.table("table1")
>>> sorted(df.collect()) == sorted(df2.collect())
True