path: root/python
author    Sean Zhong <seanzhong@databricks.com>    2016-05-18 09:01:59 +0800
committer Cheng Lian <lian@databricks.com>         2016-05-18 09:01:59 +0800
commit    25b315e6cad7c27b62dcaa2c194293c1115fdfb3 (patch)
tree      cfeebcaf553d78ca80a70f7139a765e7759f0410 /python
parent    b674e67c22bf663334e537e35787c00533adbb04 (diff)
download  spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.tar.gz
          spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.tar.bz2
          spark-25b315e6cad7c27b62dcaa2c194293c1115fdfb3.zip
[SPARK-15171][SQL] Remove the references to deprecated method dataset.registerTempTable
## What changes were proposed in this pull request?

Update the unit test code, examples, and documentation to remove calls to the deprecated method `dataset.registerTempTable`.

## How was this patch tested?

This PR only changes unit test code, examples, and comments, so it should be safe. It is a follow-up to PR https://github.com/apache/spark/pull/12945, which was merged.

Author: Sean Zhong <seanzhong@databricks.com>

Closes #13098 from clockfly/spark-15171-remove-deprecation.
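For context, a minimal before/after sketch of the migration this patch applies everywhere (the SparkSession setup, DataFrame contents, and view name below are illustrative, not taken from this patch):

```python
from pyspark.sql import Row, SparkSession

spark = SparkSession.builder.master("local[1]").appName("temp-view-demo").getOrCreate()
df = spark.createDataFrame([Row(name="Alice", age=1)])

# Deprecated since Spark 2.0: register the DataFrame as a temporary table.
# df.registerTempTable("people")

# Replacement used throughout this patch: create (or replace) a temporary view.
df.createOrReplaceTempView("people")

spark.sql("SELECT name FROM people WHERE age >= 1").show()
```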
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/context.py      4
-rw-r--r--  python/pyspark/sql/readwriter.py   2
-rw-r--r--  python/pyspark/sql/session.py      2
-rw-r--r--  python/pyspark/sql/tests.py       25
4 files changed, 17 insertions, 16 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index ca111ae9bb..e8e60c6412 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -57,7 +57,7 @@ class SQLContext(object):
... b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
... time=datetime(2014, 8, 1, 14, 1, 5))])
>>> df = allTypes.toDF()
- >>> df.registerTempTable("allTypes")
+ >>> df.createOrReplaceTempView("allTypes")
>>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
... 'from allTypes where b and i > 0').collect()
[Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
@@ -106,7 +106,7 @@ class SQLContext(object):
def newSession(self):
"""
Returns a new SQLContext as new session, that has separate SQLConf,
- registered temporary tables and UDFs, but shared SparkContext and
+ registered temporary views and UDFs, but shared SparkContext and
table cache.
"""
return self.__class__(self._sc, self.sparkSession.newSession())
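The docstring above describes per-session isolation of temporary views. A hedged sketch of what that means in practice (view name and session setup are illustrative, not from this patch): a view created in one session is not visible from a session returned by `newSession()`, while the SparkContext and table cache are shared.

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("newSession-demo").getOrCreate()

spark.range(3).createOrReplaceTempView("nums")
print(spark.sql("SELECT count(*) FROM nums").collect())  # visible in this session

other = spark.newSession()  # shares the SparkContext and table cache
# Temporary views are per-session, so "nums" is not visible here; running the
# query below would raise an AnalysisException ("Table or view not found").
# other.sql("SELECT count(*) FROM nums").collect()
```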
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c98aef1a0e..8e6bce9001 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -266,7 +266,7 @@ class DataFrameReader(object):
:param tableName: string, name of the table.
>>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
- >>> df.registerTempTable('tmpTable')
+ >>> df.createOrReplaceTempView('tmpTable')
>>> spark.read.table('tmpTable').dtypes
[('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
"""
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 0781b442cb..257a239c8d 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -186,7 +186,7 @@ class SparkSession(object):
def newSession(self):
"""
Returns a new SparkSession as new session, that has separate SQLConf,
- registered temporary tables and UDFs, but shared SparkContext and
+ registered temporary views and UDFs, but shared SparkContext and
table cache.
"""
return self.__class__(self._sc, self._jsparkSession.newSession())
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 0977c43a39..e86f44281d 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -294,7 +294,8 @@ class SQLTests(ReusedPySparkTestCase):
def test_udf2(self):
self.spark.catalog.registerFunction("strlen", lambda string: len(string), IntegerType())
- self.spark.createDataFrame(self.sc.parallelize([Row(a="test")])).registerTempTable("test")
+ self.spark.createDataFrame(self.sc.parallelize([Row(a="test")]))\
+ .createOrReplaceTempView("test")
[res] = self.spark.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect()
self.assertEqual(4, res[0])
@@ -320,7 +321,7 @@ class SQLTests(ReusedPySparkTestCase):
def test_udf_with_array_type(self):
d = [Row(l=list(range(3)), d={"key": list(range(5))})]
rdd = self.sc.parallelize(d)
- self.spark.createDataFrame(rdd).registerTempTable("test")
+ self.spark.createDataFrame(rdd).createOrReplaceTempView("test")
self.spark.catalog.registerFunction("copylist", lambda l: list(l), ArrayType(IntegerType()))
self.spark.catalog.registerFunction("maplen", lambda d: len(d), IntegerType())
[(l1, l2)] = self.spark.sql("select copylist(l), maplen(d) from test").collect()
@@ -360,7 +361,7 @@ class SQLTests(ReusedPySparkTestCase):
self.assertTrue(df.is_cached)
self.assertEqual(2, df.count())
- df.registerTempTable("temp")
+ df.createOrReplaceTempView("temp")
df = self.spark.sql("select foo from temp")
df.count()
df.collect()
@@ -420,7 +421,7 @@ class SQLTests(ReusedPySparkTestCase):
df = self.spark.createDataFrame(rdd)
self.assertEqual([], df.rdd.map(lambda r: r.l).first())
self.assertEqual([None, ""], df.rdd.map(lambda r: r.s).collect())
- df.registerTempTable("test")
+ df.createOrReplaceTempView("test")
result = self.spark.sql("SELECT l[0].a from test where d['key'].d = '2'")
self.assertEqual(1, result.head()[0])
@@ -428,7 +429,7 @@ class SQLTests(ReusedPySparkTestCase):
self.assertEqual(df.schema, df2.schema)
self.assertEqual({}, df2.rdd.map(lambda r: r.d).first())
self.assertEqual([None, ""], df2.rdd.map(lambda r: r.s).collect())
- df2.registerTempTable("test2")
+ df2.createOrReplaceTempView("test2")
result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'")
self.assertEqual(1, result.head()[0])
@@ -487,7 +488,7 @@ class SQLTests(ReusedPySparkTestCase):
datetime(2010, 1, 1, 1, 1, 1), 1, 2, [1, 2, 3], None)
self.assertEqual(r, results.first())
- df.registerTempTable("table2")
+ df.createOrReplaceTempView("table2")
r = self.spark.sql("SELECT byte1 - 1 AS byte1, byte2 + 1 AS byte2, " +
"short1 + 1 AS short1, short2 - 1 AS short2, int1 - 1 AS int1, " +
"float1 + 1.5 as float1 FROM table2").first()
@@ -515,7 +516,7 @@ class SQLTests(ReusedPySparkTestCase):
row = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})
self.assertEqual(1, row.asDict()['l'][0].a)
df = self.sc.parallelize([row]).toDF()
- df.registerTempTable("test")
+ df.createOrReplaceTempView("test")
row = self.spark.sql("select l, d from test").head()
self.assertEqual(1, row.asDict()["l"][0].a)
self.assertEqual(1.0, row.asDict()['d']['key'].c)
@@ -556,7 +557,7 @@ class SQLTests(ReusedPySparkTestCase):
schema = df.schema
field = [f for f in schema.fields if f.name == "point"][0]
self.assertEqual(type(field.dataType), ExamplePointUDT)
- df.registerTempTable("labeled_point")
+ df.createOrReplaceTempView("labeled_point")
point = self.spark.sql("SELECT point FROM labeled_point").head().point
self.assertEqual(point, ExamplePoint(1.0, 2.0))
@@ -565,7 +566,7 @@ class SQLTests(ReusedPySparkTestCase):
schema = df.schema
field = [f for f in schema.fields if f.name == "point"][0]
self.assertEqual(type(field.dataType), PythonOnlyUDT)
- df.registerTempTable("labeled_point")
+ df.createOrReplaceTempView("labeled_point")
point = self.spark.sql("SELECT point FROM labeled_point").head().point
self.assertEqual(point, PythonOnlyPoint(1.0, 2.0))
@@ -1427,7 +1428,7 @@ class SQLTests(ReusedPySparkTestCase):
spark.sql("CREATE DATABASE some_db")
self.assertEquals(spark.catalog.listTables(), [])
self.assertEquals(spark.catalog.listTables("some_db"), [])
- spark.createDataFrame([(1, 1)]).registerTempTable("temp_tab")
+ spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
spark.sql("CREATE TABLE tab1 (name STRING, age INT)")
spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT)")
tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
@@ -1554,8 +1555,8 @@ class SQLTests(ReusedPySparkTestCase):
def test_cache(self):
spark = self.spark
- spark.createDataFrame([(2, 2), (3, 3)]).registerTempTable("tab1")
- spark.createDataFrame([(2, 2), (3, 3)]).registerTempTable("tab2")
+ spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab1")
+ spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab2")
self.assertFalse(spark.catalog.isCached("tab1"))
self.assertFalse(spark.catalog.isCached("tab2"))
spark.catalog.cacheTable("tab1")
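For readers unfamiliar with the catalog caching API exercised in `test_cache` above, a minimal sketch of how it interacts with temporary views (session setup and view name are illustrative, not from this patch):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").appName("cache-demo").getOrCreate()

spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab1")

spark.catalog.cacheTable("tab1")           # mark the view's data for caching
assert spark.catalog.isCached("tab1")      # True once marked (materialized lazily)

spark.catalog.uncacheTable("tab1")
assert not spark.catalog.isCached("tab1")
```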