author    | Sean Zhong <seanzhong@databricks.com> | 2016-05-18 09:01:59 +0800
committer | Cheng Lian <lian@databricks.com>      | 2016-05-18 09:01:59 +0800
commit    | 25b315e6cad7c27b62dcaa2c194293c1115fdfb3 (patch)
tree      | cfeebcaf553d78ca80a70f7139a765e7759f0410 /python
parent    | b674e67c22bf663334e537e35787c00533adbb04 (diff)
[SPARK-15171][SQL] Remove the references to deprecated method dataset.registerTempTable
## What changes were proposed in this pull request?
Update the unit test code, examples, and documentation to remove calls to the deprecated method `dataset.registerTempTable`, replacing them with `createOrReplaceTempView`.
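For reference, the mechanical change applied throughout is the one-line swap below (a minimal sketch; the `people` DataFrame and view name are illustrative, not part of this patch):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
people = spark.createDataFrame([(1, "alice"), (2, "bob")], ["id", "name"])

# Deprecated since Spark 2.0 (kept as a thin alias of createOrReplaceTempView):
# people.registerTempTable("people")

# Replacement used throughout this PR:
people.createOrReplaceTempView("people")
spark.sql("SELECT name FROM people WHERE id > 1").show()
```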
## How was this patch tested?
This PR only changes the unit test code, examples, and comments. It should be safe.
This is a follow-up to PR https://github.com/apache/spark/pull/12945, which has been merged.
Author: Sean Zhong <seanzhong@databricks.com>
Closes #13098 from clockfly/spark-15171-remove-deprecation.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/context.py    |  4
-rw-r--r-- | python/pyspark/sql/readwriter.py |  2
-rw-r--r-- | python/pyspark/sql/session.py    |  2
-rw-r--r-- | python/pyspark/sql/tests.py      | 25
4 files changed, 17 insertions, 16 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index ca111ae9bb..e8e60c6412 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -57,7 +57,7 @@ class SQLContext(object):
     ...     b=True, list=[1, 2, 3], dict={"s": 0}, row=Row(a=1),
     ...     time=datetime(2014, 8, 1, 14, 1, 5))])
     >>> df = allTypes.toDF()
-    >>> df.registerTempTable("allTypes")
+    >>> df.createOrReplaceTempView("allTypes")
     >>> sqlContext.sql('select i+1, d+1, not b, list[1], dict["s"], time, row.a '
     ...     'from allTypes where b and i > 0').collect()
     [Row((i + CAST(1 AS BIGINT))=2, (d + CAST(1 AS DOUBLE))=2.0, (NOT b)=False, list[1]=2, \
@@ -106,7 +106,7 @@ class SQLContext(object):
     def newSession(self):
         """
         Returns a new SQLContext as new session, that has separate SQLConf,
-        registered temporary tables and UDFs, but shared SparkContext and
+        registered temporary views and UDFs, but shared SparkContext and
         table cache.
         """
         return self.__class__(self._sc, self.sparkSession.newSession())
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c98aef1a0e..8e6bce9001 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -266,7 +266,7 @@ class DataFrameReader(object):
         :param tableName: string, name of the table.

         >>> df = spark.read.parquet('python/test_support/sql/parquet_partitioned')
-        >>> df.registerTempTable('tmpTable')
+        >>> df.createOrReplaceTempView('tmpTable')
         >>> spark.read.table('tmpTable').dtypes
         [('name', 'string'), ('year', 'int'), ('month', 'int'), ('day', 'int')]
         """
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 0781b442cb..257a239c8d 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -186,7 +186,7 @@ class SparkSession(object):
     def newSession(self):
         """
         Returns a new SparkSession as new session, that has separate SQLConf,
-        registered temporary tables and UDFs, but shared SparkContext and
+        registered temporary views and UDFs, but shared SparkContext and
         table cache.
         """
         return self.__class__(self._sc, self._jsparkSession.newSession())
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 0977c43a39..e86f44281d 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -294,7 +294,8 @@ class SQLTests(ReusedPySparkTestCase):

     def test_udf2(self):
         self.spark.catalog.registerFunction("strlen", lambda string: len(string), IntegerType())
-        self.spark.createDataFrame(self.sc.parallelize([Row(a="test")])).registerTempTable("test")
+        self.spark.createDataFrame(self.sc.parallelize([Row(a="test")]))\
+            .createOrReplaceTempView("test")
         [res] = self.spark.sql("SELECT strlen(a) FROM test WHERE strlen(a) > 1").collect()
         self.assertEqual(4, res[0])

@@ -320,7 +321,7 @@ class SQLTests(ReusedPySparkTestCase):
     def test_udf_with_array_type(self):
         d = [Row(l=list(range(3)), d={"key": list(range(5))})]
         rdd = self.sc.parallelize(d)
-        self.spark.createDataFrame(rdd).registerTempTable("test")
+        self.spark.createDataFrame(rdd).createOrReplaceTempView("test")
         self.spark.catalog.registerFunction("copylist", lambda l: list(l), ArrayType(IntegerType()))
         self.spark.catalog.registerFunction("maplen", lambda d: len(d), IntegerType())
         [(l1, l2)] = self.spark.sql("select copylist(l), maplen(d) from test").collect()
@@ -360,7 +361,7 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertTrue(df.is_cached)
         self.assertEqual(2, df.count())

-        df.registerTempTable("temp")
+        df.createOrReplaceTempView("temp")
         df = self.spark.sql("select foo from temp")
         df.count()
         df.collect()
@@ -420,7 +421,7 @@ class SQLTests(ReusedPySparkTestCase):
         df = self.spark.createDataFrame(rdd)
         self.assertEqual([], df.rdd.map(lambda r: r.l).first())
         self.assertEqual([None, ""], df.rdd.map(lambda r: r.s).collect())
-        df.registerTempTable("test")
+        df.createOrReplaceTempView("test")
         result = self.spark.sql("SELECT l[0].a from test where d['key'].d = '2'")
         self.assertEqual(1, result.head()[0])

@@ -428,7 +429,7 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertEqual(df.schema, df2.schema)
         self.assertEqual({}, df2.rdd.map(lambda r: r.d).first())
         self.assertEqual([None, ""], df2.rdd.map(lambda r: r.s).collect())
-        df2.registerTempTable("test2")
+        df2.createOrReplaceTempView("test2")
         result = self.spark.sql("SELECT l[0].a from test2 where d['key'].d = '2'")
         self.assertEqual(1, result.head()[0])

@@ -487,7 +488,7 @@ class SQLTests(ReusedPySparkTestCase):
                      datetime(2010, 1, 1, 1, 1, 1), 1, 2, [1, 2, 3], None)
         self.assertEqual(r, results.first())

-        df.registerTempTable("table2")
+        df.createOrReplaceTempView("table2")
         r = self.spark.sql("SELECT byte1 - 1 AS byte1, byte2 + 1 AS byte2, " +
                            "short1 + 1 AS short1, short2 - 1 AS short2, int1 - 1 AS int1, " +
                            "float1 + 1.5 as float1 FROM table2").first()
@@ -515,7 +516,7 @@ class SQLTests(ReusedPySparkTestCase):
         row = Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})
         self.assertEqual(1, row.asDict()['l'][0].a)
         df = self.sc.parallelize([row]).toDF()
-        df.registerTempTable("test")
+        df.createOrReplaceTempView("test")
         row = self.spark.sql("select l, d from test").head()
         self.assertEqual(1, row.asDict()["l"][0].a)
         self.assertEqual(1.0, row.asDict()['d']['key'].c)
@@ -556,7 +557,7 @@ class SQLTests(ReusedPySparkTestCase):
         schema = df.schema
         field = [f for f in schema.fields if f.name == "point"][0]
         self.assertEqual(type(field.dataType), ExamplePointUDT)
-        df.registerTempTable("labeled_point")
+        df.createOrReplaceTempView("labeled_point")
         point = self.spark.sql("SELECT point FROM labeled_point").head().point
         self.assertEqual(point, ExamplePoint(1.0, 2.0))
@@ -565,7 +566,7 @@ class SQLTests(ReusedPySparkTestCase):
         schema = df.schema
         field = [f for f in schema.fields if f.name == "point"][0]
         self.assertEqual(type(field.dataType), PythonOnlyUDT)
-        df.registerTempTable("labeled_point")
+        df.createOrReplaceTempView("labeled_point")
         point = self.spark.sql("SELECT point FROM labeled_point").head().point
         self.assertEqual(point, PythonOnlyPoint(1.0, 2.0))
@@ -1427,7 +1428,7 @@ class SQLTests(ReusedPySparkTestCase):
         spark.sql("CREATE DATABASE some_db")
         self.assertEquals(spark.catalog.listTables(), [])
         self.assertEquals(spark.catalog.listTables("some_db"), [])
-        spark.createDataFrame([(1, 1)]).registerTempTable("temp_tab")
+        spark.createDataFrame([(1, 1)]).createOrReplaceTempView("temp_tab")
         spark.sql("CREATE TABLE tab1 (name STRING, age INT)")
         spark.sql("CREATE TABLE some_db.tab2 (name STRING, age INT)")
         tables = sorted(spark.catalog.listTables(), key=lambda t: t.name)
@@ -1554,8 +1555,8 @@ class SQLTests(ReusedPySparkTestCase):

     def test_cache(self):
         spark = self.spark
-        spark.createDataFrame([(2, 2), (3, 3)]).registerTempTable("tab1")
-        spark.createDataFrame([(2, 2), (3, 3)]).registerTempTable("tab2")
+        spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab1")
+        spark.createDataFrame([(2, 2), (3, 3)]).createOrReplaceTempView("tab2")
         self.assertFalse(spark.catalog.isCached("tab1"))
         self.assertFalse(spark.catalog.isCached("tab2"))
         spark.catalog.cacheTable("tab1")
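The docstring fix to `newSession` above also hints at why the rename matters: temporary views, like the old temporary tables, are scoped to a single session. A minimal sketch of that behavior, assuming an interactive PySpark setup (names are illustrative, not from this patch):

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.createDataFrame([(1, 1)], ["a", "b"]).createOrReplaceTempView("tab")
spark.sql("SELECT * FROM tab").show()            # visible: registered in this session

other = spark.newSession()                       # separate SQLConf and temp views...
assert other.sparkContext is spark.sparkContext  # ...but a shared SparkContext
try:
    other.sql("SELECT * FROM tab")               # the view is not visible here
except Exception as err:                         # expect an AnalysisException
    print("not found in new session:", type(err).__name__)
```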