From 18ee55dd5de0597d7fb69e8e16ac3744356a6918 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Tue, 17 Jan 2017 12:54:50 +0800
Subject: [SPARK-19148][SQL] do not expose the external table concept in Catalog

## What changes were proposed in this pull request?

In https://github.com/apache/spark/pull/16296, we reached a consensus that we should hide the external/managed table concept from users and only expose a custom table path. This PR renames `Catalog.createExternalTable` to `createTable` (the old versions are kept for backward compatibility) and sets the table type to EXTERNAL only if a `path` is specified in the options.

## How was this patch tested?

New tests in `CatalogSuite`.

Author: Wenchen Fan

Closes #16528 from cloud-fan/create-table.
---
 python/pyspark/sql/catalog.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 30c7a3fe4f..253a750629 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 #
 
+import warnings
 from collections import namedtuple
 
 from pyspark import since
@@ -138,7 +139,27 @@ class Catalog(object):
 
     @since(2.0)
     def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
-        """Creates an external table based on the dataset in a data source.
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
+        """
+        warnings.warn(
+            "createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
+            DeprecationWarning)
+        return self.createTable(tableName, path, source, schema, **options)
+
+    @since(2.2)
+    def createTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates a table based on the dataset in a data source.
 
         It returns the DataFrame associated with the external table.
 
@@ -157,12 +178,12 @@
             source = self._sparkSession.conf.get(
                 "spark.sql.sources.default", "org.apache.spark.sql.parquet")
         if schema is None:
-            df = self._jcatalog.createExternalTable(tableName, source, options)
+            df = self._jcatalog.createTable(tableName, source, options)
         else:
             if not isinstance(schema, StructType):
                 raise TypeError("schema should be StructType")
             scala_datatype = self._jsparkSession.parseDataType(schema.json())
-            df = self._jcatalog.createExternalTable(tableName, source, scala_datatype, options)
+            df = self._jcatalog.createTable(tableName, source, scala_datatype, options)
         return DataFrame(df, self._sparkSession._wrapped)
 
     @since(2.0)
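
Below is a minimal usage sketch (not part of the patch) illustrating the user-facing contract this change establishes: table type is inferred from the presence of a `path` option rather than from which method you call. It assumes Spark 2.2+, a running `SparkSession` named `spark`, and a hypothetical Parquet dataset at `/tmp/people.parquet`; the table names are invented for the example.

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StringType, StructType

spark = SparkSession.builder.getOrCreate()

# With a `path` option the table is created as EXTERNAL: dropping the
# table later leaves the files at /tmp/people.parquet in place.
external_df = spark.catalog.createTable(
    "people_ext", path="/tmp/people.parquet", source="parquet")

# Without `path` the table is MANAGED: Spark stores its data under the
# warehouse directory and deletes it when the table is dropped.
schema = StructType([StructField("name", StringType())])
managed_df = spark.catalog.createTable(
    "people_managed", source="parquet", schema=schema)

# The old entry point still works, but now emits a DeprecationWarning
# and simply forwards to createTable.
legacy_df = spark.catalog.createExternalTable(
    "people_legacy", path="/tmp/people.parquet", source="parquet")
```

With this design, `createExternalTable` survives only for backward compatibility; the external/managed distinction is no longer something callers select explicitly.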