From 18ee55dd5de0597d7fb69e8e16ac3744356a6918 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Tue, 17 Jan 2017 12:54:50 +0800
Subject: [SPARK-19148][SQL] do not expose the external table concept in Catalog

## What changes were proposed in this pull request?

In https://github.com/apache/spark/pull/16296, we reached a consensus that we should hide the external/managed table concept from users and only expose a custom table path. This PR renames `Catalog.createExternalTable` to `createTable` (the old versions are kept for backward compatibility) and sets the table type to EXTERNAL only if a `path` is specified in the options.

## How was this patch tested?

New tests in `CatalogSuite`.

Author: Wenchen Fan

Closes #16528 from cloud-fan/create-table.
---
 python/pyspark/sql/catalog.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 30c7a3fe4f..253a750629 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -15,6 +15,7 @@
 # limitations under the License.
 #
 
+import warnings
 from collections import namedtuple
 
 from pyspark import since
@@ -138,7 +139,27 @@ class Catalog(object):
 
     @since(2.0)
     def createExternalTable(self, tableName, path=None, source=None, schema=None, **options):
-        """Creates an external table based on the dataset in a data source.
+        """Creates a table based on the dataset in a data source.
+
+        It returns the DataFrame associated with the external table.
+
+        The data source is specified by the ``source`` and a set of ``options``.
+        If ``source`` is not specified, the default data source configured by
+        ``spark.sql.sources.default`` will be used.
+
+        Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
+        created external table.
+
+        :return: :class:`DataFrame`
+        """
+        warnings.warn(
+            "createExternalTable is deprecated since Spark 2.2, please use createTable instead.",
+            DeprecationWarning)
+        return self.createTable(tableName, path, source, schema, **options)
+
+    @since(2.2)
+    def createTable(self, tableName, path=None, source=None, schema=None, **options):
+        """Creates a table based on the dataset in a data source.
 
         It returns the DataFrame associated with the external table.
 
@@ -157,12 +178,12 @@
             source = self._sparkSession.conf.get(
                 "spark.sql.sources.default", "org.apache.spark.sql.parquet")
         if schema is None:
-            df = self._jcatalog.createExternalTable(tableName, source, options)
+            df = self._jcatalog.createTable(tableName, source, options)
         else:
             if not isinstance(schema, StructType):
                 raise TypeError("schema should be StructType")
             scala_datatype = self._jsparkSession.parseDataType(schema.json())
-            df = self._jcatalog.createExternalTable(tableName, source, scala_datatype, options)
+            df = self._jcatalog.createTable(tableName, source, scala_datatype, options)
         return DataFrame(df, self._sparkSession._wrapped)
 
     @since(2.0)
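
Below is a minimal usage sketch (not part of the patch) illustrating the user-facing contract this change establishes: table type is inferred from the presence of a `path` option rather than from which method you call. It assumes Spark 2.2+, a running `SparkSession` named `spark`, and a hypothetical Parquet dataset at `/tmp/people.parquet`; the table names are invented for the example.

```python
from pyspark.sql import SparkSession
from pyspark.sql.types import StructField, StringType, StructType

spark = SparkSession.builder.getOrCreate()

# With a `path` option the table is created as EXTERNAL: dropping the
# table later leaves the files at /tmp/people.parquet in place.
external_df = spark.catalog.createTable(
    "people_ext", path="/tmp/people.parquet", source="parquet")

# Without `path` the table is MANAGED: Spark stores its data under the
# warehouse directory and deletes it when the table is dropped.
schema = StructType([StructField("name", StringType())])
managed_df = spark.catalog.createTable(
    "people_managed", source="parquet", schema=schema)

# The old entry point still works, but now emits a DeprecationWarning
# and simply forwards to createTable.
legacy_df = spark.catalog.createExternalTable(
    "people_legacy", path="/tmp/people.parquet", source="parquet")
```

With this design, `createExternalTable` survives only for backward compatibility; the external/managed distinction is no longer something callers select explicitly.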