diff options
author | Felix Cheung <felixcheung_m@hotmail.com> | 2017-04-06 09:09:43 -0700 |
---|---|---|
committer | Felix Cheung <felixcheung@apache.org> | 2017-04-06 09:09:43 -0700 |
commit | bccc330193217b2ec9660e06f1db6dd58f7af5d8 (patch) | |
tree | b5084b020a07133d47b81cd92c731510300012c6 /python/pyspark | |
parent | d009fb369bbea0df81bbcf9c8028d14cfcaa683b (diff) | |
download | spark-bccc330193217b2ec9660e06f1db6dd58f7af5d8.tar.gz spark-bccc330193217b2ec9660e06f1db6dd58f7af5d8.tar.bz2 spark-bccc330193217b2ec9660e06f1db6dd58f7af5d8.zip |
[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API
## What changes were proposed in this pull request?
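Update the catalog function docs so that `createTable` is no longer described as always creating an "external" table (the table is external only when `path` is specified; otherwise it is managed), and add the `refreshByPath` API to PySpark.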
## How was this patch tested?
Tested manually.
Author: Felix Cheung <felixcheung_m@hotmail.com>
Closes #17512 from felixcheung/catalogdoc.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/sql/catalog.py | 27 | ||||
-rw-r--r-- | python/pyspark/sql/context.py | 2 |
2 files changed, 20 insertions, 9 deletions
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py index 253a750629..41e68a45a6 100644 --- a/python/pyspark/sql/catalog.py +++ b/python/pyspark/sql/catalog.py @@ -72,10 +72,10 @@ class Catalog(object): @ignore_unicode_prefix @since(2.0) def listTables(self, dbName=None): - """Returns a list of tables in the specified database. + """Returns a list of tables/views in the specified database. If no database is specified, the current database is used. - This includes all temporary tables. + This includes all temporary views. """ if dbName is None: dbName = self.currentDatabase() @@ -115,7 +115,7 @@ class Catalog(object): @ignore_unicode_prefix @since(2.0) def listColumns(self, tableName, dbName=None): - """Returns a list of columns for the given table in the specified database. + """Returns a list of columns for the given table/view in the specified database. If no database is specified, the current database is used. @@ -161,14 +161,15 @@ class Catalog(object): def createTable(self, tableName, path=None, source=None, schema=None, **options): """Creates a table based on the dataset in a data source. - It returns the DataFrame associated with the external table. + It returns the DataFrame associated with the table. The data source is specified by the ``source`` and a set of ``options``. If ``source`` is not specified, the default data source configured by - ``spark.sql.sources.default`` will be used. + ``spark.sql.sources.default`` will be used. When ``path`` is specified, an external table is + created from the data at the given path. Otherwise a managed table is created. Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and - created external table. + created table. 
:return: :class:`DataFrame` """ @@ -276,14 +277,24 @@ class Catalog(object): @since(2.0) def refreshTable(self, tableName): - """Invalidate and refresh all the cached metadata of the given table.""" + """Invalidates and refreshes all the cached data and metadata of the given table.""" self._jcatalog.refreshTable(tableName) @since('2.1.1') def recoverPartitions(self, tableName): - """Recover all the partitions of the given table and update the catalog.""" + """Recovers all the partitions of the given table and update the catalog. + + Only works with a partitioned table, and not a view. + """ self._jcatalog.recoverPartitions(tableName) + @since('2.2.0') + def refreshByPath(self, path): + """Invalidates and refreshes all the cached data (and the associated metadata) for any + DataFrame that contains the given data source path. + """ + self._jcatalog.refreshByPath(path) + def _reset(self): """(Internal use only) Drop all existing databases (except "default"), tables, partitions and functions, and set the current database to "default". diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index c22f4b87e1..fdb7abbad4 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -385,7 +385,7 @@ class SQLContext(object): @since(1.0) def table(self, tableName): - """Returns the specified table as a :class:`DataFrame`. + """Returns the specified table or view as a :class:`DataFrame`. :return: :class:`DataFrame` |