path: root/python/pyspark
author     Felix Cheung <felixcheung_m@hotmail.com>  2017-04-06 09:09:43 -0700
committer  Felix Cheung <felixcheung@apache.org>     2017-04-06 09:09:43 -0700
commit     bccc330193217b2ec9660e06f1db6dd58f7af5d8 (patch)
tree       b5084b020a07133d47b81cd92c731510300012c6 /python/pyspark
parent     d009fb369bbea0df81bbcf9c8028d14cfcaa683b (diff)
[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API
## What changes were proposed in this pull request?

Update doc to remove external for createTable, add refreshByPath in python

## How was this patch tested?

manual

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #17512 from felixcheung/catalogdoc.
Diffstat (limited to 'python/pyspark')
-rw-r--r--  python/pyspark/sql/catalog.py  27
-rw-r--r--  python/pyspark/sql/context.py   2
2 files changed, 20 insertions(+), 9 deletions(-)
diff --git a/python/pyspark/sql/catalog.py b/python/pyspark/sql/catalog.py
index 253a750629..41e68a45a6 100644
--- a/python/pyspark/sql/catalog.py
+++ b/python/pyspark/sql/catalog.py
@@ -72,10 +72,10 @@ class Catalog(object):
@ignore_unicode_prefix
@since(2.0)
def listTables(self, dbName=None):
- """Returns a list of tables in the specified database.
+ """Returns a list of tables/views in the specified database.
If no database is specified, the current database is used.
- This includes all temporary tables.
+ This includes all temporary views.
"""
if dbName is None:
dbName = self.currentDatabase()
@@ -115,7 +115,7 @@ class Catalog(object):
@ignore_unicode_prefix
@since(2.0)
def listColumns(self, tableName, dbName=None):
- """Returns a list of columns for the given table in the specified database.
+ """Returns a list of columns for the given table/view in the specified database.
If no database is specified, the current database is used.
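As a rough usage sketch of the two listing calls above (an active SparkSession is assumed, and the "people" table name is made up for illustration):

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Tables and temporary views in the current database
for table in spark.catalog.listTables():
    print(table.name, table.tableType, table.isTemporary)

# Columns of a given table/view ("people" is a hypothetical name)
for column in spark.catalog.listColumns("people"):
    print(column.name, column.dataType, column.nullable)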
@@ -161,14 +161,15 @@ class Catalog(object):
def createTable(self, tableName, path=None, source=None, schema=None, **options):
"""Creates a table based on the dataset in a data source.
- It returns the DataFrame associated with the external table.
+ It returns the DataFrame associated with the table.
The data source is specified by the ``source`` and a set of ``options``.
If ``source`` is not specified, the default data source configured by
- ``spark.sql.sources.default`` will be used.
+ ``spark.sql.sources.default`` will be used. When ``path`` is specified, an external table is
+ created from the data at the given path. Otherwise a managed table is created.
Optionally, a schema can be provided as the schema of the returned :class:`DataFrame` and
- created external table.
+ created table.
:return: :class:`DataFrame`
"""
@@ -276,14 +277,24 @@ class Catalog(object):
@since(2.0)
def refreshTable(self, tableName):
- """Invalidate and refresh all the cached metadata of the given table."""
+ """Invalidates and refreshes all the cached data and metadata of the given table."""
self._jcatalog.refreshTable(tableName)
@since('2.1.1')
def recoverPartitions(self, tableName):
- """Recover all the partitions of the given table and update the catalog."""
+ """Recovers all the partitions of the given table and update the catalog.
+
+ Only works with a partitioned table, and not a view.
+ """
self._jcatalog.recoverPartitions(tableName)
+ @since('2.2.0')
+ def refreshByPath(self, path):
+ """Invalidates and refreshes all the cached data (and the associated metadata) for any
+ DataFrame that contains the given data source path.
+ """
+ self._jcatalog.refreshByPath(path)
+
def _reset(self):
"""(Internal use only) Drop all existing databases (except "default"), tables,
partitions and functions, and set the current database to "default".
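Taken together, the three cache-maintenance calls in this hunk could be used along these lines (the table and path names are hypothetical; `refreshByPath` is the method this patch adds):

# Drop and reload the cached data/metadata of one table after its files
# changed outside of Spark.
spark.catalog.refreshTable("events_ext")

# Re-discover the partition directories of a partitioned table (not a view)
# and register them in the catalog.
spark.catalog.recoverPartitions("events_partitioned")

# Invalidate every cached DataFrame that reads from this data source path.
spark.catalog.refreshByPath("/tmp/events")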
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index c22f4b87e1..fdb7abbad4 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -385,7 +385,7 @@ class SQLContext(object):
@since(1.0)
def table(self, tableName):
- """Returns the specified table as a :class:`DataFrame`.
+ """Returns the specified table or view as a :class:`DataFrame`.
:return: :class:`DataFrame`
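For completeness, a sketch of the `table` call whose docstring is touched here; the "people" name is hypothetical, and on Spark 2.x the same lookup is usually done through the SparkSession:

from pyspark.sql import SparkSession, SQLContext

spark = SparkSession.builder.getOrCreate()
sqlContext = SQLContext(spark.sparkContext)  # legacy entry point shown above

# Returns the table or view "people" as a DataFrame.
people_df = sqlContext.table("people")

# Equivalent call on the session itself.
people_df = spark.table("people")
people_df.printSchema()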