about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author Yin Huai <yhuai@databricks.com> 2015-02-12 18:08:01 -0800
committer Michael Armbrust <michael@databricks.com> 2015-02-12 18:08:01 -0800
commit 1d0596a16e1d3add2631f5d8169aeec2876a1362 (patch)
tree d691a0e0e370a13f8cd35ec7925ebddb2e159ff5 /python
parent c025a468826e9b9f62032e207daa9d42d9dba3ca (diff)
download spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.tar.gz
spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.tar.bz2
spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.zip
[SPARK-3299][SQL]Public API in SQLContext to list tables
https://issues.apache.org/jira/browse/SPARK-3299

Author: Yin Huai <yhuai@databricks.com>

Closes #4547 from yhuai/tables and squashes the following commits:

6c8f92e [Yin Huai] Add tableNames.
acbb281 [Yin Huai] Update Python test.
7793dcb [Yin Huai] Fix scala test.
572870d [Yin Huai] Address comments.
aba2e88 [Yin Huai] Format.
12c86df [Yin Huai] Add tables() to SQLContext to return a DataFrame containing existing tables.
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/sql/context.py 34
1 files changed, 34 insertions, 0 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index db4bcbece2..082f1b691b 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -621,6 +621,40 @@ class SQLContext(object):
"""
return DataFrame(self._ssql_ctx.table(tableName), self)
+ def tables(self, dbName=None):
+ """Returns a DataFrame containing names of tables in the given database.
+
+ If `dbName` is not specified, the current database will be used.
+
+ The returned DataFrame has two columns, tableName and isTemporary
+ (a column with BooleanType indicating if a table is a temporary one or not).
+
+ >>> sqlCtx.registerRDDAsTable(df, "table1")
+ >>> df2 = sqlCtx.tables()
+ >>> df2.filter("tableName = 'table1'").first()
+ Row(tableName=u'table1', isTemporary=True)
+ """
+ if dbName is None:
+ return DataFrame(self._ssql_ctx.tables(), self)
+ else:
+ return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+ def tableNames(self, dbName=None):
+ """Returns a list of names of tables in the database `dbName`.
+
+ If `dbName` is not specified, the current database will be used.
+
+ >>> sqlCtx.registerRDDAsTable(df, "table1")
+ >>> "table1" in sqlCtx.tableNames()
+ True
+ >>> "table1" in sqlCtx.tableNames("db")
+ True
+ """
+ if dbName is None:
+ return [name for name in self._ssql_ctx.tableNames()]
+ else:
+ return [name for name in self._ssql_ctx.tableNames(dbName)]
+
def cacheTable(self, tableName):
"""Caches the specified table in-memory."""
self._ssql_ctx.cacheTable(tableName)