diff options
author | Yin Huai <yhuai@databricks.com> | 2015-02-12 18:08:01 -0800 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2015-02-12 18:08:01 -0800 |
commit | 1d0596a16e1d3add2631f5d8169aeec2876a1362 (patch) | |
tree | d691a0e0e370a13f8cd35ec7925ebddb2e159ff5 /python | |
parent | c025a468826e9b9f62032e207daa9d42d9dba3ca (diff) | |
download | spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.tar.gz spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.tar.bz2 spark-1d0596a16e1d3add2631f5d8169aeec2876a1362.zip |
[SPARK-3299][SQL] Public API in SQLContext to list tables
https://issues.apache.org/jira/browse/SPARK-3299
Author: Yin Huai <yhuai@databricks.com>
Closes #4547 from yhuai/tables and squashes the following commits:
6c8f92e [Yin Huai] Add tableNames.
acbb281 [Yin Huai] Update Python test.
7793dcb [Yin Huai] Fix scala test.
572870d [Yin Huai] Address comments.
aba2e88 [Yin Huai] Format.
12c86df [Yin Huai] Add tables() to SQLContext to return a DataFrame containing existing tables.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/context.py | 34 |
1 files changed, 34 insertions, 0 deletions
diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py
index db4bcbece2..082f1b691b 100644
--- a/python/pyspark/sql/context.py
+++ b/python/pyspark/sql/context.py
@@ -621,6 +621,40 @@ class SQLContext(object):
         """
         return DataFrame(self._ssql_ctx.table(tableName), self)
 
+    def tables(self, dbName=None):
+        """Returns a DataFrame containing names of tables in the given database.
+
+        If `dbName` is not specified, the current database will be used.
+
+        The returned DataFrame has two columns, tableName and isTemporary
+        (a column with BooleanType indicating if a table is a temporary one or not).
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> df2 = sqlCtx.tables()
+        >>> df2.filter("tableName = 'table1'").first()
+        Row(tableName=u'table1', isTemporary=True)
+        """
+        if dbName is None:
+            return DataFrame(self._ssql_ctx.tables(), self)
+        else:
+            return DataFrame(self._ssql_ctx.tables(dbName), self)
+
+    def tableNames(self, dbName=None):
+        """Returns a list of names of tables in the database `dbName`.
+
+        If `dbName` is not specified, the current database will be used.
+
+        >>> sqlCtx.registerRDDAsTable(df, "table1")
+        >>> "table1" in sqlCtx.tableNames()
+        True
+        >>> "table1" in sqlCtx.tableNames("db")
+        True
+        """
+        if dbName is None:
+            return [name for name in self._ssql_ctx.tableNames()]
+        else:
+            return [name for name in self._ssql_ctx.tableNames(dbName)]
+
     def cacheTable(self, tableName):
         """Caches the specified table in-memory."""
         self._ssql_ctx.cacheTable(tableName)