author     Felix Cheung <felixcheung_m@hotmail.com>   2017-04-06 09:09:43 -0700
committer  Felix Cheung <felixcheung@apache.org>      2017-04-06 09:09:43 -0700
commit     bccc330193217b2ec9660e06f1db6dd58f7af5d8 (patch)
tree       b5084b020a07133d47b81cd92c731510300012c6 /R
parent     d009fb369bbea0df81bbcf9c8028d14cfcaa683b (diff)
[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API
## What changes were proposed in this pull request?

Update doc to remove external for createTable, add refreshByPath in python

## How was this patch tested?

manual

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #17512 from felixcheung/catalogdoc.
Diffstat (limited to 'R')
-rw-r--r--  R/pkg/R/SQLContext.R | 11
-rw-r--r--  R/pkg/R/catalog.R    | 52
2 files changed, 36 insertions(+), 27 deletions(-)
diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R
index a1edef7608..c2a1e240ad 100644
--- a/R/pkg/R/SQLContext.R
+++ b/R/pkg/R/SQLContext.R
@@ -544,12 +544,15 @@ sql <- function(x, ...) {
dispatchFunc("sql(sqlQuery)", x, ...)
}
-#' Create a SparkDataFrame from a SparkSQL Table
+#' Create a SparkDataFrame from a SparkSQL table or view
#'
-#' Returns the specified Table as a SparkDataFrame. The Table must have already been registered
-#' in the SparkSession.
+#' Returns the specified table or view as a SparkDataFrame. The table or view must already exist or
+#' have already been registered in the SparkSession.
#'
-#' @param tableName The SparkSQL Table to convert to a SparkDataFrame.
+#' @param tableName the qualified or unqualified name that designates a table or view. If a database
+#'                  is specified, it identifies the table/view from the database.
+#'                  Otherwise, it first attempts to find a temporary view with the given name
+#'                  and then matches the table/view from the current database.
#' @return SparkDataFrame
#' @rdname tableToDF
#' @name tableToDF
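
As an illustration of the lookup rule documented above, a minimal SparkR sketch; the table name "people" and database "mydb" are hypothetical:

    sparkR.session()
    df1 <- tableToDF("people")       # unqualified: a temp view named "people" wins, else the current database
    df2 <- tableToDF("mydb.people")  # qualified: resolved directly in database "mydb"
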
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 07a89f763c..4b7f841b55 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -65,7 +65,8 @@ createExternalTable <- function(x, ...) {
#'
#' Caches the specified table in-memory.
#'
-#' @param tableName The name of the table being cached
+#' @param tableName the qualified or unqualified name that designates a table. If no database
+#'                  identifier is provided, it refers to a table in the current database.
#' @return SparkDataFrame
#' @rdname cacheTable
#' @export
@@ -94,7 +95,8 @@ cacheTable <- function(x, ...) {
#'
#' Removes the specified table from the in-memory cache.
#'
-#' @param tableName The name of the table being uncached
+#' @param tableName the qualified or unqualified name that designates a table. If no database
+#'                  identifier is provided, it refers to a table in the current database.
#' @return SparkDataFrame
#' @rdname uncacheTable
#' @export
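
A short usage sketch for the cacheTable/uncacheTable pair documented above (the table name is hypothetical):

    cacheTable("people")                          # mark the table for in-memory caching
    collect(sql("SELECT COUNT(*) FROM people"))   # the first scan materializes the cache
    uncacheTable("people")                        # drop the cached copy when done
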
@@ -162,6 +164,7 @@ clearCache <- function() {
#' @method dropTempTable default
#' @note dropTempTable since 1.4.0
dropTempTable.default <- function(tableName) {
+ .Deprecated("dropTempView", old = "dropTempTable")
if (class(tableName) != "character") {
stop("tableName must be a string.")
}
@@ -169,7 +172,6 @@ dropTempTable.default <- function(tableName) {
}
dropTempTable <- function(x, ...) {
- .Deprecated("dropTempView")
dispatchFunc("dropTempView(viewName)", x, ...)
}
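
The hunk above moves the .Deprecated() call into the default method, so the warning now fires when dropTempTable is actually invoked. A sketch of the deprecated path (view name hypothetical):

    df <- createDataFrame(faithful)            # any SparkDataFrame
    createOrReplaceTempView(df, "tmp_view")
    dropTempTable("tmp_view")                  # still drops the view, but warns to use dropTempView
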
@@ -178,7 +180,7 @@ dropTempTable <- function(x, ...) {
#' Drops the temporary view with the given view name in the catalog.
#' If the view has been cached before, then it will also be uncached.
#'
-#' @param viewName the name of the view to be dropped.
+#' @param viewName the name of the temporary view to be dropped.
#' @return TRUE if the view is dropped successfully, FALSE otherwise.
#' @rdname dropTempView
#' @name dropTempView
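
And the replacement API, whose logical return value signals whether the view existed (view name hypothetical):

    df <- createDataFrame(faithful)
    createOrReplaceTempView(df, "tmp_view")
    dropTempView("tmp_view")    # TRUE: the view existed and was dropped
    dropTempView("tmp_view")    # FALSE: nothing by that name remains
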
@@ -317,10 +319,10 @@ listDatabases <- function() {
dataFrame(callJMethod(callJMethod(catalog, "listDatabases"), "toDF"))
}
-#' Returns a list of tables in the specified database
+#' Returns a list of tables or views in the specified database
#'
-#' Returns a list of tables in the specified database.
-#' This includes all temporary tables.
+#' Returns a list of tables or views in the specified database.
+#' This includes all temporary views.
#'
#' @param databaseName (optional) name of the database
#' @return a SparkDataFrame of the list of tables.
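
A quick sketch of the two call shapes; note the result is itself a SparkDataFrame:

    head(listTables())           # tables and temp views visible in the current database
    head(listTables("default"))  # same, for an explicitly named database
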
@@ -349,11 +351,13 @@ listTables <- function(databaseName = NULL) {
dataFrame(callJMethod(jdst, "toDF"))
}
-#' Returns a list of columns for the given table in the specified database
+#' Returns a list of columns for the given table/view in the specified database
#'
-#' Returns a list of columns for the given table in the specified database.
+#' Returns a list of columns for the given table/view in the specified database.
#'
-#' @param tableName a name of the table.
+#' @param tableName the qualified or unqualified name that designates a table/view. If no database
+#'                  identifier is provided, it refers to a table/view in the current database.
+#'                  If the \code{databaseName} parameter is specified, this must be an unqualified name.
#' @param databaseName (optional) name of the database
#' @return a SparkDataFrame of the list of column descriptions.
#' @rdname listColumns
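
A sketch of the two ways to name the table, per the parameter note above (names hypothetical):

    head(listColumns("mydb.people"))     # qualified name, no databaseName argument
    head(listColumns("people", "mydb"))  # unqualified name plus an explicit database
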
@@ -409,12 +413,13 @@ listFunctions <- function(databaseName = NULL) {
dataFrame(callJMethod(jdst, "toDF"))
}
-#' Recover all the partitions in the directory of a table and update the catalog
+#' Recovers all the partitions in the directory of a table and updates the catalog
#'
-#' Recover all the partitions in the directory of a table and update the catalog. The name should
-#' reference a partitioned table, and not a temporary view.
+#' Recovers all the partitions in the directory of a table and updates the catalog. The name should
+#' reference a partitioned table, and not a view.
#'
-#' @param tableName a name of the table.
+#' @param tableName the qualified or unqualified name that designates a table. If no database
+#'                  identifier is provided, it refers to a table in the current database.
#' @rdname recoverPartitions
#' @name recoverPartitions
#' @export
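
A hedged sketch of when recoverPartitions helps: partition directories written outside Spark SQL are not visible until the catalog is updated (table name and location hypothetical):

    sql(paste("CREATE TABLE logs (msg STRING, dt STRING) USING parquet",
              "PARTITIONED BY (dt) LOCATION '/data/logs'"))
    # ... an external job writes new dt=.../ directories under /data/logs ...
    recoverPartitions("logs")    # scans the table location and registers the new partitions
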
@@ -430,17 +435,18 @@ recoverPartitions <- function(tableName) {
invisible(handledCallJMethod(catalog, "recoverPartitions", tableName))
}
-#' Invalidate and refresh all the cached metadata of the given table
+#' Invalidates and refreshes all the cached data and metadata of the given table
#'
-#' Invalidate and refresh all the cached metadata of the given table. For performance reasons,
-#' Spark SQL or the external data source library it uses might cache certain metadata about a
-#' table, such as the location of blocks. When those change outside of Spark SQL, users should
+#' Invalidates and refreshes all the cached data and metadata of the given table. For performance
+#' reasons, Spark SQL or the external data source library it uses might cache certain metadata about
+#' a table, such as the location of blocks. When those change outside of Spark SQL, users should
#' call this function to invalidate the cache.
#'
#' If this table is cached as an InMemoryRelation, drop the original cached version and make the
#' new version cached lazily.
#'
-#' @param tableName a name of the table.
+#' @param tableName the qualified or unqualified name that designates a table. If no database
+#'                  identifier is provided, it refers to a table in the current database.
#' @rdname refreshTable
#' @name refreshTable
#' @export
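
A sketch of the refresh call after out-of-band changes (table name hypothetical):

    # files backing "logs" were rewritten outside of Spark SQL
    refreshTable("logs")    # invalidates cached metadata; cached data is re-materialized lazily
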
@@ -456,11 +462,11 @@ refreshTable <- function(tableName) {
invisible(handledCallJMethod(catalog, "refreshTable", tableName))
}
-#' Invalidate and refresh all the cached data and metadata for SparkDataFrame containing path
+#' Invalidates and refreshes all the cached data and metadata for SparkDataFrame containing path
#'
-#' Invalidate and refresh all the cached data (and the associated metadata) for any SparkDataFrame
-#' that contains the given data source path. Path matching is by prefix, i.e. "/" would invalidate
-#' everything that is cached.
+#' Invalidates and refreshes all the cached data (and the associated metadata) for any
+#' SparkDataFrame that contains the given data source path. Path matching is by prefix, i.e. "/"
+#' would invalidate everything that is cached.
#'
#' @param path the path of the data source.
#' @rdname refreshByPath
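
The path-based variant, matching cached SparkDataFrames by path prefix (path hypothetical):

    refreshByPath("/data/logs")   # refreshes anything cached that reads under this prefix
    # refreshByPath("/") would invalidate every cached data source, per the prefix rule above
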