author     Felix Cheung <felixcheung_m@hotmail.com>    2017-04-06 09:09:43 -0700
committer  Felix Cheung <felixcheung@apache.org>       2017-04-06 09:09:43 -0700
commit     bccc330193217b2ec9660e06f1db6dd58f7af5d8 (patch)
tree       b5084b020a07133d47b81cd92c731510300012c6 /sql/core/src/main/scala
parent     d009fb369bbea0df81bbcf9c8028d14cfcaa683b (diff)
[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API
## What changes were proposed in this pull request?

Update the doc to remove "external" for createTable, and add refreshByPath in Python.

## How was this patch tested?

Manual.

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #17512 from felixcheung/catalogdoc.
Diffstat (limited to 'sql/core/src/main/scala')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala       | 17
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala  | 22
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 137b0cbc84..074952ff79 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -283,7 +283,7 @@ abstract class Catalog {
/**
* :: Experimental ::
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -321,7 +321,7 @@ abstract class Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -357,7 +357,7 @@ abstract class Catalog {
/**
* :: Experimental ::
- * Create a table from the given path based on a data source, a schema and a set of options.
+ * Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -397,7 +397,7 @@ abstract class Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Create a table from the given path based on a data source, a schema and a set of options.
+ * Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
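
All four createTable overloads touched above share the reworded semantics: the table is defined by the dataset in a data source plus a set of options (including the dataset location), not by a bare path argument. A minimal Scala sketch, assuming a SparkSession named `spark` and a hypothetical Parquet dataset at /tmp/events:

```scala
// Scala-specific overload taking an options Map; the "path" option points
// at the dataset backing the table. Table and path names are hypothetical.
val df = spark.catalog.createTable(
  "events",                       // unqualified table name
  "parquet",                      // data source
  Map("path" -> "/tmp/events"))   // options, including the dataset location
df.printSchema()
```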
@@ -447,6 +447,7 @@ abstract class Catalog {
/**
* Recovers all the partitions in the directory of a table and update the catalog.
+ * Only works with a partitioned table, and not a view.
*
* @param tableName is either a qualified or unqualified name that designates a table.
* If no database identifier is provided, it refers to a table in the
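
As the added doc line states, this only works with a partitioned table and fails on a view. A hedged sketch, assuming a hypothetical partitioned table `sales` whose partition directories were written outside of Spark:

```scala
// Scans the table's directory and registers any partitions that exist on
// storage but are missing from the catalog; errors out on views and on
// unpartitioned tables.
spark.catalog.recoverPartitions("sales")
```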
@@ -493,10 +494,10 @@ abstract class Catalog {
def clearCache(): Unit
/**
- * Invalidates and refreshes all the cached metadata of the given table. For performance reasons,
- * Spark SQL or the external data source library it uses might cache certain metadata about a
- * table, such as the location of blocks. When those change outside of Spark SQL, users should
- * call this function to invalidate the cache.
+ * Invalidates and refreshes all the cached data and metadata of the given table. For performance
+ * reasons, Spark SQL or the external data source library it uses might cache certain metadata
+ * about a table, such as the location of blocks. When those change outside of Spark SQL, users
+ * should call this function to invalidate the cache.
*
* If this table is cached as an InMemoryRelation, drop the original cached version and make the
* new version cached lazily.
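
A short sketch of the reworded behavior, assuming a hypothetical cached table `events` whose underlying files were rewritten outside Spark SQL:

```scala
// Invalidates the cached data and metadata for the table. If the table was
// cached as an InMemoryRelation, the old cached version is dropped and the
// new one is cached lazily, i.e. materialized on the next action.
spark.catalog.refreshTable("events")
spark.table("events").count()   // first access re-populates the cache
```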
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 5d1c35aba5..aebb663df5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -141,7 +141,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Returns a list of columns for the given table temporary view.
+ * Returns a list of columns for the given table/view or temporary view.
*/
@throws[AnalysisException]("table does not exist")
override def listColumns(tableName: String): Dataset[Column] = {
@@ -150,7 +150,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Returns a list of columns for the given table in the specified database.
+ * Returns a list of columns for the given table/view or temporary view in the specified database.
*/
@throws[AnalysisException]("database or table does not exist")
override def listColumns(dbName: String, tableName: String): Dataset[Column] = {
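
A sketch of the clarified contract, showing that listColumns also accepts a temporary view (the view name is made up for illustration):

```scala
// Register a temporary view, then list its columns; listColumns returns a
// Dataset[org.apache.spark.sql.catalog.Column].
spark.range(3).selectExpr("id", "id * 2 AS doubled").createOrReplaceTempView("people")
spark.catalog.listColumns("people").show()
// The two-argument overload scopes the lookup to a database:
// spark.catalog.listColumns("default", "my_table")
```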
@@ -273,7 +273,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
- * Creates a table from the given path based on a data source and returns the corresponding
+ * Creates a table from the given path and returns the corresponding
* DataFrame.
*
* @group ddl_ops
@@ -287,7 +287,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
@@ -304,7 +304,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source, a schema and a set of options.
+ * Creates a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
@@ -367,6 +367,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* Recovers all the partitions in the directory of a table and update the catalog.
+ * Only works with a partitioned table, and not a temporary view.
*
* @param tableName is either a qualified or unqualified name that designates a table.
* If no database identifier is provided, it refers to a table in the
@@ -431,8 +432,12 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Refreshes the cache entry for a table or view, if any. For Hive metastore table, the metadata
- * is refreshed. For data source tables, the schema will not be inferred and refreshed.
+ * Invalidates and refreshes all the cached data and metadata of the given table or view.
+ * For Hive metastore table, the metadata is refreshed. For data source tables, the schema will
+ * not be inferred and refreshed.
+ *
+ * If this table is cached as an InMemoryRelation, drop the original cached version and make the
+ * new version cached lazily.
*
* @group cachemgmt
* @since 2.0.0
@@ -456,7 +461,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* Refreshes the cache entry and the associated metadata for all Dataset (if any), that contain
- * the given data source path.
+ * the given data source path. Path matching is by prefix, i.e. "/" would invalidate
+ * everything that is cached.
*
* @group cachemgmt
* @since 2.0.0
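
A sketch of the prefix-matching caveat documented above, with a hypothetical path:

```scala
// Refreshes every cached Dataset whose data source path starts with the
// given prefix. Because matching is by prefix, "/" would invalidate
// everything that is cached.
spark.catalog.refreshByPath("/tmp/events")
// spark.catalog.refreshByPath("/")   // would touch all cached entries
```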