author     Felix Cheung <felixcheung_m@hotmail.com>    2017-04-06 09:09:43 -0700
committer  Felix Cheung <felixcheung@apache.org>       2017-04-06 09:09:43 -0700
commit     bccc330193217b2ec9660e06f1db6dd58f7af5d8 (patch)
tree       b5084b020a07133d47b81cd92c731510300012c6 /sql/core/src/main/scala
parent     d009fb369bbea0df81bbcf9c8028d14cfcaa683b (diff)
[SPARK-20196][PYTHON][SQL] update doc for catalog functions for all languages, add pyspark refreshByPath API
## What changes were proposed in this pull request?

Update the doc to remove "external" for createTable, and add refreshByPath in Python.

## How was this patch tested?

Manual.

Author: Felix Cheung <felixcheung_m@hotmail.com>

Closes #17512 from felixcheung/catalogdoc.
Diffstat (limited to 'sql/core/src/main/scala')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala       | 17
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala  | 22
2 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
index 137b0cbc84..074952ff79 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/catalog/Catalog.scala
@@ -283,7 +283,7 @@ abstract class Catalog {
/**
* :: Experimental ::
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -321,7 +321,7 @@ abstract class Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -357,7 +357,7 @@ abstract class Catalog {
/**
* :: Experimental ::
- * Create a table from the given path based on a data source, a schema and a set of options.
+ * Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
@@ -397,7 +397,7 @@ abstract class Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Create a table from the given path based on a data source, a schema and a set of options.
+ * Create a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @param tableName is either a qualified or unqualified name that designates a table.
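
All four createTable overloads touched above share the reworded semantics: the table is defined by the dataset in a data source plus a set of options (including the dataset location), not by a bare path argument. A minimal Scala sketch, assuming a SparkSession named `spark` and a hypothetical Parquet dataset at /tmp/events:

```scala
// Scala-specific overload taking an options Map; the "path" option points
// at the dataset backing the table. Table and path names are hypothetical.
val df = spark.catalog.createTable(
  "events",                       // unqualified table name
  "parquet",                      // data source
  Map("path" -> "/tmp/events"))   // options, including the dataset location
df.printSchema()
```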
@@ -447,6 +447,7 @@ abstract class Catalog {
/**
* Recovers all the partitions in the directory of a table and update the catalog.
+ * Only works with a partitioned table, and not a view.
*
* @param tableName is either a qualified or unqualified name that designates a table.
* If no database identifier is provided, it refers to a table in the
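
As the added doc line states, this only works with a partitioned table and fails on a view. A hedged sketch, assuming a hypothetical partitioned table `sales` whose partition directories were written outside of Spark:

```scala
// Scans the table's directory and registers any partitions that exist on
// storage but are missing from the catalog; errors out on views and on
// unpartitioned tables.
spark.catalog.recoverPartitions("sales")
```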
@@ -493,10 +494,10 @@ abstract class Catalog {
def clearCache(): Unit
/**
- * Invalidates and refreshes all the cached metadata of the given table. For performance reasons,
- * Spark SQL or the external data source library it uses might cache certain metadata about a
- * table, such as the location of blocks. When those change outside of Spark SQL, users should
- * call this function to invalidate the cache.
+ * Invalidates and refreshes all the cached data and metadata of the given table. For performance
+ * reasons, Spark SQL or the external data source library it uses might cache certain metadata
+ * about a table, such as the location of blocks. When those change outside of Spark SQL, users
+ * should call this function to invalidate the cache.
*
* If this table is cached as an InMemoryRelation, drop the original cached version and make the
* new version cached lazily.
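
A short sketch of the reworded behavior, assuming a hypothetical cached table `events` whose underlying files were rewritten outside Spark SQL:

```scala
// Invalidates the cached data and metadata for the table. If the table was
// cached as an InMemoryRelation, the old cached version is dropped and the
// new one is cached lazily, i.e. materialized on the next action.
spark.catalog.refreshTable("events")
spark.table("events").count()   // first access re-populates the cache
```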
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index 5d1c35aba5..aebb663df5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -141,7 +141,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Returns a list of columns for the given table temporary view.
+ * Returns a list of columns for the given table/view or temporary view.
*/
@throws[AnalysisException]("table does not exist")
override def listColumns(tableName: String): Dataset[Column] = {
@@ -150,7 +150,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Returns a list of columns for the given table in the specified database.
+ * Returns a list of columns for the given table/view or temporary view in the specified database.
*/
@throws[AnalysisException]("database or table does not exist")
override def listColumns(dbName: String, tableName: String): Dataset[Column] = {
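
A sketch of the clarified contract, showing that listColumns also accepts a temporary view (the view name is made up for illustration):

```scala
// Register a temporary view, then list its columns; listColumns returns a
// Dataset[org.apache.spark.sql.catalog.Column].
spark.range(3).selectExpr("id", "id * 2 AS doubled").createOrReplaceTempView("people")
spark.catalog.listColumns("people").show()
// The two-argument overload scopes the lookup to a database:
// spark.catalog.listColumns("default", "my_table")
```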
@@ -273,7 +273,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
- * Creates a table from the given path based on a data source and returns the corresponding
+ * Creates a table from the given path and returns the corresponding
* DataFrame.
*
* @group ddl_ops
@@ -287,7 +287,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source and a set of options.
+ * Creates a table based on the dataset in a data source and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
@@ -304,7 +304,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* :: Experimental ::
* (Scala-specific)
- * Creates a table from the given path based on a data source, a schema and a set of options.
+ * Creates a table based on the dataset in a data source, a schema and a set of options.
* Then, returns the corresponding DataFrame.
*
* @group ddl_ops
@@ -367,6 +367,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* Recovers all the partitions in the directory of a table and update the catalog.
+ * Only works with a partitioned table, and not a temporary view.
*
* @param tableName is either a qualified or unqualified name that designates a table.
* If no database identifier is provided, it refers to a table in the
@@ -431,8 +432,12 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
}
/**
- * Refreshes the cache entry for a table or view, if any. For Hive metastore table, the metadata
- * is refreshed. For data source tables, the schema will not be inferred and refreshed.
+ * Invalidates and refreshes all the cached data and metadata of the given table or view.
+ * For Hive metastore table, the metadata is refreshed. For data source tables, the schema will
+ * not be inferred and refreshed.
+ *
+ * If this table is cached as an InMemoryRelation, drop the original cached version and make the
+ * new version cached lazily.
*
* @group cachemgmt
* @since 2.0.0
@@ -456,7 +461,8 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
/**
* Refreshes the cache entry and the associated metadata for all Dataset (if any), that contain
- * the given data source path.
+ * the given data source path. Path matching is by prefix, i.e. "/" would invalidate
+ * everything that is cached.
*
* @group cachemgmt
* @since 2.0.0
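
A sketch of the prefix-matching caveat documented above, with a hypothetical path:

```scala
// Refreshes every cached Dataset whose data source path starts with the
// given prefix. Because matching is by prefix, "/" would invalidate
// everything that is cached.
spark.catalog.refreshByPath("/tmp/events")
// spark.catalog.refreshByPath("/")   // would touch all cached entries
```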