From 339b237dc18d4367b0735236b4b8be2901fcad79 Mon Sep 17 00:00:00 2001 From: Juliusz Sompolski Date: Thu, 16 Mar 2017 08:20:47 +0800 Subject: [SPARK-19948] Document that saveAsTable uses catalog as source of truth for table existence. It is quirky behaviour that saveAsTable to e.g. a JDBC source with SaveMode other than Overwrite will nevertheless overwrite the table in the external source, if that table was not a catalog table. Author: Juliusz Sompolski Closes #17289 from juliuszsompolski/saveAsTableDoc. --- sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'sql/core') diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index deaa800694..3e975ef6a3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -337,6 +337,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { * +---+---+ * }}} * + * In this method, save mode is used to determine the behavior if the data source table exists in + * Spark catalog. We will always overwrite the underlying data of data source (e.g. a table in + * JDBC data source) if the table doesn't exist in Spark catalog, and will always append to the + * underlying data of data source if the table already exists. + * * When the DataFrame is created from a non-partitioned `HadoopFsRelation` with a single input * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC * and Parquet), the table is persisted in a Hive compatible format, which means other systems -- cgit v1.2.3