author     Wenchen Fan <wenchen@databricks.com>    2016-10-13 03:26:29 -0400
committer  Andrew Or <andrewor14@gmail.com>        2016-10-13 03:26:29 -0400
commit     db8784feaa605adcbd37af4bc8b7146479b631f8 (patch)
tree       170e6e285686667624084681bd5fe0eacc731810 /sql/hive
parent     6f2fa6c54a11caccd446d5560d2014c645fcf7cc (diff)
download   spark-db8784feaa605adcbd37af4bc8b7146479b631f8.tar.gz
           spark-db8784feaa605adcbd37af4bc8b7146479b631f8.tar.bz2
           spark-db8784feaa605adcbd37af4bc8b7146479b631f8.zip
[SPARK-17899][SQL] add a debug mode to keep raw table properties in HiveExternalCatalog
## What changes were proposed in this pull request?

Currently `HiveExternalCatalog` filters out the Spark SQL internal table properties, e.g. `spark.sql.sources.provider`, `spark.sql.sources.schema`, etc. This is reasonable for external users, who don't want to see these internal properties in `DESC TABLE`. However, as Spark developers we sometimes do want to see the raw table properties. This PR adds a new internal SQL conf, `spark.sql.debug`, to enable a debug mode that keeps these raw table properties. The config can also be used in similar places where we want to retain debug information in the future.

## How was this patch tested?

New test in `MetastoreDataSourcesSuite`.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #15458 from cloud-fan/debug.
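As a quick illustration (not part of the patch itself), here is a minimal sketch of how a developer might turn the new flag on from a spark-shell-style script. Since `spark.sql.debug` is a static conf read from the `SparkConf`, it has to be set before the underlying context is created; the session-builder setup below is illustrative:

```scala
import org.apache.spark.sql.SparkSession

// Minimal sketch: enable the debug mode added by this PR so that
// HiveExternalCatalog keeps the raw table properties instead of
// filtering them out. `spark.sql.debug` is the conf key from the
// commit message; the rest of the setup is an assumption.
val spark = SparkSession.builder()
  .master("local[*]")
  .config("spark.sql.debug", "true") // keep internal spark.sql.sources.* keys
  .enableHiveSupport()
  .getOrCreate()

spark.sql("CREATE TABLE t(i INT) USING json")

// With debug mode on, internal entries such as spark.sql.sources.provider
// and spark.sql.sources.schema.* survive in the table metadata and become
// visible in the table's properties output.
spark.sql("DESC EXTENDED t").show(truncate = false)
```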
Diffstat (limited to 'sql/hive')
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala        |  9
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala  | 17
2 files changed, 23 insertions(+), 3 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
index e1c0cad907..ed189724a2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala
@@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.command.{ColumnStatStruct, DDLUtils}
 import org.apache.spark.sql.execution.datasources.CaseInsensitiveMap
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.internal.HiveSerDe
-import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
+import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.types.{DataType, StructType}
@@ -461,13 +461,18 @@ private[spark] class HiveExternalCatalog(conf: SparkConf, hadoopConf: Configurat
       } else {
         table.storage
       }
+      val tableProps = if (conf.get(DEBUG_MODE)) {
+        table.properties
+      } else {
+        getOriginalTableProperties(table)
+      }
       table.copy(
         storage = storage,
         schema = getSchemaFromTableProperties(table),
         provider = Some(provider),
         partitionColumnNames = getPartitionColumnsFromTableProperties(table),
         bucketSpec = getBucketSpecFromTableProperties(table),
-        properties = getOriginalTableProperties(table))
+        properties = tableProps)
     } getOrElse {
       table.copy(provider = Some("hive"))
     }
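For context, the `DEBUG_MODE` entry used above is imported via `StaticSQLConf._`, and `StaticSQLConf` itself is outside the sql/hive diff shown here. A hypothetical sketch of roughly what that entry looks like; only the key name `spark.sql.debug` and the boolean, off-unless-enabled behavior are taken from the commit message and the test below, while the builder-style shape merely mirrors how other static SQL confs of this era are declared:

```scala
// Hypothetical sketch of the DEBUG_MODE entry assumed to live in
// org.apache.spark.sql.internal.StaticSQLConf (not shown in this diff).
val DEBUG_MODE = SQLConfigBuilder("spark.sql.debug")
  .internal()                  // internal conf: hidden from ordinary users
  .doc("Only used for internal debugging; keeps raw table properties " +
    "in HiveExternalCatalog instead of filtering them out.")
  .booleanConf
  .createWithDefault(false)    // debug mode stays off unless explicitly enabled
```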
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 0477122fc6..7cc6179d44 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -23,6 +23,7 @@ import scala.collection.mutable.ArrayBuffer
 
 import org.apache.hadoop.fs.Path
 
+import org.apache.spark.SparkContext
 import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
@@ -31,7 +32,7 @@ import org.apache.spark.sql.hive.HiveExternalCatalog._
 import org.apache.spark.sql.hive.client.HiveClient
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.internal.StaticSQLConf.SCHEMA_STRING_LENGTH_THRESHOLD
+import org.apache.spark.sql.internal.StaticSQLConf._
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.util.Utils
@@ -1324,4 +1325,18 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
       hiveClient.dropTable("default", "t", ignoreIfNotExists = true, purge = true)
     }
   }
+
+  test("should keep data source entries in table properties when debug mode is on") {
+    val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
+    try {
+      sparkSession.sparkContext.conf.set(DEBUG_MODE, true)
+      val newSession = sparkSession.newSession()
+      newSession.sql("CREATE TABLE abc(i int) USING json")
+      val tableMeta = newSession.sessionState.catalog.getTableMetadata(TableIdentifier("abc"))
+      assert(tableMeta.properties(DATASOURCE_SCHEMA_NUMPARTS).toInt == 1)
+      assert(tableMeta.properties(DATASOURCE_PROVIDER) == "json")
+    } finally {
+      sparkSession.sparkContext.conf.set(DEBUG_MODE, previousValue)
+    }
+  }
 }
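A note on the test's shape: `HiveExternalCatalog` reads `DEBUG_MODE` from the SparkContext-level `SparkConf` (see its constructor in the hunk above) rather than from a session conf, so the test flips the flag directly on `sparkSession.sparkContext.conf`, creates a fresh session with `newSession()` (presumably so the table is created with the flag already visible to the catalog), and restores the previous value in a `finally` block so that debug mode does not leak into other tests sharing the same context.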