author     windpiger <songjun@outlook.com>          2017-02-11 22:21:14 -0800
committer  Xiao Li <gatorsmile@gmail.com>           2017-02-11 22:21:14 -0800
commit     3881f342b49efdb1e0d5ee27f616451ea1928c5d (patch)
tree       9e1473aa757943cc46280cf67cbb874c176c209e /sql/hive/src/test/scala/org
parent     0fbecc736df95bf757cb497c108ae3dbc5893829 (diff)
[SPARK-19448][SQL] optimize some duplication functions between HiveClientImpl and HiveUtils
## What changes were proposed in this pull request?

There are some functions duplicated between `HiveClientImpl` and `HiveUtils`, such as `toHiveTable`, `toHivePartition`, and `fromHivePartition`; we can merge them into one place. An additional change renames `MetastoreRelation.attributes` to `MetastoreRelation.dataColKeys`:
https://github.com/apache/spark/blob/master/sql/hive/src/main/scala/org/apache/spark/sql/hive/MetastoreRelation.scala#L234

## How was this patch tested?

N/A

Author: windpiger <songjun@outlook.com>

Closes #16787 from windpiger/todoInMetaStoreRelation.
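For illustration, a minimal sketch of the deduplication direction. This is an assumption about shape, not a transcription of the patch: the object name `HiveConversions` and the exact signatures are hypothetical stand-ins for wherever the shared helpers actually land.

    // Hypothetical sketch: hoist the conversion helpers into one shared object
    // so HiveClientImpl and HiveUtils no longer carry duplicate copies.
    object HiveConversions {
      import org.apache.hadoop.hive.ql.metadata.{Partition => HivePartition, Table => HiveTable}
      import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition}

      def toHiveTable(table: CatalogTable): HiveTable = ???
      def toHivePartition(p: CatalogTablePartition, ht: HiveTable): HivePartition = ???
      def fromHivePartition(hp: HivePartition): CatalogTablePartition = ???
    }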
Diffstat (limited to 'sql/hive/src/test/scala/org')
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala   5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala                       3
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala                           25
3 files changed, 32 insertions, 1 deletion
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
index 00fdfbcebb..ee632d24b7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogBackwardCompatibilitySuite.scala
@@ -134,6 +134,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
storage = CatalogStorageFormat.empty.copy(
properties = Map("path" -> defaultTableURI("tbl4").toString)),
schema = new StructType(),
+ provider = Some("json"),
properties = Map(
"spark.sql.sources.provider" -> "json",
"spark.sql.sources.schema.numParts" -> "1",
@@ -145,6 +146,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
storage = CatalogStorageFormat.empty.copy(
properties = Map("path" -> defaultTableURI("tbl5").toString)),
schema = simpleSchema,
+ provider = Some("parquet"),
properties = Map(
"spark.sql.sources.provider" -> "parquet",
"spark.sql.sources.schema.numParts" -> "1",
@@ -156,6 +158,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
storage = CatalogStorageFormat.empty.copy(
properties = Map("path" -> defaultTableURI("tbl6").toString)),
schema = new StructType(),
+ provider = Some("json"),
properties = Map(
"spark.sql.sources.provider" -> "json",
"spark.sql.sources.schema.numParts" -> "1",
@@ -170,6 +173,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
locationUri = Some(defaultTableURI("tbl7").toString + "-__PLACEHOLDER__"),
properties = Map("path" -> tempDirUri)),
schema = new StructType(),
+ provider = Some("json"),
properties = Map(
"spark.sql.sources.provider" -> "json",
"spark.sql.sources.schema.numParts" -> "1",
@@ -194,6 +198,7 @@ class HiveExternalCatalogBackwardCompatibilitySuite extends QueryTest
locationUri = Some(defaultTableURI("tbl9").toString + "-__PLACEHOLDER__"),
properties = Map("path" -> tempDirUri)),
schema = new StructType(),
+ provider = Some("json"),
properties = Map("spark.sql.sources.provider" -> "json"))
// A list of all raw tables we want to test, with their expected schema.
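The recurring `provider = Some(...)` lines above all follow one pattern: the raw test fixtures now set the data source provider as a first-class `CatalogTable` field in addition to the legacy `spark.sql.sources.provider` property. A self-contained sketch of that shape (table name and path are placeholders, not values from the diff):

    import org.apache.spark.sql.catalyst.TableIdentifier
    import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
    import org.apache.spark.sql.types.StructType

    // Sketch only: mirrors the shape of the fixtures in these suites.
    val rawTable = CatalogTable(
      identifier = TableIdentifier("tbl_example", Some("default")),  // placeholder name
      tableType = CatalogTableType.EXTERNAL,
      storage = CatalogStorageFormat.empty.copy(
        properties = Map("path" -> "/tmp/tbl_example")),             // placeholder path
      schema = new StructType(),
      provider = Some("json"),                                       // the field added throughout this diff
      properties = Map("spark.sql.sources.provider" -> "json"))

The same shape recurs in the `MetastoreDataSourcesSuite` hunks below, where the fixture is then persisted via `hiveClient.createTable(tableDesc, ignoreIfExists = false)`.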
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index cf1fe2bc70..e951bbe1dc 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -748,6 +748,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
identifier = TableIdentifier(tableName, Some("default")),
tableType = CatalogTableType.MANAGED,
schema = new StructType,
+ provider = Some("json"),
storage = CatalogStorageFormat(
locationUri = None,
inputFormat = None,
@@ -1276,6 +1277,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
identifier = TableIdentifier("t", Some("default")),
tableType = CatalogTableType.MANAGED,
schema = new StructType,
+ provider = Some("json"),
storage = CatalogStorageFormat.empty,
properties = Map(
DATASOURCE_PROVIDER -> "json",
@@ -1373,6 +1375,7 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv
properties = Map("path" -> path.getAbsolutePath)
),
schema = new StructType(),
+ provider = Some("parquet"),
properties = Map(
HiveExternalCatalog.DATASOURCE_PROVIDER -> "parquet"))
hiveClient.createTable(tableDesc, ignoreIfExists = false)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index ca39c7e845..fe14824cf0 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql.hive.client
import java.io.{ByteArrayOutputStream, File, PrintStream}
import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe
import org.apache.hadoop.mapred.TextInputFormat
@@ -570,7 +571,6 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton w
}
}
-
test(s"$version: SPARK-13709: reading partitioned Avro table with nested schema") {
withTempDir { dir =>
val path = dir.toURI.toString
@@ -649,6 +649,29 @@ class VersionsSuite extends QueryTest with SQLTestUtils with TestHiveSingleton w
}
}
+ test(s"$version: CTAS for managed data source tables") {
+ withTable("t", "t1") {
+ import spark.implicits._
+
+ val tPath = new Path(spark.sessionState.conf.warehousePath, "t")
+ Seq("1").toDF("a").write.saveAsTable("t")
+ val expectedPath = s"file:${tPath.toUri.getPath.stripSuffix("/")}"
+ val table = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t"))
+
+ assert(table.location.stripSuffix("/") == expectedPath)
+ assert(tPath.getFileSystem(spark.sessionState.newHadoopConf()).exists(tPath))
+ checkAnswer(spark.table("t"), Row("1") :: Nil)
+
+ val t1Path = new Path(spark.sessionState.conf.warehousePath, "t1")
+ spark.sql("create table t1 using parquet as select 2 as a")
+ val table1 = spark.sessionState.catalog.getTableMetadata(TableIdentifier("t1"))
+ val expectedPath1 = s"file:${t1Path.toUri.getPath.stripSuffix("/")}"
+
+ assert(table1.location.stripSuffix("/") == expectedPath1)
+ assert(t1Path.getFileSystem(spark.sessionState.newHadoopConf()).exists(t1Path))
+ checkAnswer(spark.table("t1"), Row(2) :: Nil)
+ }
+ }
// TODO: add more tests.
}
}
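As a footnote to the new CTAS test in `VersionsSuite`: it derives the expected managed-table location from the session warehouse path. A minimal standalone sketch of that resolution (the warehouse value is an assumed example, not taken from the test environment):

    import org.apache.hadoop.fs.Path

    // Sketch: how the test computes the expected location of managed table "t".
    val warehousePath = "/tmp/spark-warehouse"  // assumed value for illustration
    val tPath = new Path(warehousePath, "t")
    val expectedPath = s"file:${tPath.toUri.getPath.stripSuffix("/")}"
    // expectedPath == "file:/tmp/spark-warehouse/t"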