aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Or <andrew@databricks.com>2016-06-02 17:44:48 -0700
committerAndrew Or <andrew@databricks.com>2016-06-02 17:44:48 -0700
commitd1c1fbc345a704a2c8210960683f33f945660d5a (patch)
tree97f90ff39ed50705ee0584191355936e2001204b
parente23370ec617c527ffa3a1f7d285ee2c4ffc51b77 (diff)
downloadspark-d1c1fbc345a704a2c8210960683f33f945660d5a.tar.gz
spark-d1c1fbc345a704a2c8210960683f33f945660d5a.tar.bz2
spark-d1c1fbc345a704a2c8210960683f33f945660d5a.zip
[SPARK-15715][SQL] Fix alter partition with storage information in Hive

## What changes were proposed in this pull request?

This command didn't work for Hive tables. Now it does:

```
ALTER TABLE boxes PARTITION (width=3) SET SERDE 'com.sparkbricks.serde.ColumnarSerDe' WITH SERDEPROPERTIES ('compress'='true')
```

## How was this patch tested?

`HiveExternalCatalogSuite`

Author: Andrew Or <andrew@databricks.com>

Closes #13453 from andrewor14/alter-partition-storage.
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala | 10
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 30
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala | 5
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 22
4 files changed, 57 insertions(+), 10 deletions(-)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
index 377e64ba01..0c4d363365 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalogSuite.scala
@@ -382,6 +382,8 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
// See HIVE-2742 for more detail.
catalog.setCurrentDatabase("db2")
val newLocation = newUriForDatabase()
+ val newSerde = "com.sparkbricks.text.EasySerde"
+ val newSerdeProps = Map("spark" -> "bricks", "compressed" -> "false")
// alter but keep spec the same
val oldPart1 = catalog.getPartition("db2", "tbl2", part1.spec)
val oldPart2 = catalog.getPartition("db2", "tbl2", part2.spec)
@@ -394,6 +396,14 @@ abstract class ExternalCatalogSuite extends SparkFunSuite with BeforeAndAfterEac
assert(newPart2.storage.locationUri == Some(newLocation))
assert(oldPart1.storage.locationUri != Some(newLocation))
assert(oldPart2.storage.locationUri != Some(newLocation))
+ // alter other storage information
+ catalog.alterPartitions("db2", "tbl2", Seq(
+ oldPart1.copy(storage = storageFormat.copy(serde = Some(newSerde))),
+ oldPart2.copy(storage = storageFormat.copy(serdeProperties = newSerdeProps))))
+ val newPart1b = catalog.getPartition("db2", "tbl2", part1.spec)
+ val newPart2b = catalog.getPartition("db2", "tbl2", part2.spec)
+ assert(newPart1b.storage.serde == Some(newSerde))
+ assert(newPart2b.storage.serdeProperties == newSerdeProps)
// alter but change spec, should fail because new partition specs do not exist yet
val badPart1 = part1.copy(spec = Map("a" -> "v1", "b" -> "v2"))
val badPart2 = part2.copy(spec = Map("a" -> "v3", "b" -> "v4"))
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
index 47fa41823c..1c89d8c62a 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path
import org.apache.hadoop.hive.conf.HiveConf
import org.apache.hadoop.hive.metastore.{TableType => HiveTableType}
import org.apache.hadoop.hive.metastore.api.{Database => HiveDatabase, FieldSchema}
+import org.apache.hadoop.hive.metastore.api.{SerDeInfo, StorageDescriptor}
import org.apache.hadoop.hive.ql.Driver
import org.apache.hadoop.hive.ql.metadata.{Hive, Partition => HivePartition, Table => HiveTable}
import org.apache.hadoop.hive.ql.processors._
@@ -779,18 +780,29 @@ private[hive] class HiveClientImpl(
hiveTable
}
- private def toHiveViewTable(view: CatalogTable): HiveTable = {
- val tbl = toHiveTable(view)
- tbl.setTableType(HiveTableType.VIRTUAL_VIEW)
- tbl.setSerializationLib(null)
- tbl.clearSerDeInfo()
- tbl
- }
-
private def toHivePartition(
p: CatalogTablePartition,
ht: HiveTable): HivePartition = {
- new HivePartition(ht, p.spec.asJava, p.storage.locationUri.map { l => new Path(l) }.orNull)
+ val tpart = new org.apache.hadoop.hive.metastore.api.Partition
+ val partValues = ht.getPartCols.asScala.map { hc =>
+ p.spec.get(hc.getName).getOrElse {
+ throw new IllegalArgumentException(
+ s"Partition spec is missing a value for column '${hc.getName}': ${p.spec}")
+ }
+ }
+ val storageDesc = new StorageDescriptor
+ val serdeInfo = new SerDeInfo
+ p.storage.locationUri.foreach(storageDesc.setLocation)
+ p.storage.inputFormat.foreach(storageDesc.setInputFormat)
+ p.storage.outputFormat.foreach(storageDesc.setOutputFormat)
+ p.storage.serde.foreach(serdeInfo.setSerializationLib)
+ serdeInfo.setParameters(p.storage.serdeProperties.asJava)
+ storageDesc.setSerdeInfo(serdeInfo)
+ tpart.setDbName(ht.getDbName)
+ tpart.setTableName(ht.getTableName)
+ tpart.setValues(partValues.asJava)
+ tpart.setSd(storageDesc)
+ new HivePartition(ht, tpart)
}
private def fromHivePartition(hp: HivePartition): CatalogTablePartition = {
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index 8ae4535f4e..5b209acf0f 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -354,7 +354,10 @@ class VersionsSuite extends SparkFunSuite with Logging {
test(s"$version: alterPartitions") {
val spec = Map("key1" -> "1", "key2" -> "2")
val newLocation = Utils.createTempDir().getPath()
- val storage = storageFormat.copy(locationUri = Some(newLocation))
+ val storage = storageFormat.copy(
+ locationUri = Some(newLocation),
+ // needed for 0.12 alter partitions
+ serde = Some("org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"))
val partition = CatalogTablePartition(spec, storage)
client.alterPartitions("default", "src_part", Seq(partition))
assert(client.getPartition("default", "src_part", spec)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index a98d469880..b2f01fcc83 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -345,6 +345,28 @@ class HiveDDLSuite
}
}
+ test("alter table partition - storage information") {
+ sql("CREATE TABLE boxes (height INT, length INT) PARTITIONED BY (width INT)")
+ sql("INSERT OVERWRITE TABLE boxes PARTITION (width=4) SELECT 4, 4")
+ val catalog = spark.sessionState.catalog
+ val expectedSerde = "com.sparkbricks.serde.ColumnarSerDe"
+ val expectedSerdeProps = Map("compress" -> "true")
+ val expectedSerdePropsString =
+ expectedSerdeProps.map { case (k, v) => s"'$k'='$v'" }.mkString(", ")
+ val oldPart = catalog.getPartition(TableIdentifier("boxes"), Map("width" -> "4"))
+ assume(oldPart.storage.serde != Some(expectedSerde), "bad test: serde was already set")
+ assume(oldPart.storage.serdeProperties.filterKeys(expectedSerdeProps.contains) !=
+ expectedSerdeProps, "bad test: serde properties were already set")
+ sql(s"""ALTER TABLE boxes PARTITION (width=4)
+ | SET SERDE '$expectedSerde'
+ | WITH SERDEPROPERTIES ($expectedSerdePropsString)
+ |""".stripMargin)
+ val newPart = catalog.getPartition(TableIdentifier("boxes"), Map("width" -> "4"))
+ assert(newPart.storage.serde == Some(expectedSerde))
+ assume(newPart.storage.serdeProperties.filterKeys(expectedSerdeProps.contains) ==
+ expectedSerdeProps)
+ }
+
test("drop table using drop view") {
withTable("tab1") {
sql("CREATE TABLE tab1(c1 int)")