about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@databricks.com> 2016-04-08 13:58:58 -0700
committer Josh Rosen <joshrosen@databricks.com> 2016-04-08 13:58:58 -0700
commit 464a3c1e02c665c7ad2709f8c47898b682526eb3 (patch)
tree e0b6b8a379a0c9bb36bb8295520df861e2e0c232
parent f8c9beca38f1f396eb3220b23db6d77112a50293 (diff)
download spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.tar.gz
spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.tar.bz2
spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.zip
[SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork
This patch updates our custom Hive 1.2.1 fork in order to shade Kryo in Hive. This is a blocker for upgrading Spark to use Kryo 3 (see #12076).

The source for this new fork of Hive can be found at https://github.com/JoshRosen/hive/tree/release-1.2.1-spark2

Here's the complete diff from the official Hive 1.2.1 release: https://github.com/apache/hive/compare/release-1.2.1...JoshRosen:release-1.2.1-spark2

Here's the diff from the sources that pwendell used to publish the current `1.2.1.spark` release of Hive: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2. This diff looks large because his branch used a shell script to rewrite the groupId, whereas I had to commit the groupId changes in order to prevent the find-and-replace from affecting the package names in our relocated Kryo classes: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2#diff-6ada9aaec70e069df8f2c34c5519dd1e

Using these changes, I was able to publish a local version of Hive and verify that this change fixes the test failures which are blocking #12076. Note that this PR will not compile until we complete the review of the Hive POM changes and stage and publish a release.

/cc vanzin, steveloughran, and pwendell for review.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #12215 from JoshRosen/shade-kryo-in-hive.
-rw-r--r-- pom.xml 2
-rw-r--r-- sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala 4
-rw-r--r-- sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala 41
3 files changed, 12 insertions, 35 deletions
diff --git a/pom.xml b/pom.xml
index 1b40983a6c..f37a8988f7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -131,7 +131,7 @@
<curator.version>2.4.0</curator.version>
<hive.group>org.spark-project.hive</hive.group>
<!-- Version used in Maven Hive dependency -->
- <hive.version>1.2.1.spark</hive.version>
+ <hive.version>1.2.1.spark2</hive.version>
<!-- Version used for internal directory structure -->
<hive.version.short>1.2.1</hive.version.short>
<derby.version>10.10.1.1</derby.version>
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
index da910533d0..0d2a765a38 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala
@@ -24,8 +24,6 @@ import scala.collection.JavaConverters._
import scala.language.implicitConversions
import scala.reflect.ClassTag
-import com.esotericsoftware.kryo.Kryo
-import com.esotericsoftware.kryo.io.{Input, Output}
import com.google.common.base.Objects
import org.apache.avro.Schema
import org.apache.hadoop.conf.Configuration
@@ -37,6 +35,8 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils
import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils}
import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector
import org.apache.hadoop.io.Writable
+import org.apache.hive.com.esotericsoftware.kryo.Kryo
+import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.types.Decimal
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
index 34b2edb44b..f262ef62be 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala
@@ -24,9 +24,7 @@ import org.apache.spark.SparkFunSuite
/**
* Verify that some classes load and that others are not found on the classpath.
*
- *
- * This is used to detect classpath and shading conflict, especially between
- * Spark's required Kryo version and that which can be found in some Hive versions.
+ * This is used to detect classpath and shading conflicts.
*/
class ClasspathDependenciesSuite extends SparkFunSuite {
private val classloader = this.getClass.getClassLoader
@@ -40,10 +38,6 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
classloader.loadClass(classname)
}
- private def assertLoads(classes: String*): Unit = {
- classes.foreach(assertLoads)
- }
-
private def findResource(classname: String): URL = {
val resource = resourceName(classname)
classloader.getResource(resource)
@@ -63,17 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
}
}
- private def assertClassNotFound(classes: String*): Unit = {
- classes.foreach(assertClassNotFound)
+ test("shaded Protobuf") {
+ assertLoads("org.apache.hive.com.google.protobuf.ServiceException")
}
- private val KRYO = "com.esotericsoftware.kryo.Kryo"
-
- private val SPARK_HIVE = "org.apache.hive."
- private val SPARK_SHADED = "org.spark-project.hive.shaded."
-
- test("shaded Protobuf") {
- assertLoads(SPARK_SHADED + "com.google.protobuf.ServiceException")
+ test("shaded Kryo") {
+ assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo")
}
test("hive-common") {
@@ -86,25 +75,13 @@ class ClasspathDependenciesSuite extends SparkFunSuite {
private val STD_INSTANTIATOR = "org.objenesis.strategy.StdInstantiatorStrategy"
- test("unshaded kryo") {
- assertLoads(KRYO, STD_INSTANTIATOR)
- }
-
test("Forbidden Dependencies") {
- assertClassNotFound(
- SPARK_HIVE + KRYO,
- SPARK_SHADED + KRYO,
- "org.apache.hive." + KRYO,
- "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
- SPARK_HIVE + "com.esotericsoftware.shaded." + STD_INSTANTIATOR,
- "org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR
- )
+ assertClassNotFound("com.esotericsoftware.shaded." + STD_INSTANTIATOR)
+ assertClassNotFound("org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR)
}
test("parquet-hadoop-bundle") {
- assertLoads(
- "parquet.hadoop.ParquetOutputFormat",
- "parquet.hadoop.ParquetInputFormat"
- )
+ assertLoads("parquet.hadoop.ParquetOutputFormat")
+ assertLoads("parquet.hadoop.ParquetInputFormat")
}
}