diff options
author | Josh Rosen <joshrosen@databricks.com> | 2016-04-08 13:58:58 -0700 |
---|---|---|
committer | Josh Rosen <joshrosen@databricks.com> | 2016-04-08 13:58:58 -0700 |
commit | 464a3c1e02c665c7ad2709f8c47898b682526eb3 (patch) | |
tree | e0b6b8a379a0c9bb36bb8295520df861e2e0c232 | |
parent | f8c9beca38f1f396eb3220b23db6d77112a50293 (diff) | |
download | spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.tar.gz spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.tar.bz2 spark-464a3c1e02c665c7ad2709f8c47898b682526eb3.zip |
[SPARK-14435][BUILD] Shade Kryo in our custom Hive 1.2.1 fork
This patch updates our custom Hive 1.2.1 fork in order to shade Kryo in Hive. This is a blocker for upgrading Spark to use Kryo 3 (see #12076).
The source for this new fork of Hive can be found at https://github.com/JoshRosen/hive/tree/release-1.2.1-spark2
Here's the complete diff from the official Hive 1.2.1 release: https://github.com/apache/hive/compare/release-1.2.1...JoshRosen:release-1.2.1-spark2
Here's the diff from the sources that pwendell used to publish the current `1.2.1.spark` release of Hive: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2. This diff looks large because his branch used a shell script to rewrite the groupId, whereas I had to commit the groupId changes in order to prevent the find-and-replace from affecting the package names in our relocated Kryo classes: https://github.com/pwendell/hive/compare/release-1.2.1-spark...JoshRosen:release-1.2.1-spark2#diff-6ada9aaec70e069df8f2c34c5519dd1e
Using these changes, I was able to publish a local version of Hive and verify that this change fixes the test failures which are blocking #12076. Note that this PR will not compile until we complete the review of the Hive POM changes and stage and publish a release.
/cc vanzin, steveloughran, and pwendell for review.
Author: Josh Rosen <joshrosen@databricks.com>
Closes #12215 from JoshRosen/shade-kryo-in-hive.
-rw-r--r-- | pom.xml | 2 | ||||
-rw-r--r-- | sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala | 4 | ||||
-rw-r--r-- | sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala | 41 |
3 files changed, 12 insertions, 35 deletions
@@ -131,7 +131,7 @@ <curator.version>2.4.0</curator.version> <hive.group>org.spark-project.hive</hive.group> <!-- Version used in Maven Hive dependency --> - <hive.version>1.2.1.spark</hive.version> + <hive.version>1.2.1.spark2</hive.version> <!-- Version used for internal directory structure --> <hive.version.short>1.2.1</hive.version.short> <derby.version>10.10.1.1</derby.version> diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala index da910533d0..0d2a765a38 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveShim.scala @@ -24,8 +24,6 @@ import scala.collection.JavaConverters._ import scala.language.implicitConversions import scala.reflect.ClassTag -import com.esotericsoftware.kryo.Kryo -import com.esotericsoftware.kryo.io.{Input, Output} import com.google.common.base.Objects import org.apache.avro.Schema import org.apache.hadoop.conf.Configuration @@ -37,6 +35,8 @@ import org.apache.hadoop.hive.serde2.ColumnProjectionUtils import org.apache.hadoop.hive.serde2.avro.{AvroGenericRecordWritable, AvroSerdeUtils} import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector import org.apache.hadoop.io.Writable +import org.apache.hive.com.esotericsoftware.kryo.Kryo +import org.apache.hive.com.esotericsoftware.kryo.io.{Input, Output} import org.apache.spark.internal.Logging import org.apache.spark.sql.types.Decimal diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala index 34b2edb44b..f262ef62be 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/ClasspathDependenciesSuite.scala @@ -24,9 +24,7 @@ import org.apache.spark.SparkFunSuite /** * Verify that some classes load and that others are not found on the classpath. * - * - * This is used to detect classpath and shading conflict, especially between - * Spark's required Kryo version and that which can be found in some Hive versions. + * This is used to detect classpath and shading conflicts. */ class ClasspathDependenciesSuite extends SparkFunSuite { private val classloader = this.getClass.getClassLoader @@ -40,10 +38,6 @@ class ClasspathDependenciesSuite extends SparkFunSuite { classloader.loadClass(classname) } - private def assertLoads(classes: String*): Unit = { - classes.foreach(assertLoads) - } - private def findResource(classname: String): URL = { val resource = resourceName(classname) classloader.getResource(resource) @@ -63,17 +57,12 @@ class ClasspathDependenciesSuite extends SparkFunSuite { } } - private def assertClassNotFound(classes: String*): Unit = { - classes.foreach(assertClassNotFound) + test("shaded Protobuf") { + assertLoads("org.apache.hive.com.google.protobuf.ServiceException") } - private val KRYO = "com.esotericsoftware.kryo.Kryo" - - private val SPARK_HIVE = "org.apache.hive." - private val SPARK_SHADED = "org.spark-project.hive.shaded." - - test("shaded Protobuf") { - assertLoads(SPARK_SHADED + "com.google.protobuf.ServiceException") + test("shaded Kryo") { + assertLoads("org.apache.hive.com.esotericsoftware.kryo.Kryo") } test("hive-common") { @@ -86,25 +75,13 @@ class ClasspathDependenciesSuite extends SparkFunSuite { private val STD_INSTANTIATOR = "org.objenesis.strategy.StdInstantiatorStrategy" - test("unshaded kryo") { - assertLoads(KRYO, STD_INSTANTIATOR) - } - test("Forbidden Dependencies") { - assertClassNotFound( - SPARK_HIVE + KRYO, - SPARK_SHADED + KRYO, - "org.apache.hive." + KRYO, - "com.esotericsoftware.shaded." + STD_INSTANTIATOR, - SPARK_HIVE + "com.esotericsoftware.shaded." + STD_INSTANTIATOR, - "org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR - ) + assertClassNotFound("com.esotericsoftware.shaded." + STD_INSTANTIATOR) + assertClassNotFound("org.apache.hive.com.esotericsoftware.shaded." + STD_INSTANTIATOR) } test("parquet-hadoop-bundle") { - assertLoads( - "parquet.hadoop.ParquetOutputFormat", - "parquet.hadoop.ParquetInputFormat" - ) + assertLoads("parquet.hadoop.ParquetOutputFormat") + assertLoads("parquet.hadoop.ParquetInputFormat") } } |