 -rwxr-xr-x  dev/create-release/create-release.sh       | 14
 -rwxr-xr-x  dev/run-tests                               |  6
 -rwxr-xr-x  dev/scalastyle                              |  4
 -rw-r--r--  docs/building-spark.md                      | 11
 -rw-r--r--  docs/hadoop-third-party-distributions.md    |  2
 -rwxr-xr-x  make-distribution.sh                        |  2
 -rw-r--r--  pom.xml                                     | 33
 -rw-r--r--  yarn/pom.xml                                | 97
 8 files changed, 79 insertions(+), 90 deletions(-)

diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh
index 3dbb35f705..af4f000549 100755
--- a/dev/create-release/create-release.sh
+++ b/dev/create-release/create-release.sh
@@ -118,14 +118,14 @@ if [[ ! "$@" =~ --skip-publish ]]; then
rm -rf $SPARK_REPO
- build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
- -Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+ build/mvn -DskipTests -Pyarn -Phive \
+ -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
./dev/change-version-to-2.11.sh
- build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
- -Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
+ build/mvn -DskipTests -Pyarn -Phive \
+ -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
./dev/change-version-to-2.10.sh
@@ -228,9 +228,9 @@ if [[ ! "$@" =~ --skip-package ]]; then
# We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
# share the same Zinc server.
- make_binary_release "hadoop1" "-Phive -Phive-thriftserver -Dhadoop.version=1.0.4" "3030" &
- make_binary_release "hadoop1-scala2.11" "-Phive -Dscala-2.11" "3031" &
- make_binary_release "cdh4" "-Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
+ make_binary_release "hadoop1" "-Phadoop-1 -Phive -Phive-thriftserver" "3030" &
+ make_binary_release "hadoop1-scala2.11" "-Phadoop-1 -Phive -Dscala-2.11" "3031" &
+ make_binary_release "cdh4" "-Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
make_binary_release "mapr3" "-Pmapr3 -Phive -Phive-thriftserver" "3035" &
diff --git a/dev/run-tests b/dev/run-tests
index ef587a1a59..44d802782c 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -40,11 +40,11 @@ function handle_error () {
{
if [ -n "$AMPLAB_JENKINS_BUILD_PROFILE" ]; then
if [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop1.0" ]; then
- export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=1.0.4"
+ export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=1.0.4"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.0" ]; then
- export SBT_MAVEN_PROFILES_ARGS="-Dhadoop.version=2.0.0-mr1-cdh4.1.1"
+ export SBT_MAVEN_PROFILES_ARGS="-Phadoop-1 -Dhadoop.version=2.0.0-mr1-cdh4.1.1"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.2" ]; then
- export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0"
+ export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.2"
elif [ "$AMPLAB_JENKINS_BUILD_PROFILE" = "hadoop2.3" ]; then
export SBT_MAVEN_PROFILES_ARGS="-Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0"
fi
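
The profile-to-flags mapping above can also be exercised outside Jenkins by exporting the same variable the script reads; a minimal sketch, assuming the variable is honored in a local run as well:

    # assumed local run: dev/run-tests derives SBT_MAVEN_PROFILES_ARGS from this
    # variable, e.g. "-Pyarn -Phadoop-2.2" for the hadoop2.2 profile above
    export AMPLAB_JENKINS_BUILD_PROFILE=hadoop2.2
    ./dev/run-tests
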
diff --git a/dev/scalastyle b/dev/scalastyle
index 4e03f89ed5..7f014c82f1 100755
--- a/dev/scalastyle
+++ b/dev/scalastyle
@@ -20,8 +20,8 @@
echo -e "q\n" | build/sbt -Phive -Phive-thriftserver scalastyle > scalastyle.txt
echo -e "q\n" | build/sbt -Phive -Phive-thriftserver test:scalastyle >> scalastyle.txt
# Check style with YARN built too
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 scalastyle >> scalastyle.txt
-echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 test:scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 scalastyle >> scalastyle.txt
+echo -e "q\n" | build/sbt -Pyarn -Phadoop-2.2 test:scalastyle >> scalastyle.txt
ERRORS=$(cat scalastyle.txt | awk '{if($1~/error/)print}')
rm scalastyle.txt
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 287fcd3c40..6e310ff424 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -59,14 +59,14 @@ You can fix this by setting the `MAVEN_OPTS` variable as discussed before.
# Specifying the Hadoop Version
-Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 1.0.4 by default. Note that certain build profiles are required for particular Hadoop versions:
+Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions:
<table class="table">
<thead>
<tr><th>Hadoop version</th><th>Profile required</th></tr>
</thead>
<tbody>
- <tr><td>1.x to 2.1.x</td><td>(none)</td></tr>
+ <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
<tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
<tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
<tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -77,10 +77,10 @@ For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hado
{% highlight bash %}
# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -DskipTests clean package
+mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -DskipTests clean package
+mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
{% endhighlight %}
You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later.
@@ -88,8 +88,9 @@ You can enable the "yarn" profile and optionally set the "yarn.version" property
Examples:
{% highlight bash %}
+
# Apache Hadoop 2.2.X
-mvn -Pyarn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+mvn -Pyarn -Phadoop-2.2 -DskipTests clean package
# Apache Hadoop 2.3.X
mvn -Pyarn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
diff --git a/docs/hadoop-third-party-distributions.md b/docs/hadoop-third-party-distributions.md
index 96bd69ca3b..795dd82a6b 100644
--- a/docs/hadoop-third-party-distributions.md
+++ b/docs/hadoop-third-party-distributions.md
@@ -14,7 +14,7 @@ property. For certain versions, you will need to specify additional profiles. Fo
see the guide on [building with maven](building-spark.html#specifying-the-hadoop-version):
mvn -Dhadoop.version=1.0.4 -DskipTests clean package
- mvn -Phadoop-2.2 -Dhadoop.version=2.2.0 -DskipTests clean package
+ mvn -Phadoop-2.3 -Dhadoop.version=2.3.0 -DskipTests clean package
The table below lists the corresponding `hadoop.version` code for each CDH/HDP release. Note that
some Hadoop releases are binary compatible across client versions. This means the pre-built Spark
diff --git a/make-distribution.sh b/make-distribution.sh
index 1bfa9acb1f..8d6e91d675 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
--hadoop)
echo "Error: '--hadoop' is no longer supported:"
echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
- echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+ echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
exit_with_usage
;;
--with-yarn)
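
Since --hadoop is rejected, a distribution now selects its Hadoop line purely through the Maven profiles and properties named in the error message. A minimal sketch, assuming the script's --name and --tgz options (they do not appear in this hunk):

    # hypothetical invocations: Hadoop is chosen via -P/-D flags, not --hadoop
    ./make-distribution.sh --name hadoop2.4 --tgz -Pyarn -Phadoop-2.4 -Phive
    ./make-distribution.sh --name hadoop1   --tgz -Phadoop-1 -Dhadoop.version=1.2.1
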
diff --git a/pom.xml b/pom.xml
index 564a443466..91d1d843c7 100644
--- a/pom.xml
+++ b/pom.xml
@@ -122,9 +122,9 @@
<slf4j.version>1.7.10</slf4j.version>
<log4j.version>1.2.17</log4j.version>
<hadoop.version>2.2.0</hadoop.version>
- <protobuf.version>2.4.1</protobuf.version>
+ <protobuf.version>2.5.0</protobuf.version>
<yarn.version>${hadoop.version}</yarn.version>
- <hbase.version>0.98.7-hadoop1</hbase.version>
+ <hbase.version>0.98.7-hadoop2</hbase.version>
<hbase.artifact>hbase</hbase.artifact>
<flume.version>1.4.0</flume.version>
<zookeeper.version>3.4.5</zookeeper.version>
@@ -143,7 +143,7 @@
<oro.version>2.0.8</oro.version>
<codahale.metrics.version>3.1.0</codahale.metrics.version>
<avro.version>1.7.7</avro.version>
- <avro.mapred.classifier></avro.mapred.classifier>
+ <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
<jets3t.version>0.7.1</jets3t.version>
<aws.java.sdk.version>1.8.3</aws.java.sdk.version>
<aws.kinesis.client.version>1.1.0</aws.kinesis.client.version>
@@ -155,7 +155,7 @@
<jline.version>${scala.version}</jline.version>
<jline.groupid>org.scala-lang</jline.groupid>
<jodd.version>3.6.3</jodd.version>
- <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
+ <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
<fasterxml.jackson.version>2.4.4</fasterxml.jackson.version>
<snappy.version>1.1.1.7</snappy.version>
<netlib.java.version>1.1.2</netlib.java.version>
@@ -1644,26 +1644,27 @@
-->
<profile>
- <id>hadoop-2.2</id>
+ <id>hadoop-1</id>
<properties>
- <hadoop.version>2.2.0</hadoop.version>
- <protobuf.version>2.5.0</protobuf.version>
- <hbase.version>0.98.7-hadoop2</hbase.version>
- <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
- <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
+ <hadoop.version>1.0.4</hadoop.version>
+ <protobuf.version>2.4.1</protobuf.version>
+ <hbase.version>0.98.7-hadoop1</hbase.version>
+ <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
+ <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
</properties>
</profile>
<profile>
+ <id>hadoop-2.2</id>
+ <!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
+ </profile>
+
+ <profile>
<id>hadoop-2.3</id>
<properties>
<hadoop.version>2.3.0</hadoop.version>
- <protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
- <hbase.version>0.98.7-hadoop2</hbase.version>
<commons.math3.version>3.1.1</commons.math3.version>
- <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
- <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
</properties>
</profile>
@@ -1671,12 +1672,8 @@
<id>hadoop-2.4</id>
<properties>
<hadoop.version>2.4.0</hadoop.version>
- <protobuf.version>2.5.0</protobuf.version>
<jets3t.version>0.9.3</jets3t.version>
- <hbase.version>0.98.7-hadoop2</hbase.version>
<commons.math3.version>3.1.1</commons.math3.version>
- <avro.mapred.classifier>hadoop2</avro.mapred.classifier>
- <codehaus.jackson.version>1.9.13</codehaus.jackson.version>
</properties>
</profile>
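
With the profiles arranged this way, the parent POM's global properties already describe a Hadoop 2.2 build, hadoop-2.2 is an empty marker profile, and hadoop-1 carries all of the Hadoop 1 overrides. A minimal sketch of the resulting invocations, mirroring the commands in docs/building-spark.md above:

    # default build: Hadoop 2.2.0, protobuf 2.5.0, hbase *-hadoop2, avro classifier "hadoop2"
    mvn -DskipTests clean package

    # Hadoop 1.x build: hadoop-1 switches protobuf, HBase, Avro and Jackson to the Hadoop 1 variants
    mvn -Phadoop-1 -Dhadoop.version=1.2.1 -DskipTests clean package
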
diff --git a/yarn/pom.xml b/yarn/pom.xml
index 7c8c3613e7..00d219f836 100644
--- a/yarn/pom.xml
+++ b/yarn/pom.xml
@@ -30,6 +30,7 @@
<name>Spark Project YARN</name>
<properties>
<sbt.project.name>yarn</sbt.project.name>
+ <jersey.version>1.9</jersey.version>
</properties>
<dependencies>
@@ -85,7 +86,12 @@
<artifactId>jetty-servlet</artifactId>
</dependency>
<!-- End of shaded deps. -->
-
+
+ <!--
+ See SPARK-3710. hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed
+ dependencies, so they need to be added manually for the tests to work.
+ -->
+
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-yarn-server-tests</artifactId>
@@ -97,59 +103,44 @@
<artifactId>mockito-all</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>jetty</artifactId>
+ <version>6.1.26</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.mortbay.jetty</groupId>
+ <artifactId>servlet-api</artifactId>
+ </exclusion>
+ </exclusions>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-core</artifactId>
+ <version>${jersey.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-json</artifactId>
+ <version>${jersey.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>stax</groupId>
+ <artifactId>stax-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>com.sun.jersey</groupId>
+ <artifactId>jersey-server</artifactId>
+ <version>${jersey.version}</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
-
- <!--
- See SPARK-3710. hadoop-yarn-server-tests in Hadoop 2.2 fails to pull some needed
- dependencies, so they need to be added manually for the tests to work.
- -->
- <profiles>
- <profile>
- <id>hadoop-2.2</id>
- <properties>
- <jersey.version>1.9</jersey.version>
- </properties>
- <dependencies>
- <dependency>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>jetty</artifactId>
- <version>6.1.26</version>
- <exclusions>
- <exclusion>
- <groupId>org.mortbay.jetty</groupId>
- <artifactId>servlet-api</artifactId>
- </exclusion>
- </exclusions>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-core</artifactId>
- <version>${jersey.version}</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-json</artifactId>
- <version>${jersey.version}</version>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>com.sun.jersey</groupId>
- <artifactId>jersey-server</artifactId>
- <version>${jersey.version}</version>
- <scope>test</scope>
- </dependency>
- </dependencies>
- </profile>
- </profiles>
-
+
<build>
<outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
<testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
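
Because the Jersey and Jetty test dependencies now sit in the module's main <dependencies> section rather than in a hadoop-2.2-only profile, the YARN tests resolve them under every Hadoop profile. A minimal sketch of running just this module's tests; the -pl module selector based on the yarn/ directory is an assumption:

    # assumed invocation: -pl picks the yarn module, -am builds its in-tree dependencies first
    mvn -Pyarn -Phadoop-2.2 -pl yarn -am test
    mvn -Pyarn -Phadoop-2.4 -pl yarn -am test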