-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala   | 10
-rw-r--r--  core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala | 17
-rwxr-xr-x  dev/create-release/release-build.sh                                           |  3
-rwxr-xr-x  dev/run-tests-jenkins.py                                                      |  4
-rwxr-xr-x  dev/run-tests.py                                                              |  2
-rw-r--r--  docs/building-spark.md                                                        | 18
-rwxr-xr-x  make-distribution.sh                                                          |  2
-rw-r--r--  pom.xml                                                                       | 13
-rw-r--r--  sql/README.md                                                                 |  2
9 files changed, 9 insertions(+), 62 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 718efc4f3b..6e91d73b6e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -663,16 +663,8 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
// For testing.
private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = {
- val hadoop1Class = "org.apache.hadoop.hdfs.protocol.FSConstants$SafeModeAction"
val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction"
- val actionClass: Class[_] =
- try {
- getClass().getClassLoader().loadClass(hadoop2Class)
- } catch {
- case _: ClassNotFoundException =>
- getClass().getClassLoader().loadClass(hadoop1Class)
- }
-
+ val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class)
val action = actionClass.getField("SAFEMODE_GET").get(null)
val method = dfs.getClass().getMethod("setSafeMode", action.getClass())
method.invoke(dfs, action).asInstanceOf[Boolean]
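For reference, a minimal sketch of what the remaining reflection resolves to, assuming a Hadoop 2.x HDFS client on the compile classpath (the provider itself keeps reflection so the class is only looked up at runtime):

```scala
import org.apache.hadoop.hdfs.DistributedFileSystem
import org.apache.hadoop.hdfs.protocol.HdfsConstants

// Hypothetical direct-call equivalent of the reflective lookup above.
// SAFEMODE_GET queries safe-mode state without changing it; setSafeMode
// returns true while the NameNode is still in safe mode.
def isFsInSafeMode(dfs: DistributedFileSystem): Boolean =
  dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET)
```

The diff can drop the hadoop1 fallback because only the Hadoop 2 location of `SafeModeAction` (`HdfsConstants` rather than `FSConstants`) can now appear at runtime.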
diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
index 943ebcb7bd..82d807fad8 100644
--- a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
+++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -26,17 +26,13 @@ import org.apache.spark.util.Utils
private[spark]
trait SparkHadoopMapReduceUtil {
def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
- val klass = firstAvailableClass(
- "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn
- "org.apache.hadoop.mapreduce.JobContext") // hadoop1
+ val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
}
def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
- val klass = firstAvailableClass(
- "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn
- "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1
+ val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
}
@@ -69,13 +65,4 @@ trait SparkHadoopMapReduceUtil {
}
}
}
-
- private def firstAvailableClass(first: String, second: String): Class[_] = {
- try {
- Utils.classForName(first)
- } catch {
- case e: ClassNotFoundException =>
- Utils.classForName(second)
- }
- }
}
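With the hadoop1 fallback gone, `firstAvailableClass` is dead code and the lookup always resolves to the Hadoop 2 implementation classes. A minimal sketch of the direct-call equivalent, assuming the Hadoop 2 `mapreduce.task` classes are on the compile classpath (the `JobID("sketch", 1)` identifier is made up for illustration):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.mapreduce.{JobID, TaskAttemptID}
import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl}

// In Hadoop 1, JobContext and TaskAttemptContext were concrete classes; in
// Hadoop 2 they are interfaces implemented by the *Impl classes below, which
// is why the trait originally tried two class names. Targeting Hadoop 2
// only, plain constructor calls suffice:
val conf = new Configuration()
val jobContext = new JobContextImpl(conf, new JobID("sketch", 1))
val taskContext = new TaskAttemptContextImpl(conf, new TaskAttemptID())
```

The trait above still goes through `Utils.classForName` and a reflective constructor rather than calling these constructors directly, so it compiles without a hard reference to the `task` package.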
diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh
index cb79e9eba0..b1895b16b1 100755
--- a/dev/create-release/release-build.sh
+++ b/dev/create-release/release-build.sh
@@ -166,9 +166,6 @@ if [[ "$1" == "package" ]]; then
# We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
# share the same Zinc server.
- make_binary_release "hadoop1" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver" "3030" &
- make_binary_release "hadoop1-scala2.11" "-Psparkr -Phadoop-1 -Phive -Dscala-2.11" "3031" &
- make_binary_release "cdh4" "-Psparkr -Phadoop-1 -Phive -Phive-thriftserver -Dhadoop.version=2.0.0-mr1-cdh4.2.0" "3032" &
make_binary_release "hadoop2.3" "-Psparkr -Phadoop-2.3 -Phive -Phive-thriftserver -Pyarn" "3033" &
make_binary_release "hadoop2.4" "-Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn" "3034" &
make_binary_release "hadoop2.6" "-Psparkr -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn" "3034" &
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 7aecea25b2..42afca0e52 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -163,10 +163,6 @@ def main():
if "test-maven" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_TOOL"] = "maven"
# Switch the Hadoop profile based on the PR title:
- if "test-hadoop1.0" in ghprb_pull_title:
- os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop1.0"
- if "test-hadoop2.0" in ghprb_pull_title:
- os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.0"
if "test-hadoop2.2" in ghprb_pull_title:
os.environ["AMPLAB_JENKINS_BUILD_PROFILE"] = "hadoop2.2"
if "test-hadoop2.3" in ghprb_pull_title:
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 2d4e04c468..17ceba052b 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -301,8 +301,6 @@ def get_hadoop_profiles(hadoop_version):
"""
sbt_maven_hadoop_profiles = {
- "hadoop1.0": ["-Phadoop-1", "-Dhadoop.version=1.2.1"],
- "hadoop2.0": ["-Phadoop-1", "-Dhadoop.version=2.0.0-mr1-cdh4.1.1"],
"hadoop2.2": ["-Pyarn", "-Phadoop-2.2"],
"hadoop2.3": ["-Pyarn", "-Phadoop-2.3", "-Dhadoop.version=2.3.0"],
"hadoop2.6": ["-Pyarn", "-Phadoop-2.6"],
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3d38edbdad..785988902d 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -33,13 +33,13 @@ to the `sharedSettings` val. See also [this PR](https://github.com/apache/spark/
# Building a Runnable Distribution
-To create a Spark distribution like those distributed by the
-[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
-to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
+To create a Spark distribution like those distributed by the
+[Spark Downloads](http://spark.apache.org/downloads.html) page, and that is laid out so as
+to be runnable, use `make-distribution.sh` in the project root directory. It can be configured
with Maven profile settings and so on like the direct Maven build. Example:
./make-distribution.sh --name custom-spark --tgz -Psparkr -Phadoop-2.4 -Phive -Phive-thriftserver -Pyarn
-
+
For more information on usage, run `./make-distribution.sh --help`
# Setting up Maven's Memory Usage
@@ -74,7 +74,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
<tr><th>Hadoop version</th><th>Profile required</th></tr>
</thead>
<tbody>
- <tr><td>1.x to 2.1.x</td><td>hadoop-1</td></tr>
<tr><td>2.2.x</td><td>hadoop-2.2</td></tr>
<tr><td>2.3.x</td><td>hadoop-2.3</td></tr>
<tr><td>2.4.x</td><td>hadoop-2.4</td></tr>
@@ -82,15 +81,6 @@ Because HDFS is not protocol-compatible across versions, if you want to read fro
</tbody>
</table>
-For Apache Hadoop versions 1.x, Cloudera CDH "mr1" distributions, and other Hadoop versions without YARN, use:
-
-{% highlight bash %}
-# Apache Hadoop 1.2.1
-mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
-
-# Cloudera CDH 4.2.0 with MapReduce v1
-mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
-{% endhighlight %}
You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
diff --git a/make-distribution.sh b/make-distribution.sh
index e64ceb8024..351b9e7d89 100755
--- a/make-distribution.sh
+++ b/make-distribution.sh
@@ -58,7 +58,7 @@ while (( "$#" )); do
--hadoop)
echo "Error: '--hadoop' is no longer supported:"
echo "Error: use Maven profiles and options -Dhadoop.version and -Dyarn.version instead."
- echo "Error: Related profiles include hadoop-1, hadoop-2.2, hadoop-2.3 and hadoop-2.4."
+ echo "Error: Related profiles include hadoop-2.2, hadoop-2.3 and hadoop-2.4."
exit_with_usage
;;
--with-yarn)
diff --git a/pom.xml b/pom.xml
index 32918d6a74..284c219519 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2443,19 +2443,6 @@
-->
<profile>
- <id>hadoop-1</id>
- <properties>
- <hadoop.version>1.2.1</hadoop.version>
- <protobuf.version>2.4.1</protobuf.version>
- <hbase.version>0.98.7-hadoop1</hbase.version>
- <avro.mapred.classifier>hadoop1</avro.mapred.classifier>
- <codehaus.jackson.version>1.8.8</codehaus.jackson.version>
- <akka.group>org.spark-project.akka</akka.group>
- <akka.version>2.3.4-spark</akka.version>
- </properties>
- </profile>
-
- <profile>
<id>hadoop-2.2</id>
<!-- SPARK-7249: Default hadoop profile. Uses global properties. -->
</profile>
diff --git a/sql/README.md b/sql/README.md
index 63d4dac982..a13bdab6d4 100644
--- a/sql/README.md
+++ b/sql/README.md
@@ -20,7 +20,7 @@ If you are working with Hive 0.12.0, you will need to set several environmental
```
export HIVE_HOME="<path to>/hive/build/dist"
export HIVE_DEV_HOME="<path to>/hive/"
-export HADOOP_HOME="<path to>/hadoop-1.0.4"
+export HADOOP_HOME="<path to>/hadoop"
```
If you are working with Hive 0.13.1, the following steps are needed: