diff options
-rw-r--r-- | docs/running-on-yarn.md | 31 | ||||
-rw-r--r-- | examples/pom.xml | 14 | ||||
-rw-r--r-- | pom.xml | 218 | ||||
-rw-r--r-- | project/SparkBuild.scala | 4 | ||||
-rw-r--r-- | repl-bin/pom.xml | 10 | ||||
-rw-r--r-- | repl/pom.xml | 12 | ||||
-rw-r--r-- | yarn/pom.xml | 61 |
7 files changed, 145 insertions, 205 deletions
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 93421efcbc..c611db0af4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -42,7 +42,7 @@ This would be used to connect to the cluster, write to the dfs and submit jobs t The command to launch the YARN Client is as follows: - SPARK_JAR=<SPARK_YARN_JAR_FILE> ./spark-class org.apache.spark.deploy.yarn.Client \ + SPARK_JAR=<SPARK_ASSEMBLY_JAR_FILE> ./spark-class org.apache.spark.deploy.yarn.Client \ --jar <YOUR_APP_JAR_FILE> \ --class <APP_MAIN_CLASS> \ --args <APP_MAIN_ARGUMENTS> \ @@ -54,14 +54,27 @@ The command to launch the YARN Client is as follows: For example: - SPARK_JAR=./yarn/target/spark-yarn-assembly-{{site.SPARK_VERSION}}.jar ./spark-class org.apache.spark.deploy.yarn.Client \ - --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}.jar \ - --class org.apache.spark.examples.SparkPi \ - --args yarn-standalone \ - --num-workers 3 \ - --master-memory 4g \ - --worker-memory 2g \ - --worker-cores 1 + # Build the Spark assembly JAR and the Spark examples JAR + $ SPARK_HADOOP_VERSION=2.0.5-alpha SPARK_YARN=true ./sbt/sbt assembly + + # Configure logging + $ cp conf/log4j.properties.template conf/log4j.properties + + # Submit Spark's ApplicationMaster to YARN's ResourceManager, and instruct Spark to run the SparkPi example + $ SPARK_JAR=./assembly/target/scala-{{site.SCALA_VERSION}}/spark-assembly-{{site.SPARK_VERSION}}-hadoop2.0.5-alpha.jar \ + ./spark-class org.apache.spark.deploy.yarn.Client \ + --jar examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \ + --class org.apache.spark.examples.SparkPi \ + --args yarn-standalone \ + --num-workers 3 \ + --master-memory 4g \ + --worker-memory 2g \ + --worker-cores 1 + + # Examine the output (replace $YARN_APP_ID in the following with the "application identifier" output by the previous command) + # (Note: YARN_APP_LOGS_DIR is usually /tmp/logs or $HADOOP_HOME/logs/userlogs depending on the Hadoop version.) + $ cat $YARN_APP_LOGS_DIR/$YARN_APP_ID/container*_000001/stdout + Pi is roughly 3.13794 The above starts a YARN Client programs which periodically polls the Application Master for status updates and displays them in the console. The client will exit once your application has finished running. diff --git a/examples/pom.xml b/examples/pom.xml index 224cf6c96c..e48f5b50ab 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -127,20 +127,6 @@ </dependency> </dependencies> - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - <scope>provided</scope> - </dependency> - </dependencies> - </profile> - </profiles> - <build> <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> @@ -368,6 +368,99 @@ </exclusion> </exclusions> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + <version>${hadoop.version}</version> + <exclusions> + <exclusion> + <groupId>asm</groupId> + <artifactId>asm</artifactId> + </exclusion> + <exclusion> + <groupId>org.jboss.netty</groupId> + <artifactId>netty</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-core-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-jaxrs</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-xc</artifactId> + </exclusion> + </exclusions> + </dependency> <!-- Specify Avro version because Kafka also has it as a dependency --> <dependency> <groupId>org.apache.avro</groupId> @@ -620,131 +713,6 @@ <dependencyManagement> <dependencies> - <!-- TODO: check versions, bringover from yarn branch ! --> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-client</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - <version>${hadoop.version}</version> - <exclusions> - <exclusion> - <groupId>asm</groupId> - <artifactId>asm</artifactId> - </exclusion> - <exclusion> - <groupId>org.jboss.netty</groupId> - <artifactId>netty</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-core-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-mapper-asl</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-jaxrs</artifactId> - </exclusion> - <exclusion> - <groupId>org.codehaus.jackson</groupId> - <artifactId>jackson-xc</artifactId> - </exclusion> - </exclusions> - </dependency> </dependencies> </dependencyManagement> </profile> diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index d038a4f479..5177325887 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -38,10 +38,10 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(core, bagel, mllib) dependsOn(maybeYarn: _*) + .dependsOn(core, bagel, mllib) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) dependsOn(maybeYarn: _*) + .dependsOn(core, mllib, bagel, streaming) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index d61b36a61a..3685561501 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -106,16 +106,6 @@ <profiles> <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - </profile> - <profile> <id>deb</id> <build> <plugins> diff --git a/repl/pom.xml b/repl/pom.xml index a1c87d7618..3123b37780 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -131,16 +131,4 @@ </plugin> </plugins> </build> - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-yarn</artifactId> - <version>${project.version}</version> - </dependency> - </dependencies> - </profile> - </profiles> </project> diff --git a/yarn/pom.xml b/yarn/pom.xml index 654b5bcd2d..27b2002095 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -30,6 +30,34 @@ <name>Spark Project YARN Support</name> <url>http://spark.incubator.apache.org/</url> + <dependencies> + <dependency> + <groupId>org.apache.spark</groupId> + <artifactId>spark-core</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-api</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-common</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-yarn-client</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + </dependency> + <dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-ipc</artifactId> + </dependency> + </dependencies> + <build> <outputDirectory>target/scala-${scala.version}/classes</outputDirectory> <testOutputDirectory>target/scala-${scala.version}/test-classes</testOutputDirectory> @@ -75,37 +103,4 @@ </plugin> </plugins> </build> - - <profiles> - <profile> - <id>hadoop2-yarn</id> - <dependencies> - <dependency> - <groupId>org.apache.spark</groupId> - <artifactId>spark-core</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-api</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-common</artifactId> - </dependency> - <dependency> - <groupId>org.apache.hadoop</groupId> - <artifactId>hadoop-yarn-client</artifactId> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro</artifactId> - </dependency> - <dependency> - <groupId>org.apache.avro</groupId> - <artifactId>avro-ipc</artifactId> - </dependency> - </dependencies> - </profile> - </profiles> </project> |