From a5403acd4ead9ecabfa9f2ed6d6d58afe912a238 Mon Sep 17 00:00:00 2001 From: folone Date: Sun, 20 Jan 2013 14:42:16 +0100 Subject: Updated maven build for scala 2.10. --- pom.xml | 57 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 31 insertions(+), 26 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 751189a9d8..756fe8783b 100644 --- a/pom.xml +++ b/pom.xml @@ -41,8 +41,8 @@ core bagel examples - repl - repl-bin + @@ -50,20 +50,20 @@ UTF-8 1.5 - 2.9.2 + 2.10 0.9.0-incubating - 2.0.3 - 1.0-M2.1 - 1.1.1 + 2.1.0 + 1.1-M7 + 1.2.3 1.6.1 4.1.2 - jboss-repo - JBoss Repository - http://repository.jboss.org/nexus/content/repositories/releases/ + typesafe-repo + Typesafe Repository + http://repo.typesafe.com/typesafe/releases/ true @@ -72,9 +72,9 @@ - cloudera-repo - Cloudera Repository - https://repository.cloudera.com/artifactory/cloudera-repos/ + jboss-repo + JBoss Repository + http://repository.jboss.org/nexus/content/repositories/releases/ true @@ -83,9 +83,9 @@ - typesafe-repo - Typesafe Repository - http://repo.typesafe.com/typesafe/releases/ + cloudera-repo + Cloudera Repository + https://repository.cloudera.com/artifactory/cloudera-repos/ true @@ -189,18 +189,18 @@ 0.20 - com.typesafe.akka - akka-actor - ${akka.version} + org.scala-lang + scala-actors + 2.10.0 com.typesafe.akka - akka-remote + akka-remote_${scala.version} ${akka.version} com.typesafe.akka - akka-slf4j + akka-slf4j_${scala.version} ${akka.version} @@ -214,17 +214,22 @@ 1.2.0 - cc.spray + io.spray spray-can ${spray.version} - cc.spray - spray-server + io.spray + spray-routing + ${spray.version} + + + io.spray + spray-io ${spray.version} - cc.spray + io.spray spray-json_${scala.version} ${spray.json.version} @@ -258,13 +263,13 @@ org.scalatest scalatest_${scala.version} - 1.8 + 1.9.1 test org.scalacheck scalacheck_${scala.version} - 1.9 + 1.10.0 test -- cgit v1.2.3 From 4041a2689e9f66ecf9550c9f0d0ae577b2f904c2 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Wed, 1 May 2013 11:35:35 +0530 Subject: Updated to latest stable scala 2.10.1 and akka 2.1.2 --- pom.xml | 2 +- project/SparkBuild.scala | 19 ++++++++----------- streaming/pom.xml | 14 ++------------ 3 files changed, 11 insertions(+), 24 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 52f9228896..6e2f36dee3 100644 --- a/pom.xml +++ b/pom.xml @@ -53,7 +53,7 @@ 1.5 2.10 0.9.0-incubating - 2.1.0 + 2.1.2 1.1-M7 1.2.3 1.6.1 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 49c8870019..1500aee672 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -36,7 +36,7 @@ object SparkBuild extends Build { def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", version := "0.8.0-SNAPSHOT", - scalaVersion := "2.10.0", + scalaVersion := "2.10.1", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, @@ -51,9 +51,6 @@ object SparkBuild extends Build { // Only allow one test at a time, even across projects, since they run in the same JVM concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), - // Shared between both core and streaming. 
- resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), - // For Sonatype publishing resolvers ++= Seq("sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/"), @@ -141,8 +138,8 @@ object SparkBuild extends Build { "asm" % "asm-all" % "3.3.1", "com.google.protobuf" % "protobuf-java" % "2.4.1", "de.javakaffee" % "kryo-serializers" % "0.20", - "com.typesafe.akka" %% "akka-remote" % "2.1.0", - "com.typesafe.akka" %% "akka-slf4j" % "2.1.0", + "com.typesafe.akka" %% "akka-remote" % "2.1.2", + "com.typesafe.akka" %% "akka-slf4j" % "2.1.2", "it.unimi.dsi" % "fastutil" % "6.4.4", "io.spray" % "spray-can" % "1.1-M7", "io.spray" % "spray-io" % "1.1-M7", @@ -150,9 +147,9 @@ object SparkBuild extends Build { "io.spray" %% "spray-json" % "1.2.3", "colt" % "colt" % "1.2.0", "org.apache.mesos" % "mesos" % "0.9.0-incubating", - "org.scala-lang" % "scala-actors" % "2.10.0", - "org.scala-lang" % "jline" % "2.10.0", - "org.scala-lang" % "scala-reflect" % "2.10.0" + "org.scala-lang" % "scala-actors" % "2.10.1", + "org.scala-lang" % "jline" % "2.10.1", + "org.scala-lang" % "scala-reflect" % "2.10.1" ) ++ (if (HADOOP_MAJOR_VERSION == "2") Some("org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION) else None).toSeq, unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ("src/hadoop" + HADOOP_MAJOR_VERSION + "/scala") } @@ -165,7 +162,7 @@ object SparkBuild extends Build { def replSettings = sharedSettings ++ Seq( name := "spark-repl", // libraryDependencies <+= scalaVersion("org.scala-lang" % "scala-compiler" % _) - libraryDependencies ++= Seq("org.scala-lang" % "scala-compiler" % "2.10.0") + libraryDependencies ++= Seq("org.scala-lang" % "scala-compiler" % "2.10.1") ) def examplesSettings = sharedSettings ++ Seq( @@ -181,7 +178,7 @@ object SparkBuild extends Build { "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile", "com.github.sgroschupf" % "zkclient" % "0.1", "org.twitter4j" % "twitter4j-stream" % "3.0.3", - "com.typesafe.akka" % "akka-zeromq" % "2.1-M1" excludeAll(ExclusionRule(name = "akka-actor"), ExclusionRule(organization = "org.scala-lang")) + "com.typesafe.akka" %% "akka-zeromq" % "2.1.2" ) ) ++ assemblySettings ++ extraAssemblySettings diff --git a/streaming/pom.xml b/streaming/pom.xml index 26f0ae951c..fe869ba66e 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -54,18 +54,8 @@ com.typesafe.akka - akka-zeromq - 2.1-M1 - - - org.scala-lang - scala-library - - - com.typesafe.akka - akka-actor - - + akka-zeromq_${scala.version} + ${akka.version} org.scalatest -- cgit v1.2.3 From 6f28067f8d2fbb371b3614ff7712aa82278dcb11 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Fri, 14 Jun 2013 21:01:36 +0530 Subject: Fixed maven build without netty fix --- pom.xml | 6 +++--- repl/pom.xml | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 6e2f36dee3..72fcc00d43 100644 --- a/pom.xml +++ b/pom.xml @@ -53,7 +53,7 @@ 1.5 2.10 0.9.0-incubating - 2.1.2 + 2.1.4 1.1-M7 1.2.3 1.6.1 @@ -265,12 +265,12 @@ org.scala-lang scala-compiler - ${scala.version} + 2.10.0 org.scala-lang jline - ${scala.version} + 2.10.0 diff --git a/repl/pom.xml b/repl/pom.xml index 0b5e400c3d..891b05fb8f 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -27,10 +27,12 @@ org.scala-lang scala-compiler + 2.10.0 org.scala-lang jline + 2.10.0 org.slf4j -- cgit v1.2.3 From 69ae7ea227e52c743652e5560ac7666187c0de6a Mon Sep 17 00:00:00 2001 
From: Prashant Sharma Date: Thu, 11 Jul 2013 18:30:18 +0530 Subject: Removed some unnecessary code and fixed dependencies --- bin/compute-classpath.sh | 2 +- pom.xml | 2 +- project/SparkBuild.scala | 12 ++++-------- 3 files changed, 6 insertions(+), 10 deletions(-) (limited to 'pom.xml') diff --git a/bin/compute-classpath.sh b/bin/compute-classpath.sh index 3a78880290..a0f35cb5eb 100755 --- a/bin/compute-classpath.sh +++ b/bin/compute-classpath.sh @@ -3,7 +3,7 @@ # This script computes Spark's classpath and prints it to stdout; it's used by both the "run" # script and the ExecutorRunner in standalone cluster mode. -SCALA_VERSION=2.9.3 +SCALA_VERSION=2.10 # Figure out where Spark is installed FWDIR="$(cd `dirname $0`/..; pwd)" diff --git a/pom.xml b/pom.xml index 5d624d1446..35eb9bf7a1 100644 --- a/pom.xml +++ b/pom.xml @@ -245,7 +245,7 @@ com.github.scala-incubator.io - scala-io-file_2.9.2 + scala-io-file_2.10 0.4.1 diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 987b78d317..d35b39fcf3 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -161,7 +161,6 @@ object SparkBuild extends Build { "colt" % "colt" % "1.2.0", "org.apache.mesos" % "mesos" % "0.9.0-incubating", "org.apache.derby" % "derby" % "10.4.2.0" % "test", - "org.scala-lang" % "scala-actors" % "2.10.1", "org.scala-lang" % "jline" % "2.10.1", "org.scala-lang" % "scala-reflect" % "2.10.1" ) ++ ( @@ -169,19 +168,19 @@ object SparkBuild extends Build { if (HADOOP_YARN) { Seq( // Exclude rule required for all ? - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), - "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) } else { Seq( - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) } } else { - Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) + Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) }), unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { @@ -222,9 +221,6 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", - resolvers ++= Seq( - "Akka Repository" at "http://repo.akka.io/releases/" - ), libraryDependencies ++= Seq( "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty), "com.github.sgroschupf" % "zkclient" % "0.1", -- cgit v1.2.3 From a90e0eff5982ba46b3658e91dec89bd08ce450e5 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Sun, 15 Sep 2013 12:47:20 +0530 Subject: version changed 2.9.3 -> 2.10 in shell script. 
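
Besides bumping SCALA_VERSION in the launcher scripts, this removes a leftover merge conflict (the <<<<<<< HEAD ... >>>>>>> master markers) from the properties section of pom.xml, keeping the Scala 2.10 branch's values and dropping the now-unused spray version properties. The resolved block amounts to roughly the following sketch; the element names are inferred from the ${...} references elsewhere in the build, only the values come from the hunk itself:

    <!-- illustrative sketch; property names assumed, values from the resolved conflict -->
    <scala.version>2.10</scala.version>
    <mesos.version>0.9.0-incubating</mesos.version>
    <akka.version>2.1.4</akka.version>
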
--- bin/compute-classpath.cmd | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- docs/_plugins/copy_api_dirs.rb | 2 +- pom.xml | 8 -------- pyspark | 2 +- pyspark2.cmd | 2 +- repl-bin/src/deb/bin/run | 2 +- run-example | 2 +- run-example2.cmd | 2 +- 10 files changed, 9 insertions(+), 17 deletions(-) (limited to 'pom.xml') diff --git a/bin/compute-classpath.cmd b/bin/compute-classpath.cmd index cf38188c4b..9e3e10ecaa 100644 --- a/bin/compute-classpath.cmd +++ b/bin/compute-classpath.cmd @@ -20,7 +20,7 @@ rem rem This script computes Spark's classpath and prints it to stdout; it's used by both the "run" rem script and the ExecutorRunner in standalone cluster mode. -set SCALA_VERSION=2.9.3 +set SCALA_VERSION=2.10 rem Figure out where the Spark framework is installed set FWDIR=%~dp0..\ diff --git a/core/pom.xml b/core/pom.xml index 382473a3b3..d42c2ee2fd 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -83,7 +83,7 @@ com.twitter - chill_2.9.3 + chill_2.10 0.3.1 diff --git a/docs/_config.yml b/docs/_config.yml index 5c135a0242..e7a96c0c70 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -5,6 +5,6 @@ markdown: kramdown # of Spark, Scala, and Mesos. SPARK_VERSION: 0.8.0-SNAPSHOT SPARK_VERSION_SHORT: 0.8.0 -SCALA_VERSION: 2.9.3 +SCALA_VERSION: 2.10 MESOS_VERSION: 0.9.0-incubating SPARK_ISSUE_TRACKER_URL: https://spark-project.atlassian.net diff --git a/docs/_plugins/copy_api_dirs.rb b/docs/_plugins/copy_api_dirs.rb index c574ea7f5c..431de909cb 100644 --- a/docs/_plugins/copy_api_dirs.rb +++ b/docs/_plugins/copy_api_dirs.rb @@ -35,7 +35,7 @@ if not (ENV['SKIP_API'] == '1' or ENV['SKIP_SCALADOC'] == '1') # Copy over the scaladoc from each project into the docs directory. # This directory will be copied over to _site when `jekyll` command is run. projects.each do |project_name| - source = "../" + project_name + "/target/scala-2.9.3/api" + source = "../" + project_name + "/target/scala-2.10/api" dest = "api/" + project_name puts "echo making directory " + dest diff --git a/pom.xml b/pom.xml index 011f6e9006..cd4593388f 100644 --- a/pom.xml +++ b/pom.xml @@ -76,17 +76,9 @@ UTF-8 1.5 -<<<<<<< HEAD 2.10 0.9.0-incubating 2.1.4 - 1.1-M7 - 1.2.3 -======= - 2.9.3 - 0.12.1 - 2.0.5 ->>>>>>> master 1.7.2 1.2.17 1.0.4 diff --git a/pyspark b/pyspark index 4941a36d0d..69d49eb87c 100755 --- a/pyspark +++ b/pyspark @@ -23,7 +23,7 @@ FWDIR="$(cd `dirname $0`; pwd)" # Export this as SPARK_HOME export SPARK_HOME="$FWDIR" -SCALA_VERSION=2.9.3 +SCALA_VERSION=2.10 # Exit if the user hasn't compiled Spark if [ ! -f "$FWDIR/RELEASE" ]; then diff --git a/pyspark2.cmd b/pyspark2.cmd index f58e349643..21f9a34388 100644 --- a/pyspark2.cmd +++ b/pyspark2.cmd @@ -17,7 +17,7 @@ rem See the License for the specific language governing permissions and rem limitations under the License. rem -set SCALA_VERSION=2.9.3 +set SCALA_VERSION=2.10 rem Figure out where the Spark framework is installed set FWDIR=%~dp0 diff --git a/repl-bin/src/deb/bin/run b/repl-bin/src/deb/bin/run index 8b5d8300f2..47bb654baf 100755 --- a/repl-bin/src/deb/bin/run +++ b/repl-bin/src/deb/bin/run @@ -17,7 +17,7 @@ # limitations under the License. # -SCALA_VERSION=2.9.3 +SCALA_VERSION=2.10 # Figure out where the Scala framework is installed FWDIR="$(cd `dirname $0`; pwd)" diff --git a/run-example b/run-example index 08ec717ca5..feade6589a 100755 --- a/run-example +++ b/run-example @@ -17,7 +17,7 @@ # limitations under the License. 
# -SCALA_VERSION=2.9.3 +SCALA_VERSION=2.10 # Figure out where the Scala framework is installed FWDIR="$(cd `dirname $0`; pwd)" diff --git a/run-example2.cmd b/run-example2.cmd index dbb371ecfc..d4ad98d6e7 100644 --- a/run-example2.cmd +++ b/run-example2.cmd @@ -17,7 +17,7 @@ rem See the License for the specific language governing permissions and rem limitations under the License. rem -set SCALA_VERSION=2.9.3 +set SCALA_VERSION=2.10 rem Figure out where the Spark framework is installed set FWDIR=%~dp0 -- cgit v1.2.3 From 7ff4c2d399e1497966689cbe13edf2cd2a9a29b1 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 26 Sep 2013 10:48:24 +0530 Subject: fixed maven build for scala 2.10 --- assembly/pom.xml | 2 +- bagel/pom.xml | 8 ++++---- core/pom.xml | 31 ++++++++++++++----------------- examples/pom.xml | 10 +++++----- mllib/pom.xml | 8 ++++---- pom.xml | 42 ++++++++++++++++++------------------------ project/SparkBuild.scala | 3 +-- repl/pom.xml | 12 ++++++------ streaming/pom.xml | 14 +++++++++----- tools/pom.xml | 6 +++--- yarn/pom.xml | 4 ++-- 11 files changed, 67 insertions(+), 73 deletions(-) (limited to 'pom.xml') diff --git a/assembly/pom.xml b/assembly/pom.xml index 808a829e19..47a110ca6c 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -79,7 +79,7 @@ maven-shade-plugin false - ${project.build.directory}/scala-${scala.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar + ${project.build.directory}/scala-${scala-short.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar *:* diff --git a/bagel/pom.xml b/bagel/pom.xml index 51173c32b2..feaed6d2b0 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -43,18 +43,18 @@ org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} test - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.scalatest diff --git a/core/pom.xml b/core/pom.xml index d42c2ee2fd..8d9f0e386f 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -83,7 +83,7 @@ com.twitter - chill_2.10 + chill_${scala-short.version} 0.3.1 @@ -91,21 +91,13 @@ chill-java 0.3.1 - - org.scala-lang - scala-actors - com.typesafe.akka - akka-remote_${scala.version} + akka-remote_${scala-short.version} com.typesafe.akka - akka-slf4j_${scala.version} - - - org.scala-lang - scalap + akka-slf4j_${scala-short.version} org.scala-lang @@ -113,7 +105,7 @@ net.liftweb - lift-json_2.10 + lift-json_${scala-short.version} it.unimi.dsi @@ -125,7 +117,7 @@ com.github.scala-incubator.io - scala-io-file_2.10 + scala-io-file_${scala-short.version} org.apache.mesos @@ -160,14 +152,19 @@ derby test + + commons-io + commons-io + test + org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} test @@ -187,8 +184,8 @@ - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.apache.maven.plugins diff --git a/examples/pom.xml b/examples/pom.xml index 9eb6c8a77f..b44a126546 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -77,17 +77,17 @@ com.twitter - algebird-core_2.10 + algebird-core_${scala-short.version} 0.1.11 org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test 
org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} test @@ -128,8 +128,8 @@ - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.apache.maven.plugins diff --git a/mllib/pom.xml b/mllib/pom.xml index 966caf6835..f4190148b1 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -48,12 +48,12 @@ org.scalatest - scalatest_${scala.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala.version} + scalacheck_2.10 test @@ -63,8 +63,8 @@ - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.scalatest diff --git a/pom.xml b/pom.xml index cd4593388f..4033956968 100644 --- a/pom.xml +++ b/pom.xml @@ -76,9 +76,10 @@ UTF-8 1.5 - 2.10 - 0.9.0-incubating - 2.1.4 + 2.10 + 2.10.2 + 0.12.1 + 2.2.1 1.7.2 1.2.17 1.0.4 @@ -214,7 +215,7 @@ com.twitter - chill_2.9.3 + chill_${scala-short.version} 0.3.1 @@ -222,19 +223,14 @@ chill-java 0.3.1 - - org.scala-lang - scala-actors - 2.10.0 - com.typesafe.akka - akka-remote_${scala.version} + akka-remote_${scala-short.version} ${akka.version} com.typesafe.akka - akka-slf4j_${scala.version} + akka-slf4j_${scala-short.version} ${akka.version} @@ -249,7 +245,7 @@ com.github.scala-incubator.io - scala-io-file_2.10 + scala-io-file_${scala-short.version} 0.4.1 @@ -270,7 +266,7 @@ net.liftweb - lift-json_2.10 + lift-json_${scala-short.version} 2.5.1 @@ -296,36 +292,34 @@ org.scala-lang scala-compiler - 2.10.0 + ${scala.version} org.scala-lang jline - 2.10.0 - - - org.scala-lang - scala-library ${scala.version} org.scala-lang - scalap + scala-library ${scala.version} - log4j log4j ${log4j.version} - org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} 1.9.1 test + + commons-io + commons-io + 2.4 + org.easymock easymock @@ -334,7 +328,7 @@ org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} 1.10.0 test diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f18ebf1400..ffa9c93925 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -183,8 +183,7 @@ object SparkBuild extends Build { def coreSettings = sharedSettings ++ Seq( name := "spark-core", resolvers ++= Seq( - // "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", - // "Spray Repository" at "http://repo.spray.cc/", + "JBoss Repository" at "http://repository.jboss.org/nexus/content/repositories/releases/", "Cloudera Repository" at "https://repository.cloudera.com/artifactory/cloudera-repos/" ), diff --git a/repl/pom.xml b/repl/pom.xml index b1ca5220bc..a7b5e1f3c7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -61,12 +61,12 @@ org.scala-lang scala-compiler - 2.10.0 + ${scala.version} org.scala-lang jline - 2.10.0 + ${scala.version} org.slf4j @@ -78,18 +78,18 @@ org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} test - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.apache.maven.plugins diff --git a/streaming/pom.xml b/streaming/pom.xml index 3c9baf5f69..8367256004 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -86,17 +86,17 @@ com.typesafe.akka - 
akka-zeromq_${scala.version} + akka-zeromq_${scala-short.version} ${akka.version} org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test org.scalacheck - scalacheck_${scala.version} + scalacheck_${scala-short.version} test @@ -109,10 +109,14 @@ slf4j-log4j12 test + + commons-io + commons-io + - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.scalatest diff --git a/tools/pom.xml b/tools/pom.xml index 77646a6816..0933c75a7f 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -43,14 +43,14 @@ org.scalatest - scalatest_${scala.version} + scalatest_${scala-short.version} test - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.apache.maven.plugins diff --git a/yarn/pom.xml b/yarn/pom.xml index 21b650d1ea..47e27ee41c 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -64,8 +64,8 @@ - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes + target/scala-${scala-short.version}/classes + target/scala-${scala-short.version}/test-classes org.apache.maven.plugins -- cgit v1.2.3 From 9b0c9c893d1b7d593b98c7117081051977fc81f3 Mon Sep 17 00:00:00 2001 From: Martin Weindel Date: Sat, 5 Oct 2013 21:41:09 +0200 Subject: scala 2.10 requires Java 1.6, using Scala 2.10.3, resolved maven-scala-plugin warning --- pom.xml | 12 +++++++++--- project/SparkBuild.scala | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index b9e872255e..e9f90135dd 100644 --- a/pom.xml +++ b/pom.xml @@ -75,10 +75,10 @@ UTF-8 UTF-8 - 1.5 + 1.6 2.10 - 2.10.2 + 2.10.3 0.13.0 2.2.1 1.7.2 @@ -257,7 +257,7 @@ io.netty netty-all - 4.0.0.Beta2 + 4.0.0.CR1 org.apache.derby @@ -269,6 +269,12 @@ net.liftweb lift-json_${scala-short.version} 2.5.1 + + + org.scala-lang + scalap + + com.codahale.metrics diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b5e65a1856..5d4250a53b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -34,8 +34,8 @@ object SparkBuild extends Build { val HBASE_VERSION = "0.94.6" // Target JVM version - val SCALAC_JVM_VERSION = "jvm-1.5" - val JAVAC_JVM_VERSION = "1.5" + val SCALAC_JVM_VERSION = "jvm-1.6" + val JAVAC_JVM_VERSION = "1.6" lazy val root = Project("root", file("."), settings = rootSettings) aggregate(allProjects: _*) @@ -80,7 +80,7 @@ object SparkBuild extends Build { def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", version := "0.8.0-SNAPSHOT", - scalaVersion := "2.10.2", + scalaVersion := "2.10.3", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation", "-target:" + SCALAC_JVM_VERSION), javacOptions := Seq("-target", JAVAC_JVM_VERSION, "-source", JAVAC_JVM_VERSION), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, -- cgit v1.2.3 From 4e44d65b5ef7fb7c8d24186dd9e98ec10d9877b7 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sat, 19 Oct 2013 12:35:55 -0700 Subject: Exclusion rules for Maven build files. 
--- examples/pom.xml | 8 ++++++ pom.xml | 74 ++++++++++++++++++++++--------------------------------- streaming/pom.xml | 22 +++++++++++++++++ 3 files changed, 60 insertions(+), 44 deletions(-) (limited to 'pom.xml') diff --git a/examples/pom.xml b/examples/pom.xml index b8c020a321..15399a8a33 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -137,6 +137,14 @@ org.apache.cassandra.deps avro + + org.sonatype.sisu.inject + * + + + org.xerial.snappy + * + diff --git a/pom.xml b/pom.xml index 5ad7b1befb..54f100c37f 100644 --- a/pom.xml +++ b/pom.xml @@ -250,16 +250,34 @@ com.typesafe.akka akka-actor ${akka.version} + + + org.jboss.netty + netty + + com.typesafe.akka akka-remote ${akka.version} + + + org.jboss.netty + netty + + com.typesafe.akka akka-slf4j ${akka.version} + + + org.jboss.netty + netty + + it.unimi.dsi @@ -394,19 +412,11 @@ org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-jaxrs + * - org.codehaus.jackson - jackson-xc + org.sonatype.sisu.inject + * @@ -430,19 +440,11 @@ org.codehaus.jackson - jackson-core-asl - - - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-jaxrs + * - org.codehaus.jackson - jackson-xc + org.sonatype.sisu.inject + * @@ -461,19 +463,11 @@ org.codehaus.jackson - jackson-core-asl + * - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-jaxrs - - - org.codehaus.jackson - jackson-xc + org.sonatype.sisu.inject + * @@ -492,19 +486,11 @@ org.codehaus.jackson - jackson-core-asl + * - org.codehaus.jackson - jackson-mapper-asl - - - org.codehaus.jackson - jackson-jaxrs - - - org.codehaus.jackson - jackson-xc + org.sonatype.sisu.inject + * diff --git a/streaming/pom.xml b/streaming/pom.xml index 3b25fb49fb..bcbed1644a 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -69,17 +69,33 @@ org.jboss.netty netty + + org.xerial.snappy + * + com.github.sgroschupf zkclient 0.1 + + + org.jboss.netty + netty + + org.twitter4j twitter4j-stream 3.0.3 + + + org.jboss.netty + netty + + org.scala-lang @@ -89,6 +105,12 @@ com.typesafe.akka akka-zeromq 2.0.3 + + + org.jboss.netty + netty + + org.scalatest -- cgit v1.2.3 From dadfc63b0314060876ac1787d4de72b37221139c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 23 Oct 2013 15:29:22 -0700 Subject: Fix Maven build to use MQTT repository --- pom.xml | 11 +++++++++++ project/SparkBuild.scala | 6 +++--- 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 54f100c37f..53ac82efd0 100644 --- a/pom.xml +++ b/pom.xml @@ -147,6 +147,17 @@ false + + mqtt-repo + MQTT Repository + https://repo.eclipse.org/content/repositories/paho-releases/ + + true + + + false + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 20f2c018fa..8d7cbae821 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -108,9 +108,9 @@ object SparkBuild extends Build { // Shared between both core and streaming. resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), - // Shared between both examples and streaming. + // Shared between both examples and streaming. 
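    // Assumed rationale, not stated in the commit message: the Eclipse Paho MQTT client used by the streaming module is published only to this repository.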
resolvers ++= Seq("Mqtt Repository" at "https://repo.eclipse.org/content/repositories/paho-releases/"), - + // For Sonatype publishing resolvers ++= Seq("sonatype-snapshots" at "https://oss.sonatype.org/content/repositories/snapshots", "sonatype-staging" at "https://oss.sonatype.org/service/local/staging/deploy/maven2/"), @@ -289,7 +289,7 @@ object SparkBuild extends Build { "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy), "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), "com.typesafe.akka" % "akka-zeromq" % "2.0.5" excludeAll(excludeNetty), - "org.apache.kafka" % "kafka_2.9.2" % "0.8.0-beta1" + "org.apache.kafka" % "kafka_2.9.2" % "0.8.0-beta1" exclude("com.sun.jdmk", "jmxtools") exclude("com.sun.jmx", "jmxri") ) -- cgit v1.2.3 From a35472e1dd2ea1b5a0b1fb6b382f5a98f5aeba5a Mon Sep 17 00:00:00 2001 From: tgravescs Date: Mon, 4 Nov 2013 09:40:40 -0600 Subject: Allow spark on yarn to be run from HDFS. Allows the spark.jar, app.jar, and log4j.properties to be put into hdfs. --- docs/running-on-yarn.md | 1 + pom.xml | 6 + yarn/pom.xml | 50 ++++ .../spark/deploy/yarn/ApplicationMaster.scala | 2 +- .../org/apache/spark/deploy/yarn/Client.scala | 276 +++++++++++---------- .../yarn/ClientDistributedCacheManager.scala | 228 +++++++++++++++++ .../apache/spark/deploy/yarn/WorkerRunnable.scala | 42 +--- .../yarn/ClientDistributedCacheManagerSuite.scala | 220 ++++++++++++++++ 8 files changed, 653 insertions(+), 172 deletions(-) create mode 100644 yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala create mode 100644 yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala (limited to 'pom.xml') diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 2898af0bed..6fd1d0d150 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -21,6 +21,7 @@ The assembled JAR will be something like this: # Preparations - Building a YARN-enabled assembly (see above). +- The assembled jar can be installed into HDFS or used locally. - Your application code must be packaged into a separate JAR file. If you want to test out the YARN deployment mode, you can use the current Spark examples. A `spark-examples_{{site.SCALA_VERSION}}-{{site.SPARK_VERSION}}` file can be generated by running `sbt/sbt assembly`. NOTE: since the documentation you're reading is for Spark version {{site.SPARK_VERSION}}, we are assuming here that you have downloaded Spark {{site.SPARK_VERSION}} or checked it out of source control. If you are using a different version of Spark, the version numbers in the jar generated by the sbt package command will obviously be different. diff --git a/pom.xml b/pom.xml index 53ac82efd0..edcc3b35cd 100644 --- a/pom.xml +++ b/pom.xml @@ -385,6 +385,12 @@ 3.1 test + + org.mockito + mockito-all + 1.8.5 + test + org.scalacheck scalacheck_2.9.3 diff --git a/yarn/pom.xml b/yarn/pom.xml index 3bc619df07..8a065c6d7d 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -61,6 +61,16 @@ org.apache.avro avro-ipc + + org.scalatest + scalatest_2.9.3 + test + + + org.mockito + mockito-all + test + @@ -106,6 +116,46 @@ + + org.apache.maven.plugins + maven-antrun-plugin + + + test + + run + + + true + + + + + + + + + + + + + + + + + + + + org.scalatest + scalatest-maven-plugin + + + ${basedir}/.. 
+ 1 + ${spark.classpath} + + + diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index c1a87d3373..4302ef4cda 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -349,7 +349,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, conf: Configuration) e try { val preserveFiles = System.getProperty("spark.yarn.preserve.staging.files", "false").toBoolean if (!preserveFiles) { - stagingDirPath = new Path(System.getenv("SPARK_YARN_JAR_PATH")).getParent() + stagingDirPath = new Path(System.getenv("SPARK_YARN_STAGING_DIR")) if (stagingDirPath == null) { logError("Staging directory is null") return diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 1a380ae714..4e0e060ddc 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -17,26 +17,31 @@ package org.apache.spark.deploy.yarn -import java.net.{InetSocketAddress, URI} +import java.net.{InetAddress, InetSocketAddress, UnknownHostException, URI} import java.nio.ByteBuffer + import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} +import org.apache.hadoop.fs.{FileContext, FileStatus, FileSystem, Path, FileUtil} +import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.mapred.Master import org.apache.hadoop.net.NetUtils import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.api._ +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.client.YarnClientImpl import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC +import org.apache.hadoop.yarn.util.{Apps, Records} + import scala.collection.mutable.HashMap +import scala.collection.mutable.Map import scala.collection.JavaConversions._ + import org.apache.spark.Logging import org.apache.spark.util.Utils -import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils} -import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import org.apache.spark.deploy.SparkHadoopUtil class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl with Logging { @@ -46,13 +51,14 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl var rpc: YarnRPC = YarnRPC.create(conf) val yarnConf: YarnConfiguration = new YarnConfiguration(conf) val credentials = UserGroupInformation.getCurrentUser().getCredentials() - private var distFiles = None: Option[String] - private var distFilesTimeStamps = None: Option[String] - private var distFilesFileSizes = None: Option[String] - private var distArchives = None: Option[String] - private var distArchivesTimeStamps = None: Option[String] - private var distArchivesFileSizes = None: Option[String] - + private val SPARK_STAGING: String = ".sparkStaging" + private val distCacheMgr = new ClientDistributedCacheManager() + + // staging directory is private! 
-> rwx-------- + val STAGING_DIR_PERMISSION: FsPermission = FsPermission.createImmutable(0700:Short) + // app files are world-wide readable and owner writable -> rw-r--r-- + val APP_FILE_PERMISSION: FsPermission = FsPermission.createImmutable(0644:Short) + def run() { init(yarnConf) start() @@ -63,8 +69,9 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl verifyClusterResources(newApp) val appContext = createApplicationSubmissionContext(appId) - val localResources = prepareLocalResources(appId, ".sparkStaging") - val env = setupLaunchEnv(localResources) + val appStagingDir = getAppStagingDir(appId) + val localResources = prepareLocalResources(appStagingDir) + val env = setupLaunchEnv(localResources, appStagingDir) val amContainer = createContainerLaunchContext(newApp, localResources, env) appContext.setQueue(args.amQueue) @@ -76,7 +83,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl monitorApplication(appId) System.exit(0) } - + + def getAppStagingDir(appId: ApplicationId): String = { + SPARK_STAGING + Path.SEPARATOR + appId.toString() + Path.SEPARATOR + } def logClusterResourceDetails() { val clusterMetrics: YarnClusterMetrics = super.getYarnClusterMetrics @@ -116,73 +126,73 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl return appContext } + /* + * see if two file systems are the same or not. + */ + private def compareFs(srcFs: FileSystem, destFs: FileSystem): Boolean = { + val srcUri = srcFs.getUri() + val dstUri = destFs.getUri() + if (srcUri.getScheme() == null) { + return false + } + if (!srcUri.getScheme().equals(dstUri.getScheme())) { + return false + } + var srcHost = srcUri.getHost() + var dstHost = dstUri.getHost() + if ((srcHost != null) && (dstHost != null)) { + try { + srcHost = InetAddress.getByName(srcHost).getCanonicalHostName(); + dstHost = InetAddress.getByName(dstHost).getCanonicalHostName(); + } catch { + case e: UnknownHostException => + return false + } + if (!srcHost.equals(dstHost)) { + return false + } + } else if (srcHost == null && dstHost != null) { + return false + } else if (srcHost != null && dstHost == null) { + return false + } + //check for ports + if (srcUri.getPort() != dstUri.getPort()) { + return false + } + return true; + } + /** - * Copy the local file into HDFS and configure to be distributed with the - * job via the distributed cache. - * If a fragment is specified the file will be referenced as that fragment. + * Copy the file into HDFS if needed. 
*/ - private def copyLocalFile( + private def copyRemoteFile( dstDir: Path, - resourceType: LocalResourceType, originalPath: Path, replication: Short, - localResources: HashMap[String,LocalResource], - fragment: String, - appMasterOnly: Boolean = false): Unit = { + setPerms: Boolean = false): Path = { val fs = FileSystem.get(conf) - val newPath = new Path(dstDir, originalPath.getName()) - logInfo("Uploading " + originalPath + " to " + newPath) - fs.copyFromLocalFile(false, true, originalPath, newPath) - fs.setReplication(newPath, replication); - val destStatus = fs.getFileStatus(newPath) - - val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - amJarRsrc.setType(resourceType) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) - amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(newPath)) - amJarRsrc.setTimestamp(destStatus.getModificationTime()) - amJarRsrc.setSize(destStatus.getLen()) - var pathURI: URI = new URI(newPath.toString() + "#" + originalPath.getName()); - if ((fragment == null) || (fragment.isEmpty())){ - localResources(originalPath.getName()) = amJarRsrc - } else { - localResources(fragment) = amJarRsrc - pathURI = new URI(newPath.toString() + "#" + fragment); - } - val distPath = pathURI.toString() - if (appMasterOnly == true) return - if (resourceType == LocalResourceType.FILE) { - distFiles match { - case Some(path) => - distFilesFileSizes = Some(distFilesFileSizes.get + "," + - destStatus.getLen().toString()) - distFilesTimeStamps = Some(distFilesTimeStamps.get + "," + - destStatus.getModificationTime().toString()) - distFiles = Some(path + "," + distPath) - case _ => - distFilesFileSizes = Some(destStatus.getLen().toString()) - distFilesTimeStamps = Some(destStatus.getModificationTime().toString()) - distFiles = Some(distPath) - } - } else { - distArchives match { - case Some(path) => - distArchivesTimeStamps = Some(distArchivesTimeStamps.get + "," + - destStatus.getModificationTime().toString()) - distArchivesFileSizes = Some(distArchivesFileSizes.get + "," + - destStatus.getLen().toString()) - distArchives = Some(path + "," + distPath) - case _ => - distArchivesTimeStamps = Some(destStatus.getModificationTime().toString()) - distArchivesFileSizes = Some(destStatus.getLen().toString()) - distArchives = Some(distPath) - } - } + val remoteFs = originalPath.getFileSystem(conf); + var newPath = originalPath + if (! 
compareFs(remoteFs, fs)) { + newPath = new Path(dstDir, originalPath.getName()) + logInfo("Uploading " + originalPath + " to " + newPath) + FileUtil.copy(remoteFs, originalPath, fs, newPath, false, conf); + fs.setReplication(newPath, replication); + if (setPerms) fs.setPermission(newPath, new FsPermission(APP_FILE_PERMISSION)) + } + // resolve any symlinks in the URI path so using a "current" symlink + // to point to a specific version shows the specific version + // in the distributed cache configuration + val qualPath = fs.makeQualified(newPath) + val fc = FileContext.getFileContext(qualPath.toUri(), conf) + val destPath = fc.resolvePath(qualPath) + destPath } - def prepareLocalResources(appId: ApplicationId, sparkStagingDir: String): HashMap[String, LocalResource] = { + def prepareLocalResources(appStagingDir: String): HashMap[String, LocalResource] = { logInfo("Preparing Local resources") - // Upload Spark and the application JAR to the remote file system + // Upload Spark and the application JAR to the remote file system if necessary // Add them as local resources to the AM val fs = FileSystem.get(conf) @@ -193,9 +203,7 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl System.exit(1) } } - - val pathSuffix = sparkStagingDir + "/" + appId.toString() + "/" - val dst = new Path(fs.getHomeDirectory(), pathSuffix) + val dst = new Path(fs.getHomeDirectory(), appStagingDir) val replication = System.getProperty("spark.yarn.submit.file.replication", "3").toShort if (UserGroupInformation.isSecurityEnabled()) { @@ -203,55 +211,65 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl dstFs.addDelegationTokens(delegTokenRenewer, credentials); } val localResources = HashMap[String, LocalResource]() + FileSystem.mkdirs(fs, dst, new FsPermission(STAGING_DIR_PERMISSION)) + + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + + if (System.getenv("SPARK_JAR") == null || args.userJar == null) { + logError("Error: You must set SPARK_JAR environment variable and specify a user jar!") + System.exit(1) + } - Map("spark.jar" -> System.getenv("SPARK_JAR"), "app.jar" -> args.userJar, "log4j.properties" -> System.getenv("SPARK_LOG4J_CONF")) + Map(Client.SPARK_JAR -> System.getenv("SPARK_JAR"), Client.APP_JAR -> args.userJar, + Client.LOG4J_PROP -> System.getenv("SPARK_LOG4J_CONF")) .foreach { case(destName, _localPath) => val localPath: String = if (_localPath != null) _localPath.trim() else "" if (! 
localPath.isEmpty()) { - val src = new Path(localPath) - val newPath = new Path(dst, destName) - logInfo("Uploading " + src + " to " + newPath) - fs.copyFromLocalFile(false, true, src, newPath) - fs.setReplication(newPath, replication); - val destStatus = fs.getFileStatus(newPath) - - val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - amJarRsrc.setType(LocalResourceType.FILE) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) - amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(newPath)) - amJarRsrc.setTimestamp(destStatus.getModificationTime()) - amJarRsrc.setSize(destStatus.getLen()) - localResources(destName) = amJarRsrc + var localURI = new URI(localPath) + // if not specified assume these are in the local filesystem to keep behavior like Hadoop + if (localURI.getScheme() == null) { + localURI = new URI(FileSystem.getLocal(conf).makeQualified(new Path(localPath)).toString()) + } + val setPermissions = if (destName.equals(Client.APP_JAR)) true else false + val destPath = copyRemoteFile(dst, new Path(localURI), replication, setPermissions) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + destName, statCache) } } // handle any add jars if ((args.addJars != null) && (!args.addJars.isEmpty())){ args.addJars.split(',').foreach { case file: String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.FILE, tmp, replication, localResources, - tmpURI.getFragment(), true) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + linkname, statCache, true) } } // handle any distributed cache files if ((args.files != null) && (!args.files.isEmpty())){ args.files.split(',').foreach { case file: String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.FILE, tmp, replication, localResources, - tmpURI.getFragment()) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, + linkname, statCache) } } // handle any distributed cache archives if ((args.archives != null) && (!args.archives.isEmpty())) { args.archives.split(',').foreach { case file:String => - val tmpURI = new URI(file) - val tmp = new Path(tmpURI) - copyLocalFile(dst, LocalResourceType.ARCHIVE, tmp, replication, - localResources, tmpURI.getFragment()) + val localURI = new URI(file.trim()) + val localPath = new Path(localURI) + val linkname = Option(localURI.getFragment()).getOrElse(localPath.getName()) + val destPath = copyRemoteFile(dst, localPath, replication) + distCacheMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, + linkname, statCache) } } @@ -259,44 +277,21 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl return localResources } - def setupLaunchEnv(localResources: HashMap[String, LocalResource]): HashMap[String, String] = { + def setupLaunchEnv( + localResources: HashMap[String, LocalResource], + stagingDir: String): HashMap[String, String] = { logInfo("Setting up the launch environment") - val 
log4jConfLocalRes = localResources.getOrElse("log4j.properties", null) + val log4jConfLocalRes = localResources.getOrElse(Client.LOG4J_PROP, null) val env = new HashMap[String, String]() Client.populateClasspath(yarnConf, log4jConfLocalRes != null, env) env("SPARK_YARN_MODE") = "true" - env("SPARK_YARN_JAR_PATH") = - localResources("spark.jar").getResource().getScheme.toString() + "://" + - localResources("spark.jar").getResource().getFile().toString() - env("SPARK_YARN_JAR_TIMESTAMP") = localResources("spark.jar").getTimestamp().toString() - env("SPARK_YARN_JAR_SIZE") = localResources("spark.jar").getSize().toString() - - env("SPARK_YARN_USERJAR_PATH") = - localResources("app.jar").getResource().getScheme.toString() + "://" + - localResources("app.jar").getResource().getFile().toString() - env("SPARK_YARN_USERJAR_TIMESTAMP") = localResources("app.jar").getTimestamp().toString() - env("SPARK_YARN_USERJAR_SIZE") = localResources("app.jar").getSize().toString() - - if (log4jConfLocalRes != null) { - env("SPARK_YARN_LOG4J_PATH") = - log4jConfLocalRes.getResource().getScheme.toString() + "://" + log4jConfLocalRes.getResource().getFile().toString() - env("SPARK_YARN_LOG4J_TIMESTAMP") = log4jConfLocalRes.getTimestamp().toString() - env("SPARK_YARN_LOG4J_SIZE") = log4jConfLocalRes.getSize().toString() - } + env("SPARK_YARN_STAGING_DIR") = stagingDir // set the environment variables to be passed on to the Workers - if (distFiles != None) { - env("SPARK_YARN_CACHE_FILES") = distFiles.get - env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = distFilesTimeStamps.get - env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = distFilesFileSizes.get - } - if (distArchives != None) { - env("SPARK_YARN_CACHE_ARCHIVES") = distArchives.get - env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = distArchivesTimeStamps.get - env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") = distArchivesFileSizes.get - } + distCacheMgr.setDistFilesEnv(env) + distCacheMgr.setDistArchivesEnv(env) // allow users to specify some environment variables Apps.setEnvFromInputString(env, System.getenv("SPARK_YARN_USER_ENV")) @@ -365,6 +360,11 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl javaCommand = Environment.JAVA_HOME.$() + "/bin/java" } + if (args.userClass == null) { + logError("Error: You must specify a user class!") + System.exit(1) + } + val commands = List[String](javaCommand + " -server " + JAVA_OPTS + @@ -432,6 +432,10 @@ class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl } object Client { + val SPARK_JAR: String = "spark.jar" + val APP_JAR: String = "app.jar" + val LOG4J_PROP: String = "log4j.properties" + def main(argStrings: Array[String]) { // Set an env variable indicating we are running in YARN mode. 
// Note that anything with SPARK prefix gets propagated to all (remote) processes @@ -453,22 +457,22 @@ object Client { // If log4j present, ensure ours overrides all others if (addLog4j) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "log4j.properties") + Path.SEPARATOR + LOG4J_PROP) } // normally the users app.jar is last in case conflicts with spark jars val userClasspathFirst = System.getProperty("spark.yarn.user.classpath.first", "false") .toBoolean if (userClasspathFirst) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "app.jar") + Path.SEPARATOR + APP_JAR) } Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "spark.jar") + Path.SEPARATOR + SPARK_JAR) Client.populateHadoopClasspath(conf, env) if (!userClasspathFirst) { Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + - Path.SEPARATOR + "app.jar") + Path.SEPARATOR + APP_JAR) } Apps.addToEnvironment(env, Environment.CLASSPATH.name, Environment.PWD.$() + Path.SEPARATOR + "*") diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala new file mode 100644 index 0000000000..07686fefd7 --- /dev/null +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.yarn + +import java.net.URI; + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.permission.FsAction +import org.apache.hadoop.yarn.api.records.LocalResource +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility +import org.apache.hadoop.yarn.api.records.LocalResourceType +import org.apache.hadoop.yarn.util.{Records, ConverterUtils} + +import org.apache.spark.Logging + +import scala.collection.mutable.HashMap +import scala.collection.mutable.LinkedHashMap +import scala.collection.mutable.Map + + +/** Client side methods to setup the Hadoop distributed cache */ +class ClientDistributedCacheManager() extends Logging { + private val distCacheFiles: Map[String, Tuple3[String, String, String]] = + LinkedHashMap[String, Tuple3[String, String, String]]() + private val distCacheArchives: Map[String, Tuple3[String, String, String]] = + LinkedHashMap[String, Tuple3[String, String, String]]() + + + /** + * Add a resource to the list of distributed cache resources. 
This list can + * be sent to the ApplicationMaster and possibly the workers so that it can + * be downloaded into the Hadoop distributed cache for use by this application. + * Adds the LocalResource to the localResources HashMap passed in and saves + * the stats of the resources to they can be sent to the workers and verified. + * + * @param fs FileSystem + * @param conf Configuration + * @param destPath path to the resource + * @param localResources localResource hashMap to insert the resource into + * @param resourceType LocalResourceType + * @param link link presented in the distributed cache to the destination + * @param statCache cache to store the file/directory stats + * @param appMasterOnly Whether to only add the resource to the app master + */ + def addResource( + fs: FileSystem, + conf: Configuration, + destPath: Path, + localResources: HashMap[String, LocalResource], + resourceType: LocalResourceType, + link: String, + statCache: Map[URI, FileStatus], + appMasterOnly: Boolean = false) = { + val destStatus = fs.getFileStatus(destPath) + val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] + amJarRsrc.setType(resourceType) + val visibility = getVisibility(conf, destPath.toUri(), statCache) + amJarRsrc.setVisibility(visibility) + amJarRsrc.setResource(ConverterUtils.getYarnUrlFromPath(destPath)) + amJarRsrc.setTimestamp(destStatus.getModificationTime()) + amJarRsrc.setSize(destStatus.getLen()) + if (link == null || link.isEmpty()) throw new Exception("You must specify a valid link name") + localResources(link) = amJarRsrc + + if (appMasterOnly == false) { + val uri = destPath.toUri() + val pathURI = new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, link) + if (resourceType == LocalResourceType.FILE) { + distCacheFiles(pathURI.toString()) = (destStatus.getLen().toString(), + destStatus.getModificationTime().toString(), visibility.name()) + } else { + distCacheArchives(pathURI.toString()) = (destStatus.getLen().toString(), + destStatus.getModificationTime().toString(), visibility.name()) + } + } + } + + /** + * Adds the necessary cache file env variables to the env passed in + * @param env + */ + def setDistFilesEnv(env: Map[String, String]) = { + val (keys, tupleValues) = distCacheFiles.unzip + val (sizes, timeStamps, visibilities) = tupleValues.unzip3 + + if (keys.size > 0) { + env("SPARK_YARN_CACHE_FILES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_FILE_SIZES") = + sizes.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_FILES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + } + } + + /** + * Adds the necessary cache archive env variables to the env passed in + * @param env + */ + def setDistArchivesEnv(env: Map[String, String]) = { + val (keys, tupleValues) = distCacheArchives.unzip + val (sizes, timeStamps, visibilities) = tupleValues.unzip3 + + if (keys.size > 0) { + env("SPARK_YARN_CACHE_ARCHIVES") = keys.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") = + timeStamps.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") = + sizes.reduceLeft[String] { (acc,n) => acc + "," + n } + env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") = + visibilities.reduceLeft[String] { (acc,n) => acc + "," + n } + } + } + + /** + * Returns the local resource 
visibility depending on the cache file permissions + * @param conf + * @param uri + * @param statCache + * @return LocalResourceVisibility + */ + def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): + LocalResourceVisibility = { + if (isPublic(conf, uri, statCache)) { + return LocalResourceVisibility.PUBLIC + } + return LocalResourceVisibility.PRIVATE + } + + /** + * Returns a boolean to denote whether a cache file is visible to all(public) + * or not + * @param conf + * @param uri + * @param statCache + * @return true if the path in the uri is visible to all, false otherwise + */ + def isPublic(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): Boolean = { + val fs = FileSystem.get(uri, conf) + val current = new Path(uri.getPath()) + //the leaf level file should be readable by others + if (!checkPermissionOfOther(fs, current, FsAction.READ, statCache)) { + return false + } + return ancestorsHaveExecutePermissions(fs, current.getParent(), statCache) + } + + /** + * Returns true if all ancestors of the specified path have the 'execute' + * permission set for all users (i.e. that other users can traverse + * the directory heirarchy to the given path) + * @param fs + * @param path + * @param statCache + * @return true if all ancestors have the 'execute' permission set for all users + */ + def ancestorsHaveExecutePermissions(fs: FileSystem, path: Path, + statCache: Map[URI, FileStatus]): Boolean = { + var current = path + while (current != null) { + //the subdirs in the path should have execute permissions for others + if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE, statCache)) { + return false + } + current = current.getParent() + } + return true + } + + /** + * Checks for a given path whether the Other permissions on it + * imply the permission in the passed FsAction + * @param fs + * @param path + * @param action + * @param statCache + * @return true if the path in the uri is visible to all, false otherwise + */ + def checkPermissionOfOther(fs: FileSystem, path: Path, + action: FsAction, statCache: Map[URI, FileStatus]): Boolean = { + val status = getFileStatus(fs, path.toUri(), statCache); + val perms = status.getPermission() + val otherAction = perms.getOtherAction() + if (otherAction.implies(action)) { + return true; + } + return false + } + + /** + * Checks to see if the given uri exists in the cache, if it does it + * returns the existing FileStatus, otherwise it stats the uri, stores + * it in the cache, and returns the FileStatus. 
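+   * (statCache is consulted before going to the FileSystem, so the repeated ancestor-directory permission checks above do not re-stat the same path)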
+ * @param fs + * @param uri + * @param statCache + * @return FileStatus + */ + def getFileStatus(fs: FileSystem, uri: URI, statCache: Map[URI, FileStatus]): FileStatus = { + val stat = statCache.get(uri) match { + case Some(existstat) => existstat + case None => + val newStat = fs.getFileStatus(new Path(uri)) + statCache.put(uri, newStat) + newStat + } + return stat + } +} diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala index ba352daac4..7a66532254 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerRunnable.scala @@ -142,11 +142,12 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S rtype: LocalResourceType, localResources: HashMap[String, LocalResource], timestamp: String, - size: String) = { + size: String, + vis: String) = { val uri = new URI(file) val amJarRsrc = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] amJarRsrc.setType(rtype) - amJarRsrc.setVisibility(LocalResourceVisibility.APPLICATION) + amJarRsrc.setVisibility(LocalResourceVisibility.valueOf(vis)) amJarRsrc.setResource(ConverterUtils.getYarnUrlFromURI(uri)) amJarRsrc.setTimestamp(timestamp.toLong) amJarRsrc.setSize(size.toLong) @@ -158,44 +159,14 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S logInfo("Preparing Local resources") val localResources = HashMap[String, LocalResource]() - // Spark JAR - val sparkJarResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - sparkJarResource.setType(LocalResourceType.FILE) - sparkJarResource.setVisibility(LocalResourceVisibility.APPLICATION) - sparkJarResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_JAR_PATH")))) - sparkJarResource.setTimestamp(System.getenv("SPARK_YARN_JAR_TIMESTAMP").toLong) - sparkJarResource.setSize(System.getenv("SPARK_YARN_JAR_SIZE").toLong) - localResources("spark.jar") = sparkJarResource - // User JAR - val userJarResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - userJarResource.setType(LocalResourceType.FILE) - userJarResource.setVisibility(LocalResourceVisibility.APPLICATION) - userJarResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_USERJAR_PATH")))) - userJarResource.setTimestamp(System.getenv("SPARK_YARN_USERJAR_TIMESTAMP").toLong) - userJarResource.setSize(System.getenv("SPARK_YARN_USERJAR_SIZE").toLong) - localResources("app.jar") = userJarResource - - // Log4j conf - if available - if (System.getenv("SPARK_YARN_LOG4J_PATH") != null) { - val log4jConfResource = Records.newRecord(classOf[LocalResource]).asInstanceOf[LocalResource] - log4jConfResource.setType(LocalResourceType.FILE) - log4jConfResource.setVisibility(LocalResourceVisibility.APPLICATION) - log4jConfResource.setResource(ConverterUtils.getYarnUrlFromURI( - new URI(System.getenv("SPARK_YARN_LOG4J_PATH")))) - log4jConfResource.setTimestamp(System.getenv("SPARK_YARN_LOG4J_TIMESTAMP").toLong) - log4jConfResource.setSize(System.getenv("SPARK_YARN_LOG4J_SIZE").toLong) - localResources("log4j.properties") = log4jConfResource - } - if (System.getenv("SPARK_YARN_CACHE_FILES") != null) { val timeStamps = System.getenv("SPARK_YARN_CACHE_FILES_TIME_STAMPS").split(',') val fileSizes = System.getenv("SPARK_YARN_CACHE_FILES_FILE_SIZES").split(',') val distFiles = 
System.getenv("SPARK_YARN_CACHE_FILES").split(',') + val visibilities = System.getenv("SPARK_YARN_CACHE_FILES_VISIBILITIES").split(',') for( i <- 0 to distFiles.length - 1) { setupDistributedCache(distFiles(i), LocalResourceType.FILE, localResources, timeStamps(i), - fileSizes(i)) + fileSizes(i), visibilities(i)) } } @@ -203,9 +174,10 @@ class WorkerRunnable(container: Container, conf: Configuration, masterAddress: S val timeStamps = System.getenv("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS").split(',') val fileSizes = System.getenv("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES").split(',') val distArchives = System.getenv("SPARK_YARN_CACHE_ARCHIVES").split(',') + val visibilities = System.getenv("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES").split(',') for( i <- 0 to distArchives.length - 1) { setupDistributedCache(distArchives(i), LocalResourceType.ARCHIVE, localResources, - timeStamps(i), fileSizes(i)) + timeStamps(i), fileSizes(i), visibilities(i)) } } diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala new file mode 100644 index 0000000000..c0a2af0c6f --- /dev/null +++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManagerSuite.scala @@ -0,0 +1,220 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.yarn + +import java.net.URI; + +import org.scalatest.FunSuite +import org.scalatest.mock.MockitoSugar +import org.mockito.Mockito.when + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.permission.FsAction +import org.apache.hadoop.yarn.api.records.LocalResource +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility +import org.apache.hadoop.yarn.api.records.LocalResourceType +import org.apache.hadoop.yarn.util.{Records, ConverterUtils} + +import scala.collection.mutable.HashMap +import scala.collection.mutable.Map + + +class ClientDistributedCacheManagerSuite extends FunSuite with MockitoSugar { + + class MockClientDistributedCacheManager extends ClientDistributedCacheManager { + override def getVisibility(conf: Configuration, uri: URI, statCache: Map[URI, FileStatus]): + LocalResourceVisibility = { + return LocalResourceVisibility.PRIVATE + } + } + + test("test getFileStatus empty") { + val distMgr = new ClientDistributedCacheManager() + val fs = mock[FileSystem] + val uri = new URI("/tmp/testing") + when(fs.getFileStatus(new Path(uri))).thenReturn(new FileStatus()) + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val stat = distMgr.getFileStatus(fs, uri, statCache) + assert(stat.getPath() === null) + } + + test("test getFileStatus cached") { + val distMgr = new ClientDistributedCacheManager() + val fs = mock[FileSystem] + val uri = new URI("/tmp/testing") + val realFileStatus = new FileStatus(10, false, 1, 1024, 10, 10, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(new Path(uri))).thenReturn(new FileStatus()) + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus](uri -> realFileStatus) + val stat = distMgr.getFileStatus(fs, uri, statCache) + assert(stat.getPath().toString() === "/tmp/testing") + } + + test("test addResource") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, "link", + statCache, false) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 0) + assert(resource.getSize() === 0) + assert(resource.getType() === LocalResourceType.FILE) + + val env = new HashMap[String, String]() + distMgr.setDistFilesEnv(env) + assert(env("SPARK_YARN_CACHE_FILES") === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(env("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === "0") + assert(env("SPARK_YARN_CACHE_FILES_FILE_SIZES") === "0") + assert(env("SPARK_YARN_CACHE_FILES_VISIBILITIES") === LocalResourceVisibility.PRIVATE.name()) + + distMgr.setDistArchivesEnv(env) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === None) + + //add another one and verify both there and order 
correct + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing2")) + val destPath2 = new Path("file:///foo.invalid.com:8080/tmp/testing2") + when(fs.getFileStatus(destPath2)).thenReturn(realFileStatus) + distMgr.addResource(fs, conf, destPath2, localResources, LocalResourceType.FILE, "link2", + statCache, false) + val resource2 = localResources("link2") + assert(resource2.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource2.getResource()) === destPath2) + assert(resource2.getTimestamp() === 10) + assert(resource2.getSize() === 20) + assert(resource2.getType() === LocalResourceType.FILE) + + val env2 = new HashMap[String, String]() + distMgr.setDistFilesEnv(env2) + val timestamps = env2("SPARK_YARN_CACHE_FILES_TIME_STAMPS").split(',') + val files = env2("SPARK_YARN_CACHE_FILES").split(',') + val sizes = env2("SPARK_YARN_CACHE_FILES_FILE_SIZES").split(',') + val visibilities = env2("SPARK_YARN_CACHE_FILES_VISIBILITIES") .split(',') + assert(files(0) === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(timestamps(0) === "0") + assert(sizes(0) === "0") + assert(visibilities(0) === LocalResourceVisibility.PRIVATE.name()) + + assert(files(1) === "file:/foo.invalid.com:8080/tmp/testing2#link2") + assert(timestamps(1) === "10") + assert(sizes(1) === "20") + assert(visibilities(1) === LocalResourceVisibility.PRIVATE.name()) + } + + test("test addResource link null") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + when(fs.getFileStatus(destPath)).thenReturn(new FileStatus()) + + intercept[Exception] { + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.FILE, null, + statCache, false) + } + assert(localResources.get("link") === None) + assert(localResources.size === 0) + } + + test("test addResource appmaster only") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + statCache, true) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 10) + assert(resource.getSize() === 20) + assert(resource.getType() === LocalResourceType.ARCHIVE) + + val env = new HashMap[String, String]() + distMgr.setDistFilesEnv(env) + assert(env.get("SPARK_YARN_CACHE_FILES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_VISIBILITIES") === None) + + distMgr.setDistArchivesEnv(env) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") 
=== None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === None) + } + + test("test addResource archive") { + val distMgr = new MockClientDistributedCacheManager() + val fs = mock[FileSystem] + val conf = new Configuration() + val destPath = new Path("file:///foo.invalid.com:8080/tmp/testing") + val localResources = HashMap[String, LocalResource]() + val statCache: Map[URI, FileStatus] = HashMap[URI, FileStatus]() + val realFileStatus = new FileStatus(20, false, 1, 1024, 10, 30, null, "testOwner", + null, new Path("/tmp/testing")) + when(fs.getFileStatus(destPath)).thenReturn(realFileStatus) + + distMgr.addResource(fs, conf, destPath, localResources, LocalResourceType.ARCHIVE, "link", + statCache, false) + val resource = localResources("link") + assert(resource.getVisibility() === LocalResourceVisibility.PRIVATE) + assert(ConverterUtils.getPathFromYarnURL(resource.getResource()) === destPath) + assert(resource.getTimestamp() === 10) + assert(resource.getSize() === 20) + assert(resource.getType() === LocalResourceType.ARCHIVE) + + val env = new HashMap[String, String]() + + distMgr.setDistArchivesEnv(env) + assert(env("SPARK_YARN_CACHE_ARCHIVES") === "file:/foo.invalid.com:8080/tmp/testing#link") + assert(env("SPARK_YARN_CACHE_ARCHIVES_TIME_STAMPS") === "10") + assert(env("SPARK_YARN_CACHE_ARCHIVES_FILE_SIZES") === "20") + assert(env("SPARK_YARN_CACHE_ARCHIVES_VISIBILITIES") === LocalResourceVisibility.PRIVATE.name()) + + distMgr.setDistFilesEnv(env) + assert(env.get("SPARK_YARN_CACHE_FILES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_TIME_STAMPS") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_FILE_SIZES") === None) + assert(env.get("SPARK_YARN_CACHE_FILES_VISIBILITIES") === None) + } + + +} -- cgit v1.2.3 From 989203604ee79b0c22042121f52be33c3591867f Mon Sep 17 00:00:00 2001 From: LiGuoqiang Date: Mon, 25 Nov 2013 11:23:11 +0800 Subject: Fix Maven build for metrics-graphite --- pom.xml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index edcc3b35cd..42c1e00e9d 100644 --- a/pom.xml +++ b/pom.xml @@ -346,6 +346,11 @@ metrics-ganglia 3.0.0 + + com.codahale.metrics + metrics-graphite + 3.0.0 + org.scala-lang scala-compiler -- cgit v1.2.3 From 4738818dd6efc644a093e983ac4a5a300da14f09 Mon Sep 17 00:00:00 2001 From: Raymond Liu Date: Mon, 2 Dec 2013 13:08:36 +0800 Subject: Fix pom.xml for maven build --- core/pom.xml | 10 +++------ pom.xml | 61 +++++++++++++++++++++++++++++++++++++++++++++++-------- streaming/pom.xml | 9 +------- 3 files changed, 56 insertions(+), 24 deletions(-) (limited to 'pom.xml') diff --git a/core/pom.xml b/core/pom.xml index 6af229c71d..38f4be1280 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -95,15 +95,15 @@ 0.3.1 - com.typesafe.akka + ${akka.group} akka-actor - com.typesafe.akka + ${akka.group} akka-remote - com.typesafe.akka + ${akka.group} akka-slf4j @@ -126,10 +126,6 @@ colt colt - - com.github.scala-incubator.io - scala-io-file_2.9.2 - org.apache.mesos mesos diff --git a/pom.xml b/pom.xml index 42c1e00e9d..9348c770fb 100644 --- a/pom.xml +++ b/pom.xml @@ -102,7 +102,9 @@ 1.5 2.9.3 0.13.0 + com.typesafe.akka 2.0.5 + 2.4.1 1.7.2 1.2.17 1.0.4 @@ -245,7 +247,7 @@ com.google.protobuf protobuf-java - 2.4.1 + ${protobuf.version} com.twitter @@ -258,7 +260,7 @@ 0.3.1 - com.typesafe.akka + ${akka.group} akka-actor ${akka.version} @@ -269,7 +271,7 @@ - com.typesafe.akka + ${akka.group} akka-remote ${akka.version} @@ -280,7 +282,7 @@ - 
com.typesafe.akka + ${akka.group} akka-slf4j ${akka.version} @@ -290,6 +292,17 @@ + + ${akka.group} + akka-zeromq + ${akka.version} + + + org.jboss.netty + netty + + + it.unimi.dsi fastutil @@ -300,11 +313,6 @@ colt 1.2.0 - - com.github.scala-incubator.io - scala-io-file_2.9.2 - 0.4.1 - org.apache.mesos mesos @@ -770,6 +778,41 @@ + + + new-yarn + + org.spark-project + 2.0.5-protobuf-2.5-java-1.5 + 2 + 2.2.0 + 2.5.0 + + + + new-yarn + + + + + maven-root + Maven root repository + http://repo1.maven.org/maven2/ + + true + + + false + + + + + + + + + + repl-bin diff --git a/streaming/pom.xml b/streaming/pom.xml index 7a9ae6a97b..40892937b8 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -110,15 +110,8 @@ scala-library - com.typesafe.akka + ${akka.group} akka-zeromq - 2.0.3 - - - org.jboss.netty - netty - - org.scalatest -- cgit v1.2.3 From 7ad6921ae0657ca806704f859d5b8c9ff26633e4 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Sat, 7 Dec 2013 12:45:57 +0530 Subject: Incorporated Patrick's feedback comment on #211 and made maven build/dep-resolution atleast a bit faster. --- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 56 +++++-------------------------------------------------- repl-bin/pom.xml | 2 +- repl/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 11 files changed, 15 insertions(+), 61 deletions(-) (limited to 'pom.xml') diff --git a/assembly/pom.xml b/assembly/pom.xml index 28b0692dff..8103534796 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-assembly_${scala-short.version} + spark-assembly_2.10 Spark Project Assembly http://spark.incubator.apache.org/ diff --git a/bagel/pom.xml b/bagel/pom.xml index c8b9c4f4cd..461e76a753 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-bagel_${scala-short.version} + spark-bagel_2.10 jar Spark Project Bagel http://spark.incubator.apache.org/ diff --git a/core/pom.xml b/core/pom.xml index e2033c9912..af605e1837 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 jar Spark Project Core http://spark.incubator.apache.org/ diff --git a/examples/pom.xml b/examples/pom.xml index a10dee7847..464ad82e33 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-examples_${scala-short.version} + spark-examples_2.10 jar Spark Project Examples http://spark.incubator.apache.org/ diff --git a/mllib/pom.xml b/mllib/pom.xml index a57bddeff3..fce5b19bb2 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-mllib_${scala-short.version} + spark-mllib_2.10 jar Spark Project ML Library http://spark.incubator.apache.org/ diff --git a/pom.xml b/pom.xml index 8700a4828f..4c11459088 100644 --- a/pom.xml +++ b/pom.xml @@ -104,7 +104,7 @@ 2.10 2.10.3 0.13.0 - 2.2.1 + 2.2.3 1.7.2 1.2.17 1.0.4 @@ -116,10 +116,10 @@ - - typesafe-repo - Typesafe Repository - http://repo.typesafe.com/typesafe/releases/ + + maven-repo + Maven Repository + http://repo.maven.apache.org/maven2/ true @@ -138,17 +138,6 @@ false - - akka-repo - Akka Repository - http://repo.akka.io/releases/ - - true - - - false - - mqtt-repo MQTT Repository @@ -161,41 +150,6 @@ - - - oss-sonatype-releases - OSS Sonatype - https://oss.sonatype.org/content/repositories/releases - - true - - - false - - - - oss-sonatype-snapshots - OSS Sonatype - 
https://oss.sonatype.org/content/repositories/snapshots - - false - - - true - - - - oss-sonatype - OSS Sonatype - https://oss.sonatype.org/content/groups/public - - true - - - true - - - diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index c983ea5dfb..f8a17d93cc 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-repl-bin_${scala-short.version} + spark-repl-bin_2.10 pom Spark Project REPL binary packaging http://spark.incubator.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index ff66493229..2f27e76760 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-repl_${scala-short.version} + spark-repl_2.10 jar Spark Project REPL http://spark.incubator.apache.org/ diff --git a/streaming/pom.xml b/streaming/pom.xml index fb15681e25..ff95591b9f 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -26,7 +26,7 @@ org.apache.spark - spark-streaming_${scala-short.version} + spark-streaming_2.10 jar Spark Project Streaming http://spark.incubator.apache.org/ diff --git a/tools/pom.xml b/tools/pom.xml index db87b54dec..353d201f90 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -25,7 +25,7 @@ org.apache.spark - spark-tools_${scala-short.version} + spark-tools_2.10 jar Spark Project Tools http://spark.incubator.apache.org/ diff --git a/yarn/pom.xml b/yarn/pom.xml index 12bc97da8a..5cf81575c9 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -25,7 +25,7 @@ org.apache.spark - spark-yarn_${scala-short.version} + spark-yarn_2.10 jar Spark Project YARN Support http://spark.incubator.apache.org/ -- cgit v1.2.3 From 17db6a9041d5e83d7b6fe47f9c36758d0613fcd6 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Tue, 10 Dec 2013 11:34:10 +0530 Subject: Style fixes and addressed review comments at #221 --- assembly/pom.xml | 14 +++++++------- bagel/pom.xml | 10 +++++----- core/pom.xml | 18 +++++++++--------- .../scala/org/apache/spark/MapOutputTracker.scala | 9 ++++----- .../main/scala/org/apache/spark/Partitioner.scala | 6 +++--- .../apache/spark/deploy/worker/ui/WorkerWebUI.scala | 14 +++++--------- .../scala/org/apache/spark/rdd/CheckpointRDD.scala | 11 ++++------- .../main/scala/org/apache/spark/rdd/JdbcRDD.scala | 1 + .../main/scala/org/apache/spark/rdd/MappedRDD.scala | 3 ++- .../org/apache/spark/rdd/OrderedRDDFunctions.scala | 3 ++- .../scala/org/apache/spark/rdd/ShuffledRDD.scala | 3 ++- .../apache/spark/storage/BlockManagerMaster.scala | 7 +++---- .../spark/util/IndestructibleActorSystem.scala | 17 +++++++++++++++-- examples/pom.xml | 20 ++++++++++---------- mllib/pom.xml | 10 +++++----- pom.xml | 17 ++++++++--------- repl-bin/pom.xml | 6 +++--- repl/pom.xml | 14 +++++++------- streaming/pom.xml | 16 ++++++++-------- .../spark/streaming/receivers/ZeroMQReceiver.scala | 4 ++-- tools/pom.xml | 10 +++++----- yarn/pom.xml | 8 ++++---- 22 files changed, 114 insertions(+), 107 deletions(-) (limited to 'pom.xml') diff --git a/assembly/pom.xml b/assembly/pom.xml index 8103534796..c2cda41c6d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -41,27 +41,27 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} org.apache.spark - spark-bagel_${scala-short.version} + spark-bagel_2.10 ${project.version} org.apache.spark - spark-mllib_${scala-short.version} + spark-mllib_2.10 ${project.version} org.apache.spark - spark-repl_${scala-short.version} + spark-repl_2.10 ${project.version} org.apache.spark - spark-streaming_${scala-short.version} + spark-streaming_2.10 ${project.version} @@ -79,7 
+79,7 @@ maven-shade-plugin false - ${project.build.directory}/scala-${scala-short.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar + ${project.build.directory}/scala-2.10/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar *:* @@ -128,7 +128,7 @@ org.apache.spark - spark-yarn_${scala-short.version} + spark-yarn_2.10 ${project.version} diff --git a/bagel/pom.xml b/bagel/pom.xml index 461e76a753..0f550d70d6 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -34,7 +34,7 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} @@ -43,18 +43,18 @@ org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.scalatest diff --git a/core/pom.xml b/core/pom.xml index af605e1837..71bf15c491 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -86,7 +86,7 @@ com.twitter - chill_${scala-short.version} + chill_2.10 0.3.1 @@ -96,11 +96,11 @@ com.typesafe.akka - akka-remote_${scala-short.version} + akka-remote_2.10 com.typesafe.akka - akka-slf4j_${scala-short.version} + akka-slf4j_2.10 org.scala-lang @@ -108,7 +108,7 @@ net.liftweb - lift-json_${scala-short.version} + lift-json_2.10 it.unimi.dsi @@ -120,7 +120,7 @@ com.github.scala-incubator.io - scala-io-file_${scala-short.version} + scala-io-file_2.10 org.apache.mesos @@ -166,12 +166,12 @@ org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test @@ -191,8 +191,8 @@ - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.apache.maven.plugins diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 88a7f24884..d36e1b13a6 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -72,12 +72,11 @@ private[spark] class MapOutputTracker extends Logging { // throw a SparkException if this fails. private def askTracker(message: Any): Any = { try { - val future = if (trackerActor.isLeft ) { - trackerActor.left.get.ask(message)(timeout) - } else { - trackerActor.right.get.ask(message)(timeout) + val future = trackerActor match { + case Left(a: ActorRef) => a.ask(message)(timeout) + case Right(b: ActorSelection) => b.ask(message)(timeout) } - return Await.result(future, timeout) + Await.result(future, timeout) } catch { case e: Exception => throw new SparkException("Error communicating with MapOutputTracker", e) diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 62b608c088..bcec41c439 100644 --- a/core/src/main/scala/org/apache/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -17,11 +17,11 @@ package org.apache.spark -import org.apache.spark.util.Utils -import org.apache.spark.rdd.RDD - import scala.reflect.ClassTag +import org.apache.spark.rdd.RDD +import org.apache.spark.util.Utils + /** * An object that defines how the elements in a key-value pair RDD are partitioned by key. * Maps each key to a partition ID, from 0 to `numPartitions - 1`. 
diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index a38e32b339..6c18a3c245 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -17,23 +17,19 @@ package org.apache.spark.deploy.worker.ui -import akka.actor.ActorRef -import akka.util.Timeout +import java.io.File import scala.concurrent.duration._ -import java.io.{FileInputStream, File} - +import akka.util.Timeout import javax.servlet.http.HttpServletRequest -import org.eclipse.jetty.server.{Handler, Server} - +import org.apache.spark.Logging import org.apache.spark.deploy.worker.Worker -import org.apache.spark.{Logging} -import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.{JettyUtils, UIUtils} import org.apache.spark.ui.JettyUtils._ -import org.apache.spark.ui.UIUtils import org.apache.spark.util.Utils +import org.eclipse.jetty.server.{Handler, Server} /** * Web UI server for the standalone worker. diff --git a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index 99ea6e8ee8..a712ef1c27 100644 --- a/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -17,16 +17,13 @@ package org.apache.spark.rdd +import java.io.IOException + import scala.reflect.ClassTag + +import org.apache.hadoop.fs.Path import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter} -import org.apache.hadoop.conf.Configuration -import org.apache.hadoop.io.{NullWritable, BytesWritable} -import org.apache.hadoop.util.ReflectionUtils -import org.apache.hadoop.fs.Path -import java.io.{File, IOException, EOFException} -import java.text.NumberFormat private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} diff --git a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index e72f86fb13..8df8718f3b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -20,6 +20,7 @@ package org.apache.spark.rdd import java.sql.{Connection, ResultSet} import scala.reflect.ClassTag + import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} import org.apache.spark.util.NextIterator diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala index eb3b19907d..8d7c288593 100644 --- a/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala @@ -17,9 +17,10 @@ package org.apache.spark.rdd -import org.apache.spark.{Partition, TaskContext} import scala.reflect.ClassTag +import org.apache.spark.{Partition, TaskContext} + private[spark] class MappedRDD[U: ClassTag, T: ClassTag](prev: RDD[T], f: T => U) extends RDD[U](prev) { diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala index 4a465840c6..d5691f2267 100644 --- a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -17,9 +17,10 @@ package org.apache.spark.rdd -import 
org.apache.spark.{RangePartitioner, Logging} import scala.reflect.ClassTag +import org.apache.spark.{Logging, RangePartitioner} + /** * Extra functions available on RDDs of (key, value) pairs where the key is sortable through * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to diff --git a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 1d109a2496..3682c84598 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -17,9 +17,10 @@ package org.apache.spark.rdd -import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext} import scala.reflect.ClassTag +import org.apache.spark.{Dependency, Partition, Partitioner, ShuffleDependency, + SparkEnv, TaskContext} private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { override val index = idx diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index e5de16fc01..e05b842476 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -157,10 +157,9 @@ private[spark] class BlockManagerMaster(var driverActor : Either[ActorRef, Actor while (attempts < AKKA_RETRY_ATTEMPTS) { attempts += 1 try { - val future = if (driverActor.isLeft ) { - driverActor.left.get.ask(message)(timeout) - } else { - driverActor.right.get.ask(message)(timeout) + val future = driverActor match { + case Left(a: ActorRef) => a.ask(message)(timeout) + case Right(b: ActorSelection) => b.ask(message)(timeout) } val result = Await.result(future, timeout) if (result == null) { diff --git a/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala index 69519860c6..bf71882ef7 100644 --- a/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala +++ b/core/src/main/scala/org/apache/spark/util/IndestructibleActorSystem.scala @@ -1,5 +1,18 @@ -/** - * Copyright (C) 2009-2013 Typesafe Inc. +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ // Must be in akka.actor package as ActorSystemImpl is protected[akka]. 
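The style-fix commit above rewrites both MapOutputTracker.askTracker and BlockManagerMaster's ask path as a single pattern match over Either[ActorRef, ActorSelection], replacing the old isLeft/left.get chain. A minimal, self-contained sketch of that idiom follows; it assumes Akka 2.2.x, and TrackerClient is an illustrative name rather than a Spark class:

import scala.concurrent.Await
import scala.concurrent.duration._
import akka.actor.{ActorRef, ActorSelection}
import akka.pattern.ask
import akka.util.Timeout

// Hedged sketch, not Spark code: one ask() path for both actor handle types.
class TrackerClient(tracker: Either[ActorRef, ActorSelection]) {
  private val timeout = Timeout(30.seconds)

  def askTracker(message: Any): Any = {
    // The match covers both sides of the Either, so no isLeft/.left.get is needed.
    val future = tracker match {
      case Left(ref)        => ref.ask(message)(timeout)
      case Right(selection) => selection.ask(message)(timeout)
    }
    Await.result(future, timeout.duration)
  }
}

The same shape appears in BlockManagerMaster's retry loop above: putting both handle types behind one match lets callers stay unchanged while the driver reference migrates from ActorRef to ActorSelection.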
diff --git a/examples/pom.xml b/examples/pom.xml index 464ad82e33..97f6dfea66 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -49,25 +49,25 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} provided org.apache.spark - spark-streaming_${scala-short.version} + spark-streaming_2.10 ${project.version} provided org.apache.spark - spark-mllib_${scala-short.version} + spark-mllib_2.10 ${project.version} provided org.apache.spark - spark-bagel_${scala-short.version} + spark-bagel_2.10 ${project.version} provided @@ -88,7 +88,7 @@ org.apache.kafka - kafka_2.9.2 + kafka_2.10 0.8.0-beta1 @@ -107,17 +107,17 @@ com.twitter - algebird-core_${scala-short.version} + algebird-core_2.10 0.1.11 org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test @@ -166,8 +166,8 @@ - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.apache.maven.plugins diff --git a/mllib/pom.xml b/mllib/pom.xml index fce5b19bb2..228f8c029b 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -34,7 +34,7 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} @@ -48,12 +48,12 @@ org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test @@ -63,8 +63,8 @@ - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.scalatest diff --git a/pom.xml b/pom.xml index 4c11459088..979fd0c287 100644 --- a/pom.xml +++ b/pom.xml @@ -101,7 +101,6 @@ 1.6 - 2.10 2.10.3 0.13.0 2.2.3 @@ -205,7 +204,7 @@ com.twitter - chill_${scala-short.version} + chill_2.10 0.3.1 @@ -215,7 +214,7 @@ com.typesafe.akka - akka-actor_${scala-short.version} + akka-actor_2.10 ${akka.version} @@ -226,7 +225,7 @@ com.typesafe.akka - akka-remote_${scala-short.version} + akka-remote_2.10 ${akka.version} @@ -237,7 +236,7 @@ com.typesafe.akka - akka-slf4j_${scala-short.version} + akka-slf4j_2.10 ${akka.version} @@ -258,7 +257,7 @@ com.github.scala-incubator.io - scala-io-file_${scala-short.version} + scala-io-file_2.10 0.4.1 @@ -279,7 +278,7 @@ net.liftweb - lift-json_${scala-short.version} + lift-json_2.10 2.5.1 @@ -335,7 +334,7 @@ org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 1.9.1 test @@ -358,7 +357,7 @@ org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 1.10.0 test diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index f8a17d93cc..c2a4efa36c 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -40,18 +40,18 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} org.apache.spark - spark-bagel_${scala-short.version} + spark-bagel_2.10 ${project.version} runtime org.apache.spark - spark-repl_${scala-short.version} + spark-repl_2.10 ${project.version} runtime diff --git a/repl/pom.xml b/repl/pom.xml index 2f27e76760..bf06d730d4 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -39,18 +39,18 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} org.apache.spark - spark-bagel_${scala-short.version} + spark-bagel_2.10 ${project.version} runtime org.apache.spark - spark-mllib_${scala-short.version} + spark-mllib_2.10 ${project.version} runtime @@ -78,18 +78,18 @@ org.scalatest - 
scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.apache.maven.plugins diff --git a/streaming/pom.xml b/streaming/pom.xml index ff95591b9f..298bc83161 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -48,7 +48,7 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} @@ -61,8 +61,8 @@ 1.9.11 - org.apache.kafka - kafka_2.9.2 + com.sksamuel.kafka + kafka_2.10 0.8.0-beta1 @@ -111,7 +111,7 @@ com.typesafe.akka - akka-zeromq_${scala-short.version} + akka-zeromq_2.10 ${akka.version} @@ -122,12 +122,12 @@ org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test org.scalacheck - scalacheck_${scala-short.version} + scalacheck_2.10 test @@ -151,8 +151,8 @@ - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.scalatest diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala index ce8c56fa8a..f164d516b0 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receivers/ZeroMQReceiver.scala @@ -17,14 +17,14 @@ package org.apache.spark.streaming.receivers +import scala.reflect.ClassTag + import akka.actor.Actor import akka.util.ByteString import akka.zeromq._ import org.apache.spark.Logging -import scala.reflect.ClassTag - /** * A receiver to subscribe to ZeroMQ stream. 
*/ diff --git a/tools/pom.xml b/tools/pom.xml index 353d201f90..2bad494fc5 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -33,24 +33,24 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} org.apache.spark - spark-streaming_${scala-short.version} + spark-streaming_2.10 ${project.version} org.scalatest - scalatest_${scala-short.version} + scalatest_2.10 test - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.apache.maven.plugins diff --git a/yarn/pom.xml b/yarn/pom.xml index 5cf81575c9..d18ac3736d 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -33,7 +33,7 @@ org.apache.spark - spark-core_${scala-short.version} + spark-core_2.10 ${project.version} @@ -63,7 +63,7 @@ org.scalatest - scalatest_2.9.3 + scalatest_2.10 test @@ -74,8 +74,8 @@ - target/scala-${scala-short.version}/classes - target/scala-${scala-short.version}/test-classes + target/scala-2.10/classes + target/scala-2.10/test-classes org.apache.maven.plugins -- cgit v1.2.3 From 589b83a18f60c2862780bdba655662b73bb575c5 Mon Sep 17 00:00:00 2001 From: Prashant Sharma Date: Thu, 12 Dec 2013 16:20:10 +0530 Subject: Disabled yarn 2.2 and added a message in the sbt build --- pom.xml | 60 ++++++++++++++++++++++++------------------------ project/SparkBuild.scala | 24 +++++++++++++------ 2 files changed, 47 insertions(+), 37 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 6906ad2d32..aee07c2138 100644 --- a/pom.xml +++ b/pom.xml @@ -738,39 +738,39 @@ - - new-yarn - - org.spark-project - 2.0.5-protobuf-2.5-java-1.5 - 2 - 2.2.0 - 2.5.0 - + + + + + + + + + - - new-yarn - + + + - - - maven-root - Maven root repository - http://repo1.maven.org/maven2/ - - true - - - false - - - + + + + + + + + + + + + + - - - - - + + + + + repl-bin diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 05d3f18ea2..c322cbd6b5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -78,18 +78,28 @@ object SparkBuild extends Build { } case Some(v) => v.toBoolean } + lazy val isYarnEnabled = scala.util.Properties.envOrNone("SPARK_YARN") match { case None => DEFAULT_YARN case Some(v) => v.toBoolean } + if(isNewHadoop && isYarnEnabled) { + println("""Yarn with Hadoop version 2.2.x is not yet expected to work. + Please set env SPARK_HADOOP_VERSION to appropriate version or set SPARK_YARN to false.""") + throw new Exception("Yarn with Hadoop version 2.2.x is not yet expected to work.") + } + // Build against a protobuf-2.5 compatible Akka if Hadoop 2 is used. 
- lazy val protobufVersion = if (isNewHadoop) "2.5.0" else "2.4.1" - lazy val akkaVersion = if (isNewHadoop) "2.0.5-protobuf-2.5-java-1.5" else "2.0.5" - lazy val akkaGroup = if (isNewHadoop) "org.spark-project" else "com.typesafe.akka" + // lazy val protobufVersion = if (isNewHadoop) "2.5.0" else "2.4.1" + // lazy val akkaVersion = if (isNewHadoop) "2.0.5-protobuf-2.5-java-1.5" else "2.0.5" + // lazy val akkaGroup = if (isNewHadoop) "org.spark-project" else "com.typesafe.akka" // Conditionally include the yarn sub-project - lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core) + //lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core) + + lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) + lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() @@ -226,8 +236,8 @@ object SparkBuild extends Build { "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", "com.google.protobuf" % "protobuf-java" % "2.4.1", - akkaGroup %% "akka-remote" % "2.2.3" excludeAll(excludeNetty), - akkaGroup %% "akka-slf4j" % "2.2.3" excludeAll(excludeNetty), + "com.typesafe.akka" %% "akka-remote" % "2.2.3" excludeAll(excludeNetty), + "com.typesafe.akka" %% "akka-slf4j" % "2.2.3" excludeAll(excludeNetty), "net.liftweb" %% "lift-json" % "2.5.1" excludeAll(excludeNetty), "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", @@ -311,7 +321,7 @@ object SparkBuild extends Build { "org.eclipse.paho" % "mqtt-client" % "0.4.0", "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), - akkaGroup %% "akka-zeromq" % "2.2.3" excludeAll(excludeNetty) + "com.typesafe.akka" %% "akka-zeromq" % "2.2.3" excludeAll(excludeNetty) ) ) -- cgit v1.2.3 From 6e8a96c7e7652b3d1fc709b3c3ccc5f90ffeb623 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Fri, 13 Dec 2013 23:14:08 -0800 Subject: Fix maven build issues in 2.10 branch --- examples/pom.xml | 2 +- pom.xml | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'pom.xml') diff --git a/examples/pom.xml b/examples/pom.xml index 97f6dfea66..e16b1c80aa 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -87,7 +87,7 @@ - org.apache.kafka + com.sksamuel.kafka kafka_2.10 0.8.0-beta1 diff --git a/pom.xml b/pom.xml index aee07c2138..58fc02e029 100644 --- a/pom.xml +++ b/pom.xml @@ -498,6 +498,10 @@ org.jboss.netty netty + + io.netty + netty + -- cgit v1.2.3 From 09ed7ddfa0ef8dde12304a26be776232cadc8b1e Mon Sep 17 00:00:00 2001 From: Mark Hamstra Date: Sun, 15 Dec 2013 12:39:58 -0800 Subject: Use scala.binary.version in POMs --- assembly/pom.xml | 14 +++++++------- bagel/pom.xml | 10 +++++----- core/pom.xml | 18 +++++++++--------- examples/pom.xml | 20 ++++++++++---------- mllib/pom.xml | 10 +++++----- pom.xml | 17 +++++++++-------- repl-bin/pom.xml | 6 +++--- repl/pom.xml | 14 +++++++------- streaming/pom.xml | 14 +++++++------- tools/pom.xml | 10 +++++----- yarn/pom.xml | 8 ++++---- 11 files changed, 71 insertions(+), 70 deletions(-) (limited to 'pom.xml') diff --git a/assembly/pom.xml b/assembly/pom.xml index c2cda41c6d..fc2adc1fbb 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -41,27 +41,27 @@ org.apache.spark - spark-core_2.10 + 
spark-core_${scala.binary.version} ${project.version} org.apache.spark - spark-bagel_2.10 + spark-bagel_${scala.binary.version} ${project.version} org.apache.spark - spark-mllib_2.10 + spark-mllib_${scala.binary.version} ${project.version} org.apache.spark - spark-repl_2.10 + spark-repl_${scala.binary.version} ${project.version} org.apache.spark - spark-streaming_2.10 + spark-streaming_${scala.binary.version} ${project.version} @@ -79,7 +79,7 @@ maven-shade-plugin false - ${project.build.directory}/scala-2.10/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar + ${project.build.directory}/scala-${scala.binary.version}/${project.artifactId}-${project.version}-hadoop${hadoop.version}.jar *:* @@ -128,7 +128,7 @@ org.apache.spark - spark-yarn_2.10 + spark-yarn_${scala.binary.version} ${project.version} diff --git a/bagel/pom.xml b/bagel/pom.xml index 0f550d70d6..cb8e79f225 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -34,7 +34,7 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} @@ -43,18 +43,18 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.scalatest diff --git a/core/pom.xml b/core/pom.xml index 3fe48fd2af..cdbaa52731 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -86,7 +86,7 @@ com.twitter - chill_2.10 + chill_${scala.binary.version} 0.3.1 @@ -96,15 +96,15 @@ ${akka.group} - akka-actor_2.10 + akka-actor_${scala.binary.version} ${akka.group} - akka-remote_2.10 + akka-remote_${scala.binary.version} ${akka.group} - akka-slf4j_2.10 + akka-slf4j_${scala.binary.version} org.scala-lang @@ -112,7 +112,7 @@ net.liftweb - lift-json_2.10 + lift-json_${scala.binary.version} it.unimi.dsi @@ -166,12 +166,12 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test @@ -191,8 +191,8 @@ - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.apache.maven.plugins diff --git a/examples/pom.xml b/examples/pom.xml index e16b1c80aa..7a7032c319 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -49,25 +49,25 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} provided org.apache.spark - spark-streaming_2.10 + spark-streaming_${scala.binary.version} ${project.version} provided org.apache.spark - spark-mllib_2.10 + spark-mllib_${scala.binary.version} ${project.version} provided org.apache.spark - spark-bagel_2.10 + spark-bagel_${scala.binary.version} ${project.version} provided @@ -88,7 +88,7 @@ com.sksamuel.kafka - kafka_2.10 + kafka_${scala.binary.version} 0.8.0-beta1 @@ -107,17 +107,17 @@ com.twitter - algebird-core_2.10 + algebird-core_${scala.binary.version} 0.1.11 org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test @@ -166,8 +166,8 @@ - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.apache.maven.plugins diff --git a/mllib/pom.xml b/mllib/pom.xml index 228f8c029b..dda3900afe 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -34,7 +34,7 
@@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} @@ -48,12 +48,12 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test @@ -63,8 +63,8 @@ - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.scalatest diff --git a/pom.xml b/pom.xml index 58fc02e029..fd99fabc15 100644 --- a/pom.xml +++ b/pom.xml @@ -102,6 +102,7 @@ 1.6 2.10.3 + 2.10 0.13.0 2.2.3 com.typesafe.akka @@ -206,7 +207,7 @@ com.twitter - chill_2.10 + chill_${scala.binary.version} 0.3.1 @@ -216,7 +217,7 @@ ${akka.group} - akka-actor_2.10 + akka-actor_${scala.binary.version} ${akka.version} @@ -227,7 +228,7 @@ ${akka.group} - akka-remote_2.10 + akka-remote_${scala.binary.version} ${akka.version} @@ -238,7 +239,7 @@ ${akka.group} - akka-slf4j_2.10 + akka-slf4j_${scala.binary.version} ${akka.version} @@ -249,7 +250,7 @@ ${akka.group} - akka-zeromq_2.10 + akka-zeromq_${scala.binary.version} ${akka.version} @@ -286,7 +287,7 @@ net.liftweb - lift-json_2.10 + lift-json_${scala.binary.version} 2.5.1 @@ -342,7 +343,7 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} 1.9.1 test @@ -365,7 +366,7 @@ org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} 1.10.0 test diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index c2a4efa36c..869dbdb9b0 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -40,18 +40,18 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} org.apache.spark - spark-bagel_2.10 + spark-bagel_${scala.binary.version} ${project.version} runtime org.apache.spark - spark-repl_2.10 + spark-repl_${scala.binary.version} ${project.version} runtime diff --git a/repl/pom.xml b/repl/pom.xml index bf06d730d4..b0e7877bbb 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -39,18 +39,18 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} org.apache.spark - spark-bagel_2.10 + spark-bagel_${scala.binary.version} ${project.version} runtime org.apache.spark - spark-mllib_2.10 + spark-mllib_${scala.binary.version} ${project.version} runtime @@ -78,18 +78,18 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.apache.maven.plugins diff --git a/streaming/pom.xml b/streaming/pom.xml index e27b437aae..e3b6fee9b2 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -48,7 +48,7 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} @@ -62,7 +62,7 @@ com.sksamuel.kafka - kafka_2.10 + kafka_${scala.binary.version} 0.8.0-beta1 @@ -111,16 +111,16 @@ ${akka.group} - akka-zeromq_2.10 + akka-zeromq_${scala.binary.version} org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test org.scalacheck - scalacheck_2.10 + scalacheck_${scala.binary.version} test @@ -144,8 +144,8 @@ - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.scalatest diff --git a/tools/pom.xml b/tools/pom.xml index 2bad494fc5..28f5ef14b1 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -33,24 +33,24 @@ 
org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} org.apache.spark - spark-streaming_2.10 + spark-streaming_${scala.binary.version} ${project.version} org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.apache.maven.plugins diff --git a/yarn/pom.xml b/yarn/pom.xml index d18ac3736d..bc64a190fd 100644 --- a/yarn/pom.xml +++ b/yarn/pom.xml @@ -33,7 +33,7 @@ org.apache.spark - spark-core_2.10 + spark-core_${scala.binary.version} ${project.version} @@ -63,7 +63,7 @@ org.scalatest - scalatest_2.10 + scalatest_${scala.binary.version} test @@ -74,8 +74,8 @@ - target/scala-2.10/classes - target/scala-2.10/test-classes + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes org.apache.maven.plugins -- cgit v1.2.3 From c6f95e603e2c58b2fab6255c824a839807f03026 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Sun, 15 Dec 2013 20:30:21 -0800 Subject: Attempt with extra repositories --- README.md | 6 +- core/pom.xml | 9 +-- new-yarn/pom.xml | 6 +- .../apache/spark/deploy/yarn/WorkerLauncher.scala | 10 +-- .../spark/deploy/yarn/YarnAllocationHandler.scala | 2 +- pom.xml | 76 ++++++++++++---------- project/SparkBuild.scala | 32 +++------ 7 files changed, 65 insertions(+), 76 deletions(-) (limited to 'pom.xml') diff --git a/README.md b/README.md index 80bbe311a9..1550a8b551 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ versions without YARN, use: # Cloudera CDH 4.2.0 with MapReduce v1 $ SPARK_HADOOP_VERSION=2.0.0-mr1-cdh4.2.0 sbt/sbt assembly -For Apache Hadoop 2.0.X, 2.1.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions +For Apache Hadoop 2.2.X, 2.1.X, 2.0.X, 0.23.x, Cloudera CDH MRv2, and other Hadoop versions with YARN, also set `SPARK_YARN=true`: # Apache Hadoop 2.0.5-alpha @@ -63,10 +63,8 @@ with YARN, also set `SPARK_YARN=true`: # Cloudera CDH 4.2.0 with MapReduce v2 $ SPARK_HADOOP_VERSION=2.0.0-cdh4.2.0 SPARK_YARN=true sbt/sbt assembly -When building for Hadoop 2.2.X and newer, you'll need to include the additional `new-yarn` profile: - # Apache Hadoop 2.2.X and newer - $ mvn -Dyarn.version=2.2.0 -Dhadoop.version=2.2.0 -Pnew-yarn + $ SPARK_HADOOP_VERSION=2.2.0 SPARK_YARN=true sbt/sbt assembly When developing a Spark application, specify the Hadoop version by adding the "hadoop-client" artifact to your project's dependencies. 
For example, if you're diff --git a/core/pom.xml b/core/pom.xml index cdbaa52731..b83a2a8779 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -80,10 +80,6 @@ org.ow2.asm asm - - com.google.protobuf - protobuf-java - com.twitter chill_${scala.binary.version} @@ -94,10 +90,6 @@ chill-java 0.3.1 - - ${akka.group} - akka-actor_${scala.binary.version} - ${akka.group} akka-remote_${scala.binary.version} @@ -105,6 +97,7 @@ ${akka.group} akka-slf4j_${scala.binary.version} + org.spark-project.akka org.scala-lang diff --git a/new-yarn/pom.xml b/new-yarn/pom.xml index 8a065c6d7d..4cd28f34e3 100644 --- a/new-yarn/pom.xml +++ b/new-yarn/pom.xml @@ -25,7 +25,7 @@ org.apache.spark - spark-yarn_2.9.3 + spark-yarn_2.10 jar Spark Project YARN Support http://spark.incubator.apache.org/ @@ -33,7 +33,7 @@ org.apache.spark - spark-core_2.9.3 + spark-core_2.10 ${project.version} @@ -63,7 +63,7 @@ org.scalatest - scalatest_2.9.3 + scalatest_2.10 test diff --git a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala index c38f33e212..11da1c4e73 100644 --- a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala +++ b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala @@ -26,7 +26,7 @@ import org.apache.hadoop.yarn.api.protocolrecords._ import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.util.{ConverterUtils, Records} import akka.actor._ -import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} +import akka.remote._ import akka.actor.Terminated import org.apache.spark.{SparkContext, Logging} import org.apache.spark.util.{Utils, AkkaUtils} @@ -59,12 +59,12 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration) exte override def preStart() { logInfo("Listen to driver: " + driverUrl) driver = context.actorFor(driverUrl) - context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) - context.watch(driver) // Doesn't work with remote actors, but useful for testing + driver ! "hello" + context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) } override def receive = { - case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => + case x: DisassociatedEvent => logInfo("Driver terminated or disconnected! 
Shutting down.") driverClosed = true } @@ -140,7 +140,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration) exte System.setProperty("spark.driver.host", driverHost) System.setProperty("spark.driver.port", driverPort.toString) - val driverUrl = "akka://spark@%s:%s/user/%s".format( + val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format( driverHost, driverPort.toString, CoarseGrainedSchedulerBackend.ACTOR_NAME) actor = actorSystem.actorOf(Props(new MonitorActor(driverUrl)), name = "YarnAM") diff --git a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala index dba0f7640e..c27257cda4 100644 --- a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala +++ b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocationHandler.scala @@ -253,7 +253,7 @@ private[yarn] class YarnAllocationHandler( numWorkersRunning.decrementAndGet() } else { val workerId = workerIdCounter.incrementAndGet().toString - val driverUrl = "akka://spark@%s:%s/user/%s".format( + val driverUrl = "akka.tcp://spark@%s:%s/user/%s".format( System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), CoarseGrainedSchedulerBackend.ACTOR_NAME) diff --git a/pom.xml b/pom.xml index fd99fabc15..39c8a8cc5e 100644 --- a/pom.xml +++ b/pom.xml @@ -104,12 +104,11 @@ 2.10.3 2.10 0.13.0 - 2.2.3 - com.typesafe.akka - 2.4.1 + 2.2.3-shaded-protobuf 1.7.2 1.2.17 1.0.4 + 2.4.1 0.23.7 0.94.6 @@ -200,6 +199,11 @@ asm 4.0 + com.google.protobuf protobuf-java @@ -216,6 +220,7 @@ 0.3.1 +<<<<<<< HEAD ${akka.group} akka-actor_${scala.binary.version} ${akka.version} @@ -249,8 +254,13 @@ +<<<<<<< HEAD ${akka.group} akka-zeromq_${scala.binary.version} +======= + org.spark-project.akka + akka-zeromq_2.10 +>>>>>>> Attempt with extra repositories ${akka.version} @@ -461,6 +471,7 @@ + org.apache.hadoop hadoop-yarn-client @@ -716,6 +727,7 @@ 2 0.23.7 + 2.5.0 @@ -743,39 +755,37 @@ - - - - - - - - - + + new-yarn + + 2 + 2.2.0 + 2.5.0 + - - - + + new-yarn + - - - - - - - - - - - - - + + + maven-root + Maven root repository + http://repo1.maven.org/maven2/ + + true + + + false + + + - - - - - + + + + + repl-bin diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 441dcc18fb..29f4a4b9ff 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -84,21 +84,10 @@ object SparkBuild extends Build { case Some(v) => v.toBoolean } - if (isNewHadoop && isYarnEnabled) { - println( """Yarn with Hadoop version 2.2.x is not yet expected to work. - Please set env SPARK_HADOOP_VERSION to appropriate version or set SPARK_YARN to false.""") - throw new Exception("Yarn with Hadoop version 2.2.x is not yet expected to work.") - } - - // Build against a protobuf-2.5 compatible Akka if Hadoop 2 is used. 
- // lazy val protobufVersion = if (isNewHadoop) "2.5.0" else "2.4.1" - // lazy val akkaVersion = if (isNewHadoop) "2.0.5-protobuf-2.5-java-1.5" else "2.0.5" - // lazy val akkaGroup = if (isNewHadoop) "org.spark-project" else "com.typesafe.akka" - // Conditionally include the yarn sub-project - //lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core) + lazy val yarn = Project("yarn", file(if (isNewHadoop) "new-yarn" else "yarn"), settings = yarnSettings) dependsOn(core) - lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) + //lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) lazy val maybeYarn = if (isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() lazy val maybeYarnRef = if (isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() @@ -235,9 +224,8 @@ object SparkBuild extends Build { "com.ning" % "compress-lzf" % "0.8.4", "org.xerial.snappy" % "snappy-java" % "1.0.5", "org.ow2.asm" % "asm" % "4.0", - "com.google.protobuf" % "protobuf-java" % "2.4.1", - "com.typesafe.akka" %% "akka-remote" % "2.2.3" excludeAll(excludeNetty), - "com.typesafe.akka" %% "akka-slf4j" % "2.2.3" excludeAll(excludeNetty), + "org.spark-project.akka" %% "akka-remote" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty), + "org.spark-project.akka" %% "akka-slf4j" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty), "net.liftweb" %% "lift-json" % "2.5.1" excludeAll(excludeNetty), "it.unimi.dsi" % "fastutil" % "6.4.4", "colt" % "colt" % "1.2.0", @@ -312,16 +300,16 @@ object SparkBuild extends Build { ), libraryDependencies ++= Seq( - "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy), - "com.sksamuel.kafka" %% "kafka" % "0.8.0-beta1" + "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty, excludeSnappy), + "com.sksamuel.kafka" %% "kafka" % "0.8.0-beta1" exclude("com.sun.jdmk", "jmxtools") exclude("com.sun.jmx", "jmxri") exclude("net.sf.jopt-simple", "jopt-simple") excludeAll(excludeNetty), - "org.eclipse.paho" % "mqtt-client" % "0.4.0", - "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), - "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), - "com.typesafe.akka" %% "akka-zeromq" % "2.2.3" excludeAll(excludeNetty) + "org.eclipse.paho" % "mqtt-client" % "0.4.0", + "com.github.sgroschupf" % "zkclient" % "0.1" excludeAll(excludeNetty), + "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), + "org.spark-project.akka" %% "akka-zeromq" % "2.2.3-shaded-protobuf" excludeAll(excludeNetty) ) ) -- cgit v1.2.3 From ceb013f8b97051ee96c65a8da7489a2b251ef799 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 16 Dec 2013 12:38:01 -0800 Subject: Remove trailing slashes from repository specifications. The correct format is to not have a trailing slash. For me this caused non-deterministic failures due to issues fetching certain artifacts. The issue was that some of the maven caches would fail to fetch the artifact (due to the way that the artifact path was concatenated with the repository) and this short-circuited the download process in a silent way. 
Here is what the log output looked like:
Downloading: http://repo.maven.apache.org/maven2/org/spark-project/akka/akka-remote_2.10/2.2.3-shaded-protobuf/akka-remote_2.10-2.2.3-shaded-protobuf.pom
[WARNING] The POM for org.spark-project.akka:akka-remote_2.10:jar:2.2.3-shaded-protobuf is missing, no dependency information available
This was pretty brutal to debug since there was no error message anywhere and the path *looks* correct as reported by the Maven log. --- pom.xml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'pom.xml') diff --git a/pom.xml b/pom.xml index 39c8a8cc5e..30ef928618 100644 --- a/pom.xml +++ b/pom.xml @@ -120,7 +120,7 @@ maven-repo Maven Repository - http://repo.maven.apache.org/maven2/ + http://repo.maven.apache.org/maven2 true @@ -131,7 +131,7 @@ jboss-repo JBoss Repository - http://repository.jboss.org/nexus/content/repositories/releases/ + http://repository.jboss.org/nexus/content/repositories/releases true @@ -142,7 +142,7 @@ mqtt-repo MQTT Repository - https://repo.eclipse.org/content/repositories/paho-releases/ + https://repo.eclipse.org/content/repositories/paho-releases true @@ -739,7 +739,7 @@ maven-root Maven root repository - http://repo1.maven.org/maven2/ + http://repo1.maven.org/maven2 true @@ -771,7 +771,7 @@ maven-root Maven root repository - http://repo1.maven.org/maven2/ + http://repo1.maven.org/maven2 true -- cgit v1.2.3 From c1fec89895f03dbdbb6f445ea3cdcd2d050555c4 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 16 Dec 2013 21:56:21 -0800 Subject: Cleanup --- core/pom.xml | 1 - pom.xml | 6 ------ 2 files changed, 7 deletions(-) (limited to 'pom.xml') diff --git a/core/pom.xml b/core/pom.xml index b83a2a8779..043f6cf68d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -97,7 +97,6 @@ ${akka.group} akka-slf4j_${scala.binary.version} - org.spark-project.akka org.scala-lang diff --git a/pom.xml b/pom.xml index 30ef928618..21d9da84b0 100644 --- a/pom.xml +++ b/pom.xml @@ -220,7 +220,6 @@ 0.3.1 -<<<<<<< HEAD ${akka.group} akka-actor_${scala.binary.version} ${akka.version} @@ -254,13 +253,8 @@ -<<<<<<< HEAD ${akka.group} akka-zeromq_${scala.binary.version} -======= - org.spark-project.akka - akka-zeromq_2.10 ->>>>>>> Attempt with extra repositories ${akka.version} -- cgit v1.2.3 From c1c0f8099f4559ff3ff9982878da6cd36d198ba0 Mon Sep 17 00:00:00 2001 From: Patrick Wendell Date: Mon, 16 Dec 2013 22:01:27 -0800 Subject: Clean-up --- .../src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala | 2 +- pom.xml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'pom.xml') diff --git a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala index ac519fc99c..dca389d64e 100644 --- a/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala +++ b/new-yarn/src/main/scala/org/apache/spark/deploy/yarn/WorkerLauncher.scala @@ -58,7 +58,7 @@ class WorkerLauncher(args: ApplicationMasterArguments, conf: Configuration) exte override def preStart() { logInfo("Listen to driver: " + driverUrl) - driver = context.actorFor(driverUrl) + driver = context.actorSelection(driverUrl) context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent]) } diff --git a/pom.xml b/pom.xml index 21d9da84b0..57e843596f 100644 --- a/pom.xml +++ b/pom.xml @@ -104,6 +104,7 @@ 2.10.3 2.10 0.13.0 + org.spark-project.akka 2.2.3-shaded-protobuf 1.7.2 1.2.17 -- cgit v1.2.3
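A note on the Akka changes running through these commits: Akka 2.2.x drops actorFor in favour of actorSelection, replaces the RemoteClientLifeCycleEvent family with RemotingLifecycleEvent (a DisassociatedEvent now signals a lost peer), and moves remote actor paths from the akka:// scheme to akka.tcp://. The sketch below shows the new style in one self-contained actor; it is illustrative only — the class names, system name, port, and sample URL are invented here, not taken from Spark.

    import akka.actor._
    import akka.remote.{DisassociatedEvent, RemotingLifecycleEvent}

    // Minimal sketch of an Akka 2.2-style remote monitor (hypothetical names).
    class DriverMonitor(driverUrl: String) extends Actor {
      // actorSelection replaces the actorFor lookup removed in Akka 2.2.
      val driver: ActorSelection = context.actorSelection(driverUrl)

      override def preStart(): Unit = {
        driver ! "hello" // announce ourselves, mirroring the WorkerLauncher handshake
        // RemotingLifecycleEvent supersedes RemoteClientLifeCycleEvent;
        // remote disconnects now arrive as DisassociatedEvent messages.
        context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
      }

      def receive = {
        case _: DisassociatedEvent =>
          println("Driver terminated or disconnected! Shutting down.")
          context.system.shutdown()
      }
    }

    object DriverMonitorApp extends App {
      val system = ActorSystem("monitor")
      // Remote paths use the akka.tcp scheme in 2.2 (was plain akka://).
      val url = "akka.tcp://spark@localhost:7077/user/CoarseGrainedScheduler"
      system.actorOf(Props(classOf[DriverMonitor], url), name = "monitor")
    }

Subscribing to the event stream is what replaces the old context.watch(driver) pattern, which — as the comment removed from WorkerLauncher noted — does not work with remote actors.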
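The same dependency parameterization also shows up twice above: the Maven build spells the Scala binary suffix out as _${scala.binary.version}, while SparkBuild.scala relies on sbt's %% operator to append it. As a hedged sketch for reproducing the setup outside this repo — versions copied from the pom above; the extra resolver is only needed if the artifacts are absent from your default repositories — a build.sbt fragment would look roughly like:

    scalaVersion := "2.10.3"

    // No trailing slash on the URL — the format the repository fix above
    // standardizes on to avoid silently failing artifact downloads.
    resolvers += "Maven root repository" at "http://repo1.maven.org/maven2"

    // %% appends the Scala binary version, so these resolve to
    // akka-remote_2.10 and akka-slf4j_2.10 — the same artifacts the pom
    // names with the _${scala.binary.version} suffix.
    libraryDependencies ++= Seq(
      "org.spark-project.akka" %% "akka-remote" % "2.2.3-shaded-protobuf",
      "org.spark-project.akka" %% "akka-slf4j"  % "2.2.3-shaded-protobuf"
    )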