From f0164e5047f3a023768cdf0515a7a7d1c75ef7c3 Mon Sep 17 00:00:00 2001 From: kalpit Date: Wed, 6 Mar 2013 17:03:15 -0800 Subject: upgraded sbt version, sbt plugins and some library dependencies to latest stable version --- project/SparkBuild.scala | 8 ++++---- project/build.properties | 2 +- project/plugins.sbt | 8 ++++---- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f378b2398..ea146b7b0b 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -92,10 +92,10 @@ object SparkBuild extends Build { */ libraryDependencies ++= Seq( - "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", - "org.scalatest" %% "scalatest" % "1.8" % "test", - "org.scalacheck" %% "scalacheck" % "1.9" % "test", - "com.novocode" % "junit-interface" % "0.8" % "test", + "org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106", + "org.scalatest" %% "scalatest" % "1.9.1" % "test", + "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", + "com.novocode" % "junit-interface" % "0.9" % "test", "org.easymock" % "easymock" % "3.1" % "test" ), parallelExecution := false, diff --git a/project/build.properties b/project/build.properties index d4287112c6..66ad72ce2e 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.11.3 +sbt.version=0.12.2 diff --git a/project/plugins.sbt b/project/plugins.sbt index 4d0e696a11..d4f2442872 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -4,13 +4,13 @@ resolvers += "Typesafe Repository" at "http://repo.typesafe.com/typesafe/release resolvers += "Spray Repository" at "http://repo.spray.cc/" -addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.3") +addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.8.5") -addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.0-RC1") +addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.1") -addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.0.0") +addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.2.0") -addSbtPlugin("cc.spray" %% "sbt-twirl" % "0.5.2") +addSbtPlugin("io.spray" %% "sbt-twirl" % "0.6.1") // For Sonatype publishing //resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) -- cgit v1.2.3 From bc8ba222ff29d46cd2e3331753c9c4ce681eccb4 Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 28 Mar 2013 15:42:01 -0700 Subject: Bump development version to 0.8.0 --- bagel/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- pom.xml | 2 +- project/SparkBuild.scala | 2 +- repl-bin/pom.xml | 2 +- repl/pom.xml | 2 +- repl/src/main/scala/spark/repl/SparkILoop.scala | 2 +- streaming/pom.xml | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) (limited to 'project') diff --git a/bagel/pom.xml b/bagel/pom.xml index 510cff4669..be2e358091 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml diff --git a/core/pom.xml b/core/pom.xml index fe9c803728..08717860a7 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index f99d5bb376..a6aa38a46d 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -3,8 +3,8 @@ markdown: kramdown # These allow the documentation to be updated with newer releases # of Spark, Scala, and
Mesos. -SPARK_VERSION: 0.7.1-SNAPSHOT -SPARK_VERSION_SHORT: 0.7.1 +SPARK_VERSION: 0.8.0-SNAPSHOT +SPARK_VERSION_SHORT: 0.8.0 SCALA_VERSION: 2.9.2 MESOS_VERSION: 0.9.0-incubating SPARK_ISSUE_TRACKER_URL: https://spark-project.atlassian.net diff --git a/examples/pom.xml b/examples/pom.xml index 39cc47c709..d014089fe4 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 08d1fc12e0..1174b475d3 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT pom Spark Project Parent POM http://spark-project.org/ diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f378b2398..250211fb0c 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -35,7 +35,7 @@ object SparkBuild extends Build { def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", - version := "0.7.1-SNAPSHOT", + version := "0.8.0-SNAPSHOT", scalaVersion := "2.9.2", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index dd720e2291..fe526a7616 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml diff --git a/repl/pom.xml b/repl/pom.xml index a3e4606edc..0b5e400c3d 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml diff --git a/repl/src/main/scala/spark/repl/SparkILoop.scala b/repl/src/main/scala/spark/repl/SparkILoop.scala index cd7b5128b2..39b213851f 100644 --- a/repl/src/main/scala/spark/repl/SparkILoop.scala +++ b/repl/src/main/scala/spark/repl/SparkILoop.scala @@ -200,7 +200,7 @@ class SparkILoop(in0: Option[BufferedReader], val out: PrintWriter, val master: ____ __ / __/__ ___ _____/ /__ _\ \/ _ \/ _ `/ __/ '_/ - /___/ .__/\_,_/_/ /_/\_\ version 0.7.1 + /___/ .__/\_,_/_/ /_/\_\ version 0.8.0-SNAPSHOT /_/ """) import Properties._ diff --git a/streaming/pom.xml b/streaming/pom.xml index ec077e8089..b0d0cd0ff3 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -4,7 +4,7 @@ org.spark-project spark-parent - 0.7.1-SNAPSHOT + 0.8.0-SNAPSHOT ../pom.xml -- cgit v1.2.3 From 0f54344fd858acb327174cc976a07038a3aadf24 Mon Sep 17 00:00:00 2001 From: Nick Pentreath Date: Wed, 3 Apr 2013 13:15:34 +0200 Subject: Bumping Algebird version in examples now that it supports JDK 1.6 --- examples/pom.xml | 2 +- project/SparkBuild.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'project') diff --git a/examples/pom.xml b/examples/pom.xml index 39cc47c709..0537404040 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -22,7 +22,7 @@ com.twitter algebird-core_2.9.2 - 0.1.8 + 0.1.11 org.scalatest diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index d44bf3b5e3..082e2b985d 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -157,7 +157,7 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", - libraryDependencies ++= Seq("com.twitter" % "algebird-core_2.9.2" % "0.1.8") + libraryDependencies ++= Seq("com.twitter" % "algebird-core_2.9.2" % "0.1.11") ) def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") -- cgit v1.2.3 From 
5555811bd54ddb84bce11d4ab04b1f818c221a14 Mon Sep 17 00:00:00 2001 From: Andy Konwinski Date: Thu, 4 Apr 2013 13:26:45 -0700 Subject: Update build to Scala 2.9.3 --- project/SparkBuild.scala | 8 ++++---- project/plugins.sbt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f378b2398..b54fa1ebf8 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -36,7 +36,7 @@ object SparkBuild extends Build { def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", version := "0.7.1-SNAPSHOT", - scalaVersion := "2.9.2", + scalaVersion := "2.9.3", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, @@ -93,8 +93,8 @@ object SparkBuild extends Build { libraryDependencies ++= Seq( "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", - "org.scalatest" %% "scalatest" % "1.8" % "test", - "org.scalacheck" %% "scalacheck" % "1.9" % "test", + "org.scalatest" %% "scalatest" % "1.9.1" % "test", + "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", "com.novocode" % "junit-interface" % "0.8" % "test", "org.easymock" % "easymock" % "3.1" % "test" ), @@ -140,7 +140,7 @@ object SparkBuild extends Build { "colt" % "colt" % "1.2.0", "cc.spray" % "spray-can" % "1.0-M2.1", "cc.spray" % "spray-server" % "1.0-M2.1", - "cc.spray" %% "spray-json" % "1.1.1", + "cc.spray" % "spray-json_2.9.2" % "1.1.1", "org.apache.mesos" % "mesos" % "0.9.0-incubating" ) ++ (if (HADOOP_MAJOR_VERSION == "2") Some("org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION) else None).toSeq, unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ("src/hadoop" + HADOOP_MAJOR_VERSION + "/scala") } diff --git a/project/plugins.sbt b/project/plugins.sbt index 4d0e696a11..9cea0b5ee4 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -10,7 +10,7 @@ addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "2.1.0-RC1") addSbtPlugin("com.github.mpeltonen" % "sbt-idea" % "1.0.0") -addSbtPlugin("cc.spray" %% "sbt-twirl" % "0.5.2") +addSbtPlugin("io.spray" %% "sbt-twirl" % "0.6.1") // For Sonatype publishing //resolvers += Resolver.url("sbt-plugin-releases", new URL("http://scalasbt.artifactoryonline.com/scalasbt/sbt-plugin-releases/"))(Resolver.ivyStylePatterns) -- cgit v1.2.3 From 6798a09df84fb97e196c84d55cf3e21ad676871f Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Sun, 7 Apr 2013 17:47:38 +0530 Subject: Add support for building against hadoop2-yarn : adding new maven profile for it --- bagel/pom.xml | 37 +++++++++++ core/pom.xml | 62 +++++++++++++++++++ .../apache/hadoop/mapred/HadoopMapRedUtil.scala | 3 + .../hadoop/mapreduce/HadoopMapReduceUtil.scala | 3 + .../apache/hadoop/mapred/HadoopMapRedUtil.scala | 13 ++++ .../hadoop/mapreduce/HadoopMapReduceUtil.scala | 13 ++++ .../apache/hadoop/mapred/HadoopMapRedUtil.scala | 3 + .../hadoop/mapreduce/HadoopMapReduceUtil.scala | 3 + core/src/main/scala/spark/PairRDDFunctions.scala | 5 +- core/src/main/scala/spark/rdd/NewHadoopRDD.scala | 2 +- examples/pom.xml | 43 +++++++++++++ pom.xml | 54 ++++++++++++++++ project/SparkBuild.scala | 34 +++++++++-- repl-bin/pom.xml | 50 +++++++++++++++ repl/pom.xml | 71 ++++++++++++++++++++++ streaming/pom.xml | 37 +++++++++++ 16 files changed, 424 insertions(+), 9 deletions(-) create mode 100644 
core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala create mode 100644 core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala (limited to 'project') diff --git a/bagel/pom.xml b/bagel/pom.xml index 510cff4669..89282161ea 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -102,5 +102,42 @@ + + hadoop2-yarn + + + org.spark-project + spark-core + ${project.version} + hadoop2-yarn + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2-yarn + + + + + diff --git a/core/pom.xml b/core/pom.xml index fe9c803728..9baa447662 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -279,5 +279,67 @@ + + hadoop2-yarn + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-source + generate-sources + + add-source + + + + src/main/scala + src/hadoop2-yarn/scala + + + + + add-scala-test-sources + generate-test-sources + + add-test-source + + + + src/test/scala + + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2-yarn + + + + + diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala index ca9f7219de..f286f2cf9c 100644 --- a/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/hadoop1/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala @@ -4,4 +4,7 @@ trait HadoopMapRedUtil { def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContext(conf, jobId) def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, + jobId, isMap, taskId, attemptId) } diff --git a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala index de7b0f81e3..264d421d14 100644 --- a/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/hadoop1/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala @@ -6,4 +6,7 @@ trait HadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContext(conf, jobId) def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContext(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, + jobId, isMap, taskId, attemptId) } diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala new file mode 100644 index 0000000000..875c0a220b --- /dev/null +++ b/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala @@ -0,0 +1,13 @@ + +package org.apache.hadoop.mapred + +import org.apache.hadoop.mapreduce.TaskType + +trait HadoopMapRedUtil { + def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) + + def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): 
TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = + new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) +} diff --git a/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala new file mode 100644 index 0000000000..8bc6fb6dea --- /dev/null +++ b/core/src/hadoop2-yarn/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala @@ -0,0 +1,13 @@ +package org.apache.hadoop.mapreduce + +import org.apache.hadoop.conf.Configuration +import task.{TaskAttemptContextImpl, JobContextImpl} + +trait HadoopMapReduceUtil { + def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) + + def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = + new TaskAttemptID(jtIdentifier, jobId, if (isMap) TaskType.MAP else TaskType.REDUCE, taskId, attemptId) +} diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala index 35300cea58..a0652d7fc7 100644 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala +++ b/core/src/hadoop2/scala/org/apache/hadoop/mapred/HadoopMapRedUtil.scala @@ -4,4 +4,7 @@ trait HadoopMapRedUtil { def newJobContext(conf: JobConf, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, + jobId, isMap, taskId, attemptId) } diff --git a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala b/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala index 7afdbff320..7fdbe322fd 100644 --- a/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala +++ b/core/src/hadoop2/scala/org/apache/hadoop/mapreduce/HadoopMapReduceUtil.scala @@ -7,4 +7,7 @@ trait HadoopMapReduceUtil { def newJobContext(conf: Configuration, jobId: JobID): JobContext = new JobContextImpl(conf, jobId) def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = new TaskAttemptContextImpl(conf, attemptId) + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = new TaskAttemptID(jtIdentifier, + jobId, isMap, taskId, attemptId) } diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/spark/PairRDDFunctions.scala index 07efba9e8d..39469fa3c8 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/spark/PairRDDFunctions.scala @@ -545,8 +545,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( // around by taking a mod. We expect that no task will be attempted 2 billion times. 
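// (A hedged, standalone sketch of the wrap-around performed in the lines below — the
// literal values here are invented for illustration and are not taken from this patch:
//   val longAttemptId: Long = 2147483650L                    // context.attemptId is a Long
//   val attemptNumber = (longAttemptId % Int.MaxValue).toInt // == 3, fits Hadoop's Int field
//   val id = newTaskAttemptID("jt-example", 1, false, 3, attemptNumber)
// newTaskAttemptID is the per-Hadoop-version shim added earlier in this patch, so this
// one call site compiles unchanged against the hadoop1, hadoop2 and hadoop2-yarn trees.)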
val attemptNumber = (context.attemptId % Int.MaxValue).toInt /* "reduce task" */ - val attemptId = new TaskAttemptID(jobtrackerID, - stageId, false, context.splitId, attemptNumber) + val attemptId = newTaskAttemptID(jobtrackerID, stageId, false, context.splitId, attemptNumber) val hadoopContext = newTaskAttemptContext(wrappedConf.value, attemptId) val format = outputFormatClass.newInstance val committer = format.getOutputCommitter(hadoopContext) @@ -565,7 +564,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( * however we're only going to use this local OutputCommitter for * setupJob/commitJob, so we just use a dummy "map" task. */ - val jobAttemptId = new TaskAttemptID(jobtrackerID, stageId, true, 0, 0) + val jobAttemptId = newTaskAttemptID(jobtrackerID, stageId, true, 0, 0) val jobTaskContext = newTaskAttemptContext(wrappedConf.value, jobAttemptId) val jobCommitter = jobFormat.getOutputCommitter(jobTaskContext) jobCommitter.setupJob(jobTaskContext) diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala index bdd974590a..901d01ef30 100644 --- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/spark/rdd/NewHadoopRDD.scala @@ -57,7 +57,7 @@ class NewHadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext) = new Iterator[(K, V)] { val split = theSplit.asInstanceOf[NewHadoopPartition] val conf = confBroadcast.value.value - val attemptId = new TaskAttemptID(jobtrackerId, id, true, split.index, 0) + val attemptId = newTaskAttemptID(jobtrackerId, id, true, split.index, 0) val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) val format = inputFormatClass.newInstance if (format.isInstanceOf[Configurable]) { diff --git a/examples/pom.xml b/examples/pom.xml index 39cc47c709..9594257ad4 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -118,5 +118,48 @@ + + hadoop2-yarn + + + org.spark-project + spark-core + ${project.version} + hadoop2-yarn + + + org.spark-project + spark-streaming + ${project.version} + hadoop2-yarn + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2-yarn + + + + + diff --git a/pom.xml b/pom.xml index 08d1fc12e0..b3134a957d 100644 --- a/pom.xml +++ b/pom.xml @@ -558,5 +558,59 @@ + + + hadoop2-yarn + + 2 + 2.0.3-alpha + + + + + maven-root + Maven root repository + http://repo1.maven.org/maven2/ + + true + + + false + + + + + + + + + org.apache.hadoop + hadoop-client + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-api + ${yarn.version} + + + org.apache.hadoop + hadoop-yarn-common + ${yarn.version} + + + + org.apache.avro + avro + 1.7.1.cloudera.2 + + + org.apache.avro + avro-ipc + 1.7.1.cloudera.2 + + + + diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 5f378b2398..f041930b4e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -1,3 +1,4 @@ + import sbt._ import sbt.Classpaths.publishTask import Keys._ @@ -10,12 +11,18 @@ import twirl.sbt.TwirlPlugin._ object SparkBuild extends Build { // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. 
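// (A consolidated view of the flag combinations this hunk sets up — every value below
// appears in the diff that follows; exactly one block is meant to be active at a time:
//   val HADOOP_VERSION = "1.0.4";              val HADOOP_MAJOR_VERSION = "1"; val HADOOP_YARN = false  // Hadoop 1
//   val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1"; val HADOOP_MAJOR_VERSION = "2"; val HADOOP_YARN = false  // Hadoop 2, MR1
//   val HADOOP_VERSION = "2.0.3-alpha";        val HADOOP_MAJOR_VERSION = "2"; val HADOOP_YARN = true   // Hadoop 2 + YARN
// This commit enables the third block by default.)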
- val HADOOP_VERSION = "1.0.4" - val HADOOP_MAJOR_VERSION = "1" + //val HADOOP_VERSION = "1.0.4" + //val HADOOP_MAJOR_VERSION = "1" // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2" //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" //val HADOOP_MAJOR_VERSION = "2" + //val HADOOP_YARN = false + + // For Hadoop 2 YARN support + val HADOOP_VERSION = "2.0.3-alpha" + val HADOOP_MAJOR_VERSION = "2" + val HADOOP_YARN = true lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming) @@ -129,7 +136,6 @@ object SparkBuild extends Build { "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, "com.ning" % "compress-lzf" % "0.8.4", - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION, "asm" % "asm-all" % "3.3.1", "com.google.protobuf" % "protobuf-java" % "2.4.1", "de.javakaffee" % "kryo-serializers" % "0.22", @@ -142,8 +148,26 @@ object SparkBuild extends Build { "cc.spray" % "spray-server" % "1.0-M2.1", "cc.spray" %% "spray-json" % "1.1.1", "org.apache.mesos" % "mesos" % "0.9.0-incubating" - ) ++ (if (HADOOP_MAJOR_VERSION == "2") Some("org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION) else None).toSeq, - unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ("src/hadoop" + HADOOP_MAJOR_VERSION + "/scala") } + ) ++ ( + if (HADOOP_MAJOR_VERSION == "2") { + if (HADOOP_YARN) { + Seq( + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION + ) + } else { + Seq( + "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION + ) + } + } else { + Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION) + }), + unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / + ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") "src/hadoop2-yarn/scala" else "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala" ) + } ) ++ assemblySettings ++ extraAssemblySettings ++ Twirl.settings def rootSettings = sharedSettings ++ Seq( diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index dd720e2291..f9d84fd3c4 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -153,6 +153,56 @@ + + hadoop2-yarn + + hadoop2-yarn + + + + org.spark-project + spark-core + ${project.version} + hadoop2-yarn + + + org.spark-project + spark-bagel + ${project.version} + hadoop2-yarn + runtime + + + org.spark-project + spark-examples + ${project.version} + hadoop2-yarn + runtime + + + org.spark-project + spark-repl + ${project.version} + hadoop2-yarn + runtime + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + deb diff --git a/repl/pom.xml b/repl/pom.xml index a3e4606edc..1f885673f4 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -187,5 +187,76 @@ + + hadoop2-yarn + + hadoop2-yarn + + + + org.spark-project + spark-core + ${project.version} + hadoop2-yarn + + + org.spark-project + spark-bagel + ${project.version} + hadoop2-yarn + runtime + + + org.spark-project + spark-examples + ${project.version} + hadoop2-yarn + runtime + + + org.spark-project + spark-streaming + ${project.version} + hadoop2-yarn + runtime + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + org.apache.avro + avro + provided + + + org.apache.avro + 
avro-ipc + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2-yarn + + + + + diff --git a/streaming/pom.xml b/streaming/pom.xml index ec077e8089..fc2e211a42 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -149,5 +149,42 @@ + + hadoop2-yarn + + + org.spark-project + spark-core + ${project.version} + hadoop2-yarn + + + org.apache.hadoop + hadoop-client + provided + + + org.apache.hadoop + hadoop-yarn-api + provided + + + org.apache.hadoop + hadoop-yarn-common + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2-yarn + + + + + -- cgit v1.2.3 From 18bd41d1a38bdb9d68a9e6f360a15f9d7b25ebf5 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Tue, 9 Apr 2013 14:35:29 -0400 Subject: Don't pull in old versions of Jackson via hadoop-core --- project/SparkBuild.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f03efd6c83..029ade2e2e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -129,7 +129,7 @@ object SparkBuild extends Build { "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, "com.ning" % "compress-lzf" % "0.8.4", - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), "asm" % "asm-all" % "3.3.1", "com.google.protobuf" % "protobuf-java" % "2.4.1", "de.javakaffee" % "kryo-serializers" % "0.22", -- cgit v1.2.3 From ed336e0d44d27e9be66adb0962f82af7d1ac4d87 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 11 Apr 2013 22:29:37 -0400 Subject: Fix tests from different projects running in parallel in SBT 0.12 --- project/SparkBuild.scala | 6 ++++-- project/build.properties | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f03efd6c83..0e55c51204 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -44,7 +44,10 @@ object SparkBuild extends Build { transitiveClassifiers in Scope.GlobalScope := Seq("sources"), testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), - // shared between both core and streaming. + // Only allow one test at a time, even across projects, since they run in the same JVM + concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), + + // Shared between both core and streaming. resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), // For Sonatype publishing @@ -98,7 +101,6 @@ object SparkBuild extends Build { "com.novocode" % "junit-interface" % "0.9" % "test", "org.easymock" % "easymock" % "3.1" % "test" ), - parallelExecution := false, /* Workaround for issue #206 (fixed after SBT 0.11.0) */ watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task, const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) }, diff --git a/project/build.properties b/project/build.properties index 66ad72ce2e..9b860e23c5 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=0.12.2 +sbt.version=0.12.3 -- cgit v1.2.3 From 5d891534fd5ca268f6ba7c9a47680846eb3a15ae Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Wed, 17 Apr 2013 05:54:43 +0530 Subject: Move back to 2.0.2-alpha, since 2.0.3-alpha is not available in cloudera yet. 
Also, add netty dependency explicitly to prevent resolving to older 2.3x version. Additionally, comment out retrievePattern to ensure correct netty is picked up --- project/SparkBuild.scala | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f041930b4e..91e3123bc5 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -20,7 +20,7 @@ object SparkBuild extends Build { //val HADOOP_YARN = false // For Hadoop 2 YARN support - val HADOOP_VERSION = "2.0.3-alpha" + val HADOOP_VERSION = "2.0.2-alpha" val HADOOP_MAJOR_VERSION = "2" val HADOOP_YARN = true @@ -47,9 +47,10 @@ object SparkBuild extends Build { scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, - retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", + // retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", transitiveClassifiers in Scope.GlobalScope := Seq("sources"), - testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), + // For some reason this fails on some nodes and works on others - not yet debugged why + // testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), // shared between both core and streaming. resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), @@ -99,6 +100,7 @@ object SparkBuild extends Build { */ libraryDependencies ++= Seq( + "io.netty" % "netty" % "3.5.3.Final", "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", "org.scalatest" %% "scalatest" % "1.8" % "test", "org.scalacheck" %% "scalacheck" % "1.9" % "test", @@ -131,11 +133,13 @@ object SparkBuild extends Build { ), libraryDependencies ++= Seq( + "io.netty" % "netty" % "3.5.3.Final", "com.google.guava" % "guava" % "11.0.1", "log4j" % "log4j" % "1.2.16", "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, "com.ning" % "compress-lzf" % "0.8.4", + "commons-daemon" % "commons-daemon" % "1.0.10", "asm" % "asm-all" % "3.3.1", "com.google.protobuf" % "protobuf-java" % "2.4.1", "de.javakaffee" % "kryo-serializers" % "0.22", -- cgit v1.2.3 From 17e076de800ea0d4c55f2bd657348641f6f9c55b Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 18 Apr 2013 22:25:21 -0700 Subject: Turn on forking in test JVMs to reduce the pressure on perm gen and code cache sizes due to having 2 instances of the Scala compiler and a bunch of classloaders. 
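For reference, a minimal standalone sketch of the sbt settings involved: the two lines this commit adds to sharedSettings, plus the one-test-at-a-time restriction introduced by the earlier SBT 0.12.3 commit in this log (the 1g heap is the value introduced here; a later commit below raises it to 2g):

    // project/SparkBuild.scala, inside sharedSettings
    fork := true,               // run tests in freshly forked JVMs instead of inside sbt
    javaOptions += "-Xmx1g",    // heap for each forked test JVM
    concurrentRestrictions in Global += Tags.limit(Tags.Test, 1)  // still one test at a time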
--- project/SparkBuild.scala | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 91541a34f9..f0b371b2cf 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -44,6 +44,10 @@ object SparkBuild extends Build { transitiveClassifiers in Scope.GlobalScope := Seq("sources"), testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), + // Fork new JVMs for tests and set Java options for those + fork := true, + javaOptions += "-Xmx1g", + // Only allow one test at a time, even across projects, since they run in the same JVM concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), -- cgit v1.2.3 From 7acab3ab45df421601ee9a076a61de00561a0308 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Mon, 22 Apr 2013 08:01:13 +0530 Subject: Fix review comments, add a new api to SparkHadoopUtil to create appropriate Configuration. Modify an example to show how to use SplitInfo --- core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala | 5 +++++ .../hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala | 6 +++++- core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala | 5 +++++ core/src/main/scala/spark/SparkContext.scala | 14 +++++++++----- core/src/main/scala/spark/Utils.scala | 8 +++----- core/src/main/scala/spark/rdd/CheckpointRDD.scala | 7 ++++--- examples/src/main/scala/spark/examples/SparkHdfsLR.scala | 10 ++++++++-- project/SparkBuild.scala | 10 ++++++---- 8 files changed, 45 insertions(+), 20 deletions(-) (limited to 'project') diff --git a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala index d4badbc5c4..a0fb4fe25d 100644 --- a/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/hadoop1/scala/spark/deploy/SparkHadoopUtil.scala @@ -1,4 +1,6 @@ package spark.deploy +import org.apache.hadoop.conf.Configuration + /** * Contains util methods to interact with Hadoop from spark. @@ -15,4 +17,7 @@ object SparkHadoopUtil { // Add support, if exists - for now, simply run func ! func(args) } + + // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems + def newConfiguration(): Configuration = new Configuration() } diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala index 66e5ad8491..ab1ab9d8a7 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/hadoop2-yarn/scala/spark/deploy/SparkHadoopUtil.scala @@ -12,7 +12,7 @@ import java.security.PrivilegedExceptionAction */ object SparkHadoopUtil { - val yarnConf = new YarnConfiguration(new Configuration()) + val yarnConf = newConfiguration() def getUserNameFromEnvironment(): String = { // defaulting to env if -D is not present ... @@ -56,4 +56,8 @@ object SparkHadoopUtil { def setYarnMode(env: HashMap[String, String]) { env("SPARK_YARN_MODE") = "true" } + + // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems + // Always create a new config, dont reuse yarnConf. 
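// (Call sites elsewhere in this same patch rely on this indirection — SparkContext,
// Utils and CheckpointRDD below all switch from `new Configuration()` to
//   val conf = SparkHadoopUtil.newConfiguration()
// which yields a YarnConfiguration in the hadoop2-yarn tree and a plain Hadoop
// Configuration in the hadoop1/hadoop2 trees, so caller code is identical across builds.)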
+ def newConfiguration(): Configuration = new YarnConfiguration(new Configuration()) } diff --git a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala b/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala index d4badbc5c4..a0fb4fe25d 100644 --- a/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/hadoop2/scala/spark/deploy/SparkHadoopUtil.scala @@ -1,4 +1,6 @@ package spark.deploy +import org.apache.hadoop.conf.Configuration + /** * Contains util methods to interact with Hadoop from spark. @@ -15,4 +17,7 @@ object SparkHadoopUtil { // Add support, if exists - for now, simply run func ! func(args) } + + // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems + def newConfiguration(): Configuration = new Configuration() } diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/spark/SparkContext.scala index e853bce2c4..5f5ec0b0f4 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/spark/SparkContext.scala @@ -31,7 +31,7 @@ import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFor import org.apache.hadoop.mapreduce.{Job => NewHadoopJob} import org.apache.mesos.MesosNativeLibrary -import spark.deploy.LocalSparkCluster +import spark.deploy.{SparkHadoopUtil, LocalSparkCluster} import spark.partial.ApproximateEvaluator import spark.partial.PartialResult import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD} @@ -102,7 +102,9 @@ class SparkContext( // Add each JAR given through the constructor - if (jars != null) jars.foreach { addJar(_) } + if (jars != null) { + jars.foreach { addJar(_) } + } // Environment variables to pass to our executors private[spark] val executorEnvs = HashMap[String, String]() @@ -114,7 +116,9 @@ class SparkContext( executorEnvs(key) = value } } - if (environment != null) executorEnvs ++= environment + if (environment != null) { + executorEnvs ++= environment + } // Create and start the scheduler private var taskScheduler: TaskScheduler = { @@ -207,7 +211,7 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. 
*/ val hadoopConfiguration = { - val conf = new Configuration() + val conf = SparkHadoopUtil.newConfiguration() // Explicitly check for S3 environment variables if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) @@ -711,7 +715,7 @@ class SparkContext( */ def setCheckpointDir(dir: String, useExisting: Boolean = false) { val path = new Path(dir) - val fs = path.getFileSystem(new Configuration()) + val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) if (!useExisting) { if (fs.exists(path)) { throw new Exception("Checkpoint directory '" + path + "' already exists.") diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/spark/Utils.scala index 3e54fa7a7e..9f48cbe490 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/spark/Utils.scala @@ -4,7 +4,6 @@ import java.io._ import java.net.{InetAddress, URL, URI, NetworkInterface, Inet4Address, ServerSocket} import java.util.{Locale, Random, UUID} import java.util.concurrent.{ConcurrentHashMap, Executors, ThreadFactory, ThreadPoolExecutor} -import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{Path, FileSystem, FileUtil} import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.collection.JavaConversions._ @@ -208,7 +207,7 @@ private object Utils extends Logging { case _ => // Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others val uri = new URI(url) - val conf = new Configuration() + val conf = SparkHadoopUtil.newConfiguration() val fs = FileSystem.get(uri, conf) val in = fs.open(new Path(uri)) val out = new FileOutputStream(tempFile) @@ -317,7 +316,6 @@ private object Utils extends Logging { * Get the local machine's hostname. */ def localHostName(): String = { - // customHostname.getOrElse(InetAddress.getLocalHost.getHostName) customHostname.getOrElse(localIpAddressHostname) } @@ -337,6 +335,7 @@ private object Utils extends Logging { retval } + /* // Used by DEBUG code : remove when all testing done private val ipPattern = Pattern.compile("^[0-9]+(\\.[0-9]+)*$") def checkHost(host: String, message: String = "") { @@ -358,12 +357,11 @@ private object Utils extends Logging { Utils.logErrorWithStack("Unexpected to have port " + port + " which is not valid in " + hostPort + ". Message " + message) } } + */ // Once testing is complete in various modes, replace with this ? 
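// (Net effect of the hunk below: the Pattern-based DEBUG checkers above stay commented
// out and the empty stubs become the live definitions. One plausible stricter form that
// the "replace with this ?" note could anticipate — illustrative only, not part of this
// patch:
//   def checkHost(host: String, message: String = "") {
//     assert(host.indexOf(':') == -1, message)      // a bare host must not carry a port
//   }
//   def checkHostPort(hostPort: String, message: String = "") {
//     assert(hostPort.indexOf(':') != -1, message)  // a host:port string must carry one
//   })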
- /* def checkHost(host: String, message: String = "") {} def checkHostPort(hostPort: String, message: String = "") {} - */ def getUserNameFromEnvironment(): String = { SparkHadoopUtil.getUserNameFromEnvironment diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/spark/rdd/CheckpointRDD.scala index 24d527f38f..79d00edee7 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/spark/rdd/CheckpointRDD.scala @@ -8,6 +8,7 @@ import org.apache.hadoop.util.ReflectionUtils import org.apache.hadoop.fs.Path import java.io.{File, IOException, EOFException} import java.text.NumberFormat +import spark.deploy.SparkHadoopUtil private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} @@ -65,7 +66,7 @@ private[spark] object CheckpointRDD extends Logging { def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { val outputDir = new Path(path) - val fs = outputDir.getFileSystem(new Configuration()) + val fs = outputDir.getFileSystem(SparkHadoopUtil.newConfiguration()) val finalOutputName = splitIdToFile(ctx.splitId) val finalOutputPath = new Path(outputDir, finalOutputName) @@ -103,7 +104,7 @@ private[spark] object CheckpointRDD extends Logging { } def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { - val fs = path.getFileSystem(new Configuration()) + val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt val fileInputStream = fs.open(path, bufferSize) val serializer = SparkEnv.get.serializer.newInstance() @@ -125,7 +126,7 @@ private[spark] object CheckpointRDD extends Logging { val sc = new SparkContext(cluster, "CheckpointRDD Test") val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000) val path = new Path(hdfsPath, "temp") - val fs = path.getFileSystem(new Configuration()) + val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _) val cpRDD = new CheckpointRDD[Int](sc, path.toString) assert(cpRDD.partitions.length == rdd.partitions.length, "Number of partitions is not the same") diff --git a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala b/examples/src/main/scala/spark/examples/SparkHdfsLR.scala index 0f42f405a0..3d080a0257 100644 --- a/examples/src/main/scala/spark/examples/SparkHdfsLR.scala +++ b/examples/src/main/scala/spark/examples/SparkHdfsLR.scala @@ -4,6 +4,8 @@ import java.util.Random import scala.math.exp import spark.util.Vector import spark._ +import spark.deploy.SparkHadoopUtil +import spark.scheduler.InputFormatInfo /** * Logistic regression based classification. 
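// (The hunk below wires HDFS block locality into this example. The key call, shown
// standalone — the path "hdfs:///data/lr_data.txt" is an invented placeholder, and
// master/sparkHome/jars stand in for the args(...) values used in the example:
//   val conf = SparkHadoopUtil.newConfiguration()
//   val prefLocs = InputFormatInfo.computePreferredLocations(Seq(
//     new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat],
//                         "hdfs:///data/lr_data.txt")))
//   val sc = new SparkContext(master, "SparkHdfsLR", sparkHome, jars, Map(), prefLocs))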
@@ -32,9 +34,13 @@ object SparkHdfsLR { System.err.println("Usage: SparkHdfsLR ") System.exit(1) } + val inputPath = args(1) + val conf = SparkHadoopUtil.newConfiguration() val sc = new SparkContext(args(0), "SparkHdfsLR", - System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR"))) - val lines = sc.textFile(args(1)) + System.getenv("SPARK_HOME"), Seq(System.getenv("SPARK_EXAMPLES_JAR")), Map(), + InputFormatInfo.computePreferredLocations( + Seq(new InputFormatInfo(conf, classOf[org.apache.hadoop.mapred.TextInputFormat], inputPath)))) + val lines = sc.textFile(inputPath) val points = lines.map(parsePoint _).cache() val ITERATIONS = args(2).toInt diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 91e3123bc5..0a5b89d927 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -47,10 +47,8 @@ object SparkBuild extends Build { scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, - // retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", transitiveClassifiers in Scope.GlobalScope := Seq("sources"), - // For some reason this fails on some nodes and works on others - not yet debugged why - // testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), + testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), // shared between both core and streaming. resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), @@ -170,7 +168,11 @@ object SparkBuild extends Build { Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION) }), unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / - ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") "src/hadoop2-yarn/scala" else "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala" ) + ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { + "src/hadoop2-yarn/scala" + } else { + "src/hadoop" + HADOOP_MAJOR_VERSION + "/scala" + } ) } ) ++ assemblySettings ++ extraAssemblySettings ++ Twirl.settings -- cgit v1.2.3 From 8faf5c51c3ea0b3ad83418552b50db596fefc558 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Wed, 24 Apr 2013 02:31:57 +0530 Subject: Patch from Thomas Graves to improve the YARN Client, and move to more production ready hadoop yarn branch --- core/pom.xml | 5 ++ .../scala/spark/deploy/yarn/Client.scala | 72 +++------------------- pom.xml | 9 ++- project/SparkBuild.scala | 5 +- repl-bin/pom.xml | 5 ++ 5 files changed, 30 insertions(+), 66 deletions(-) (limited to 'project') diff --git a/core/pom.xml b/core/pom.xml index 9baa447662..7f65ce5c00 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -297,6 +297,11 @@ hadoop-yarn-common provided + + org.apache.hadoop + hadoop-yarn-client + provided + diff --git a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala b/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala index c007dae98c..7a881e26df 100644 --- a/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala +++ b/core/src/hadoop2-yarn/scala/spark/deploy/yarn/Client.scala @@ -7,6 +7,7 @@ import org.apache.hadoop.net.NetUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.api.protocolrecords._ +import org.apache.hadoop.yarn.client.YarnClientImpl import org.apache.hadoop.yarn.conf.YarnConfiguration import org.apache.hadoop.yarn.ipc.YarnRPC import scala.collection.mutable.HashMap @@ 
-16,19 +17,19 @@ import org.apache.hadoop.yarn.util.{Apps, Records, ConverterUtils} import org.apache.hadoop.yarn.api.ApplicationConstants.Environment import spark.deploy.SparkHadoopUtil -class Client(conf: Configuration, args: ClientArguments) extends Logging { +class Client(conf: Configuration, args: ClientArguments) extends YarnClientImpl with Logging { def this(args: ClientArguments) = this(new Configuration(), args) - var applicationsManager: ClientRMProtocol = null var rpc: YarnRPC = YarnRPC.create(conf) val yarnConf: YarnConfiguration = new YarnConfiguration(conf) def run() { - connectToASM() + init(yarnConf) + start() logClusterResourceDetails() - val newApp = getNewApplication() + val newApp = super.getNewApplication() val appId = newApp.getApplicationId() verifyClusterResources(newApp) @@ -47,64 +48,17 @@ class Client(conf: Configuration, args: ClientArguments) extends Logging { System.exit(0) } - - def connectToASM() { - val rmAddress: InetSocketAddress = NetUtils.createSocketAddr( - yarnConf.get(YarnConfiguration.RM_ADDRESS, YarnConfiguration.DEFAULT_RM_ADDRESS) - ) - logInfo("Connecting to ResourceManager at" + rmAddress) - applicationsManager = rpc.getProxy(classOf[ClientRMProtocol], rmAddress, conf) - .asInstanceOf[ClientRMProtocol] - } def logClusterResourceDetails() { - val clusterMetrics: YarnClusterMetrics = getYarnClusterMetrics + val clusterMetrics: YarnClusterMetrics = super.getYarnClusterMetrics logInfo("Got Cluster metric info from ASM, numNodeManagers=" + clusterMetrics.getNumNodeManagers) -/* - val clusterNodeReports: List[NodeReport] = getNodeReports - logDebug("Got Cluster node info from ASM") - for (node <- clusterNodeReports) { - logDebug("Got node report from ASM for, nodeId=" + node.getNodeId + ", nodeAddress=" + node.getHttpAddress + - ", nodeRackName=" + node.getRackName + ", nodeNumContainers=" + node.getNumContainers + ", nodeHealthStatus=" + node.getNodeHealthStatus) - } -*/ - - val queueInfo: QueueInfo = getQueueInfo(args.amQueue) + val queueInfo: QueueInfo = super.getQueueInfo(args.amQueue) logInfo("Queue info .. 
queueName=" + queueInfo.getQueueName + ", queueCurrentCapacity=" + queueInfo.getCurrentCapacity + ", queueMaxCapacity=" + queueInfo.getMaximumCapacity + ", queueApplicationCount=" + queueInfo.getApplications.size + ", queueChildQueueCount=" + queueInfo.getChildQueues.size) } - def getYarnClusterMetrics: YarnClusterMetrics = { - val request: GetClusterMetricsRequest = Records.newRecord(classOf[GetClusterMetricsRequest]) - val response: GetClusterMetricsResponse = applicationsManager.getClusterMetrics(request) - return response.getClusterMetrics - } - - def getNodeReports: List[NodeReport] = { - val request: GetClusterNodesRequest = Records.newRecord(classOf[GetClusterNodesRequest]) - val response: GetClusterNodesResponse = applicationsManager.getClusterNodes(request) - return response.getNodeReports.toList - } - - def getQueueInfo(queueName: String): QueueInfo = { - val request: GetQueueInfoRequest = Records.newRecord(classOf[GetQueueInfoRequest]) - request.setQueueName(queueName) - request.setIncludeApplications(true) - request.setIncludeChildQueues(false) - request.setRecursive(false) - Records.newRecord(classOf[GetQueueInfoRequest]) - return applicationsManager.getQueueInfo(request).getQueueInfo - } - - def getNewApplication(): GetNewApplicationResponse = { - logInfo("Requesting new Application") - val request = Records.newRecord(classOf[GetNewApplicationRequest]) - val response = applicationsManager.getNewApplication(request) - logInfo("Got new ApplicationId: " + response.getApplicationId()) - return response - } def verifyClusterResources(app: GetNewApplicationResponse) = { val maxMem = app.getMaximumResourceCapability().getMemory() @@ -265,23 +219,15 @@ class Client(conf: Configuration, args: ClientArguments) extends Logging { } def submitApp(appContext: ApplicationSubmissionContext) = { - // Create the request to send to the applications manager - val appRequest = Records.newRecord(classOf[SubmitApplicationRequest]) - .asInstanceOf[SubmitApplicationRequest] - appRequest.setApplicationSubmissionContext(appContext) // Submit the application to the applications manager logInfo("Submitting application to ASM") - applicationsManager.submitApplication(appRequest) + super.submitApplication(appContext) } def monitorApplication(appId: ApplicationId): Boolean = { while(true) { Thread.sleep(1000) - val reportRequest = Records.newRecord(classOf[GetApplicationReportRequest]) - .asInstanceOf[GetApplicationReportRequest] - reportRequest.setApplicationId(appId) - val reportResponse = applicationsManager.getApplicationReport(reportRequest) - val report = reportResponse.getApplicationReport() + val report = super.getApplicationReport(appId) logInfo("Application report from ASM: \n" + "\t application identifier: " + appId.toString() + "\n" + diff --git a/pom.xml b/pom.xml index ecbfaf9b47..0e95520d50 100644 --- a/pom.xml +++ b/pom.xml @@ -564,7 +564,9 @@ hadoop2-yarn 2 - 2.0.2-alpha + + 0.23.7 + @@ -599,6 +601,11 @@ hadoop-yarn-common ${yarn.version} + + org.apache.hadoop + hadoop-yarn-client + ${yarn.version} + org.apache.avro diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 0a5b89d927..819e940403 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -20,7 +20,7 @@ object SparkBuild extends Build { //val HADOOP_YARN = false // For Hadoop 2 YARN support - val HADOOP_VERSION = "2.0.2-alpha" + val HADOOP_VERSION = "0.23.7" val HADOOP_MAJOR_VERSION = "2" val HADOOP_YARN = true @@ -156,7 +156,8 @@ object SparkBuild extends Build { Seq( "org.apache.hadoop" % 
"hadoop-client" % HADOOP_VERSION, "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION, - "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION + "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION, + "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION ) } else { Seq( diff --git a/repl-bin/pom.xml b/repl-bin/pom.xml index b66d193b5d..46f38c2772 100644 --- a/repl-bin/pom.xml +++ b/repl-bin/pom.xml @@ -201,6 +201,11 @@ hadoop-yarn-common runtime + + org.apache.hadoop + hadoop-yarn-client + runtime + -- cgit v1.2.3 From 5b85c715c8e4241f5e07237ed62729f1a7a800a8 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Wed, 24 Apr 2013 02:57:51 +0530 Subject: Revert back to 2.0.2-alpha : 0.23.7 has protocol changes which break against cloudera --- pom.xml | 4 ++-- project/SparkBuild.scala | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'project') diff --git a/pom.xml b/pom.xml index 0e95520d50..12e310a038 100644 --- a/pom.xml +++ b/pom.xml @@ -565,8 +565,8 @@ 2 - 0.23.7 - + + 2.0.2-alpha diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 819e940403..0a761f1c13 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -20,7 +20,8 @@ object SparkBuild extends Build { //val HADOOP_YARN = false // For Hadoop 2 YARN support - val HADOOP_VERSION = "0.23.7" + // val HADOOP_VERSION = "0.23.7" + val HADOOP_VERSION = "2.0.2-alpha" val HADOOP_MAJOR_VERSION = "2" val HADOOP_YARN = true -- cgit v1.2.3 From adcda84f9646f12e6d5fb4f1e5e3a1b0a98b7c9f Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Wed, 24 Apr 2013 08:57:25 +0530 Subject: Pull latest SparkBuild.scala from master and merge conflicts --- project/SparkBuild.scala | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 0a761f1c13..0c2598ab35 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -43,15 +43,22 @@ object SparkBuild extends Build { def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.spark-project", - version := "0.7.1-SNAPSHOT", - scalaVersion := "2.9.2", + version := "0.8.0-SNAPSHOT", + scalaVersion := "2.9.3", scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, transitiveClassifiers in Scope.GlobalScope := Seq("sources"), testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), - // shared between both core and streaming. + // Fork new JVMs for tests and set Java options for those + fork := true, + javaOptions += "-Xmx1g", + + // Only allow one test at a time, even across projects, since they run in the same JVM + concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), + + // Shared between both core and streaming. 
resolvers ++= Seq("Akka Repository" at "http://repo.akka.io/releases/"), // For Sonatype publishing @@ -100,13 +107,12 @@ object SparkBuild extends Build { libraryDependencies ++= Seq( "io.netty" % "netty" % "3.5.3.Final", - "org.eclipse.jetty" % "jetty-server" % "7.5.3.v20111011", - "org.scalatest" %% "scalatest" % "1.8" % "test", - "org.scalacheck" %% "scalacheck" % "1.9" % "test", - "com.novocode" % "junit-interface" % "0.8" % "test", + "org.eclipse.jetty" % "jetty-server" % "7.6.8.v20121106", + "org.scalatest" %% "scalatest" % "1.9.1" % "test", + "org.scalacheck" %% "scalacheck" % "1.10.0" % "test", + "com.novocode" % "junit-interface" % "0.9" % "test", "org.easymock" % "easymock" % "3.1" % "test" ), - parallelExecution := false, /* Workaround for issue #206 (fixed after SBT 0.11.0) */ watchTransitiveSources <<= Defaults.inDependencies[Task[Seq[File]]](watchSources.task, const(std.TaskExtra.constant(Nil)), aggregate = true, includeRoot = true) apply { _.join.map(_.flatten) }, @@ -137,8 +143,8 @@ object SparkBuild extends Build { "log4j" % "log4j" % "1.2.16", "org.slf4j" % "slf4j-api" % slf4jVersion, "org.slf4j" % "slf4j-log4j12" % slf4jVersion, - "com.ning" % "compress-lzf" % "0.8.4", "commons-daemon" % "commons-daemon" % "1.0.10", + "com.ning" % "compress-lzf" % "0.8.4", "asm" % "asm-all" % "3.3.1", "com.google.protobuf" % "protobuf-java" % "2.4.1", "de.javakaffee" % "kryo-serializers" % "0.22", @@ -149,25 +155,26 @@ object SparkBuild extends Build { "colt" % "colt" % "1.2.0", "cc.spray" % "spray-can" % "1.0-M2.1", "cc.spray" % "spray-server" % "1.0-M2.1", - "cc.spray" %% "spray-json" % "1.1.1", + "cc.spray" % "spray-json_2.9.2" % "1.1.1", "org.apache.mesos" % "mesos" % "0.9.0-incubating" ) ++ ( if (HADOOP_MAJOR_VERSION == "2") { if (HADOOP_YARN) { Seq( - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION, - "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION, - "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION, - "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION + // Exclude rule required for all ? 
+ "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), + "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), + "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), + "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) ) } else { Seq( - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION, - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION + "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) ) } } else { - Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION) + Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) ) }), unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { @@ -189,7 +196,7 @@ object SparkBuild extends Build { def examplesSettings = sharedSettings ++ Seq( name := "spark-examples", - libraryDependencies ++= Seq("com.twitter" % "algebird-core_2.9.2" % "0.1.8") + libraryDependencies ++= Seq("com.twitter" % "algebird-core_2.9.2" % "0.1.11") ) def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") -- cgit v1.2.3 From 3b594a4e3b94de49a09dc679a30d857e3f41df69 Mon Sep 17 00:00:00 2001 From: Mridul Muralidharan Date: Wed, 24 Apr 2013 10:18:25 +0530 Subject: Do not add signature files - results in validation errors when using assembled file --- project/SparkBuild.scala | 1 + 1 file changed, 1 insertion(+) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 0c2598ab35..b3f410bfa6 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -214,6 +214,7 @@ object SparkBuild extends Build { def extraAssemblySettings() = Seq(test in assembly := {}) ++ Seq( mergeStrategy in assembly := { case m if m.toLowerCase.endsWith("manifest.mf") => MergeStrategy.discard + case m if m.toLowerCase.matches("meta-inf/.*\\.sf$") => MergeStrategy.discard case "reference.conf" => MergeStrategy.concat case _ => MergeStrategy.first } -- cgit v1.2.3 From 01d9ba503878d4191eaa8080e86c631d3c705cce Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Thu, 25 Apr 2013 00:11:27 -0700 Subject: Add back line removed during YARN merge --- project/SparkBuild.scala | 1 + 1 file changed, 1 insertion(+) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index b3f410bfa6..44758ad87e 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -48,6 +48,7 @@ object SparkBuild extends Build { scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"), unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath }, retrieveManaged := true, + retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]", transitiveClassifiers in Scope.GlobalScope := Seq("sources"), testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))), -- cgit v1.2.3 From eef9ea1993270d5f07e52e807e8d149e54079aad Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Wed, 24 Apr 2013 15:08:20 -0700 Subject: Update unit test memory to 2 GB --- 
From 01d9ba503878d4191eaa8080e86c631d3c705cce Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Thu, 25 Apr 2013 00:11:27 -0700
Subject: Add back line removed during YARN merge

---
 project/SparkBuild.scala | 1 +
 1 file changed, 1 insertion(+)

(limited to 'project')

diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala
index b3f410bfa6..44758ad87e 100644
--- a/project/SparkBuild.scala
+++ b/project/SparkBuild.scala
@@ -48,6 +48,7 @@ object SparkBuild extends Build {
     scalacOptions := Seq("-unchecked", "-optimize", "-deprecation"),
     unmanagedJars in Compile <<= baseDirectory map { base => (base / "lib" ** "*.jar").classpath },
     retrieveManaged := true,
+    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
     transitiveClassifiers in Scope.GlobalScope := Seq("sources"),
     testListeners <<= target.map(t => Seq(new eu.henkelmann.sbt.JUnitXmlTestsListener(t.getAbsolutePath))),
-- cgit v1.2.3
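For context, retrieveManaged := true makes sbt copy every resolved dependency under lib_managed/, and retrievePattern is an Ivy pattern that controls the layout: [type], [artifact], [revision], and [classifier] expand per artifact, and a parenthesised segment drops out entirely when its token is empty. A rough sketch of where things land under the restored pattern (the paths are illustrative guesses, not output from the build):

    retrieveManaged := true,
    retrievePattern := "[type]s/[artifact](-[revision])(-[classifier]).[ext]",
    // A dependency like "org.scalatest" %% "scalatest" % "1.9.1" would then be
    // copied to something like:
    //   lib_managed/jars/scalatest_2.9.2-1.9.1.jar
    // and, with the "sources" classifier requested above it, something like:
    //   lib_managed/srcs/scalatest_2.9.2-1.9.1-sources.jar
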
- "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), - "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), - "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), - "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-api" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-common" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-yarn-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) } else { Seq( - "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ), - "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) + "org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty), + "org.apache.hadoop" % "hadoop-client" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) } } else { - Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll( ExclusionRule(organization = "org.codehaus.jackson") ) ) + Seq("org.apache.hadoop" % "hadoop-core" % HADOOP_VERSION excludeAll(excludeJackson, excludeNetty) ) }), unmanagedSourceDirectories in Compile <+= baseDirectory{ _ / ( if (HADOOP_YARN && HADOOP_MAJOR_VERSION == "2") { @@ -205,10 +208,10 @@ object SparkBuild extends Build { def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", libraryDependencies ++= Seq( - "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile", + "org.apache.flume" % "flume-ng-sdk" % "1.2.0" % "compile" excludeAll(excludeNetty), "com.github.sgroschupf" % "zkclient" % "0.1", - "org.twitter4j" % "twitter4j-stream" % "3.0.3", - "com.typesafe.akka" % "akka-zeromq" % "2.0.3" + "org.twitter4j" % "twitter4j-stream" % "3.0.3" excludeAll(excludeNetty), + "com.typesafe.akka" % "akka-zeromq" % "2.0.3" excludeAll(excludeNetty) ) ) ++ assemblySettings ++ extraAssemblySettings -- cgit v1.2.3 From f1f92c88eb2960a16d33bf7dd291c8ce58f665de Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 29 Apr 2013 17:08:45 -0700 Subject: Build against Hadoop 1 by default --- project/SparkBuild.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7bd6c4c235..f2410085d8 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -11,8 +11,9 @@ import twirl.sbt.TwirlPlugin._ object SparkBuild extends Build { // Hadoop version to build against. For example, "0.20.2", "0.20.205.0", or // "1.0.4" for Apache releases, or "0.20.2-cdh3u5" for Cloudera Hadoop. 
- //val HADOOP_VERSION = "1.0.4" - //val HADOOP_MAJOR_VERSION = "1" + val HADOOP_VERSION = "1.0.4" + val HADOOP_MAJOR_VERSION = "1" + val HADOOP_YARN = false // For Hadoop 2 versions such as "2.0.0-mr1-cdh4.1.1", set the HADOOP_MAJOR_VERSION to "2" //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" @@ -20,10 +21,9 @@ object SparkBuild extends Build { //val HADOOP_YARN = false // For Hadoop 2 YARN support - // val HADOOP_VERSION = "0.23.7" - val HADOOP_VERSION = "2.0.2-alpha" - val HADOOP_MAJOR_VERSION = "2" - val HADOOP_YARN = true + //val HADOOP_VERSION = "2.0.2-alpha" + //val HADOOP_MAJOR_VERSION = "2" + //val HADOOP_YARN = true lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming) -- cgit v1.2.3 From 207afe4088219a0c7350b3f80eb60e86c97e140f Mon Sep 17 00:00:00 2001 From: Jey Kottalam Date: Thu, 18 Apr 2013 12:08:11 -0700 Subject: Remove spark-repl's extraneous dependency on spark-streaming --- project/SparkBuild.scala | 2 +- repl/pom.xml | 14 -------------- 2 files changed, 1 insertion(+), 15 deletions(-) (limited to 'project') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index f2410085d8..190d723435 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -29,7 +29,7 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) - lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) dependsOn (streaming) + lazy val repl = Project("repl", file("repl"), settings = replSettings) dependsOn (core) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) dependsOn (core) dependsOn (streaming) diff --git a/repl/pom.xml b/repl/pom.xml index 038da5d988..92a2020b48 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -96,13 +96,6 @@ hadoop1 runtime - - org.spark-project - spark-streaming - ${project.version} - hadoop1 - runtime - org.apache.hadoop hadoop-core @@ -147,13 +140,6 @@ hadoop2 runtime - - org.spark-project - spark-streaming - ${project.version} - hadoop2 - runtime - org.apache.hadoop hadoop-core -- cgit v1.2.3