author     Matei Zaharia <matei@eecs.berkeley.edu>   2013-09-01 14:57:27 -0700
committer  Matei Zaharia <matei@eecs.berkeley.edu>   2013-09-01 14:57:27 -0700
commit     2ce200bf7f7a38afbcacf3303ca2418e49bdbe2a (patch)
tree       586a62e61ad15b5eda60cb13e15ca0c66cb1cc31 /core/src/main/scala
parent     87d586e4da63e6e1875d9cac194c6f11e1cdc653 (diff)
parent     f957c26fa27486c329d82cb66595b2cf07aed0ef (diff)
Merge remote-tracking branch 'old/master'
Diffstat (limited to 'core/src/main/scala')
-rw-r--r-- | core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala | 45
-rw-r--r-- | core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala | 69
-rw-r--r-- | core/src/main/scala/org/apache/spark/Accumulators.scala (renamed from core/src/main/scala/spark/Accumulators.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/Aggregator.scala (renamed from core/src/main/scala/spark/Aggregator.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala (renamed from core/src/main/scala/spark/BlockStoreShuffleFetcher.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/CacheManager.scala (renamed from core/src/main/scala/spark/CacheManager.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/Dependency.scala (renamed from core/src/main/scala/spark/Dependency.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/FetchFailedException.scala (renamed from core/src/main/scala/spark/FetchFailedException.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/HttpFileServer.scala (renamed from core/src/main/scala/spark/HttpFileServer.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/HttpServer.scala (renamed from core/src/main/scala/spark/HttpServer.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/Logging.scala (renamed from core/src/main/scala/spark/Logging.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/MapOutputTracker.scala (renamed from core/src/main/scala/spark/MapOutputTracker.scala) | 70
-rw-r--r-- | core/src/main/scala/org/apache/spark/Partition.scala (renamed from core/src/main/scala/spark/Partition.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/Partitioner.scala (renamed from core/src/main/scala/spark/Partitioner.scala) | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/SerializableWritable.scala (renamed from core/src/main/scala/spark/SerializableWritable.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ShuffleFetcher.scala (renamed from core/src/main/scala/spark/ShuffleFetcher.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkContext.scala (renamed from core/src/main/scala/spark/SparkContext.scala) | 134
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkEnv.scala (renamed from core/src/main/scala/spark/SparkEnv.scala) | 78
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkException.scala (renamed from core/src/main/scala/spark/SparkException.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkFiles.java (renamed from core/src/main/scala/spark/SparkFiles.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala (renamed from core/src/main/scala/spark/HadoopWriter.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskContext.scala (renamed from core/src/main/scala/spark/TaskContext.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskEndReason.scala (renamed from core/src/main/scala/spark/TaskEndReason.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/TaskState.scala (renamed from core/src/main/scala/spark/TaskState.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaDoubleRDD.scala) | 51
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaPairRDD.scala) | 70
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala (renamed from core/src/main/scala/spark/api/java/JavaRDD.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala (renamed from core/src/main/scala/spark/api/java/JavaRDDLike.scala) | 29
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala (renamed from core/src/main/scala/spark/api/java/JavaSparkContext.scala) | 30
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java (renamed from core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala) | 22
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/StorageLevels.java (renamed from core/src/main/scala/spark/api/java/StorageLevels.java) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java (renamed from core/src/main/scala/spark/api/java/function/DoubleFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/Function.java (renamed from core/src/main/scala/spark/api/java/function/Function.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/Function2.java (renamed from core/src/main/scala/spark/api/java/function/Function2.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java (renamed from core/src/main/scala/spark/api/java/function/PairFunction.java) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala (renamed from core/src/main/scala/spark/api/java/function/VoidFunction.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction1.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala (renamed from core/src/main/scala/spark/api/java/function/WrappedFunction2.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala (renamed from core/src/main/scala/spark/api/python/PythonPartitioner.scala) | 31
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala (renamed from core/src/main/scala/spark/api/python/PythonRDD.scala) | 87
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala (renamed from core/src/main/scala/spark/api/python/PythonWorkerFactory.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala (renamed from core/src/main/scala/spark/broadcast/Broadcast.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala (renamed from core/src/main/scala/spark/broadcast/BroadcastFactory.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/HttpBroadcast.scala) | 40
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala (renamed from core/src/main/scala/spark/broadcast/MultiTracker.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala (renamed from core/src/main/scala/spark/broadcast/SourceInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala (renamed from core/src/main/scala/spark/broadcast/TreeBroadcast.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala (renamed from core/src/main/scala/spark/deploy/ApplicationDescription.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/Command.scala (renamed from core/src/main/scala/spark/deploy/Command.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala | 130
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala (renamed from core/src/main/scala/spark/deploy/ExecutorState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala (renamed from core/src/main/scala/spark/deploy/JsonProtocol.scala) | 16
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala (renamed from core/src/main/scala/spark/deploy/LocalSparkCluster.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala | 36
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/WebUI.scala (renamed from core/src/main/scala/spark/deploy/WebUI.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/Client.scala (renamed from core/src/main/scala/spark/deploy/client/Client.scala) | 23
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala (renamed from core/src/main/scala/spark/deploy/client/ClientListener.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala (renamed from core/src/main/scala/spark/deploy/client/TestClient.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala (renamed from core/src/main/scala/spark/deploy/client/TestExecutor.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationInfo.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala | 24
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala (renamed from core/src/main/scala/spark/deploy/master/ApplicationState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala (renamed from core/src/main/scala/spark/deploy/master/ExecutorInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/Master.scala (renamed from core/src/main/scala/spark/deploy/master/Master.scala) | 97
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala (renamed from core/src/main/scala/spark/deploy/master/MasterArguments.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerInfo.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala (renamed from core/src/main/scala/spark/deploy/master/WorkerState.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala) | 41
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/master/ui/IndexPage.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala (renamed from core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala) | 27
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala (renamed from core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala) | 50
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala (renamed from core/src/main/scala/spark/deploy/worker/Worker.scala) | 47
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala (renamed from core/src/main/scala/spark/deploy/worker/WorkerArguments.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala) | 46
-rw-r--r-- | core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala (renamed from core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala) | 41
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/Executor.scala (renamed from core/src/main/scala/spark/executor/Executor.scala) | 73
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/ExecutorBackend.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala (renamed from core/src/main/scala/spark/executor/ExecutorExitCode.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala (renamed from core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/MesosExecutorBackend.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala (renamed from core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala) | 33
-rw-r--r-- | core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala (renamed from core/src/main/scala/spark/executor/TaskMetrics.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/io/CompressionCodec.scala | 82
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala | 100
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala | 163
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala | 59
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala | 68
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala | 32
-rw-r--r-- | core/src/main/scala/org/apache/spark/metrics/source/Source.scala | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/BufferMessage.scala (renamed from core/src/main/scala/spark/network/BufferMessage.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/Connection.scala (renamed from core/src/main/scala/spark/network/Connection.scala) | 50
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManager.scala (renamed from core/src/main/scala/spark/network/ConnectionManager.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerId.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala (renamed from core/src/main/scala/spark/network/ConnectionManagerTest.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/Message.scala (renamed from core/src/main/scala/spark/network/Message.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/MessageChunk.scala (renamed from core/src/main/scala/spark/network/MessageChunk.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala (renamed from core/src/main/scala/spark/network/MessageChunkHeader.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/ReceiverTest.scala (renamed from core/src/main/scala/spark/network/ReceiverTest.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/SenderTest.scala (renamed from core/src/main/scala/spark/network/SenderTest.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala (renamed from core/src/main/scala/spark/network/netty/FileHeader.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleCopier.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala (renamed from core/src/main/scala/spark/network/netty/ShuffleSender.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/package.scala | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala (renamed from core/src/main/scala/spark/partial/ApproximateActionListener.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala (renamed from core/src/main/scala/spark/partial/ApproximateEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala (renamed from core/src/main/scala/spark/partial/BoundedDouble.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala (renamed from core/src/main/scala/spark/partial/CountEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedCountEvaluator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala (renamed from core/src/main/scala/spark/partial/GroupedSumEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala (renamed from core/src/main/scala/spark/partial/MeanEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/PartialResult.scala (renamed from core/src/main/scala/spark/partial/PartialResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala (renamed from core/src/main/scala/spark/partial/StudentTCacher.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala (renamed from core/src/main/scala/spark/partial/SumEvaluator.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala (renamed from core/src/main/scala/spark/rdd/BlockRDD.scala) | 13
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala (renamed from core/src/main/scala/spark/rdd/CartesianRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala (renamed from core/src/main/scala/spark/rdd/CheckpointRDD.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala (renamed from core/src/main/scala/spark/rdd/CoGroupedRDD.scala) | 60
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala | 342
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala (renamed from core/src/main/scala/spark/DoubleRDDFunctions.scala) | 25
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala (renamed from core/src/main/scala/spark/rdd/EmptyRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala (renamed from core/src/main/scala/spark/rdd/FilteredRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala (renamed from core/src/main/scala/spark/rdd/FlatMappedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala | 36
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala (renamed from core/src/main/scala/spark/rdd/GlommedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/HadoopRDD.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala (renamed from core/src/main/scala/spark/rdd/JdbcRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala (renamed from core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala (renamed from core/src/main/scala/spark/rdd/MappedRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala (renamed from core/src/main/scala/spark/rdd/NewHadoopRDD.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala | 52
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala (renamed from core/src/main/scala/spark/PairRDDFunctions.scala) | 204
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala (renamed from core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala (renamed from core/src/main/scala/spark/rdd/PartitionPruningRDD.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala (renamed from core/src/main/scala/spark/rdd/PipedRDD.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDD.scala (renamed from core/src/main/scala/spark/RDD.scala) | 93
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala (renamed from core/src/main/scala/spark/RDDCheckpointData.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala (renamed from core/src/main/scala/spark/rdd/SampledRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala (renamed from core/src/main/scala/spark/SequenceFileRDDFunctions.scala) | 26
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala (renamed from core/src/main/scala/spark/rdd/ShuffledRDD.scala) | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala (renamed from core/src/main/scala/spark/rdd/SubtractedRDD.scala) | 47
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala (renamed from core/src/main/scala/spark/rdd/UnionRDD.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala) | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala (renamed from core/src/main/scala/spark/rdd/ZippedRDD.scala) | 27
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala (renamed from core/src/main/scala/spark/scheduler/ActiveJob.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala (renamed from core/src/main/scala/spark/scheduler/DAGScheduler.scala) | 294
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala (renamed from core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala) | 15
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala | 30
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala (renamed from core/src/main/scala/spark/scheduler/InputFormatInfo.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobListener.scala (renamed from core/src/main/scala/spark/scheduler/JobListener.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala (renamed from core/src/main/scala/spark/scheduler/JobLogger.scala) | 108
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobResult.scala (renamed from core/src/main/scala/spark/scheduler/JobResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala (renamed from core/src/main/scala/spark/scheduler/JobWaiter.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala (renamed from core/src/main/scala/spark/scheduler/MapStatus.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala (renamed from core/src/main/scala/spark/scheduler/ResultTask.scala) | 38
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala (renamed from core/src/main/scala/spark/scheduler/ShuffleMapTask.scala) | 44
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala (renamed from core/src/main/scala/spark/scheduler/SparkListener.scala) | 36
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala | 74
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala (renamed from core/src/main/scala/spark/scheduler/SplitInfo.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/Stage.scala (renamed from core/src/main/scala/spark/scheduler/Stage.scala) | 16
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala (renamed from core/src/main/scala/spark/scheduler/StageInfo.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/Task.scala (renamed from core/src/main/scala/spark/scheduler/Task.scala) | 12
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala (renamed from core/src/main/scala/spark/scheduler/TaskResult.scala) | 28
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala (renamed from core/src/main/scala/spark/scheduler/TaskScheduler.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala (renamed from core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala) | 13
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala (renamed from core/src/main/scala/spark/scheduler/TaskSet.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala | 440
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala | 712
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala (renamed from core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Pool.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala (renamed from core/src/main/scala/spark/scheduler/cluster/Schedulable.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala) | 103
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala (renamed from core/src/main/scala/spark/SoftReferenceCache.scala) | 20
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala) | 17
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala | 62
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala) | 34
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala (renamed from core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala) | 19
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala | 32
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala | 51
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala (renamed from core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalScheduler.scala) | 67
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala (renamed from core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala) | 112
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala) | 37
-rw-r--r-- | core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala (renamed from core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala) | 26
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala (renamed from core/src/main/scala/spark/JavaSerializer.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala | 159
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/Serializer.scala (renamed from core/src/main/scala/spark/serializer/Serializer.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala (renamed from core/src/main/scala/spark/serializer/SerializerManager.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockException.scala (renamed from core/src/main/scala/spark/storage/BlockException.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala (renamed from core/src/main/scala/spark/storage/BlockFetchTracker.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala (renamed from core/src/main/scala/spark/storage/BlockFetcherIterator.scala) | 23
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManager.scala (renamed from core/src/main/scala/spark/storage/BlockManager.scala) | 88
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala (renamed from core/src/main/scala/spark/storage/BlockManagerId.scala) | 8
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMaster.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerMasterActor.scala) | 21
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala | 110
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala (renamed from core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala | 48
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala (renamed from core/src/main/scala/spark/storage/BlockManagerWorker.scala) | 7
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockMessage.scala (renamed from core/src/main/scala/spark/storage/BlockMessage.scala) | 5
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala (renamed from core/src/main/scala/spark/storage/BlockMessageArray.scala) | 11
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala (renamed from core/src/main/scala/spark/storage/BlockObjectWriter.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/BlockStore.scala (renamed from core/src/main/scala/spark/storage/BlockStore.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/DiskStore.scala (renamed from core/src/main/scala/spark/storage/DiskStore.scala) | 19
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/MemoryStore.scala (renamed from core/src/main/scala/spark/storage/MemoryStore.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/PutResult.scala (renamed from core/src/main/scala/spark/storage/PutResult.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala (renamed from core/src/main/scala/spark/storage/ShuffleBlockManager.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/StorageLevel.scala (renamed from core/src/main/scala/spark/storage/StorageLevel.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/StorageUtils.scala (renamed from core/src/main/scala/spark/storage/StorageUtils.scala) | 9
-rw-r--r-- | core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala (renamed from core/src/main/scala/spark/storage/ThreadingTest.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/JettyUtils.scala (renamed from core/src/main/scala/spark/ui/JettyUtils.scala) | 15
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/Page.scala (renamed from core/src/main/scala/spark/ui/Page.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/SparkUI.scala (renamed from core/src/main/scala/spark/ui/SparkUI.scala) | 35
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/UIUtils.scala (renamed from core/src/main/scala/spark/ui/UIUtils.scala) | 98
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala (renamed from core/src/main/scala/spark/ui/UIWorkloadGenerator.scala) | 49
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala (renamed from core/src/main/scala/spark/ui/env/EnvironmentUI.scala) | 57
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala | 137
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala | 90
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 156
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala | 61
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala | 32
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala | 55
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala | 183
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala | 107
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala (renamed from core/src/main/scala/spark/ui/storage/BlockManagerUI.scala) | 6
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala (renamed from core/src/main/scala/spark/ui/storage/IndexPage.scala) | 38
-rw-r--r-- | core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala (renamed from core/src/main/scala/spark/ui/storage/RDDPage.scala) | 44
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/AkkaUtils.scala (renamed from core/src/main/scala/spark/util/AkkaUtils.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala (renamed from core/src/main/scala/spark/util/BoundedPriorityQueue.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala (renamed from core/src/main/scala/spark/util/ByteBufferInputStream.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Clock.scala | 29
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala (renamed from core/src/main/scala/spark/ClosureCleaner.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/CompletionIterator.scala (renamed from core/src/main/scala/spark/util/CompletionIterator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Distribution.scala (renamed from core/src/main/scala/spark/util/Distribution.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/IdGenerator.scala (renamed from core/src/main/scala/spark/util/IdGenerator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/IntParam.scala (renamed from core/src/main/scala/spark/util/IntParam.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MemoryParam.scala (renamed from core/src/main/scala/spark/util/MemoryParam.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala (renamed from core/src/main/scala/spark/util/MetadataCleaner.scala) | 4
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/MutablePair.scala (renamed from core/src/main/scala/spark/package.scala) | 26
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/NextIterator.scala (renamed from core/src/main/scala/spark/util/NextIterator.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala (renamed from core/src/main/scala/spark/util/RateLimitedOutputStream.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala (renamed from core/src/main/scala/spark/util/SerializableBuffer.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/SizeEstimator.scala (renamed from core/src/main/scala/spark/SizeEstimator.scala) | 3
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/StatCounter.scala (renamed from core/src/main/scala/spark/util/StatCounter.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashMap.scala) | 10
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala (renamed from core/src/main/scala/spark/util/TimeStampedHashSet.scala) | 2
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Utils.scala (renamed from core/src/main/scala/spark/Utils.scala) | 110
-rw-r--r-- | core/src/main/scala/org/apache/spark/util/Vector.scala (renamed from core/src/main/scala/spark/util/Vector.scala) | 9
-rw-r--r-- | core/src/main/scala/spark/Cache.scala | 80
-rw-r--r-- | core/src/main/scala/spark/KryoSerializer.scala | 241
-rw-r--r-- | core/src/main/scala/spark/deploy/DeployMessage.scala | 125
-rw-r--r-- | core/src/main/scala/spark/rdd/CoalescedRDD.scala | 81
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala | 631
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala | 765
-rw-r--r-- | core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala | 62
-rw-r--r-- | core/src/main/scala/spark/storage/BlockManagerMessages.scala | 123
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/IndexPage.scala | 129
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/JobProgressUI.scala | 144
-rw-r--r-- | core/src/main/scala/spark/ui/jobs/StagePage.scala | 131
277 files changed, 6480 insertions, 4503 deletions
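Nearly every file above moves from the old spark package to org.apache.spark; the genuinely new files (the metrics and io packages, the Hadoop shim traits) and the deleted core/src/main/scala/spark leftovers account for the rest. As a hedged illustration of what the rename means for applications, this is hypothetical user code against the 0.8-era API, not a file in this diff; only the imports change:

    // Hypothetical driver program; the commit turns spark._ into org.apache.spark._
    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._   // implicit conversions, e.g. reduceByKey

    object WordCount {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "WordCount")
        sc.textFile(args(0))
          .flatMap(_.split(" "))
          .map(word => (word, 1))
          .reduceByKey(_ + _)
          .collect()
          .foreach(println)
      }
    }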
diff --git a/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala new file mode 100644 index 0000000000..f87460039b --- /dev/null +++ b/core/src/main/scala/org/apache/hadoop/mapred/SparkHadoopMapRedUtil.scala @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapred + +trait SparkHadoopMapRedUtil { + def newJobContext(conf: JobConf, jobId: JobID): JobContext = { + val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl", "org.apache.hadoop.mapred.JobContext"); + val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[org.apache.hadoop.mapreduce.JobID]) + ctor.newInstance(conf, jobId).asInstanceOf[JobContext] + } + + def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = { + val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl", "org.apache.hadoop.mapred.TaskAttemptContext") + val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID]) + ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] + } + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { + new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId) + } + + private def firstAvailableClass(first: String, second: String): Class[_] = { + try { + Class.forName(first) + } catch { + case e: ClassNotFoundException => + Class.forName(second) + } + } +} diff --git a/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala new file mode 100644 index 0000000000..93180307fa --- /dev/null +++ b/core/src/main/scala/org/apache/hadoop/mapreduce/SparkHadoopMapReduceUtil.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.mapreduce + +import org.apache.hadoop.conf.Configuration +import java.lang.{Integer => JInteger, Boolean => JBoolean} + +trait SparkHadoopMapReduceUtil { + def newJobContext(conf: Configuration, jobId: JobID): JobContext = { + val klass = firstAvailableClass( + "org.apache.hadoop.mapreduce.task.JobContextImpl", // hadoop2, hadoop2-yarn + "org.apache.hadoop.mapreduce.JobContext") // hadoop1 + val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID]) + ctor.newInstance(conf, jobId).asInstanceOf[JobContext] + } + + def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = { + val klass = firstAvailableClass( + "org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl", // hadoop2, hadoop2-yarn + "org.apache.hadoop.mapreduce.TaskAttemptContext") // hadoop1 + val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID]) + ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext] + } + + def newTaskAttemptID(jtIdentifier: String, jobId: Int, isMap: Boolean, taskId: Int, attemptId: Int) = { + val klass = Class.forName("org.apache.hadoop.mapreduce.TaskAttemptID"); + try { + // first, attempt to use the old-style constructor that takes a boolean isMap (not available in YARN) + val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean], + classOf[Int], classOf[Int]) + ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId), new + JInteger(attemptId)).asInstanceOf[TaskAttemptID] + } catch { + case exc: NoSuchMethodException => { + // failed, look for the new ctor that takes a TaskType (not available in 1.x) + val taskTypeClass = Class.forName("org.apache.hadoop.mapreduce.TaskType").asInstanceOf[Class[Enum[_]]] + val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke(taskTypeClass, if(isMap) "MAP" else "REDUCE") + val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass, + classOf[Int], classOf[Int]) + ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId), new + JInteger(attemptId)).asInstanceOf[TaskAttemptID] + } + } + } + + private def firstAvailableClass(first: String, second: String): Class[_] = { + try { + Class.forName(first) + } catch { + case e: ClassNotFoundException => + Class.forName(second) + } + } +} diff --git a/core/src/main/scala/spark/Accumulators.scala b/core/src/main/scala/org/apache/spark/Accumulators.scala index 6ff92ce833..6e922a612a 100644 --- a/core/src/main/scala/spark/Accumulators.scala +++ b/core/src/main/scala/org/apache/spark/Accumulators.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io._ import scala.collection.mutable.Map import scala.collection.generic.Growable +import org.apache.spark.serializer.JavaSerializer /** * A datatype that can be accumulated, i.e. has an commutative and associative "add" operation, @@ -28,7 +29,7 @@ import scala.collection.generic.Growable * * You must define how to add data, and how to merge two of these together. For some datatypes, * such as a counter, these might be the same operation. In that case, you can use the simpler - * [[spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are + * [[org.apache.spark.Accumulator]]. They won't always be the same, though -- e.g., imagine you are * accumulating a set. You will add items to the set, and you will union two sets together. 
* * @param initialValue initial value of accumulator @@ -176,7 +177,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser def zero(initialValue: R): R = { // We need to clone initialValue, but it's hard to specify that R should also be Cloneable. // Instead we'll serialize it to a buffer and load it back. - val ser = (new spark.JavaSerializer).newInstance() + val ser = new JavaSerializer().newInstance() val copy = ser.deserialize[R](ser.serialize(initialValue)) copy.clear() // In case it contained stuff copy @@ -184,7 +185,7 @@ class GrowableAccumulableParam[R <% Growable[T] with TraversableOnce[T] with Ser } /** - * A simpler value of [[spark.Accumulable]] where the result type being accumulated is the same + * A simpler value of [[org.apache.spark.Accumulable]] where the result type being accumulated is the same * as the types of elements being merged. * * @param initialValue initial value of accumulator @@ -195,7 +196,7 @@ class Accumulator[T](@transient initialValue: T, param: AccumulatorParam[T]) extends Accumulable[T,T](initialValue, param) /** - * A simpler version of [[spark.AccumulableParam]] where the only datatype you can add in is the same type + * A simpler version of [[org.apache.spark.AccumulableParam]] where the only datatype you can add in is the same type * as the accumulated value. An implicit AccumulatorParam object needs to be available when you create * Accumulators of a specific type. * diff --git a/core/src/main/scala/spark/Aggregator.scala b/core/src/main/scala/org/apache/spark/Aggregator.scala index 136b4da61e..3ef402926e 100644 --- a/core/src/main/scala/spark/Aggregator.scala +++ b/core/src/main/scala/org/apache/spark/Aggregator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.util.{HashMap => JHashMap} @@ -28,18 +28,18 @@ import scala.collection.JavaConversions._ * @param mergeCombiners function to merge outputs from multiple mergeValue function. */ case class Aggregator[K, V, C] ( - val createCombiner: V => C, - val mergeValue: (C, V) => C, - val mergeCombiners: (C, C) => C) { + createCombiner: V => C, + mergeValue: (C, V) => C, + mergeCombiners: (C, C) => C) { - def combineValuesByKey(iter: Iterator[(K, V)]) : Iterator[(K, C)] = { + def combineValuesByKey(iter: Iterator[_ <: Product2[K, V]]) : Iterator[(K, C)] = { val combiners = new JHashMap[K, C] - for ((k, v) <- iter) { - val oldC = combiners.get(k) + for (kv <- iter) { + val oldC = combiners.get(kv._1) if (oldC == null) { - combiners.put(k, createCombiner(v)) + combiners.put(kv._1, createCombiner(kv._2)) } else { - combiners.put(k, mergeValue(oldC, v)) + combiners.put(kv._1, mergeValue(oldC, kv._2)) } } combiners.iterator @@ -47,7 +47,7 @@ case class Aggregator[K, V, C] ( def combineCombinersByKey(iter: Iterator[(K, C)]) : Iterator[(K, C)] = { val combiners = new JHashMap[K, C] - for ((k, c) <- iter) { + iter.foreach { case(k, c) => val oldC = combiners.get(k) if (oldC == null) { combiners.put(k, c) diff --git a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala index 8f6953b1f5..908ff56a6b 100644 --- a/core/src/main/scala/spark/BlockStoreShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/BlockStoreShuffleFetcher.scala @@ -15,21 +15,22 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap -import spark.executor.{ShuffleReadMetrics, TaskMetrics} -import spark.serializer.Serializer -import spark.storage.BlockManagerId -import spark.util.CompletionIterator +import org.apache.spark.executor.{ShuffleReadMetrics, TaskMetrics} +import org.apache.spark.serializer.Serializer +import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.CompletionIterator private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Logging { - override def fetch[K, V]( - shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) = { + override def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, serializer: Serializer) + : Iterator[T] = + { logDebug("Fetching outputs for shuffle %d, reduce %d".format(shuffleId, reduceId)) val blockManager = SparkEnv.get.blockManager @@ -49,12 +50,12 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin (address, splits.map(s => ("shuffle_%d_%d_%d".format(shuffleId, s._1, reduceId), s._2))) } - def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[(K, V)] = { + def unpackBlock(blockPair: (String, Option[Iterator[Any]])) : Iterator[T] = { val blockId = blockPair._1 val blockOption = blockPair._2 blockOption match { case Some(block) => { - block.asInstanceOf[Iterator[(K, V)]] + block.asInstanceOf[Iterator[T]] } case None => { val regex = "shuffle_([0-9]*)_([0-9]*)_([0-9]*)".r @@ -73,7 +74,7 @@ private[spark] class BlockStoreShuffleFetcher extends ShuffleFetcher with Loggin val blockFetcherItr = blockManager.getMultiple(blocksByAddress, serializer) val itr = blockFetcherItr.flatMap(unpackBlock) - CompletionIterator[(K,V), Iterator[(K,V)]](itr, { + CompletionIterator[T, Iterator[T]](itr, { val shuffleMetrics = new ShuffleReadMetrics shuffleMetrics.shuffleFinishTime = System.currentTimeMillis shuffleMetrics.remoteFetchTime = blockFetcherItr.remoteFetchTime diff --git a/core/src/main/scala/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala index 81314805a9..e299a106ee 100644 --- a/core/src/main/scala/spark/CacheManager.scala +++ b/core/src/main/scala/org/apache/spark/CacheManager.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package spark +package org.apache.spark import scala.collection.mutable.{ArrayBuffer, HashSet} -import spark.storage.{BlockManager, StorageLevel} +import org.apache.spark.storage.{BlockManager, StorageLevel} +import org.apache.spark.rdd.RDD /** Spark class responsible for passing RDDs split contents to the BlockManager and making diff --git a/core/src/main/scala/spark/Dependency.scala b/core/src/main/scala/org/apache/spark/Dependency.scala index d17e70a4fa..cc30105940 100644 --- a/core/src/main/scala/spark/Dependency.scala +++ b/core/src/main/scala/org/apache/spark/Dependency.scala @@ -15,7 +15,9 @@ * limitations under the License. */ -package spark +package org.apache.spark + +import org.apache.spark.rdd.RDD /** * Base class for dependencies. @@ -39,16 +41,15 @@ abstract class NarrowDependency[T](rdd: RDD[T]) extends Dependency(rdd) { /** * Represents a dependency on the output of a shuffle stage. 
- * @param shuffleId the shuffle id * @param rdd the parent RDD * @param partitioner partitioner used to partition the shuffle output * @param serializerClass class name of the serializer to use */ class ShuffleDependency[K, V]( - @transient rdd: RDD[(K, V)], + @transient rdd: RDD[_ <: Product2[K, V]], val partitioner: Partitioner, val serializerClass: String = null) - extends Dependency(rdd) { + extends Dependency(rdd.asInstanceOf[RDD[Product2[K, V]]]) { val shuffleId: Int = rdd.context.newShuffleId() } diff --git a/core/src/main/scala/spark/FetchFailedException.scala b/core/src/main/scala/org/apache/spark/FetchFailedException.scala index a2dae6cae9..d242047502 100644 --- a/core/src/main/scala/spark/FetchFailedException.scala +++ b/core/src/main/scala/org/apache/spark/FetchFailedException.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.storage.BlockManagerId +import org.apache.spark.storage.BlockManagerId private[spark] class FetchFailedException( taskEndReason: TaskEndReason, diff --git a/core/src/main/scala/spark/HttpFileServer.scala b/core/src/main/scala/org/apache/spark/HttpFileServer.scala index a13a7a2859..ad1ee20045 100644 --- a/core/src/main/scala/spark/HttpFileServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpFileServer.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.{File} import com.google.common.io.Files +import org.apache.spark.util.Utils private[spark] class HttpFileServer extends Logging { diff --git a/core/src/main/scala/spark/HttpServer.scala b/core/src/main/scala/org/apache/spark/HttpServer.scala index c9dffbc631..cdfc9dd54e 100644 --- a/core/src/main/scala/spark/HttpServer.scala +++ b/core/src/main/scala/org/apache/spark/HttpServer.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io.File import java.net.InetAddress @@ -26,6 +26,7 @@ import org.eclipse.jetty.server.handler.DefaultHandler import org.eclipse.jetty.server.handler.HandlerList import org.eclipse.jetty.server.handler.ResourceHandler import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.apache.spark.util.Utils /** * Exception type thrown by HttpServer when it is in the wrong state for an operation. diff --git a/core/src/main/scala/spark/Logging.scala b/core/src/main/scala/org/apache/spark/Logging.scala index 79b0362830..6a973ea495 100644 --- a/core/src/main/scala/spark/Logging.scala +++ b/core/src/main/scala/org/apache/spark/Logging.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import org.slf4j.Logger import org.slf4j.LoggerFactory diff --git a/core/src/main/scala/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 2c417e31db..ae7cf2a893 100644 --- a/core/src/main/scala/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark +package org.apache.spark import java.io._ import java.util.zip.{GZIPInputStream, GZIPOutputStream} @@ -30,9 +30,9 @@ import akka.remote._ import akka.util.Duration -import spark.scheduler.MapStatus -import spark.storage.BlockManagerId -import spark.util.{MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.BlockManagerId +import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashMap} private[spark] sealed trait MapOutputTrackerMessage @@ -64,11 +64,11 @@ private[spark] class MapOutputTracker extends Logging { // Incremented every time a fetch fails so that client nodes know to clear // their cache of map output locations if this happens. - private var generation: Long = 0 - private val generationLock = new java.lang.Object + private var epoch: Long = 0 + private val epochLock = new java.lang.Object // Cache a serialized version of the output statuses for each shuffle to send them out faster - var cacheGeneration = generation + var cacheEpoch = epoch private val cachedSerializedStatuses = new TimeStampedHashMap[Int, Array[Byte]] val metadataCleaner = new MetadataCleaner("MapOutputTracker", this.cleanup) @@ -108,10 +108,10 @@ private[spark] class MapOutputTracker extends Logging { def registerMapOutputs( shuffleId: Int, statuses: Array[MapStatus], - changeGeneration: Boolean = false) { + changeEpoch: Boolean = false) { mapStatuses.put(shuffleId, Array[MapStatus]() ++ statuses) - if (changeGeneration) { - incrementGeneration() + if (changeEpoch) { + incrementEpoch() } } @@ -124,7 +124,7 @@ private[spark] class MapOutputTracker extends Logging { array(mapId) = null } } - incrementGeneration() + incrementEpoch() } else { throw new SparkException("unregisterMapOutput called for nonexistent shuffle ID") } @@ -206,58 +206,58 @@ private[spark] class MapOutputTracker extends Logging { trackerActor = null } - // Called on master to increment the generation number - def incrementGeneration() { - generationLock.synchronized { - generation += 1 - logDebug("Increasing generation to " + generation) + // Called on master to increment the epoch number + def incrementEpoch() { + epochLock.synchronized { + epoch += 1 + logDebug("Increasing epoch to " + epoch) } } - // Called on master or workers to get current generation number - def getGeneration: Long = { - generationLock.synchronized { - return generation + // Called on master or workers to get current epoch number + def getEpoch: Long = { + epochLock.synchronized { + return epoch } } - // Called on workers to update the generation number, potentially clearing old outputs - // because of a fetch failure. (Each Mesos task calls this with the latest generation + // Called on workers to update the epoch number, potentially clearing old outputs + // because of a fetch failure. (Each worker task calls this with the latest epoch // number on the master at the time it was created.) 
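The generation-to-epoch rename lands on a simple invalidation protocol: the master bumps a counter whenever map outputs change, and workers drop their cached locations the moment a task arrives stamped with a newer value (updateEpoch, just below). A distilled sketch of that guard, with hypothetical class and field names:

    class EpochCache {
      private var epoch = 0L
      private val epochLock = new java.lang.Object
      private val cachedLocations = new scala.collection.mutable.HashMap[Int, String]

      // Worker side: called with the epoch stamped into each incoming task.
      def updateEpoch(newEpoch: Long) {
        epochLock.synchronized {
          if (newEpoch > epoch) {
            cachedLocations.clear()  // everything cached may now be stale
            epoch = newEpoch
          }
        }
      }
    }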
- def updateGeneration(newGen: Long) { - generationLock.synchronized { - if (newGen > generation) { - logInfo("Updating generation to " + newGen + " and clearing cache") + def updateEpoch(newEpoch: Long) { + epochLock.synchronized { + if (newEpoch > epoch) { + logInfo("Updating epoch to " + newEpoch + " and clearing cache") // mapStatuses = new TimeStampedHashMap[Int, Array[MapStatus]] mapStatuses.clear() - generation = newGen + epoch = newEpoch } } } def getSerializedLocations(shuffleId: Int): Array[Byte] = { var statuses: Array[MapStatus] = null - var generationGotten: Long = -1 - generationLock.synchronized { - if (generation > cacheGeneration) { + var epochGotten: Long = -1 + epochLock.synchronized { + if (epoch > cacheEpoch) { cachedSerializedStatuses.clear() - cacheGeneration = generation + cacheEpoch = epoch } cachedSerializedStatuses.get(shuffleId) match { case Some(bytes) => return bytes case None => statuses = mapStatuses(shuffleId) - generationGotten = generation + epochGotten = epoch } } // If we got here, we failed to find the serialized locations in the cache, so we pulled // out a snapshot of the locations as "locs"; let's serialize and return that val bytes = serializeStatuses(statuses) logInfo("Size of output statuses for shuffle %d is %d bytes".format(shuffleId, bytes.length)) - // Add them into the table only if the generation hasn't changed while we were working - generationLock.synchronized { - if (generation == generationGotten) { + // Add them into the table only if the epoch hasn't changed while we were working + epochLock.synchronized { + if (epoch == epochGotten) { cachedSerializedStatuses(shuffleId) = bytes } } diff --git a/core/src/main/scala/spark/Partition.scala b/core/src/main/scala/org/apache/spark/Partition.scala index 2a4edcec98..87914a061f 100644 --- a/core/src/main/scala/spark/Partition.scala +++ b/core/src/main/scala/org/apache/spark/Partition.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark /** * A partition of an RDD. diff --git a/core/src/main/scala/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala index 660af70d52..0e2c987a59 100644 --- a/core/src/main/scala/spark/Partitioner.scala +++ b/core/src/main/scala/org/apache/spark/Partitioner.scala @@ -15,7 +15,10 @@ * limitations under the License. */ -package spark +package org.apache.spark + +import org.apache.spark.util.Utils +import org.apache.spark.rdd.RDD /** * An object that defines how the elements in a key-value pair RDD are partitioned by key. @@ -56,7 +59,7 @@ object Partitioner { } /** - * A [[spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`. + * A [[org.apache.spark.Partitioner]] that implements hash-based partitioning using Java's `Object.hashCode`. 
* * Java arrays have hashCodes that are based on the arrays' identities rather than their contents, * so attempting to partition an RDD[Array[_]] or RDD[(Array[_], _)] using a HashPartitioner will @@ -65,17 +68,9 @@ object Partitioner { class HashPartitioner(partitions: Int) extends Partitioner { def numPartitions = partitions - def getPartition(key: Any): Int = { - if (key == null) { - return 0 - } else { - val mod = key.hashCode % partitions - if (mod < 0) { - mod + partitions - } else { - mod // Guard against negative hash codes - } - } + def getPartition(key: Any): Int = key match { + case null => 0 + case _ => Utils.nonNegativeMod(key.hashCode, numPartitions) } override def equals(other: Any): Boolean = other match { @@ -87,12 +82,12 @@ class HashPartitioner(partitions: Int) extends Partitioner { } /** - * A [[spark.Partitioner]] that partitions sortable records by range into roughly equal ranges. + * A [[org.apache.spark.Partitioner]] that partitions sortable records by range into roughly equal ranges. * Determines the ranges by sampling the RDD passed in. */ class RangePartitioner[K <% Ordered[K]: ClassManifest, V]( partitions: Int, - @transient rdd: RDD[(K,V)], + @transient rdd: RDD[_ <: Product2[K,V]], private val ascending: Boolean = true) extends Partitioner { diff --git a/core/src/main/scala/spark/SerializableWritable.scala b/core/src/main/scala/org/apache/spark/SerializableWritable.scala index 0236611ef9..fdd4c24e23 100644 --- a/core/src/main/scala/spark/SerializableWritable.scala +++ b/core/src/main/scala/org/apache/spark/SerializableWritable.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io._ import org.apache.hadoop.io.ObjectWritable import org.apache.hadoop.io.Writable -import org.apache.hadoop.mapred.JobConf +import org.apache.hadoop.conf.Configuration class SerializableWritable[T <: Writable](@transient var t: T) extends Serializable { def value = t @@ -35,7 +35,7 @@ class SerializableWritable[T <: Writable](@transient var t: T) extends Serializa private def readObject(in: ObjectInputStream) { in.defaultReadObject() val ow = new ObjectWritable() - ow.setConf(new JobConf()) + ow.setConf(new Configuration()) ow.readFields(in) t = ow.get().asInstanceOf[T] } diff --git a/core/src/main/scala/spark/ShuffleFetcher.scala b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala index dcced035e7..307c383a89 100644 --- a/core/src/main/scala/spark/ShuffleFetcher.scala +++ b/core/src/main/scala/org/apache/spark/ShuffleFetcher.scala @@ -15,19 +15,20 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.executor.TaskMetrics -import spark.serializer.Serializer +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.serializer.Serializer private[spark] abstract class ShuffleFetcher { + /** * Fetch the shuffle outputs for a given ShuffleDependency. * @return An iterator over the elements of the fetched shuffle outputs. 
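Both partitioners above now delegate to Utils.nonNegativeMod, presumably equivalent to the inline code this diff deletes: Java's % keeps the dividend's sign, so negative hash codes must be folded back into [0, numPartitions).

    object ModSketch {
      // Equivalent of the logic removed from HashPartitioner.getPartition.
      def nonNegativeMod(x: Int, mod: Int): Int = {
        val rawMod = x % mod
        rawMod + (if (rawMod < 0) mod else 0)
      }

      def main(args: Array[String]) {
        println(-7 % 3)                 // -1: % keeps the dividend's sign
        println(nonNegativeMod(-7, 3))  // 2: a valid partition index
      }
    }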
*/ - def fetch[K, V](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, - serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[(K,V)] + def fetch[T](shuffleId: Int, reduceId: Int, metrics: TaskMetrics, + serializer: Serializer = SparkEnv.get.serializerManager.default): Iterator[T] /** Stop the fetcher */ def stop() {} diff --git a/core/src/main/scala/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 46b9935cb7..faf0c2362a 100644 --- a/core/src/main/scala/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -15,23 +15,19 @@ * limitations under the License. */ -package spark +package org.apache.spark import java.io._ import java.net.URI import java.util.Properties -import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.atomic.AtomicInteger -import scala.collection.JavaConversions._ import scala.collection.Map import scala.collection.generic.Growable -import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap import scala.util.DynamicVariable -import scala.collection.mutable.{ConcurrentMap, HashMap} - -import akka.actor.Actor._ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -53,20 +49,23 @@ import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.{Job => NewHadoopJob} import org.apache.hadoop.mapreduce.lib.input.{FileInputFormat => NewFileInputFormat} -import org.apache.hadoop.security.UserGroupInformation import org.apache.mesos.MesosNativeLibrary -import spark.deploy.{LocalSparkCluster, SparkHadoopUtil} -import spark.partial.{ApproximateEvaluator, PartialResult} -import spark.rdd.{CheckpointRDD, HadoopRDD, NewHadoopRDD, UnionRDD, ParallelCollectionRDD} -import spark.scheduler.{DAGScheduler, ResultTask, ShuffleMapTask, SparkListener, SplitInfo, Stage, StageInfo, TaskScheduler} -import spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, ClusterScheduler} -import spark.scheduler.local.LocalScheduler -import spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} -import spark.storage.{StorageStatus, StorageUtils, RDDInfo} -import spark.util.{MetadataCleaner, TimeStampedHashMap} -import ui.{SparkUI} +import org.apache.spark.deploy.LocalSparkCluster +import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} +import org.apache.spark.rdd._ +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.{StandaloneSchedulerBackend, SparkDeploySchedulerBackend, + ClusterScheduler, Schedulable, SchedulingMode} +import org.apache.spark.scheduler.local.LocalScheduler +import org.apache.spark.scheduler.mesos.{CoarseMesosSchedulerBackend, MesosSchedulerBackend} +import org.apache.spark.storage.{StorageUtils, BlockManagerSource} +import org.apache.spark.ui.SparkUI +import org.apache.spark.util.{ClosureCleaner, Utils, MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.scheduler.StageInfo +import org.apache.spark.storage.RDDInfo +import org.apache.spark.storage.StorageStatus /** * Main entry point for Spark functionality. 
A SparkContext represents the connection to a Spark @@ -101,7 +100,7 @@ class SparkContext( System.setProperty("spark.driver.port", "0") } - private val isLocal = (master == "local" || master.startsWith("local[")) + val isLocal = (master == "local" || master.startsWith("local[")) // Create the Spark execution environment (cache, map output tracker, etc) private[spark] val env = SparkEnv.createFromSystemProperties( @@ -124,6 +123,8 @@ class SparkContext( private[spark] val ui = new SparkUI(this) ui.bind() + val startTime = System.currentTimeMillis() + // Add each JAR given through the constructor if (jars != null) { jars.foreach { addJar(_) } @@ -235,7 +236,8 @@ class SparkContext( /** A default Hadoop Configuration for the Hadoop code (e.g. file systems) that we reuse. */ val hadoopConfiguration = { - val conf = SparkHadoopUtil.newConfiguration() + val env = SparkEnv.get + val conf = env.hadoop.newConfiguration() // Explicitly check for S3 environment variables if (System.getenv("AWS_ACCESS_KEY_ID") != null && System.getenv("AWS_SECRET_ACCESS_KEY") != null) { conf.set("fs.s3.awsAccessKeyId", System.getenv("AWS_ACCESS_KEY_ID")) @@ -261,15 +263,35 @@ class SparkContext( localProperties.value = new Properties() } - def addLocalProperties(key: String, value: String) { - if(localProperties.value == null) { + def setLocalProperty(key: String, value: String) { + if (localProperties.value == null) { localProperties.value = new Properties() } - localProperties.value.setProperty(key,value) + if (value == null) { + localProperties.value.remove(key) + } else { + localProperties.value.setProperty(key, value) + } + } + + /** Set a human readable description of the current job. */ + def setJobDescription(value: String) { + setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, value) } + // Post init taskScheduler.postStartHook() + val dagSchedulerSource = new DAGSchedulerSource(this.dagScheduler) + val blockManagerSource = new BlockManagerSource(SparkEnv.get.blockManager) + + def initDriverMetrics() { + SparkEnv.get.metricsSystem.registerSource(dagSchedulerSource) + SparkEnv.get.metricsSystem.registerSource(blockManagerSource) + } + + initDriverMetrics() + // Methods for creating RDDs /** Distribute a local Scala collection to form an RDD. */ @@ -470,14 +492,14 @@ class SparkContext( // Methods for creating shared variables /** - * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values * to using the `+=` method. Only the driver can access the accumulator's `value`. */ def accumulator[T](initialValue: T)(implicit param: AccumulatorParam[T]) = new Accumulator(initialValue, param) /** - * Create an [[spark.Accumulable]] shared variable, to which tasks can add values with `+=`. + * Create an [[org.apache.spark.Accumulable]] shared variable, to which tasks can add values with `+=`. * Only the driver can access the accumuable's `value`. * @tparam T accumulator type * @tparam R type that can be added to the accumulator @@ -497,7 +519,7 @@ class SparkContext( } /** - * Broadcast a read-only variable to the cluster, returning a [[spark.broadcast.Broadcast]] object for + * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.broadcast.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. 
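setLocalProperty, which replaces addLocalProperties above, gains a removal path: a null value now deletes the key instead of being stored. A stand-alone sketch of just that semantic on a plain java.util.Properties:

    import java.util.Properties

    object LocalPropsSketch {
      def setLocalProperty(props: Properties, key: String, value: String) {
        if (value == null) props.remove(key) else props.setProperty(key, value)
      }

      def main(args: Array[String]) {
        val props = new Properties
        setLocalProperty(props, "spark.job.description", "nightly ETL")  // set
        setLocalProperty(props, "spark.job.description", null)           // unset
        println(props.containsKey("spark.job.description"))              // false
      }
    }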
*/ def broadcast[T](value: T) = env.broadcastManager.newBroadcast[T](value, isLocal) @@ -525,7 +547,7 @@ class SparkContext( } def addSparkListener(listener: SparkListener) { - dagScheduler.sparkListeners += listener + dagScheduler.addSparkListener(listener) } /** @@ -546,6 +568,12 @@ class SparkContext( StorageUtils.rddInfoFromStorageStatus(getExecutorStorageStatus, this) } + /** + * Returns an immutable map of RDDs that have marked themselves as persistent via cache() call. + * Note that this does not necessarily mean the caching or computation was successful. + */ + def getPersistentRDDs: Map[Int, RDD[_]] = persistentRdds.toMap + def getStageInfo: Map[Stage,StageInfo] = { dagScheduler.stageToInfos } @@ -558,6 +586,28 @@ class SparkContext( } /** + * Return pools for fair scheduler + * TODO(xiajunluan): We should take nested pools into account + */ + def getAllPools: ArrayBuffer[Schedulable] = { + taskScheduler.rootPool.schedulableQueue + } + + /** + * Return the pool associated with the given name, if one exists + */ + def getPoolForName(pool: String): Option[Schedulable] = { + taskScheduler.rootPool.schedulableNameToSchedulable.get(pool) + } + + /** + * Return current scheduling mode + */ + def getSchedulingMode: SchedulingMode.SchedulingMode = { + taskScheduler.schedulingMode + } + + /** * Clear the job's list of files added by `addFile` so that they do not get downloaded to * any new nodes. */ @@ -566,6 +616,16 @@ class SparkContext( } /** + * Gets the locality information associated with the partition in a particular rdd + * @param rdd of interest + * @param partition to be looked up for locality + * @return list of preferred locations for the partition + */ + private [spark] def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = { + dagScheduler.getPreferredLocs(rdd, partition) + } + + /** * Adds a JAR dependency for all tasks to be executed on this SparkContext in the future. * The `path` passed can be either a local file, a file in HDFS (or other Hadoop-supported * filesystems), or an HTTP, HTTPS or FTP URI. @@ -575,9 +635,15 @@ class SparkContext( logWarning("null specified as parameter to addJar", new SparkException("null specified as parameter to addJar")) } else { + val env = SparkEnv.get val uri = new URI(path) val key = uri.getScheme match { - case null | "file" => env.httpFileServer.addJar(new File(uri.getPath)) + case null | "file" => + if (env.hadoop.isYarnMode()) { + logWarning("local jar specified as parameter to addJar under Yarn mode") + return + } + env.httpFileServer.addJar(new File(uri.getPath)) case _ => path } addedJars(key) = System.currentTimeMillis @@ -756,8 +822,9 @@ class SparkContext( * prevent accidental overriding of checkpoint files in the existing directory. 
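A usage sketch for the two shared-variable APIs documented above, assuming local mode and the 0.8-era API as shown in this diff (the implicit AccumulatorParam comes from SparkContext._):

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._

    object SharedVariablesSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "shared-vars-sketch")
        val acc = sc.accumulator(0)                         // tasks add, driver reads
        val lookup = sc.broadcast(Map(1 -> "a", 2 -> "b"))  // shipped to each node once
        sc.parallelize(1 to 2).foreach { i =>
          acc += 1
          lookup.value(i)
        }
        println(acc.value)  // 2, once the job has finished
        sc.stop()
      }
    }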
*/ def setCheckpointDir(dir: String, useExisting: Boolean = false) { + val env = SparkEnv.get val path = new Path(dir) - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) if (!useExisting) { if (fs.exists(path)) { throw new Exception("Checkpoint directory '" + path + "' already exists.") @@ -774,11 +841,11 @@ class SparkContext( /** Default min number of partitions for Hadoop RDDs when not given by user */ def defaultMinSplits: Int = math.min(defaultParallelism, 2) - private var nextShuffleId = new AtomicInteger(0) + private val nextShuffleId = new AtomicInteger(0) private[spark] def newShuffleId(): Int = nextShuffleId.getAndIncrement() - private var nextRddId = new AtomicInteger(0) + private val nextRddId = new AtomicInteger(0) /** Register a new RDD, returning its RDD ID */ private[spark] def newRddId(): Int = nextRddId.getAndIncrement() @@ -794,6 +861,7 @@ class SparkContext( * various Spark features. */ object SparkContext { + val SPARK_JOB_DESCRIPTION = "spark.job.description" implicit object DoubleAccumulatorParam extends AccumulatorParam[Double] { def addInPlace(t1: Double, t2: Double): Double = t1 + t2 @@ -826,7 +894,7 @@ object SparkContext { implicit def rddToOrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( rdd: RDD[(K, V)]) = - new OrderedRDDFunctions(rdd) + new OrderedRDDFunctions[K, V, (K, V)](rdd) implicit def doubleRDDToDoubleRDDFunctions(rdd: RDD[Double]) = new DoubleRDDFunctions(rdd) @@ -911,7 +979,6 @@ object SparkContext { } } - /** * A class encapsulating how to convert some type T to Writable. It stores both the Writable class * corresponding to T (e.g. IntWritable for Int) and a function for doing the conversion. @@ -923,3 +990,4 @@ private[spark] class WritableConverter[T]( val writableClass: ClassManifest[T] => Class[_ <: Writable], val convert: Writable => T) extends Serializable + diff --git a/core/src/main/scala/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index f2bdc11bdb..478e5a0aaf 100644 --- a/core/src/main/scala/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import collection.mutable import serializer.Serializer @@ -23,13 +23,14 @@ import serializer.Serializer import akka.actor.{Actor, ActorRef, Props, ActorSystemImpl, ActorSystem} import akka.remote.RemoteActorRefProvider -import spark.broadcast.BroadcastManager -import spark.storage.BlockManager -import spark.storage.BlockManagerMaster -import spark.network.ConnectionManager -import spark.serializer.{Serializer, SerializerManager} -import spark.util.AkkaUtils -import spark.api.python.PythonWorkerFactory +import org.apache.spark.broadcast.BroadcastManager +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.storage.{BlockManagerMasterActor, BlockManager, BlockManagerMaster} +import org.apache.spark.network.ConnectionManager +import org.apache.spark.serializer.{Serializer, SerializerManager} +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.api.python.PythonWorkerFactory /** @@ -53,13 +54,23 @@ class SparkEnv ( val connectionManager: ConnectionManager, val httpFileServer: HttpFileServer, val sparkFilesDir: String, - // To be set only as part of initialization of SparkContext. 
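nextShuffleId and nextRddId become vals above; the AtomicInteger itself carries all the mutability needed for thread-safe id allocation:

    import java.util.concurrent.atomic.AtomicInteger

    object IdAllocationSketch {
      private val nextRddId = new AtomicInteger(0)

      // Safe to call from any thread; each caller sees a distinct id.
      def newRddId(): Int = nextRddId.getAndIncrement()

      def main(args: Array[String]) {
        println(Seq.fill(3)(newRddId()))  // List(0, 1, 2)
      }
    }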
- // (executorId, defaultHostPort) => executorHostPort - // If executorId is NOT found, return defaultHostPort - var executorIdToHostPort: Option[(String, String) => String]) { + val metricsSystem: MetricsSystem) { private val pythonWorkers = mutable.HashMap[(String, Map[String, String]), PythonWorkerFactory]() + val hadoop = { + val yarnMode = java.lang.Boolean.valueOf(System.getProperty("SPARK_YARN_MODE", System.getenv("SPARK_YARN_MODE"))) + if(yarnMode) { + try { + Class.forName("spark.deploy.yarn.YarnSparkHadoopUtil").newInstance.asInstanceOf[SparkHadoopUtil] + } catch { + case th: Throwable => throw new SparkException("Unable to load YARN support", th) + } + } else { + new SparkHadoopUtil + } + } + def stop() { pythonWorkers.foreach { case(key, worker) => worker.stop() } httpFileServer.stop() @@ -68,6 +79,7 @@ class SparkEnv ( broadcastManager.stop() blockManager.stop() blockManager.master.stop() + metricsSystem.stop() actorSystem.shutdown() // Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut // down, but let's call it anyway in case it gets fixed in a later release @@ -80,27 +92,30 @@ class SparkEnv ( pythonWorkers.getOrElseUpdate(key, new PythonWorkerFactory(pythonExec, envVars)).create() } } - - def resolveExecutorIdToHostPort(executorId: String, defaultHostPort: String): String = { - val env = SparkEnv.get - if (env.executorIdToHostPort.isEmpty) { - // default to using host, not host port. Relevant to non cluster modes. - return defaultHostPort - } - - env.executorIdToHostPort.get(executorId, defaultHostPort) - } } object SparkEnv extends Logging { private val env = new ThreadLocal[SparkEnv] + @volatile private var lastSetSparkEnv : SparkEnv = _ def set(e: SparkEnv) { + lastSetSparkEnv = e env.set(e) } + /** + * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv + * previously set in any thread. + */ def get: SparkEnv = { - env.get() + Option(env.get()).getOrElse(lastSetSparkEnv) + } + + /** + * Returns the ThreadLocal SparkEnv. 
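SparkEnv.get now falls back to the most recently set env from any thread, which keeps freshly spawned threads (such as the Python writer thread later in this diff) working without an explicit set(). The lookup reduces to the following, with String standing in for SparkEnv:

    object EnvLookupSketch {
      private val env = new ThreadLocal[String]
      @volatile private var lastSetEnv: String = _

      def set(e: String) { lastSetEnv = e; env.set(e) }
      def get: String = Option(env.get()).getOrElse(lastSetEnv)

      def main(args: Array[String]) {
        set("driver-env")
        val t = new Thread(new Runnable {
          def run() { println(get) }  // prints driver-env despite no set() here
        })
        t.start()
        t.join()
      }
    }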
+ */ + def getThreadLocal : SparkEnv = { + env.get() } def createFromSystemProperties( @@ -140,10 +155,10 @@ object SparkEnv extends Logging { val serializerManager = new SerializerManager val serializer = serializerManager.setDefault( - System.getProperty("spark.serializer", "spark.JavaSerializer")) + System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer")) val closureSerializer = serializerManager.get( - System.getProperty("spark.closure.serializer", "spark.JavaSerializer")) + System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer")) def registerOrLookup(name: String, newActor: => Actor): ActorRef = { if (isDriver) { @@ -161,7 +176,7 @@ object SparkEnv extends Logging { val blockManagerMaster = new BlockManagerMaster(registerOrLookup( "BlockManagerMaster", - new spark.storage.BlockManagerMasterActor(isLocal))) + new BlockManagerMasterActor(isLocal))) val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer) val connectionManager = blockManager.connectionManager @@ -178,12 +193,19 @@ object SparkEnv extends Logging { new MapOutputTrackerActor(mapOutputTracker)) val shuffleFetcher = instantiateClass[ShuffleFetcher]( - "spark.shuffle.fetcher", "spark.BlockStoreShuffleFetcher") + "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher") val httpFileServer = new HttpFileServer() httpFileServer.initialize() System.setProperty("spark.fileserver.uri", httpFileServer.serverUri) + val metricsSystem = if (isDriver) { + MetricsSystem.createMetricsSystem("driver") + } else { + MetricsSystem.createMetricsSystem("executor") + } + metricsSystem.start() + // Set the sparkFiles directory, used when downloading dependencies. In local mode, // this is a temporary directory; in distributed mode, this is the executor's current working // directory. @@ -213,6 +235,6 @@ object SparkEnv extends Logging { connectionManager, httpFileServer, sparkFilesDir, - None) + metricsSystem) } } diff --git a/core/src/main/scala/spark/SparkException.scala b/core/src/main/scala/org/apache/spark/SparkException.scala index b7045eea63..d34e47e8ca 100644 --- a/core/src/main/scala/spark/SparkException.scala +++ b/core/src/main/scala/org/apache/spark/SparkException.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark class SparkException(message: String, cause: Throwable) extends Exception(message, cause) { diff --git a/core/src/main/scala/spark/SparkFiles.java b/core/src/main/scala/org/apache/spark/SparkFiles.java index f9b3f7965e..af9cf85e37 100644 --- a/core/src/main/scala/spark/SparkFiles.java +++ b/core/src/main/scala/org/apache/spark/SparkFiles.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark; +package org.apache.spark; import java.io.File; diff --git a/core/src/main/scala/spark/HadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index b1fe0075a3..2bab9d6e3d 100644 --- a/core/src/main/scala/spark/HadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -25,8 +25,8 @@ import java.text.NumberFormat import java.io.IOException import java.util.Date -import spark.Logging -import spark.SerializableWritable +import org.apache.spark.Logging +import org.apache.spark.SerializableWritable /** * Internal helper class that saves an RDD using a Hadoop OutputFormat. 
This is only public @@ -36,7 +36,7 @@ import spark.SerializableWritable * Saves the RDD using a JobConf, which should contain an output key class, an output value class, * a filename to write to, etc, exactly like in a Hadoop MapReduce job. */ -class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRedUtil with Serializable { +class SparkHadoopWriter(@transient jobConf: JobConf) extends Logging with SparkHadoopMapRedUtil with Serializable { private val now = new Date() private val conf = new SerializableWritable(jobConf) @@ -165,7 +165,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe splitID = splitid attemptID = attemptid - jID = new SerializableWritable[JobID](HadoopWriter.createJobID(now, jobid)) + jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobid)) taID = new SerializableWritable[TaskAttemptID]( new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID)) } @@ -179,7 +179,7 @@ class HadoopWriter(@transient jobConf: JobConf) extends Logging with HadoopMapRe } } -object HadoopWriter { +object SparkHadoopWriter { def createJobID(time: Date, id: Int): JobID = { val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) diff --git a/core/src/main/scala/spark/TaskContext.scala b/core/src/main/scala/org/apache/spark/TaskContext.scala index b79f4ca813..b2dd668330 100644 --- a/core/src/main/scala/spark/TaskContext.scala +++ b/core/src/main/scala/org/apache/spark/TaskContext.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark import executor.TaskMetrics import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 3ad665da34..03bf268863 100644 --- a/core/src/main/scala/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark +package org.apache.spark -import spark.executor.TaskMetrics -import spark.storage.BlockManagerId +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.storage.BlockManagerId /** * Various possible reasons why a task ended. The low-level TaskScheduler is supposed to retry diff --git a/core/src/main/scala/spark/TaskState.scala b/core/src/main/scala/org/apache/spark/TaskState.scala index 9df7d8277b..19ce8369d9 100644 --- a/core/src/main/scala/spark/TaskState.scala +++ b/core/src/main/scala/org/apache/spark/TaskState.scala @@ -15,7 +15,7 @@ * limitations under the License. 
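The renamed SparkHadoopWriter keeps the same job-id scheme as before: a minute-resolution timestamp serves as the pseudo jobtracker id, plus a counter. Roughly (the two-argument JobID constructor is Hadoop's own):

    import java.text.SimpleDateFormat
    import java.util.Date
    import org.apache.hadoop.mapred.JobID

    object JobIdSketch {
      def createJobID(time: Date, id: Int): JobID = {
        val formatter = new SimpleDateFormat("yyyyMMddHHmm")
        new JobID(formatter.format(time), id)
      }

      def main(args: Array[String]) {
        println(createJobID(new Date(), 0))  // e.g. job_201309011457_0000
      }
    }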
*/ -package spark +package org.apache.spark import org.apache.mesos.Protos.{TaskState => MesosTaskState} @@ -24,9 +24,11 @@ private[spark] object TaskState val LAUNCHING, RUNNING, FINISHED, FAILED, KILLED, LOST = Value + val FINISHED_STATES = Set(FINISHED, FAILED, KILLED, LOST) + type TaskState = Value - def isFinished(state: TaskState) = Seq(FINISHED, FAILED, LOST).contains(state) + def isFinished(state: TaskState) = FINISHED_STATES.contains(state) def toMesos(state: TaskState): MesosTaskState = state match { case LAUNCHING => MesosTaskState.TASK_STARTING diff --git a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala index 392556f261..5fd1fab580 100644 --- a/core/src/main/scala/spark/api/java/JavaDoubleRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package spark.api.java - -import spark.RDD -import spark.SparkContext.doubleRDDToDoubleRDDFunctions -import spark.api.java.function.{Function => JFunction} -import spark.util.StatCounter -import spark.partial.{BoundedDouble, PartialResult} -import spark.storage.StorageLevel +package org.apache.spark.api.java + +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext.doubleRDDToDoubleRDDFunctions +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.util.StatCounter +import org.apache.spark.partial.{BoundedDouble, PartialResult} +import org.apache.spark.storage.StorageLevel import java.lang.Double -import spark.Partitioner +import org.apache.spark.Partitioner class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, JavaDoubleRDD] { @@ -115,33 +115,48 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[Double, Jav // Double RDD functions - /** Return the sum of the elements in this RDD. */ + /** Add up the elements in this RDD. */ def sum(): Double = srdd.sum() - /** Return a [[spark.StatCounter]] describing the elements in this RDD. */ + /** + * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count + * of the RDD's elements in one operation. + */ def stats(): StatCounter = srdd.stats() - /** Return the mean of the elements in this RDD. */ + /** Compute the mean of this RDD's elements. */ def mean(): Double = srdd.mean() - /** Return the variance of the elements in this RDD. */ + /** Compute the variance of this RDD's elements. */ def variance(): Double = srdd.variance() - /** Return the standard deviation of the elements in this RDD. */ + /** Compute the standard deviation of this RDD's elements. */ def stdev(): Double = srdd.stdev() + /** + * Compute the sample standard deviation of this RDD's elements (which corrects for bias in + * estimating the standard deviation by dividing by N-1 instead of N). + */ + def sampleStdev(): Double = srdd.sampleStdev() + + /** + * Compute the sample variance of this RDD's elements (which corrects for bias in + * estimating the standard variance by dividing by N-1 instead of N). + */ + def sampleVariance(): Double = srdd.sampleVariance() + /** Return the approximate mean of the elements in this RDD. */ def meanApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] = srdd.meanApprox(timeout, confidence) - /** Return the approximate mean of the elements in this RDD. */ + /** (Experimental) Approximate operation to return the mean within a timeout. 
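The new sampleStdev and sampleVariance docs above hinge on the N versus N-1 divisor; in miniature:

    object VarianceSketch {
      def variance(xs: Seq[Double], sample: Boolean): Double = {
        val mean = xs.sum / xs.size
        val sumSq = xs.map(x => (x - mean) * (x - mean)).sum
        sumSq / (if (sample) xs.size - 1 else xs.size)  // N - 1 corrects the bias
      }

      def main(args: Array[String]) {
        val xs = Seq(1.0, 2.0, 3.0)
        println(variance(xs, sample = false))  // 0.666... (population, as variance())
        println(variance(xs, sample = true))   // 1.0      (as sampleVariance())
      }
    }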
*/ def meanApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.meanApprox(timeout) - /** Return the approximate sum of the elements in this RDD. */ + /** (Experimental) Approximate operation to return the sum within a timeout. */ def sumApprox(timeout: Long, confidence: Double): PartialResult[BoundedDouble] = srdd.sumApprox(timeout, confidence) - - /** Return the approximate sum of the elements in this RDD. */ + + /** (Experimental) Approximate operation to return the sum within a timeout. */ def sumApprox(timeout: Long): PartialResult[BoundedDouble] = srdd.sumApprox(timeout) } diff --git a/core/src/main/scala/spark/api/java/JavaPairRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala index ccc511dc5f..a6518abf45 100644 --- a/core/src/main/scala/spark/api/java/JavaPairRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{List => JList} import java.util.Comparator @@ -23,23 +23,25 @@ import java.util.Comparator import scala.Tuple2 import scala.collection.JavaConversions._ +import com.google.common.base.Optional import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.conf.Configuration -import spark.api.java.function.{Function2 => JFunction2} -import spark.api.java.function.{Function => JFunction} -import spark.partial.BoundedDouble -import spark.partial.PartialResult -import spark.OrderedRDDFunctions -import spark.storage.StorageLevel -import spark.HashPartitioner -import spark.Partitioner -import spark.Partitioner._ -import spark.RDD -import spark.SparkContext.rddToPairRDDFunctions +import org.apache.spark.HashPartitioner +import org.apache.spark.Partitioner +import org.apache.spark.Partitioner._ +import org.apache.spark.SparkContext.rddToPairRDDFunctions +import org.apache.spark.api.java.function.{Function2 => JFunction2} +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.PartialResult +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.OrderedRDDFunctions +import org.apache.spark.storage.StorageLevel + class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManifest[K], implicit val vManifest: ClassManifest[V]) extends JavaRDDLike[(K, V), JavaPairRDD[K, V]] { @@ -252,11 +254,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif fromRDD(rdd.subtract(other, p)) /** - * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine` - * is true, Spark will group values of the same key together on the map side before the - * repartitioning, to only send each key over the network once. If a large number of - * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should - * be set to true. + * Return a copy of the RDD partitioned using the specified partitioner. */ def partitionBy(partitioner: Partitioner): JavaPairRDD[K, V] = fromRDD(rdd.partitionBy(partitioner)) @@ -276,8 +274,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * partition the output RDD. 
*/ def leftOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner) - : JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other, partitioner)) + : JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other, partitioner) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -286,8 +286,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * partition the output RDD. */ def rightOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner) - : JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other, partitioner)) + : JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other, partitioner) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Simplified version of combineByKey that hash-partitions the resulting RDD using the existing @@ -340,8 +342,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * using the existing partitioner/parallelism level. */ - def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other)) + def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a left outer join of `this` and `other`. For each element (k, v) in `this`, the @@ -349,8 +353,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output * into `numPartitions` partitions. */ - def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Option[W])] = - fromRDD(rdd.leftOuterJoin(other, numPartitions)) + def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (V, Optional[W])] = { + val joinResult = rdd.leftOuterJoin(other, numPartitions) + fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -358,8 +364,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD using the existing partitioner/parallelism level. */ - def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other)) + def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Perform a right outer join of `this` and `other`. For each element (k, w) in `other`, the @@ -367,8 +375,10 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif * pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting * RDD into the given number of partitions. 
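Every outer join in the Java API now surfaces Guava's Optional instead of scala.Option, so Java callers never touch a Scala type; the conversion is the small JavaUtils helper introduced later in this diff:

    import com.google.common.base.Optional

    object OptionalSketch {
      // Same shape as JavaUtils.optionToOptional below.
      def optionToOptional[T](option: Option[T]): Optional[T] = option match {
        case Some(value) => Optional.of(value)
        case None        => Optional.absent()
      }

      def main(args: Array[String]) {
        println(optionToOptional(Some(1)))            // Optional.of(1)
        println(optionToOptional(None: Option[Int]))  // Optional.absent()
      }
    }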
*/ - def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Option[V], W)] = - fromRDD(rdd.rightOuterJoin(other, numPartitions)) + def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int): JavaPairRDD[K, (Optional[V], W)] = { + val joinResult = rdd.rightOuterJoin(other, numPartitions) + fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)}) + } /** * Return the key-value pairs in this RDD to the master as a Map. @@ -554,7 +564,7 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])(implicit val kManifest: ClassManif override def compare(b: K) = comp.compare(a, b) } implicit def toOrdered(x: K): Ordered[K] = new KeyOrdering(x) - fromRDD(new OrderedRDDFunctions(rdd).sortByKey(ascending)) + fromRDD(new OrderedRDDFunctions[K, V, (K, V)](rdd).sortByKey(ascending)) } /** diff --git a/core/src/main/scala/spark/api/java/JavaRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala index c0bf2cf568..eec58abdd6 100644 --- a/core/src/main/scala/spark/api/java/JavaRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala @@ -15,11 +15,12 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java -import spark._ -import spark.api.java.function.{Function => JFunction} -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.storage.StorageLevel class JavaRDD[T](val rdd: RDD[T])(implicit val classManifest: ClassManifest[T]) extends JavaRDDLike[T, JavaRDD[T]] { diff --git a/core/src/main/scala/spark/api/java/JavaRDDLike.scala b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala index 21b5abf053..7e6e691f11 100644 --- a/core/src/main/scala/spark/api/java/JavaRDDLike.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala @@ -15,19 +15,21 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{List => JList, Comparator} import scala.Tuple2 import scala.collection.JavaConversions._ -import org.apache.hadoop.io.compress.CompressionCodec -import spark.{SparkContext, Partition, RDD, TaskContext} -import spark.api.java.JavaPairRDD._ -import spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} -import spark.partial.{PartialResult, BoundedDouble} -import spark.storage.StorageLevel import com.google.common.base.Optional +import org.apache.hadoop.io.compress.CompressionCodec + +import org.apache.spark.{SparkContext, Partition, TaskContext} +import org.apache.spark.rdd.RDD +import org.apache.spark.api.java.JavaPairRDD._ +import org.apache.spark.api.java.function.{Function2 => JFunction2, Function => JFunction, _} +import org.apache.spark.partial.{PartialResult, BoundedDouble} +import org.apache.spark.storage.StorageLevel trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { @@ -40,7 +42,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { /** Set of partitions in this RDD. */ def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq) - /** The [[spark.SparkContext]] that this RDD was created on. */ + /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */ def context: SparkContext = rdd.context /** A unique ID for this RDD (within its SparkContext). 
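sortByKey's Comparator support, unchanged here apart from the extra type parameters on OrderedRDDFunctions, rests on wrapping each key so a java.util.Comparator can stand in for Scala's Ordered:

    import java.util.Comparator

    object OrderingBridgeSketch {
      // Same bridge as the KeyOrdering class inside JavaPairRDD.sortByKey.
      class KeyOrdering[K](a: K, comp: Comparator[K]) extends Ordered[K] {
        override def compare(b: K) = comp.compare(a, b)
      }

      def main(args: Array[String]) {
        val descending = new Comparator[Int] { def compare(x: Int, y: Int) = y - x }
        println(new KeyOrdering(1, descending) < 2)  // false: 1 sorts after 2 descending
      }
    }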
*/ @@ -207,12 +209,12 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * of elements in each partition. */ def zipPartitions[U, V]( - f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V], - other: JavaRDDLike[U, _]): JavaRDD[V] = { + other: JavaRDDLike[U, _], + f: FlatMapFunction2[java.util.Iterator[T], java.util.Iterator[U], V]): JavaRDD[V] = { def fn = (x: Iterator[T], y: Iterator[U]) => asScalaIterator( f.apply(asJavaIterator(x), asJavaIterator(y)).iterator()) JavaRDD.fromRDD( - rdd.zipPartitions(fn, other.rdd)(other.classManifest, f.elementType()))(f.elementType()) + rdd.zipPartitions(other.rdd)(fn)(other.classManifest, f.elementType()))(f.elementType()) } // Actions (launch a job to return a value to the user program) @@ -366,10 +368,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable { * Gets the name of the file to which this RDD was checkpointed */ def getCheckpointFile(): Optional[String] = { - rdd.getCheckpointFile match { - case Some(file) => Optional.of(file) - case _ => Optional.absent() - } + JavaUtils.optionToOptional(rdd.getCheckpointFile) } /** A description of this RDD and its recursive dependencies for debugging. */ diff --git a/core/src/main/scala/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala index fe182e7ab6..8869e072bf 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContext.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java +package org.apache.spark.api.java import java.util.{Map => JMap} @@ -26,14 +26,16 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} +import com.google.common.base.Optional -import spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, RDD, SparkContext} -import spark.SparkContext.IntAccumulatorParam -import spark.SparkContext.DoubleAccumulatorParam -import spark.broadcast.Broadcast +import org.apache.spark.{Accumulable, AccumulableParam, Accumulator, AccumulatorParam, SparkContext} +import org.apache.spark.SparkContext.IntAccumulatorParam +import org.apache.spark.SparkContext.DoubleAccumulatorParam +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.rdd.RDD /** - * A Java-friendly version of [[spark.SparkContext]] that returns [[spark.api.java.JavaRDD]]s and + * A Java-friendly version of [[org.apache.spark.SparkContext]] that returns [[org.apache.spark.api.java.JavaRDD]]s and * works with Java collections instead of Scala ones. */ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWorkaround { @@ -281,48 +283,48 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork } /** - * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. 
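zipPartitions above now takes the other RDD first and the function second, matching the curried Scala signature it delegates to. A local-mode usage sketch of the Scala side:

    import org.apache.spark.SparkContext

    object ZipPartitionsSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext("local", "zip-sketch")
        val nums  = sc.parallelize(1 to 4, 2)
        val chars = sc.parallelize(Seq("a", "b", "c", "d"), 2)
        // Two partitions each: zipPartitions pairs them partition-by-partition.
        val zipped = nums.zipPartitions(chars) { (xs, ys) => xs.zip(ys) }
        println(zipped.collect().toList)  // List((1,a), (2,b), (3,c), (4,d))
      }
    }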
*/ def intAccumulator(initialValue: Int): Accumulator[java.lang.Integer] = sc.accumulator(initialValue)(IntAccumulatorParam).asInstanceOf[Accumulator[java.lang.Integer]] /** - * Create an [[spark.Accumulator]] double variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def doubleAccumulator(initialValue: Double): Accumulator[java.lang.Double] = sc.accumulator(initialValue)(DoubleAccumulatorParam).asInstanceOf[Accumulator[java.lang.Double]] /** - * Create an [[spark.Accumulator]] integer variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] integer variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator(initialValue: Int): Accumulator[java.lang.Integer] = intAccumulator(initialValue) /** - * Create an [[spark.Accumulator]] double variable, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] double variable, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator(initialValue: Double): Accumulator[java.lang.Double] = doubleAccumulator(initialValue) /** - * Create an [[spark.Accumulator]] variable of a given type, which tasks can "add" values + * Create an [[org.apache.spark.Accumulator]] variable of a given type, which tasks can "add" values * to using the `add` method. Only the master can access the accumulator's `value`. */ def accumulator[T](initialValue: T, accumulatorParam: AccumulatorParam[T]): Accumulator[T] = sc.accumulator(initialValue)(accumulatorParam) /** - * Create an [[spark.Accumulable]] shared variable of the given type, to which tasks can + * Create an [[org.apache.spark.Accumulable]] shared variable of the given type, to which tasks can * "add" values with `add`. Only the master can access the accumuable's `value`. */ def accumulable[T, R](initialValue: T, param: AccumulableParam[T, R]): Accumulable[T, R] = sc.accumulable(initialValue)(param) /** - * Broadcast a read-only variable to the cluster, returning a [[spark.Broadcast]] object for + * Broadcast a read-only variable to the cluster, returning a [[org.apache.spark.Broadcast]] object for * reading it in distributed functions. The variable will be sent to each cluster only once. */ def broadcast[T](value: T): Broadcast[T] = sc.broadcast(value) @@ -337,7 +339,7 @@ class JavaSparkContext(val sc: SparkContext) extends JavaSparkContextVarargsWork * or the spark.home Java property, or the SPARK_HOME environment variable * (in that order of preference). If neither of these is set, return None. */ - def getSparkHome(): Option[String] = sc.getSparkHome() + def getSparkHome(): Optional[String] = JavaUtils.optionToOptional(sc.getSparkHome()) /** * Add a file to be downloaded with this Spark job on every node. diff --git a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java index 42b1de01b1..c9cbce5624 100644 --- a/core/src/main/scala/spark/api/java/JavaSparkContextVarargsWorkaround.java +++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContextVarargsWorkaround.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.api.java; +package org.apache.spark.api.java; import java.util.Arrays; import java.util.ArrayList; diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala index 07c3ddcc7e..ecbf18849a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/api/java/JavaUtils.scala @@ -15,20 +15,14 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.api.java -import scala.collection.mutable.ArrayBuffer -import spark.scheduler._ -import spark.TaskState.TaskState -import java.nio.ByteBuffer +import com.google.common.base.Optional -private[spark] trait TaskSetManager extends Schedulable { - def taskSet: TaskSet - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, - overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] - def numPendingTasksForHostPort(hostPort: String): Int - def numRackLocalPendingTasksForHost(hostPort :String): Int - def numPendingTasksForHost(hostPort: String): Int - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) - def error(message: String) +object JavaUtils { + def optionToOptional[T](option: Option[T]): Optional[T] = + option match { + case Some(value) => Optional.of(value) + case None => Optional.absent() + } } diff --git a/core/src/main/scala/spark/api/java/StorageLevels.java b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java index f385636e83..0744269773 100644 --- a/core/src/main/scala/spark/api/java/StorageLevels.java +++ b/core/src/main/scala/org/apache/spark/api/java/StorageLevels.java @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.api.java; +package org.apache.spark.api.java; -import spark.storage.StorageLevel; +import org.apache.spark.storage.StorageLevel; /** * Expose some commonly useful storage level constants. diff --git a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java index 8bc88d757f..4830067f7a 100644 --- a/core/src/main/scala/spark/api/java/function/DoubleFlatMapFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFlatMapFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.runtime.AbstractFunction1; diff --git a/core/src/main/scala/spark/api/java/function/DoubleFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java index 1aa1e5dae0..db34cd190a 100644 --- a/core/src/main/scala/spark/api/java/function/DoubleFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/DoubleFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.runtime.AbstractFunction1; diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala index 9eb0cfe3f9..158539a846 100644 --- a/core/src/main/scala/spark/api/java/function/FlatMapFunction.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function that returns zero or more output records from each input record. diff --git a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala index dda98710c2..5ef6a814f5 100644 --- a/core/src/main/scala/spark/api/java/function/FlatMapFunction2.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/FlatMapFunction2.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function that takes two inputs and returns zero or more output records. diff --git a/core/src/main/scala/spark/api/java/function/Function.java b/core/src/main/scala/org/apache/spark/api/java/function/Function.java index 2a2ea0aacf..b9070cfd83 100644 --- a/core/src/main/scala/spark/api/java/function/Function.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.reflect.ClassManifest; import scala.reflect.ClassManifest$; diff --git a/core/src/main/scala/spark/api/java/function/Function2.java b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java index 952d31ece4..d4c9154869 100644 --- a/core/src/main/scala/spark/api/java/function/Function2.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/Function2.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.reflect.ClassManifest; import scala.reflect.ClassManifest$; diff --git a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java index 4aad602da3..c0e5544b7d 100644 --- a/core/src/main/scala/spark/api/java/function/PairFlatMapFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFlatMapFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.Tuple2; import scala.reflect.ClassManifest; diff --git a/core/src/main/scala/spark/api/java/function/PairFunction.java b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java index ccfe64ecf1..40480fe8e8 100644 --- a/core/src/main/scala/spark/api/java/function/PairFunction.java +++ b/core/src/main/scala/org/apache/spark/api/java/function/PairFunction.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function; +package org.apache.spark.api.java.function; import scala.Tuple2; import scala.reflect.ClassManifest; diff --git a/core/src/main/scala/spark/api/java/function/VoidFunction.scala b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala index f6fc0b0f7d..ea94313a4a 100644 --- a/core/src/main/scala/spark/api/java/function/VoidFunction.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/VoidFunction.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function /** * A function with no return value. 
diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala index 1758a38c4e..cfe694f65d 100644 --- a/core/src/main/scala/spark/api/java/function/WrappedFunction1.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction1.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function import scala.runtime.AbstractFunction1 diff --git a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala index b093567d2c..eb9277c6fb 100644 --- a/core/src/main/scala/spark/api/java/function/WrappedFunction2.scala +++ b/core/src/main/scala/org/apache/spark/api/java/function/WrappedFunction2.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.api.java.function +package org.apache.spark.api.java.function import scala.runtime.AbstractFunction2 diff --git a/core/src/main/scala/spark/api/python/PythonPartitioner.scala b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala index 31a719fbff..b090c6edf3 100644 --- a/core/src/main/scala/spark/api/python/PythonPartitioner.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonPartitioner.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.api.python - -import spark.Partitioner +package org.apache.spark.api.python +import org.apache.spark.Partitioner import java.util.Arrays +import org.apache.spark.util.Utils /** - * A [[spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. + * A [[org.apache.spark.Partitioner]] that performs handling of byte arrays, for use by the Python API. * * Stores the unique id() of the Python-side partitioning function so that it is incorporated into * equality comparisons. Correctness requires that the id is a unique identifier for the @@ -35,25 +35,10 @@ private[spark] class PythonPartitioner( val pyPartitionFunctionId: Long) extends Partitioner { - override def getPartition(key: Any): Int = { - if (key == null) { - return 0 - } - else { - val hashCode = { - if (key.isInstanceOf[Array[Byte]]) { - Arrays.hashCode(key.asInstanceOf[Array[Byte]]) - } else { - key.hashCode() - } - } - val mod = hashCode % numPartitions - if (mod < 0) { - mod + numPartitions - } else { - mod // Guard against negative hash codes - } - } + override def getPartition(key: Any): Int = key match { + case null => 0 + case key: Array[Byte] => Utils.nonNegativeMod(Arrays.hashCode(key), numPartitions) + case _ => Utils.nonNegativeMod(key.hashCode(), numPartitions) } override def equals(other: Any): Boolean = other match { diff --git a/core/src/main/scala/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index af10822dbd..ccd3833964 100644 --- a/core/src/main/scala/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. 
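The PythonPartitioner rewrite above collapses the old inline sign handling into Utils.nonNegativeMod. A sketch of what that helper must compute, assuming it preserves the old inline behavior (this is not the Spark source itself):

```scala
// % in Scala can be negative for a negative hash code, which would be an
// invalid partition id; shift such results back into [0, mod).
def nonNegativeMod(x: Int, mod: Int): Int = {
  val rawMod = x % mod
  rawMod + (if (rawMod < 0) mod else 0)
}

assert(nonNegativeMod(7, 4) == 3)
assert(nonNegativeMod(-7, 4) == 1)  // -7 % 4 == -3, so -3 + 4 == 1
```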
*/ -package spark.api.python +package org.apache.spark.api.python import java.io._ import java.net._ @@ -23,16 +23,19 @@ import java.util.{List => JList, ArrayList => JArrayList, Map => JMap, Collectio import scala.collection.JavaConversions._ -import spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} -import spark.broadcast.Broadcast -import spark._ -import spark.rdd.PipedRDD +import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} +import org.apache.spark.broadcast.Broadcast +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.PipedRDD +import org.apache.spark.util.Utils private[spark] class PythonRDD[T: ClassManifest]( parent: RDD[T], command: Seq[String], envVars: JMap[String, String], + pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, broadcastVars: JList[Broadcast[Array[Byte]]], @@ -44,10 +47,11 @@ private[spark] class PythonRDD[T: ClassManifest]( // Similar to Runtime.exec(), if we are given a single string, split it into words // using a standard StringTokenizer (i.e. by spaces) def this(parent: RDD[T], command: String, envVars: JMap[String, String], + pythonIncludes: JList[String], preservePartitoning: Boolean, pythonExec: String, broadcastVars: JList[Broadcast[Array[Byte]]], accumulator: Accumulator[JList[Array[Byte]]]) = - this(parent, PipedRDD.tokenize(command), envVars, preservePartitoning, pythonExec, + this(parent, PipedRDD.tokenize(command), envVars, pythonIncludes, preservePartitoning, pythonExec, broadcastVars, accumulator) override def getPartitions = parent.partitions @@ -63,34 +67,47 @@ private[spark] class PythonRDD[T: ClassManifest]( // Start a thread to feed the process input from our parent's iterator new Thread("stdin writer for " + pythonExec) { override def run() { - SparkEnv.set(env) - val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize) - val dataOut = new DataOutputStream(stream) - val printOut = new PrintWriter(stream) - // Partition index - dataOut.writeInt(split.index) - // sparkFilesDir - PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut) - // Broadcast variables - dataOut.writeInt(broadcastVars.length) - for (broadcast <- broadcastVars) { - dataOut.writeLong(broadcast.id) - dataOut.writeInt(broadcast.value.length) - dataOut.write(broadcast.value) - } - dataOut.flush() - // Serialized user code - for (elem <- command) { - printOut.println(elem) - } - printOut.flush() - // Data values - for (elem <- parent.iterator(split, context)) { - PythonRDD.writeAsPickle(elem, dataOut) + try { + SparkEnv.set(env) + val stream = new BufferedOutputStream(worker.getOutputStream, bufferSize) + val dataOut = new DataOutputStream(stream) + val printOut = new PrintWriter(stream) + // Partition index + dataOut.writeInt(split.index) + // sparkFilesDir + PythonRDD.writeAsPickle(SparkFiles.getRootDirectory, dataOut) + // Broadcast variables + dataOut.writeInt(broadcastVars.length) + for (broadcast <- broadcastVars) { + dataOut.writeLong(broadcast.id) + dataOut.writeInt(broadcast.value.length) + dataOut.write(broadcast.value) + } + // Python includes (*.zip and *.egg files) + dataOut.writeInt(pythonIncludes.length) + for (f <- pythonIncludes) { + PythonRDD.writeAsPickle(f, dataOut) + } + dataOut.flush() + // Serialized user code + for (elem <- command) { + printOut.println(elem) + } + printOut.flush() + // Data values + for (elem <- parent.iterator(split, context)) { + PythonRDD.writeAsPickle(elem, dataOut) + } + dataOut.flush() + printOut.flush() + 
worker.shutdownOutput() + } catch { + case e: IOException => + // This can happen for legitimate reasons if the Python code stops returning data before we are done + // passing elements through, e.g., for take(). Just log a message to say it happened. + logInfo("stdin writer to Python finished early") + logDebug("stdin writer to Python finished early", e) } - dataOut.flush() - printOut.flush() - worker.shutdownOutput() } }.start() @@ -283,7 +300,7 @@ private object Pickle { val APPENDS: Byte = 'e' } -private class BytesToString extends spark.api.java.function.Function[Array[Byte], String] { +private class BytesToString extends org.apache.spark.api.java.function.Function[Array[Byte], String] { override def call(arr: Array[Byte]) : String = new String(arr, "UTF-8") } @@ -297,7 +314,7 @@ class PythonAccumulatorParam(@transient serverHost: String, serverPort: Int) Utils.checkHost(serverHost, "Expected hostname") val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt - + override def zero(value: JList[Array[Byte]]): JList[Array[Byte]] = new JArrayList override def addInPlace(val1: JList[Array[Byte]], val2: JList[Array[Byte]]) diff --git a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala index 078ad45ce8..08e3f670f5 100644 --- a/core/src/main/scala/spark/api/python/PythonWorkerFactory.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.api.python +package org.apache.spark.api.python -import java.io.{DataInputStream, IOException} +import java.io.{File, DataInputStream, IOException} import java.net.{Socket, SocketException, InetAddress} import scala.collection.JavaConversions._ -import spark._ +import org.apache.spark._ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String, String]) extends Logging { @@ -67,6 +67,8 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String val pb = new ProcessBuilder(Seq(pythonExec, sparkHome + "/python/pyspark/daemon.py")) val workerEnv = pb.environment() workerEnv.putAll(envVars) + val pythonPath = sparkHome + "/python/" + File.pathSeparator + workerEnv.get("PYTHONPATH") + workerEnv.put("PYTHONPATH", pythonPath) daemon = pb.start() // Redirect the stderr to ours diff --git a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala index 6f7d385379..93e7815ab5 100644 --- a/core/src/main/scala/spark/broadcast/BitTorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BitTorrentBroadcast.scala @@ -15,7 +15,7 @@ * limitations under the License. 
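The PythonWorkerFactory change above prepends Spark's python directory to the daemon's PYTHONPATH. The same ProcessBuilder idiom as a self-contained sketch; unlike the patched line, it guards against an unset PYTHONPATH, which plain string concatenation would otherwise render as the literal text "null":

```scala
import java.io.File

// "python" stands in for the configured pythonExec.
val sparkHome = Option(System.getenv("SPARK_HOME")).getOrElse(".")
val pb = new ProcessBuilder("python", sparkHome + "/python/pyspark/daemon.py")

val env = pb.environment()
val existing = Option(env.get("PYTHONPATH")).getOrElse("")
env.put("PYTHONPATH", sparkHome + "/python/" + File.pathSeparator + existing)
```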
*/ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -25,8 +25,9 @@ import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ListBuffer, Map, Set} import scala.math -import spark._ -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils private[spark] class BitTorrentBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) extends Broadcast[T](id) diff --git a/core/src/main/scala/spark/broadcast/Broadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala index aba56a60ca..43c18294c5 100644 --- a/core/src/main/scala/spark/broadcast/Broadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/Broadcast.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.util.concurrent.atomic.AtomicLong -import spark._ +import org.apache.spark._ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { def value: T @@ -28,7 +28,7 @@ abstract class Broadcast[T](private[spark] val id: Long) extends Serializable { // We cannot have an abstract readObject here due to some weird issues with // readObject having to be 'private' in sub-classes. - override def toString = "spark.Broadcast(" + id + ")" + override def toString = "Broadcast(" + id + ")" } private[spark] @@ -44,7 +44,7 @@ class BroadcastManager(val _isDriver: Boolean) extends Logging with Serializable synchronized { if (!initialized) { val broadcastFactoryClass = System.getProperty( - "spark.broadcast.factory", "spark.broadcast.HttpBroadcastFactory") + "spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory") broadcastFactory = Class.forName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] diff --git a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala index d33d95c7d9..68bff75b90 100644 --- a/core/src/main/scala/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast /** * An interface for all the broadcast implementations in Spark (to allow diff --git a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala index c565876950..9db26ae6de 100644 --- a/core/src/main/scala/spark/broadcast/HttpBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -15,23 +15,22 @@ * limitations under the License. 
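The Broadcast.scala hunk above updates the default value of spark.broadcast.factory, which BroadcastManager resolves reflectively. A hedged sketch of swapping implementations through that property (TreeBroadcastFactory is illustrative; only the HttpBroadcastFactory default is confirmed by this diff):

```scala
import org.apache.spark.broadcast.BroadcastFactory

// Select a non-default implementation before the manager initializes.
System.setProperty("spark.broadcast.factory",
  "org.apache.spark.broadcast.TreeBroadcastFactory")

// BroadcastManager then instantiates whatever class the property names:
val className = System.getProperty(
  "spark.broadcast.factory", "org.apache.spark.broadcast.HttpBroadcastFactory")
val factory = Class.forName(className).newInstance.asInstanceOf[BroadcastFactory]
```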
*/ -package spark.broadcast +package org.apache.spark.broadcast -import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} - -import java.io._ -import java.net._ -import java.util.UUID +import java.io.{File, FileOutputStream, ObjectInputStream, OutputStream} +import java.net.URL import it.unimi.dsi.fastutil.io.FastBufferedInputStream import it.unimi.dsi.fastutil.io.FastBufferedOutputStream -import spark._ -import spark.storage.StorageLevel -import util.{MetadataCleaner, TimeStampedHashSet} +import org.apache.spark.{HttpServer, Logging, SparkEnv} +import org.apache.spark.io.CompressionCodec +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{Utils, MetadataCleaner, TimeStampedHashSet} + private[spark] class HttpBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) -extends Broadcast[T](id) with Logging with Serializable { + extends Broadcast[T](id) with Logging with Serializable { def value = value_ @@ -85,6 +84,7 @@ private object HttpBroadcast extends Logging { private val files = new TimeStampedHashSet[String] private val cleaner = new MetadataCleaner("HttpBroadcast", cleanup) + private lazy val compressionCodec = CompressionCodec.createCodec() def initialize(isDriver: Boolean) { synchronized { @@ -122,10 +122,12 @@ private object HttpBroadcast extends Logging { def write(id: Long, value: Any) { val file = new File(broadcastDir, "broadcast-" + id) - val out: OutputStream = if (compress) { - new LZFOutputStream(new FileOutputStream(file)) // Does its own buffering - } else { - new FastBufferedOutputStream(new FileOutputStream(file), bufferSize) + val out: OutputStream = { + if (compress) { + compressionCodec.compressedOutputStream(new FileOutputStream(file)) + } else { + new FastBufferedOutputStream(new FileOutputStream(file), bufferSize) + } } val ser = SparkEnv.get.serializer.newInstance() val serOut = ser.serializeStream(out) @@ -136,10 +138,12 @@ private object HttpBroadcast extends Logging { def read[T](id: Long): T = { val url = serverUri + "/broadcast-" + id - var in = if (compress) { - new LZFInputStream(new URL(url).openStream()) // Does its own buffering - } else { - new FastBufferedInputStream(new URL(url).openStream(), bufferSize) + val in = { + if (compress) { + compressionCodec.compressedInputStream(new URL(url).openStream()) + } else { + new FastBufferedInputStream(new URL(url).openStream(), bufferSize) + } } val ser = SparkEnv.get.serializer.newInstance() val serIn = ser.deserializeStream(in) diff --git a/core/src/main/scala/spark/broadcast/MultiTracker.scala b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala index 7855d44e9b..21ec94659e 100644 --- a/core/src/main/scala/spark/broadcast/MultiTracker.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/MultiTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -23,7 +23,8 @@ import java.util.Random import scala.collection.mutable.Map -import spark._ +import org.apache.spark._ +import org.apache.spark.util.Utils private object MultiTracker extends Logging { diff --git a/core/src/main/scala/spark/broadcast/SourceInfo.scala b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala index b17ae63b5c..baa1fd6da4 100644 --- a/core/src/main/scala/spark/broadcast/SourceInfo.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/SourceInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. 
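The HttpBroadcast hunk above drops the hard-coded LZF streams in favor of the pluggable CompressionCodec. A round-trip sketch using only the two codec methods visible in this diff (assuming createCodec() is callable from user code):

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
import org.apache.spark.io.CompressionCodec

val codec = CompressionCodec.createCodec()

// Compress a payload into a buffer...
val buffer = new ByteArrayOutputStream()
val out = codec.compressedOutputStream(buffer)
out.write("broadcast payload".getBytes("UTF-8"))
out.close()

// ...and read it back through the matching input stream.
val in = codec.compressedInputStream(new ByteArrayInputStream(buffer.toByteArray))
val roundTripped = scala.io.Source.fromInputStream(in).mkString
assert(roundTripped == "broadcast payload")
```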
*/ -package spark.broadcast +package org.apache.spark.broadcast import java.util.BitSet -import spark._ +import org.apache.spark._ /** * Used to keep and pass around information of peers involved in a broadcast diff --git a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala index ea1e9a12c1..80c97ca073 100644 --- a/core/src/main/scala/spark/broadcast/TreeBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TreeBroadcast.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.broadcast +package org.apache.spark.broadcast import java.io._ import java.net._ @@ -24,8 +24,9 @@ import java.util.{Comparator, Random, UUID} import scala.collection.mutable.{ListBuffer, Map, Set} import scala.math -import spark._ -import spark.storage.StorageLevel +import org.apache.spark._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.Utils private[spark] class TreeBroadcast[T](@transient var value_ : T, isLocal: Boolean, id: Long) extends Broadcast[T](id) with Logging with Serializable { diff --git a/core/src/main/scala/spark/deploy/ApplicationDescription.scala b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala index a8b22fbef8..19d393a0db 100644 --- a/core/src/main/scala/spark/deploy/ApplicationDescription.scala +++ b/core/src/main/scala/org/apache/spark/deploy/ApplicationDescription.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy private[spark] class ApplicationDescription( val name: String, diff --git a/core/src/main/scala/spark/deploy/Command.scala b/core/src/main/scala/org/apache/spark/deploy/Command.scala index bad629e965..fa8af9a646 100644 --- a/core/src/main/scala/spark/deploy/Command.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Command.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import scala.collection.Map diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala new file mode 100644 index 0000000000..c31619db27 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.spark.deploy
+
+import scala.collection.immutable.List
+
+import org.apache.spark.deploy.ExecutorState.ExecutorState
+import org.apache.spark.deploy.master.{WorkerInfo, ApplicationInfo}
+import org.apache.spark.deploy.worker.ExecutorRunner
+import org.apache.spark.util.Utils
+
+
+private[deploy] sealed trait DeployMessage extends Serializable
+
+private[deploy] object DeployMessages {
+
+  // Worker to Master
+
+  case class RegisterWorker(
+      id: String,
+      host: String,
+      port: Int,
+      cores: Int,
+      memory: Int,
+      webUiPort: Int,
+      publicAddress: String)
+    extends DeployMessage {
+    Utils.checkHost(host, "Required hostname")
+    assert (port > 0)
+  }
+
+  case class ExecutorStateChanged(
+      appId: String,
+      execId: Int,
+      state: ExecutorState,
+      message: Option[String],
+      exitStatus: Option[Int])
+    extends DeployMessage
+
+  case class Heartbeat(workerId: String) extends DeployMessage
+
+  // Master to Worker
+
+  case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage
+
+  case class RegisterWorkerFailed(message: String) extends DeployMessage
+
+  case class KillExecutor(appId: String, execId: Int) extends DeployMessage
+
+  case class LaunchExecutor(
+      appId: String,
+      execId: Int,
+      appDesc: ApplicationDescription,
+      cores: Int,
+      memory: Int,
+      sparkHome: String)
+    extends DeployMessage
+
+  // Client to Master
+
+  case class RegisterApplication(appDescription: ApplicationDescription)
+    extends DeployMessage
+
+  // Master to Client
+
+  case class RegisteredApplication(appId: String) extends DeployMessage
+
+  // TODO(matei): replace hostPort with host
+  case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) {
+    Utils.checkHostPort(hostPort, "Required hostport")
+  }
+
+  case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String],
+    exitStatus: Option[Int])
+
+  case class ApplicationRemoved(message: String)
+
+  // Internal message in Client
+
+  case object StopClient
+
+  // MasterWebUI To Master
+
+  case object RequestMasterState
+
+  // Master to MasterWebUI
+
+  case class MasterStateResponse(host: String, port: Int, workers: Array[WorkerInfo],
+    activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) {
+
+    Utils.checkHost(host, "Required hostname")
+    assert (port > 0)
+
+    def uri = "spark://" + host + ":" + port
+  }
+
+  // WorkerWebUI to Worker
+
+  case object RequestWorkerState
+
+  // Worker to WorkerWebUI
+
+  case class WorkerStateResponse(host: String, port: Int, workerId: String,
+    executors: List[ExecutorRunner], finishedExecutors: List[ExecutorRunner], masterUrl: String,
+    cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) {
+
+    Utils.checkHost(host, "Required hostname")
+    assert (port > 0)
+  }
+
+  // Actor System to Master
+
+  case object CheckForWorkerTimeOut
+
+}
diff --git a/core/src/main/scala/spark/deploy/ExecutorState.scala b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala
index 08c9a3b725..fcfea96ad6 100644
--- a/core/src/main/scala/spark/deploy/ExecutorState.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/ExecutorState.scala
@@ -15,7 +15,7 @@
 * limitations under the License.
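The new DeployMessage.scala above gathers what used to be scattered message classes into one sealed hierarchy, so matches can be checked for exhaustiveness and each constructor can validate its arguments, as RegisterWorker and MasterStateResponse do. The same pattern in miniature:

```scala
sealed trait Message extends Serializable

object Messages {
  // Validation runs in the case-class body at construction time.
  case class Register(host: String, port: Int) extends Message {
    require(port > 0, "port must be positive")
  }
  case object Heartbeat extends Message
}

// Because the trait is sealed, a non-exhaustive match draws a compiler warning.
def describe(m: Message): String = m match {
  case Messages.Register(host, port) => "register " + host + ":" + port
  case Messages.Heartbeat => "heartbeat"
}
```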
*/ -package spark.deploy +package org.apache.spark.deploy private[spark] object ExecutorState extends Enumeration("LAUNCHING", "LOADING", "RUNNING", "KILLED", "FAILED", "LOST") { diff --git a/core/src/main/scala/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 64f89623e1..a6be8efef1 100644 --- a/core/src/main/scala/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -15,11 +15,14 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy -import master.{ApplicationInfo, WorkerInfo} import net.liftweb.json.JsonDSL._ -import worker.ExecutorRunner + +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, WorkerStateResponse} +import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.deploy.worker.ExecutorRunner + private[spark] object JsonProtocol { def writeWorkerInfo(obj: WorkerInfo) = { @@ -30,7 +33,8 @@ private[spark] object JsonProtocol { ("cores" -> obj.cores) ~ ("coresused" -> obj.coresUsed) ~ ("memory" -> obj.memory) ~ - ("memoryused" -> obj.memoryUsed) + ("memoryused" -> obj.memoryUsed) ~ + ("state" -> obj.state.toString) } def writeApplicationInfo(obj: ApplicationInfo) = { @@ -57,7 +61,7 @@ private[spark] object JsonProtocol { ("appdesc" -> writeApplicationDescription(obj.appDesc)) } - def writeMasterState(obj: MasterState) = { + def writeMasterState(obj: MasterStateResponse) = { ("url" -> ("spark://" + obj.uri)) ~ ("workers" -> obj.workers.toList.map(writeWorkerInfo)) ~ ("cores" -> obj.workers.map(_.cores).sum) ~ @@ -68,7 +72,7 @@ private[spark] object JsonProtocol { ("completedapps" -> obj.completedApps.toList.map(writeApplicationInfo)) } - def writeWorkerState(obj: WorkerState) = { + def writeWorkerState(obj: WorkerStateResponse) = { ("id" -> obj.workerId) ~ ("masterurl" -> obj.masterUrl) ~ ("masterwebuiurl" -> obj.masterWebUiUrl) ~ diff --git a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala index 6b8e9f27af..78e3747ad8 100644 --- a/core/src/main/scala/spark/deploy/LocalSparkCluster.scala +++ b/core/src/main/scala/org/apache/spark/deploy/LocalSparkCluster.scala @@ -15,14 +15,14 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} -import spark.deploy.worker.Worker -import spark.deploy.master.Master -import spark.util.AkkaUtils -import spark.{Logging, Utils} +import org.apache.spark.deploy.worker.Worker +import org.apache.spark.deploy.master.Master +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.{Logging} import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala new file mode 100644 index 0000000000..0a5f4c368f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.mapred.JobConf + + +/** + * Contains util methods to interact with Hadoop from spark. + */ +class SparkHadoopUtil { + + // Return an appropriate (subclass) of Configuration. Creating config can initializes some hadoop subsystems + def newConfiguration(): Configuration = new Configuration() + + // add any user credentials to the job conf which are necessary for running on a secure Hadoop cluster + def addCredentials(conf: JobConf) {} + + def isYarnMode(): Boolean = { false } + +} diff --git a/core/src/main/scala/spark/deploy/WebUI.scala b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala index 8ea7792ef4..ae258b58b9 100644 --- a/core/src/main/scala/spark/deploy/WebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/WebUI.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy +package org.apache.spark.deploy import java.text.SimpleDateFormat import java.util.Date diff --git a/core/src/main/scala/spark/deploy/client/Client.scala b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala index edefa0292d..a342dd724a 100644 --- a/core/src/main/scala/spark/deploy/client/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/Client.scala @@ -15,23 +15,25 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client + +import java.util.concurrent.TimeoutException -import spark.deploy._ import akka.actor._ +import akka.actor.Terminated import akka.pattern.ask import akka.util.Duration -import akka.util.duration._ -import akka.pattern.AskTimeoutException -import spark.{SparkException, Logging} +import akka.remote.RemoteClientDisconnected import akka.remote.RemoteClientLifeCycleEvent import akka.remote.RemoteClientShutdown -import spark.deploy.RegisterApplication -import spark.deploy.master.Master -import akka.remote.RemoteClientDisconnected -import akka.actor.Terminated import akka.dispatch.Await +import org.apache.spark.Logging +import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.Master + + /** * The main class used to talk to a Spark deploy cluster. Takes a master URL, an app description, * and a listener for cluster events, and calls back the listener when various events occur. 
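SparkHadoopUtil above is deliberately a concrete class with no-op defaults so that other deployment modes can override them. A hedged sketch of such a specialization (the subclass name and body are illustrative, not from this commit):

```scala
import org.apache.hadoop.mapred.JobConf
import org.apache.spark.deploy.SparkHadoopUtil

class YarnModeHadoopUtil extends SparkHadoopUtil {
  override def isYarnMode(): Boolean = true

  override def addCredentials(conf: JobConf) {
    // A secure deployment would attach the current user's tokens here.
  }
}
```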
@@ -134,7 +136,8 @@ private[spark] class Client( val future = actor.ask(StopClient)(timeout) Await.result(future, timeout) } catch { - case e: AskTimeoutException => // Ignore it, maybe master went away + case e: TimeoutException => + logInfo("Stop request to Master timed out; it may already be shut down.") } actor = null } diff --git a/core/src/main/scala/spark/deploy/client/ClientListener.scala b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala index 064024455e..4605368c11 100644 --- a/core/src/main/scala/spark/deploy/client/ClientListener.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/ClientListener.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client /** * Callbacks invoked by deploy client when various events happen. There are currently four events: diff --git a/core/src/main/scala/spark/deploy/client/TestClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala index 4f4daa141a..d5e9a0e095 100644 --- a/core/src/main/scala/spark/deploy/client/TestClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestClient.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client -import spark.util.AkkaUtils -import spark.{Logging, Utils} -import spark.deploy.{Command, ApplicationDescription} +import org.apache.spark.util.{Utils, AkkaUtils} +import org.apache.spark.{Logging} +import org.apache.spark.deploy.{Command, ApplicationDescription} private[spark] object TestClient { diff --git a/core/src/main/scala/spark/deploy/client/TestExecutor.scala b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala index 8a22b6b89f..c5ac45c673 100644 --- a/core/src/main/scala/spark/deploy/client/TestExecutor.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/TestExecutor.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.client +package org.apache.spark.deploy.client private[spark] object TestExecutor { def main(args: Array[String]) { diff --git a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala index 15ff919738..bd5327627a 100644 --- a/core/src/main/scala/spark/deploy/master/ApplicationInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. 
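The Client.stop() fix at the top of this hunk catches java.util.concurrent.TimeoutException instead of Akka's AskTimeoutException, since Await.result is what actually raises the timeout. The ask/Await pattern of this Akka era, sketched standalone (the actor reference is a placeholder):

```scala
import java.util.concurrent.TimeoutException

import akka.actor.ActorRef
import akka.dispatch.Await
import akka.pattern.ask
import akka.util.duration._

import org.apache.spark.deploy.DeployMessages.StopClient

def stopPeer(actor: ActorRef) {
  val timeout = 5.seconds
  try {
    val future = actor.ask(StopClient)(timeout)
    Await.result(future, timeout)
  } catch {
    case e: TimeoutException =>
      // The peer may already be gone; treat the shutdown as best-effort.
  }
}
```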
*/ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.deploy.ApplicationDescription +import org.apache.spark.deploy.ApplicationDescription import java.util.Date import akka.actor.ActorRef import scala.collection.mutable @@ -34,6 +34,7 @@ private[spark] class ApplicationInfo( var executors = new mutable.HashMap[Int, ExecutorInfo] var coresGranted = 0 var endTime = -1L + val appSource = new ApplicationSource(this) private var nextExecutorId = 0 @@ -51,8 +52,10 @@ private[spark] class ApplicationInfo( } def removeExecutor(exec: ExecutorInfo) { - executors -= exec.id - coresGranted -= exec.cores + if (executors.contains(exec.id)) { + executors -= exec.id + coresGranted -= exec.cores + } } def coresLeft: Int = desc.maxCores - coresGranted diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala new file mode 100644 index 0000000000..2d75ad5a2c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationSource.scala @@ -0,0 +1,24 @@ +package org.apache.spark.deploy.master + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +class ApplicationSource(val application: ApplicationInfo) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "%s.%s.%s".format("application", application.desc.name, + System.currentTimeMillis()) + + metricRegistry.register(MetricRegistry.name("status"), new Gauge[String] { + override def getValue: String = application.state.toString + }) + + metricRegistry.register(MetricRegistry.name("runtime_ms"), new Gauge[Long] { + override def getValue: Long = application.duration + }) + + metricRegistry.register(MetricRegistry.name("cores", "number"), new Gauge[Int] { + override def getValue: Int = application.coresGranted + }) + +} diff --git a/core/src/main/scala/spark/deploy/master/ApplicationState.scala b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala index 94f0ad8bae..7e804223cf 100644 --- a/core/src/main/scala/spark/deploy/master/ApplicationState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ApplicationState.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master private[spark] object ApplicationState extends Enumeration("WAITING", "RUNNING", "FINISHED", "FAILED") { diff --git a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala index 99b60f7d09..cf384a985e 100644 --- a/core/src/main/scala/spark/deploy/master/ExecutorInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ExecutorInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.deploy.ExecutorState +import org.apache.spark.deploy.ExecutorState private[spark] class ExecutorInfo( val id: Int, diff --git a/core/src/main/scala/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index e5a7a87e2e..7cf0a7754f 100644 --- a/core/src/main/scala/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala @@ -15,28 +15,32 @@ * limitations under the License. 
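ApplicationSource above follows the Coda Hale metrics idiom used by every new source in this commit: register a named Gauge whose getValue closes over live state and is re-read on each poll. Standalone:

```scala
import com.codahale.metrics.{Gauge, MetricRegistry}

val registry = new MetricRegistry()
var coresGranted = 0

registry.register(MetricRegistry.name("cores", "number"), new Gauge[Int] {
  override def getValue: Int = coresGranted  // re-evaluated on every poll
})

coresGranted = 8
println(registry.getGauges.get("cores.number").getValue)  // prints 8
```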
*/ -package spark.deploy.master - -import akka.actor._ -import akka.actor.Terminated -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown} -import akka.util.duration._ +package org.apache.spark.deploy.master import java.text.SimpleDateFormat import java.util.Date import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} -import spark.deploy._ -import spark.{Logging, SparkException, Utils} -import spark.util.AkkaUtils -import ui.MasterWebUI +import akka.actor._ +import akka.actor.Terminated +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientDisconnected, RemoteClientShutdown} +import akka.util.duration._ + +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.deploy.{ApplicationDescription, ExecutorState} +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.ui.MasterWebUI +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.util.{Utils, AkkaUtils} private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Actor with Logging { val DATE_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss") // For application IDs val WORKER_TIMEOUT = System.getProperty("spark.worker.timeout", "60").toLong * 1000 - + val RETAINED_APPLICATIONS = System.getProperty("spark.deploy.retainedApplications", "200").toInt + val REAPER_ITERATIONS = System.getProperty("spark.dead.worker.persistence", "15").toInt + var nextAppNumber = 0 val workers = new HashSet[WorkerInfo] val idToWorker = new HashMap[String, WorkerInfo] @@ -53,10 +57,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act var firstApp: Option[ApplicationInfo] = None - val webUi = new MasterWebUI(self) - Utils.checkHost(host, "Expected hostname") + val masterMetricsSystem = MetricsSystem.createMetricsSystem("master") + val applicationMetricsSystem = MetricsSystem.createMetricsSystem("applications") + val masterSource = new MasterSource(this) + + val webUi = new MasterWebUI(this, webUiPort) + val masterPublicAddress = { val envVar = System.getenv("SPARK_PUBLIC_DNS") if (envVar != null) envVar else host @@ -72,17 +80,23 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) webUi.start() - context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis)(timeOutDeadWorkers()) + context.system.scheduler.schedule(0 millis, WORKER_TIMEOUT millis, self, CheckForWorkerTimeOut) + + masterMetricsSystem.registerSource(masterSource) + masterMetricsSystem.start() + applicationMetricsSystem.start() } override def postStop() { webUi.stop() + masterMetricsSystem.stop() + applicationMetricsSystem.stop() } override def receive = { case RegisterWorker(id, host, workerPort, cores, memory, worker_webUiPort, publicAddress) => { logInfo("Registering worker %s:%d with %d cores, %s RAM".format( - host, workerPort, cores, Utils.memoryMegabytesToString(memory))) + host, workerPort, cores, Utils.megabytesToString(memory))) if (idToWorker.contains(id)) { sender ! RegisterWorkerFailed("Duplicate worker ID") } else { @@ -160,7 +174,11 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act } case RequestMasterState => { - sender ! MasterState(host, port, workers.toArray, apps.toArray, completedApps.toArray) + sender ! 
MasterStateResponse(host, port, workers.toArray, apps.toArray, completedApps.toArray) + } + + case CheckForWorkerTimeOut => { + timeOutDeadWorkers() } } @@ -225,20 +243,27 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act def launchExecutor(worker: WorkerInfo, exec: ExecutorInfo, sparkHome: String) { logInfo("Launching executor " + exec.fullId + " on worker " + worker.id) worker.addExecutor(exec) - worker.actor ! LaunchExecutor(exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome) - exec.application.driver ! ExecutorAdded(exec.id, worker.id, worker.hostPort, exec.cores, exec.memory) + worker.actor ! LaunchExecutor( + exec.application.id, exec.id, exec.application.desc, exec.cores, exec.memory, sparkHome) + exec.application.driver ! ExecutorAdded( + exec.id, worker.id, worker.hostPort, exec.cores, exec.memory) } def addWorker(id: String, host: String, port: Int, cores: Int, memory: Int, webUiPort: Int, publicAddress: String): WorkerInfo = { - // There may be one or more refs to dead workers on this same node (w/ different ID's), remove them. - workers.filter(w => (w.host == host && w.port == port) && (w.state == WorkerState.DEAD)).foreach(workers -= _) + // There may be one or more refs to dead workers on this same node (w/ different ID's), + // remove them. + workers.filter { w => + (w.host == host && w.port == port) && (w.state == WorkerState.DEAD) + }.foreach { w => + workers -= w + } val worker = new WorkerInfo(id, host, port, cores, memory, sender, webUiPort, publicAddress) workers += worker idToWorker(worker.id) = worker actorToWorker(sender) = worker addressToWorker(sender.path.address) = worker - return worker + worker } def removeWorker(worker: WorkerInfo) { @@ -249,7 +274,8 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act addressToWorker -= worker.actor.path.address for (exec <- worker.executors.values) { logInfo("Telling app of lost executor: " + exec.id) - exec.application.driver ! ExecutorUpdated(exec.id, ExecutorState.LOST, Some("worker lost"), None) + exec.application.driver ! 
ExecutorUpdated( + exec.id, ExecutorState.LOST, Some("worker lost"), None) exec.application.removeExecutor(exec) } } @@ -258,6 +284,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act val now = System.currentTimeMillis() val date = new Date(now) val app = new ApplicationInfo(now, newApplicationId(date), desc, date, driver, desc.appUiUrl) + applicationMetricsSystem.registerSource(app.appSource) apps += app idToApp(app.id) = app actorToApp(driver) = app @@ -269,7 +296,7 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act if (workersAlive.size > 0 && !workersAlive.exists(_.memoryFree >= desc.memoryPerSlave)) { logWarning("Could not find any workers with enough memory for " + firstApp.get.id) } - return app + app } def finishApplication(app: ApplicationInfo) { @@ -283,7 +310,14 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act idToApp -= app.id actorToApp -= app.driver addressToApp -= app.driver.path.address - completedApps += app // Remember it in our history + if (completedApps.size >= RETAINED_APPLICATIONS) { + val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1) + completedApps.take(toRemove).foreach( a => { + applicationMetricsSystem.removeSource(a.appSource) + }) + completedApps.trimStart(toRemove) + } + completedApps += app // Remember it in our history waitingApps -= app for (exec <- app.executors.values) { exec.worker.removeExecutor(exec) @@ -308,12 +342,17 @@ private[spark] class Master(host: String, port: Int, webUiPort: Int) extends Act /** Check for, and remove, any timed-out workers */ def timeOutDeadWorkers() { // Copy the workers into an array so we don't modify the hashset while iterating through it - val expirationTime = System.currentTimeMillis() - WORKER_TIMEOUT - val toRemove = workers.filter(_.lastHeartbeat < expirationTime).toArray + val currentTime = System.currentTimeMillis() + val toRemove = workers.filter(_.lastHeartbeat < currentTime - WORKER_TIMEOUT).toArray for (worker <- toRemove) { - logWarning("Removing %s because we got no heartbeat in %d seconds".format( - worker.id, WORKER_TIMEOUT)) - removeWorker(worker) + if (worker.state != WorkerState.DEAD) { + logWarning("Removing %s because we got no heartbeat in %d seconds".format( + worker.id, WORKER_TIMEOUT/1000)) + removeWorker(worker) + } else { + if (worker.lastHeartbeat < currentTime - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT)) + workers -= worker // we've seen this DEAD worker in the UI, etc. for long enough; cull it + } } } } diff --git a/core/src/main/scala/spark/deploy/master/MasterArguments.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala index d0ec3d5ea0..9d89b455fb 100644 --- a/core/src/main/scala/spark/deploy/master/MasterArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterArguments.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master -import spark.util.IntParam -import spark.Utils +import org.apache.spark.util.{Utils, IntParam} /** * Command-line parser for the master. 
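The timeOutDeadWorkers rewrite near the end of the Master.scala hunk above now treats workers in two stages: a live worker that misses a heartbeat is removed, while one already marked DEAD stays visible in the UI for REAPER_ITERATIONS further timeout periods before being culled. The cutoff arithmetic as a sketch, using the defaults from the diff:

```scala
val WORKER_TIMEOUT = 60L * 1000  // spark.worker.timeout default, in ms
val REAPER_ITERATIONS = 15       // spark.dead.worker.persistence default

def missedHeartbeat(lastHeartbeat: Long, now: Long): Boolean =
  lastHeartbeat < now - WORKER_TIMEOUT

// A DEAD worker is only dropped after (15 + 1) * 60s of silence, i.e. 16 minutes.
def cullDeadWorker(lastHeartbeat: Long, now: Long): Boolean =
  lastHeartbeat < now - ((REAPER_ITERATIONS + 1) * WORKER_TIMEOUT)
```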
@@ -38,7 +37,10 @@ private[spark] class MasterArguments(args: Array[String]) { if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) { webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt } - + if (System.getProperty("master.ui.port") != null) { + webUiPort = System.getProperty("master.ui.port").toInt + } + parse(args.toList) def parse(args: List[String]): Unit = args match { diff --git a/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala new file mode 100644 index 0000000000..8dd0a42f71 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/master/MasterSource.scala @@ -0,0 +1,25 @@ +package org.apache.spark.deploy.master + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class MasterSource(val master: Master) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "master" + + // Gauge for worker numbers in cluster + metricRegistry.register(MetricRegistry.name("workers","number"), new Gauge[Int] { + override def getValue: Int = master.workers.size + }) + + // Gauge for application numbers in cluster + metricRegistry.register(MetricRegistry.name("apps", "number"), new Gauge[Int] { + override def getValue: Int = master.apps.size + }) + + // Gauge for waiting application numbers in cluster + metricRegistry.register(MetricRegistry.name("waitingApps", "number"), new Gauge[Int] { + override def getValue: Int = master.waitingApps.size + }) +} diff --git a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index 4135cfeb28..6219f11f2a 100644 --- a/core/src/main/scala/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master import akka.actor.ActorRef import scala.collection.mutable -import spark.Utils +import org.apache.spark.util.Utils private[spark] class WorkerInfo( val id: String, diff --git a/core/src/main/scala/spark/deploy/master/WorkerState.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala index 3e50b7748d..b5ee6dca79 100644 --- a/core/src/main/scala/spark/deploy/master/WorkerState.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerState.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.deploy.master +package org.apache.spark.deploy.master private[spark] object WorkerState extends Enumeration("ALIVE", "DEAD", "DECOMMISSIONED") { type WorkerState = Value diff --git a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 32264af393..f4e574d15d 100644 --- a/core/src/main/scala/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -15,7 +15,9 @@ * limitations under the License. 
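The MasterArguments hunk above adds a master.ui.port system property that, being checked after the environment variable, takes precedence over SPARK_MASTER_WEBUI_PORT (explicit command-line flags, parsed afterwards, override both). The resolution order in isolation:

```scala
var webUiPort = 8080  // built-in default (the old MasterWebUI.DEFAULT_PORT)

if (System.getenv("SPARK_MASTER_WEBUI_PORT") != null) {
  webUiPort = System.getenv("SPARK_MASTER_WEBUI_PORT").toInt
}
if (System.getProperty("master.ui.port") != null) {
  webUiPort = System.getProperty("master.ui.port").toInt  // checked last, so it wins
}
```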
*/ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask @@ -25,20 +27,20 @@ import javax.servlet.http.HttpServletRequest import net.liftweb.json.JsonAST.JValue -import scala.xml.Node - -import spark.deploy.{RequestMasterState, JsonProtocol, MasterState} -import spark.deploy.master.ExecutorInfo -import spark.ui.UIUtils +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.master.ExecutorInfo +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils private[spark] class ApplicationPage(parent: MasterWebUI) { - val master = parent.master + val master = parent.masterActorRef implicit val timeout = parent.timeout /** Executor details for a particular application */ def renderJson(request: HttpServletRequest): JValue = { val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) @@ -49,7 +51,7 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { val appId = request.getParameter("appId") - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val app = state.activeApps.find(_.id == appId).getOrElse({ state.completedApps.find(_.id == appId).getOrElse(null) @@ -60,24 +62,26 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { val executorTable = UIUtils.listingTable(executorHeaders, executorRow, executors) val content = - <hr /> - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li><strong>ID:</strong> {app.id}</li> - <li><strong>Description:</strong> {app.desc.name}</li> + <li><strong>Name:</strong> {app.desc.name}</li> <li><strong>User:</strong> {app.desc.user}</li> <li><strong>Cores:</strong> { if (app.desc.maxCores == Integer.MAX_VALUE) { - "Unlimited %s granted".format(app.coresGranted) + "Unlimited (%s granted)".format(app.coresGranted) } else { "%s (%s granted, %s left)".format( app.desc.maxCores, app.coresGranted, app.coresLeft) } } </li> - <li><strong>Memory per Slave:</strong> {app.desc.memoryPerSlave}</li> + <li> + <strong>Executor Memory:</strong> + {Utils.megabytesToString(app.desc.memoryPerSlave)} + </li> <li><strong>Submit Date:</strong> {app.submitDate}</li> <li><strong>State:</strong> {app.state}</li> <li><strong><a href={app.appUiUrl}>Application Detail UI</a></strong></li> @@ -85,16 +89,13 @@ private[spark] class ApplicationPage(parent: MasterWebUI) { </div> </div> - <hr/> - - <div class="row"> <!-- Executors --> + <div class="row-fluid"> <!-- Executors --> <div class="span12"> - <h3> Executor Summary </h3> - <br/> + <h4> Executor Summary </h4> {executorTable} </div> </div>; - UIUtils.basicSparkPage(content, "Application Info: " + app.desc.name) + UIUtils.basicSparkPage(content, "Application: " + app.desc.name) } def executorRow(executor: ExecutorInfo): Seq[Node] = { diff --git 
a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala index b05197c1b9..d7a57229b0 100644 --- a/core/src/main/scala/spark/deploy/master/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/IndexPage.scala @@ -15,35 +15,45 @@ * limitations under the License. */ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ -import javax.servlet.http.HttpServletRequest - -import scala.xml.Node +import net.liftweb.json.JsonAST.JValue -import spark.deploy.{RequestMasterState, DeployWebUI, MasterState} -import spark.Utils -import spark.ui.UIUtils -import spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.deploy.DeployWebUI +import org.apache.spark.deploy.DeployMessages.{MasterStateResponse, RequestMasterState} +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.master.{ApplicationInfo, WorkerInfo} +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils private[spark] class IndexPage(parent: MasterWebUI) { - val master = parent.master + val master = parent.masterActorRef implicit val timeout = parent.timeout + def renderJson(request: HttpServletRequest): JValue = { + val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterStateResponse] + val state = Await.result(stateFuture, 30 seconds) + JsonProtocol.writeMasterState(state) + } + /** Index view listing applications and executors */ def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (master ? RequestMasterState)(timeout).mapTo[MasterState] + val stateFuture = (master ? 
RequestMasterState)(timeout).mapTo[MasterStateResponse] val state = Await.result(stateFuture, 30 seconds) val workerHeaders = Seq("Id", "Address", "State", "Cores", "Memory") val workers = state.workers.sortBy(_.id) val workerTable = UIUtils.listingTable(workerHeaders, workerRow, workers) - val appHeaders = Seq("ID", "Description", "Cores", "Memory per Node", "Submit Time", "User", + val appHeaders = Seq("ID", "Name", "Cores", "Memory per Node", "Submitted Time", "User", "State", "Duration") val activeApps = state.activeApps.sortBy(_.startTime).reverse val activeAppsTable = UIUtils.listingTable(appHeaders, appRow, activeApps) @@ -51,8 +61,7 @@ private[spark] class IndexPage(parent: MasterWebUI) { val completedAppsTable = UIUtils.listingTable(appHeaders, appRow, completedApps) val content = - <hr /> - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li><strong>URL:</strong> {state.uri}</li> @@ -60,8 +69,8 @@ private[spark] class IndexPage(parent: MasterWebUI) { <li><strong>Cores:</strong> {state.workers.map(_.cores).sum} Total, {state.workers.map(_.coresUsed).sum} Used</li> <li><strong>Memory:</strong> - {Utils.memoryMegabytesToString(state.workers.map(_.memory).sum)} Total, - {Utils.memoryMegabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li> + {Utils.megabytesToString(state.workers.map(_.memory).sum)} Total, + {Utils.megabytesToString(state.workers.map(_.memoryUsed).sum)} Used</li> <li><strong>Applications:</strong> {state.activeApps.size} Running, {state.completedApps.size} Completed </li> @@ -69,34 +78,28 @@ private[spark] class IndexPage(parent: MasterWebUI) { </div> </div> - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Workers </h3> - <br/> + <h4> Workers </h4> {workerTable} </div> </div> - <hr/> - - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Running Applications </h3> - <br/> + <h4> Running Applications </h4> + {activeAppsTable} </div> </div> - <hr/> - - <div class="row"> + <div class="row-fluid"> <div class="span12"> - <h3> Completed Applications </h3> - <br/> + <h4> Completed Applications </h4> {completedAppsTable} </div> </div>; - UIUtils.basicSparkPage(content, "Spark Master: " + state.uri) + UIUtils.basicSparkPage(content, "Spark Master at " + state.uri) } def workerRow(worker: WorkerInfo): Seq[Node] = { @@ -108,8 +111,8 @@ private[spark] class IndexPage(parent: MasterWebUI) { <td>{worker.state}</td> <td>{worker.cores} ({worker.coresUsed} Used)</td> <td sorttable_customkey={"%s.%s".format(worker.memory, worker.memoryUsed)}> - {Utils.memoryMegabytesToString(worker.memory)} - ({Utils.memoryMegabytesToString(worker.memoryUsed)} Used) + {Utils.megabytesToString(worker.memory)} + ({Utils.megabytesToString(worker.memoryUsed)} Used) </td> </tr> } @@ -127,7 +130,7 @@ private[spark] class IndexPage(parent: MasterWebUI) { {app.coresGranted} </td> <td sorttable_customkey={app.desc.memoryPerSlave.toString}> - {Utils.memoryMegabytesToString(app.desc.memoryPerSlave)} + {Utils.megabytesToString(app.desc.memoryPerSlave)} </td> <td>{DeployWebUI.formatDate(app.submitDate)}</td> <td>{app.desc.user}</td> diff --git a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala index 04b32c7968..f4df729e87 100644 --- a/core/src/main/scala/spark/deploy/master/ui/MasterWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterWebUI.scala @@ -15,29 +15,31 @@ * limitations under the License. 
*/ -package spark.deploy.master.ui +package org.apache.spark.deploy.master.ui -import akka.actor.ActorRef import akka.util.Duration import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.{Logging, Utils} -import spark.ui.JettyUtils -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging} +import org.apache.spark.deploy.master.Master +import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.util.Utils /** * Web UI server for the standalone master. */ private[spark] -class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) extends Logging { +class MasterWebUI(val master: Master, requestedPort: Int) extends Logging { implicit val timeout = Duration.create( System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds") val host = Utils.localHostName() - val port = requestedPort.getOrElse( - System.getProperty("master.ui.port", MasterWebUI.DEFAULT_PORT).toInt) + val port = requestedPort + + val masterActorRef = master.self var server: Option[Server] = None var boundPort: Option[Int] = None @@ -58,10 +60,14 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten } } - val handlers = Array[(String, Handler)]( + val metricsHandlers = master.masterMetricsSystem.getServletHandlers ++ + master.applicationMetricsSystem.getServletHandlers + + val handlers = metricsHandlers ++ Array[(String, Handler)]( ("/static", createStaticHandler(MasterWebUI.STATIC_RESOURCE_DIR)), ("/app/json", (request: HttpServletRequest) => applicationPage.renderJson(request)), ("/app", (request: HttpServletRequest) => applicationPage.render(request)), + ("/json", (request: HttpServletRequest) => indexPage.renderJson(request)), ("*", (request: HttpServletRequest) => indexPage.render(request)) ) @@ -71,6 +77,5 @@ class MasterWebUI(val master: ActorRef, requestedPort: Option[Int] = None) exten } private[spark] object MasterWebUI { - val STATIC_RESOURCE_DIR = "spark/ui/static" - val DEFAULT_PORT = "8080" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" } diff --git a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala index 8b51ff1c3a..e3dc30eefc 100644 --- a/core/src/main/scala/spark/deploy/worker/ExecutorRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ExecutorRunner.scala @@ -15,18 +15,20 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker import java.io._ import java.lang.System.getenv -import spark.deploy.{ExecutorState, ExecutorStateChanged, ApplicationDescription} + import akka.actor.ActorRef -import spark.{Utils, Logging} -import java.net.{URI, URL} -import org.apache.hadoop.fs.{Path, FileSystem} -import org.apache.hadoop.conf.Configuration -import scala.Some -import spark.deploy.ExecutorStateChanged + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.{Logging} +import org.apache.spark.deploy.{ExecutorState, ApplicationDescription} +import org.apache.spark.deploy.DeployMessages.ExecutorStateChanged +import org.apache.spark.util.Utils /** * Manages the execution of one executor process. 
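The MasterWebUI hunk above merges the metrics servlet handlers into the route table and adds a /json route serving the index page's state. A hedged usage sketch (host and port are placeholders for a running master):

```scala
import scala.io.Source

// Fetch the master's state from the new /json endpoint.
val masterState = Source.fromURL("http://localhost:8080/json").mkString
println(masterState)
```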
@@ -39,18 +41,19 @@ private[spark] class ExecutorRunner( val memory: Int, val worker: ActorRef, val workerId: String, - val hostPort: String, + val host: String, val sparkHome: File, val workDir: File) extends Logging { - Utils.checkHostPort(hostPort, "Expected hostport") - val fullId = appId + "/" + execId var workerThread: Thread = null var process: Process = null var shutdownHook: Thread = null + private def getAppEnv(key: String): Option[String] = + appDesc.command.environment.get(key).orElse(Option(getenv(key))) + def start() { workerThread = new Thread("ExecutorRunner for " + fullId) { override def run() { fetchAndRunExecutor() } @@ -88,14 +91,14 @@ private[spark] class ExecutorRunner( /** Replace variables such as {{EXECUTOR_ID}} and {{CORES}} in a command argument passed to us */ def substituteVariables(argument: String): String = argument match { case "{{EXECUTOR_ID}}" => execId.toString - case "{{HOSTNAME}}" => Utils.parseHostPort(hostPort)._1 + case "{{HOSTNAME}}" => host case "{{CORES}}" => cores.toString case other => other } def buildCommandSeq(): Seq[String] = { val command = appDesc.command - val runner = Option(getenv("JAVA_HOME")).map(_ + "/bin/java").getOrElse("java") + val runner = getAppEnv("JAVA_HOME").map(_ + "/bin/java").getOrElse("java") // SPARK-698: do not call the run.cmd script, as process.destroy() // fails to kill a process tree on Windows Seq(runner) ++ buildJavaOpts() ++ Seq(command.mainClass) ++ @@ -107,10 +110,11 @@ private[spark] class ExecutorRunner( * the way the JAVA_OPTS are assembled there. */ def buildJavaOpts(): Seq[String] = { - val libraryOpts = Option(getenv("SPARK_LIBRARY_PATH")) + val libraryOpts = getAppEnv("SPARK_LIBRARY_PATH") .map(p => List("-Djava.library.path=" + p)) .getOrElse(Nil) - val userOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) + val workerLocalOpts = Option(getenv("SPARK_JAVA_OPTS")).map(Utils.splitCommandString).getOrElse(Nil) + val userOpts = getAppEnv("SPARK_JAVA_OPTS").map(Utils.splitCommandString).getOrElse(Nil) val memoryOpts = Seq("-Xms" + memory + "M", "-Xmx" + memory + "M") // Figure out our classpath with the external compute-classpath script @@ -119,12 +123,12 @@ private[spark] class ExecutorRunner( Seq(sparkHome + "/bin/compute-classpath" + ext), extraEnvironment=appDesc.command.environment) - Seq("-cp", classPath) ++ libraryOpts ++ userOpts ++ memoryOpts + Seq("-cp", classPath) ++ libraryOpts ++ workerLocalOpts ++ userOpts ++ memoryOpts } /** Spawn a thread that will redirect a given stream to a file */ def redirectStream(in: InputStream, file: File) { - val out = new FileOutputStream(file) + val out = new FileOutputStream(file, true) new Thread("redirect output to " + file) { override def run() { try { @@ -150,6 +154,7 @@ private[spark] class ExecutorRunner( // Launch the process val command = buildCommandSeq() + logInfo("Launch command: " + command.mkString("\"", "\" \"", "\"")) val builder = new ProcessBuilder(command: _*).directory(executorDir) val env = builder.environment() for ((key, value) <- appDesc.command.environment) { @@ -160,9 +165,16 @@ private[spark] class ExecutorRunner( env.put("SPARK_LAUNCH_WITH_SCALA", "0") process = builder.start() + val header = "Spark Executor Command: %s\n%s\n\n".format( + command.mkString("\"", "\" \"", "\""), "=" * 40) + // Redirect its stdout and stderr to files - redirectStream(process.getInputStream, new File(executorDir, "stdout")) - redirectStream(process.getErrorStream, new File(executorDir, "stderr")) + val stdout = new 
File(executorDir, "stdout") + redirectStream(process.getInputStream, stdout) + + val stderr = new File(executorDir, "stderr") + Files.write(header, stderr, Charsets.UTF_8) + redirectStream(process.getErrorStream, stderr) // Wait for it to exit; this is actually a bad thing if it happens, because we expect to run // long-lived processes only. However, in the future, we might restart the executor a few diff --git a/core/src/main/scala/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 0bd88ea253..09530beb3b 100644 --- a/core/src/main/scala/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -15,23 +15,26 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker -import scala.collection.mutable.{ArrayBuffer, HashMap} -import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} -import akka.util.duration._ -import spark.{Logging, Utils} -import spark.util.AkkaUtils -import spark.deploy._ -import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} import java.text.SimpleDateFormat import java.util.Date -import spark.deploy.RegisterWorker -import spark.deploy.LaunchExecutor -import spark.deploy.RegisterWorkerFailed -import spark.deploy.master.Master import java.io.File -import ui.WorkerWebUI + +import scala.collection.mutable.HashMap + +import akka.actor.{ActorRef, Props, Actor, ActorSystem, Terminated} +import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} +import akka.util.duration._ + +import org.apache.spark.{Logging} +import org.apache.spark.deploy.ExecutorState +import org.apache.spark.deploy.DeployMessages._ +import org.apache.spark.deploy.master.Master +import org.apache.spark.deploy.worker.ui.WorkerWebUI +import org.apache.spark.metrics.MetricsSystem +import org.apache.spark.util.{Utils, AkkaUtils} + private[spark] class Worker( host: String, @@ -67,6 +70,9 @@ private[spark] class Worker( var coresUsed = 0 var memoryUsed = 0 + val metricsSystem = MetricsSystem.createMetricsSystem("worker") + val workerSource = new WorkerSource(this) + def coresFree: Int = cores - coresUsed def memoryFree: Int = memory - memoryUsed @@ -90,13 +96,17 @@ private[spark] class Worker( override def preStart() { logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format( - host, port, cores, Utils.memoryMegabytesToString(memory))) + host, port, cores, Utils.megabytesToString(memory))) sparkHome = new File(Option(System.getenv("SPARK_HOME")).getOrElse(".")) logInfo("Spark home: " + sparkHome) createWorkDir() webUi = new WorkerWebUI(this, workDir, Some(webUiPort)) + webUi.start() connectToMaster() + + metricsSystem.registerSource(workerSource) + metricsSystem.start() } def connectToMaster() { @@ -122,7 +132,7 @@ private[spark] class Worker( case LaunchExecutor(appId, execId, appDesc, cores_, memory_, execSparkHome_) => logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name)) val manager = new ExecutorRunner( - appId, execId, appDesc, cores_, memory_, self, workerId, host + ":" + port, new File(execSparkHome_), workDir) + appId, execId, appDesc, cores_, memory_, self, workerId, host, new File(execSparkHome_), workDir) executors(appId + "/" + execId) = manager manager.start() coresUsed += cores_ @@ -155,10 +165,10 @@ private[spark] class Worker( case Terminated(_) | RemoteClientDisconnected(_, _) | RemoteClientShutdown(_, _) => 
masterDisconnected() - + case RequestWorkerState => { - sender ! WorkerState(host, port, workerId, executors.values.toList, - finishedExecutors.values.toList, masterUrl, cores, memory, + sender ! WorkerStateResponse(host, port, workerId, executors.values.toList, + finishedExecutors.values.toList, masterUrl, cores, memory, coresUsed, memoryUsed, masterWebUiUrl) } } @@ -178,6 +188,7 @@ private[spark] class Worker( override def postStop() { executors.values.foreach(_.kill()) webUi.stop() + metricsSystem.stop() } } diff --git a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index 9fcd3260ca..0ae89a864f 100644 --- a/core/src/main/scala/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -15,11 +15,9 @@ * limitations under the License. */ -package spark.deploy.worker +package org.apache.spark.deploy.worker -import spark.util.IntParam -import spark.util.MemoryParam -import spark.Utils +import org.apache.spark.util.{Utils, IntParam, MemoryParam} import java.lang.management.ManagementFactory /** diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala new file mode 100644 index 0000000000..6427c0178f --- /dev/null +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerSource.scala @@ -0,0 +1,34 @@ +package org.apache.spark.deploy.worker + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class WorkerSource(val worker: Worker) extends Source { + val sourceName = "worker" + val metricRegistry = new MetricRegistry() + + metricRegistry.register(MetricRegistry.name("executors", "number"), new Gauge[Int] { + override def getValue: Int = worker.executors.size + }) + + // Gauge for cores used of this worker + metricRegistry.register(MetricRegistry.name("coresUsed", "number"), new Gauge[Int] { + override def getValue: Int = worker.coresUsed + }) + + // Gauge for memory used of this worker + metricRegistry.register(MetricRegistry.name("memUsed", "MBytes"), new Gauge[Int] { + override def getValue: Int = worker.memoryUsed + }) + + // Gauge for cores free of this worker + metricRegistry.register(MetricRegistry.name("coresFree", "number"), new Gauge[Int] { + override def getValue: Int = worker.coresFree + }) + + // Gauge for memory free of this worker + metricRegistry.register(MetricRegistry.name("memFree", "MBytes"), new Gauge[Int] { + override def getValue: Int = worker.memoryFree + }) +} diff --git a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala index 7548a26c2e..d2d3617498 100644 --- a/core/src/main/scala/spark/deploy/worker/ui/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/IndexPage.scala @@ -15,22 +15,24 @@ * limitations under the License. 
*/ -package spark.deploy.worker.ui +package org.apache.spark.deploy.worker.ui + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node import akka.dispatch.Await import akka.pattern.ask import akka.util.duration._ -import javax.servlet.http.HttpServletRequest - import net.liftweb.json.JsonAST.JValue -import scala.xml.Node +import org.apache.spark.deploy.JsonProtocol +import org.apache.spark.deploy.DeployMessages.{RequestWorkerState, WorkerStateResponse} +import org.apache.spark.deploy.worker.ExecutorRunner +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils -import spark.deploy.{RequestWorkerState, JsonProtocol, WorkerState} -import spark.deploy.worker.ExecutorRunner -import spark.Utils -import spark.ui.UIUtils private[spark] class IndexPage(parent: WorkerWebUI) { val workerActor = parent.worker.self @@ -38,13 +40,13 @@ private[spark] class IndexPage(parent: WorkerWebUI) { val timeout = parent.timeout def renderJson(request: HttpServletRequest): JValue = { - val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState] + val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] val workerState = Await.result(stateFuture, 30 seconds) JsonProtocol.writeWorkerState(workerState) } def render(request: HttpServletRequest): Seq[Node] = { - val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerState] + val stateFuture = (workerActor ? RequestWorkerState)(timeout).mapTo[WorkerStateResponse] val workerState = Await.result(stateFuture, 30 seconds) val executorHeaders = Seq("ExecutorID", "Cores", "Memory", "Job Details", "Logs") @@ -54,8 +56,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) { UIUtils.listingTable(executorHeaders, executorRow, workerState.finishedExecutors) val content = - <hr /> - <div class="row"> <!-- Worker Details --> + <div class="row-fluid"> <!-- Worker Details --> <div class="span12"> <ul class="unstyled"> <li><strong>ID:</strong> {workerState.workerId}</li> @@ -63,32 +64,29 @@ private[spark] class IndexPage(parent: WorkerWebUI) { Master URL:</strong> {workerState.masterUrl} </li> <li><strong>Cores:</strong> {workerState.cores} ({workerState.coresUsed} Used)</li> - <li><strong>Memory:</strong> {Utils.memoryMegabytesToString(workerState.memory)} - ({Utils.memoryMegabytesToString(workerState.memoryUsed)} Used)</li> + <li><strong>Memory:</strong> {Utils.megabytesToString(workerState.memory)} + ({Utils.megabytesToString(workerState.memoryUsed)} Used)</li> </ul> <p><a href={workerState.masterWebUiUrl}>Back to Master</a></p> </div> </div> - <hr/> - <div class="row"> <!-- Running Executors --> + <div class="row-fluid"> <!-- Running Executors --> <div class="span12"> - <h3> Running Executors {workerState.executors.size} </h3> - <br/> + <h4> Running Executors {workerState.executors.size} </h4> {runningExecutorTable} </div> </div> - <hr/> - <div class="row"> <!-- Finished Executors --> + <div class="row-fluid"> <!-- Finished Executors --> <div class="span12"> - <h3> Finished Executors </h3> - <br/> + <h4> Finished Executors </h4> {finishedExecutorTable} </div> </div>; - UIUtils.basicSparkPage(content, "Spark Worker on %s:%s".format(workerState.host, workerState.port)) + UIUtils.basicSparkPage(content, "Spark Worker at %s:%s".format( + workerState.host, workerState.port)) } def executorRow(executor: ExecutorRunner): Seq[Node] = { @@ -96,7 +94,7 @@ private[spark] class IndexPage(parent: WorkerWebUI) { <td>{executor.execId}</td> <td>{executor.cores}</td> <td 
sorttable_customkey={executor.memory.toString}> - {Utils.memoryMegabytesToString(executor.memory)} + {Utils.megabytesToString(executor.memory)} </td> <td> <ul class="unstyled"> diff --git a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala index 61d4cd6d99..95d6007f3b 100644 --- a/core/src/main/scala/spark/deploy/worker/ui/WorkerWebUI.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/ui/WorkerWebUI.scala @@ -15,9 +15,8 @@ * limitations under the License. */ -package spark.deploy.worker.ui +package org.apache.spark.deploy.worker.ui -import akka.actor.ActorRef import akka.util.{Duration, Timeout} import java.io.{FileInputStream, File} @@ -26,18 +25,19 @@ import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.deploy.worker.Worker -import spark.{Utils, Logging} -import spark.ui.JettyUtils -import spark.ui.JettyUtils._ -import spark.ui.UIUtils +import org.apache.spark.deploy.worker.Worker +import org.apache.spark.{Logging} +import org.apache.spark.ui.JettyUtils +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils /** * Web UI server for the standalone worker. */ private[spark] class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[Int] = None) - extends Logging { + extends Logging { implicit val timeout = Timeout( Duration.create(System.getProperty("spark.akka.askTimeout", "10").toLong, "seconds")) val host = Utils.localHostName() @@ -49,7 +49,9 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I val indexPage = new IndexPage(this) - val handlers = Array[(String, Handler)]( + val metricsHandlers = worker.metricsSystem.getServletHandlers + + val handlers = metricsHandlers ++ Array[(String, Handler)]( ("/static", createStaticHandler(WorkerWebUI.STATIC_RESOURCE_DIR)), ("/log", (request: HttpServletRequest) => log(request)), ("/logPage", (request: HttpServletRequest) => logPage(request)), @@ -111,30 +113,37 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I if (startByte > 0) { <a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s" .format(appId, executorId, logType, math.max(startByte-byteLength, 0), - byteLength)}> - <button>Previous {Utils.memoryBytesToString(math.min(byteLength, startByte))}</button> + byteLength)}> + <button type="button" class="btn btn-default"> + Previous {Utils.bytesToString(math.min(byteLength, startByte))} + </button> </a> } else { - <button disabled="disabled">Previous 0 B</button> + <button type="button" class="btn btn-default" disabled="disabled"> + Previous 0 B + </button> } val nextButton = if (endByte < logLength) { <a href={"?appId=%s&executorId=%s&logType=%s&offset=%s&byteLength=%s". 
format(appId, executorId, logType, endByte, byteLength)}> - <button>Next {Utils.memoryBytesToString(math.min(byteLength, logLength-endByte))}</button> + <button type="button" class="btn btn-default"> + Next {Utils.bytesToString(math.min(byteLength, logLength-endByte))} + </button> </a> } else { - <button disabled="disabled">Next 0 B</button> + <button type="button" class="btn btn-default" disabled="disabled"> + Next 0 B + </button> } val content = <html> <body> {linkToMaster} - <hr /> <div> <div style="float:left;width:40%">{backButton}</div> <div style="float:left;">{range}</div> @@ -177,6 +186,6 @@ class WorkerWebUI(val worker: Worker, val workDir: File, requestedPort: Option[I } private[spark] object WorkerWebUI { - val STATIC_RESOURCE_DIR = "spark/ui/static" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" val DEFAULT_PORT="8081" } diff --git a/core/src/main/scala/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala index 2e81151882..d365804994 100644 --- a/core/src/main/scala/spark/executor/Executor.scala +++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala @@ -15,26 +15,30 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor -import java.io.{File, FileOutputStream} -import java.net.{URI, URL, URLClassLoader} +import java.io.{File} +import java.lang.management.ManagementFactory +import java.nio.ByteBuffer import java.util.concurrent._ -import org.apache.hadoop.fs.FileUtil +import scala.collection.JavaConversions._ +import scala.collection.mutable.HashMap -import scala.collection.mutable.{ArrayBuffer, Map, HashMap} +import org.apache.spark.scheduler._ +import org.apache.spark._ +import org.apache.spark.util.Utils -import spark.broadcast._ -import spark.scheduler._ -import spark._ -import java.nio.ByteBuffer /** * The Mesos executor for Spark. */ -private[spark] class Executor(executorId: String, slaveHostname: String, properties: Seq[(String, String)]) extends Logging { - +private[spark] class Executor( + executorId: String, + slaveHostname: String, + properties: Seq[(String, String)]) + extends Logging +{ // Application dependencies (added through SparkContext) that we've fetched so far on this node. // Each map holds the master's timestamp for the version of that file or JAR we got. private val currentFiles: HashMap[String, Long] = new HashMap[String, Long]() @@ -57,6 +61,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert System.setProperty(key, value) } + // If we are in yarn mode, systems can have different disk layouts so we must set it + // to what Yarn on this system said was available. This will be used later when SparkEnv + // created. + if (java.lang.Boolean.valueOf(System.getenv("SPARK_YARN_MODE"))) { + System.setProperty("spark.local.dir", getYarnLocalDirs()) + } + // Create our ClassLoader and set it on this thread private val urlClassLoader = createClassLoader() private val replClassLoader = addReplClassLoaderIfNeeded(urlClassLoader) @@ -69,7 +80,7 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert override def uncaughtException(thread: Thread, exception: Throwable) { try { logError("Uncaught exception in thread " + thread, exception) - + // We may have been called from a shutdown hook. If so, we must not call System.exit(). // (If we do, we will deadlock.) 
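      // (Background note, a simplification of JVM behavior rather than anything in
      // this diff: shutdown hooks run while Runtime.exit() holds the shutdown lock,
      // so a System.exit() issued from inside a hook blocks forever waiting on the
      // very shutdown sequence it is part of. Hence the Utils.inShutdown() guard
      // that follows.)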
if (!Utils.inShutdown()) { @@ -87,9 +98,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert } ) + val executorSource = new ExecutorSource(this) + // Initialize Spark environment (using system properties read above) val env = SparkEnv.createFromSystemProperties(executorId, slaveHostname, 0, false, false) SparkEnv.set(env) + env.metricsSystem.registerSource(executorSource) + private val akkaFrameSize = env.actorSystem.settings.config.getBytes("akka.remote.netty.message-frame-size") // Start worker thread pool @@ -100,6 +115,21 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert threadPool.execute(new TaskRunner(context, taskId, serializedTask)) } + /** Get the Yarn approved local directories. */ + private def getYarnLocalDirs(): String = { + // Hadoop 0.23 and 2.x have different Environment variable names for the + // local dirs, so lets check both. We assume one of the 2 is set. + // LOCAL_DIRS => 2.X, YARN_LOCAL_DIRS => 0.23.X + val localDirs = Option(System.getenv("YARN_LOCAL_DIRS")) + .getOrElse(Option(System.getenv("LOCAL_DIRS")) + .getOrElse("")) + + if (localDirs.isEmpty()) { + throw new Exception("Yarn Local dirs can't be empty") + } + return localDirs + } + class TaskRunner(context: ExecutorBackend, taskId: Long, serializedTask: ByteBuffer) extends Runnable { @@ -112,6 +142,9 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert context.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER) var attemptedTask: Option[Task[Any]] = None var taskStart: Long = 0 + def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum + val startGCTime = getTotalGCTime + try { SparkEnv.set(env) Accumulators.clear() @@ -119,15 +152,16 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert updateDependencies(taskFiles, taskJars) val task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) attemptedTask = Some(task) - logInfo("Its generation is " + task.generation) - env.mapOutputTracker.updateGeneration(task.generation) + logInfo("Its epoch is " + task.epoch) + env.mapOutputTracker.updateEpoch(task.epoch) taskStart = System.currentTimeMillis() val value = task.run(taskId.toInt) val taskFinish = System.currentTimeMillis() - task.metrics.foreach{ m => + for (m <- task.metrics) { m.hostname = Utils.localHostName m.executorDeserializeTime = (taskStart - startTime).toInt m.executorRunTime = (taskFinish - taskStart).toInt + m.jvmGCTime = getTotalGCTime - startGCTime } //TODO I'd also like to track the time it takes to serialize the task results, but that is huge headache, b/c // we need to serialize the task metrics first. 
If TaskMetrics had a custom serialized format, we could @@ -151,7 +185,10 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert case t: Throwable => { val serviceTime = (System.currentTimeMillis() - taskStart).toInt val metrics = attemptedTask.flatMap(t => t.metrics) - metrics.foreach{m => m.executorRunTime = serviceTime} + for (m <- metrics) { + m.executorRunTime = serviceTime + m.jvmGCTime = getTotalGCTime - startGCTime + } val reason = ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics) context.statusUpdate(taskId, TaskState.FAILED, ser.serialize(reason)) @@ -189,13 +226,13 @@ private[spark] class Executor(executorId: String, slaveHostname: String, propert if (classUri != null) { logInfo("Using REPL class URI: " + classUri) try { - val klass = Class.forName("spark.repl.ExecutorClassLoader") + val klass = Class.forName("org.apache.spark.repl.ExecutorClassLoader") .asInstanceOf[Class[_ <: ClassLoader]] val constructor = klass.getConstructor(classOf[String], classOf[ClassLoader]) return constructor.newInstance(classUri, parent) } catch { case _: ClassNotFoundException => - logError("Could not find spark.repl.ExecutorClassLoader on classpath!") + logError("Could not find org.apache.spark.repl.ExecutorClassLoader on classpath!") System.exit(1) null } diff --git a/core/src/main/scala/spark/executor/ExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala index 33a6f8a824..ad7dd34c76 100644 --- a/core/src/main/scala/spark/executor/ExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorBackend.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer -import spark.TaskState.TaskState +import org.apache.spark.TaskState.TaskState /** * A pluggable interface used by the Executor to send updates to the cluster scheduler. diff --git a/core/src/main/scala/spark/executor/ExecutorExitCode.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala index 64b9fb88f8..e5c9bbbe28 100644 --- a/core/src/main/scala/spark/executor/ExecutorExitCode.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorExitCode.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.executor +package org.apache.spark.executor /** * These are exit codes that executors should use to provide the master with information about diff --git a/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala new file mode 100644 index 0000000000..17653cd560 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorSource.scala @@ -0,0 +1,55 @@ +package org.apache.spark.executor + +import com.codahale.metrics.{Gauge, MetricRegistry} + +import org.apache.hadoop.fs.FileSystem +import org.apache.hadoop.hdfs.DistributedFileSystem +import org.apache.hadoop.fs.LocalFileSystem + +import scala.collection.JavaConversions._ + +import org.apache.spark.metrics.source.Source + +class ExecutorSource(val executor: Executor) extends Source { + private def fileStats(scheme: String) : Option[FileSystem.Statistics] = + FileSystem.getAllStatistics().filter(s => s.getScheme.equals(scheme)).headOption + + private def registerFileSystemStat[T]( + scheme: String, name: String, f: FileSystem.Statistics => T, defaultValue: T) = { + metricRegistry.register(MetricRegistry.name("filesystem", scheme, name), new Gauge[T] { + override def getValue: T = fileStats(scheme).map(f).getOrElse(defaultValue) + }) + } + + val metricRegistry = new MetricRegistry() + val sourceName = "executor" + + // Gauge for executor thread pool's actively executing task counts + metricRegistry.register(MetricRegistry.name("threadpool", "activeTask", "count"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getActiveCount() + }) + + // Gauge for executor thread pool's approximate total number of tasks that have been completed + metricRegistry.register(MetricRegistry.name("threadpool", "completeTask", "count"), new Gauge[Long] { + override def getValue: Long = executor.threadPool.getCompletedTaskCount() + }) + + // Gauge for executor thread pool's current number of threads + metricRegistry.register(MetricRegistry.name("threadpool", "currentPool", "size"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getPoolSize() + }) + + // Gauge got executor thread pool's largest number of threads that have ever simultaneously been in th pool + metricRegistry.register(MetricRegistry.name("threadpool", "maxPool", "size"), new Gauge[Int] { + override def getValue: Int = executor.threadPool.getMaximumPoolSize() + }) + + // Gauge for file system stats of this executor + for (scheme <- Array("hdfs", "file")) { + registerFileSystemStat(scheme, "bytesRead", _.getBytesRead(), 0L) + registerFileSystemStat(scheme, "bytesWritten", _.getBytesWritten(), 0L) + registerFileSystemStat(scheme, "readOps", _.getReadOps(), 0) + registerFileSystemStat(scheme, "largeReadOps", _.getLargeReadOps(), 0) + registerFileSystemStat(scheme, "writeOps", _.getWriteOps(), 0) + } +} diff --git a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala index 09d12fb65b..f9bfe8ed2f 100644 --- a/core/src/main/scala/spark/executor/ExecutorURLClassLoader.scala +++ b/core/src/main/scala/org/apache/spark/executor/ExecutorURLClassLoader.scala @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package spark.executor +package org.apache.spark.executor import java.net.{URLClassLoader, URL} diff --git a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala index 4961c42fad..da62091980 100644 --- a/core/src/main/scala/spark/executor/MesosExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/MesosExecutorBackend.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer import org.apache.mesos.{Executor => MesosExecutor, MesosExecutorDriver, MesosNativeLibrary, ExecutorDriver} import org.apache.mesos.Protos.{TaskState => MesosTaskState, TaskStatus => MesosTaskStatus, _} -import spark.TaskState.TaskState +import org.apache.spark.TaskState.TaskState import com.google.protobuf.ByteString -import spark.{Utils, Logging} -import spark.TaskState +import org.apache.spark.{Logging} +import org.apache.spark.TaskState +import org.apache.spark.util.Utils private[spark] class MesosExecutorBackend extends MesosExecutor diff --git a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala index f4003da732..7839023868 100644 --- a/core/src/main/scala/spark/executor/StandaloneExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/StandaloneExecutorBackend.scala @@ -15,22 +15,18 @@ * limitations under the License. */ -package spark.executor +package org.apache.spark.executor import java.nio.ByteBuffer -import spark.Logging -import spark.TaskState.TaskState -import spark.util.AkkaUtils + import akka.actor.{ActorRef, Actor, Props, Terminated} import akka.remote.{RemoteClientLifeCycleEvent, RemoteClientShutdown, RemoteClientDisconnected} -import java.util.concurrent.{TimeUnit, ThreadPoolExecutor, SynchronousQueue} -import spark.scheduler.cluster._ -import spark.scheduler.cluster.RegisteredExecutor -import spark.scheduler.cluster.LaunchTask -import spark.scheduler.cluster.RegisterExecutorFailed -import spark.scheduler.cluster.RegisterExecutor -import spark.Utils -import spark.deploy.SparkHadoopUtil + +import org.apache.spark.{Logging, SparkEnv} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._ +import org.apache.spark.util.{Utils, AkkaUtils} + private[spark] class StandaloneExecutorBackend( driverUrl: String, @@ -85,19 +81,6 @@ private[spark] class StandaloneExecutorBackend( private[spark] object StandaloneExecutorBackend { def run(driverUrl: String, executorId: String, hostname: String, cores: Int) { - SparkHadoopUtil.runAsUser(run0, Tuple4[Any, Any, Any, Any] (driverUrl, executorId, hostname, cores)) - } - - // This will be run 'as' the user - def run0(args: Product) { - assert(4 == args.productArity) - runImpl(args.productElement(0).asInstanceOf[String], - args.productElement(1).asInstanceOf[String], - args.productElement(2).asInstanceOf[String], - args.productElement(3).asInstanceOf[Int]) - } - - private def runImpl(driverUrl: String, executorId: String, hostname: String, cores: Int) { // Debug code Utils.checkHost(hostname) diff --git a/core/src/main/scala/spark/executor/TaskMetrics.scala b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala index 3151627839..f311141148 100644 --- a/core/src/main/scala/spark/executor/TaskMetrics.scala +++ b/core/src/main/scala/org/apache/spark/executor/TaskMetrics.scala @@ -15,7 +15,7 @@ * 
limitations under the License. */ -package spark.executor +package org.apache.spark.executor class TaskMetrics extends Serializable { /** @@ -31,7 +31,7 @@ class TaskMetrics extends Serializable { /** * Time the executor spends actually running the task (including fetching shuffle data) */ - var executorRunTime:Int = _ + var executorRunTime: Int = _ /** * The number of bytes this task transmitted back to the driver as the TaskResult @@ -39,6 +39,11 @@ class TaskMetrics extends Serializable { var resultSize: Long = _ /** + * Amount of time the JVM spent in garbage collection while executing this task + */ + var jvmGCTime: Long = _ + + /** * If this task reads from shuffle output, metrics on getting shuffle data will be collected here */ var shuffleReadMetrics: Option[ShuffleReadMetrics] = None diff --git a/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala new file mode 100644 index 0000000000..90a0420caf --- /dev/null +++ b/core/src/main/scala/org/apache/spark/io/CompressionCodec.scala @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.io + +import java.io.{InputStream, OutputStream} + +import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} + +import org.xerial.snappy.{SnappyInputStream, SnappyOutputStream} + + +/** + * CompressionCodec allows the customization of choosing different compression implementations + * to be used in block storage. + */ +trait CompressionCodec { + + def compressedOutputStream(s: OutputStream): OutputStream + + def compressedInputStream(s: InputStream): InputStream +} + + +private[spark] object CompressionCodec { + + def createCodec(): CompressionCodec = { + // Set the default codec to Snappy since the LZF implementation initializes a pretty large + // buffer for every stream, which results in a lot of memory overhead when the number of + // shuffle reduce buckets are large. + createCodec(classOf[SnappyCompressionCodec].getName) + } + + def createCodec(codecName: String): CompressionCodec = { + Class.forName( + System.getProperty("spark.io.compression.codec", codecName), + true, + Thread.currentThread.getContextClassLoader).newInstance().asInstanceOf[CompressionCodec] + } +} + + +/** + * LZF implementation of [[org.apache.spark.io.CompressionCodec]]. + */ +class LZFCompressionCodec extends CompressionCodec { + + override def compressedOutputStream(s: OutputStream): OutputStream = { + new LZFOutputStream(s).setFinishBlockOnFlush(true) + } + + override def compressedInputStream(s: InputStream): InputStream = new LZFInputStream(s) +} + + +/** + * Snappy implementation of [[org.apache.spark.io.CompressionCodec]]. + * Block size can be configured by spark.io.compression.snappy.block.size. 
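
The CompressionCodec factory above resolves the implementation from a system property, falling back to Snappy for the buffer-overhead reason given in its comment. A hypothetical round-trip through the new API (the LZF override and the stream contents are illustrative; the property name comes from createCodec above):

    // Override the Snappy default for this JVM.
    System.setProperty("spark.io.compression.codec",
      "org.apache.spark.io.LZFCompressionCodec")
    val codec = CompressionCodec.createCodec()

    val sink = new java.io.ByteArrayOutputStream()
    val out = codec.compressedOutputStream(sink)
    out.write("hello".getBytes("UTF-8"))
    out.close()

    // Reading back through the matching input stream yields the original bytes.
    val in = codec.compressedInputStream(
      new java.io.ByteArrayInputStream(sink.toByteArray))
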
+ */ +class SnappyCompressionCodec extends CompressionCodec { + + override def compressedOutputStream(s: OutputStream): OutputStream = { + val blockSize = System.getProperty("spark.io.compression.snappy.block.size", "32768").toInt + new SnappyOutputStream(s, blockSize) + } + + override def compressedInputStream(s: InputStream): InputStream = new SnappyInputStream(s) +} diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala new file mode 100644 index 0000000000..0f9c4e00b1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsConfig.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics + +import java.util.Properties +import java.io.{File, FileInputStream, InputStream, IOException} + +import scala.collection.mutable +import scala.util.matching.Regex + +import org.apache.spark.Logging + +private[spark] class MetricsConfig(val configFile: Option[String]) extends Logging { + initLogging() + + val DEFAULT_PREFIX = "*" + val INSTANCE_REGEX = "^(\\*|[a-zA-Z]+)\\.(.+)".r + val METRICS_CONF = "metrics.properties" + + val properties = new Properties() + var propertyCategories: mutable.HashMap[String, Properties] = null + + private def setDefaultProperties(prop: Properties) { + prop.setProperty("*.sink.servlet.class", "org.apache.spark.metrics.sink.MetricsServlet") + prop.setProperty("*.sink.servlet.uri", "/metrics/json") + prop.setProperty("*.sink.servlet.sample", "false") + prop.setProperty("master.sink.servlet.uri", "/metrics/master/json") + prop.setProperty("applications.sink.servlet.uri", "/metrics/applications/json") + } + + def initialize() { + //Add default properties in case there's no properties file + setDefaultProperties(properties) + + // If spark.metrics.conf is not set, try to get file in class path + var is: InputStream = null + try { + is = configFile match { + case Some(f) => new FileInputStream(f) + case None => getClass.getClassLoader.getResourceAsStream(METRICS_CONF) + } + + if (is != null) { + properties.load(is) + } + } catch { + case e: Exception => logError("Error loading configure file", e) + } finally { + if (is != null) is.close() + } + + propertyCategories = subProperties(properties, INSTANCE_REGEX) + if (propertyCategories.contains(DEFAULT_PREFIX)) { + import scala.collection.JavaConversions._ + + val defaultProperty = propertyCategories(DEFAULT_PREFIX) + for { (inst, prop) <- propertyCategories + if (inst != DEFAULT_PREFIX) + (k, v) <- defaultProperty + if (prop.getProperty(k) == null) } { + prop.setProperty(k, v) + } + } + } + + def subProperties(prop: Properties, regex: Regex): mutable.HashMap[String, Properties] = { + val subProperties = new 
mutable.HashMap[String, Properties] + import scala.collection.JavaConversions._ + prop.foreach { kv => + if (regex.findPrefixOf(kv._1) != None) { + val regex(prefix, suffix) = kv._1 + subProperties.getOrElseUpdate(prefix, new Properties).setProperty(suffix, kv._2) + } + } + subProperties + } + + def getInstance(inst: String): Properties = { + propertyCategories.get(inst) match { + case Some(s) => s + case None => propertyCategories.getOrElse(DEFAULT_PREFIX, new Properties) + } + } +} + diff --git a/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala new file mode 100644 index 0000000000..bec0c83be8 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/MetricsSystem.scala @@ -0,0 +1,163 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics + +import com.codahale.metrics.{Metric, MetricFilter, MetricRegistry} + +import java.util.Properties +import java.util.concurrent.TimeUnit + +import scala.collection.mutable + +import org.apache.spark.Logging +import org.apache.spark.metrics.sink.{MetricsServlet, Sink} +import org.apache.spark.metrics.source.Source + +/** + * Spark Metrics System, created by specific "instance", combined by source, + * sink, periodically poll source metrics data to sink destinations. + * + * "instance" specify "who" (the role) use metrics system. In spark there are several roles + * like master, worker, executor, client driver, these roles will create metrics system + * for monitoring. So instance represents these roles. Currently in Spark, several instances + * have already implemented: master, worker, executor, driver, applications. + * + * "source" specify "where" (source) to collect metrics data. In metrics system, there exists + * two kinds of source: + * 1. Spark internal source, like MasterSource, WorkerSource, etc, which will collect + * Spark component's internal state, these sources are related to instance and will be + * added after specific metrics system is created. + * 2. Common source, like JvmSource, which will collect low level state, is configured by + * configuration and loaded through reflection. + * + * "sink" specify "where" (destination) to output metrics data to. Several sinks can be + * coexisted and flush metrics to all these sinks. + * + * Metrics configuration format is like below: + * [instance].[sink|source].[name].[options] = xxxx + * + * [instance] can be "master", "worker", "executor", "driver", "applications" which means only + * the specified instance has this property. + * wild card "*" can be used to replace instance name, which means all the instances will have + * this property. + * + * [sink|source] means this property belongs to source or sink. 
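
To make the configuration format described here concrete, a hypothetical metrics.properties (the class names and keys are the ones added in this commit; the periods, paths, and choice of instances are illustrative):

    # Attach a console sink to every instance, polling every 10 seconds.
    *.sink.console.class=org.apache.spark.metrics.sink.ConsoleSink
    *.sink.console.period=10
    *.sink.console.unit=seconds

    # CSV sink for workers only.
    worker.sink.csv.class=org.apache.spark.metrics.sink.CsvSink
    worker.sink.csv.directory=/tmp/spark-metrics/

    # JVM source on the master.
    master.source.jvm.class=org.apache.spark.metrics.source.JvmSource

Note also that MetricsConfig (above) installs a MetricsServlet for every instance by default, so master metrics are already served at /metrics/master/json and application metrics at /metrics/applications/json even with no configuration file at all.
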
This field can only be source or sink. + * + * [name] specify the name of sink or source, it is custom defined. + * + * [options] is the specific property of this source or sink. + */ +private[spark] class MetricsSystem private (val instance: String) extends Logging { + initLogging() + + val confFile = System.getProperty("spark.metrics.conf") + val metricsConfig = new MetricsConfig(Option(confFile)) + + val sinks = new mutable.ArrayBuffer[Sink] + val sources = new mutable.ArrayBuffer[Source] + val registry = new MetricRegistry() + + // Treat MetricsServlet as a special sink as it should be exposed to add handlers to web ui + private var metricsServlet: Option[MetricsServlet] = None + + /** Get any UI handlers used by this metrics system. */ + def getServletHandlers = metricsServlet.map(_.getHandlers).getOrElse(Array()) + + metricsConfig.initialize() + registerSources() + registerSinks() + + def start() { + sinks.foreach(_.start) + } + + def stop() { + sinks.foreach(_.stop) + } + + def registerSource(source: Source) { + sources += source + try { + registry.register(source.sourceName, source.metricRegistry) + } catch { + case e: IllegalArgumentException => logInfo("Metrics already registered", e) + } + } + + def removeSource(source: Source) { + sources -= source + registry.removeMatching(new MetricFilter { + def matches(name: String, metric: Metric): Boolean = name.startsWith(source.sourceName) + }) + } + + def registerSources() { + val instConfig = metricsConfig.getInstance(instance) + val sourceConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SOURCE_REGEX) + + // Register all the sources related to instance + sourceConfigs.foreach { kv => + val classPath = kv._2.getProperty("class") + try { + val source = Class.forName(classPath).newInstance() + registerSource(source.asInstanceOf[Source]) + } catch { + case e: Exception => logError("Source class " + classPath + " cannot be instantialized", e) + } + } + } + + def registerSinks() { + val instConfig = metricsConfig.getInstance(instance) + val sinkConfigs = metricsConfig.subProperties(instConfig, MetricsSystem.SINK_REGEX) + + sinkConfigs.foreach { kv => + val classPath = kv._2.getProperty("class") + try { + val sink = Class.forName(classPath) + .getConstructor(classOf[Properties], classOf[MetricRegistry]) + .newInstance(kv._2, registry) + if (kv._1 == "servlet") { + metricsServlet = Some(sink.asInstanceOf[MetricsServlet]) + } else { + sinks += sink.asInstanceOf[Sink] + } + } catch { + case e: Exception => logError("Sink class " + classPath + " cannot be instantialized", e) + } + } + } +} + +private[spark] object MetricsSystem { + val SINK_REGEX = "^sink\\.(.+)\\.(.+)".r + val SOURCE_REGEX = "^source\\.(.+)\\.(.+)".r + + val MINIMAL_POLL_UNIT = TimeUnit.SECONDS + val MINIMAL_POLL_PERIOD = 1 + + def checkMinimalPollingPeriod(pollUnit: TimeUnit, pollPeriod: Int) { + val period = MINIMAL_POLL_UNIT.convert(pollPeriod, pollUnit) + if (period < MINIMAL_POLL_PERIOD) { + throw new IllegalArgumentException("Polling period " + pollPeriod + " " + pollUnit + + " below than minimal polling period ") + } + } + + def createMetricsSystem(instance: String): MetricsSystem = new MetricsSystem(instance) +} diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala new file mode 100644 index 0000000000..bce257d6e6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/ConsoleSink.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.{ConsoleReporter, MetricRegistry} + +import java.util.Properties +import java.util.concurrent.TimeUnit + +import org.apache.spark.metrics.MetricsSystem + +class ConsoleSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val CONSOLE_DEFAULT_PERIOD = 10 + val CONSOLE_DEFAULT_UNIT = "SECONDS" + + val CONSOLE_KEY_PERIOD = "period" + val CONSOLE_KEY_UNIT = "unit" + + val pollPeriod = Option(property.getProperty(CONSOLE_KEY_PERIOD)) match { + case Some(s) => s.toInt + case None => CONSOLE_DEFAULT_PERIOD + } + + val pollUnit = Option(property.getProperty(CONSOLE_KEY_UNIT)) match { + case Some(s) => TimeUnit.valueOf(s.toUpperCase()) + case None => TimeUnit.valueOf(CONSOLE_DEFAULT_UNIT) + } + + MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) + + val reporter: ConsoleReporter = ConsoleReporter.forRegistry(registry) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .convertRatesTo(TimeUnit.SECONDS) + .build() + + override def start() { + reporter.start(pollPeriod, pollUnit) + } + + override def stop() { + reporter.stop() + } +} + diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala new file mode 100644 index 0000000000..3d1a06a395 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/CsvSink.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
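
Both ConsoleSink above and CsvSink below funnel their polling settings through MetricsSystem.checkMinimalPollingPeriod, which normalizes the period to seconds before comparing against the one-second floor. A quick illustration (the values are hypothetical):

    import java.util.concurrent.TimeUnit

    MetricsSystem.checkMinimalPollingPeriod(TimeUnit.SECONDS, 10)        // ok: 10s >= 1s
    MetricsSystem.checkMinimalPollingPeriod(TimeUnit.MINUTES, 1)         // ok: 60s
    MetricsSystem.checkMinimalPollingPeriod(TimeUnit.MILLISECONDS, 500)  // throws IllegalArgumentException
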
+ */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.{CsvReporter, MetricRegistry} + +import java.io.File +import java.util.{Locale, Properties} +import java.util.concurrent.TimeUnit + +import org.apache.spark.metrics.MetricsSystem + +class CsvSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val CSV_KEY_PERIOD = "period" + val CSV_KEY_UNIT = "unit" + val CSV_KEY_DIR = "directory" + + val CSV_DEFAULT_PERIOD = 10 + val CSV_DEFAULT_UNIT = "SECONDS" + val CSV_DEFAULT_DIR = "/tmp/" + + val pollPeriod = Option(property.getProperty(CSV_KEY_PERIOD)) match { + case Some(s) => s.toInt + case None => CSV_DEFAULT_PERIOD + } + + val pollUnit = Option(property.getProperty(CSV_KEY_UNIT)) match { + case Some(s) => TimeUnit.valueOf(s.toUpperCase()) + case None => TimeUnit.valueOf(CSV_DEFAULT_UNIT) + } + + MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod) + + val pollDir = Option(property.getProperty(CSV_KEY_DIR)) match { + case Some(s) => s + case None => CSV_DEFAULT_DIR + } + + val reporter: CsvReporter = CsvReporter.forRegistry(registry) + .formatFor(Locale.US) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .convertRatesTo(TimeUnit.SECONDS) + .build(new File(pollDir)) + + override def start() { + reporter.start(pollPeriod, pollUnit) + } + + override def stop() { + reporter.stop() + } +} + diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala new file mode 100644 index 0000000000..621d086d41 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/JmxSink.scala @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.{JmxReporter, MetricRegistry} + +import java.util.Properties + +class JmxSink(val property: Properties, val registry: MetricRegistry) extends Sink { + val reporter: JmxReporter = JmxReporter.forRegistry(registry).build() + + override def start() { + reporter.start() + } + + override def stop() { + reporter.stop() + } + +} diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala new file mode 100644 index 0000000000..4e90dd4323 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/sink/MetricsServlet.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.sink + +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.json.MetricsModule + +import com.fasterxml.jackson.databind.ObjectMapper + +import java.util.Properties +import java.util.concurrent.TimeUnit +import javax.servlet.http.HttpServletRequest + +import org.eclipse.jetty.server.Handler + +import org.apache.spark.ui.JettyUtils + +class MetricsServlet(val property: Properties, val registry: MetricRegistry) extends Sink { + val SERVLET_KEY_URI = "uri" + val SERVLET_KEY_SAMPLE = "sample" + + val servletURI = property.getProperty(SERVLET_KEY_URI) + + val servletShowSample = property.getProperty(SERVLET_KEY_SAMPLE).toBoolean + + val mapper = new ObjectMapper().registerModule( + new MetricsModule(TimeUnit.SECONDS, TimeUnit.MILLISECONDS, servletShowSample)) + + def getHandlers = Array[(String, Handler)]( + (servletURI, JettyUtils.createHandler(request => getMetricsSnapshot(request), "text/json")) + ) + + def getMetricsSnapshot(request: HttpServletRequest): String = { + mapper.writeValueAsString(registry) + } + + override def start() { } + + override def stop() { } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala index 4b3e3e50e1..3a739aa563 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulingMode.scala +++ b/core/src/main/scala/org/apache/spark/metrics/sink/Sink.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.metrics.sink -object SchedulingMode extends Enumeration("FAIR","FIFO"){ - - type SchedulingMode = Value - val FAIR,FIFO = Value +trait Sink { + def start: Unit + def stop: Unit } diff --git a/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala new file mode 100644 index 0000000000..75cb2b8973 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/source/JvmSource.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
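
The Sink trait above is deliberately tiny; the real contract lives in MetricsSystem.registerSinks, which instantiates each configured sink reflectively through a (Properties, MetricRegistry) constructor. A hypothetical minimal sink honoring that shape (the class and its behavior are illustrative, not part of this commit):

    import java.util.Properties
    import com.codahale.metrics.MetricRegistry
    import org.apache.spark.metrics.sink.Sink

    // The constructor signature is mandatory: MetricsSystem looks it up via reflection.
    class LoggingSink(val property: Properties, val registry: MetricRegistry) extends Sink {
      override def start() {
        println("metrics sink up, tracking " + registry.getNames.size + " metrics")
      }
      override def stop() { }
    }

It would then be wired up with a metrics.properties line such as worker.sink.logging.class=my.pkg.LoggingSink (package name hypothetical).
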
+ */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.MetricRegistry +import com.codahale.metrics.jvm.{GarbageCollectorMetricSet, MemoryUsageGaugeSet} + +class JvmSource extends Source { + val sourceName = "jvm" + val metricRegistry = new MetricRegistry() + + val gcMetricSet = new GarbageCollectorMetricSet + val memGaugeSet = new MemoryUsageGaugeSet + + metricRegistry.registerAll(gcMetricSet) + metricRegistry.registerAll(memGaugeSet) +} diff --git a/core/src/main/scala/org/apache/spark/metrics/source/Source.scala b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala new file mode 100644 index 0000000000..3fee55cc6d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/metrics/source/Source.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.metrics.source + +import com.codahale.metrics.MetricRegistry + +trait Source { + def sourceName: String + def metricRegistry: MetricRegistry +} diff --git a/core/src/main/scala/spark/network/BufferMessage.scala b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala index e566aeac13..f736bb3713 100644 --- a/core/src/main/scala/spark/network/BufferMessage.scala +++ b/core/src/main/scala/org/apache/spark/network/BufferMessage.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer -import spark.storage.BlockManager +import org.apache.spark.storage.BlockManager private[spark] diff --git a/core/src/main/scala/spark/network/Connection.scala b/core/src/main/scala/org/apache/spark/network/Connection.scala index b66c00b58c..95cb0206ac 100644 --- a/core/src/main/scala/spark/network/Connection.scala +++ b/core/src/main/scala/org/apache/spark/network/Connection.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network -import spark._ +import org.apache.spark._ import scala.collection.mutable.{HashMap, Queue, ArrayBuffer} @@ -45,12 +45,15 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, channel.socket.setKeepAlive(true) /*channel.socket.setReceiveBufferSize(32768) */ + @volatile private var closed = false var onCloseCallback: Connection => Unit = null var onExceptionCallback: (Connection, Exception) => Unit = null var onKeyInterestChangeCallback: (Connection, Int) => Unit = null val remoteAddress = getRemoteAddress() + def resetForceReregister(): Boolean + // Read channels typically do not register for write and write does not for read // Now, we do have write registering for read too (temporarily), but this is to detect // channel close NOT to actually read/consume data on it ! 
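
A detail worth calling out in the Connection hunks here: close() now flips a @volatile closed flag, and SendingConnection.changeInterestForWrite() (further down) consults it, so a selector wakeup racing with close() can no longer re-arm OP_WRITE on a cancelled key. In sketch form, simplified from the surrounding diff:

    @volatile private var closed = false
    protected def isClosed: Boolean = closed

    def close() {
      closed = true
      Option(key()).foreach(_.cancel())   // simplified; the diff checks for null
      callOnCloseCallback()
    }

    // SendingConnection: never ask for OP_WRITE once the channel is closed.
    override def changeInterestForWrite(): Boolean = !isClosed
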
@@ -95,6 +98,7 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, } def close() { + closed = true val k = key() if (k != null) { k.cancel() @@ -103,6 +107,8 @@ abstract class Connection(val channel: SocketChannel, val selector: Selector, callOnCloseCallback() } + protected def isClosed: Boolean = closed + def onClose(callback: Connection => Unit) { onCloseCallback = callback } @@ -168,7 +174,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, remoteId_ : ConnectionManagerId) extends Connection(SocketChannel.open, selector_, remoteId_) { - class Outbox(fair: Int = 0) { + private class Outbox(fair: Int = 0) { val messages = new Queue[Message]() val defaultChunkSize = 65536 //32768 //16384 var nextMessageToBeUsed = 0 @@ -245,7 +251,17 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, } } + // outbox is used as a lock - ensure that it is always used as a leaf (since methods which + // lock it are invoked in context of other locks) private val outbox = new Outbox(1) + /* + This is orthogonal to whether we have pending bytes to write or not - and satisfies a slightly + different purpose. This flag is to see if we need to force reregister for write even when we + do not have any pending bytes to write to socket. + This can happen due to a race between adding pending buffers, and checking for existing of + data as detailed in https://github.com/mesos/spark/pull/791 + */ + private var needForceReregister = false val currentBuffers = new ArrayBuffer[ByteBuffer]() /*channel.socket.setSendBufferSize(256 * 1024)*/ @@ -267,9 +283,19 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, def send(message: Message) { outbox.synchronized { outbox.addMessage(message) - if (channel.isConnected) { - registerInterest() - } + needForceReregister = true + } + if (channel.isConnected) { + registerInterest() + } + } + + // return previous value after resetting it. + def resetForceReregister(): Boolean = { + outbox.synchronized { + val result = needForceReregister + needForceReregister = false + result } } @@ -322,7 +348,11 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, outbox.synchronized { outbox.getChunk() match { case Some(chunk) => { - currentBuffers ++= chunk.buffers + val buffers = chunk.buffers + // If we have 'seen' pending messages, then reset flag - since we handle that as normal + // registering of event (below) + if (needForceReregister && buffers.exists(_.remaining() > 0)) resetForceReregister() + currentBuffers ++= buffers } case None => { // changeConnectionKeyInterest(0) @@ -384,7 +414,7 @@ class SendingConnection(val address: InetSocketAddress, selector_ : Selector, override def changeInterestForRead(): Boolean = false - override def changeInterestForWrite(): Boolean = true + override def changeInterestForWrite(): Boolean = ! isClosed } @@ -534,6 +564,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S def onReceive(callback: (Connection, Message) => Unit) {onReceiveCallback = callback} + // override def changeInterestForRead(): Boolean = ! isClosed override def changeInterestForRead(): Boolean = true override def changeInterestForWrite(): Boolean = { @@ -549,4 +580,7 @@ private[spark] class ReceivingConnection(channel_ : SocketChannel, selector_ : S override def unregisterInterest() { changeConnectionKeyInterest(0) } + + // For read conn, always false. 
+ override def resetForceReregister(): Boolean = false } diff --git a/core/src/main/scala/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index 6c4e7dc03e..e15a839c4e 100644 --- a/core/src/main/scala/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network -import spark._ +import org.apache.spark._ import java.nio._ import java.nio.channels._ @@ -34,6 +34,7 @@ import scala.collection.mutable.ArrayBuffer import akka.dispatch.{Await, Promise, ExecutionContext, Future} import akka.util.Duration import akka.util.duration._ +import org.apache.spark.util.Utils private[spark] class ConnectionManager(port: Int) extends Logging { @@ -123,7 +124,8 @@ private[spark] class ConnectionManager(port: Int) extends Logging { } finally { writeRunnableStarted.synchronized { writeRunnableStarted -= key - if (register && conn.changeInterestForWrite()) { + val needReregister = register || conn.resetForceReregister() + if (needReregister && conn.changeInterestForWrite()) { conn.registerInterest() } } diff --git a/core/src/main/scala/spark/network/ConnectionManagerId.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala index 9d5c518293..50dd9bc2d1 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerId.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerId.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.net.InetSocketAddress -import spark.Utils +import org.apache.spark.util.Utils private[spark] case class ConnectionManagerId(host: String, port: Int) { diff --git a/core/src/main/scala/spark/network/ConnectionManagerTest.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala index 9e3827aaf5..8d9ad9604d 100644 --- a/core/src/main/scala/spark/network/ConnectionManagerTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManagerTest.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network -import spark._ -import spark.SparkContext._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ import scala.io.Source diff --git a/core/src/main/scala/spark/network/Message.scala b/core/src/main/scala/org/apache/spark/network/Message.scala index a25457ea35..f2ecc6d439 100644 --- a/core/src/main/scala/spark/network/Message.scala +++ b/core/src/main/scala/org/apache/spark/network/Message.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetSocketAddress diff --git a/core/src/main/scala/spark/network/MessageChunk.scala b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala index 784db5ab62..e0fe57b80d 100644 --- a/core/src/main/scala/spark/network/MessageChunk.scala +++ b/core/src/main/scala/org/apache/spark/network/MessageChunk.scala @@ -15,7 +15,7 @@ * limitations under the License. 
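
The force-reregister changes above close a check-then-act race: a message can be queued after the write loop has drained currentBuffers but before it decides whether to drop write interest. Distilled to its core (the names here are illustrative, not the patch's), the idiom is a flag raised and consumed under one lock, so at most one caller observes each raise:

    // Illustrative reduction of the reregister handshake, not code from this patch.
    class WriteState {
      private val lock = new Object
      private var needForceReregister = false   // guarded by `lock`

      // producer side: enqueue work and raise the flag atomically
      def onMessageQueued() { lock.synchronized { needForceReregister = true } }

      // consumer side: read-and-clear in one critical section
      def resetForceReregister(): Boolean = lock.synchronized {
        val result = needForceReregister
        needForceReregister = false
        result
      }
    }
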
*/ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer diff --git a/core/src/main/scala/spark/network/MessageChunkHeader.scala b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala index 18d0cbcc14..235fbc39b3 100644 --- a/core/src/main/scala/spark/network/MessageChunkHeader.scala +++ b/core/src/main/scala/org/apache/spark/network/MessageChunkHeader.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.net.InetAddress import java.net.InetSocketAddress diff --git a/core/src/main/scala/spark/network/ReceiverTest.scala b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala index 2bbc736f40..781715108b 100644 --- a/core/src/main/scala/spark/network/ReceiverTest.scala +++ b/core/src/main/scala/org/apache/spark/network/ReceiverTest.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress diff --git a/core/src/main/scala/spark/network/SenderTest.scala b/core/src/main/scala/org/apache/spark/network/SenderTest.scala index 542c54c36b..777574980f 100644 --- a/core/src/main/scala/spark/network/SenderTest.scala +++ b/core/src/main/scala/org/apache/spark/network/SenderTest.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network +package org.apache.spark.network import java.nio.ByteBuffer import java.net.InetAddress diff --git a/core/src/main/scala/spark/network/netty/FileHeader.scala b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala index bf46d32aa3..3c29700920 100644 --- a/core/src/main/scala/spark/network/netty/FileHeader.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/FileHeader.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.network.netty +package org.apache.spark.network.netty import io.netty.buffer._ -import spark.Logging +import org.apache.spark.Logging private[spark] class FileHeader ( val fileLen: Int, diff --git a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala index b01f6369f6..9493ccffd9 100644 --- a/core/src/main/scala/spark/network/netty/ShuffleCopier.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleCopier.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.network.netty +package org.apache.spark.network.netty import java.util.concurrent.Executors @@ -23,8 +23,8 @@ import io.netty.buffer.ByteBuf import io.netty.channel.ChannelHandlerContext import io.netty.util.CharsetUtil -import spark.Logging -import spark.network.ConnectionManagerId +import org.apache.spark.Logging +import org.apache.spark.network.ConnectionManagerId import scala.collection.JavaConverters._ diff --git a/core/src/main/scala/spark/network/netty/ShuffleSender.scala b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala index cdf88b03a0..537f225469 100644 --- a/core/src/main/scala/spark/network/netty/ShuffleSender.scala +++ b/core/src/main/scala/org/apache/spark/network/netty/ShuffleSender.scala @@ -15,11 +15,11 @@ * limitations under the License. 
 */ -package spark.network.netty +package org.apache.spark.network.netty import java.io.File -import spark.Logging +import org.apache.spark.Logging private[spark] class ShuffleSender(portIn: Int, val pResolver: PathResolver) extends Logging { diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala new file mode 100644 index 0000000000..c0ec527339 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -0,0 +1,35 @@ +import org.apache.spark.rdd.{SequenceFileRDDFunctions, DoubleRDDFunctions, PairRDDFunctions} + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Core Spark functionality. [[org.apache.spark.SparkContext]] serves as the main entry point to + * Spark, while [[org.apache.spark.rdd.RDD]] is the data type representing a distributed collection, + * and provides most parallel operations. + * + * In addition, [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs + * of key-value pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] + * contains operations available only on RDDs of Doubles; and + * [[org.apache.spark.rdd.SequenceFileRDDFunctions]] contains operations available on RDDs that can + * be saved as SequenceFiles. These operations are automatically available on any RDD of the right + * type (e.g. RDD[(Int, Int)]) through implicit conversions when you + * `import org.apache.spark.SparkContext._`. + */ +package object spark { + // For package docs only +} diff --git a/core/src/main/scala/spark/partial/ApproximateActionListener.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala index 691d939150..d71069444a 100644 --- a/core/src/main/scala/spark/partial/ApproximateActionListener.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateActionListener.scala @@ -15,10 +15,11 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial -import spark._ -import spark.scheduler.JobListener +import org.apache.spark._ +import org.apache.spark.scheduler.JobListener +import org.apache.spark.rdd.RDD /** * A JobListener for an approximate single-result action, such as count() or non-parallel reduce(). diff --git a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala index 5eae144dfb..9c2859c8b9 100644 --- a/core/src/main/scala/spark/partial/ApproximateEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/ApproximateEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. 
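
The package object above only carries documentation, but the implicit-conversion pattern it describes is worth seeing in use. A sketch against the API of this era, assuming a local master; all values are illustrative:

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkContext._   // brings the implicit RDD wrappers into scope

    val sc = new SparkContext("local", "example")

    val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))
    val sums = pairs.reduceByKey(_ + _)      // PairRDDFunctions, via implicit conversion

    val xs = sc.parallelize(Seq(1.0, 2.0, 3.0))
    val sd = xs.stats().stdev                // DoubleRDDFunctions, via implicit conversion
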
*/ -package spark.partial +package org.apache.spark.partial /** * An object that computes a function incrementally by merging in results of type U from multiple diff --git a/core/src/main/scala/spark/partial/BoundedDouble.scala b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala index 8bdbe6c012..5f4450859c 100644 --- a/core/src/main/scala/spark/partial/BoundedDouble.scala +++ b/core/src/main/scala/org/apache/spark/partial/BoundedDouble.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial /** * A Double with error bars on it. diff --git a/core/src/main/scala/spark/partial/CountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala index 6aa92094eb..3155dfe165 100644 --- a/core/src/main/scala/spark/partial/CountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/CountEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability diff --git a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala index ebe2e5a1e3..e519e3a548 100644 --- a/core/src/main/scala/spark/partial/GroupedCountEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedCountEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} diff --git a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala index 2dadbbd5fb..cf8a5680b6 100644 --- a/core/src/main/scala/spark/partial/GroupedMeanEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedMeanEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} @@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for means by key. Returns a map of key to confidence interval. diff --git a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala index ae2b63f7cb..8225a5d933 100644 --- a/core/src/main/scala/spark/partial/GroupedSumEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/GroupedSumEvaluator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import java.util.{HashMap => JHashMap} import java.util.{Map => JMap} @@ -24,7 +24,7 @@ import scala.collection.mutable.HashMap import scala.collection.Map import scala.collection.JavaConversions.mapAsScalaMap -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for sums by key. Returns a map of key to confidence interval. diff --git a/core/src/main/scala/spark/partial/MeanEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala index 5ddcad7075..d24959cba8 100644 --- a/core/src/main/scala/spark/partial/MeanEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/MeanEvaluator.scala @@ -15,11 +15,11 @@ * limitations under the License. 
 */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for means. diff --git a/core/src/main/scala/spark/partial/PartialResult.scala b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala index 922a9f9bc6..5ce49b8100 100644 --- a/core/src/main/scala/spark/partial/PartialResult.scala +++ b/core/src/main/scala/org/apache/spark/partial/PartialResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial class PartialResult[R](initialVal: R, isFinal: Boolean) { private var finalValue: Option[R] = if (isFinal) Some(initialVal) else None diff --git a/core/src/main/scala/spark/partial/StudentTCacher.scala b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala index f3bb987d46..92915ee66d 100644 --- a/core/src/main/scala/spark/partial/StudentTCacher.scala +++ b/core/src/main/scala/org/apache/spark/partial/StudentTCacher.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability diff --git a/core/src/main/scala/spark/partial/SumEvaluator.scala b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala index 4083abef03..a74f800944 100644 --- a/core/src/main/scala/spark/partial/SumEvaluator.scala +++ b/core/src/main/scala/org/apache/spark/partial/SumEvaluator.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.partial +package org.apache.spark.partial import cern.jet.stat.Probability -import spark.util.StatCounter +import org.apache.spark.util.StatCounter /** * An ApproximateEvaluator for sums. It estimates the mean and the count and multiplies them diff --git a/core/src/main/scala/spark/rdd/BlockRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala index 0ebb722d73..bca6956a18 100644 --- a/core/src/main/scala/spark/rdd/BlockRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/BlockRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. 
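
These evaluators sit behind the approximate actions on RDD: the job runs under a timeout, and the evaluator turns whatever partial results arrived into a value with error bars. A hedged usage sketch (assumes an existing SparkContext named sc; the numbers are arbitrary):

    // A count that may come back early, with confidence bounds.
    val big = sc.parallelize(1 to 1000000, 100)
    val partial = big.countApprox(timeout = 2000, confidence = 0.90)
    val bounded = partial.initialValue       // a BoundedDouble available at the timeout
    println("count in [" + bounded.low + ", " + bounded.high + "]")
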
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext} -import spark.storage.BlockManager +import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} +import org.apache.spark.storage.BlockManager private[spark] class BlockRDDPartition(val blockId: String, idx: Int) extends Partition { val index = idx @@ -28,13 +28,12 @@ private[spark] class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[String]) extends RDD[T](sc, Nil) { - @transient lazy val locations_ = BlockManager.blockIdsToExecutorLocations(blockIds, SparkEnv.get) + @transient lazy val locations_ = BlockManager.blockIdsToHosts(blockIds, SparkEnv.get) override def getPartitions: Array[Partition] = (0 until blockIds.size).map(i => { new BlockRDDPartition(blockIds(i), i).asInstanceOf[Partition] }).toArray - override def compute(split: Partition, context: TaskContext): Iterator[T] = { val blockManager = SparkEnv.get.blockManager val blockId = split.asInstanceOf[BlockRDDPartition].blockId @@ -45,8 +44,8 @@ class BlockRDD[T: ClassManifest](sc: SparkContext, @transient blockIds: Array[St } } - override def getPreferredLocations(split: Partition): Seq[String] = + override def getPreferredLocations(split: Partition): Seq[String] = { locations_(split.asInstanceOf[BlockRDDPartition].blockId) - + } } diff --git a/core/src/main/scala/spark/rdd/CartesianRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala index 150e5bca29..9b0c882481 100644 --- a/core/src/main/scala/spark/rdd/CartesianRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CartesianRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.{ObjectOutputStream, IOException} -import spark._ +import org.apache.spark._ private[spark] @@ -64,7 +64,7 @@ class CartesianRDD[T: ClassManifest, U:ClassManifest]( override def getPreferredLocations(split: Partition): Seq[String] = { val currSplit = split.asInstanceOf[CartesianPartition] - rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2) + (rdd1.preferredLocations(currSplit.s1) ++ rdd2.preferredLocations(currSplit.s2)).distinct } override def compute(split: Partition, context: TaskContext) = { diff --git a/core/src/main/scala/spark/rdd/CheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala index 6794e0e201..3311757189 100644 --- a/core/src/main/scala/spark/rdd/CheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CheckpointRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark._ +import org.apache.spark._ import org.apache.hadoop.mapred.{FileInputFormat, SequenceFileInputFormat, JobConf, Reporter} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.io.{NullWritable, BytesWritable} @@ -25,7 +25,6 @@ import org.apache.hadoop.util.ReflectionUtils import org.apache.hadoop.fs.Path import java.io.{File, IOException, EOFException} import java.text.NumberFormat -import spark.deploy.SparkHadoopUtil private[spark] class CheckpointRDDPartition(val index: Int) extends Partition {} @@ -82,8 +81,9 @@ private[spark] object CheckpointRDD extends Logging { } def writeToFile[T](path: String, blockSize: Int = -1)(ctx: TaskContext, iterator: Iterator[T]) { + val env = SparkEnv.get val outputDir = new Path(path) - val fs = outputDir.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = outputDir.getFileSystem(env.hadoop.newConfiguration()) val finalOutputName = splitIdToFile(ctx.splitId) val finalOutputPath = new Path(outputDir, finalOutputName) @@ -101,7 +101,7 @@ private[spark] object CheckpointRDD extends Logging { // This is mainly for testing purpose fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication, blockSize) } - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val serializeStream = serializer.serializeStream(fileOutputStream) serializeStream.writeAll(iterator) serializeStream.close() @@ -121,10 +121,11 @@ private[spark] object CheckpointRDD extends Logging { } def readFromFile[T](path: Path, context: TaskContext): Iterator[T] = { - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val env = SparkEnv.get + val fs = path.getFileSystem(env.hadoop.newConfiguration()) val bufferSize = System.getProperty("spark.buffer.size", "65536").toInt val fileInputStream = fs.open(path, bufferSize) - val serializer = SparkEnv.get.serializer.newInstance() + val serializer = env.serializer.newInstance() val deserializeStream = serializer.deserializeStream(fileInputStream) // Register an on-task-completion callback to close the input stream. @@ -137,13 +138,14 @@ private[spark] object CheckpointRDD extends Logging { // each split file having multiple blocks. This needs to be run on a // cluster (mesos or standalone) using HDFS. def main(args: Array[String]) { - import spark._ + import org.apache.spark._ val Array(cluster, hdfsPath) = args + val env = SparkEnv.get val sc = new SparkContext(cluster, "CheckpointRDD Test") val rdd = sc.makeRDD(1 to 10, 10).flatMap(x => 1 to 10000) val path = new Path(hdfsPath, "temp") - val fs = path.getFileSystem(SparkHadoopUtil.newConfiguration()) + val fs = path.getFileSystem(env.hadoop.newConfiguration()) sc.runJob(rdd, CheckpointRDD.writeToFile(path.toString, 1024) _) val cpRDD = new CheckpointRDD[Int](sc, path.toString) assert(cpRDD.partitions.length == rdd.partitions.length, "Number of partitions is not the same") diff --git a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala index c540cd36eb..0187256a8e 100644 --- a/core/src/main/scala/spark/rdd/CoGroupedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/CoGroupedRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. 
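
CheckpointRDD's writeToFile and readFromFile are not called directly; they are driven by the normal checkpoint flow, which this hunk reroutes through the SparkEnv-held Hadoop utilities. A usage sketch (the HDFS URI is a placeholder, sc an existing SparkContext):

    sc.setCheckpointDir("hdfs://namenode:8020/tmp/spark-checkpoints")

    val rdd = sc.makeRDD(1 to 1000, 10).map(_ * 2)
    rdd.checkpoint()    // marked now, materialized by the next action
    rdd.count()         // writes each partition via CheckpointRDD.writeToFile
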
*/ -package spark.rdd +package org.apache.spark.rdd import java.io.{ObjectOutputStream, IOException} import java.util.{HashMap => JHashMap} @@ -23,8 +23,8 @@ import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions import scala.collection.mutable.ArrayBuffer -import spark.{Aggregator, Partition, Partitioner, RDD, SparkEnv, TaskContext} -import spark.{Dependency, OneToOneDependency, ShuffleDependency} +import org.apache.spark.{Partition, Partitioner, SparkEnv, TaskContext} +import org.apache.spark.{Dependency, OneToOneDependency, ShuffleDependency} private[spark] sealed trait CoGroupSplitDep extends Serializable @@ -52,13 +52,6 @@ class CoGroupPartition(idx: Int, val deps: Array[CoGroupSplitDep]) override def hashCode(): Int = idx } -private[spark] class CoGroupAggregator - extends Aggregator[Any, Any, ArrayBuffer[Any]]( - { x => ArrayBuffer(x) }, - { (b, x) => b += x }, - { (b1, b2) => b1 ++ b2 }) - with Serializable - /** * A RDD that cogroups its parents. For each key k in parent RDDs, the resulting RDD contains a @@ -66,34 +59,25 @@ private[spark] class CoGroupAggregator * * @param rdds parent RDDs. * @param part partitioner used to partition the shuffle output. - * @param mapSideCombine flag indicating whether to merge values before shuffle step. If the flag - * is on, Spark does an extra pass over the data on the map side to merge - * all values belonging to the same key together. This can reduce the amount - * of data shuffled if and only if the number of distinct keys is very small, - * and the ratio of key size to value size is also very small. */ -class CoGroupedRDD[K]( - @transient var rdds: Seq[RDD[(K, _)]], - part: Partitioner, - val mapSideCombine: Boolean = false, - val serializerClass: String = null) +class CoGroupedRDD[K](@transient var rdds: Seq[RDD[_ <: Product2[K, _]]], part: Partitioner) extends RDD[(K, Seq[Seq[_]])](rdds.head.context, Nil) { - private val aggr = new CoGroupAggregator + private var serializerClass: String = null + + def setSerializer(cls: String): CoGroupedRDD[K] = { + serializerClass = cls + this + } override def getDependencies: Seq[Dependency[_]] = { - rdds.map { rdd => + rdds.map { rdd: RDD[_ <: Product2[K, _]] => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) - if (mapSideCombine) { - val mapSideCombinedRDD = rdd.mapPartitions(aggr.combineValuesByKey(_), true) - new ShuffleDependency[Any, ArrayBuffer[Any]](mapSideCombinedRDD, part, serializerClass) - } else { - new ShuffleDependency[Any, Any](rdd.asInstanceOf[RDD[(Any, Any)]], part, serializerClass) - } + logDebug("Adding shuffle dependency with " + rdd) + new ShuffleDependency[Any, Any](rdd, part, serializerClass) } } } @@ -138,23 +122,15 @@ class CoGroupedRDD[K]( for ((dep, depNum) <- split.deps.zipWithIndex) dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { // Read them from the parent - for ((k, v) <- rdd.iterator(itsSplit, context)) { - getSeq(k.asInstanceOf[K])(depNum) += v + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, Any]]].foreach { kv => + getSeq(kv._1)(depNum) += kv._2 } } case ShuffleCoGroupSplitDep(shuffleId) => { // Read map outputs of shuffle val fetcher = SparkEnv.get.shuffleFetcher - if (mapSideCombine) { - // With map side combine on, for each key, the shuffle fetcher returns a list of values. 
- fetcher.fetch[K, Seq[Any]](shuffleId, split.index, context.taskMetrics, ser).foreach { - case (key, values) => getSeq(key)(depNum) ++= values - } - } else { - // With map side combine off, for each key the shuffle fetcher returns a single value. - fetcher.fetch[K, Any](shuffleId, split.index, context.taskMetrics, ser).foreach { - case (key, value) => getSeq(key)(depNum) += value - } + fetcher.fetch[Product2[K, Any]](shuffleId, split.index, context.taskMetrics, ser).foreach { + kv => getSeq(kv._1)(depNum) += kv._2 } } } diff --git a/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala new file mode 100644 index 0000000000..c5de6362a9 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/CoalescedRDD.scala @@ -0,0 +1,342 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.spark._ +import java.io.{ObjectOutputStream, IOException} +import scala.collection.mutable +import scala.Some +import scala.collection.mutable.ArrayBuffer + +/** + * Class that captures a coalesced RDD by essentially keeping track of parent partitions + * @param index of this coalesced partition + * @param rdd which it belongs to + * @param parentsIndices list of indices in the parent that have been coalesced into this partition + * @param preferredLocation the preferred location for this partition + */ +case class CoalescedRDDPartition( + index: Int, + @transient rdd: RDD[_], + parentsIndices: Array[Int], + @transient preferredLocation: String = "" + ) extends Partition { + var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) + + @throws(classOf[IOException]) + private def writeObject(oos: ObjectOutputStream) { + // Update the reference to parent partition at the time of task serialization + parents = parentsIndices.map(rdd.partitions(_)) + oos.defaultWriteObject() + } + + /** + * Computes how many of the parents partitions have getPreferredLocation + * as one of their preferredLocations + * @return locality of this coalesced partition between 0 and 1 + */ + def localFraction: Double = { + val loc = parents.count(p => + rdd.context.getPreferredLocs(rdd, p.index).map(tl => tl.host).contains(preferredLocation)) + + if (parents.size == 0) 0.0 else (loc.toDouble / parents.size.toDouble) + } +} + +/** + * Represents a coalesced RDD that has fewer partitions than its parent RDD + * This class uses the PartitionCoalescer class to find a good partitioning of the parent RDD + * so that each new partition has roughly the same number of parent partitions and that + * the preferred location of each new partition overlaps with as many preferred locations of its + * parent partitions + * @param prev RDD to be coalesced + * @param maxPartitions 
number of desired partitions in the coalesced RDD + * @param balanceSlack used to trade-off balance and locality. 1.0 is all locality, 0 is all balance + */ +class CoalescedRDD[T: ClassManifest]( + @transient var prev: RDD[T], + maxPartitions: Int, + balanceSlack: Double = 0.10) + extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies + + override def getPartitions: Array[Partition] = { + val pc = new PartitionCoalescer(maxPartitions, prev, balanceSlack) + + pc.run().zipWithIndex.map { + case (pg, i) => + val ids = pg.arr.map(_.index).toArray + new CoalescedRDDPartition(i, prev, ids, pg.prefLoc) + } + } + + override def compute(partition: Partition, context: TaskContext): Iterator[T] = { + partition.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentPartition => + firstParent[T].iterator(parentPartition, context) + } + } + + override def getDependencies: Seq[Dependency[_]] = { + Seq(new NarrowDependency(prev) { + def getParents(id: Int): Seq[Int] = + partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices + }) + } + + override def clearDependencies() { + super.clearDependencies() + prev = null + } + + /** + * Returns the preferred machine for the partition. If split is of type CoalescedRDDPartition, + * then the preferred machine will be one which most parent splits prefer too. + * @param partition + * @return the machine most preferred by split + */ + override def getPreferredLocations(partition: Partition): Seq[String] = { + List(partition.asInstanceOf[CoalescedRDDPartition].preferredLocation) + } +} + +/** + * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of + * this RDD computes one or more of the parent ones. It will produce exactly `maxPartitions` if the + * parent had more than maxPartitions, or fewer if the parent had fewer. + * + * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, + * or to avoid having a large number of small tasks when processing a directory with many files. + * + * If there is no locality information (no preferredLocations) in the parent, then the coalescing + * is very simple: chunk parents that are close in the Array in chunks. + * If there is locality information, it proceeds to pack them with the following four goals: + * + * (1) Balance the groups so they roughly have the same number of parent partitions + * (2) Achieve locality per partition, i.e. find one machine which most parent partitions prefer + * (3) Be efficient, i.e. O(n) algorithm for n parent partitions (problem is likely NP-hard) + * (4) Balance preferred machines, i.e. avoid as much as possible picking the same preferred machine + * + * Furthermore, it is assumed that the parent RDD may have many partitions, e.g. 100 000. + * We assume the final number of desired partitions is small, e.g. less than 1000. + * + * The algorithm tries to assign unique preferred machines to each partition. If the number of + * desired partitions is greater than the number of preferred machines (can happen), it needs to + * start picking duplicate preferred machines. This is determined using coupon collector estimation + * (2n log(n)). The load balancing is done using power-of-two randomized bins-balls with one twist: + * it tries to also achieve locality. This is done by allowing a slack (balanceSlack) between two + * bins. If two bins are within the slack in terms of balance, the algorithm will assign partitions + * according to locality. 
(contact alig for questions) + * + */ + +private[spark] class PartitionCoalescer(maxPartitions: Int, prev: RDD[_], balanceSlack: Double) { + + def compare(o1: PartitionGroup, o2: PartitionGroup): Boolean = o1.size < o2.size + def compare(o1: Option[PartitionGroup], o2: Option[PartitionGroup]): Boolean = + if (o1 == None) false else if (o2 == None) true else compare(o1.get, o2.get) + + val rnd = new scala.util.Random(7919) // keep this class deterministic + + // each element of groupArr represents one coalesced partition + val groupArr = ArrayBuffer[PartitionGroup]() + + // hash used to check whether some machine is already in groupArr + val groupHash = mutable.Map[String, ArrayBuffer[PartitionGroup]]() + + // hash used for the first maxPartitions (to avoid duplicates) + val initialHash = mutable.Set[Partition]() + + // determines the tradeoff between load-balancing the partition sizes and their locality + // e.g. balanceSlack=0.10 means that it allows up to 10% imbalance in favor of locality + val slack = (balanceSlack * prev.partitions.size).toInt + + var noLocality = true // true iff no preferredLocations exist for the parent RDD + + // gets the *current* preferred locations from the DAGScheduler (as opposed to the static ones) + def currPrefLocs(part: Partition): Seq[String] = { + prev.context.getPreferredLocs(prev, part.index).map(tl => tl.host) + } + + // this class just keeps iterating and rotating infinitely over the partitions of the RDD + // next() returns the next preferred machine that a partition is replicated on + // the rotator first goes through the first replica copy of each partition, then second, third + // the iterator's return type is a tuple: (replicaString, partition) + class LocationIterator(prev: RDD[_]) extends Iterator[(String, Partition)] { + + var it: Iterator[(String, Partition)] = resetIterator() + + override val isEmpty = !it.hasNext + + // initializes/resets to start iterating from the beginning + def resetIterator() = { + val iterators = (0 to 2).map( x => + prev.partitions.iterator.flatMap(p => { + if (currPrefLocs(p).size > x) Some((currPrefLocs(p)(x), p)) else None + } ) + ) + iterators.reduceLeft((x, y) => x ++ y) + } + + // hasNext() is false iff there are no preferredLocations for any of the partitions of the RDD + def hasNext(): Boolean = { !isEmpty } + + // return the next preferredLocation of some partition of the RDD + def next(): (String, Partition) = { + if (it.hasNext) + it.next() + else { + it = resetIterator() // ran out of preferred locations, reset and rotate to the beginning + it.next() + } + } + } + + /** + * Sorts and gets the least element of the list associated with key in groupHash + * The returned PartitionGroup is the least loaded of all groups that represent the machine "key" + * @param key string representing a partitioned group on preferred machine key + * @return Option of PartitionGroup that has least elements for key + */ + def getLeastGroupHash(key: String): Option[PartitionGroup] = { + groupHash.get(key).map(_.sortWith(compare).head) + } + + def addPartToPGroup(part: Partition, pgroup: PartitionGroup): Boolean = { + if (!initialHash.contains(part)) { + pgroup.arr += part // already assign this element + initialHash += part // needed to avoid assigning partitions to multiple buckets + true + } else { false } + } + + /** + * Initializes targetLen partition groups and assigns a preferredLocation + * This uses coupon collector to estimate how many preferredLocations it must rotate through + * until it has seen most of the 
preferred locations (2 * n log(n)) + * @param targetLen + */ + def setupGroups(targetLen: Int) { + val rotIt = new LocationIterator(prev) + + // deal with empty case, just create targetLen partition groups with no preferred location + if (!rotIt.hasNext()) { + (1 to targetLen).foreach(x => groupArr += PartitionGroup()) + return + } + + noLocality = false + + // number of iterations needed to be certain that we've seen most preferred locations + val expectedCoupons2 = 2 * (math.log(targetLen)*targetLen + targetLen + 0.5).toInt + var numCreated = 0 + var tries = 0 + + // rotate through until either targetLen unique/distinct preferred locations have been created + // OR we've rotated expectedCoupons2, in which case we have likely seen all preferred locations, + // i.e. likely targetLen >> number of preferred locations (more buckets than there are machines) + while (numCreated < targetLen && tries < expectedCoupons2) { + tries += 1 + val (nxt_replica, nxt_part) = rotIt.next() + if (!groupHash.contains(nxt_replica)) { + val pgroup = PartitionGroup(nxt_replica) + groupArr += pgroup + addPartToPGroup(nxt_part, pgroup) + groupHash += (nxt_replica -> (ArrayBuffer(pgroup))) // list in case we have multiple + numCreated += 1 + } + } + + while (numCreated < targetLen) { // if we don't have enough partition groups, create duplicates + var (nxt_replica, nxt_part) = rotIt.next() + val pgroup = PartitionGroup(nxt_replica) + groupArr += pgroup + groupHash.get(nxt_replica).get += pgroup + var tries = 0 + while (!addPartToPGroup(nxt_part, pgroup) && tries < targetLen) { // ensure at least one part + nxt_part = rotIt.next()._2 + tries += 1 + } + numCreated += 1 + } + + } + + /** + * Takes a parent RDD partition and decides which of the partition groups to put it in + * Takes locality into account, but also uses power of 2 choices to load balance + * It strikes a balance between the two using the balanceSlack variable + * @param p partition (ball to be thrown) + * @return partition group (bin to be put in) + */ + def pickBin(p: Partition): PartitionGroup = { + val pref = currPrefLocs(p).map(getLeastGroupHash(_)).sortWith(compare) // least loaded pref locs + val prefPart = if (pref == Nil) None else pref.head + + val r1 = rnd.nextInt(groupArr.size) + val r2 = rnd.nextInt(groupArr.size) + val minPowerOfTwo = if (groupArr(r1).size < groupArr(r2).size) groupArr(r1) else groupArr(r2) + if (prefPart == None) // if no preferred locations, just use basic power of two + return minPowerOfTwo + + val prefPartActual = prefPart.get + + if (minPowerOfTwo.size + slack <= prefPartActual.size) // more imbalance than the slack allows + return minPowerOfTwo // prefer balance over locality + else { + return prefPartActual // prefer locality over balance + } + } + + def throwBalls() { + if (noLocality) { // no preferredLocations in parent RDD, no randomization needed + if (maxPartitions > groupArr.size) { // just return prev.partitions + for ((p,i) <- prev.partitions.zipWithIndex) { + groupArr(i).arr += p + } + } else { // no locality available, then simply split partitions based on positions in array + for(i <- 0 until maxPartitions) { + val rangeStart = ((i.toLong * prev.partitions.length) / maxPartitions).toInt + val rangeEnd = (((i.toLong + 1) * prev.partitions.length) / maxPartitions).toInt + (rangeStart until rangeEnd).foreach{ j => groupArr(i).arr += prev.partitions(j) } + } + } + } else { + for (p <- prev.partitions if (!initialHash.contains(p))) { // throw every partition into group + pickBin(p).arr += p + } + } + } + + def 
getPartitions: Array[PartitionGroup] = groupArr.filter( pg => pg.size > 0).toArray + + /** + * Runs the packing algorithm and returns an array of PartitionGroups that if possible are + * load balanced and grouped by locality + * @return array of partition groups + */ + def run(): Array[PartitionGroup] = { + setupGroups(math.min(prev.partitions.length, maxPartitions)) // setup the groups (bins) + throwBalls() // assign partitions (balls) to each group (bins) + getPartitions + } +} + +private[spark] case class PartitionGroup(prefLoc: String = "") { + var arr = mutable.ArrayBuffer[Partition]() + + def size = arr.size +} diff --git a/core/src/main/scala/spark/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala index 93ef097702..a4bec41752 100644 --- a/core/src/main/scala/spark/DoubleRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala @@ -15,17 +15,18 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd -import spark.partial.BoundedDouble -import spark.partial.MeanEvaluator -import spark.partial.PartialResult -import spark.partial.SumEvaluator -import spark.util.StatCounter +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.MeanEvaluator +import org.apache.spark.partial.PartialResult +import org.apache.spark.partial.SumEvaluator +import org.apache.spark.util.StatCounter +import org.apache.spark.{TaskContext, Logging} /** * Extra functions available on RDDs of Doubles through an implicit conversion. - * Import `spark.SparkContext._` at the top of your program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { /** Add up the elements in this RDD. */ @@ -34,7 +35,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { } /** - * Return a [[spark.util.StatCounter]] object that captures the mean, variance and count + * Return a [[org.apache.spark.util.StatCounter]] object that captures the mean, variance and count * of the RDD's elements in one operation. */ def stats(): StatCounter = { @@ -54,7 +55,13 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable { * Compute the sample standard deviation of this RDD's elements (which corrects for bias in * estimating the standard deviation by dividing by N-1 instead of N). */ - def sampleStdev(): Double = stats().stdev + def sampleStdev(): Double = stats().sampleStdev + + /** + * Compute the sample variance of this RDD's elements (which corrects for bias in + * estimating the variance by dividing by N-1 instead of N). + */ + def sampleVariance(): Double = stats().sampleVariance /** (Experimental) Approximate operation to return the mean within a timeout. */ def meanApprox(timeout: Long, confidence: Double = 0.95): PartialResult[BoundedDouble] = { diff --git a/core/src/main/scala/spark/rdd/EmptyRDD.scala b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala index d7d4db5d30..c8900d1a93 100644 --- a/core/src/main/scala/spark/rdd/EmptyRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/EmptyRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
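
From the user's side this machinery sits behind RDD.coalesce. A sketch of the intended case, a wide RDD that a filter has made sparse (paths and sizes are illustrative, sc an existing SparkContext):

    val logs = sc.textFile("hdfs://namenode:8020/logs/part-*", 1000)
    val errors = logs.filter(_.contains("ERROR"))

    // Packs roughly ten parent partitions per output partition, preferring
    // groupings whose preferred locations line up (balanceSlack defaults to 0.10).
    val compact = errors.coalesce(100)
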
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, SparkContext, SparkEnv, Partition, TaskContext} +import org.apache.spark.{SparkContext, SparkEnv, Partition, TaskContext} /** diff --git a/core/src/main/scala/spark/rdd/FilteredRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala index 783508cfd1..5312dc0b59 100644 --- a/core/src/main/scala/spark/rdd/FilteredRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FilteredRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{OneToOneDependency, RDD, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, Partition, TaskContext} private[spark] class FilteredRDD[T: ClassManifest]( prev: RDD[T], diff --git a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala index ed75eac3ff..cbdf6d84c0 100644 --- a/core/src/main/scala/spark/rdd/FlatMappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] diff --git a/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala new file mode 100644 index 0000000000..82000bac09 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/FlatMappedValuesRDD.scala @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rdd + +import org.apache.spark.{TaskContext, Partition} + + +private[spark] +class FlatMappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => TraversableOnce[U]) + extends RDD[(K, U)](prev) { + + override def getPartitions = firstParent[Product2[K, V]].partitions + + override val partitioner = firstParent[Product2[K, V]].partitioner + + override def compute(split: Partition, context: TaskContext) = { + firstParent[Product2[K, V]].iterator(split, context).flatMap { case Product2(k, v) => + f(v).map(x => (k, x)) + } + } +} diff --git a/core/src/main/scala/spark/rdd/GlommedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala index 1573f8a289..829545d7b0 100644 --- a/core/src/main/scala/spark/rdd/GlommedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/GlommedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class GlommedRDD[T: ClassManifest](prev: RDD[T]) extends RDD[Array[T]](prev) { diff --git a/core/src/main/scala/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index d0fdeb741e..2cb6734e41 100644 --- a/core/src/main/scala/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -15,27 +15,20 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.EOFException -import java.util.NoSuchElementException -import org.apache.hadoop.io.LongWritable -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.Text -import org.apache.hadoop.mapred.FileInputFormat import org.apache.hadoop.mapred.InputFormat import org.apache.hadoop.mapred.InputSplit import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.TextInputFormat import org.apache.hadoop.mapred.RecordReader import org.apache.hadoop.mapred.Reporter import org.apache.hadoop.util.ReflectionUtils -import spark.deploy.SparkHadoopUtil -import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext} -import spark.util.NextIterator -import org.apache.hadoop.conf.Configurable +import org.apache.spark.{Logging, Partition, SerializableWritable, SparkContext, SparkEnv, TaskContext} +import org.apache.spark.util.NextIterator +import org.apache.hadoop.conf.{Configuration, Configurable} /** @@ -68,7 +61,8 @@ class HadoopRDD[K, V]( private val confBroadcast = sc.broadcast(new SerializableWritable(conf)) override def getPartitions: Array[Partition] = { - SparkHadoopUtil.addCredentials(conf); + val env = SparkEnv.get + env.hadoop.addCredentials(conf) val inputFormat = createInputFormat(conf) if (inputFormat.isInstanceOf[Configurable]) { inputFormat.asInstanceOf[Configurable].setConf(conf) @@ -88,6 +82,7 @@ class HadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext) = new NextIterator[(K, V)] { val split = theSplit.asInstanceOf[HadoopPartition] + logInfo("Input split: " + split.inputSplit) var reader: RecordReader[K, V] = null val conf = confBroadcast.value.value @@ -131,4 +126,6 @@ class HadoopRDD[K, V]( override def checkpoint() { // Do nothing. Hadoop RDD should not be checkpointed. } + + def getConf: Configuration = confBroadcast.value.value } diff --git a/core/src/main/scala/spark/rdd/JdbcRDD.scala b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala index 59132437d2..aca0146884 100644 --- a/core/src/main/scala/spark/rdd/JdbcRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/JdbcRDD.scala @@ -15,12 +15,12 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd import java.sql.{Connection, ResultSet} -import spark.{Logging, Partition, RDD, SparkContext, TaskContext} -import spark.util.NextIterator +import org.apache.spark.{Logging, Partition, SparkContext, TaskContext} +import org.apache.spark.util.NextIterator private[spark] class JdbcPartition(idx: Int, val lower: Long, val upper: Long) extends Partition { override def index = idx diff --git a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala index af8f0a112f..203179c4ea 100644 --- a/core/src/main/scala/spark/rdd/MapPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] diff --git a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala index 3b4e9518fd..3ed8339010 100644 --- a/core/src/main/scala/spark/rdd/MapPartitionsWithIndexRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithIndexRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} /** diff --git a/core/src/main/scala/spark/rdd/MappedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala index 8b411dd85d..e8be1c4816 100644 --- a/core/src/main/scala/spark/rdd/MappedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/MappedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class MappedRDD[U: ClassManifest, T: ClassManifest](prev: RDD[T], f: T => U) diff --git a/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala new file mode 100644 index 0000000000..d33c1af581 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/MappedValuesRDD.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.rdd + + +import org.apache.spark.{TaskContext, Partition} + +private[spark] +class MappedValuesRDD[K, V, U](prev: RDD[_ <: Product2[K, V]], f: V => U) + extends RDD[(K, U)](prev) { + + override def getPartitions = firstParent[Product2[K, U]].partitions + + override val partitioner = firstParent[Product2[K, U]].partitioner + + override def compute(split: Partition, context: TaskContext): Iterator[(K, U)] = { + firstParent[Product2[K, V]].iterator(split, context).map { case Product2(k ,v) => (k, f(v)) } + } +} diff --git a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 17fe805fd4..7b3a89f7e0 100644 --- a/core/src/main/scala/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.text.SimpleDateFormat import java.util.Date @@ -24,7 +24,7 @@ import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ -import spark.{Dependency, Logging, Partition, RDD, SerializableWritable, SparkContext, TaskContext} +import org.apache.spark.{Dependency, Logging, Partition, SerializableWritable, SparkContext, TaskContext} private[spark] @@ -43,7 +43,7 @@ class NewHadoopRDD[K, V]( valueClass: Class[V], @transient conf: Configuration) extends RDD[(K, V)](sc, Nil) - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Logging { // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it @@ -73,6 +73,7 @@ class NewHadoopRDD[K, V]( override def compute(theSplit: Partition, context: TaskContext) = new Iterator[(K, V)] { val split = theSplit.asInstanceOf[NewHadoopPartition] + logInfo("Input split: " + split.serializableHadoopSplit) val conf = confBroadcast.value.value val attemptId = newTaskAttemptID(jobtrackerId, id, true, split.index, 0) val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) @@ -119,4 +120,7 @@ class NewHadoopRDD[K, V]( val theSplit = split.asInstanceOf[NewHadoopPartition] theSplit.serializableHadoopSplit.value.getLocations.filter(_ != "localhost") } + + def getConf: Configuration = confBroadcast.value.value } + diff --git a/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala new file mode 100644 index 0000000000..697be8b997 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/OrderedRDDFunctions.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.rdd + +import org.apache.spark.{RangePartitioner, Logging} + +/** + * Extra functions available on RDDs of (key, value) pairs where the key is sortable through + * an implicit conversion. Import `org.apache.spark.SparkContext._` at the top of your program to + * use these functions. They will work with any key type that has a `scala.math.Ordered` + * implementation. + */ +class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, + V: ClassManifest, + P <: Product2[K, V] : ClassManifest]( + self: RDD[P]) + extends Logging with Serializable { + + /** + * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling + * `collect` or `save` on the resulting RDD will return or output an ordered list of records + * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in + * order of the keys). + */ + def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[P] = { + val part = new RangePartitioner(numPartitions, self, ascending) + val shuffled = new ShuffledRDD[K, V, P](self, part) + shuffled.mapPartitions(iter => { + val buf = iter.toArray + if (ascending) { + buf.sortWith((x, y) => x._1 < y._1).iterator + } else { + buf.sortWith((x, y) => x._1 > y._1).iterator + } + }, preservesPartitioning = true) + } +} diff --git a/core/src/main/scala/spark/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index 6b0cc2fbf1..a47c512275 100644 --- a/core/src/main/scala/spark/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -15,45 +15,44 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import java.nio.ByteBuffer -import java.util.{Date, HashMap => JHashMap} +import java.util.Date import java.text.SimpleDateFormat +import java.util.{HashMap => JHashMap} -import scala.collection.Map +import scala.collection.{mutable, Map} import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap import scala.collection.JavaConversions._ +import org.apache.hadoop.mapred._ +import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path -import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.SequenceFile.CompressionType -import org.apache.hadoop.mapred.FileOutputCommitter import org.apache.hadoop.mapred.FileOutputFormat -import org.apache.hadoop.mapred.HadoopWriter -import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapred.OutputFormat - +import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat => NewFileOutputFormat} -import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, Job => NewAPIHadoopJob, HadoopMapReduceUtil} -import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.mapreduce.SparkHadoopMapReduceUtil +import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob} +import org.apache.hadoop.mapreduce.{RecordWriter => NewRecordWriter} -import spark.partial.BoundedDouble -import spark.partial.PartialResult -import spark.rdd._ -import spark.SparkContext._ -import spark.Partitioner._ +import org.apache.spark._ +import org.apache.spark.SparkContext._ +import org.apache.spark.partial.{BoundedDouble, PartialResult} +import org.apache.spark.Aggregator +import org.apache.spark.Partitioner +import 
org.apache.spark.Partitioner.defaultPartitioner /** * Extra functions available on RDDs of (key, value) pairs through an implicit conversion. - * Import `spark.SparkContext._` at the top of your program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ -class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) +class PairRDDFunctions[K: ClassManifest, V: ClassManifest](self: RDD[(K, V)]) extends Logging - with HadoopMapReduceUtil + with SparkHadoopMapReduceUtil with Serializable { /** @@ -85,17 +84,18 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } val aggregator = new Aggregator[K, V, C](createCombiner, mergeValue, mergeCombiners) if (self.partitioner == Some(partitioner)) { - self.mapPartitions(aggregator.combineValuesByKey(_), true) + self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } else if (mapSideCombine) { - val mapSideCombined = self.mapPartitions(aggregator.combineValuesByKey(_), true) - val partitioned = new ShuffledRDD[K, C](mapSideCombined, partitioner, serializerClass) - partitioned.mapPartitions(aggregator.combineCombinersByKey(_), true) + val combined = self.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) + val partitioned = new ShuffledRDD[K, C, (K, C)](combined, partitioner) + .setSerializer(serializerClass) + partitioned.mapPartitions(aggregator.combineCombinersByKey, preservesPartitioning = true) } else { // Don't apply map-side combiner. // A sanity check to make sure mergeCombiners is not defined. assert(mergeCombiners == null) - val values = new ShuffledRDD[K, V](self, partitioner, serializerClass) - values.mapPartitions(aggregator.combineValuesByKey(_), true) + val values = new ShuffledRDD[K, V, (K, V)](self, partitioner).setSerializer(serializerClass) + values.mapPartitions(aggregator.combineValuesByKey, preservesPartitioning = true) } } @@ -167,7 +167,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( def reducePartition(iter: Iterator[(K, V)]): Iterator[JHashMap[K, V]] = { val map = new JHashMap[K, V] - for ((k, v) <- iter) { + iter.foreach { case (k, v) => val old = map.get(k) map.put(k, if (old == null) v else func(old, v)) } @@ -175,11 +175,11 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } def mergeMaps(m1: JHashMap[K, V], m2: JHashMap[K, V]): JHashMap[K, V] = { - for ((k, v) <- m2) { + m2.foreach { case (k, v) => val old = m1.get(k) m1.put(k, if (old == null) v else func(old, v)) } - return m1 + m1 } self.mapPartitions(reducePartition).reduce(mergeMaps) @@ -233,31 +233,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( } /** - * Return a copy of the RDD partitioned using the specified partitioner. If `mapSideCombine` - * is true, Spark will group values of the same key together on the map side before the - * repartitioning, to only send each key over the network once. If a large number of - * duplicated keys are expected, and the size of the keys are large, `mapSideCombine` should - * be set to true. + * Return a copy of the RDD partitioned using the specified partitioner. 
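+ * (The `mapSideCombine` argument is gone; map-side combining is now solely the business of `combineByKey`.)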
*/ - def partitionBy(partitioner: Partitioner, mapSideCombine: Boolean = false): RDD[(K, V)] = { - if (getKeyClass().isArray) { - if (mapSideCombine) { - throw new SparkException("Cannot use map-side combining with array keys.") - } - if (partitioner.isInstanceOf[HashPartitioner]) { - throw new SparkException("Default partitioner cannot partition array keys.") - } - } - if (mapSideCombine) { - def createCombiner(v: V) = ArrayBuffer(v) - def mergeValue(buf: ArrayBuffer[V], v: V) = buf += v - def mergeCombiners(b1: ArrayBuffer[V], b2: ArrayBuffer[V]) = b1 ++= b2 - val bufs = combineByKey[ArrayBuffer[V]]( - createCombiner _, mergeValue _, mergeCombiners _, partitioner) - bufs.flatMapValues(buf => buf) - } else { - new ShuffledRDD[K, V](self, partitioner) + def partitionBy(partitioner: Partitioner): RDD[(K, V)] = { + if (getKeyClass().isArray && partitioner.isInstanceOf[HashPartitioner]) { + throw new SparkException("Default partitioner cannot partition array keys.") } + new ShuffledRDD[K, V, (K, V)](self, partitioner) } /** @@ -266,9 +248,8 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( * (k, v2) is in `other`. Uses the given Partitioner to partition the output RDD. */ def join[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - for (v <- vs.iterator; w <- ws.iterator) yield (v, w) + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + for (v <- vs.iterator; w <- ws.iterator) yield (v, w) } } @@ -279,13 +260,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( * partition the output RDD. */ def leftOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner): RDD[(K, (V, Option[W]))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (ws.isEmpty) { - vs.iterator.map(v => (v, None)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (ws.isEmpty) { + vs.iterator.map(v => (v, None)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (v, Some(w)) + } } } @@ -297,13 +277,12 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( */ def rightOuterJoin[W](other: RDD[(K, W)], partitioner: Partitioner) : RDD[(K, (Option[V], W))] = { - this.cogroup(other, partitioner).flatMapValues { - case (vs, ws) => - if (vs.isEmpty) { - ws.iterator.map(w => (None, w)) - } else { - for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) - } + this.cogroup(other, partitioner).flatMapValues { case (vs, ws) => + if (vs.isEmpty) { + ws.iterator.map(w => (None, w)) + } else { + for (v <- vs.iterator; w <- ws.iterator) yield (Some(v), w) + } } } @@ -395,7 +374,13 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( /** * Return the key-value pairs in this RDD to the master as a Map. 
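* Note that duplicate keys are collapsed: if the RDD has more than one value for a key, only one of them is kept.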
*/ - def collectAsMap(): Map[K, V] = HashMap(self.collect(): _*) + def collectAsMap(): Map[K, V] = { + val data = self.toArray() + val map = new mutable.HashMap[K, V] + map.sizeHint(data.length) + data.foreach { case (k, v) => map.put(k, v) } + map + } /** * Pass each value in the key-value pair RDD through a map function without changing the keys; @@ -423,13 +408,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], other.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, ws) => - (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) + prfs.mapValues { case Seq(vs, ws) => + (vs.asInstanceOf[Seq[V]], ws.asInstanceOf[Seq[W]]) } } @@ -442,15 +424,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( if (partitioner.isInstanceOf[HashPartitioner] && getKeyClass().isArray) { throw new SparkException("Default partitioner cannot partition array keys.") } - val cg = new CoGroupedRDD[K]( - Seq(self.asInstanceOf[RDD[(K, _)]], - other1.asInstanceOf[RDD[(K, _)]], - other2.asInstanceOf[RDD[(K, _)]]), - partitioner) + val cg = new CoGroupedRDD[K](Seq(self, other1, other2), partitioner) val prfs = new PairRDDFunctions[K, Seq[Seq[_]]](cg)(classManifest[K], Manifests.seqSeqManifest) - prfs.mapValues { - case Seq(vs, w1s, w2s) => - (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) + prfs.mapValues { case Seq(vs, w1s, w2s) => + (vs.asInstanceOf[Seq[V]], w1s.asInstanceOf[Seq[W1]], w2s.asInstanceOf[Seq[W2]]) } } @@ -582,7 +559,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) val stageId = self.id - def writeShard(context: spark.TaskContext, iter: Iterator[(K,V)]): Int = { + def writeShard(context: TaskContext, iter: Iterator[(K,V)]): Int = { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. 
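// (For example, an attempt ID of 2147483650 maps to attempt number 3 after the mod.)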
val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -594,7 +571,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( committer.setupTask(hadoopContext) val writer = format.getRecordWriter(hadoopContext).asInstanceOf[NewRecordWriter[K,V]] while (iter.hasNext) { - val (k, v) = iter.next + val (k, v) = iter.next() writer.write(k, v) } writer.close(hadoopContext) @@ -652,7 +629,7 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( conf.set("mapred.output.compression.type", CompressionType.BLOCK.toString) } conf.setOutputCommitter(classOf[FileOutputCommitter]) - FileOutputFormat.setOutputPath(conf, HadoopWriter.createPathFromString(path, conf)) + FileOutputFormat.setOutputPath(conf, SparkHadoopWriter.createPathFromString(path, conf)) saveAsHadoopDataset(conf) } @@ -678,10 +655,10 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( logInfo("Saving as hadoop file of type (" + keyClass.getSimpleName+ ", " + valueClass.getSimpleName+ ")") - val writer = new HadoopWriter(conf) + val writer = new SparkHadoopWriter(conf) writer.preSetup() - def writeToFile(context: TaskContext, iter: Iterator[(K,V)]) { + def writeToFile(context: TaskContext, iter: Iterator[(K, V)]) { // Hadoop wants a 32-bit task attempt ID, so if ours is bigger than Int.MaxValue, roll it // around by taking a mod. We expect that no task will be attempted 2 billion times. val attemptNumber = (context.attemptId % Int.MaxValue).toInt @@ -720,55 +697,6 @@ class PairRDDFunctions[K: ClassManifest, V: ClassManifest]( private[spark] def getValueClass() = implicitly[ClassManifest[V]].erasure } -/** - * Extra functions available on RDDs of (key, value) pairs where the key is sortable through - * an implicit conversion. Import `spark.SparkContext._` at the top of your program to use these - * functions. They will work with any key type that has a `scala.math.Ordered` implementation. - */ -class OrderedRDDFunctions[K <% Ordered[K]: ClassManifest, V: ClassManifest]( - self: RDD[(K, V)]) - extends Logging - with Serializable { - - /** - * Sort the RDD by key, so that each partition contains a sorted range of the elements. Calling - * `collect` or `save` on the resulting RDD will return or output an ordered list of records - * (in the `save` case, they will be written to multiple `part-X` files in the filesystem, in - * order of the keys). 
- */ - def sortByKey(ascending: Boolean = true, numPartitions: Int = self.partitions.size): RDD[(K,V)] = { - val shuffled = - new ShuffledRDD[K, V](self, new RangePartitioner(numPartitions, self, ascending)) - shuffled.mapPartitions(iter => { - val buf = iter.toArray - if (ascending) { - buf.sortWith((x, y) => x._1 < y._1).iterator - } else { - buf.sortWith((x, y) => x._1 > y._1).iterator - } - }, true) - } -} - -private[spark] -class MappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => U) extends RDD[(K, U)](prev) { - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = - firstParent[(K, V)].iterator(split, context).map{ case (k, v) => (k, f(v)) } -} - -private[spark] -class FlatMappedValuesRDD[K, V, U](prev: RDD[(K, V)], f: V => TraversableOnce[U]) - extends RDD[(K, U)](prev) { - - override def getPartitions = firstParent[(K, V)].partitions - override val partitioner = firstParent[(K, V)].partitioner - override def compute(split: Partition, context: TaskContext) = { - firstParent[(K, V)].iterator(split, context).flatMap { case (k, v) => f(v).map(x => (k, x)) } - } -} - private[spark] object Manifests { val seqSeqManifest = classManifest[Seq[Seq[_]]] } diff --git a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala index 16ba0c26f8..6dbd4309aa 100644 --- a/core/src/main/scala/spark/rdd/ParallelCollectionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ParallelCollectionRDD.scala @@ -15,18 +15,22 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import scala.collection.immutable.NumericRange import scala.collection.mutable.ArrayBuffer import scala.collection.Map -import spark.{RDD, TaskContext, SparkContext, Partition} +import org.apache.spark._ +import java.io._ +import scala.Serializable +import org.apache.spark.serializer.JavaSerializer +import org.apache.spark.util.Utils private[spark] class ParallelCollectionPartition[T: ClassManifest]( - val rddId: Long, - val slice: Int, - values: Seq[T]) - extends Partition with Serializable { + var rddId: Long, + var slice: Int, + var values: Seq[T]) + extends Partition with Serializable { def iterator: Iterator[T] = values.iterator @@ -37,15 +41,49 @@ private[spark] class ParallelCollectionPartition[T: ClassManifest]( case _ => false } - override val index: Int = slice + override def index: Int = slice + + @throws(classOf[IOException]) + private def writeObject(out: ObjectOutputStream): Unit = { + + val sfactory = SparkEnv.get.serializer + + // Treat java serializer with default action rather than going thru serialization, to avoid a + // separate serialization header. 
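+ // Any other serializer writes rddId and slice directly, then streams `values` through a nested serialization stream.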
+ + sfactory match { + case js: JavaSerializer => out.defaultWriteObject() + case _ => + out.writeLong(rddId) + out.writeInt(slice) + + val ser = sfactory.newInstance() + Utils.serializeViaNestedStream(out, ser)(_.writeObject(values)) + } + } + + @throws(classOf[IOException]) + private def readObject(in: ObjectInputStream): Unit = { + + val sfactory = SparkEnv.get.serializer + sfactory match { + case js: JavaSerializer => in.defaultReadObject() + case _ => + rddId = in.readLong() + slice = in.readInt() + + val ser = sfactory.newInstance() + Utils.deserializeViaNestedStream(in, ser)(ds => values = ds.readObject()) + } + } } private[spark] class ParallelCollectionRDD[T: ClassManifest]( @transient sc: SparkContext, @transient data: Seq[T], numSlices: Int, - locationPrefs: Map[Int,Seq[String]]) - extends RDD[T](sc, Nil) { + locationPrefs: Map[Int, Seq[String]]) + extends RDD[T](sc, Nil) { // TODO: Right now, each split sends along its full data, even if later down the RDD chain it gets // cached. It might be worthwhile to write the data to a file in the DFS and read it in the split // instead. @@ -82,16 +120,17 @@ private object ParallelCollectionRDD { 1 } slice(new Range( - r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices) + r.start, r.end + sign, r.step).asInstanceOf[Seq[T]], numSlices) } case r: Range => { (0 until numSlices).map(i => { val start = ((i * r.length.toLong) / numSlices).toInt - val end = (((i+1) * r.length.toLong) / numSlices).toInt + val end = (((i + 1) * r.length.toLong) / numSlices).toInt new Range(r.start + start * r.step, r.start + end * r.step, r.step) }).asInstanceOf[Seq[Seq[T]]] } - case nr: NumericRange[_] => { // For ranges of Long, Double, BigInteger, etc + case nr: NumericRange[_] => { + // For ranges of Long, Double, BigInteger, etc val slices = new ArrayBuffer[Seq[T]](numSlices) val sliceSize = (nr.size + numSlices - 1) / numSlices // Round up to catch everything var r = nr @@ -102,10 +141,10 @@ private object ParallelCollectionRDD { slices } case _ => { - val array = seq.toArray // To prevent O(n^2) operations for List etc + val array = seq.toArray // To prevent O(n^2) operations for List etc (0 until numSlices).map(i => { val start = ((i * array.length.toLong) / numSlices).toInt - val end = (((i+1) * array.length.toLong) / numSlices).toInt + val end = (((i + 1) * array.length.toLong) / numSlices).toInt array.slice(start, end).toSeq }) } diff --git a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala index 191cfde565..165cd412fc 100644 --- a/core/src/main/scala/spark/rdd/PartitionPruningRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PartitionPruningRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{NarrowDependency, RDD, SparkEnv, Partition, TaskContext} +import org.apache.spark.{NarrowDependency, SparkEnv, Partition, TaskContext} class PartitionPruningRDDPartition(idx: Int, val parentSplit: Partition) extends Partition { @@ -33,8 +33,9 @@ class PruneDependency[T](rdd: RDD[T], @transient partitionFilterFunc: Int => Boo extends NarrowDependency[T](rdd) { @transient - val partitions: Array[Partition] = rdd.partitions.filter(s => partitionFilterFunc(s.index)) - .zipWithIndex.map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition } + val partitions: Array[Partition] = rdd.partitions.zipWithIndex + .filter(s => partitionFilterFunc(s._2)) + .map { case(split, idx) => new PartitionPruningRDDPartition(idx, split) : Partition } override def getParents(partitionId: Int) = List(partitions(partitionId).index) } diff --git a/core/src/main/scala/spark/rdd/PipedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala index 2cefdc78b0..d5304ab0ae 100644 --- a/core/src/main/scala/spark/rdd/PipedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PipedRDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.io.PrintWriter import java.util.StringTokenizer @@ -25,8 +25,8 @@ import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import scala.io.Source -import spark.{RDD, SparkEnv, Partition, TaskContext} -import spark.broadcast.Broadcast +import org.apache.spark.{SparkEnv, Partition, TaskContext} +import org.apache.spark.broadcast.Broadcast /** diff --git a/core/src/main/scala/spark/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index ca7cdd622a..e143ecd096 100644 --- a/core/src/main/scala/spark/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import java.util.Random @@ -31,43 +31,28 @@ import org.apache.hadoop.mapred.TextOutputFormat import it.unimi.dsi.fastutil.objects.{Object2LongOpenHashMap => OLMap} -import spark.broadcast.Broadcast -import spark.Partitioner._ -import spark.partial.BoundedDouble -import spark.partial.CountEvaluator -import spark.partial.GroupedCountEvaluator -import spark.partial.PartialResult -import spark.rdd.CoalescedRDD -import spark.rdd.CartesianRDD -import spark.rdd.FilteredRDD -import spark.rdd.FlatMappedRDD -import spark.rdd.GlommedRDD -import spark.rdd.MappedRDD -import spark.rdd.MapPartitionsRDD -import spark.rdd.MapPartitionsWithIndexRDD -import spark.rdd.PipedRDD -import spark.rdd.SampledRDD -import spark.rdd.ShuffledRDD -import spark.rdd.UnionRDD -import spark.rdd.ZippedRDD -import spark.rdd.ZippedPartitionsRDD2 -import spark.rdd.ZippedPartitionsRDD3 -import spark.rdd.ZippedPartitionsRDD4 -import spark.storage.StorageLevel -import spark.util.BoundedPriorityQueue - -import SparkContext._ +import org.apache.spark.Partitioner._ +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.partial.BoundedDouble +import org.apache.spark.partial.CountEvaluator +import org.apache.spark.partial.GroupedCountEvaluator +import org.apache.spark.partial.PartialResult +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.{Utils, BoundedPriorityQueue} + +import org.apache.spark.SparkContext._ +import org.apache.spark._ /** * A Resilient Distributed Dataset (RDD), the basic abstraction in Spark. 
Represents an immutable, * partitioned collection of elements that can be operated on in parallel. This class contains the * basic operations available on all RDDs, such as `map`, `filter`, and `persist`. In addition, - * [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value pairs, such - * as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations available only on - * RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations available on RDDs - * that can be saved as SequenceFiles. These operations are automatically available on any RDD of - * the right type (e.g. RDD[(Int, Int)] through implicit conversions when you - * `import spark.SparkContext._`. + * [[org.apache.spark.rdd.PairRDDFunctions]] contains operations available only on RDDs of key-value + * pairs, such as `groupByKey` and `join`; [[org.apache.spark.rdd.DoubleRDDFunctions]] contains + * operations available only on RDDs of Doubles; and [[org.apache.spark.rdd.SequenceFileRDDFunctions]] + * contains operations available on RDDs that can be saved as SequenceFiles. These operations are + * automatically available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit + * conversions when you `import org.apache.spark.SparkContext._`. * * Internally, each RDD is characterized by five main properties: * @@ -220,8 +205,8 @@ abstract class RDD[T: ClassManifest]( } /** - * Get the preferred location of a split, taking into account whether the - * RDD is checkpointed or not. + * Get the preferred locations of a partition (as hostnames), taking into account whether the + * RDD is checkpointed. */ final def preferredLocations(split: Partition): Seq[String] = { checkpointRDD.map(_.getPreferredLocations(split)).getOrElse { @@ -286,7 +271,10 @@ abstract class RDD[T: ClassManifest]( def coalesce(numPartitions: Int, shuffle: Boolean = false): RDD[T] = { if (shuffle) { // include a shuffle step so that our upstream tasks are still distributed - new CoalescedRDD(new ShuffledRDD(map(x => (x, null)), new HashPartitioner(numPartitions)), numPartitions).keys + new CoalescedRDD( + new ShuffledRDD[T, Null, (T, Null)](map(x => (x, null)), + new HashPartitioner(numPartitions)), + numPartitions).keys } else { new CoalescedRDD(this, numPartitions) } @@ -301,8 +289,8 @@ abstract class RDD[T: ClassManifest]( def takeSample(withReplacement: Boolean, num: Int, seed: Int): Array[T] = { var fraction = 0.0 var total = 0 - var multiplier = 3.0 - var initialCount = this.count() + val multiplier = 3.0 + val initialCount = this.count() var maxSelected = 0 if (num < 0) { @@ -514,22 +502,19 @@ abstract class RDD[T: ClassManifest]( * *same number of partitions*, but does *not* require them to have the same number * of elements in each partition. 
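* (Note the new curried parameter lists below: the RDD arguments now come first and the function last.)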
*/ - def zipPartitions[B: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B]) => Iterator[V], - rdd2: RDD[B]): RDD[V] = + def zipPartitions[B: ClassManifest, V: ClassManifest] + (rdd2: RDD[B]) + (f: (Iterator[T], Iterator[B]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD2(sc, sc.clean(f), this, rdd2) - def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V], - rdd2: RDD[B], - rdd3: RDD[C]): RDD[V] = + def zipPartitions[B: ClassManifest, C: ClassManifest, V: ClassManifest] + (rdd2: RDD[B], rdd3: RDD[C]) + (f: (Iterator[T], Iterator[B], Iterator[C]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD3(sc, sc.clean(f), this, rdd2, rdd3) - def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest]( - f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V], - rdd2: RDD[B], - rdd3: RDD[C], - rdd4: RDD[D]): RDD[V] = + def zipPartitions[B: ClassManifest, C: ClassManifest, D: ClassManifest, V: ClassManifest] + (rdd2: RDD[B], rdd3: RDD[C], rdd4: RDD[D]) + (f: (Iterator[T], Iterator[B], Iterator[C], Iterator[D]) => Iterator[V]): RDD[V] = new ZippedPartitionsRDD4(sc, sc.clean(f), this, rdd2, rdd3, rdd4) @@ -893,7 +878,7 @@ abstract class RDD[T: ClassManifest]( dependencies.head.rdd.asInstanceOf[RDD[U]] } - /** The [[spark.SparkContext]] that this RDD was created on. */ + /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */ def context = sc // Avoid handling doCheckpoint multiple times to prevent excessive recursion @@ -929,7 +914,7 @@ abstract class RDD[T: ClassManifest]( * Clears the dependencies of this RDD. This method must ensure that all references * to the original parent RDDs is removed to enable the parent RDDs to be garbage * collected. Subclasses of RDD may override this method for implementing their own cleaning - * logic. See [[spark.rdd.UnionRDD]] for an example. + * logic. See [[org.apache.spark.rdd.UnionRDD]] for an example. */ protected def clearDependencies() { dependencies_ = null @@ -950,4 +935,8 @@ abstract class RDD[T: ClassManifest]( id, origin) + def toJavaRDD() : JavaRDD[T] = { + new JavaRDD(this)(elementClassManifest) + } + } diff --git a/core/src/main/scala/spark/RDDCheckpointData.scala b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala index b615f820eb..6009a41570 100644 --- a/core/src/main/scala/spark/RDDCheckpointData.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDDCheckpointData.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package spark +package org.apache.spark.rdd import org.apache.hadoop.fs.Path import org.apache.hadoop.conf.Configuration -import rdd.{CheckpointRDD, CoalescedRDD} -import scheduler.{ResultTask, ShuffleMapTask} + +import org.apache.spark.{Partition, SparkException, Logging} +import org.apache.spark.scheduler.{ResultTask, ShuffleMapTask} /** * Enumeration to manage state transitions of an RDD through checkpointing diff --git a/core/src/main/scala/spark/rdd/SampledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala index 574c9b141d..2c5253ae30 100644 --- a/core/src/main/scala/spark/rdd/SampledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SampledRDD.scala @@ -15,14 +15,14 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd import java.util.Random import cern.jet.random.Poisson import cern.jet.random.engine.DRand -import spark.{RDD, Partition, TaskContext} +import org.apache.spark.{Partition, TaskContext} private[spark] class SampledRDDPartition(val prev: Partition, val seed: Int) extends Partition with Serializable { diff --git a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala index 9f30b7f22f..5fe4676029 100644 --- a/core/src/main/scala/spark/SequenceFileRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SequenceFileRDDFunctions.scala @@ -15,40 +15,22 @@ * limitations under the License. */ -package spark - -import java.io.EOFException -import java.net.URL -import java.io.ObjectInputStream -import java.util.concurrent.atomic.AtomicLong -import java.util.HashSet -import java.util.Random -import java.util.Date - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.Map -import scala.collection.mutable.HashMap +package org.apache.spark.rdd import org.apache.hadoop.mapred.JobConf -import org.apache.hadoop.mapred.OutputFormat -import org.apache.hadoop.mapred.TextOutputFormat import org.apache.hadoop.mapred.SequenceFileOutputFormat -import org.apache.hadoop.mapred.OutputCommitter -import org.apache.hadoop.mapred.FileOutputCommitter import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.io.Writable -import org.apache.hadoop.io.NullWritable -import org.apache.hadoop.io.BytesWritable -import org.apache.hadoop.io.Text -import spark.SparkContext._ +import org.apache.spark.SparkContext._ +import org.apache.spark.Logging /** * Extra functions available on RDDs of (key, value) pairs to create a Hadoop SequenceFile, * through an implicit conversion. Note that this can't be part of PairRDDFunctions because * we need more implicit parameters to convert our keys and values to Writable. * - * Users should import `spark.SparkContext._` at the top of their program to use these functions. + * Import `org.apache.spark.SparkContext._` at the top of your program to use these functions. */ class SequenceFileRDDFunctions[K <% Writable: ClassManifest, V <% Writable : ClassManifest]( self: RDD[(K, V)]) diff --git a/core/src/main/scala/spark/rdd/ShuffledRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala index 0137f80953..9537152335 100644 --- a/core/src/main/scala/spark/rdd/ShuffledRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ShuffledRDD.scala @@ -15,10 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{Partitioner, RDD, SparkEnv, ShuffleDependency, Partition, TaskContext} -import spark.SparkContext._ +import org.apache.spark.{Dependency, Partitioner, SparkEnv, ShuffleDependency, Partition, TaskContext} private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { @@ -30,15 +29,24 @@ private[spark] class ShuffledRDDPartition(val idx: Int) extends Partition { * The resulting RDD from a shuffle (e.g. repartitioning of data). * @param prev the parent RDD. * @param part the partitioner used to partition the RDD - * @param serializerClass class name of the serializer to use. * @tparam K the key class. * @tparam V the value class.
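* @tparam P the product class; with the new three-parameter signature this is normally the pair type (K, V).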
*/ -class ShuffledRDD[K, V]( - @transient prev: RDD[(K, V)], - part: Partitioner, - serializerClass: String = null) - extends RDD[(K, V)](prev.context, List(new ShuffleDependency(prev, part, serializerClass))) { +class ShuffledRDD[K, V, P <: Product2[K, V] : ClassManifest]( + @transient var prev: RDD[P], + part: Partitioner) + extends RDD[P](prev.context, Nil) { + + private var serializerClass: String = null + + def setSerializer(cls: String): ShuffledRDD[K, V, P] = { + serializerClass = cls + this + } + + override def getDependencies: Seq[Dependency[_]] = { + List(new ShuffleDependency(prev, part, serializerClass)) + } override val partitioner = Some(part) @@ -46,9 +54,14 @@ class ShuffledRDD[K, V]( Array.tabulate[Partition](part.numPartitions)(i => new ShuffledRDDPartition(i)) } - override def compute(split: Partition, context: TaskContext): Iterator[(K, V)] = { + override def compute(split: Partition, context: TaskContext): Iterator[P] = { val shuffledId = dependencies.head.asInstanceOf[ShuffleDependency[K, V]].shuffleId - SparkEnv.get.shuffleFetcher.fetch[K, V](shuffledId, split.index, context.taskMetrics, + SparkEnv.get.shuffleFetcher.fetch[P](shuffledId, split.index, context.taskMetrics, SparkEnv.get.serializerManager.get(serializerClass)) } + + override def clearDependencies() { + super.clearDependencies() + prev = null + } } diff --git a/core/src/main/scala/spark/rdd/SubtractedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala index 0402b9f250..8c1a29dfff 100644 --- a/core/src/main/scala/spark/rdd/SubtractedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/SubtractedRDD.scala @@ -15,19 +15,18 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import java.util.{HashMap => JHashMap} import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer -import spark.RDD -import spark.Partitioner -import spark.Dependency -import spark.TaskContext -import spark.Partition -import spark.SparkEnv -import spark.ShuffleDependency -import spark.OneToOneDependency +import org.apache.spark.Partitioner +import org.apache.spark.Dependency +import org.apache.spark.TaskContext +import org.apache.spark.Partition +import org.apache.spark.SparkEnv +import org.apache.spark.ShuffleDependency +import org.apache.spark.OneToOneDependency /** @@ -47,20 +46,26 @@ import spark.OneToOneDependency * out of memory because of the size of `rdd2`. 
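* (This is the shuffle-level machinery behind PairRDDFunctions.subtractByKey.)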
*/ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassManifest]( - @transient var rdd1: RDD[(K, V)], - @transient var rdd2: RDD[(K, W)], - part: Partitioner, - val serializerClass: String = null) + @transient var rdd1: RDD[_ <: Product2[K, V]], + @transient var rdd2: RDD[_ <: Product2[K, W]], + part: Partitioner) extends RDD[(K, V)](rdd1.context, Nil) { + private var serializerClass: String = null + + def setSerializer(cls: String): SubtractedRDD[K, V, W] = { + serializerClass = cls + this + } + override def getDependencies: Seq[Dependency[_]] = { Seq(rdd1, rdd2).map { rdd => if (rdd.partitioner == Some(part)) { - logInfo("Adding one-to-one dependency with " + rdd) + logDebug("Adding one-to-one dependency with " + rdd) new OneToOneDependency(rdd) } else { - logInfo("Adding shuffle dependency with " + rdd) - new ShuffleDependency(rdd.asInstanceOf[RDD[(K, Any)]], part, serializerClass) + logDebug("Adding shuffle dependency with " + rdd) + new ShuffleDependency(rdd, part, serializerClass) } } } @@ -97,16 +102,14 @@ private[spark] class SubtractedRDD[K: ClassManifest, V: ClassManifest, W: ClassM seq } } - def integrate(dep: CoGroupSplitDep, op: ((K, V)) => Unit) = dep match { + def integrate(dep: CoGroupSplitDep, op: Product2[K, V] => Unit) = dep match { case NarrowCoGroupSplitDep(rdd, _, itsSplit) => { - for (t <- rdd.iterator(itsSplit, context)) - op(t.asInstanceOf[(K, V)]) + rdd.iterator(itsSplit, context).asInstanceOf[Iterator[Product2[K, V]]].foreach(op) } case ShuffleCoGroupSplitDep(shuffleId) => { - val iter = SparkEnv.get.shuffleFetcher.fetch(shuffleId, partition.index, + val iter = SparkEnv.get.shuffleFetcher.fetch[Product2[K, V]](shuffleId, partition.index, context.taskMetrics, serializer) - for (t <- iter) - op(t.asInstanceOf[(K, V)]) + iter.foreach(op) } } // the first dep is rdd1; add all values to the map diff --git a/core/src/main/scala/spark/rdd/UnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala index 2776826f18..ae8a9f36a6 100644 --- a/core/src/main/scala/spark/rdd/UnionRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/UnionRDD.scala @@ -15,10 +15,10 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd import scala.collection.mutable.ArrayBuffer -import spark.{Dependency, RangeDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{Dependency, RangeDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} private[spark] class UnionPartition[T: ClassManifest](idx: Int, rdd: RDD[T], splitIndex: Int) diff --git a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala index 6a4fa13ad6..31e6fd519d 100644 --- a/core/src/main/scala/spark/rdd/ZippedPartitionsRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedPartitionsRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.rdd +package org.apache.spark.rdd -import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} private[spark] class ZippedPartitionsPartition( @@ -55,27 +55,15 @@ abstract class ZippedPartitionsBaseRDD[V: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // Note that as number of rdd's increase and/or number of slaves in cluster increase, the computed preferredLocations below - // become diminishingly small : so we might need to look at alternate strategies to alleviate this. - // If there are no (or very small number of preferred locations), we will end up transferred the blocks to 'any' node in the - // cluster - paying with n/w and cache cost. - // Maybe pick a node which figures max amount of time ? - // Choose node which is hosting 'larger' of some subset of blocks ? - // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) - val splits = s.asInstanceOf[ZippedPartitionsPartition].partitions - val rddSplitZip = rdds.zip(splits) - - // exact match. - val exactMatchPreferredLocations = rddSplitZip.map(x => x._1.preferredLocations(x._2)) - val exactMatchLocations = exactMatchPreferredLocations.reduce((x, y) => x.intersect(y)) - - // Remove exact match and then do host local match. - val exactMatchHosts = exactMatchLocations.map(Utils.parseHostPort(_)._1) - val matchPreferredHosts = exactMatchPreferredLocations.map(locs => locs.map(Utils.parseHostPort(_)._1)) - .reduce((x, y) => x.intersect(y)) - val otherNodeLocalLocations = matchPreferredHosts.filter { s => !exactMatchHosts.contains(s) } - - otherNodeLocalLocations ++ exactMatchLocations + val parts = s.asInstanceOf[ZippedPartitionsPartition].partitions + val prefs = rdds.zip(parts).map { case (rdd, p) => rdd.preferredLocations(p) } + // Check whether there are any hosts that match all RDDs; otherwise return the union + val exactMatchLocations = prefs.reduce((x, y) => x.intersect(y)) + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + prefs.flatten.distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/rdd/ZippedRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala index b1c43b3195..567b67dfee 100644 --- a/core/src/main/scala/spark/rdd/ZippedRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ZippedRDD.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.rdd +package org.apache.spark.rdd -import spark.{Utils, OneToOneDependency, RDD, SparkContext, Partition, TaskContext} +import org.apache.spark.{OneToOneDependency, SparkContext, Partition, TaskContext} import java.io.{ObjectOutputStream, IOException} @@ -65,27 +65,16 @@ class ZippedRDD[T: ClassManifest, U: ClassManifest]( } override def getPreferredLocations(s: Partition): Seq[String] = { - // Note that as number of slaves in cluster increase, the computed preferredLocations can become small : so we might need - // to look at alternate strategies to alleviate this. (If there are no (or very small number of preferred locations), we - // will end up transferred the blocks to 'any' node in the cluster - paying with n/w and cache cost. - // Maybe pick one or the other ? (so that atleast one block is local ?). - // Choose node which is hosting 'larger' of the blocks ? 
- // Look at rack locality to ensure chosen host is atleast rack local to both hosting node ?, etc (would be good to defer this if possible) val (partition1, partition2) = s.asInstanceOf[ZippedPartition[T, U]].partitions val pref1 = rdd1.preferredLocations(partition1) val pref2 = rdd2.preferredLocations(partition2) - - // exact match - instance local and host local. + // Check whether there are any hosts that match both RDDs; otherwise return the union val exactMatchLocations = pref1.intersect(pref2) - - // remove locations which are already handled via exactMatchLocations, and intersect where both partitions are node local. - val otherNodeLocalPref1 = pref1.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalPref2 = pref2.filter(loc => ! exactMatchLocations.contains(loc)).map(loc => Utils.parseHostPort(loc)._1) - val otherNodeLocalLocations = otherNodeLocalPref1.intersect(otherNodeLocalPref2) - - - // Can have mix of instance local (hostPort) and node local (host) locations as preference ! - exactMatchLocations ++ otherNodeLocalLocations + if (!exactMatchLocations.isEmpty) { + exactMatchLocations + } else { + (pref1 ++ pref2).distinct + } } override def clearDependencies() { diff --git a/core/src/main/scala/spark/scheduler/ActiveJob.scala b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala index 71cc94edb6..0b04607d01 100644 --- a/core/src/main/scala/spark/scheduler/ActiveJob.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ActiveJob.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.TaskContext +import org.apache.spark.TaskContext import java.util.Properties @@ -25,7 +25,7 @@ import java.util.Properties * Tracks information about an active job in the DAGScheduler. */ private[spark] class ActiveJob( - val runId: Int, + val jobId: Int, val finalStage: Stage, val func: (TaskContext, Iterator[_]) => _, val partitions: Array[Int], diff --git a/core/src/main/scala/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index 29e879aa42..92add5b073 100644 --- a/core/src/main/scala/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -15,29 +15,40 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler -import cluster.TaskInfo -import java.util.concurrent.atomic.AtomicInteger -import java.util.concurrent.LinkedBlockingQueue -import java.util.concurrent.TimeUnit +import java.io.NotSerializableException import java.util.Properties +import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} +import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} -import spark._ -import spark.executor.TaskMetrics -import spark.partial.ApproximateActionListener -import spark.partial.ApproximateEvaluator -import spark.partial.PartialResult -import spark.storage.{BlockManager, BlockManagerMaster} -import spark.util.{MetadataCleaner, TimeStampedHashMap} +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.partial.{ApproximateActionListener, ApproximateEvaluator, PartialResult} +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.storage.{BlockManager, BlockManagerMaster} +import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap} /** - * A Scheduler subclass that implements stage-oriented scheduling. It computes a DAG of stages for - * each job, keeps track of which RDDs and stage outputs are materialized, and computes a minimal - * schedule to run the job. Subclasses only need to implement the code to send a task to the cluster - * and to report fetch failures (the submitTasks method, and code to add CompletionEvents). + * The high-level scheduling layer that implements stage-oriented scheduling. It computes a DAG of + * stages for each job, keeps track of which RDDs and stage outputs are materialized, and finds a + * minimal schedule to run the job. It then submits stages as TaskSets to an underlying + * TaskScheduler implementation that runs them on the cluster. + * + * In addition to coming up with a DAG of stages, this class also determines the preferred + * locations to run each task on, based on the current cache status, and passes these to the + * low-level TaskScheduler. Furthermore, it handles failures due to shuffle output files being + * lost, in which case old stages may need to be resubmitted. Failures *within* a stage that are + * not caused by shuffle file loss are handled by the TaskScheduler, which will retry each task + * a small number of times before cancelling the whole stage. + * + * THREADING: This class runs all its logic in a single thread executing the run() method, to which + * events are submitted using a synchronized queue (eventQueue). The public API methods, such as + * runJob, taskEnded and executorLost, post events asynchronously to this queue. All other methods + * should be private. */ private[spark] class DAGScheduler( @@ -52,6 +63,11 @@ class DAGScheduler( } taskSched.setListener(this) + // Called by TaskScheduler to report that a task is starting. + override def taskStarted(task: Task[_], taskInfo: TaskInfo) { + eventQueue.put(BeginEvent(task, taskInfo)) + } + // Called by TaskScheduler to report task completions or failures.
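// Like taskStarted above, this only queues a CompletionEvent; all bookkeeping happens on the event loop thread.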
override def taskEnded( task: Task[_], @@ -69,8 +85,8 @@ class DAGScheduler( } // Called by TaskScheduler when a host is added - override def executorGained(execId: String, hostPort: String) { - eventQueue.put(ExecutorGained(execId, hostPort)) + override def executorGained(execId: String, host: String) { + eventQueue.put(ExecutorGained(execId, host)) } // Called by TaskScheduler to cancel an entire TaskSet due to repeated failures. @@ -89,27 +105,28 @@ class DAGScheduler( private val eventQueue = new LinkedBlockingQueue[DAGSchedulerEvent] - val nextRunId = new AtomicInteger(0) + val nextJobId = new AtomicInteger(0) val nextStageId = new AtomicInteger(0) - val idToStage = new TimeStampedHashMap[Int, Stage] + val stageIdToStage = new TimeStampedHashMap[Int, Stage] val shuffleToMapStage = new TimeStampedHashMap[Int, Stage] private[spark] val stageToInfos = new TimeStampedHashMap[Stage, StageInfo] - private[spark] val sparkListeners = ArrayBuffer[SparkListener]() + private val listenerBus = new SparkListenerBus() - var cacheLocs = new HashMap[Int, Array[List[String]]] + // Contains the locations that each RDD's partitions are cached on + private val cacheLocs = new HashMap[Int, Array[Seq[TaskLocation]]] - // For tracking failed nodes, we use the MapOutputTracker's generation number, which is - // sent with every task. When we detect a node failing, we note the current generation number - // and failed executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask - // results. - // TODO: Garbage collect information about failure generations when we know there are no more + // For tracking failed nodes, we use the MapOutputTracker's epoch number, which is sent with + // every task. When we detect a node failing, we note the current epoch number and failed + // executor, increment it for new tasks, and use this to ignore stray ShuffleMapTask results. + // + // TODO: Garbage collect information about failure epochs when we know there are no more // stray messages to detect. - val failedGeneration = new HashMap[String, Long] + val failedEpoch = new HashMap[String, Long] val idToActiveJob = new HashMap[Int, ActiveJob] @@ -134,11 +151,17 @@ class DAGScheduler( }.start() } - private def getCacheLocs(rdd: RDD[_]): Array[List[String]] = { + def addSparkListener(listener: SparkListener) { + listenerBus.addListener(listener) + } + + private def getCacheLocs(rdd: RDD[_]): Array[Seq[TaskLocation]] = { if (!cacheLocs.contains(rdd.id)) { val blockIds = rdd.partitions.indices.map(index=> "rdd_%d_%d".format(rdd.id, index)).toArray - val locs = BlockManager.blockIdsToExecutorLocations(blockIds, env, blockManagerMaster) - cacheLocs(rdd.id) = blockIds.map(locs.getOrElse(_, Nil)) + val locs = BlockManager.blockIdsToBlockManagers(blockIds, env, blockManagerMaster) + cacheLocs(rdd.id) = blockIds.map { id => + locs.getOrElse(id, Nil).map(bm => TaskLocation(bm.host, bm.executorId)) + } } cacheLocs(rdd.id) } @@ -149,14 +172,14 @@ class DAGScheduler( /** * Get or create a shuffle map stage for the given shuffle dependency's map side. - * The priority value passed in will be used if the stage doesn't already exist with - * a lower priority (we assume that priorities always increase across jobs for now). + * The jobId value passed in will be used if the stage doesn't already exist with + * a lower jobId (jobId always increases across jobs.) 
*/ - private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], priority: Int): Stage = { + private def getShuffleMapStage(shuffleDep: ShuffleDependency[_,_], jobId: Int): Stage = { shuffleToMapStage.get(shuffleDep.shuffleId) match { case Some(stage) => stage case None => - val stage = newStage(shuffleDep.rdd, Some(shuffleDep), priority) + val stage = newStage(shuffleDep.rdd, Some(shuffleDep), jobId) shuffleToMapStage(shuffleDep.shuffleId) = stage stage } @@ -164,13 +187,13 @@ class DAGScheduler( /** * Create a Stage for the given RDD, either as a shuffle map stage (for a ShuffleDependency) or - * as a result stage for the final RDD used directly in an action. The stage will also be given - * the provided priority. + * as a result stage for the final RDD used directly in an action. The stage will also be + * associated with the provided jobId. */ private def newStage( rdd: RDD[_], shuffleDep: Option[ShuffleDependency[_,_]], - priority: Int, + jobId: Int, callSite: Option[String] = None) : Stage = { @@ -181,17 +204,17 @@ class DAGScheduler( mapOutputTracker.registerShuffle(shuffleDep.get.shuffleId, rdd.partitions.size) } val id = nextStageId.getAndIncrement() - val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, priority), priority, callSite) - idToStage(id) = stage + val stage = new Stage(id, rdd, shuffleDep, getParentStages(rdd, jobId), jobId, callSite) + stageIdToStage(id) = stage stageToInfos(stage) = StageInfo(stage) stage } /** * Get or create the list of parent stages for a given RDD. The stages will be assigned the - * provided priority if they haven't already been created with a lower priority. + * provided jobId if they haven't already been created with a lower jobId. */ - private def getParentStages(rdd: RDD[_], priority: Int): List[Stage] = { + private def getParentStages(rdd: RDD[_], jobId: Int): List[Stage] = { val parents = new HashSet[Stage] val visited = new HashSet[RDD[_]] def visit(r: RDD[_]) { @@ -202,7 +225,7 @@ class DAGScheduler( for (dep <- r.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - parents += getShuffleMapStage(shufDep, priority) + parents += getShuffleMapStage(shufDep, jobId) case _ => visit(dep.rdd) } @@ -223,7 +246,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { missing += mapStage } @@ -258,8 +281,9 @@ class DAGScheduler( assert(partitions.size > 0) val waiter = new JobWaiter(partitions.size, resultHandler) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] - val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, properties) - return (toSubmit, waiter) + val toSubmit = JobSubmitted(finalRdd, func2, partitions.toArray, allowLocal, callSite, waiter, + properties) + (toSubmit, waiter) } def runJob[T, U: ClassManifest]( @@ -283,7 +307,7 @@ class DAGScheduler( "Total number of partitions: " + maxPartitions) } - val (toSubmit, waiter) = prepareJob( + val (toSubmit: JobSubmitted, waiter: JobWaiter[_]) = prepareJob( finalRdd, func, partitions, callSite, allowLocal, resultHandler, properties) eventQueue.put(toSubmit) waiter.awaitResult() match { @@ -306,8 +330,8 @@ class DAGScheduler( val listener = new ApproximateActionListener(rdd, func, evaluator, timeout) val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _] val partitions = (0 until 
rdd.partitions.size).toArray - eventQueue.put(JobSubmitted(rdd, func2, partitions, false, callSite, listener, properties)) - return listener.awaitResult() // Will throw an exception if the job fails + eventQueue.put(JobSubmitted(rdd, func2, partitions, allowLocal = false, callSite, listener, properties)) + listener.awaitResult() // Will throw an exception if the job fails } /** @@ -317,11 +341,11 @@ class DAGScheduler( private[scheduler] def processEvent(event: DAGSchedulerEvent): Boolean = { event match { case JobSubmitted(finalRDD, func, partitions, allowLocal, callSite, listener, properties) => - val runId = nextRunId.getAndIncrement() - val finalStage = newStage(finalRDD, None, runId, Some(callSite)) - val job = new ActiveJob(runId, finalStage, func, partitions, callSite, listener, properties) + val jobId = nextJobId.getAndIncrement() + val finalStage = newStage(finalRDD, None, jobId, Some(callSite)) + val job = new ActiveJob(jobId, finalStage, func, partitions, callSite, listener, properties) clearCacheLocs() - logInfo("Got job " + job.runId + " (" + callSite + ") with " + partitions.length + + logInfo("Got job " + job.jobId + " (" + callSite + ") with " + partitions.length + " output partitions (allowLocal=" + allowLocal + ")") logInfo("Final stage: " + finalStage + " (" + finalStage.name + ")") logInfo("Parents of final stage: " + finalStage.parents) @@ -330,37 +354,40 @@ class DAGScheduler( // Compute very short actions like first() or take() with no parent stages locally. runLocally(job) } else { - sparkListeners.foreach(_.onJobStart(SparkListenerJobStart(job, properties))) - idToActiveJob(runId) = job + listenerBus.post(SparkListenerJobStart(job, properties)) + idToActiveJob(jobId) = job activeJobs += job resultStageToJob(finalStage) = job submitStage(finalStage) } - case ExecutorGained(execId, hostPort) => - handleExecutorGained(execId, hostPort) + case ExecutorGained(execId, host) => + handleExecutorGained(execId, host) case ExecutorLost(execId) => handleExecutorLost(execId) + case begin: BeginEvent => + listenerBus.post(SparkListenerTaskStart(begin.task, begin.taskInfo)) + case completion: CompletionEvent => - sparkListeners.foreach(_.onTaskEnd(SparkListenerTaskEnd(completion.task, - completion.reason, completion.taskInfo, completion.taskMetrics))) + listenerBus.post(SparkListenerTaskEnd( + completion.task, completion.reason, completion.taskInfo, completion.taskMetrics)) handleTaskCompletion(completion) case TaskSetFailed(taskSet, reason) => - abortStage(idToStage(taskSet.stageId), reason) + abortStage(stageIdToStage(taskSet.stageId), reason) case StopDAGScheduler => // Cancel any active jobs for (job <- activeJobs) { val error = new SparkException("Job cancelled because SparkContext was shut down") job.listener.jobFailed(error) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, None)))) + listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, None))) } return true } - return false + false } /** @@ -372,7 +399,7 @@ class DAGScheduler( clearCacheLocs() val failed2 = failed.toArray failed.clear() - for (stage <- failed2.sortBy(_.priority)) { + for (stage <- failed2.sortBy(_.jobId)) { submitStage(stage) } } @@ -390,7 +417,7 @@ class DAGScheduler( logTrace("failed: " + failed) val waiting2 = waiting.toArray waiting.clear() - for (stage <- waiting2.sortBy(_.priority)) { + for (stage <- waiting2.sortBy(_.jobId)) { submitStage(stage) } } @@ -409,23 +436,24 @@ class DAGScheduler( if (event != null) { logDebug("Got event of type " + 
event.getClass.getName) } - - if (event != null) { - if (processEvent(event)) { - return + this.synchronized { // needed in case other threads makes calls into methods of this class + if (event != null) { + if (processEvent(event)) { + return + } } - } - val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability - // Periodically resubmit failed stages if some map output fetches have failed and we have - // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, - // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at - // the same time, so we want to make sure we've identified all the reduce tasks that depend - // on the failed node. - if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { - resubmitFailedStages() - } else { - submitWaitingStages() + val time = System.currentTimeMillis() // TODO: use a pluggable clock for testability + // Periodically resubmit failed stages if some map output fetches have failed and we have + // waited at least RESUBMIT_TIMEOUT. We wait for this short time because when a node fails, + // tasks on many other nodes are bound to get a fetch failure, and they won't all get it at + // the same time, so we want to make sure we've identified all the reduce tasks that depend + // on the failed node. + if (failed.size > 0 && time > lastFetchFailureTime + RESUBMIT_TIMEOUT) { + resubmitFailedStages() + } else { + submitWaitingStages() + } } } } @@ -437,7 +465,7 @@ class DAGScheduler( */ protected def runLocally(job: ActiveJob) { logInfo("Computing the requested partition locally") - new Thread("Local computation of job " + job.runId) { + new Thread("Local computation of job " + job.jobId) { override def run() { runLocallyWithinThread(job) } @@ -497,20 +525,36 @@ class DAGScheduler( } else { // This is a final stage; figure out its job's missing partitions val job = resultStageToJob(stage) - for (id <- 0 until job.numPartitions if (!job.finished(id))) { + for (id <- 0 until job.numPartitions if !job.finished(id)) { val partition = job.partitions(id) val locs = getPreferredLocs(stage.rdd, partition) tasks += new ResultTask(stage.id, stage.rdd, job.func, partition, locs, id) } } + // must be run listener before possible NotSerializableException + // should be "StageSubmitted" first and then "JobEnded" + val properties = idToActiveJob(stage.jobId).properties + listenerBus.post(SparkListenerStageSubmitted(stage, tasks.size, properties)) + if (tasks.size > 0) { - sparkListeners.foreach(_.onStageSubmitted(SparkListenerStageSubmitted(stage, tasks.size))) + // Preemptively serialize a task to make sure it can be serialized. We are catching this + // exception here because it would be fairly hard to catch the non-serializable exception + // down the road, where we have several different implementations for local scheduler and + // cluster schedulers. 
+ try { + SparkEnv.get.closureSerializer.newInstance().serialize(tasks.head) + } catch { + case e: NotSerializableException => + abortStage(stage, e.toString) + running -= stage + return + } + logInfo("Submitting " + tasks.size + " missing tasks from " + stage + " (" + stage.rdd + ")") myPending ++= tasks logDebug("New pending tasks: " + myPending) - val properties = idToActiveJob(stage.priority).properties taskSched.submitTasks( - new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.priority, properties)) + new TaskSet(tasks.toArray, stage.id, stage.newAttemptId(), stage.jobId, properties)) if (!stage.submissionTime.isDefined) { stage.submissionTime = Some(System.currentTimeMillis()) } @@ -527,7 +571,7 @@ class DAGScheduler( */ private def handleTaskCompletion(event: CompletionEvent) { val task = event.task - val stage = idToStage(task.stageId) + val stage = stageIdToStage(task.stageId) def markStageAsFinished(stage: Stage) = { val serviceTime = stage.submissionTime match { @@ -536,8 +580,7 @@ class DAGScheduler( } logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime)) stage.completionTime = Some(System.currentTimeMillis) - val stageComp = StageCompleted(stageToInfos(stage)) - sparkListeners.foreach{_.onStageCompleted(stageComp)} + listenerBus.post(StageCompleted(stageToInfos(stage))) running -= stage } event.reason match { @@ -557,11 +600,11 @@ class DAGScheduler( job.numFinished += 1 // If the whole job has finished, remove it if (job.numFinished == job.numPartitions) { - idToActiveJob -= stage.priority + idToActiveJob -= stage.jobId activeJobs -= job resultStageToJob -= stage markStageAsFinished(stage) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobSucceeded))) + listenerBus.post(SparkListenerJobEnd(job, JobSucceeded)) } job.listener.taskSucceeded(rt.outputId, event.result) } @@ -573,7 +616,7 @@ class DAGScheduler( val status = event.result.asInstanceOf[MapStatus] val execId = status.location.executorId logDebug("ShuffleMapTask finished on " + execId) - if (failedGeneration.contains(execId) && smt.generation <= failedGeneration(execId)) { + if (failedEpoch.contains(execId) && smt.epoch <= failedEpoch(execId)) { logInfo("Ignoring possibly bogus ShuffleMapTask completion from " + execId) } else { stage.addOutputLoc(smt.partition, status) @@ -585,16 +628,16 @@ class DAGScheduler( logInfo("waiting: " + waiting) logInfo("failed: " + failed) if (stage.shuffleDep != None) { - // We supply true to increment the generation number here in case this is a + // We supply true to increment the epoch number here in case this is a // recomputation of the map outputs. In that case, some nodes may have cached // locations with holes (from when we detected the error) and will need the - // generation incremented to refetch them. - // TODO: Only increment the generation number if this is not the first time + // epoch incremented to refetch them. + // TODO: Only increment the epoch number if this is not the first time // we registered these map outputs. 
mapOutputTracker.registerMapOutputs( stage.shuffleDep.get.shuffleId, stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray, - true) + changeEpoch = true) } clearCacheLocs() if (stage.outputLocs.count(_ == Nil) != 0) { @@ -628,7 +671,7 @@ class DAGScheduler( case FetchFailed(bmAddress, shuffleId, mapId, reduceId) => // Mark the stage that the reducer was in as unrunnable - val failedStage = idToStage(task.stageId) + val failedStage = stageIdToStage(task.stageId) running -= failedStage failed += failedStage // TODO: Cancel running tasks in the stage @@ -648,7 +691,7 @@ class DAGScheduler( lastFetchFailureTime = System.currentTimeMillis() // TODO: Use pluggable clock // TODO: mark the executor as failed only if there were lots of fetch failures on it if (bmAddress != null) { - handleExecutorLost(bmAddress.executorId, Some(task.generation)) + handleExecutorLost(bmAddress.executorId, Some(task.epoch)) } case ExceptionFailure(className, description, stackTrace, metrics) => @@ -656,7 +699,7 @@ class DAGScheduler( case other => // Unrecognized failure - abort all jobs depending on this stage - abortStage(idToStage(task.stageId), task + " failed: " + other) + abortStage(stageIdToStage(task.stageId), task + " failed: " + other) } } @@ -664,36 +707,36 @@ class DAGScheduler( * Responds to an executor being lost. This is called inside the event loop, so it assumes it can * modify the scheduler's internal state. Use executorLost() to post a loss event from outside. * - * Optionally the generation during which the failure was caught can be passed to avoid allowing + * Optionally the epoch during which the failure was caught can be passed to avoid allowing * stray fetch failures from possibly retriggering the detection of a node as lost. */ - private def handleExecutorLost(execId: String, maybeGeneration: Option[Long] = None) { - val currentGeneration = maybeGeneration.getOrElse(mapOutputTracker.getGeneration) - if (!failedGeneration.contains(execId) || failedGeneration(execId) < currentGeneration) { - failedGeneration(execId) = currentGeneration - logInfo("Executor lost: %s (generation %d)".format(execId, currentGeneration)) + private def handleExecutorLost(execId: String, maybeEpoch: Option[Long] = None) { + val currentEpoch = maybeEpoch.getOrElse(mapOutputTracker.getEpoch) + if (!failedEpoch.contains(execId) || failedEpoch(execId) < currentEpoch) { + failedEpoch(execId) = currentEpoch + logInfo("Executor lost: %s (epoch %d)".format(execId, currentEpoch)) blockManagerMaster.removeExecutor(execId) // TODO: This will be really slow if we keep accumulating shuffle map stages for ((shuffleId, stage) <- shuffleToMapStage) { stage.removeOutputsOnExecutor(execId) val locs = stage.outputLocs.map(list => if (list.isEmpty) null else list.head).toArray - mapOutputTracker.registerMapOutputs(shuffleId, locs, true) + mapOutputTracker.registerMapOutputs(shuffleId, locs, changeEpoch = true) } if (shuffleToMapStage.isEmpty) { - mapOutputTracker.incrementGeneration() + mapOutputTracker.incrementEpoch() } clearCacheLocs() } else { logDebug("Additional executor lost message for " + execId + - "(generation " + currentGeneration + ")") + "(epoch " + currentEpoch + ")") } } - private def handleExecutorGained(execId: String, hostPort: String) { - // remove from failedGeneration(execId) ? 
- if (failedGeneration.contains(execId)) { - logInfo("Host gained which was in lost list earlier: " + hostPort) - failedGeneration -= execId + private def handleExecutorGained(execId: String, host: String) { + // remove from failedEpoch(execId) ? + if (failedEpoch.contains(execId)) { + logInfo("Host gained which was in lost list earlier: " + host) + failedEpoch -= execId } } @@ -708,8 +751,8 @@ class DAGScheduler( val job = resultStageToJob(resultStage) val error = new SparkException("Job failed: " + reason) job.listener.jobFailed(error) - sparkListeners.foreach(_.onJobEnd(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage))))) - idToActiveJob -= resultStage.priority + listenerBus.post(SparkListenerJobEnd(job, JobFailed(error, Some(failedStage)))) + idToActiveJob -= resultStage.jobId activeJobs -= job resultStageToJob -= resultStage } @@ -733,7 +776,7 @@ class DAGScheduler( for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_,_] => - val mapStage = getShuffleMapStage(shufDep, stage.priority) + val mapStage = getShuffleMapStage(shufDep, stage.jobId) if (!mapStage.isAvailable) { visitedStages += mapStage visit(mapStage.rdd) @@ -748,16 +791,23 @@ class DAGScheduler( visitedRdds.contains(target.rdd) } - private def getPreferredLocs(rdd: RDD[_], partition: Int): List[String] = { + /** + * Synchronized method that might be called from other threads. + * @param rdd whose partitions are to be looked at + * @param partition to lookup locality information for + * @return list of machines that are preferred by the partition + */ + private[spark] + def getPreferredLocs(rdd: RDD[_], partition: Int): Seq[TaskLocation] = synchronized { // If the partition is cached, return the cache locations val cached = getCacheLocs(rdd)(partition) - if (cached != Nil) { + if (!cached.isEmpty) { return cached } // If the RDD has some placement preferences (as is the case for input RDDs), get those val rddPrefs = rdd.preferredLocations(rdd.partitions(partition)).toList - if (rddPrefs != Nil) { - return rddPrefs + if (!rddPrefs.isEmpty) { + return rddPrefs.map(host => TaskLocation(host)) } // If the RDD has narrow dependencies, pick the first partition of the first narrow dep // that has any placement preferences. Ideally we would choose based on transfer sizes, @@ -771,13 +821,13 @@ class DAGScheduler( } case _ => }) - return Nil + Nil } private def cleanup(cleanupTime: Long) { - var sizeBefore = idToStage.size - idToStage.clearOldValues(cleanupTime) - logInfo("idToStage " + sizeBefore + " --> " + idToStage.size) + var sizeBefore = stageIdToStage.size + stageIdToStage.clearOldValues(cleanupTime) + logInfo("stageIdToStage " + sizeBefore + " --> " + stageIdToStage.size) sizeBefore = shuffleToMapStage.size shuffleToMapStage.clearOldValues(cleanupTime) diff --git a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala index 506c87f65b..0d99670648 100644 --- a/core/src/main/scala/spark/scheduler/DAGSchedulerEvent.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerEvent.scala @@ -15,15 +15,16 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection.mutable.Map -import spark._ -import spark.executor.TaskMetrics +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.executor.TaskMetrics /** * Types of events that can be handled by the DAGScheduler. The DAGScheduler uses an event queue @@ -43,6 +44,8 @@ private[spark] case class JobSubmitted( properties: Properties = null) extends DAGSchedulerEvent +private[spark] case class BeginEvent(task: Task[_], taskInfo: TaskInfo) extends DAGSchedulerEvent + private[spark] case class CompletionEvent( task: Task[_], reason: TaskEndReason, @@ -52,9 +55,7 @@ private[spark] case class CompletionEvent( taskMetrics: TaskMetrics) extends DAGSchedulerEvent -private[spark] case class ExecutorGained(execId: String, hostPort: String) extends DAGSchedulerEvent { - Utils.checkHostPort(hostPort, "Required hostport") -} +private[spark] case class ExecutorGained(execId: String, host: String) extends DAGSchedulerEvent private[spark] case class ExecutorLost(execId: String) extends DAGSchedulerEvent diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala new file mode 100644 index 0000000000..ce0dc9093d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGSchedulerSource.scala @@ -0,0 +1,30 @@ +package org.apache.spark.scheduler + +import com.codahale.metrics.{Gauge,MetricRegistry} + +import org.apache.spark.metrics.source.Source + +private[spark] class DAGSchedulerSource(val dagScheduler: DAGScheduler) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "DAGScheduler" + + metricRegistry.register(MetricRegistry.name("stage", "failedStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.failed.size + }) + + metricRegistry.register(MetricRegistry.name("stage", "runningStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.running.size + }) + + metricRegistry.register(MetricRegistry.name("stage", "waitingStages", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.waiting.size + }) + + metricRegistry.register(MetricRegistry.name("job", "allJobs", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.nextJobId.get() + }) + + metricRegistry.register(MetricRegistry.name("job", "activeJobs", "number"), new Gauge[Int] { + override def getValue: Int = dagScheduler.activeJobs.size + }) +} diff --git a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala index 65f8c3200e..370ccd183c 100644 --- a/core/src/main/scala/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler -import spark.Logging +import org.apache.spark.{Logging, SparkEnv} import scala.collection.immutable.Set import org.apache.hadoop.mapred.{FileInputFormat, JobConf} import org.apache.hadoop.security.UserGroupInformation @@ -26,7 +26,6 @@ import org.apache.hadoop.mapreduce.Job import org.apache.hadoop.conf.Configuration import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ -import spark.deploy.SparkHadoopUtil /** @@ -88,8 +87,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... private def prefLocsFromMapreduceInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val conf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(conf); + env.hadoop.addCredentials(conf) FileInputFormat.setInputPaths(conf, path) val instance: org.apache.hadoop.mapreduce.InputFormat[_, _] = @@ -108,8 +108,9 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl // This method does not expect failures, since validate has already passed ... private def prefLocsFromMapredInputFormat(): Set[SplitInfo] = { + val env = SparkEnv.get val jobConf = new JobConf(configuration) - SparkHadoopUtil.addCredentials(jobConf); + env.hadoop.addCredentials(jobConf) FileInputFormat.setInputPaths(jobConf, path) val instance: org.apache.hadoop.mapred.InputFormat[_, _] = diff --git a/core/src/main/scala/spark/scheduler/JobListener.scala b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala index af108b8fec..50c2b9acd6 100644 --- a/core/src/main/scala/spark/scheduler/JobListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobListener.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler /** * Interface used to listen for job completion or failure events after submitting a job to the diff --git a/core/src/main/scala/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala index 85b5ddd4a8..c8b78bf00a 100644 --- a/core/src/main/scala/spark/scheduler/JobLogger.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -15,7 +15,7 @@ * limitations under the License.
*/
-package spark.scheduler
+package org.apache.spark.scheduler
import java.io.PrintWriter
import java.io.File
@@ -23,11 +23,14 @@ import java.io.FileNotFoundException
import java.text.SimpleDateFormat
import java.util.{Date, Properties}
import java.util.concurrent.LinkedBlockingQueue
+
import scala.collection.mutable.{Map, HashMap, ListBuffer}
import scala.io.Source
-import spark._
-import spark.executor.TaskMetrics
-import spark.scheduler.cluster.TaskInfo
+
+import org.apache.spark._
+import org.apache.spark.rdd.RDD
+import org.apache.spark.executor.TaskMetrics
+import org.apache.spark.scheduler.cluster.TaskInfo
// Used to record runtime information for each job, including RDD graph
// tasks' start/stop shuffle information and information from outside
@@ -53,29 +56,6 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
def getJobIDToStages = jobIDToStages
def getEventQueue = eventQueue
- new Thread("JobLogger") {
- setDaemon(true)
- override def run() {
- while (true) {
- val event = eventQueue.take
- logDebug("Got event of type " + event.getClass.getName)
- event match {
- case SparkListenerJobStart(job, properties) =>
- processJobStartEvent(job, properties)
- case SparkListenerStageSubmitted(stage, taskSize) =>
- processStageSubmittedEvent(stage, taskSize)
- case StageCompleted(stageInfo) =>
- processStageCompletedEvent(stageInfo)
- case SparkListenerJobEnd(job, result) =>
- processJobEndEvent(job, result)
- case SparkListenerTaskEnd(task, reason, taskInfo, taskMetrics) =>
- processTaskEndEvent(task, reason, taskInfo, taskMetrics)
- case _ =>
- }
- }
- }
- }.start()
-
// Create a folder for log files, the folder's name is the creation time of the jobLogger
protected def createLogDir() {
val dir = new File(logDir + "/" + logDirName + "/")
@@ -123,7 +103,7 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
stageIDToJobID.get(stageID).foreach(jobID => jobLogInfo(jobID, info, withTime))
protected def buildJobDep(jobID: Int, stage: Stage) {
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobIDToStages.get(jobID) match {
case Some(stageList) => stageList += stage
case None => val stageList = new ListBuffer[Stage]
@@ -199,12 +179,12 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}else{
stageInfo = "STAGE_ID=" + stage.id + " RESULT_STAGE"
}
- if (stage.priority == jobID) {
+ if (stage.jobId == jobID) {
jobLogInfo(jobID, indentString(indent) + stageInfo, false)
recordRddInStageGraph(jobID, stage.rdd, indent)
stage.parents.foreach(recordStageDepGraph(jobID, _, indent + 2))
} else
- jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.priority, false)
+ jobLogInfo(jobID, indentString(indent) + stageInfo + " JOB_ID=" + stage.jobId, false)
}
// Record task metrics into job log files
@@ -236,37 +216,32 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) {
- eventQueue.put(stageSubmitted)
- }
-
- protected def processStageSubmittedEvent(stage: Stage, taskSize: Int) {
- stageLogInfo(stage.id, "STAGE_ID=" + stage.id + " STATUS=SUBMITTED" + " TASK_SIZE=" + taskSize)
+ stageLogInfo(
+ stageSubmitted.stage.id,
+ "STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format(
+ stageSubmitted.stage.id, stageSubmitted.taskSize))
}
override def onStageCompleted(stageCompleted: StageCompleted) {
- eventQueue.put(stageCompleted)
- }
-
- protected def processStageCompletedEvent(stageInfo: StageInfo) {
- stageLogInfo(stageInfo.stage.id, "STAGE_ID=" +
- stageInfo.stage.id + " STATUS=COMPLETED")
+ stageLogInfo(
+ stageCompleted.stageInfo.stage.id,
+ "STAGE_ID=%d STATUS=COMPLETED".format(stageCompleted.stageInfo.stage.id))
}
-
- override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
- eventQueue.put(taskEnd)
- }
- protected def processTaskEndEvent(task: Task[_], reason: TaskEndReason,
- taskInfo: TaskInfo, taskMetrics: TaskMetrics) {
+ override def onTaskStart(taskStart: SparkListenerTaskStart) { }
+
+ override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
+ val task = taskEnd.task
+ val taskInfo = taskEnd.taskInfo
var taskStatus = ""
task match {
case resultTask: ResultTask[_, _] => taskStatus = "TASK_TYPE=RESULT_TASK"
case shuffleMapTask: ShuffleMapTask => taskStatus = "TASK_TYPE=SHUFFLE_MAP_TASK"
}
- reason match {
+ taskEnd.reason match {
case Success => taskStatus += " STATUS=SUCCESS"
- recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskMetrics)
+ recordTaskMetrics(task.stageId, taskStatus, taskInfo, taskEnd.taskMetrics)
case Resubmitted =>
taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId +
" STAGE_ID=" + task.stageId
@@ -285,39 +260,34 @@ class JobLogger(val logDirName: String) extends SparkListener with Logging {
}
override def onJobEnd(jobEnd: SparkListenerJobEnd) {
- eventQueue.put(jobEnd)
- }
-
- protected def processJobEndEvent(job: ActiveJob, reason: JobResult) {
- var info = "JOB_ID=" + job.runId
- reason match {
+ val job = jobEnd.job
+ var info = "JOB_ID=" + job.jobId
+ jobEnd.jobResult match {
case JobSucceeded => info += " STATUS=SUCCESS"
case JobFailed(exception, _) =>
info += " STATUS=FAILED REASON="
exception.getMessage.split("\\s+").foreach(info += _ + "_")
case _ =>
}
- jobLogInfo(job.runId, info.substring(0, info.length - 1).toUpperCase)
- closeLogWriter(job.runId)
+ jobLogInfo(job.jobId, info.substring(0, info.length - 1).toUpperCase)
+ closeLogWriter(job.jobId)
}
protected def recordJobProperties(jobID: Int, properties: Properties) {
if(properties != null) {
- val annotation = properties.getProperty("spark.job.annotation", "")
- jobLogInfo(jobID, annotation, false)
+ val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "")
+ jobLogInfo(jobID, description, false)
}
}
override def onJobStart(jobStart: SparkListenerJobStart) {
- eventQueue.put(jobStart)
- }
-
- protected def processJobStartEvent(job: ActiveJob, properties: Properties) {
- createLogWriter(job.runId)
- recordJobProperties(job.runId, properties)
- buildJobDep(job.runId, job.finalStage)
- recordStageDep(job.runId)
- recordStageDepGraph(job.runId, job.finalStage)
- jobLogInfo(job.runId, "JOB_ID=" + job.runId + " STATUS=STARTED")
+ val job = jobStart.job
+ val properties = jobStart.properties
+ createLogWriter(job.jobId)
+ recordJobProperties(job.jobId, properties)
+ buildJobDep(job.jobId, job.finalStage)
+ recordStageDep(job.jobId)
+ recordStageDepGraph(job.jobId, job.finalStage)
+ jobLogInfo(job.jobId, "JOB_ID=" + job.jobId + " STATUS=STARTED")
}
}
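[Editor's note] With JobLogger's private event-draining thread removed above, every SparkListener callback is now invoked by the shared SparkListenerBus daemon thread. A minimal listener written against the callback signatures in this diff could look like the sketch below; the TimingListener name and its timing map are hypothetical, and because Stage is private[spark], such a class is assumed to live inside the org.apache.spark package tree.

import scala.collection.mutable.HashMap

import org.apache.spark.Logging
import org.apache.spark.scheduler._

// Hypothetical listener: logs per-stage wall-clock time using only the
// callbacks defined in this commit. Handlers run on the bus's daemon
// thread, so they must stay cheap and non-blocking.
class TimingListener extends SparkListener with Logging {
  private val submitted = new HashMap[Int, Long]

  override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) {
    submitted(stageSubmitted.stage.id) = System.currentTimeMillis
  }

  override def onStageCompleted(stageCompleted: StageCompleted) {
    val id = stageCompleted.stageInfo.stage.id
    for (start <- submitted.remove(id)) {
      logInfo("Stage %d took %d ms".format(id, System.currentTimeMillis - start))
    }
  }
}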
diff --git a/core/src/main/scala/spark/scheduler/JobResult.scala b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala index a61b335152..c381348a8d 100644 --- a/core/src/main/scala/spark/scheduler/JobResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler /** * A result of a job in the DAGScheduler. diff --git a/core/src/main/scala/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala index 69cd161c1f..200d881799 100644 --- a/core/src/main/scala/spark/scheduler/JobWaiter.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import scala.collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/scheduler/MapStatus.scala b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala index 2f6a68ee85..1c61687f28 100644 --- a/core/src/main/scala/spark/scheduler/MapStatus.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/MapStatus.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.storage.BlockManagerId +import org.apache.spark.storage.BlockManagerId import java.io.{ObjectOutput, ObjectInput, Externalizable} /** diff --git a/core/src/main/scala/spark/scheduler/ResultTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala index 361b1e6b91..2b007cbe82 100644 --- a/core/src/main/scala/spark/scheduler/ResultTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ResultTask.scala @@ -15,13 +15,16 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark._ import java.io._ -import util.{MetadataCleaner, TimeStampedHashMap} import java.util.zip.{GZIPInputStream, GZIPOutputStream} +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDDCheckpointData +import org.apache.spark.util.{MetadataCleaner, TimeStampedHashMap} + private[spark] object ResultTask { // A simple map between the stage id to the serialized byte array of a task. 
@@ -51,15 +54,13 @@ private[spark] object ResultTask { } def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], (TaskContext, Iterator[_]) => _) = { - synchronized { - val loader = Thread.currentThread.getContextClassLoader - val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) - val ser = SparkEnv.get.closureSerializer.newInstance - val objIn = ser.deserializeStream(in) - val rdd = objIn.readObject().asInstanceOf[RDD[_]] - val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _] - return (rdd, func) - } + val loader = Thread.currentThread.getContextClassLoader + val in = new GZIPInputStream(new ByteArrayInputStream(bytes)) + val ser = SparkEnv.get.closureSerializer.newInstance + val objIn = ser.deserializeStream(in) + val rdd = objIn.readObject().asInstanceOf[RDD[_]] + val func = objIn.readObject().asInstanceOf[(TaskContext, Iterator[_]) => _] + return (rdd, func) } def clearCache() { @@ -75,7 +76,7 @@ private[spark] class ResultTask[T, U]( var rdd: RDD[T], var func: (TaskContext, Iterator[T]) => U, var partition: Int, - @transient locs: Seq[String], + @transient locs: Seq[TaskLocation], val outputId: Int) extends Task[U](stageId) with Externalizable { @@ -87,11 +88,8 @@ private[spark] class ResultTask[T, U]( rdd.partitions(partition) } - private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } override def run(attemptId: Long): U = { @@ -104,7 +102,7 @@ private[spark] class ResultTask[T, U]( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ResultTask(" + stageId + ", " + partition + ")" @@ -118,6 +116,7 @@ private[spark] class ResultTask[T, U]( out.write(bytes) out.writeInt(partition) out.writeInt(outputId) + out.writeLong(epoch) out.writeObject(split) } } @@ -132,6 +131,7 @@ private[spark] class ResultTask[T, U]( func = func_.asInstanceOf[(TaskContext, Iterator[T]) => U] partition = in.readInt() val outputId = in.readInt() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } } diff --git a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala index 1c25605f75..764775fede 100644 --- a/core/src/main/scala/spark/scheduler/ShuffleMapTask.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala @@ -15,24 +15,19 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.io._ -import java.util.{HashMap => JHashMap} import java.util.zip.{GZIPInputStream, GZIPOutputStream} -import scala.collection.mutable.{ArrayBuffer, HashMap} -import scala.collection.JavaConversions._ +import scala.collection.mutable.HashMap -import it.unimi.dsi.fastutil.io.FastBufferedOutputStream - -import com.ning.compress.lzf.LZFInputStream -import com.ning.compress.lzf.LZFOutputStream - -import spark._ -import spark.executor.ShuffleWriteMetrics -import spark.storage._ -import spark.util.{TimeStampedHashMap, MetadataCleaner} +import org.apache.spark._ +import org.apache.spark.executor.ShuffleWriteMetrics +import org.apache.spark.storage._ +import org.apache.spark.util.{TimeStampedHashMap, MetadataCleaner} +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.RDDCheckpointData private[spark] object ShuffleMapTask { @@ -95,25 +90,18 @@ private[spark] class ShuffleMapTask( var rdd: RDD[_], var dep: ShuffleDependency[_,_], var partition: Int, - @transient private var locs: Seq[String]) + @transient private var locs: Seq[TaskLocation]) extends Task[MapStatus](stageId) with Externalizable with Logging { protected def this() = this(0, null, null, 0, null) - @transient private val preferredLocs: Seq[String] = if (locs == null) Nil else locs.toSet.toSeq - - { - // DEBUG code - preferredLocs.foreach (hostPort => Utils.checkHost(Utils.parseHostPort(hostPort)._1, "preferredLocs : " + preferredLocs)) + @transient private val preferredLocs: Seq[TaskLocation] = { + if (locs == null) Nil else locs.toSet.toSeq } - var split = if (rdd == null) { - null - } else { - rdd.partitions(partition) - } + var split = if (rdd == null) null else rdd.partitions(partition) override def writeExternal(out: ObjectOutput) { RDDCheckpointData.synchronized { @@ -123,7 +111,7 @@ private[spark] class ShuffleMapTask( out.writeInt(bytes.length) out.write(bytes) out.writeInt(partition) - out.writeLong(generation) + out.writeLong(epoch) out.writeObject(split) } } @@ -137,7 +125,7 @@ private[spark] class ShuffleMapTask( rdd = rdd_ dep = dep_ partition = in.readInt() - generation = in.readLong() + epoch = in.readLong() split = in.readObject().asInstanceOf[Partition] } @@ -159,7 +147,7 @@ private[spark] class ShuffleMapTask( // Write the map output to its associated buckets. for (elem <- rdd.iterator(split, taskContext)) { - val pair = elem.asInstanceOf[(Any, Any)] + val pair = elem.asInstanceOf[Product2[Any, Any]] val bucketId = dep.partitioner.getPartition(pair._1) buckets.writers(bucketId).write(pair) } @@ -197,7 +185,7 @@ private[spark] class ShuffleMapTask( } } - override def preferredLocations: Seq[String] = preferredLocs + override def preferredLocations: Seq[TaskLocation] = preferredLocs override def toString = "ShuffleMapTask(%d, %d)".format(stageId, partition) } diff --git a/core/src/main/scala/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 4fb1c5d42d..c3cf4b8907 100644 --- a/core/src/main/scala/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -15,27 +15,30 @@ * limitations under the License. 
*/ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties -import spark.scheduler.cluster.TaskInfo -import spark.util.Distribution -import spark.{Logging, SparkContext, TaskEndReason, Utils} -import spark.executor.TaskMetrics +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.util.{Utils, Distribution} +import org.apache.spark.{Logging, SparkContext, TaskEndReason} +import org.apache.spark.executor.TaskMetrics sealed trait SparkListenerEvents -case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int) extends SparkListenerEvents +case class SparkListenerStageSubmitted(stage: Stage, taskSize: Int, properties: Properties) + extends SparkListenerEvents case class StageCompleted(val stageInfo: StageInfo) extends SparkListenerEvents +case class SparkListenerTaskStart(task: Task[_], taskInfo: TaskInfo) extends SparkListenerEvents + case class SparkListenerTaskEnd(task: Task[_], reason: TaskEndReason, taskInfo: TaskInfo, taskMetrics: TaskMetrics) extends SparkListenerEvents -case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null) +case class SparkListenerJobStart(job: ActiveJob, properties: Properties = null) extends SparkListenerEvents -case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult) +case class SparkListenerJobEnd(job: ActiveJob, jobResult: JobResult) extends SparkListenerEvents trait SparkListener { @@ -43,12 +46,17 @@ trait SparkListener { * Called when a stage is completed, with information on the completed stage */ def onStageCompleted(stageCompleted: StageCompleted) { } - + /** * Called when a stage is submitted */ def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { } - + + /** + * Called when a task starts + */ + def onTaskStart(taskEnd: SparkListenerTaskStart) { } + /** * Called when a task ends */ @@ -58,12 +66,12 @@ trait SparkListener { * Called when a job starts */ def onJobStart(jobStart: SparkListenerJobStart) { } - + /** * Called when a job ends */ def onJobEnd(jobEnd: SparkListenerJobEnd) { } - + } /** @@ -71,7 +79,7 @@ trait SparkListener { */ class StatsReportListener extends SparkListener with Logging { override def onStageCompleted(stageCompleted: StageCompleted) { - import spark.scheduler.StatsReportListener._ + import org.apache.spark.scheduler.StatsReportListener._ implicit val sc = stageCompleted this.logInfo("Finished stage: " + stageCompleted.stageInfo) showMillisDistribution("task runtime:", (info, _) => Some(info.duration)) @@ -145,7 +153,7 @@ object StatsReportListener extends Logging { } def showBytesDistribution(heading: String, dist: Distribution) { - showDistribution(heading, dist, (d => Utils.memoryBytesToString(d.toLong)): Double => String) + showDistribution(heading, dist, (d => Utils.bytesToString(d.toLong)): Double => String) } def showMillisDistribution(heading: String, dOpt: Option[Distribution]) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala new file mode 100644 index 0000000000..a65e1ecd6d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListenerBus.scala @@ -0,0 +1,74 @@ +/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler
+
+import java.util.concurrent.LinkedBlockingQueue
+
+import scala.collection.mutable.{ArrayBuffer, SynchronizedBuffer}
+
+import org.apache.spark.Logging
+
+/** Asynchronously passes SparkListenerEvents to registered SparkListeners. */
+private[spark] class SparkListenerBus() extends Logging {
+ private val sparkListeners = new ArrayBuffer[SparkListener]() with SynchronizedBuffer[SparkListener]
+
+ /* Cap the capacity of the SparkListenerEvent queue so we get an explicit error (rather than
+ * an OOM exception) if it's perpetually being added to more quickly than it's being drained. */
+ private val EVENT_QUEUE_CAPACITY = 10000
+ private val eventQueue = new LinkedBlockingQueue[SparkListenerEvents](EVENT_QUEUE_CAPACITY)
+ private var queueFullErrorMessageLogged = false
+
+ new Thread("SparkListenerBus") {
+ setDaemon(true)
+ override def run() {
+ while (true) {
+ val event = eventQueue.take
+ event match {
+ case stageSubmitted: SparkListenerStageSubmitted =>
+ sparkListeners.foreach(_.onStageSubmitted(stageSubmitted))
+ case stageCompleted: StageCompleted =>
+ sparkListeners.foreach(_.onStageCompleted(stageCompleted))
+ case jobStart: SparkListenerJobStart =>
+ sparkListeners.foreach(_.onJobStart(jobStart))
+ case jobEnd: SparkListenerJobEnd =>
+ sparkListeners.foreach(_.onJobEnd(jobEnd))
+ case taskStart: SparkListenerTaskStart =>
+ sparkListeners.foreach(_.onTaskStart(taskStart))
+ case taskEnd: SparkListenerTaskEnd =>
+ sparkListeners.foreach(_.onTaskEnd(taskEnd))
+ case _ =>
+ }
+ }
+ }
+ }.start()
+
+ def addListener(listener: SparkListener) {
+ sparkListeners += listener
+ }
+
+ def post(event: SparkListenerEvents) {
+ val eventAdded = eventQueue.offer(event)
+ if (!eventAdded && !queueFullErrorMessageLogged) {
+ logError("Dropping SparkListenerEvent because no remaining room in event queue. " +
+ "This likely means one of the SparkListeners is too slow and cannot keep up with the " +
+ "rate at which tasks are being started by the scheduler.")
+ queueFullErrorMessageLogged = true
+ }
+ }
+}
+
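[Editor's note] A usage sketch for the bus above, not part of the diff: registration and posting are decoupled, post() never blocks, and once the 10000-entry queue fills, further events are dropped with a single logged error. Since SparkListenerBus is private[spark], the wiring shown is assumed to run inside the org.apache.spark package, and `job` stands for some ActiveJob held by the scheduler.

val bus = new SparkListenerBus()
bus.addListener(new SparkListener {
  override def onJobEnd(jobEnd: SparkListenerJobEnd) {
    println("Job " + jobEnd.job.jobId + " ended with " + jobEnd.jobResult)
  }
})
// Fire-and-forget: delivery happens later on the bus's daemon thread,
// keeping slow listeners off the DAGScheduler's event loop.
bus.post(SparkListenerJobEnd(job, JobSucceeded)) // `job`: an ActiveJob assumed in scope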
diff --git a/core/src/main/scala/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala index 4e3661ec5d..5b40a3eb29 100644 --- a/core/src/main/scala/spark/scheduler/SplitInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import collection.mutable.ArrayBuffer diff --git a/core/src/main/scala/spark/scheduler/Stage.scala b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala index 5428daeb94..aa293dc6b3 100644 --- a/core/src/main/scala/spark/scheduler/Stage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Stage.scala @@ -15,12 +15,11 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import java.net.URI - -import spark._ -import spark.storage.BlockManagerId +import org.apache.spark._ +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.BlockManagerId /** * A stage is a set of independent tasks all computing the same function that need to run as part @@ -33,15 +32,16 @@ import spark.storage.BlockManagerId * initiated a job (e.g. count(), save(), etc). For shuffle map stages, we also track the nodes * that each output partition is on. * - * Each Stage also has a priority, which is (by default) based on the job it was submitted in. - * This allows Stages from earlier jobs to be computed first or recovered faster on failure. + * Each Stage also has a jobId, identifying the job that first submitted the stage. When FIFO + * scheduling is used, this allows Stages from earlier jobs to be computed first or recovered + * faster on failure. */ private[spark] class Stage( val id: Int, val rdd: RDD[_], val shuffleDep: Option[ShuffleDependency[_,_]], // Output shuffle if stage is a map stage val parents: List[Stage], - val priority: Int, + val jobId: Int, callSite: Option[String]) extends Logging { diff --git a/core/src/main/scala/spark/scheduler/StageInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala index c4026f995a..72cb1c9ce8 100644 --- a/core/src/main/scala/spark/scheduler/StageInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/StageInfo.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection._ -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics case class StageInfo( val stage: Stage, diff --git a/core/src/main/scala/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala index 50768d43e0..598d91752a 100644 --- a/core/src/main/scala/spark/scheduler/Task.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala @@ -15,24 +15,24 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.serializer.SerializerInstance +import org.apache.spark.serializer.SerializerInstance import java.io.{DataInputStream, DataOutputStream} import java.nio.ByteBuffer import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.util.ByteBufferInputStream +import org.apache.spark.util.ByteBufferInputStream import scala.collection.mutable.HashMap -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics /** * A task to execute on a worker node. 
*/ private[spark] abstract class Task[T](val stageId: Int) extends Serializable { def run(attemptId: Long): T - def preferredLocations: Seq[String] = Nil + def preferredLocations: Seq[TaskLocation] = Nil - var generation: Long = -1 // Map output tracker generation. Will be set by TaskScheduler. + var epoch: Long = -1 // Map output tracker epoch. Will be set by TaskScheduler. var metrics: Option[TaskMetrics] = None diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala new file mode 100644 index 0000000000..67c9a6760b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskLocation.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +/** + * A location where a task should run. This can either be a host or a (host, executorID) pair. + * In the latter case, we will prefer to launch the task on that executorID, but our next level + * of preference will be executors on the same host if this is not possible. + */ +private[spark] +class TaskLocation private (val host: String, val executorId: Option[String]) extends Serializable { + override def toString: String = "TaskLocation(" + host + ", " + executorId + ")" +} + +private[spark] object TaskLocation { + def apply(host: String, executorId: String) = new TaskLocation(host, Some(executorId)) + + def apply(host: String) = new TaskLocation(host, None) +} diff --git a/core/src/main/scala/spark/scheduler/TaskResult.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala index dc0621ea7b..5c7e5bb977 100644 --- a/core/src/main/scala/spark/scheduler/TaskResult.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskResult.scala @@ -15,22 +15,33 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import java.io._ import scala.collection.mutable.Map -import spark.executor.TaskMetrics +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.{SparkEnv} +import java.nio.ByteBuffer +import org.apache.spark.util.Utils // Task result. Also contains updates to accumulator variables. 
// TODO: Use of distributed cache to return result is a hack to get around // what seems to be a bug with messages over 60KB in libprocess; fix it private[spark] -class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) extends Externalizable { +class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: TaskMetrics) + extends Externalizable +{ def this() = this(null.asInstanceOf[T], null, null) override def writeExternal(out: ObjectOutput) { - out.writeObject(value) + + val objectSer = SparkEnv.get.serializer.newInstance() + val bb = objectSer.serialize(value) + + out.writeInt(bb.remaining()) + Utils.writeByteBuffer(bb, out) + out.writeInt(accumUpdates.size) for ((key, value) <- accumUpdates) { out.writeLong(key) @@ -40,7 +51,14 @@ class TaskResult[T](var value: T, var accumUpdates: Map[Long, Any], var metrics: } override def readExternal(in: ObjectInput) { - value = in.readObject().asInstanceOf[T] + + val objectSer = SparkEnv.get.serializer.newInstance() + + val blen = in.readInt() + val byteVal = new Array[Byte](blen) + in.readFully(byteVal) + value = objectSer.deserialize(ByteBuffer.wrap(byteVal)) + val numUpdates = in.readInt if (numUpdates == 0) { accumUpdates = null diff --git a/core/src/main/scala/spark/scheduler/TaskScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala index 5188308006..63be8ba3f5 100644 --- a/core/src/main/scala/spark/scheduler/TaskScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskScheduler.scala @@ -15,8 +15,10 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler +import org.apache.spark.scheduler.cluster.Pool +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode /** * Low-level task scheduler interface, implemented by both ClusterScheduler and LocalScheduler. * These schedulers get sets of tasks submitted to them from the DAGScheduler for each stage, @@ -25,6 +27,11 @@ package spark.scheduler * the TaskSchedulerListener interface. */ private[spark] trait TaskScheduler { + + def rootPool: Pool + + def schedulingMode: SchedulingMode + def start(): Unit // Invoked after system has successfully initialized (typically in spark context). diff --git a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala index 245e7ccb52..83be051c1a 100644 --- a/core/src/main/scala/spark/scheduler/TaskSchedulerListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerListener.scala @@ -15,24 +15,27 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler -import spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.cluster.TaskInfo import scala.collection.mutable.Map -import spark.TaskEndReason -import spark.executor.TaskMetrics +import org.apache.spark.TaskEndReason +import org.apache.spark.executor.TaskMetrics /** * Interface for getting events back from the TaskScheduler. */ private[spark] trait TaskSchedulerListener { + // A task has started. + def taskStarted(task: Task[_], taskInfo: TaskInfo) + // A task has finished or failed. def taskEnded(task: Task[_], reason: TaskEndReason, result: Any, accumUpdates: Map[Long, Any], taskInfo: TaskInfo, taskMetrics: TaskMetrics): Unit // A node was added to the cluster. 
- def executorGained(execId: String, hostPort: String): Unit + def executorGained(execId: String, host: String): Unit // A node was lost from the cluster. def executorLost(execId: String): Unit diff --git a/core/src/main/scala/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala index dc3550dd0b..c3ad325156 100644 --- a/core/src/main/scala/spark/scheduler/TaskSet.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler +package org.apache.spark.scheduler import java.util.Properties diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala new file mode 100644 index 0000000000..3196ab5022 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterScheduler.scala @@ -0,0 +1,440 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + +import java.lang.{Boolean => JBoolean} + +import scala.collection.mutable.ArrayBuffer +import scala.collection.mutable.HashMap +import scala.collection.mutable.HashSet + +import org.apache.spark._ +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode +import java.nio.ByteBuffer +import java.util.concurrent.atomic.AtomicLong +import java.util.{TimerTask, Timer} + +/** + * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call + * initialize() and start(), then submit task sets through the runTasks method. + * + * This class can work with multiple types of clusters by acting through a SchedulerBackend. + * It handles common logic, like determining a scheduling order across jobs, waking up to launch + * speculative tasks, etc. + * + * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple + * threads, so it needs locks in public API methods to maintain its state. In addition, some + * SchedulerBackends synchronize on themselves when they want to send events here, and then + * acquire a lock on us, so we need to make sure that we don't try to lock the backend while + * we are holding a lock on ourselves.
+ */ +private[spark] class ClusterScheduler(val sc: SparkContext) + extends TaskScheduler + with Logging +{ + // How often to check for speculative tasks + val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong + + // Threshold above which we warn user initial TaskSet may be starved + val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong + + val activeTaskSets = new HashMap[String, TaskSetManager] + + val taskIdToTaskSetId = new HashMap[Long, String] + val taskIdToExecutorId = new HashMap[Long, String] + val taskSetTaskIds = new HashMap[String, HashSet[Long]] + + @volatile private var hasReceivedTask = false + @volatile private var hasLaunchedTask = false + private val starvationTimer = new Timer(true) + + // Incrementing Mesos task IDs + val nextTaskId = new AtomicLong(0) + + // Which executor IDs we have executors on + val activeExecutorIds = new HashSet[String] + + // The set of executors we have on each host; this is used to compute hostsAlive, which + // in turn is used to decide when we can attain data locality on a given host + private val executorsByHost = new HashMap[String, HashSet[String]] + + private val executorIdToHost = new HashMap[String, String] + + // JAR server, if any JARs were added by the user to the SparkContext + var jarServer: HttpServer = null + + // URIs of JARs to pass to executor + var jarUris: String = "" + + // Listener object to pass upcalls into + var listener: TaskSchedulerListener = null + + var backend: SchedulerBackend = null + + val mapOutputTracker = SparkEnv.get.mapOutputTracker + + var schedulableBuilder: SchedulableBuilder = null + var rootPool: Pool = null + // default scheduler is FIFO + val schedulingMode: SchedulingMode = SchedulingMode.withName( + System.getProperty("spark.cluster.schedulingmode", "FIFO")) + + override def setListener(listener: TaskSchedulerListener) { + this.listener = listener + } + + def initialize(context: SchedulerBackend) { + backend = context + // temporarily set rootPool name to empty + rootPool = new Pool("", schedulingMode, 0, 0) + schedulableBuilder = { + schedulingMode match { + case SchedulingMode.FIFO => + new FIFOSchedulableBuilder(rootPool) + case SchedulingMode.FAIR => + new FairSchedulableBuilder(rootPool) + } + } + schedulableBuilder.buildPools() + } + + def newTaskId(): Long = nextTaskId.getAndIncrement() + + override def start() { + backend.start() + + if (System.getProperty("spark.speculation", "false").toBoolean) { + new Thread("ClusterScheduler speculation check") { + setDaemon(true) + + override def run() { + logInfo("Starting speculative execution thread") + while (true) { + try { + Thread.sleep(SPECULATION_INTERVAL) + } catch { + case e: InterruptedException => {} + } + checkSpeculatableTasks() + } + } + }.start() + } + } + + override def submitTasks(taskSet: TaskSet) { + val tasks = taskSet.tasks + logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") + this.synchronized { + val manager = new ClusterTaskSetManager(this, taskSet) + activeTaskSets(taskSet.id) = manager + schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) + taskSetTaskIds(taskSet.id) = new HashSet[Long]() + + if (!hasReceivedTask) { + starvationTimer.scheduleAtFixedRate(new TimerTask() { + override def run() { + if (!hasLaunchedTask) { + logWarning("Initial job has not accepted any resources; " + + "check your cluster UI to ensure that workers are registered " + + "and have sufficient memory") + } else { + this.cancel() + 
} + } + }, STARVATION_TIMEOUT, STARVATION_TIMEOUT) + } + hasReceivedTask = true + } + backend.reviveOffers() + } + + def taskSetFinished(manager: TaskSetManager) { + this.synchronized { + activeTaskSets -= manager.taskSet.id + manager.parent.removeSchedulable(manager) + logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) + taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) + taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) + taskSetTaskIds.remove(manager.taskSet.id) + } + } + + /** + * Called by cluster manager to offer resources on slaves. We respond by asking our active task + * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so + * that tasks are balanced across the cluster. + */ + def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = synchronized { + SparkEnv.set(sc.env) + + // Mark each slave as alive and remember its hostname + for (o <- offers) { + executorIdToHost(o.executorId) = o.host + if (!executorsByHost.contains(o.host)) { + executorsByHost(o.host) = new HashSet[String]() + executorGained(o.executorId, o.host) + } + } + + // Build a list of tasks to assign to each worker + val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) + val availableCpus = offers.map(o => o.cores).toArray + val sortedTaskSets = rootPool.getSortedTaskSetQueue() + for (taskSet <- sortedTaskSets) { + logDebug("parentName: %s, name: %s, runningTasks: %s".format( + taskSet.parent.name, taskSet.name, taskSet.runningTasks)) + } + + // Take each TaskSet in our scheduling order, and then offer it each node in increasing order + // of locality levels so that it gets a chance to launch local tasks on all of them. + var launchedTask = false + for (taskSet <- sortedTaskSets; maxLocality <- TaskLocality.values) { + do { + launchedTask = false + for (i <- 0 until offers.size) { + val execId = offers(i).executorId + val host = offers(i).host + for (task <- taskSet.resourceOffer(execId, host, availableCpus(i), maxLocality)) { + tasks(i) += task + val tid = task.taskId + taskIdToTaskSetId(tid) = taskSet.taskSet.id + taskSetTaskIds(taskSet.taskSet.id) += tid + taskIdToExecutorId(tid) = execId + activeExecutorIds += execId + executorsByHost(host) += execId + availableCpus(i) -= 1 + launchedTask = true + } + } + } while (launchedTask) + } + + if (tasks.size > 0) { + hasLaunchedTask = true + } + return tasks + } + + def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + var taskSetToUpdate: Option[TaskSetManager] = None + var failedExecutor: Option[String] = None + var taskFailed = false + synchronized { + try { + if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { + // We lost this entire executor, so remember that it's gone + val execId = taskIdToExecutorId(tid) + if (activeExecutorIds.contains(execId)) { + removeExecutor(execId) + failedExecutor = Some(execId) + } + } + taskIdToTaskSetId.get(tid) match { + case Some(taskSetId) => + if (activeTaskSets.contains(taskSetId)) { + taskSetToUpdate = Some(activeTaskSets(taskSetId)) + } + if (TaskState.isFinished(state)) { + taskIdToTaskSetId.remove(tid) + if (taskSetTaskIds.contains(taskSetId)) { + taskSetTaskIds(taskSetId) -= tid + } + taskIdToExecutorId.remove(tid) + } + if (state == TaskState.FAILED) { + taskFailed = true + } + case None => + logInfo("Ignoring update from TID " + tid + " because its task set is gone") + } + } catch { + case e: Exception => logError("Exception in statusUpdate", e) + } + } + 
// Update the task set and DAGScheduler without holding a lock on this, since that can deadlock
+    if (taskSetToUpdate != None) {
+      taskSetToUpdate.get.statusUpdate(tid, state, serializedData)
+    }
+    if (failedExecutor != None) {
+      listener.executorLost(failedExecutor.get)
+      backend.reviveOffers()
+    }
+    if (taskFailed) {
+      // Also revive offers if a task failed for some reason other than host lost
+      backend.reviveOffers()
+    }
+  }
+
+  def error(message: String) {
+    synchronized {
+      if (activeTaskSets.size > 0) {
+        // Have each task set throw a SparkException with the error
+        for ((taskSetId, manager) <- activeTaskSets) {
+          try {
+            manager.error(message)
+          } catch {
+            case e: Exception => logError("Exception in error callback", e)
+          }
+        }
+      } else {
+        // No task sets are active but we still got an error. Just exit since this
+        // must mean the error is during registration.
+        // It might be good to do something smarter here in the future.
+        logError("Exiting due to error from cluster scheduler: " + message)
+        System.exit(1)
+      }
+    }
+  }
+
+  override def stop() {
+    if (backend != null) {
+      backend.stop()
+    }
+    if (jarServer != null) {
+      jarServer.stop()
+    }
+
+    // Sleep for an arbitrary 5 seconds to ensure that messages are sent out.
+    // TODO: do something better!
+    Thread.sleep(5000L)
+  }
+
+  override def defaultParallelism() = backend.defaultParallelism()
+
+
+  // Check for speculatable tasks in all our active jobs.
+  def checkSpeculatableTasks() {
+    var shouldRevive = false
+    synchronized {
+      shouldRevive = rootPool.checkSpeculatableTasks()
+    }
+    if (shouldRevive) {
+      backend.reviveOffers()
+    }
+  }
+
+  // Check for pending tasks in all our active jobs.
+  def hasPendingTasks: Boolean = {
+    synchronized {
+      rootPool.hasPendingTasks()
+    }
+  }
+
+  def executorLost(executorId: String, reason: ExecutorLossReason) {
+    var failedExecutor: Option[String] = None
+
+    synchronized {
+      if (activeExecutorIds.contains(executorId)) {
+        val hostPort = executorIdToHost(executorId)
+        logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason))
+        removeExecutor(executorId)
+        failedExecutor = Some(executorId)
+      } else {
+        // We may get multiple executorLost() calls with different loss reasons. For example, one
+        // may be triggered by a dropped connection from the slave while another may be a report
+        // of executor termination from Mesos. We produce log messages for both so we eventually
+        // report the termination reason.
+ logError("Lost an executor " + executorId + " (already removed): " + reason) + } + } + // Call listener.executorLost without holding the lock on this to prevent deadlock + if (failedExecutor != None) { + listener.executorLost(failedExecutor.get) + backend.reviveOffers() + } + } + + /** Remove an executor from all our data structures and mark it as lost */ + private def removeExecutor(executorId: String) { + activeExecutorIds -= executorId + val host = executorIdToHost(executorId) + val execs = executorsByHost.getOrElse(host, new HashSet) + execs -= executorId + if (execs.isEmpty) { + executorsByHost -= host + } + executorIdToHost -= executorId + rootPool.executorLost(executorId, host) + } + + def executorGained(execId: String, host: String) { + listener.executorGained(execId, host) + } + + def getExecutorsAliveOnHost(host: String): Option[Set[String]] = synchronized { + executorsByHost.get(host).map(_.toSet) + } + + def hasExecutorsAliveOnHost(host: String): Boolean = synchronized { + executorsByHost.contains(host) + } + + def isExecutorAlive(execId: String): Boolean = synchronized { + activeExecutorIds.contains(execId) + } + + // By default, rack is unknown + def getRackForHost(value: String): Option[String] = None +} + + +object ClusterScheduler { + /** + * Used to balance containers across hosts. + * + * Accepts a map of hosts to resource offers for that host, and returns a prioritized list of + * resource offers representing the order in which the offers should be used. The resource + * offers are ordered such that we'll allocate one container on each host before allocating a + * second container on any host, and so on, in order to reduce the damage if a host fails. + * + * For example, given <h1, [o1, o2, o3]>, <h2, [o4]>, <h1, [o5, o6]>, returns + * [o1, o5, o4, 02, o6, o3] + */ + def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = { + val _keyList = new ArrayBuffer[K](map.size) + _keyList ++= map.keys + + // order keyList based on population of value in map + val keyList = _keyList.sortWith( + (left, right) => map(left).size > map(right).size + ) + + val retval = new ArrayBuffer[T](keyList.size * 2) + var index = 0 + var found = true + + while (found) { + found = false + for (key <- keyList) { + val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null) + assert(containerList != null) + // Get the index'th entry for this host - if present + if (index < containerList.size){ + retval += containerList.apply(index) + found = true + } + } + index += 1 + } + + retval.toList + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala new file mode 100644 index 0000000000..1b31c8c57e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ClusterTaskSetManager.scala @@ -0,0 +1,712 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.scheduler.cluster
+
+import java.nio.ByteBuffer
+import java.util.{Arrays, NoSuchElementException}
+
+import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.HashMap
+import scala.collection.mutable.HashSet
+import scala.math.max
+import scala.math.min
+
+import org.apache.spark.{FetchFailed, Logging, Resubmitted, SparkEnv, Success, TaskEndReason, TaskState}
+import org.apache.spark.{ExceptionFailure, SparkException, TaskResultTooBigFailure}
+import org.apache.spark.TaskState.TaskState
+import org.apache.spark.scheduler._
+import org.apache.spark.util.{SystemClock, Clock}
+
+
+/**
+ * Schedules the tasks within a single TaskSet in the ClusterScheduler. This class keeps track of
+ * the status of each task, retries tasks if they fail (up to a limited number of times), and
+ * handles locality-aware scheduling for this TaskSet via delay scheduling. The main interfaces
+ * to it are resourceOffer, which asks the TaskSet whether it wants to run a task on one node,
+ * and statusUpdate, which tells it that one of its tasks changed state (e.g. finished).
+ *
+ * THREADING: This class is designed to only be called from code with a lock on the
+ * ClusterScheduler (e.g. its event handlers). It should not be called from other threads.
+ */
+private[spark] class ClusterTaskSetManager(
+    sched: ClusterScheduler,
+    val taskSet: TaskSet,
+    clock: Clock = SystemClock)
+  extends TaskSetManager
+  with Logging
+{
+  // CPUs to request per task
+  val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toInt
+
+  // Maximum times a task is allowed to fail before failing the job
+  val MAX_TASK_FAILURES = System.getProperty("spark.task.maxFailures", "4").toInt
+
+  // Quantile of tasks at which to start speculation
+  val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble
+  val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble
+
+  // Serializer for closures and tasks.
+  val env = SparkEnv.get
+  val ser = env.closureSerializer.newInstance()
+
+  val tasks = taskSet.tasks
+  val numTasks = tasks.length
+  val copiesRunning = new Array[Int](numTasks)
+  val finished = new Array[Boolean](numTasks)
+  val numFailures = new Array[Int](numTasks)
+  val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil)
+  var tasksFinished = 0
+
+  var weight = 1
+  var minShare = 0
+  var runningTasks = 0
+  var priority = taskSet.priority
+  var stageId = taskSet.stageId
+  var name = "TaskSet_" + taskSet.stageId.toString
+  var parent: Schedulable = null
+
+  // Set of pending tasks for each executor. These collections are actually
+  // treated as stacks, in which new tasks are added to the end of the
+  // ArrayBuffer and removed from the end. This makes it faster to detect
+  // tasks that repeatedly fail because whenever a task fails, it is put
+  // back at the top of the stack.
They are also only cleaned up lazily; + // when a task is launched, it remains in all the pending lists except + // the one that it was launched from, but gets removed from them later. + private val pendingTasksForExecutor = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each host. Similar to pendingTasksForExecutor, + // but at host level. + private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] + + // Set of pending tasks for each rack -- similar to the above. + private val pendingTasksForRack = new HashMap[String, ArrayBuffer[Int]] + + // Set containing pending tasks with no locality preferences. + val pendingTasksWithNoPrefs = new ArrayBuffer[Int] + + // Set containing all pending tasks (also used as a stack, as above). + val allPendingTasks = new ArrayBuffer[Int] + + // Tasks that can be speculated. Since these will be a small fraction of total + // tasks, we'll just hold them in a HashSet. + val speculatableTasks = new HashSet[Int] + + // Task index, start and finish time for each task attempt (indexed by task ID) + val taskInfos = new HashMap[Long, TaskInfo] + + // Did the TaskSet fail? + var failed = false + var causeOfFailure = "" + + // How frequently to reprint duplicate exceptions in full, in milliseconds + val EXCEPTION_PRINT_INTERVAL = + System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong + + // Map of recent exceptions (identified by string representation and top stack frame) to + // duplicate count (how many times the same exception has appeared) and time the full exception + // was printed. This should ideally be an LRU map that can drop old exceptions automatically. + val recentExceptions = HashMap[String, (Int, Long)]() + + // Figure out the current map output tracker epoch and set it on all tasks + val epoch = sched.mapOutputTracker.getEpoch + logDebug("Epoch for " + taskSet + ": " + epoch) + for (t <- tasks) { + t.epoch = epoch + } + + // Add all our tasks to the pending lists. We do this in reverse order + // of task index so that tasks with low indices get launched first. + for (i <- (0 until numTasks).reverse) { + addPendingTask(i) + } + + // Figure out which locality levels we have in our TaskSet, so we can do delay scheduling + val myLocalityLevels = computeValidLocalityLevels() + val localityWaits = myLocalityLevels.map(getLocalityWait) // Time to wait at each level + + // Delay scheduling variables: we keep track of our current locality level and the time we + // last launched a task at that level, and move up a level when localityWaits[curLevel] expires. + // We then move down if we manage to launch a "more local" task. + var currentLocalityIndex = 0 // Index of our current locality level in validLocalityLevels + var lastLaunchTime = clock.getTime() // Time we last launched a task at this level + + /** + * Add a task to all the pending-task lists that it should be on. If readding is set, we are + * re-adding the task so only include it in each list if it's not already there. 
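+   * For example, a task that prefers executor "exec-1" on host "hostA" would be added to
+   * pendingTasksForExecutor("exec-1"), pendingTasksForHost("hostA"), allPendingTasks, and,
+   * when the scheduler knows hostA's rack, the corresponding pendingTasksForRack entry
+   * (identifiers here are purely illustrative).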
+   */
+  private def addPendingTask(index: Int, readding: Boolean = false) {
+    // Utility method that adds `index` to a list only if readding=false or it's not already there
+    def addTo(list: ArrayBuffer[Int]) {
+      if (!readding || !list.contains(index)) {
+        list += index
+      }
+    }
+
+    var hadAliveLocations = false
+    for (loc <- tasks(index).preferredLocations) {
+      for (execId <- loc.executorId) {
+        if (sched.isExecutorAlive(execId)) {
+          addTo(pendingTasksForExecutor.getOrElseUpdate(execId, new ArrayBuffer))
+          hadAliveLocations = true
+        }
+      }
+      if (sched.hasExecutorsAliveOnHost(loc.host)) {
+        addTo(pendingTasksForHost.getOrElseUpdate(loc.host, new ArrayBuffer))
+        for (rack <- sched.getRackForHost(loc.host)) {
+          addTo(pendingTasksForRack.getOrElseUpdate(rack, new ArrayBuffer))
+        }
+        hadAliveLocations = true
+      }
+    }
+
+    if (!hadAliveLocations) {
+      // Even though the task might've had preferred locations, all of those hosts or executors
+      // are dead; put it in the no-prefs list so we can schedule it elsewhere right away.
+      addTo(pendingTasksWithNoPrefs)
+    }
+
+    if (!readding) {
+      allPendingTasks += index  // No point scanning this whole list to find the old task there
+    }
+  }
+
+  /**
+   * Return the pending tasks list for a given executor ID, or an empty list if
+   * there is no map entry for that executor
+   */
+  private def getPendingTasksForExecutor(executorId: String): ArrayBuffer[Int] = {
+    pendingTasksForExecutor.getOrElse(executorId, ArrayBuffer())
+  }
+
+  /**
+   * Return the pending tasks list for a given host, or an empty list if
+   * there is no map entry for that host
+   */
+  private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = {
+    pendingTasksForHost.getOrElse(host, ArrayBuffer())
+  }
+
+  /**
+   * Return the pending rack-local task list for a given rack, or an empty list if
+   * there is no map entry for that rack
+   */
+  private def getPendingTasksForRack(rack: String): ArrayBuffer[Int] = {
+    pendingTasksForRack.getOrElse(rack, ArrayBuffer())
+  }
+
+  /**
+   * Dequeue a pending task from the given list and return its index.
+   * Return None if the list is empty.
+   * This method also cleans up any tasks in the list that have already
+   * been launched, since we want that to happen lazily.
+   */
+  private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = {
+    while (!list.isEmpty) {
+      val index = list.last
+      list.trimEnd(1)
+      if (copiesRunning(index) == 0 && !finished(index)) {
+        return Some(index)
+      }
+    }
+    return None
+  }
+
+  /** Check whether a task is currently running an attempt on a given host */
+  private def hasAttemptOnHost(taskIndex: Int, host: String): Boolean = {
+    taskAttempts(taskIndex).exists(_.host == host)
+  }
+
+  /**
+   * Return a speculative task for a given executor if any are available. The task should not have
+   * an attempt running on this host, in case the host is slow. In addition, the task should meet
+   * the given locality constraint.
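+   * The search falls through the same locality ladder as delay scheduling: process-local,
+   * then node-local, then rack-local, then ANY, never exceeding the level the caller allows.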
+   */
+  private def findSpeculativeTask(execId: String, host: String, locality: TaskLocality.Value)
+    : Option[(Int, TaskLocality.Value)] =
+  {
+    speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set
+
+    if (!speculatableTasks.isEmpty) {
+      // Check for process-local or preference-less tasks; note that tasks can be process-local
+      // on multiple nodes when we replicate cached blocks, as in Spark Streaming
+      for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+        val prefs = tasks(index).preferredLocations
+        val executors = prefs.flatMap(_.executorId)
+        if (prefs.size == 0 || executors.contains(execId)) {
+          speculatableTasks -= index
+          return Some((index, TaskLocality.PROCESS_LOCAL))
+        }
+      }
+
+      // Check for node-local tasks
+      if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+        for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+          val locations = tasks(index).preferredLocations.map(_.host)
+          if (locations.contains(host)) {
+            speculatableTasks -= index
+            return Some((index, TaskLocality.NODE_LOCAL))
+          }
+        }
+      }
+
+      // Check for rack-local tasks
+      if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+        for (rack <- sched.getRackForHost(host)) {
+          for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+            // getRackForHost returns Options, so compare against Some(rack), not the bare rack
+            val racks = tasks(index).preferredLocations.map(_.host).map(sched.getRackForHost)
+            if (racks.contains(Some(rack))) {
+              speculatableTasks -= index
+              return Some((index, TaskLocality.RACK_LOCAL))
+            }
+          }
+        }
+      }
+
+      // Check for non-local tasks
+      if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) {
+        for (index <- speculatableTasks if !hasAttemptOnHost(index, host)) {
+          speculatableTasks -= index
+          return Some((index, TaskLocality.ANY))
+        }
+      }
+    }
+
+    return None
+  }
+
+  /**
+   * Dequeue a pending task for a given node and return its index and locality level.
+   * Only search for tasks matching the given locality constraint.
+   */
+  private def findTask(execId: String, host: String, locality: TaskLocality.Value)
+    : Option[(Int, TaskLocality.Value)] =
+  {
+    for (index <- findTaskFromList(getPendingTasksForExecutor(execId))) {
+      return Some((index, TaskLocality.PROCESS_LOCAL))
+    }
+
+    if (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) {
+      for (index <- findTaskFromList(getPendingTasksForHost(host))) {
+        return Some((index, TaskLocality.NODE_LOCAL))
+      }
+    }
+
+    if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) {
+      for {
+        rack <- sched.getRackForHost(host)
+        index <- findTaskFromList(getPendingTasksForRack(rack))
+      } {
+        return Some((index, TaskLocality.RACK_LOCAL))
+      }
+    }
+
+    // Look for no-pref tasks after rack-local tasks since they can run anywhere.
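+    // (Tasks whose preferred hosts or executors were all dead at enqueue time also end up
+    // in this list; see addPendingTask above.)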
+ for (index <- findTaskFromList(pendingTasksWithNoPrefs)) { + return Some((index, TaskLocality.PROCESS_LOCAL)) + } + + if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { + for (index <- findTaskFromList(allPendingTasks)) { + return Some((index, TaskLocality.ANY)) + } + } + + // Finally, if all else has failed, find a speculative task + return findSpeculativeTask(execId, host, locality) + } + + /** + * Respond to an offer of a single slave from the scheduler by finding a task + */ + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] = + { + if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { + val curTime = clock.getTime() + + var allowedLocality = getAllowedLocalityLevel(curTime) + if (allowedLocality > maxLocality) { + allowedLocality = maxLocality // We're not allowed to search for farther-away tasks + } + + findTask(execId, host, allowedLocality) match { + case Some((index, taskLocality)) => { + // Found a task; do some bookkeeping and return a task description + val task = tasks(index) + val taskId = sched.newTaskId() + // Figure out whether this should count as a preferred launch + logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( + taskSet.id, index, taskId, execId, host, taskLocality)) + // Do various bookkeeping + copiesRunning(index) += 1 + val info = new TaskInfo(taskId, index, curTime, execId, host, taskLocality) + taskInfos(taskId) = info + taskAttempts(index) = info :: taskAttempts(index) + // Update our locality level for delay scheduling + currentLocalityIndex = getLocalityIndex(taskLocality) + lastLaunchTime = curTime + // Serialize and return the task + val startTime = clock.getTime() + // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here + // we assume the task can be serialized without exceptions. + val serializedTask = Task.serializeWithDependencies( + task, sched.sc.addedFiles, sched.sc.addedJars, ser) + val timeTaken = clock.getTime() - startTime + increaseRunningTasks(1) + logInfo("Serialized task %s:%d as %d bytes in %d ms".format( + taskSet.id, index, serializedTask.limit, timeTaken)) + val taskName = "task %s:%d".format(taskSet.id, index) + if (taskAttempts(index).size == 1) + taskStarted(task,info) + return Some(new TaskDescription(taskId, execId, taskName, index, serializedTask)) + } + case _ => + } + } + return None + } + + /** + * Get the level we can launch tasks according to delay scheduling, based on current wait time. + */ + private def getAllowedLocalityLevel(curTime: Long): TaskLocality.TaskLocality = { + while (curTime - lastLaunchTime >= localityWaits(currentLocalityIndex) && + currentLocalityIndex < myLocalityLevels.length - 1) + { + // Jump to the next locality level, and remove our waiting time for the current one since + // we don't want to count it again on the next one + lastLaunchTime += localityWaits(currentLocalityIndex) + currentLocalityIndex += 1 + } + myLocalityLevels(currentLocalityIndex) + } + + /** + * Find the index in myLocalityLevels for a given locality. This is also designed to work with + * localities that are not in myLocalityLevels (in case we somehow get those) by returning the + * next-biggest level we have. Uses the fact that the last value in myLocalityLevels is ANY. 
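+   * For example, if myLocalityLevels is [PROCESS_LOCAL, NODE_LOCAL, ANY] and a RACK_LOCAL
+   * task was just launched, the index returned is that of ANY.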
+ */ + def getLocalityIndex(locality: TaskLocality.TaskLocality): Int = { + var index = 0 + while (locality > myLocalityLevels(index)) { + index += 1 + } + index + } + + /** Called by cluster scheduler when one of our tasks changes state */ + override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + SparkEnv.set(env) + state match { + case TaskState.FINISHED => + taskFinished(tid, state, serializedData) + case TaskState.LOST => + taskLost(tid, state, serializedData) + case TaskState.FAILED => + taskLost(tid, state, serializedData) + case TaskState.KILLED => + taskLost(tid, state, serializedData) + case _ => + } + } + + def taskStarted(task: Task[_], info: TaskInfo) { + sched.listener.taskStarted(task, info) + } + + def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) { + val info = taskInfos(tid) + if (info.failed) { + // We might get two task-lost messages for the same task in coarse-grained Mesos mode, + // or even from Mesos itself when acks get delayed. + return + } + val index = info.index + info.markSuccessful() + decreaseRunningTasks(1) + if (!finished(index)) { + tasksFinished += 1 + logInfo("Finished TID %s in %d ms on %s (progress: %d/%d)".format( + tid, info.duration, info.host, tasksFinished, numTasks)) + // Deserialize task result and pass it to the scheduler + try { + val result = ser.deserialize[TaskResult[_]](serializedData) + result.metrics.resultSize = serializedData.limit() + sched.listener.taskEnded( + tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) + } catch { + case cnf: ClassNotFoundException => + val loader = Thread.currentThread().getContextClassLoader + throw new SparkException("ClassNotFound with classloader: " + loader, cnf) + case ex => throw ex + } + // Mark finished and stop if we've finished all the tasks + finished(index) = true + if (tasksFinished == numTasks) { + sched.taskSetFinished(this) + } + } else { + logInfo("Ignoring task-finished event for TID " + tid + + " because task " + index + " is already finished") + } + } + + def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) { + val info = taskInfos(tid) + if (info.failed) { + // We might get two task-lost messages for the same task in coarse-grained Mesos mode, + // or even from Mesos itself when acks get delayed. + return + } + val index = info.index + info.markFailed() + decreaseRunningTasks(1) + if (!finished(index)) { + logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) + copiesRunning(index) -= 1 + // Check if the problem is a map output fetch failure. In that case, this + // task will never succeed on any node, so tell the scheduler about it. 
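+      // (After taskEnded reports the FetchFailed, the DAGScheduler is expected to resubmit
+      // the map stage that produced the missing output rather than retry this task here.)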
+      if (serializedData != null && serializedData.limit() > 0) {
+        val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader)
+        reason match {
+          case fetchFailed: FetchFailed =>
+            logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress)
+            sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null)
+            finished(index) = true
+            tasksFinished += 1
+            sched.taskSetFinished(this)
+            decreaseRunningTasks(runningTasks)
+            return
+
+          case taskResultTooBig: TaskResultTooBigFailure =>
+            logInfo("Loss was due to task %s result exceeding Akka frame size; aborting job".format(
+              tid))
+            abort("Task %s result exceeded Akka frame size".format(tid))
+            return
+
+          case ef: ExceptionFailure =>
+            sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null))
+            val key = ef.description
+            val now = clock.getTime()
+            val (printFull, dupCount) = {
+              if (recentExceptions.contains(key)) {
+                val (dupCount, printTime) = recentExceptions(key)
+                if (now - printTime > EXCEPTION_PRINT_INTERVAL) {
+                  recentExceptions(key) = (0, now)
+                  (true, 0)
+                } else {
+                  recentExceptions(key) = (dupCount + 1, printTime)
+                  (false, dupCount + 1)
+                }
+              } else {
+                recentExceptions(key) = (0, now)
+                (true, 0)
+              }
+            }
+            if (printFull) {
+              val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString))
+              logInfo("Loss was due to %s\n%s\n%s".format(
+                ef.className, ef.description, locs.mkString("\n")))
+            } else {
+              logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount))
+            }
+
+          case _ => {}
+        }
+      }
+      // On non-fetch failures, re-enqueue the task as pending for a max number of retries
+      addPendingTask(index)
+      // Count failed attempts only on FAILED and LOST state (not on KILLED)
+      if (state == TaskState.FAILED || state == TaskState.LOST) {
+        numFailures(index) += 1
+        if (numFailures(index) > MAX_TASK_FAILURES) {
+          logError("Task %s:%d failed more than %d times; aborting job".format(
+            taskSet.id, index, MAX_TASK_FAILURES))
+          abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES))
+        }
+      }
+    } else {
+      logInfo("Ignoring task-lost event for TID " + tid +
+        " because task " + index + " is already finished")
+    }
+  }
+
+  override def error(message: String) {
+    // Save the error message
+    abort("Error: " + message)
+  }
+
+  def abort(message: String) {
+    failed = true
+    causeOfFailure = message
+    // TODO: Kill running tasks if we were not terminated due to a Mesos error
+    sched.listener.taskSetFailed(taskSet, message)
+    decreaseRunningTasks(runningTasks)
+    sched.taskSetFinished(this)
+  }
+
+  override def increaseRunningTasks(taskNum: Int) {
+    runningTasks += taskNum
+    if (parent != null) {
+      parent.increaseRunningTasks(taskNum)
+    }
+  }
+
+  override def decreaseRunningTasks(taskNum: Int) {
+    runningTasks -= taskNum
+    if (parent != null) {
+      parent.decreaseRunningTasks(taskNum)
+    }
+  }
+
+  override def getSchedulableByName(name: String): Schedulable = {
+    return null
+  }
+
+  override def addSchedulable(schedulable: Schedulable) {}
+
+  override def removeSchedulable(schedulable: Schedulable) {}
+
+  override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = {
+    // A leaf TaskSetManager is its own one-element queue; adding `this` to a buffer that
+    // already contained it would offer every task twice per scheduling pass.
+    val sortedTaskSetQueue = new ArrayBuffer[TaskSetManager]()
+    sortedTaskSetQueue += this
+    return sortedTaskSetQueue
+  }
+
+  /** Called by cluster scheduler when an executor is lost so we can re-enqueue our tasks */
+  override def executorLost(execId: String, host: String) {
+    logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id)
+
+    //
Re-enqueue pending tasks for this host based on the status of the cluster -- for example, a + // task that used to have locations on only this host might now go to the no-prefs list. Note + // that it's okay if we add a task to the same queue twice (if it had multiple preferred + // locations), because findTaskFromList will skip already-running tasks. + for (index <- getPendingTasksForExecutor(execId)) { + addPendingTask(index, readding=true) + } + for (index <- getPendingTasksForHost(host)) { + addPendingTask(index, readding=true) + } + + // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage + if (tasks(0).isInstanceOf[ShuffleMapTask]) { + for ((tid, info) <- taskInfos if info.executorId == execId) { + val index = taskInfos(tid).index + if (finished(index)) { + finished(index) = false + copiesRunning(index) -= 1 + tasksFinished -= 1 + addPendingTask(index) + // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our + // stage finishes when a total of tasks.size tasks finish. + sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null) + } + } + } + // Also re-enqueue any tasks that were running on the node + for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { + taskLost(tid, TaskState.KILLED, null) + } + } + + /** + * Check for tasks to be speculated and return true if there are any. This is called periodically + * by the ClusterScheduler. + * + * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that + * we don't scan the whole task set. It might also help to make this sorted by launch time. + */ + override def checkSpeculatableTasks(): Boolean = { + // Can't speculate if we only have one task, or if all tasks have finished. + if (numTasks == 1 || tasksFinished == numTasks) { + return false + } + var foundTasks = false + val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt + logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) + if (tasksFinished >= minFinishedForSpeculation) { + val time = clock.getTime() + val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray + Arrays.sort(durations) + val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1)) + val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) + // TODO: Threshold should also look at standard deviation of task durations and have a lower + // bound based on that. 
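+      // Example with the defaults: in a 100-task set, speculation is considered once
+      // floor(0.75 * 100) = 75 tasks have succeeded, and a running copy becomes
+      // speculatable when it has run longer than max(1.5 * medianDuration, 100) ms.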
+ logDebug("Task length threshold for speculation: " + threshold) + for ((tid, info) <- taskInfos) { + val index = info.index + if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && + !speculatableTasks.contains(index)) { + logInfo( + "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( + taskSet.id, index, info.host, threshold)) + speculatableTasks += index + foundTasks = true + } + } + } + return foundTasks + } + + override def hasPendingTasks(): Boolean = { + numTasks > 0 && tasksFinished < numTasks + } + + private def getLocalityWait(level: TaskLocality.TaskLocality): Long = { + val defaultWait = System.getProperty("spark.locality.wait", "3000") + level match { + case TaskLocality.PROCESS_LOCAL => + System.getProperty("spark.locality.wait.process", defaultWait).toLong + case TaskLocality.NODE_LOCAL => + System.getProperty("spark.locality.wait.node", defaultWait).toLong + case TaskLocality.RACK_LOCAL => + System.getProperty("spark.locality.wait.rack", defaultWait).toLong + case TaskLocality.ANY => + 0L + } + } + + /** + * Compute the locality levels used in this TaskSet. Assumes that all tasks have already been + * added to queues using addPendingTask. + */ + private def computeValidLocalityLevels(): Array[TaskLocality.TaskLocality] = { + import TaskLocality.{PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY} + val levels = new ArrayBuffer[TaskLocality.TaskLocality] + if (!pendingTasksForExecutor.isEmpty && getLocalityWait(PROCESS_LOCAL) != 0) { + levels += PROCESS_LOCAL + } + if (!pendingTasksForHost.isEmpty && getLocalityWait(NODE_LOCAL) != 0) { + levels += NODE_LOCAL + } + if (!pendingTasksForRack.isEmpty && getLocalityWait(RACK_LOCAL) != 0) { + levels += RACK_LOCAL + } + levels += ANY + logDebug("Valid locality levels for " + taskSet + ": " + levels.mkString(", ")) + levels.toArray + } +} diff --git a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala index 8825f2dd24..5077b2b48b 100644 --- a/core/src/main/scala/spark/scheduler/cluster/ExecutorLossReason.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/ExecutorLossReason.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.executor.ExecutorExitCode +import org.apache.spark.executor.ExecutorExitCode /** * Represents an explanation for a executor or whole slave failing or exiting. diff --git a/core/src/main/scala/spark/scheduler/cluster/Pool.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala index 83708f07e1..35b32600da 100644 --- a/core/src/main/scala/spark/scheduler/cluster/Pool.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Pool.scala @@ -15,13 +15,13 @@ * limitations under the License. 
 */
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
 import scala.collection.mutable.ArrayBuffer
 import scala.collection.mutable.HashMap
 
-import spark.Logging
-import spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import org.apache.spark.Logging
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
 
 /**
  * A Schedulable entity that represents a collection of Pools or TaskSetManagers
diff --git a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala
index f557b142c4..f4726450ec 100644
--- a/core/src/main/scala/spark/scheduler/cluster/Schedulable.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/Schedulable.scala
@@ -15,16 +15,20 @@
  * limitations under the License.
 */
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
-import scala.collection.mutable.ArrayBuffer
+import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode
+import scala.collection.mutable.ArrayBuffer
 
 /**
  * An interface for schedulable entities.
 * There are two types of Schedulable entities (Pools and TaskSetManagers).
 */
 private[spark] trait Schedulable {
   var parent: Schedulable
+  // child queues
+  def schedulableQueue: ArrayBuffer[Schedulable]
+  def schedulingMode: SchedulingMode
   def weight: Int
   def minShare: Int
   def runningTasks: Int
diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala
index 95554023c0..d04eeb6b98 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SchedulableBuilder.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulableBuilder.scala
@@ -15,21 +15,16 @@
  * limitations under the License.
*/ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import java.io.{File, FileInputStream, FileOutputStream} +import java.io.{File, FileInputStream, FileOutputStream, FileNotFoundException} +import java.util.Properties -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import scala.util.control.Breaks._ -import scala.xml._ +import scala.xml.XML -import spark.Logging -import spark.scheduler.cluster.SchedulingMode.SchedulingMode +import org.apache.spark.Logging +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode -import java.util.Properties /** * An interface to build Schedulable tree @@ -41,10 +36,11 @@ private[spark] trait SchedulableBuilder { def addTaskSetManager(manager: Schedulable, properties: Properties) } -private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging { +private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) + extends SchedulableBuilder with Logging { override def buildPools() { - //nothing + // nothing } override def addTaskSetManager(manager: Schedulable, properties: Properties) { @@ -52,9 +48,10 @@ private[spark] class FIFOSchedulableBuilder(val rootPool: Pool) extends Schedula } } -private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends SchedulableBuilder with Logging { +private[spark] class FairSchedulableBuilder(val rootPool: Pool) + extends SchedulableBuilder with Logging { - val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file","unspecified") + val schedulerAllocFile = System.getProperty("spark.fairscheduler.allocation.file") val FAIR_SCHEDULER_PROPERTIES = "spark.scheduler.cluster.fair.pool" val DEFAULT_POOL_NAME = "default" val MINIMUM_SHARES_PROPERTY = "minShare" @@ -67,47 +64,53 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula val DEFAULT_WEIGHT = 1 override def buildPools() { + if (schedulerAllocFile != null) { val file = new File(schedulerAllocFile) - if (file.exists()) { - val xml = XML.loadFile(file) - for (poolNode <- (xml \\ POOLS_PROPERTY)) { - - val poolName = (poolNode \ POOL_NAME_PROPERTY).text - var schedulingMode = DEFAULT_SCHEDULING_MODE - var minShare = DEFAULT_MINIMUM_SHARE - var weight = DEFAULT_WEIGHT - - val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text - if (xmlSchedulingMode != "") { - try { - schedulingMode = SchedulingMode.withName(xmlSchedulingMode) - } catch { - case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode") + if (file.exists()) { + val xml = XML.loadFile(file) + for (poolNode <- (xml \\ POOLS_PROPERTY)) { + + val poolName = (poolNode \ POOL_NAME_PROPERTY).text + var schedulingMode = DEFAULT_SCHEDULING_MODE + var minShare = DEFAULT_MINIMUM_SHARE + var weight = DEFAULT_WEIGHT + + val xmlSchedulingMode = (poolNode \ SCHEDULING_MODE_PROPERTY).text + if (xmlSchedulingMode != "") { + try { + schedulingMode = SchedulingMode.withName(xmlSchedulingMode) + } catch { + case e: Exception => logInfo("Error xml schedulingMode, using default schedulingMode") + } } - } - val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text - if (xmlMinShare != "") { - minShare = xmlMinShare.toInt - } + val xmlMinShare = (poolNode \ MINIMUM_SHARES_PROPERTY).text + if (xmlMinShare != "") { + minShare = xmlMinShare.toInt + } - val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text - if (xmlWeight != "") { - 
weight = xmlWeight.toInt - } + val xmlWeight = (poolNode \ WEIGHT_PROPERTY).text + if (xmlWeight != "") { + weight = xmlWeight.toInt + } - val pool = new Pool(poolName, schedulingMode, minShare, weight) - rootPool.addSchedulable(pool) - logInfo("Create new pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( - poolName, schedulingMode, minShare, weight)) + val pool = new Pool(poolName, schedulingMode, minShare, weight) + rootPool.addSchedulable(pool) + logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( + poolName, schedulingMode, minShare, weight)) + } + } else { + throw new java.io.FileNotFoundException( + "Fair scheduler allocation file not found: " + schedulerAllocFile) } } - //finally create "default" pool + // finally create "default" pool if (rootPool.getSchedulableByName(DEFAULT_POOL_NAME) == null) { - val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) + val pool = new Pool(DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, + DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(pool) - logInfo("Create default pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( + logInfo("Created default pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( DEFAULT_POOL_NAME, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) } } @@ -119,10 +122,12 @@ private[spark] class FairSchedulableBuilder(val rootPool: Pool) extends Schedula poolName = properties.getProperty(FAIR_SCHEDULER_PROPERTIES, DEFAULT_POOL_NAME) parentPool = rootPool.getSchedulableByName(poolName) if (parentPool == null) { - //we will create a new pool that user has configured in app instead of being defined in xml file - parentPool = new Pool(poolName,DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) + // we will create a new pool that user has configured in app + // instead of being defined in xml file + parentPool = new Pool(poolName, DEFAULT_SCHEDULING_MODE, + DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT) rootPool.addSchedulable(parentPool) - logInfo("Create pool with name:%s,schedulingMode:%s,minShare:%d,weight:%d".format( + logInfo("Created pool %s, schedulingMode: %s, minShare: %d, weight: %d".format( poolName, DEFAULT_SCHEDULING_MODE, DEFAULT_MINIMUM_SHARE, DEFAULT_WEIGHT)) } } diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala index 4431744ec3..d57eb3276f 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulerBackend.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.{SparkContext, Utils} +import org.apache.spark.{SparkContext} /** * A backend interface for cluster scheduling systems that allows plugging in different ones under diff --git a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala index 69e0ac2a6b..cbeed4731a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/SchedulingAlgorithm.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingAlgorithm.scala @@ -15,7 +15,7 @@ * limitations under the License. 
 */
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
 /**
  * An interface for sort algorithms
diff --git a/core/src/main/scala/spark/SoftReferenceCache.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
index f41a379582..34811389a0 100644
--- a/core/src/main/scala/spark/SoftReferenceCache.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SchedulingMode.scala
@@ -15,21 +15,15 @@
  * limitations under the License.
 */
-package spark
-
-import com.google.common.collect.MapMaker
+package org.apache.spark.scheduler.cluster
 
 /**
- * An implementation of Cache that uses soft references.
+ * "FAIR" and "FIFO" determine which policy is used
+ * to order tasks amongst a Schedulable's sub-queues.
+ * "NONE" is used when a Schedulable has no sub-queues.
 */
-private[spark] class SoftReferenceCache extends Cache {
-  val map = new MapMaker().softValues().makeMap[Any, Any]()
-
-  override def get(datasetId: Any, partition: Int): Any =
-    map.get((datasetId, partition))
+object SchedulingMode extends Enumeration("FAIR", "FIFO", "NONE") {
 
-  override def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = {
-    map.put((datasetId, partition), value)
-    return CachePutSuccess(0)
-  }
+  type SchedulingMode = Value
+  val FAIR, FIFO, NONE = Value
 }
diff --git a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
index 55d6c0a47e..d003bf1bba 100644
--- a/core/src/main/scala/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala
@@ -15,12 +15,13 @@
  * limitations under the License.
 */
-package spark.scheduler.cluster
+package org.apache.spark.scheduler.cluster
 
-import spark.{Utils, Logging, SparkContext}
-import spark.deploy.client.{Client, ClientListener}
-import spark.deploy.{Command, ApplicationDescription}
+import org.apache.spark.{Logging, SparkContext}
+import org.apache.spark.deploy.client.{Client, ClientListener}
+import org.apache.spark.deploy.{Command, ApplicationDescription}
 import scala.collection.mutable.HashMap
+import org.apache.spark.util.Utils
 
 private[spark] class SparkDeploySchedulerBackend(
   scheduler: ClusterScheduler,
@@ -45,9 +46,9 @@ private[spark] class SparkDeploySchedulerBackend(
       System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"),
       StandaloneSchedulerBackend.ACTOR_NAME)
     val args = Seq(driverUrl, "{{EXECUTOR_ID}}", "{{HOSTNAME}}", "{{CORES}}")
-    val command = Command("spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
-    val sparkHome = sc.getSparkHome().getOrElse(
-      throw new IllegalArgumentException("must supply spark home for spark standalone"))
+    val command = Command(
+      "org.apache.spark.executor.StandaloneExecutorBackend", args, sc.executorEnvs)
+    val sparkHome = sc.getSparkHome().getOrElse(null)
     val appDesc = new ApplicationDescription(appName, maxCores, executorMemory, command, sparkHome,
       sc.ui.appUIAddress)
 
@@ -77,7 +78,7 @@ private[spark] class SparkDeploySchedulerBackend(
   override def executorAdded(executorId: String, workerId: String, hostPort: String, cores: Int,
     memory: Int) {
     logInfo("Granted executor ID %s on hostPort %s with %d cores, %s RAM".format(
-      executorId, hostPort, cores, Utils.memoryMegabytesToString(memory)))
+      executorId, hostPort, cores, Utils.megabytesToString(memory)))
   }
 
   override def executorRemoved(executorId: String,
message: String, exitStatus: Option[Int]) { diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala new file mode 100644 index 0000000000..9c36d221f6 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneClusterMessage.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + +import java.nio.ByteBuffer + +import org.apache.spark.TaskState.TaskState +import org.apache.spark.util.{Utils, SerializableBuffer} + + +private[spark] sealed trait StandaloneClusterMessage extends Serializable + +private[spark] object StandaloneClusterMessages { + + // Driver to executors + case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage + + case class RegisteredExecutor(sparkProperties: Seq[(String, String)]) + extends StandaloneClusterMessage + + case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage + + // Executors to driver + case class RegisterExecutor(executorId: String, hostPort: String, cores: Int) + extends StandaloneClusterMessage { + Utils.checkHostPort(hostPort, "Expected host port") + } + + case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, + data: SerializableBuffer) extends StandaloneClusterMessage + + object StatusUpdate { + /** Alternate factory method that takes a ByteBuffer directly for the data field */ + def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer) + : StatusUpdate = { + StatusUpdate(executorId, taskId, state, new SerializableBuffer(data)) + } + } + + // Internal messages in driver + case object ReviveOffers extends StandaloneClusterMessage + + case object StopDriver extends StandaloneClusterMessage + + case class RemoveExecutor(executorId: String, reason: String) extends StandaloneClusterMessage + +} diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala index 03a64e0192..b4ea0be415 100644 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/StandaloneSchedulerBackend.scala @@ -15,19 +15,22 @@ * limitations under the License. 
*/ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster + +import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import akka.actor._ -import akka.util.duration._ +import akka.dispatch.Await import akka.pattern.ask +import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} import akka.util.Duration +import akka.util.duration._ -import spark.{Utils, SparkException, Logging, TaskState} -import akka.dispatch.Await -import java.util.concurrent.atomic.AtomicInteger -import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClientLifeCycleEvent} +import org.apache.spark.{SparkException, Logging, TaskState} +import org.apache.spark.scheduler.cluster.StandaloneClusterMessages._ +import org.apache.spark.util.Utils /** * A standalone scheduler backend, which waits for standalone executors to connect to it through @@ -36,15 +39,15 @@ import akka.remote.{RemoteClientShutdown, RemoteClientDisconnected, RemoteClient */ private[spark] class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: ActorSystem) - extends SchedulerBackend with Logging { - + extends SchedulerBackend with Logging +{ // Use an atomic variable to track total number of cores in the cluster for simplicity and speed var totalCoreCount = new AtomicInteger(0) class DriverActor(sparkProperties: Seq[(String, String)]) extends Actor { private val executorActor = new HashMap[String, ActorRef] private val executorAddress = new HashMap[String, Address] - private val executorHostPort = new HashMap[String, String] + private val executorHost = new HashMap[String, String] private val freeCores = new HashMap[String, Int] private val actorToExecutorId = new HashMap[ActorRef, String] private val addressToExecutorId = new HashMap[Address, String] @@ -52,6 +55,10 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor override def preStart() { // Listen for remote client disconnection events, since they don't go through Akka's watch() context.system.eventStream.subscribe(self, classOf[RemoteClientLifeCycleEvent]) + + // Periodically revive offers to allow delay scheduling to work + val reviveInterval = System.getProperty("spark.scheduler.revive.interval", "1000").toLong + context.system.scheduler.schedule(0.millis, reviveInterval.millis, self, ReviveOffers) } def receive = { @@ -64,7 +71,7 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor sender ! 
RegisteredExecutor(sparkProperties) context.watch(sender) executorActor(executorId) = sender - executorHostPort(executorId) = hostPort + executorHost(executorId) = Utils.parseHostPort(hostPort)._1 freeCores(executorId) = cores executorAddress(executorId) = sender.path.address actorToExecutorId(sender) = executorId @@ -104,13 +111,13 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor // Make fake resource offers on all executors def makeOffers() { launchTasks(scheduler.resourceOffers( - executorHostPort.toArray.map {case (id, hostPort) => new WorkerOffer(id, hostPort, freeCores(id))})) + executorHost.toArray.map {case (id, host) => new WorkerOffer(id, host, freeCores(id))})) } // Make fake resource offers on just one executor def makeOffers(executorId: String) { launchTasks(scheduler.resourceOffers( - Seq(new WorkerOffer(executorId, executorHostPort(executorId), freeCores(executorId))))) + Seq(new WorkerOffer(executorId, executorHost(executorId), freeCores(executorId))))) } // Launch tasks returned by a set of resource offers @@ -129,9 +136,8 @@ class StandaloneSchedulerBackend(scheduler: ClusterScheduler, actorSystem: Actor actorToExecutorId -= executorActor(executorId) addressToExecutorId -= executorAddress(executorId) executorActor -= executorId - executorHostPort -= executorId + executorHost -= executorId freeCores -= executorId - executorHostPort -= executorId totalCoreCount.addAndGet(-numCores) scheduler.executorLost(executorId, SlaveLost(reason)) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala index 761fdf6919..309ac2f6c9 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskDescription.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskDescription.scala @@ -15,15 +15,16 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster import java.nio.ByteBuffer -import spark.util.SerializableBuffer +import org.apache.spark.util.SerializableBuffer private[spark] class TaskDescription( val taskId: Long, val executorId: String, val name: String, + val index: Int, // Index within this task's TaskSet _serializedTask: ByteBuffer) extends Serializable { @@ -31,4 +32,6 @@ private[spark] class TaskDescription( private val buffer = new SerializableBuffer(_serializedTask) def serializedTask: ByteBuffer = buffer.value + + override def toString: String = "TaskDescription(TID=%d, index=%d)".format(taskId, index) } diff --git a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala index a1ebd48b01..9685fb1a67 100644 --- a/core/src/main/scala/spark/scheduler/cluster/TaskInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskInfo.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster -import spark.Utils +import org.apache.spark.util.Utils /** * Information about a running task attempt inside a TaskSet. 
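Condensed, the offer/launch round trip that the StandaloneSchedulerBackend and TaskDescription changes above implement works roughly as follows; this is a paraphrase of makeOffers and launchTasks from this patch using the same field names, not additional patch content:

    // Sketch: turn the driver actor's per-executor bookkeeping into WorkerOffers,
    // ask the ClusterScheduler for work, and ship each returned task to its executor.
    def makeOffers() {
      val offers = executorHost.toArray.map { case (id, host) =>
        new WorkerOffer(id, host, freeCores(id))
      }
      for (taskSeq <- scheduler.resourceOffers(offers); task <- taskSeq) {
        freeCores(task.executorId) -= 1
        executorActor(task.executorId) ! LaunchTask(task)
      }
    }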
@@ -28,11 +28,9 @@ class TaskInfo( val index: Int, val launchTime: Long, val executorId: String, - val hostPort: String, + val host: String, val taskLocality: TaskLocality.TaskLocality) { - Utils.checkHostPort(hostPort, "Expected hostport") - var finishTime: Long = 0 var failed = false @@ -51,6 +49,17 @@ class TaskInfo( def running: Boolean = !finished + def status: String = { + if (running) + "RUNNING" + else if (failed) + "FAILED" + else if (successful) + "SUCCESS" + else + "UNKNOWN" + } + def duration: Long = { if (!finished) { throw new UnsupportedOperationException("duration() called on unfinished tasks") diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala new file mode 100644 index 0000000000..5d4130e14a --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskLocality.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster + + +private[spark] object TaskLocality + extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") +{ + // process local is expected to be used ONLY within tasksetmanager for now. + val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value + + type TaskLocality = Value + + def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { + condition <= constraint + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala new file mode 100644 index 0000000000..648a3ef922 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/TaskSetManager.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.scheduler.cluster + +import java.nio.ByteBuffer + +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.TaskSet + +/** + * Tracks and schedules the tasks within a single TaskSet. This class keeps track of the status of + * each task and is responsible for retries on failure and locality. The main interfaces to it + * are resourceOffer, which asks the TaskSet whether it wants to run a task on one node, and + * statusUpdate, which tells it that one of its tasks changed state (e.g. finished). + * + * THREADING: This class is designed to only be called from code with a lock on the TaskScheduler + * (e.g. its event handlers). It should not be called from other threads. + */ +private[spark] trait TaskSetManager extends Schedulable { + def schedulableQueue = null + + def schedulingMode = SchedulingMode.NONE + + def taskSet: TaskSet + + def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] + + def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) + + def error(message: String) +} diff --git a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala index 06d1203f70..938f62883a 100644 --- a/core/src/main/scala/spark/scheduler/cluster/WorkerOffer.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/WorkerOffer.scala @@ -15,11 +15,10 @@ * limitations under the License. */ -package spark.scheduler.cluster +package org.apache.spark.scheduler.cluster /** * Represents free resources available on an executor. */ private[spark] -class WorkerOffer(val executorId: String, val hostPort: String, val cores: Int) { -} +class WorkerOffer(val executorId: String, val host: String, val cores: Int) diff --git a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala index 1f73cb99a7..e8fa5e2f17 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalScheduler.scala @@ -15,21 +15,26 @@ * limitations under the License. */ -package spark.scheduler.local +package org.apache.spark.scheduler.local import java.io.File +import java.lang.management.ManagementFactory import java.util.concurrent.atomic.AtomicInteger import java.nio.ByteBuffer + +import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap import scala.collection.mutable.HashSet -import spark._ -import spark.TaskState.TaskState -import spark.executor.ExecutorURLClassLoader -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark._ +import org.apache.spark.TaskState.TaskState +import org.apache.spark.executor.ExecutorURLClassLoader +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode import akka.actor._ +import org.apache.spark.util.Utils /** * A FIFO or Fair TaskScheduler implementation that runs tasks locally in a thread pool. Optionally @@ -37,10 +42,15 @@ import akka.actor._ * testing fault recovery. 
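The TaskSetManager trait above fixes the resourceOffer contract to at most one task per call, so callers must loop until a manager declines or cores run out. A toy version of that loop, with String standing in for TaskDescription; it follows the same do/while shape LocalScheduler.resourceOffer uses further down:

```scala
// Hypothetical driver side of the resourceOffer contract: keep offering to
// each manager until it returns None or free cores are exhausted.
import scala.collection.mutable.ArrayBuffer

trait OfferableManager {
  def resourceOffer(execId: String, host: String, availableCpus: Int): Option[String]
}

object OfferLoop {
  def gather(managers: Seq[OfferableManager], cores: Int): Seq[String] = {
    val tasks = new ArrayBuffer[String]
    var free = cores
    for (m <- managers) {
      var launched = true
      while (launched && free > 0) {
        m.resourceOffer("local", "localhost", free) match {
          case Some(task) => tasks += task; free -= 1
          case None => launched = false
        }
      }
    }
    tasks
  }
}
```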
*/ -private[spark] case class LocalReviveOffers() -private[spark] case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) +private[spark] +case class LocalReviveOffers() + +private[spark] +case class LocalStatusUpdate(taskId: Long, state: TaskState, serializedData: ByteBuffer) + +private[spark] +class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging { -private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: Int) extends Actor with Logging { def receive = { case LocalReviveOffers => launchTask(localScheduler.resourceOffer(freeCores)) @@ -55,7 +65,7 @@ private[spark] class LocalActor(localScheduler: LocalScheduler, var freeCores: I freeCores -= 1 localScheduler.threadPool.submit(new Runnable { def run() { - localScheduler.runTask(task.taskId,task.serializedTask) + localScheduler.runTask(task.taskId, task.serializedTask) } }) } @@ -80,6 +90,8 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: var schedulableBuilder: SchedulableBuilder = null var rootPool: Pool = null + val schedulingMode: SchedulingMode = SchedulingMode.withName( + System.getProperty("spark.cluster.schedulingmode", "FIFO")) val activeTaskSets = new HashMap[String, TaskSetManager] val taskIdToTaskSetId = new HashMap[Long, String] val taskSetTaskIds = new HashMap[String, HashSet[Long]] @@ -87,15 +99,13 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: var localActor: ActorRef = null override def start() { - //default scheduler is FIFO - val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO") - //temporarily set rootPool name to empty - rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0) + // temporarily set rootPool name to empty + rootPool = new Pool("", schedulingMode, 0, 0) schedulableBuilder = { schedulingMode match { - case "FIFO" => + case SchedulingMode.FIFO => new FIFOSchedulableBuilder(rootPool) - case "FAIR" => + case SchedulingMode.FAIR => new FairSchedulableBuilder(rootPool) } } @@ -110,7 +120,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: override def submitTasks(taskSet: TaskSet) { synchronized { - var manager = new LocalTaskSetManager(this, taskSet) + val manager = new LocalTaskSetManager(this, taskSet) schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) activeTaskSets(taskSet.id) = manager taskSetTaskIds(taskSet.id) = new HashSet[Long]() @@ -124,14 +134,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: val tasks = new ArrayBuffer[TaskDescription](freeCores) val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() for (manager <- sortedTaskSetQueue) { - logDebug("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks)) + logDebug("parentName:%s,name:%s,runningTasks:%s".format( + manager.parent.name, manager.name, manager.runningTasks)) } var launchTask = false for (manager <- sortedTaskSetQueue) { do { launchTask = false - manager.slaveOffer(null,null,freeCpuCores) match { + manager.resourceOffer(null, null, freeCpuCores, null) match { case Some(task) => tasks += task taskIdToTaskSetId(task.taskId) = manager.taskSet.id @@ -139,7 +150,7 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: freeCpuCores -= 1 launchTask = true case None => {} - } + } } while(launchTask) } return tasks @@ -162,9 +173,13 @@ private[spark] class 
LocalScheduler(threads: Int, val maxFailures: Int, val sc: // Set the Spark execution environment for the worker thread SparkEnv.set(env) val ser = SparkEnv.get.closureSerializer.newInstance() - var attemptedTask: Option[Task[_]] = None + val objectSer = SparkEnv.get.serializer.newInstance() + var attemptedTask: Option[Task[_]] = None val start = System.currentTimeMillis() var taskStart: Long = 0 + def getTotalGCTime = ManagementFactory.getGarbageCollectorMXBeans.map(g => g.getCollectionTime).sum + val startGCTime = getTotalGCTime + try { Accumulators.clear() Thread.currentThread().setContextClassLoader(classLoader) @@ -186,14 +201,15 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: // executor does. This is useful to catch serialization errors early // on in development (so when users move their local Spark programs // to the cluster, they don't get surprised by serialization errors). - val serResult = ser.serialize(result) + val serResult = objectSer.serialize(result) deserializedTask.metrics.get.resultSize = serResult.limit() - val resultToReturn = ser.deserialize[Any](serResult) + val resultToReturn = objectSer.deserialize[Any](serResult) val accumUpdates = ser.deserialize[collection.mutable.Map[Long, Any]]( ser.serialize(Accumulators.values)) val serviceTime = System.currentTimeMillis() - taskStart logInfo("Finished " + taskId) deserializedTask.metrics.get.executorRunTime = serviceTime.toInt + deserializedTask.metrics.get.jvmGCTime = getTotalGCTime - startGCTime deserializedTask.metrics.get.executorDeserializeTime = deserTime.toInt val taskResult = new TaskResult(result, accumUpdates, deserializedTask.metrics.getOrElse(null)) val serializedResult = ser.serialize(taskResult) @@ -202,7 +218,10 @@ private[spark] class LocalScheduler(threads: Int, val maxFailures: Int, val sc: case t: Throwable => { val serviceTime = System.currentTimeMillis() - taskStart val metrics = attemptedTask.flatMap(t => t.metrics) - metrics.foreach{m => m.executorRunTime = serviceTime.toInt} + for (m <- metrics) { + m.executorRunTime = serviceTime.toInt + m.jvmGCTime = getTotalGCTime - startGCTime + } val failure = new ExceptionFailure(t.getClass.getName, t.toString, t.getStackTrace, metrics) localActor ! LocalStatusUpdate(taskId, TaskState.FAILED, ser.serialize(failure)) } diff --git a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala index e662ad6709..e52cb998bd 100644 --- a/core/src/main/scala/spark/scheduler/local/LocalTaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/local/LocalTaskSetManager.scala @@ -15,83 +15,79 @@ * limitations under the License. 
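The GC accounting added to runTask above samples cumulative collector time across all GC MXBeans before and after the task body, and reports the delta as jvmGCTime. The measurement in isolation — the allocation loop exists only to give the collector some work:

```scala
// Standalone sketch of the jvmGCTime measurement: sum getCollectionTime over
// all collectors, sample before and after, report the delta.
import java.lang.management.ManagementFactory
import scala.collection.JavaConversions._

object GcTimeDemo {
  def getTotalGCTime: Long =
    ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum

  def main(args: Array[String]) {
    val startGCTime = getTotalGCTime
    var junk: Array[Byte] = null
    for (i <- 1 to 500) junk = new Array[Byte](1 << 20) // churn to provoke GC
    System.gc()
    println("jvmGCTime ~= " + (getTotalGCTime - startGCTime) + " ms")
  }
}
```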
*/ -package spark.scheduler.local +package org.apache.spark.scheduler.local -import java.io.File -import java.util.concurrent.atomic.AtomicInteger import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import spark._ -import spark.TaskState.TaskState -import spark.scheduler._ -import spark.scheduler.cluster._ +import org.apache.spark.{ExceptionFailure, Logging, SparkEnv, Success, TaskState} +import org.apache.spark.TaskState.TaskState +import org.apache.spark.scheduler.{Task, TaskResult, TaskSet} +import org.apache.spark.scheduler.cluster.{Schedulable, TaskDescription, TaskInfo, TaskLocality, TaskSetManager} + + +private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) + extends TaskSetManager with Logging { -private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: TaskSet) extends TaskSetManager with Logging { var parent: Schedulable = null var weight: Int = 1 var minShare: Int = 0 var runningTasks: Int = 0 var priority: Int = taskSet.priority var stageId: Int = taskSet.stageId - var name: String = "TaskSet_"+taskSet.stageId.toString - + var name: String = "TaskSet_" + taskSet.stageId.toString var failCount = new Array[Int](taskSet.tasks.size) val taskInfos = new HashMap[Long, TaskInfo] val numTasks = taskSet.tasks.size var numFinished = 0 - val ser = SparkEnv.get.closureSerializer.newInstance() + val env = SparkEnv.get + val ser = env.closureSerializer.newInstance() val copiesRunning = new Array[Int](numTasks) val finished = new Array[Boolean](numTasks) val numFailures = new Array[Int](numTasks) val MAX_TASK_FAILURES = sched.maxFailures - def increaseRunningTasks(taskNum: Int): Unit = { - runningTasks += taskNum - if (parent != null) { - parent.increaseRunningTasks(taskNum) - } + override def increaseRunningTasks(taskNum: Int): Unit = { + runningTasks += taskNum + if (parent != null) { + parent.increaseRunningTasks(taskNum) + } } - def decreaseRunningTasks(taskNum: Int): Unit = { + override def decreaseRunningTasks(taskNum: Int): Unit = { runningTasks -= taskNum if (parent != null) { parent.decreaseRunningTasks(taskNum) } } - def addSchedulable(schedulable: Schedulable): Unit = { - //nothing + override def addSchedulable(schedulable: Schedulable): Unit = { + // nothing } - def removeSchedulable(schedulable: Schedulable): Unit = { - //nothing + override def removeSchedulable(schedulable: Schedulable): Unit = { + // nothing } - def getSchedulableByName(name: String): Schedulable = { + override def getSchedulableByName(name: String): Schedulable = { return null } - def executorLost(executorId: String, host: String): Unit = { - //nothing + override def executorLost(executorId: String, host: String): Unit = { + // nothing } - def checkSpeculatableTasks(): Boolean = { - return true - } + override def checkSpeculatableTasks() = true - def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { + override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] sortedTaskSetQueue += this return sortedTaskSetQueue } - def hasPendingTasks(): Boolean = { - return true - } + override def hasPendingTasks() = true def findTask(): Option[Int] = { for (i <- 0 to numTasks-1) { @@ -102,41 +98,42 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas return None } - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: 
TaskLocality.TaskLocality = null): Option[TaskDescription] = { + override def resourceOffer( + execId: String, + host: String, + availableCpus: Int, + maxLocality: TaskLocality.TaskLocality) + : Option[TaskDescription] = + { SparkEnv.set(sched.env) - logDebug("availableCpus:%d,numFinished:%d,numTasks:%d".format(availableCpus.toInt, numFinished, numTasks)) + logDebug("availableCpus:%d, numFinished:%d, numTasks:%d".format( + availableCpus.toInt, numFinished, numTasks)) if (availableCpus > 0 && numFinished < numTasks) { findTask() match { case Some(index) => val taskId = sched.attemptId.getAndIncrement() val task = taskSet.tasks(index) - val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", TaskLocality.NODE_LOCAL) + val info = new TaskInfo(taskId, index, System.currentTimeMillis(), "local", "local:1", + TaskLocality.NODE_LOCAL) taskInfos(taskId) = info - val bytes = Task.serializeWithDependencies(task, sched.sc.addedFiles, sched.sc.addedJars, ser) + // We rely on the DAGScheduler to catch non-serializable closures and RDDs, so in here + // we assume the task can be serialized without exceptions. + val bytes = Task.serializeWithDependencies( + task, sched.sc.addedFiles, sched.sc.addedJars, ser) logInfo("Size of task " + taskId + " is " + bytes.limit + " bytes") val taskName = "task %s:%d".format(taskSet.id, index) copiesRunning(index) += 1 increaseRunningTasks(1) - return Some(new TaskDescription(taskId, null, taskName, bytes)) + taskStarted(task, info) + return Some(new TaskDescription(taskId, null, taskName, index, bytes)) case None => {} } } return None } - def numPendingTasksForHostPort(hostPort: String): Int = { - return 0 - } - - def numRackLocalPendingTasksForHost(hostPort :String): Int = { - return 0 - } - - def numPendingTasksForHost(hostPort: String): Int = { - return 0 - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + override def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { + SparkEnv.set(env) state match { case TaskState.FINISHED => taskEnded(tid, state, serializedData) @@ -146,6 +143,10 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas } } + def taskStarted(task: Task[_], info: TaskInfo) { + sched.listener.taskStarted(task, info) + } + def taskEnded(tid: Long, state: TaskState, serializedData: ByteBuffer) { val info = taskInfos(tid) val index = info.index @@ -168,15 +169,18 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas val task = taskSet.tasks(index) info.markFailed() decreaseRunningTasks(1) - val reason: ExceptionFailure = ser.deserialize[ExceptionFailure](serializedData, getClass.getClassLoader) + val reason: ExceptionFailure = ser.deserialize[ExceptionFailure]( + serializedData, getClass.getClassLoader) sched.listener.taskEnded(task, reason, null, null, info, reason.metrics.getOrElse(null)) if (!finished(index)) { copiesRunning(index) -= 1 numFailures(index) += 1 val locs = reason.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logInfo("Loss was due to %s\n%s\n%s".format(reason.className, reason.description, locs.mkString("\n"))) + logInfo("Loss was due to %s\n%s\n%s".format( + reason.className, reason.description, locs.mkString("\n"))) if (numFailures(index) > MAX_TASK_FAILURES) { - val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format(taskSet.id, index, 4, reason.description) + val errorMessage = "Task %s:%d failed more than %d times; aborting job %s".format( + taskSet.id, 
index, 4, reason.description) decreaseRunningTasks(runningTasks) sched.listener.taskSetFailed(taskSet, errorMessage) // need to delete failed Taskset from schedule queue @@ -185,6 +189,6 @@ private[spark] class LocalTaskSetManager(sched: LocalScheduler, val taskSet: Tas } } - def error(message: String) { + override def error(message: String) { } } diff --git a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala index 7bc6040544..3dbe61d706 100644 --- a/core/src/main/scala/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/CoarseMesosSchedulerBackend.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler.mesos +package org.apache.spark.scheduler.mesos import com.google.protobuf.ByteString @@ -23,14 +23,14 @@ import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} -import spark.{SparkException, Utils, Logging, SparkContext} +import org.apache.spark.{SparkException, Logging, SparkContext} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ import java.io.File -import spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster._ import java.util.{ArrayList => JArrayList, List => JList} import java.util.Collections -import spark.TaskState +import org.apache.spark.TaskState /** * A SchedulerBackend that runs tasks on Mesos, but uses "coarse-grained" tasks, where it holds @@ -110,12 +110,6 @@ private[spark] class CoarseMesosSchedulerBackend( } def createCommand(offer: Offer, numCores: Int): CommandInfo = { - val runScript = new File(sparkHome, "run").getCanonicalPath - val driverUrl = "akka://spark@%s:%s/user/%s".format( - System.getProperty("spark.driver.host"), System.getProperty("spark.driver.port"), - StandaloneSchedulerBackend.ACTOR_NAME) - val command = "\"%s\" spark.executor.StandaloneExecutorBackend %s %s %s %d".format( - runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores) val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() @@ -123,7 +117,28 @@ private[spark] class CoarseMesosSchedulerBackend( .setValue(value) .build()) } - return CommandInfo.newBuilder().setValue(command).setEnvironment(environment).build() + val command = CommandInfo.newBuilder() + .setEnvironment(environment) + val driverUrl = "akka://spark@%s:%s/user/%s".format( + System.getProperty("spark.driver.host"), + System.getProperty("spark.driver.port"), + StandaloneSchedulerBackend.ACTOR_NAME) + val uri = System.getProperty("spark.executor.uri") + if (uri == null) { + val runScript = new File(sparkHome, "spark-class").getCanonicalPath + command.setValue( + "\"%s\" org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format( + runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) + } else { + // Grab everything to the first '.'. We'll use that and '*' to + // glob the directory "correctly". 
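Back in LocalTaskSetManager.taskEnded above, retry handling is a per-index failure counter checked against MAX_TASK_FAILURES, aborting the whole set once any one task exceeds it. Stripped to its skeleton — all names here are stand-ins, not the scheduler's state:

```scala
// Toy per-task retry bookkeeping in the shape used above: one failure counter
// per task index, aborting the set once any index exceeds the limit.
object RetryPolicy {
  val MAX_TASK_FAILURES = 4
  val numFailures = new Array[Int](8) // one slot per task index

  // Returns true if the task set should be aborted.
  def recordFailure(index: Int): Boolean = {
    numFailures(index) += 1
    numFailures(index) > MAX_TASK_FAILURES
  }

  def main(args: Array[String]) {
    val aborted = (1 to 5).map(_ => recordFailure(0)).last
    println("abort after 5 failures of task 0: " + aborted)
  }
}
```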
+ val basename = uri.split('/').last.split('.').head + command.setValue( + "cd %s*; ./spark-class org.apache.spark.executor.StandaloneExecutorBackend %s %s %s %d".format( + basename, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) + command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) + } + return command.build() } override def offerRescinded(d: SchedulerDriver, o: OfferID) {} diff --git a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala index 75b8268b55..541f86e338 100644 --- a/core/src/main/scala/spark/scheduler/mesos/MesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/mesos/MesosSchedulerBackend.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.scheduler.mesos +package org.apache.spark.scheduler.mesos import com.google.protobuf.ByteString @@ -23,14 +23,15 @@ import org.apache.mesos.{Scheduler => MScheduler} import org.apache.mesos._ import org.apache.mesos.Protos.{TaskInfo => MesosTaskInfo, TaskState => MesosTaskState, _} -import spark.{SparkException, Utils, Logging, SparkContext} +import org.apache.spark.{SparkException, Logging, SparkContext} import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet} import scala.collection.JavaConversions._ import java.io.File -import spark.scheduler.cluster._ +import org.apache.spark.scheduler.cluster._ import java.util.{ArrayList => JArrayList, List => JList} import java.util.Collections -import spark.TaskState +import org.apache.spark.TaskState +import org.apache.spark.util.Utils /** * A SchedulerBackend for running fine-grained tasks on Mesos. Each Spark task is mapped to a @@ -89,7 +90,6 @@ private[spark] class MesosSchedulerBackend( val sparkHome = sc.getSparkHome().getOrElse(throw new SparkException( "Spark home is not set; set it through the spark.home system " + "property, the SPARK_HOME environment variable or the SparkContext constructor")) - val execScript = new File(sparkHome, "spark-executor").getCanonicalPath val environment = Environment.newBuilder() sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() @@ -97,15 +97,23 @@ private[spark] class MesosSchedulerBackend( .setValue(value) .build()) } + val command = CommandInfo.newBuilder() + .setEnvironment(environment) + val uri = System.getProperty("spark.executor.uri") + if (uri == null) { + command.setValue(new File(sparkHome, "spark-executor").getCanonicalPath) + } else { + // Grab everything to the first '.'. We'll use that and '*' to + // glob the directory "correctly". 
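Both Mesos backends now derive the executor's working directory from spark.executor.uri with the same trick: take the URI's last path segment, keep everything before its first dot, and let the shell glob the rest. In isolation, with an example URI:

```scala
// The basename trick used above: for spark-0.8.0-incubating.tgz this yields
// "spark-0", and "cd spark-0*" then matches the unpacked directory whatever
// the exact version suffix is.
object BasenameDemo {
  def main(args: Array[String]) {
    val uri = "hdfs://nn:8020/dist/spark-0.8.0-incubating.tgz" // example value
    val basename = uri.split('/').last.split('.').head
    println("cd %s*; ./spark-class ...".format(basename))      // cd spark-0*; ...
  }
}
```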
+ val basename = uri.split('/').last.split('.').head + command.setValue("cd %s*; ./spark-executor".format(basename)) + command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) + } val memory = Resource.newBuilder() .setName("mem") .setType(Value.Type.SCALAR) .setScalar(Value.Scalar.newBuilder().setValue(executorMemory).build()) .build() - val command = CommandInfo.newBuilder() - .setValue(execScript) - .setEnvironment(environment) - .build() ExecutorInfo.newBuilder() .setExecutorId(ExecutorID.newBuilder().setValue(execId).build()) .setCommand(command) diff --git a/core/src/main/scala/spark/JavaSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala index 04c5f44e6b..4de81617b1 100644 --- a/core/src/main/scala/spark/JavaSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/JavaSerializer.scala @@ -15,13 +15,12 @@ * limitations under the License. */ -package spark +package org.apache.spark.serializer import java.io._ import java.nio.ByteBuffer -import serializer.{Serializer, SerializerInstance, DeserializationStream, SerializationStream} -import spark.util.ByteBufferInputStream +import org.apache.spark.util.ByteBufferInputStream private[spark] class JavaSerializationStream(out: OutputStream) extends SerializationStream { val objOut = new ObjectOutputStream(out) diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala new file mode 100644 index 0000000000..24ef204aa1 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.serializer + +import java.nio.ByteBuffer +import java.io.{EOFException, InputStream, OutputStream} + +import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer} +import com.esotericsoftware.kryo.{KryoException, Kryo} +import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} +import com.twitter.chill.ScalaKryoInstantiator + +import org.apache.spark.{SerializableWritable, Logging} +import org.apache.spark.storage.{GetBlock, GotBlock, PutBlock, StorageLevel} + +import org.apache.spark.broadcast.HttpBroadcast + +/** + * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]]. 
+ */ +class KryoSerializer extends org.apache.spark.serializer.Serializer with Logging { + private val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 + + def newKryoOutput() = new KryoOutput(bufferSize) + + def newKryoInput() = new KryoInput(bufferSize) + + def newKryo(): Kryo = { + val instantiator = new ScalaKryoInstantiator + val kryo = instantiator.newKryo() + val classLoader = Thread.currentThread.getContextClassLoader + + // Register some commonly used classes + val toRegister: Seq[AnyRef] = Seq( + ByteBuffer.allocate(1), + StorageLevel.MEMORY_ONLY, + PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY), + GotBlock("1", ByteBuffer.allocate(1)), + GetBlock("1") + ) + + for (obj <- toRegister) kryo.register(obj.getClass) + + // Allow sending SerializableWritable + kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) + kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) + + // Allow the user to register their own classes by setting spark.kryo.registrator + try { + Option(System.getProperty("spark.kryo.registrator")).foreach { regCls => + logDebug("Running user registrator: " + regCls) + val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator] + reg.registerClasses(kryo) + } + } catch { + case _: Exception => println("Failed to register spark.kryo.registrator") + } + + kryo.setClassLoader(classLoader) + + // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops + kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) + + kryo + } + + def newInstance(): SerializerInstance = { + new KryoSerializerInstance(this) + } +} + +private[spark] +class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { + val output = new KryoOutput(outStream) + + def writeObject[T](t: T): SerializationStream = { + kryo.writeClassAndObject(output, t) + this + } + + def flush() { output.flush() } + def close() { output.close() } +} + +private[spark] +class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream { + val input = new KryoInput(inStream) + + def readObject[T](): T = { + try { + kryo.readClassAndObject(input).asInstanceOf[T] + } catch { + // DeserializationStream uses the EOF exception to indicate stopping condition. + case _: KryoException => throw new EOFException + } + } + + def close() { + // Kryo's Input automatically closes the input stream it is using. 
+ input.close() + } +} + +private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance { + val kryo = ks.newKryo() + val output = ks.newKryoOutput() + val input = ks.newKryoInput() + + def serialize[T](t: T): ByteBuffer = { + output.clear() + kryo.writeClassAndObject(output, t) + ByteBuffer.wrap(output.toBytes) + } + + def deserialize[T](bytes: ByteBuffer): T = { + input.setBuffer(bytes.array) + kryo.readClassAndObject(input).asInstanceOf[T] + } + + def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = { + val oldClassLoader = kryo.getClassLoader + kryo.setClassLoader(loader) + input.setBuffer(bytes.array) + val obj = kryo.readClassAndObject(input).asInstanceOf[T] + kryo.setClassLoader(oldClassLoader) + obj + } + + def serializeStream(s: OutputStream): SerializationStream = { + new KryoSerializationStream(kryo, s) + } + + def deserializeStream(s: InputStream): DeserializationStream = { + new KryoDeserializationStream(kryo, s) + } +} + +/** + * Interface implemented by clients to register their classes with Kryo when using Kryo + * serialization. + */ +trait KryoRegistrator { + def registerClasses(kryo: Kryo) +} diff --git a/core/src/main/scala/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala index dc94d42bb6..160cca4d6c 100644 --- a/core/src/main/scala/spark/serializer/Serializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala @@ -15,19 +15,19 @@ * limitations under the License. */ -package spark.serializer +package org.apache.spark.serializer import java.io.{EOFException, InputStream, OutputStream} import java.nio.ByteBuffer import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.util.ByteBufferInputStream +import org.apache.spark.util.{NextIterator, ByteBufferInputStream} /** * A serializer. Because some serialization libraries are not thread safe, this class is used to - * create [[spark.serializer.SerializerInstance]] objects that do the actual serialization and are + * create [[org.apache.spark.serializer.SerializerInstance]] objects that do the actual serialization and are * guaranteed to only be called from one thread at a time. */ trait Serializer { @@ -95,7 +95,7 @@ trait DeserializationStream { * Read the elements of this stream through an iterator. This can only be called once, as * reading each element will consume data from the input source. */ - def asIterator: Iterator[Any] = new spark.util.NextIterator[Any] { + def asIterator: Iterator[Any] = new NextIterator[Any] { override protected def getNext() = { try { readObject[Any]() diff --git a/core/src/main/scala/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala index b7b24705a2..2955986fec 100644 --- a/core/src/main/scala/spark/serializer/SerializerManager.scala +++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.serializer +package org.apache.spark.serializer import java.util.concurrent.ConcurrentHashMap diff --git a/core/src/main/scala/spark/storage/BlockException.scala b/core/src/main/scala/org/apache/spark/storage/BlockException.scala index 8ebfaf3cbf..290dbce4f5 100644 --- a/core/src/main/scala/spark/storage/BlockException.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockException.scala @@ -15,7 +15,7 @@ * limitations under the License. 
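The user-facing path through the serializer above runs on system properties: implement KryoRegistrator, point spark.kryo.registrator at it, and round-trip values through a SerializerInstance. A sketch of that path, assuming spark-core and its Kryo dependency are on the classpath (MyClass and the property value are examples):

```scala
// Sketch of the registration path described above; newKryo() reads the
// property and invokes the registrator reflectively.
import com.esotericsoftware.kryo.Kryo
import org.apache.spark.serializer.{KryoRegistrator, KryoSerializer}

case class MyClass(x: Int, s: String)

class MyRegistrator extends KryoRegistrator {
  override def registerClasses(kryo: Kryo) {
    kryo.register(classOf[MyClass])
  }
}

object KryoDemo {
  def main(args: Array[String]) {
    System.setProperty("spark.kryo.registrator", classOf[MyRegistrator].getName)
    val instance = new KryoSerializer().newInstance()
    val bytes = instance.serialize(MyClass(42, "hello"))
    println(instance.deserialize[MyClass](bytes)) // MyClass(42,hello)
  }
}
```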
*/ -package spark.storage +package org.apache.spark.storage private[spark] case class BlockException(blockId: String, message: String) extends Exception(message) diff --git a/core/src/main/scala/spark/storage/BlockFetchTracker.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala index 265e554ad8..2e0b0e6eda 100644 --- a/core/src/main/scala/spark/storage/BlockFetchTracker.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockFetchTracker.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage private[spark] trait BlockFetchTracker { def totalBlocks : Int diff --git a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala index 1965c5bc19..3aeda3879d 100644 --- a/core/src/main/scala/spark/storage/BlockFetcherIterator.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockFetcherIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import java.util.concurrent.LinkedBlockingQueue @@ -26,13 +26,13 @@ import scala.collection.mutable.Queue import io.netty.buffer.ByteBuf -import spark.Logging -import spark.Utils -import spark.SparkException -import spark.network.BufferMessage -import spark.network.ConnectionManagerId -import spark.network.netty.ShuffleCopier -import spark.serializer.Serializer +import org.apache.spark.Logging +import org.apache.spark.SparkException +import org.apache.spark.network.BufferMessage +import org.apache.spark.network.ConnectionManagerId +import org.apache.spark.network.netty.ShuffleCopier +import org.apache.spark.serializer.Serializer +import org.apache.spark.util.Utils /** @@ -111,7 +111,7 @@ object BlockFetcherIterator { protected def sendRequest(req: FetchRequest) { logDebug("Sending request for %d blocks (%s) from %s".format( - req.blocks.size, Utils.memoryBytesToString(req.size), req.address.hostPort)) + req.blocks.size, Utils.bytesToString(req.size), req.address.hostPort)) val cmId = new ConnectionManagerId(req.address.host, req.address.port) val blockMessageArray = new BlockMessageArray(req.blocks.map { case (blockId, size) => BlockMessage.fromGetBlock(GetBlock(blockId)) @@ -132,9 +132,10 @@ object BlockFetcherIterator { "Unexpected message " + blockMessage.getType + " received from " + cmId) } val blockId = blockMessage.getId + val networkSize = blockMessage.getData.limit() results.put(new FetchResult(blockId, sizeMap(blockId), () => dataDeserialize(blockId, blockMessage.getData, serializer))) - _remoteBytesRead += req.size + _remoteBytesRead += networkSize logDebug("Got remote block " + blockId + " after " + Utils.getUsedTimeMs(startTime)) } } @@ -309,7 +310,7 @@ object BlockFetcherIterator { } logDebug("Sending request for %d blocks (%s) from %s".format( - req.blocks.size, Utils.memoryBytesToString(req.size), req.address.host)) + req.blocks.size, Utils.bytesToString(req.size), req.address.host)) val cmId = new ConnectionManagerId(req.address.host, req.address.nettyPort) val cpier = new ShuffleCopier cpier.getBlocks(cmId, req.blocks, putResult) diff --git a/core/src/main/scala/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index e4ffa57ad2..60fdc5f2ee 100644 --- a/core/src/main/scala/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -15,7 +15,7 @@ * limitations under the License. 
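One behavioral fix in BlockFetcherIterator above is easy to overlook: _remoteBytesRead now accumulates the bytes actually received (the block message buffer's limit) rather than the requester's size estimate. In miniature:

```scala
// Toy illustration of the accounting fix: count what arrived, not what was
// estimated when the request was issued.
import java.nio.ByteBuffer

object FetchAccounting {
  def main(args: Array[String]) {
    val estimatedSize = 4096L                            // what the request assumed
    val payload = ByteBuffer.wrap(new Array[Byte](3100)) // what actually arrived
    var remoteBytesRead = 0L
    remoteBytesRead += payload.limit()                   // networkSize, as above
    println("estimated=" + estimatedSize + " actual=" + remoteBytesRead)
  }
}
```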
*/ -package spark.storage +package org.apache.spark.storage import java.io.{InputStream, OutputStream} import java.nio.{ByteBuffer, MappedByteBuffer} @@ -27,14 +27,13 @@ import akka.dispatch.{Await, Future} import akka.util.Duration import akka.util.duration._ -import com.ning.compress.lzf.{LZFInputStream, LZFOutputStream} - import it.unimi.dsi.fastutil.io.FastByteArrayOutputStream -import spark.{Logging, SparkEnv, SparkException, Utils} -import spark.network._ -import spark.serializer.Serializer -import spark.util.{ByteBufferInputStream, IdGenerator, MetadataCleaner, TimeStampedHashMap} +import org.apache.spark.{Logging, SparkEnv, SparkException} +import org.apache.spark.io.CompressionCodec +import org.apache.spark.network._ +import org.apache.spark.serializer.Serializer +import org.apache.spark.util._ import sun.nio.ch.DirectBuffer @@ -158,6 +157,13 @@ private[spark] class BlockManager( val metadataCleaner = new MetadataCleaner("BlockManager", this.dropOldBlocks) initialize() + // The compression codec to use. Note that the "lazy" val is necessary because we want to delay + // the initialization of the compression codec until it is first used. The reason is that a Spark + // program could be using a user-defined codec in a third party jar, which is loaded in + // Executor.updateDependencies. When the BlockManager is initialized, user level jars hasn't been + // loaded yet. + private lazy val compressionCodec: CompressionCodec = CompressionCodec.createCodec() + /** * Construct a BlockManager with a memory limit set based on system properties. */ @@ -919,18 +925,14 @@ private[spark] class BlockManager( * Wrap an output stream for compression if block compression is enabled for its block type */ def wrapForCompression(blockId: String, s: OutputStream): OutputStream = { - if (shouldCompress(blockId)) { - (new LZFOutputStream(s)).setFinishBlockOnFlush(true) - } else { - s - } + if (shouldCompress(blockId)) compressionCodec.compressedOutputStream(s) else s } /** * Wrap an input stream for compression if block compression is enabled for its block type */ def wrapForCompression(blockId: String, s: InputStream): InputStream = { - if (shouldCompress(blockId)) new LZFInputStream(s) else s + if (shouldCompress(blockId)) compressionCodec.compressedInputStream(s) else s } def dataSerialize( @@ -1002,43 +1004,43 @@ private[spark] object BlockManager extends Logging { } } - def blockIdsToExecutorLocations(blockIds: Array[String], env: SparkEnv, blockManagerMaster: BlockManagerMaster = null): HashMap[String, List[String]] = { + def blockIdsToBlockManagers( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[BlockManagerId]] = + { // env == null and blockManagerMaster != null is used in tests assert (env != null || blockManagerMaster != null) - val locationBlockIds: Seq[Seq[BlockManagerId]] = - if (env != null) { - env.blockManager.getLocationBlockIds(blockIds) - } else { - blockManagerMaster.getLocations(blockIds) - } + val blockLocations: Seq[Seq[BlockManagerId]] = if (env != null) { + env.blockManager.getLocationBlockIds(blockIds) + } else { + blockManagerMaster.getLocations(blockIds) + } - // Convert from block master locations to executor locations (we need that for task scheduling) - val executorLocations = new HashMap[String, List[String]]() + val blockManagers = new HashMap[String, Seq[BlockManagerId]] for (i <- 0 until blockIds.length) { - val blockId = blockIds(i) - val blockLocations = locationBlockIds(i) - - val executors = new 
HashSet[String]() - - if (env != null) { - for (bkLocation <- blockLocations) { - val executorHostPort = env.resolveExecutorIdToHostPort(bkLocation.executorId, bkLocation.host) - executors += executorHostPort - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } else { - // Typically while testing, etc - revert to simply using host. - for (bkLocation <- blockLocations) { - executors += bkLocation.host - // logInfo("bkLocation = " + bkLocation + ", executorHostPort = " + executorHostPort) - } - } - - executorLocations.put(blockId, executors.toSeq.toList) + blockManagers(blockIds(i)) = blockLocations(i) } + blockManagers.toMap + } - executorLocations + def blockIdsToExecutorIds( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.executorId)) } + def blockIdsToHosts( + blockIds: Array[String], + env: SparkEnv, + blockManagerMaster: BlockManagerMaster = null) + : Map[String, Seq[String]] = + { + blockIdsToBlockManagers(blockIds, env, blockManagerMaster).mapValues(s => s.map(_.host)) + } } diff --git a/core/src/main/scala/spark/storage/BlockManagerId.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala index b36a6176c0..74207f59af 100644 --- a/core/src/main/scala/spark/storage/BlockManagerId.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerId.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} import java.util.concurrent.ConcurrentHashMap -import spark.Utils +import org.apache.spark.util.Utils /** * This class represent an unique identifier for a BlockManager. @@ -92,13 +92,13 @@ private[spark] class BlockManagerId private ( private[spark] object BlockManagerId { /** - * Returns a [[spark.storage.BlockManagerId]] for the given configuraiton. + * Returns a [[org.apache.spark.storage.BlockManagerId]] for the given configuraiton. * * @param execId ID of the executor. * @param host Host name of the block manager. * @param port Port of the block manager. * @param nettyPort Optional port for the Netty-based shuffle sender. - * @return A new [[spark.storage.BlockManagerId]]. + * @return A new [[org.apache.spark.storage.BlockManagerId]]. */ def apply(execId: String, host: String, port: Int, nettyPort: Int) = getCachedBlockManagerId(new BlockManagerId(execId, host, port, nettyPort)) diff --git a/core/src/main/scala/spark/storage/BlockManagerMaster.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala index 3186f7c85b..cf463d6ffc 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMaster.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala @@ -15,14 +15,15 @@ * limitations under the License. 
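The refactor above collapses three lookups into one: blockIdsToBlockManagers computes the canonical block-to-managers map once, and the executor and host views are derived from it with mapValues. The derivation pattern in isolation, with SimpleId standing in for BlockManagerId:

```scala
// Stand-in for BlockManagerId with just the fields the derived views use.
case class SimpleId(executorId: String, host: String)

object DerivedViews {
  def main(args: Array[String]) {
    val blockManagers: Map[String, Seq[SimpleId]] = Map(
      "rdd_0_0" -> Seq(SimpleId("exec-1", "node-a"), SimpleId("exec-2", "node-b")))

    // Same shape as blockIdsToExecutorIds / blockIdsToHosts above.
    val executorIds = blockManagers.mapValues(s => s.map(_.executorId))
    val hosts = blockManagers.mapValues(s => s.map(_.host))

    println(executorIds("rdd_0_0")) // List(exec-1, exec-2)
    println(hosts("rdd_0_0"))       // List(node-a, node-b)
  }
}
```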
*/ -package spark.storage +package org.apache.spark.storage import akka.actor.ActorRef import akka.dispatch.{Await, Future} import akka.pattern.ask import akka.util.Duration -import spark.{Logging, SparkException} +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.storage.BlockManagerMessages._ private[spark] class BlockManagerMaster(var driverActor: ActorRef) extends Logging { diff --git a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala index 244000d952..c7b23ab094 100644 --- a/core/src/main/scala/spark/storage/BlockManagerMasterActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterActor.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.util.{HashMap => JHashMap} @@ -28,7 +28,10 @@ import akka.pattern.ask import akka.util.Duration import akka.util.duration._ -import spark.{Logging, Utils, SparkException} +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.storage.BlockManagerMessages._ +import org.apache.spark.util.Utils + /** * BlockManagerMasterActor is an actor on the master node to track statuses of @@ -330,7 +333,7 @@ object BlockManagerMasterActor { private val _blocks = new JHashMap[String, BlockStatus] logInfo("Registering block manager %s with %s RAM".format( - blockManagerId.hostPort, Utils.memoryBytesToString(maxMem))) + blockManagerId.hostPort, Utils.bytesToString(maxMem))) def updateLastSeenMs() { _lastSeenMs = System.currentTimeMillis() @@ -356,12 +359,12 @@ object BlockManagerMasterActor { if (storageLevel.useMemory) { _remainingMem -= memSize logInfo("Added %s in memory on %s (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize), - Utils.memoryBytesToString(_remainingMem))) + blockId, blockManagerId.hostPort, Utils.bytesToString(memSize), + Utils.bytesToString(_remainingMem))) } if (storageLevel.useDisk) { logInfo("Added %s on disk on %s (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize))) + blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize))) } } else if (_blocks.containsKey(blockId)) { // If isValid is not true, drop the block. @@ -370,12 +373,12 @@ object BlockManagerMasterActor { if (blockStatus.storageLevel.useMemory) { _remainingMem += blockStatus.memSize logInfo("Removed %s on %s in memory (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(memSize), - Utils.memoryBytesToString(_remainingMem))) + blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.memSize), + Utils.bytesToString(_remainingMem))) } if (blockStatus.storageLevel.useDisk) { logInfo("Removed %s on %s on disk (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.memoryBytesToString(diskSize))) + blockId, blockManagerId.hostPort, Utils.bytesToString(blockStatus.diskSize))) } } } diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala new file mode 100644 index 0000000000..24333a179c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMessages.scala @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.storage + +import java.io.{Externalizable, ObjectInput, ObjectOutput} + +import akka.actor.ActorRef + + +private[storage] object BlockManagerMessages { + ////////////////////////////////////////////////////////////////////////////////// + // Messages from the master to slaves. + ////////////////////////////////////////////////////////////////////////////////// + sealed trait ToBlockManagerSlave + + // Remove a block from the slaves that have it. This can only be used to remove + // blocks that the master knows about. + case class RemoveBlock(blockId: String) extends ToBlockManagerSlave + + // Remove all blocks belonging to a specific RDD. + case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave + + + ////////////////////////////////////////////////////////////////////////////////// + // Messages from slaves to the master. + ////////////////////////////////////////////////////////////////////////////////// + sealed trait ToBlockManagerMaster + + case class RegisterBlockManager( + blockManagerId: BlockManagerId, + maxMemSize: Long, + sender: ActorRef) + extends ToBlockManagerMaster + + case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster + + class UpdateBlockInfo( + var blockManagerId: BlockManagerId, + var blockId: String, + var storageLevel: StorageLevel, + var memSize: Long, + var diskSize: Long) + extends ToBlockManagerMaster + with Externalizable { + + def this() = this(null, null, null, 0, 0) // For deserialization only + + override def writeExternal(out: ObjectOutput) { + blockManagerId.writeExternal(out) + out.writeUTF(blockId) + storageLevel.writeExternal(out) + out.writeLong(memSize) + out.writeLong(diskSize) + } + + override def readExternal(in: ObjectInput) { + blockManagerId = BlockManagerId(in) + blockId = in.readUTF() + storageLevel = StorageLevel(in) + memSize = in.readLong() + diskSize = in.readLong() + } + } + + object UpdateBlockInfo { + def apply(blockManagerId: BlockManagerId, + blockId: String, + storageLevel: StorageLevel, + memSize: Long, + diskSize: Long): UpdateBlockInfo = { + new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize) + } + + // For pattern-matching + def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = { + Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize)) + } + } + + case class GetLocations(blockId: String) extends ToBlockManagerMaster + + case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster + + case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster + + case class RemoveExecutor(execId: String) extends ToBlockManagerMaster + + case object StopBlockManagerMaster extends ToBlockManagerMaster + + case object GetMemoryStatus extends 
ToBlockManagerMaster + + case object ExpireDeadHosts extends ToBlockManagerMaster + + case object GetStorageStatus extends ToBlockManagerMaster +} diff --git a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala index 45cffad810..951503019f 100644 --- a/core/src/main/scala/spark/storage/BlockManagerSlaveActor.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSlaveActor.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import akka.actor.Actor -import spark.{Logging, SparkException, Utils} +import org.apache.spark.storage.BlockManagerMessages._ /** diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala new file mode 100644 index 0000000000..24190cdd67 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerSource.scala @@ -0,0 +1,48 @@ +package org.apache.spark.storage + +import com.codahale.metrics.{Gauge,MetricRegistry} + +import org.apache.spark.metrics.source.Source + + +private[spark] class BlockManagerSource(val blockManager: BlockManager) extends Source { + val metricRegistry = new MetricRegistry() + val sourceName = "BlockManager" + + metricRegistry.register(MetricRegistry.name("memory", "maxMem", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) + maxMem / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("memory", "remainingMem", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _) + remainingMem / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("memory", "memUsed", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val maxMem = storageStatusList.map(_.maxMem).reduce(_ + _) + val remainingMem = storageStatusList.map(_.memRemaining).reduce(_ + _) + (maxMem - remainingMem) / 1024 / 1024 + } + }) + + metricRegistry.register(MetricRegistry.name("disk", "diskSpaceUsed", "MBytes"), new Gauge[Long] { + override def getValue: Long = { + val storageStatusList = blockManager.master.getStorageStatus + val diskSpaceUsed = storageStatusList + .flatMap(_.blocks.values.map(_.diskSize)) + .reduceOption(_ + _) + .getOrElse(0L) + + diskSpaceUsed / 1024 / 1024 + } + }) +} diff --git a/core/src/main/scala/spark/storage/BlockManagerWorker.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala index 39064bce92..678c38203c 100644 --- a/core/src/main/scala/spark/storage/BlockManagerWorker.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerWorker.scala @@ -15,12 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer -import spark.{Logging, Utils} -import spark.network._ +import org.apache.spark.{Logging} +import org.apache.spark.network._ +import org.apache.spark.util.Utils /** * A network interface for BlockManager. 
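BlockManagerSource above is a plain codahale-metrics Source: each statistic is a Gauge whose getValue recomputes from the master's storage status on every read. A pared-down gauge of the same shape, assuming metrics-core on the classpath; the registry name parts and the constant value are examples:

```scala
// Minimal gauge in the style of BlockManagerSource; the real gauges recompute
// from blockManager.master.getStorageStatus on each read.
import com.codahale.metrics.{Gauge, MetricRegistry}

object GaugeDemo {
  def main(args: Array[String]) {
    val registry = new MetricRegistry()
    val maxMemMb = 512L // stand-in for storageStatusList.map(_.maxMem).reduce(_ + _)
    val gauge = new Gauge[Long] { override def getValue: Long = maxMemMb }
    registry.register(MetricRegistry.name("memory", "maxMem", "MBytes"), gauge)
    println(gauge.getValue) // 512
  }
}
```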
Each slave should have one diff --git a/core/src/main/scala/spark/storage/BlockMessage.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala index ab72dbb62b..d8fa6a91d1 100644 --- a/core/src/main/scala/spark/storage/BlockMessage.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessage.scala @@ -15,15 +15,14 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import scala.collection.mutable.StringBuilder import scala.collection.mutable.ArrayBuffer -import spark._ -import spark.network._ +import org.apache.spark.network._ private[spark] case class GetBlock(id: String) private[spark] case class GotBlock(id: String, data: ByteBuffer) diff --git a/core/src/main/scala/spark/storage/BlockMessageArray.scala b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala index b0229d6124..0aaf846b5b 100644 --- a/core/src/main/scala/spark/storage/BlockMessageArray.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockMessageArray.scala @@ -15,15 +15,14 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer -import scala.collection.mutable.StringBuilder import scala.collection.mutable.ArrayBuffer -import spark._ -import spark.network._ +import org.apache.spark._ +import org.apache.spark.network._ private[spark] class BlockMessageArray(var blockMessages: Seq[BlockMessage]) extends Seq[BlockMessage] with Logging { @@ -113,7 +112,7 @@ private[spark] object BlockMessageArray { def main(args: Array[String]) { val blockMessages = - (0 until 10).map(i => { + (0 until 10).map { i => if (i % 2 == 0) { val buffer = ByteBuffer.allocate(100) buffer.clear @@ -121,7 +120,7 @@ private[spark] object BlockMessageArray { } else { BlockMessage.fromGetBlock(GetBlock(i.toString)) } - }) + } val blockMessageArray = new BlockMessageArray(blockMessages) println("Block message array created") diff --git a/core/src/main/scala/spark/storage/BlockObjectWriter.scala b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala index 01ed6e8c1f..39f103297f 100644 --- a/core/src/main/scala/spark/storage/BlockObjectWriter.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockObjectWriter.scala @@ -15,9 +15,7 @@ * limitations under the License. */ -package spark.storage - -import java.nio.ByteBuffer +package org.apache.spark.storage /** diff --git a/core/src/main/scala/spark/storage/BlockStore.scala b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala index c8db0022b0..fa834371f4 100644 --- a/core/src/main/scala/spark/storage/BlockStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockStore.scala @@ -15,12 +15,12 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer -import spark.Logging +import org.apache.spark.Logging /** * Abstract class to store blocks diff --git a/core/src/main/scala/spark/storage/DiskStore.scala b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala index 3495d653bd..fc25ef0fae 100644 --- a/core/src/main/scala/spark/storage/DiskStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskStore.scala @@ -15,7 +15,7 @@ * limitations under the License. 
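Returning to BlockManagerMessages above: UpdateBlockInfo pairs a mutable Externalizable (compact wire form, with the no-arg constructor deserialization requires) with a companion unapply so receivers can still pattern-match it. A cut-down version of that pairing — SizeUpdate is hypothetical:

```scala
// Cut-down version of the UpdateBlockInfo pattern: a mutable Externalizable
// message plus a companion unapply for pattern matching.
import java.io.{ByteArrayInputStream, ByteArrayOutputStream,
  Externalizable, ObjectInput, ObjectInputStream, ObjectOutput, ObjectOutputStream}

class SizeUpdate(var blockId: String, var memSize: Long) extends Externalizable {
  def this() = this(null, 0) // no-arg constructor required for deserialization

  override def writeExternal(out: ObjectOutput) {
    out.writeUTF(blockId)
    out.writeLong(memSize)
  }

  override def readExternal(in: ObjectInput) {
    blockId = in.readUTF()
    memSize = in.readLong()
  }
}

object SizeUpdate {
  def unapply(u: SizeUpdate): Option[(String, Long)] = Some((u.blockId, u.memSize))
}

object ExternalizableDemo {
  def main(args: Array[String]) {
    val bytes = new ByteArrayOutputStream()
    val out = new ObjectOutputStream(bytes)
    out.writeObject(new SizeUpdate("rdd_0_0", 1024)); out.close()

    val in = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray))
    in.readObject() match {
      case SizeUpdate(id, mem) => println(id + " -> " + mem) // rdd_0_0 -> 1024
    }
  }
}
```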
*/ -package spark.storage +package org.apache.spark.storage import java.io.{File, FileOutputStream, OutputStream, RandomAccessFile} import java.nio.ByteBuffer @@ -28,12 +28,12 @@ import scala.collection.mutable.ArrayBuffer import it.unimi.dsi.fastutil.io.FastBufferedOutputStream -import spark.Utils -import spark.executor.ExecutorExitCode -import spark.serializer.{Serializer, SerializationStream} -import spark.Logging -import spark.network.netty.ShuffleSender -import spark.network.netty.PathResolver +import org.apache.spark.executor.ExecutorExitCode +import org.apache.spark.serializer.{Serializer, SerializationStream} +import org.apache.spark.Logging +import org.apache.spark.network.netty.ShuffleSender +import org.apache.spark.network.netty.PathResolver +import org.apache.spark.util.Utils /** @@ -66,7 +66,6 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) override def close() { if (initialized) { objOut.close() - bs.close() channel = null bs = null objOut = null @@ -148,7 +147,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) channel.close() val finishTime = System.currentTimeMillis logDebug("Block %s stored as %s file on disk in %d ms".format( - blockId, Utils.memoryBytesToString(bytes.limit), (finishTime - startTime))) + blockId, Utils.bytesToString(bytes.limit), (finishTime - startTime))) } private def getFileBytes(file: File): ByteBuffer = { @@ -182,7 +181,7 @@ private class DiskStore(blockManager: BlockManager, rootDirs: String) val timeTaken = System.currentTimeMillis - startTime logDebug("Block %s stored as %s file on disk in %d ms".format( - blockId, Utils.memoryBytesToString(length), timeTaken)) + blockId, Utils.bytesToString(length), timeTaken)) if (returnValues) { // Return a byte buffer for the contents of the file diff --git a/core/src/main/scala/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala index b5a86b85a7..3b3b2342fa 100644 --- a/core/src/main/scala/spark/storage/MemoryStore.scala +++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.util.LinkedHashMap import java.util.concurrent.ArrayBlockingQueue -import spark.{SizeEstimator, Utils} import java.nio.ByteBuffer import collection.mutable.ArrayBuffer +import org.apache.spark.util.{SizeEstimator, Utils} /** * Stores blocks in memory, either as ArrayBuffers of deserialized Java objects or as @@ -38,7 +38,7 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) // blocks from the memory store. 
private val putLock = new Object() - logInfo("MemoryStore started with capacity %s.".format(Utils.memoryBytesToString(maxMemory))) + logInfo("MemoryStore started with capacity %s.".format(Utils.bytesToString(maxMemory))) def freeMemory: Long = maxMemory - currentMemory @@ -164,10 +164,10 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long) currentMemory += size if (deserialized) { logInfo("Block %s stored as values to memory (estimated size %s, free %s)".format( - blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory))) + blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory))) } else { logInfo("Block %s stored as bytes to memory (size %s, free %s)".format( - blockId, Utils.memoryBytesToString(size), Utils.memoryBytesToString(freeMemory))) + blockId, Utils.bytesToString(size), Utils.bytesToString(freeMemory))) } true } else { diff --git a/core/src/main/scala/spark/storage/PutResult.scala b/core/src/main/scala/org/apache/spark/storage/PutResult.scala index 3a0974fe15..2eba2f06b5 100644 --- a/core/src/main/scala/spark/storage/PutResult.scala +++ b/core/src/main/scala/org/apache/spark/storage/PutResult.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.nio.ByteBuffer diff --git a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala index 8a7a6f9ed3..9da11efb57 100644 --- a/core/src/main/scala/spark/storage/ShuffleBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockManager.scala @@ -15,9 +15,9 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage -import spark.serializer.Serializer +import org.apache.spark.serializer.Serializer private[spark] diff --git a/core/src/main/scala/spark/storage/StorageLevel.scala b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala index f52650988c..755f1a760e 100644 --- a/core/src/main/scala/spark/storage/StorageLevel.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageLevel.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} @@ -23,7 +23,7 @@ import java.io.{Externalizable, IOException, ObjectInput, ObjectOutput} * Flags for controlling the storage of an RDD. Each StorageLevel records whether to use memory, * whether to drop the RDD to disk if it falls out of memory, whether to keep the data in memory * in a serialized format, and whether to replicate the RDD partitions on multiple nodes. - * The [[spark.storage.StorageLevel$]] singleton object contains some static constants for + * The [[org.apache.spark.storage.StorageLevel$]] singleton object contains some static constants for * commonly useful storage levels. To create your own storage level object, use the factory method * of the singleton object (`StorageLevel(...)`). */ diff --git a/core/src/main/scala/spark/storage/StorageUtils.scala b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala index 2aeed4ea3c..2bb7715696 100644 --- a/core/src/main/scala/spark/storage/StorageUtils.scala +++ b/core/src/main/scala/org/apache/spark/storage/StorageUtils.scala @@ -15,10 +15,11 @@ * limitations under the License.
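[The StorageLevel doc comment above points at the singleton's factory method. A hedged usage sketch, assuming the four-argument apply(useDisk, useMemory, deserialized, replication) of this era's API; the value name is illustrative:

    import org.apache.spark.storage.StorageLevel

    // Memory-and-disk level, deserialized, replicated to two nodes (illustrative).
    val memAndDisk2 = StorageLevel(true, true, true, 2)
    // rdd.persist(memAndDisk2)
]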
*/ -package spark.storage +package org.apache.spark.storage -import spark.{Utils, SparkContext} +import org.apache.spark.{SparkContext} import BlockManagerMasterActor.BlockStatus +import org.apache.spark.util.Utils private[spark] case class StorageStatus(blockManagerId: BlockManagerId, maxMem: Long, @@ -42,9 +43,9 @@ case class RDDInfo(id: Int, name: String, storageLevel: StorageLevel, numCachedPartitions: Int, numPartitions: Int, memSize: Long, diskSize: Long) extends Ordered[RDDInfo] { override def toString = { - import Utils.memoryBytesToString + import Utils.bytesToString "RDD \"%s\" (%d) Storage: %s; CachedPartitions: %d; TotalPartitions: %d; MemorySize: %s; DiskSize: %s".format(name, id, - storageLevel.toString, numCachedPartitions, numPartitions, memoryBytesToString(memSize), memoryBytesToString(diskSize)) + storageLevel.toString, numCachedPartitions, numPartitions, bytesToString(memSize), bytesToString(diskSize)) } override def compare(that: RDDInfo) = { diff --git a/core/src/main/scala/spark/storage/ThreadingTest.scala b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala index b3ab1ff4b4..f2ae8dd97d 100644 --- a/core/src/main/scala/spark/storage/ThreadingTest.scala +++ b/core/src/main/scala/org/apache/spark/storage/ThreadingTest.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package spark.storage +package org.apache.spark.storage import akka.actor._ -import spark.KryoSerializer import java.util.concurrent.ArrayBlockingQueue import util.Random +import org.apache.spark.serializer.KryoSerializer /** * This class tests the BlockManager and MemoryStore for thread safety and diff --git a/core/src/main/scala/spark/ui/JettyUtils.scala b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala index ca6088ad93..7211dbc7c6 100644 --- a/core/src/main/scala/spark/ui/JettyUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/JettyUtils.scala @@ -15,22 +15,22 @@ * limitations under the License. 
*/ -package spark.ui - -import annotation.tailrec +package org.apache.spark.ui import javax.servlet.http.{HttpServletResponse, HttpServletRequest} +import scala.annotation.tailrec +import scala.util.{Try, Success, Failure} +import scala.xml.Node + import net.liftweb.json.{JValue, pretty, render} import org.eclipse.jetty.server.{Server, Request, Handler} import org.eclipse.jetty.server.handler.{ResourceHandler, HandlerList, ContextHandler, AbstractHandler} import org.eclipse.jetty.util.thread.QueuedThreadPool -import scala.util.{Try, Success, Failure} -import scala.xml.Node +import org.apache.spark.Logging -import spark.Logging /** Utilities for launching a web server using Jetty's HTTP Server class */ private[spark] object JettyUtils extends Logging { @@ -48,7 +48,7 @@ private[spark] object JettyUtils extends Logging { implicit def textResponderToHandler(responder: Responder[String]): Handler = createHandler(responder, "text/plain") - private def createHandler[T <% AnyRef](responder: Responder[T], contentType: String, + def createHandler[T <% AnyRef](responder: Responder[T], contentType: String, extractFn: T => String = (in: Any) => in.toString): Handler = { new AbstractHandler { def handle(target: String, @@ -117,7 +117,6 @@ private[spark] object JettyUtils extends Logging { Try { server.start() } match { case s: Success[_] => - sys.addShutdownHook(server.stop()) // Be kind, un-bind (server, server.getConnectors.head.getLocalPort) case f: Failure[_] => server.stop() diff --git a/core/src/main/scala/spark/ui/Page.scala b/core/src/main/scala/org/apache/spark/ui/Page.scala index a31e750d06..b2a069a375 100644 --- a/core/src/main/scala/spark/ui/Page.scala +++ b/core/src/main/scala/org/apache/spark/ui/Page.scala @@ -15,6 +15,8 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui -private[spark] object Page extends Enumeration { val Storage, Jobs, Environment = Value } +private[spark] object Page extends Enumeration { + val Stages, Storage, Environment, Executors = Value +} diff --git a/core/src/main/scala/spark/ui/SparkUI.scala b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala index 9396f22063..ad456ea565 100644 --- a/core/src/main/scala/spark/ui/SparkUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/SparkUI.scala @@ -15,21 +15,23 @@ * limitations under the License. 
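[JettyUtils above pairs URL paths with Handlers built from plain functions, with the implicit conversions (e.g. textResponderToHandler) doing the lifting. A hedged sketch of registering a plain-text endpoint in the same pattern the UI classes in this commit use; "/ping" is hypothetical, and note JettyUtils is private[spark], so this only compiles inside Spark's own UI code:

    import javax.servlet.http.HttpServletRequest
    import org.eclipse.jetty.server.Handler
    import org.apache.spark.ui.JettyUtils._

    // The string-returning function is lifted to a Handler implicitly.
    val handlers = Seq[(String, Handler)](
      ("/ping", (request: HttpServletRequest) => "pong")
    )
]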
*/ -package spark.ui +package org.apache.spark.ui import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.{Handler, Server} -import spark.{Logging, SparkContext, Utils} -import spark.ui.env.EnvironmentUI -import spark.ui.storage.BlockManagerUI -import spark.ui.jobs.JobProgressUI -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging, SparkContext, SparkEnv} +import org.apache.spark.ui.env.EnvironmentUI +import org.apache.spark.ui.exec.ExecutorsUI +import org.apache.spark.ui.storage.BlockManagerUI +import org.apache.spark.ui.jobs.JobProgressUI +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.util.Utils /** Top level user interface for Spark */ private[spark] class SparkUI(sc: SparkContext) extends Logging { - val host = Utils.localHostName() + val host = Option(System.getenv("SPARK_PUBLIC_DNS")).getOrElse(Utils.localHostName()) val port = Option(System.getProperty("spark.ui.port")).getOrElse(SparkUI.DEFAULT_PORT).toInt var boundPort: Option[Int] = None var server: Option[Server] = None @@ -41,7 +43,13 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { val storage = new BlockManagerUI(sc) val jobs = new JobProgressUI(sc) val env = new EnvironmentUI(sc) - val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++ handlers + val exec = new ExecutorsUI(sc) + + // Add MetricsServlet handlers by default + val metricsServletHandlers = SparkEnv.get.metricsSystem.getServletHandlers + + val allHandlers = storage.getHandlers ++ jobs.getHandlers ++ env.getHandlers ++ + exec.getHandlers ++ metricsServletHandlers ++ handlers /** Bind the HTTP server which backs this web interface */ def bind() { @@ -51,9 +59,9 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { server = Some(srv) boundPort = Some(usedPort) } catch { - case e: Exception => - logError("Failed to create Spark JettyUtils", e) - System.exit(1) + case e: Exception => + logError("Failed to create Spark JettyUtils", e) + System.exit(1) } } @@ -64,6 +72,7 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { // This server must register all handlers, including JobProgressUI, before binding // JobProgressUI registers a listener with SparkContext, which requires sc to initialize jobs.start() + exec.start() } def stop() { @@ -74,6 +83,6 @@ private[spark] class SparkUI(sc: SparkContext) extends Logging { } private[spark] object SparkUI { - val DEFAULT_PORT = "33000" - val STATIC_RESOURCE_DIR = "spark/ui/static" + val DEFAULT_PORT = "3030" + val STATIC_RESOURCE_DIR = "org/apache/spark/ui/static" } diff --git a/core/src/main/scala/spark/ui/UIUtils.scala b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala index b1d11954dd..ce1acf564c 100644 --- a/core/src/main/scala/spark/ui/UIUtils.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIUtils.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui import scala.xml.Node -import spark.SparkContext +import org.apache.spark.SparkContext /** Utility functions for generating XML pages with spark content. 
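[SparkUI above resolves its public hostname from SPARK_PUBLIC_DNS and its port from the spark.ui.port system property, defaulting to "3030" after this change. A hedged sketch of overriding the port before the context (and thus the UI) starts:

    // Must be set before SparkContext construction, since SparkUI reads it then.
    System.setProperty("spark.ui.port", "4040")
    val sc = new org.apache.spark.SparkContext("local", "ui-demo")
]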
*/ private[spark] object UIUtils { @@ -28,64 +28,53 @@ private[spark] object UIUtils { /** Returns a spark page with correctly formatted headers */ def headerSparkPage(content: => Seq[Node], sc: SparkContext, title: String, page: Page.Value) : Seq[Node] = { + val jobs = page match { + case Stages => <li class="active"><a href="/stages">Stages</a></li> + case _ => <li><a href="/stages">Stages</a></li> + } val storage = page match { case Storage => <li class="active"><a href="/storage">Storage</a></li> case _ => <li><a href="/storage">Storage</a></li> } - val jobs = page match { - case Jobs => <li class="active"><a href="/stages">Jobs</a></li> - case _ => <li><a href="/stages">Jobs</a></li> - } val environment = page match { case Environment => <li class="active"><a href="/environment">Environment</a></li> case _ => <li><a href="/environment">Environment</a></li> } + val executors = page match { + case Executors => <li class="active"><a href="/executors">Executors</a></li> + case _ => <li><a href="/executors">Executors</a></li> + } <html> <head> <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> <link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" /> <link rel="stylesheet" href="/static/webui.css" type="text/css" /> - <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" /> <script src="/static/sorttable.js"></script> <title>{sc.appName} - {title}</title> - <style type="text/css"> - table.sortable thead {{ cursor: pointer; }} - </style> </head> <body> - <div class="container"> - - <div class="row"> - <div class="span12"> - <div class="navbar"> - <div class="navbar-inner"> - <div class="container"> - <div class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></div> - <ul class="nav"> - {storage} - {jobs} - {environment} - </ul> - <ul id="infolist"> - <li>Application: <strong>{sc.appName}</strong></li> - <li>Master: <strong>{sc.master}</strong></li> - <li>Executors: <strong>{sc.getExecutorStorageStatus.size}</strong></li> - </ul> - </div> - </div> - </div> - </div> + <div class="navbar navbar-static-top"> + <div class="navbar-inner"> + <a href="/" class="brand"><img src="/static/spark-logo-77x50px-hd.png" /></a> + <ul class="nav"> + {jobs} + {storage} + {environment} + {executors} + </ul> + <p class="navbar-text pull-right"><strong>{sc.appName}</strong> application UI</p> </div> + </div> - <div class="row" style="padding-top: 5px;"> + <div class="container-fluid"> + <div class="row-fluid"> <div class="span12"> - <h1 style="vertical-align: bottom; display: inline-block;"> + <h3 style="vertical-align: bottom; display: inline-block;"> {title} - </h1> + </h3> </div> </div> - <hr/> {content} </div> </body> @@ -98,23 +87,18 @@ private[spark] object UIUtils { <head> <meta http-equiv="Content-type" content="text/html; charset=utf-8" /> <link rel="stylesheet" href="/static/bootstrap.min.css" type="text/css" /> - <link rel="stylesheet" href="/static/bootstrap-responsive.min.css" type="text/css" /> + <link rel="stylesheet" href="/static/webui.css" type="text/css" /> <script src="/static/sorttable.js"></script> <title>{title}</title> - <style type="text/css"> - table.sortable thead {{ cursor: pointer; }} - </style> </head> <body> - <div class="container"> - <div class="row"> - <div class="span2"> - <img src="/static/spark_logo.png" /> - </div> - <div class="span10"> - <h1 style="vertical-align: bottom; margin-top: 40px; display: inline-block;"> + <div class="container-fluid"> + <div class="row-fluid"> + <div class="span12"> + <h3 
style="vertical-align: middle; display: inline-block;"> + <img src="/static/spark-logo-77x50px-hd.png" style="margin-right: 15px;" /> {title} - </h1> + </h3> </div> </div> {content} @@ -124,9 +108,21 @@ private[spark] object UIUtils { } /** Returns an HTML table constructed by generating a row for each object in a sequence. */ - def listingTable[T](headers: Seq[String], makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { - <table class="table table-bordered table-striped table-condensed sortable"> - <thead>{headers.map(h => <th>{h}</th>)}</thead> + def listingTable[T]( + headers: Seq[String], + makeRow: T => Seq[Node], + rows: Seq[T], + fixedWidth: Boolean = false): Seq[Node] = { + + val colWidth = 100.toDouble / headers.size + val colWidthAttr = if (fixedWidth) colWidth + "%" else "" + var tableClass = "table table-bordered table-striped table-condensed sortable" + if (fixedWidth) { + tableClass += " table-fixed" + } + + <table class={tableClass}> + <thead>{headers.map(h => <th width={colWidthAttr}>{h}</th>)}</thead> <tbody> {rows.map(r => makeRow(r))} </tbody> diff --git a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala index a80e2d7002..0ecb22d2f9 100644 --- a/core/src/main/scala/spark/ui/UIWorkloadGenerator.scala +++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package spark.ui +package org.apache.spark.ui import scala.util.Random -import spark.SparkContext -import spark.SparkContext._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.scheduler.cluster.SchedulingMode + /** * Continuously generates jobs that expose various features of the WebUI (internal testing tool). @@ -29,18 +31,29 @@ import spark.SparkContext._ */ private[spark] object UIWorkloadGenerator { val NUM_PARTITIONS = 100 - val INTER_JOB_WAIT_MS = 500 + val INTER_JOB_WAIT_MS = 5000 def main(args: Array[String]) { + if (args.length < 2) { + println("usage: ./spark-class spark.ui.UIWorkloadGenerator [master] [FIFO|FAIR]") + System.exit(1) + } val master = args(0) + val schedulingMode = SchedulingMode.withName(args(1)) val appName = "Spark UI Tester" + + if (schedulingMode == SchedulingMode.FAIR) { + System.setProperty("spark.cluster.schedulingmode", "FAIR") + } val sc = new SparkContext(master, appName) - // NOTE: Right now there is no easy way for us to show spark.job.annotation for a given phase, - // but we pass it here anyways since it will be useful once we do. 
- def setName(s: String) = { - sc.addLocalProperties("spark.job.annotation", s) + def setProperties(s: String) = { + if (schedulingMode == SchedulingMode.FAIR) { + sc.setLocalProperty("spark.scheduler.cluster.fair.pool", s) + } + sc.setLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION, s) } + val baseData = sc.makeRDD(1 to NUM_PARTITIONS * 10, NUM_PARTITIONS) def nextFloat() = (new Random()).nextFloat() @@ -73,14 +86,18 @@ private[spark] object UIWorkloadGenerator { while (true) { for ((desc, job) <- jobs) { - try { - setName(desc) - job() - println("Job finished: " + desc) - } catch { - case e: Exception => - println("Job Failed: " + desc) - } + new Thread { + override def run() { + try { + setProperties(desc) + job() + println("Job finished: " + desc) + } catch { + case e: Exception => + println("Job Failed: " + desc) + } + } + }.start Thread.sleep(INTER_JOB_WAIT_MS) } } diff --git a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala index 5ae7935ed4..c5bf2acc9e 100644 --- a/core/src/main/scala/spark/ui/env/EnvironmentUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/env/EnvironmentUI.scala @@ -15,22 +15,21 @@ * limitations under the License. */ -package spark.ui.env +package org.apache.spark.ui.env import javax.servlet.http.HttpServletRequest -import org.eclipse.jetty.server.Handler - import scala.collection.JavaConversions._ import scala.util.Properties +import scala.xml.Node -import spark.ui.JettyUtils._ -import spark.ui.UIUtils.headerSparkPage -import spark.ui.Page.Environment -import spark.SparkContext -import spark.ui.UIUtils +import org.eclipse.jetty.server.Handler + +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.UIUtils +import org.apache.spark.ui.Page.Environment +import org.apache.spark.SparkContext -import scala.xml.Node private[spark] class EnvironmentUI(sc: SparkContext) { @@ -44,22 +43,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) { ("Java Home", Properties.javaHome), ("Scala Version", Properties.versionString), ("Scala Home", Properties.scalaHome) - ) + ).sorted def jvmRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr> - def jvmTable = UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation) + def jvmTable = + UIUtils.listingTable(Seq("Name", "Value"), jvmRow, jvmInformation, fixedWidth = true) val properties = System.getProperties.iterator.toSeq - val classPathProperty = properties - .filter{case (k, v) => k.contains("java.class.path")} - .headOption - .getOrElse("", "") - val sparkProperties = properties.filter(_._1.startsWith("spark")) - val otherProperties = properties.diff(sparkProperties :+ classPathProperty) + val classPathProperty = properties.find { case (k, v) => + k.contains("java.class.path") + }.getOrElse(("", "")) + val sparkProperties = properties.filter(_._1.startsWith("spark")).sorted + val otherProperties = properties.diff(sparkProperties :+ classPathProperty).sorted val propertyHeaders = Seq("Name", "Value") def propertyRow(kv: (String, String)) = <tr><td>{kv._1}</td><td>{kv._2}</td></tr> - val sparkPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties) - val otherPropertyTable = UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties) + val sparkPropertyTable = + UIUtils.listingTable(propertyHeaders, propertyRow, sparkProperties, fixedWidth = true) + val otherPropertyTable = + UIUtils.listingTable(propertyHeaders, propertyRow, otherProperties, fixedWidth = true) val
classPathEntries = classPathProperty._2 .split(System.getProperty("path.separator", ":")) @@ -67,20 +68,24 @@ private[spark] class EnvironmentUI(sc: SparkContext) { .map(e => (e, "System Classpath")) val addedJars = sc.addedJars.iterator.toSeq.map{case (path, time) => (path, "Added By User")} val addedFiles = sc.addedFiles.iterator.toSeq.map{case (path, time) => (path, "Added By User")} - val classPath = addedJars ++ addedFiles ++ classPathEntries + val classPath = (addedJars ++ addedFiles ++ classPathEntries).sorted val classPathHeaders = Seq("Resource", "Source") def classPathRow(data: (String, String)) = <tr><td>{data._1}</td><td>{data._2}</td></tr> - val classPathTable = UIUtils.listingTable(classPathHeaders, classPathRow, classPath) + val classPathTable = + UIUtils.listingTable(classPathHeaders, classPathRow, classPath, fixedWidth = true) val content = <span> - <h2>Runtime Information</h2> {jvmTable} - <h2>Spark Properties</h2> {sparkPropertyTable} - <h2>System Properties</h2> {otherPropertyTable} - <h2>Classpath Entries</h2> {classPathTable} + <h4>Runtime Information</h4> {jvmTable} + <h4>Spark Properties</h4> + {sparkPropertyTable} + <h4>System Properties</h4> + {otherPropertyTable} + <h4>Classpath Entries</h4> + {classPathTable} </span> - headerSparkPage(content, sc, "Environment", Environment) + UIUtils.headerSparkPage(content, sc, "Environment", Environment) } } diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala new file mode 100644 index 0000000000..6e56c22d04 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsUI.scala @@ -0,0 +1,137 @@ +package org.apache.spark.ui.exec + +import javax.servlet.http.HttpServletRequest + +import scala.collection.mutable.{HashMap, HashSet} +import scala.xml.Node + +import org.eclipse.jetty.server.Handler + +import org.apache.spark.{ExceptionFailure, Logging, SparkContext} +import org.apache.spark.executor.TaskMetrics +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.scheduler.{SparkListenerTaskStart, SparkListenerTaskEnd, SparkListener} +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.ui.Page.Executors +import org.apache.spark.ui.UIUtils +import org.apache.spark.util.Utils + + +private[spark] class ExecutorsUI(val sc: SparkContext) { + + private var _listener: Option[ExecutorsListener] = None + def listener = _listener.get + + def start() { + _listener = Some(new ExecutorsListener) + sc.addSparkListener(listener) + } + + def getHandlers = Seq[(String, Handler)]( + ("/executors", (request: HttpServletRequest) => render(request)) + ) + + def render(request: HttpServletRequest): Seq[Node] = { + val storageStatusList = sc.getExecutorStorageStatus + + val maxMem = storageStatusList.map(_.maxMem).fold(0L)(_+_) + val memUsed = storageStatusList.map(_.memUsed()).fold(0L)(_+_) + val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)).fold(0L)(_+_) + + val execHead = Seq("Executor ID", "Address", "RDD blocks", "Memory used", "Disk used", + "Active tasks", "Failed tasks", "Complete tasks", "Total tasks") + + def execRow(kv: Seq[String]) = { + <tr> + <td>{kv(0)}</td> + <td>{kv(1)}</td> + <td>{kv(2)}</td> + <td sorttable_customkey={kv(3)}> + {Utils.bytesToString(kv(3).toLong)} / {Utils.bytesToString(kv(4).toLong)} + </td> + <td sorttable_customkey={kv(5)}> + {Utils.bytesToString(kv(5).toLong)} + </td> + <td>{kv(6)}</td> + <td>{kv(7)}</td> + <td>{kv(8)}</td> + <td>{kv(9)}</td> + </tr> 
+ } + + val execInfo = for (b <- 0 until storageStatusList.size) yield getExecInfo(b) + val execTable = UIUtils.listingTable(execHead, execRow, execInfo) + + val content = + <div class="row-fluid"> + <div class="span12"> + <ul class="unstyled"> + <li><strong>Memory:</strong> + {Utils.bytesToString(memUsed)} Used + ({Utils.bytesToString(maxMem)} Total) </li> + <li><strong>Disk:</strong> {Utils.bytesToString(diskSpaceUsed)} Used </li> + </ul> + </div> + </div> + <div class = "row"> + <div class="span12"> + {execTable} + </div> + </div>; + + UIUtils.headerSparkPage(content, sc, "Executors (" + execInfo.size + ")", Executors) + } + + def getExecInfo(a: Int): Seq[String] = { + val execId = sc.getExecutorStorageStatus(a).blockManagerId.executorId + val hostPort = sc.getExecutorStorageStatus(a).blockManagerId.hostPort + val rddBlocks = sc.getExecutorStorageStatus(a).blocks.size.toString + val memUsed = sc.getExecutorStorageStatus(a).memUsed().toString + val maxMem = sc.getExecutorStorageStatus(a).maxMem.toString + val diskUsed = sc.getExecutorStorageStatus(a).diskUsed().toString + val activeTasks = listener.executorToTasksActive.get(a.toString).map(l => l.size).getOrElse(0) + val failedTasks = listener.executorToTasksFailed.getOrElse(a.toString, 0) + val completedTasks = listener.executorToTasksComplete.getOrElse(a.toString, 0) + val totalTasks = activeTasks + failedTasks + completedTasks + + Seq( + execId, + hostPort, + rddBlocks, + memUsed, + maxMem, + diskUsed, + activeTasks.toString, + failedTasks.toString, + completedTasks.toString, + totalTasks.toString + ) + } + + private[spark] class ExecutorsListener extends SparkListener with Logging { + val executorToTasksActive = HashMap[String, HashSet[TaskInfo]]() + val executorToTasksComplete = HashMap[String, Int]() + val executorToTasksFailed = HashMap[String, Int]() + + override def onTaskStart(taskStart: SparkListenerTaskStart) { + val eid = taskStart.taskInfo.executorId + val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]()) + activeTasks += taskStart.taskInfo + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + val eid = taskEnd.taskInfo.executorId + val activeTasks = executorToTasksActive.getOrElseUpdate(eid, new HashSet[TaskInfo]()) + activeTasks -= taskEnd.taskInfo + val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = + taskEnd.reason match { + case e: ExceptionFailure => + executorToTasksFailed(eid) = executorToTasksFailed.getOrElse(eid, 0) + 1 + (Some(e), e.metrics) + case _ => + executorToTasksComplete(eid) = executorToTasksComplete.getOrElse(eid, 0) + 1 + (None, Option(taskEnd.taskMetrics)) + } + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala new file mode 100644 index 0000000000..3b428effaf --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/IndexPage.scala @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
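[The new ExecutorsUI registers its ExecutorsListener with sc.addSparkListener, as shown above. A hedged, stripped-down listener in the same pattern, counting finished tasks per executor; the class name is illustrative:

    import scala.collection.mutable.HashMap
    import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

    class TaskCountListener extends SparkListener {
      val tasksEnded = HashMap[String, Int]()

      // Called by the scheduler whenever any task finishes, successfully or not.
      override def onTaskEnd(taskEnd: SparkListenerTaskEnd) {
        val eid = taskEnd.taskInfo.executorId
        tasksEnded(eid) = tasksEnded.getOrElse(eid, 0) + 1
      }
    }

    // sc.addSparkListener(new TaskCountListener)
]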
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.ui.jobs + +import javax.servlet.http.HttpServletRequest + +import scala.xml.{NodeSeq, Node} + +import org.apache.spark.scheduler.cluster.SchedulingMode +import org.apache.spark.ui.Page._ +import org.apache.spark.ui.UIUtils._ + + +/** Page showing list of all ongoing and recently finished stages and pools*/ +private[spark] class IndexPage(parent: JobProgressUI) { + def listener = parent.listener + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val activeStages = listener.activeStages.toSeq + val completedStages = listener.completedStages.reverse.toSeq + val failedStages = listener.failedStages.reverse.toSeq + val now = System.currentTimeMillis() + + var activeTime = 0L + for (tasks <- listener.stageToTasksActive.values; t <- tasks) { + activeTime += t.timeRunning(now) + } + + val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent) + val completedStagesTable = new StageTable(completedStages.sortBy(_.submissionTime).reverse, parent) + val failedStagesTable = new StageTable(failedStages.sortBy(_.submissionTime).reverse, parent) + + val pools = listener.sc.getAllPools + val poolTable = new PoolTable(pools, listener) + val summary: NodeSeq = + <div> + <ul class="unstyled"> + <li> + <strong>Total Duration: </strong> + {parent.formatDuration(now - listener.sc.startTime)} + </li> + <li><strong>Scheduling Mode:</strong> {parent.sc.getSchedulingMode}</li> + <li> + <a href="#active"><strong>Active Stages:</strong></a> + {activeStages.size} + </li> + <li> + <a href="#completed"><strong>Completed Stages:</strong></a> + {completedStages.size} + </li> + <li> + <a href="#failed"><strong>Failed Stages:</strong></a> + {failedStages.size} + </li> + </ul> + </div> + + val content = summary ++ + {if (listener.sc.getSchedulingMode == SchedulingMode.FAIR) { + <h4>{pools.size} Fair Scheduler Pools</h4> ++ poolTable.toNodeSeq + } else { + Seq() + }} ++ + <h4 id="active">Active Stages ({activeStages.size})</h4> ++ + activeStagesTable.toNodeSeq++ + <h4 id="completed">Completed Stages ({completedStages.size})</h4> ++ + completedStagesTable.toNodeSeq++ + <h4 id ="failed">Failed Stages ({failedStages.size})</h4> ++ + failedStagesTable.toNodeSeq + + headerSparkPage(content, parent.sc, "Spark Stages", Stages) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala new file mode 100644 index 0000000000..86e0af0399 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -0,0 +1,156 @@ +package org.apache.spark.ui.jobs + +import scala.Seq +import scala.collection.mutable.{ListBuffer, HashMap, HashSet} + +import org.apache.spark.{ExceptionFailure, SparkContext, Success} +import org.apache.spark.scheduler._ +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.executor.TaskMetrics +import collection.mutable + +/** + * Tracks task-level information to be displayed in the UI. 
+ * + * All access to the data structures in this class must be synchronized on the + * class, since the UI thread and the DAGScheduler event loop may otherwise + * be reading/updating the internal data structures concurrently. + */ +private[spark] class JobProgressListener(val sc: SparkContext) extends SparkListener { + // How many stages to remember + val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt + val DEFAULT_POOL_NAME = "default" + + val stageToPool = new HashMap[Stage, String]() + val stageToDescription = new HashMap[Stage, String]() + val poolToActiveStages = new HashMap[String, HashSet[Stage]]() + + val activeStages = HashSet[Stage]() + val completedStages = ListBuffer[Stage]() + val failedStages = ListBuffer[Stage]() + + // Total metrics reflect metrics only for completed tasks + var totalTime = 0L + var totalShuffleRead = 0L + var totalShuffleWrite = 0L + + val stageToTime = HashMap[Int, Long]() + val stageToShuffleRead = HashMap[Int, Long]() + val stageToShuffleWrite = HashMap[Int, Long]() + val stageToTasksActive = HashMap[Int, HashSet[TaskInfo]]() + val stageToTasksComplete = HashMap[Int, Int]() + val stageToTasksFailed = HashMap[Int, Int]() + val stageToTaskInfos = + HashMap[Int, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]() + + override def onJobStart(jobStart: SparkListenerJobStart) {} + + override def onStageCompleted(stageCompleted: StageCompleted) = synchronized { + val stage = stageCompleted.stageInfo.stage + poolToActiveStages(stageToPool(stage)) -= stage + activeStages -= stage + completedStages += stage + trimIfNecessary(completedStages) + } + + /** If stages is too large, remove and garbage collect old stages */ + def trimIfNecessary(stages: ListBuffer[Stage]) = synchronized { + if (stages.size > RETAINED_STAGES) { + val toRemove = RETAINED_STAGES / 10 + stages.takeRight(toRemove).foreach( s => { + stageToTaskInfos.remove(s.id) + stageToTime.remove(s.id) + stageToShuffleRead.remove(s.id) + stageToShuffleWrite.remove(s.id) + stageToTasksActive.remove(s.id) + stageToTasksComplete.remove(s.id) + stageToTasksFailed.remove(s.id) + stageToPool.remove(s) + if (stageToDescription.contains(s)) {stageToDescription.remove(s)} + }) + stages.trimEnd(toRemove) + } + } + + /** For FIFO, all stages are contained by "default" pool but "default" pool here is meaningless */ + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = synchronized { + val stage = stageSubmitted.stage + activeStages += stage + + val poolName = Option(stageSubmitted.properties).map { + p => p.getProperty("spark.scheduler.cluster.fair.pool", DEFAULT_POOL_NAME) + }.getOrElse(DEFAULT_POOL_NAME) + stageToPool(stage) = poolName + + val description = Option(stageSubmitted.properties).flatMap { + p => Option(p.getProperty(SparkContext.SPARK_JOB_DESCRIPTION)) + } + description.map(d => stageToDescription(stage) = d) + + val stages = poolToActiveStages.getOrElseUpdate(poolName, new HashSet[Stage]()) + stages += stage + } + + override def onTaskStart(taskStart: SparkListenerTaskStart) = synchronized { + val sid = taskStart.task.stageId + val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) + tasksActive += taskStart.taskInfo + val taskList = stageToTaskInfos.getOrElse( + sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) + taskList += ((taskStart.taskInfo, None, None)) + stageToTaskInfos(sid) = taskList + } + + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) = synchronized { + 
val sid = taskEnd.task.stageId + val tasksActive = stageToTasksActive.getOrElseUpdate(sid, new HashSet[TaskInfo]()) + tasksActive -= taskEnd.taskInfo + val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = + taskEnd.reason match { + case e: ExceptionFailure => + stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1 + (Some(e), e.metrics) + case _ => + stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1 + (None, Option(taskEnd.taskMetrics)) + } + + stageToTime.getOrElseUpdate(sid, 0L) + val time = metrics.map(m => m.executorRunTime).getOrElse(0) + stageToTime(sid) += time + totalTime += time + + stageToShuffleRead.getOrElseUpdate(sid, 0L) + val shuffleRead = metrics.flatMap(m => m.shuffleReadMetrics).map(s => + s.remoteBytesRead).getOrElse(0L) + stageToShuffleRead(sid) += shuffleRead + totalShuffleRead += shuffleRead + + stageToShuffleWrite.getOrElseUpdate(sid, 0L) + val shuffleWrite = metrics.flatMap(m => m.shuffleWriteMetrics).map(s => + s.shuffleBytesWritten).getOrElse(0L) + stageToShuffleWrite(sid) += shuffleWrite + totalShuffleWrite += shuffleWrite + + val taskList = stageToTaskInfos.getOrElse( + sid, HashSet[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) + taskList -= ((taskEnd.taskInfo, None, None)) + taskList += ((taskEnd.taskInfo, metrics, failureInfo)) + stageToTaskInfos(sid) = taskList + } + + override def onJobEnd(jobEnd: SparkListenerJobEnd) = synchronized { + jobEnd match { + case end: SparkListenerJobEnd => + end.jobResult match { + case JobFailed(ex, Some(stage)) => + activeStages -= stage + poolToActiveStages(stageToPool(stage)) -= stage + failedStages += stage + trimIfNecessary(failedStages) + case _ => + } + case _ => + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala new file mode 100644 index 0000000000..6aecef5120 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressUI.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
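[As JobProgressListener's class comment notes, its maps are touched from both the UI thread and the DAGScheduler event loop, so readers must take the same lock the callbacks do. A minimal sketch of the reading side, mirroring what the UI pages in this commit do:

    // Reads of listener state lock on the listener itself, matching the
    // synchronized blocks inside its SparkListener callbacks.
    val (active, completed) = listener.synchronized {
      (listener.activeStages.size, listener.completedStages.size)
    }
]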
+ */ + +package org.apache.spark.ui.jobs + +import akka.util.Duration + +import java.text.SimpleDateFormat + +import javax.servlet.http.HttpServletRequest + +import org.eclipse.jetty.server.Handler + +import scala.Seq +import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer} + +import org.apache.spark.ui.JettyUtils._ +import org.apache.spark.{ExceptionFailure, SparkContext, Success} +import org.apache.spark.scheduler._ +import collection.mutable +import org.apache.spark.scheduler.cluster.SchedulingMode +import org.apache.spark.scheduler.cluster.SchedulingMode.SchedulingMode +import org.apache.spark.util.Utils + +/** Web UI showing progress status of all jobs in the given SparkContext. */ +private[spark] class JobProgressUI(val sc: SparkContext) { + private var _listener: Option[JobProgressListener] = None + def listener = _listener.get + val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + + private val indexPage = new IndexPage(this) + private val stagePage = new StagePage(this) + private val poolPage = new PoolPage(this) + + def start() { + _listener = Some(new JobProgressListener(sc)) + sc.addSparkListener(listener) + } + + def formatDuration(ms: Long) = Utils.msDurationToString(ms) + + def getHandlers = Seq[(String, Handler)]( + ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)), + ("/stages/pool", (request: HttpServletRequest) => poolPage.render(request)), + ("/stages", (request: HttpServletRequest) => indexPage.render(request)) + ) +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala new file mode 100644 index 0000000000..ce92b6932b --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolPage.scala @@ -0,0 +1,32 @@ +package org.apache.spark.ui.jobs + +import javax.servlet.http.HttpServletRequest + +import scala.xml.{NodeSeq, Node} +import scala.collection.mutable.HashSet + +import org.apache.spark.scheduler.Stage +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ + +/** Page showing specific pool details */ +private[spark] class PoolPage(parent: JobProgressUI) { + def listener = parent.listener + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val poolName = request.getParameter("poolname") + val poolToActiveStages = listener.poolToActiveStages + val activeStages = poolToActiveStages.get(poolName).toSeq.flatten + val activeStagesTable = new StageTable(activeStages.sortBy(_.submissionTime).reverse, parent) + + val pool = listener.sc.getPoolForName(poolName).get + val poolTable = new PoolTable(Seq(pool), listener) + + val content = <h4>Summary </h4> ++ poolTable.toNodeSeq() ++ + <h4>{activeStages.size} Active Stages</h4> ++ activeStagesTable.toNodeSeq() + + headerSparkPage(content, parent.sc, "Fair Scheduler Pool: " + poolName, Stages) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala new file mode 100644 index 0000000000..f31465e59d --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/PoolTable.scala @@ -0,0 +1,55 @@ +package org.apache.spark.ui.jobs + +import scala.collection.mutable.HashMap +import scala.collection.mutable.HashSet +import scala.xml.Node + +import org.apache.spark.scheduler.Stage +import org.apache.spark.scheduler.cluster.Schedulable + +/** Table showing list of pools */ +private[spark] class PoolTable(pools: Seq[Schedulable], listener: 
JobProgressListener) { + + var poolToActiveStages: HashMap[String, HashSet[Stage]] = listener.poolToActiveStages + + def toNodeSeq(): Seq[Node] = { + listener.synchronized { + poolTable(poolRow, pools) + } + } + + private def poolTable(makeRow: (Schedulable, HashMap[String, HashSet[Stage]]) => Seq[Node], + rows: Seq[Schedulable] + ): Seq[Node] = { + <table class="table table-bordered table-striped table-condensed sortable table-fixed"> + <thead> + <th>Pool Name</th> + <th>Minimum Share</th> + <th>Pool Weight</th> + <th>Active Stages</th> + <th>Running Tasks</th> + <th>SchedulingMode</th> + </thead> + <tbody> + {rows.map(r => makeRow(r, poolToActiveStages))} + </tbody> + </table> + } + + private def poolRow(p: Schedulable, poolToActiveStages: HashMap[String, HashSet[Stage]]) + : Seq[Node] = { + val activeStages = poolToActiveStages.get(p.name) match { + case Some(stages) => stages.size + case None => 0 + } + <tr> + <td><a href={"/stages/pool?poolname=%s".format(p.name)}>{p.name}</a></td> + <td>{p.minShare}</td> + <td>{p.weight}</td> + <td>{activeStages}</td> + <td>{p.runningTasks}</td> + <td>{p.schedulingMode}</td> + </tr> + } +} + diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala new file mode 100644 index 0000000000..a9969ab1c0 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -0,0 +1,183 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ui.jobs + +import java.util.Date + +import javax.servlet.http.HttpServletRequest + +import scala.xml.Node + +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.{Utils, Distribution} +import org.apache.spark.{ExceptionFailure} +import org.apache.spark.scheduler.cluster.TaskInfo +import org.apache.spark.executor.TaskMetrics + +/** Page showing statistics and task list for a given stage */ +private[spark] class StagePage(parent: JobProgressUI) { + def listener = parent.listener + val dateFmt = parent.dateFmt + + def render(request: HttpServletRequest): Seq[Node] = { + listener.synchronized { + val stageId = request.getParameter("id").toInt + val now = System.currentTimeMillis() + + if (!listener.stageToTaskInfos.contains(stageId)) { + val content = + <div> + <h4>Summary Metrics</h4> No tasks have started yet + <h4>Tasks</h4> No tasks have started yet + </div> + return headerSparkPage(content, parent.sc, "Details for Stage %s".format(stageId), Stages) + } + + val tasks = listener.stageToTaskInfos(stageId).toSeq.sortBy(_._1.launchTime) + + val numCompleted = tasks.count(_._1.finished) + val shuffleReadBytes = listener.stageToShuffleRead.getOrElse(stageId, 0L) + val hasShuffleRead = shuffleReadBytes > 0 + val shuffleWriteBytes = listener.stageToShuffleWrite.getOrElse(stageId, 0L) + val hasShuffleWrite = shuffleWriteBytes > 0 + + var activeTime = 0L + listener.stageToTasksActive(stageId).foreach(activeTime += _.timeRunning(now)) + + val summary = + <div> + <ul class="unstyled"> + <li> + <strong>CPU time: </strong> + {parent.formatDuration(listener.stageToTime.getOrElse(stageId, 0L) + activeTime)} + </li> + {if (hasShuffleRead) + <li> + <strong>Shuffle read: </strong> + {Utils.bytesToString(shuffleReadBytes)} + </li> + } + {if (hasShuffleWrite) + <li> + <strong>Shuffle write: </strong> + {Utils.bytesToString(shuffleWriteBytes)} + </li> + } + </ul> + </div> + + val taskHeaders: Seq[String] = + Seq("Task ID", "Status", "Locality Level", "Executor", "Launch Time", "Duration") ++ + Seq("GC Time") ++ + {if (hasShuffleRead) Seq("Shuffle Read") else Nil} ++ + {if (hasShuffleWrite) Seq("Shuffle Write") else Nil} ++ + Seq("Errors") + + val taskTable = listingTable(taskHeaders, taskRow(hasShuffleRead, hasShuffleWrite), tasks) + + // Excludes tasks which failed and have incomplete metrics + val validTasks = tasks.filter(t => t._1.status == "SUCCESS" && (t._2.isDefined)) + + val summaryTable: Option[Seq[Node]] = + if (validTasks.size == 0) { + None + } + else { + val serviceTimes = validTasks.map{case (info, metrics, exception) => + metrics.get.executorRunTime.toDouble} + val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map( + ms => parent.formatDuration(ms.toLong)) + + def getQuantileCols(data: Seq[Double]) = + Distribution(data).get.getQuantiles().map(d => Utils.bytesToString(d.toLong)) + + val shuffleReadSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble + } + val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes) + + val shuffleWriteSizes = validTasks.map { + case(info, metrics, exception) => + metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble + } + val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes) + + val listings: Seq[Seq[String]] = Seq(serviceQuantiles, + if (hasShuffleRead) shuffleReadQuantiles else 
Nil, + if (hasShuffleWrite) shuffleWriteQuantiles else Nil) + + val quantileHeaders = Seq("Metric", "Min", "25th percentile", + "Median", "75th percentile", "Max") + def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr> + Some(listingTable(quantileHeaders, quantileRow, listings, fixedWidth = true)) + } + + val content = + summary ++ + <h4>Summary Metrics for {numCompleted} Completed Tasks</h4> ++ + <div>{summaryTable.getOrElse("No tasks have reported metrics yet.")}</div> ++ + <h4>Tasks</h4> ++ taskTable; + + headerSparkPage(content, parent.sc, "Details for Stage %d".format(stageId), Stages) + } + } + + + def taskRow(shuffleRead: Boolean, shuffleWrite: Boolean) + (taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = { + def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] = + trace.map(e => <span style="display:block;">{e.toString}</span>) + val (info, metrics, exception) = taskData + + val duration = if (info.status == "RUNNING") info.timeRunning(System.currentTimeMillis()) + else metrics.map(m => m.executorRunTime).getOrElse(1) + val formatDuration = if (info.status == "RUNNING") parent.formatDuration(duration) + else metrics.map(m => parent.formatDuration(m.executorRunTime)).getOrElse("") + val gcTime = metrics.map(m => m.jvmGCTime).getOrElse(0L) + + <tr> + <td>{info.taskId}</td> + <td>{info.status}</td> + <td>{info.taskLocality}</td> + <td>{info.host}</td> + <td>{dateFmt.format(new Date(info.launchTime))}</td> + <td sorttable_customkey={duration.toString}> + {formatDuration} + </td> + <td sorttable_customkey={gcTime.toString}> + {if (gcTime > 0) parent.formatDuration(gcTime) else ""} + </td> + {if (shuffleRead) { + <td>{metrics.flatMap{m => m.shuffleReadMetrics}.map{s => + Utils.bytesToString(s.remoteBytesRead)}.getOrElse("")}</td> + }} + {if (shuffleWrite) { + <td>{metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => + Utils.bytesToString(s.shuffleBytesWritten)}.getOrElse("")}</td> + }} + <td>{exception.map(e => + <span> + {e.className} ({e.description})<br/> + {fmtStackTrace(e.stackTrace)} + </span>).getOrElse("")} + </td> + </tr> + } +} diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala new file mode 100644 index 0000000000..71e58a977e --- /dev/null +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StageTable.scala @@ -0,0 +1,107 @@ +package org.apache.spark.ui.jobs + +import java.util.Date + +import scala.xml.Node +import scala.collection.mutable.HashSet + +import org.apache.spark.scheduler.cluster.{SchedulingMode, TaskInfo} +import org.apache.spark.scheduler.Stage +import org.apache.spark.util.Utils + + +/** Page showing list of all ongoing and recently finished stages */ +private[spark] class StageTable(val stages: Seq[Stage], val parent: JobProgressUI) { + + val listener = parent.listener + val dateFmt = parent.dateFmt + val isFairScheduler = listener.sc.getSchedulingMode == SchedulingMode.FAIR + + def toNodeSeq(): Seq[Node] = { + listener.synchronized { + stageTable(stageRow, stages) + } + } + + /** Special table which merges two header cells. 
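[StagePage's summary table above leans on org.apache.spark.util.Distribution for its quantile rows. A hedged standalone sketch of that call; the sample data is illustrative, and getQuantiles() with no arguments is assumed to yield min, 25th percentile, median, 75th percentile, and max, matching the header row above:

    import org.apache.spark.util.Distribution

    val runTimesMs = Seq(120.0, 340.0, 95.0, 410.0, 220.0)
    // Distribution(...) returns None for empty input, hence the Option.
    val quantiles = Distribution(runTimesMs).map(_.getQuantiles())
]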
*/ + private def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { + <table class="table table-bordered table-striped table-condensed sortable"> + <thead> + <th>Stage Id</th> + {if (isFairScheduler) {<th>Pool Name</th>} else {}} + <th>Description</th> + <th>Submitted</th> + <th>Duration</th> + <th>Tasks: Succeeded/Total</th> + <th>Shuffle Read</th> + <th>Shuffle Write</th> + </thead> + <tbody> + {rows.map(r => makeRow(r))} + </tbody> + </table> + } + + private def makeProgressBar(started: Int, completed: Int, failed: String, total: Int): Seq[Node] = { + val completeWidth = "width: %s%%".format((completed.toDouble/total)*100) + val startWidth = "width: %s%%".format((started.toDouble/total)*100) + + <div class="progress"> + <span style="text-align:center; position:absolute; width:100%;"> + {completed}/{total} {failed} + </span> + <div class="bar bar-completed" style={completeWidth}></div> + <div class="bar bar-running" style={startWidth}></div> + </div> + } + + + private def stageRow(s: Stage): Seq[Node] = { + val submissionTime = s.submissionTime match { + case Some(t) => dateFmt.format(new Date(t)) + case None => "Unknown" + } + + val shuffleRead = listener.stageToShuffleRead.getOrElse(s.id, 0L) match { + case 0 => "" + case b => Utils.bytesToString(b) + } + val shuffleWrite = listener.stageToShuffleWrite.getOrElse(s.id, 0L) match { + case 0 => "" + case b => Utils.bytesToString(b) + } + + val startedTasks = listener.stageToTasksActive.getOrElse(s.id, HashSet[TaskInfo]()).size + val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0) + val failedTasks = listener.stageToTasksFailed.getOrElse(s.id, 0) match { + case f if f > 0 => "(%s failed)".format(f) + case _ => "" + } + val totalTasks = s.numPartitions + + val poolName = listener.stageToPool.get(s) + + val nameLink = <a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a> + val description = listener.stageToDescription.get(s) + .map(d => <div><em>{d}</em></div><div>{nameLink}</div>).getOrElse(nameLink) + val finishTime = s.completionTime.getOrElse(System.currentTimeMillis()) + val duration = s.submissionTime.map(t => finishTime - t) + + <tr> + <td>{s.id}</td> + {if (isFairScheduler) { + <td><a href={"/stages/pool?poolname=%s".format(poolName.get)}>{poolName.get}</a></td>} + } + <td>{description}</td> + <td valign="middle">{submissionTime}</td> + <td sorttable_customkey={duration.getOrElse(-1).toString}> + {duration.map(d => parent.formatDuration(d)).getOrElse("Unknown")} + </td> + <td class="progress-cell"> + {makeProgressBar(startedTasks, completedTasks, failedTasks, totalTasks)} + </td> + <td>{shuffleRead}</td> + <td>{shuffleWrite}</td> + </tr> + } +} diff --git a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala index 49ed069c75..1d633d374a 100644 --- a/core/src/main/scala/spark/ui/storage/BlockManagerUI.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/BlockManagerUI.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.ui.storage +package org.apache.spark.ui.storage import akka.util.Duration @@ -23,8 +23,8 @@ import javax.servlet.http.HttpServletRequest import org.eclipse.jetty.server.Handler -import spark.{Logging, SparkContext} -import spark.ui.JettyUtils._ +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.ui.JettyUtils._ /** Web UI showing storage status of all RDD's in the given SparkContext. 
*/ private[spark] class BlockManagerUI(val sc: SparkContext) extends Logging { diff --git a/core/src/main/scala/spark/ui/storage/IndexPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala index 4e0360d19a..c3ec907370 100644 --- a/core/src/main/scala/spark/ui/storage/IndexPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/IndexPage.scala @@ -15,16 +15,16 @@ * limitations under the License. */ -package spark.ui.storage +package org.apache.spark.ui.storage import javax.servlet.http.HttpServletRequest import scala.xml.Node -import spark.storage.{RDDInfo, StorageUtils} -import spark.Utils -import spark.ui.UIUtils._ -import spark.ui.Page._ +import org.apache.spark.storage.{RDDInfo, StorageUtils} +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.Utils /** Page showing list of RDD's currently stored in the cluster */ private[spark] class IndexPage(parent: BlockManagerUI) { @@ -33,34 +33,18 @@ private[spark] class IndexPage(parent: BlockManagerUI) { def render(request: HttpServletRequest): Seq[Node] = { val storageStatusList = sc.getExecutorStorageStatus // Calculate macro-level statistics - val maxMem = storageStatusList.map(_.maxMem).reduce(_+_) - val remainingMem = storageStatusList.map(_.memRemaining).reduce(_+_) - val diskSpaceUsed = storageStatusList.flatMap(_.blocks.values.map(_.diskSize)) - .reduceOption(_+_).getOrElse(0L) val rddHeaders = Seq( "RDD Name", "Storage Level", "Cached Partitions", - "Fraction Partitions Cached", + "Fraction Cached", "Size in Memory", "Size on Disk") val rdds = StorageUtils.rddInfoFromStorageStatus(storageStatusList, sc) - val rddTable = listingTable(rddHeaders, rddRow, rdds) + val content = listingTable(rddHeaders, rddRow, rdds) - val content = - <div class="row"> - <div class="span12"> - <ul class="unstyled"> - <li><strong>Memory:</strong> - {Utils.memoryBytesToString(maxMem - remainingMem)} Used - ({Utils.memoryBytesToString(remainingMem)} Available) </li> - <li><strong>Disk:</strong> {Utils.memoryBytesToString(diskSpaceUsed)} Used </li> - </ul> - </div> - </div> ++ {rddTable}; - - headerSparkPage(content, parent.sc, "Spark Storage ", Storage) + headerSparkPage(content, parent.sc, "Storage ", Storage) } def rddRow(rdd: RDDInfo): Seq[Node] = { @@ -73,9 +57,9 @@ private[spark] class IndexPage(parent: BlockManagerUI) { <td>{rdd.storageLevel.description} </td> <td>{rdd.numCachedPartitions}</td> - <td>{rdd.numCachedPartitions / rdd.numPartitions.toDouble}</td> - <td>{Utils.memoryBytesToString(rdd.memSize)}</td> - <td>{Utils.memoryBytesToString(rdd.diskSize)}</td> + <td>{"%.0f%%".format(rdd.numCachedPartitions * 100.0 / rdd.numPartitions)}</td> + <td>{Utils.bytesToString(rdd.memSize)}</td> + <td>{Utils.bytesToString(rdd.diskSize)}</td> </tr> } } diff --git a/core/src/main/scala/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 003be54ad8..43c1257677 100644 --- a/core/src/main/scala/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -15,17 +15,18 @@ * limitations under the License. 
*/ -package spark.ui.storage +package org.apache.spark.ui.storage import javax.servlet.http.HttpServletRequest import scala.xml.Node -import spark.storage.{StorageStatus, StorageUtils} -import spark.ui.UIUtils._ -import spark.Utils -import spark.storage.BlockManagerMasterActor.BlockStatus -import spark.ui.Page._ +import org.apache.spark.storage.{StorageStatus, StorageUtils} +import org.apache.spark.storage.BlockManagerMasterActor.BlockStatus +import org.apache.spark.ui.UIUtils._ +import org.apache.spark.ui.Page._ +import org.apache.spark.util.Utils + /** Page showing storage details for a given RDD */ private[spark] class RDDPage(parent: BlockManagerUI) { @@ -44,7 +45,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) { val workerTable = listingTable(workerHeaders, workerRow, workers) val blockHeaders = Seq("Block Name", "Storage Level", "Size in Memory", "Size on Disk", - "Locations") + "Executors") val blockStatuses = filteredStorageStatusList.flatMap(_.blocks).toArray.sortWith(_._1 < _._1) val blockLocations = StorageUtils.blockLocationsFromStorageStatus(filteredStorageStatusList) @@ -54,7 +55,7 @@ private[spark] class RDDPage(parent: BlockManagerUI) { val blockTable = listingTable(blockHeaders, blockRow, blocks) val content = - <div class="row"> + <div class="row-fluid"> <div class="span12"> <ul class="unstyled"> <li> @@ -71,30 +72,31 @@ private[spark] class RDDPage(parent: BlockManagerUI) { </li> <li> <strong>Memory Size:</strong> - {Utils.memoryBytesToString(rddInfo.memSize)} + {Utils.bytesToString(rddInfo.memSize)} </li> <li> <strong>Disk Size:</strong> - {Utils.memoryBytesToString(rddInfo.diskSize)} + {Utils.bytesToString(rddInfo.diskSize)} </li> </ul> </div> </div> - <hr/> - <div class="row"> + + <div class="row-fluid"> <div class="span12"> + <h4> Data Distribution on {workers.size} Executors </h4> {workerTable} </div> </div> - <hr/> - <div class="row"> + + <div class="row-fluid"> <div class="span12"> - <h3> RDD Summary </h3> + <h4> {blocks.size} Partitions </h4> {blockTable} </div> </div>; - headerSparkPage(content, parent.sc, "RDD Info: " + rddInfo.name, Jobs) + headerSparkPage(content, parent.sc, "RDD Storage Info for " + rddInfo.name, Storage) } def blockRow(row: (String, BlockStatus, Seq[String])): Seq[Node] = { @@ -105,10 +107,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) { {block.storageLevel.description} </td> <td sorttable_customkey={block.memSize.toString}> - {Utils.memoryBytesToString(block.memSize)} + {Utils.bytesToString(block.memSize)} </td> <td sorttable_customkey={block.diskSize.toString}> - {Utils.memoryBytesToString(block.diskSize)} + {Utils.bytesToString(block.diskSize)} </td> <td> {locations.map(l => <span>{l}<br/></span>)} @@ -121,10 +123,10 @@ private[spark] class RDDPage(parent: BlockManagerUI) { <tr> <td>{status.blockManagerId.host + ":" + status.blockManagerId.port}</td> <td> - {Utils.memoryBytesToString(status.memUsed(prefix))} - ({Utils.memoryBytesToString(status.memRemaining)} Total Available) + {Utils.bytesToString(status.memUsed(prefix))} + ({Utils.bytesToString(status.memRemaining)} Remaining) </td> - <td>{Utils.memoryBytesToString(status.diskUsed(prefix))}</td> + <td>{Utils.bytesToString(status.diskUsed(prefix))}</td> </tr> } } diff --git a/core/src/main/scala/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index 9233277bdb..d4c5065c3f 100644 --- a/core/src/main/scala/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -15,7 +15,7 @@ * limitations 
under the License. */ -package spark.util +package org.apache.spark.util import akka.actor.{ActorSystem, ExtendedActorSystem} import com.typesafe.config.ConfigFactory diff --git a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala index 0575497f5d..0b51c23f7b 100644 --- a/core/src/main/scala/spark/util/BoundedPriorityQueue.scala +++ b/core/src/main/scala/org/apache/spark/util/BoundedPriorityQueue.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.Serializable import java.util.{PriorityQueue => JPriorityQueue} diff --git a/core/src/main/scala/spark/util/ByteBufferInputStream.scala b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala index 47a28e2f76..e214d2a519 100644 --- a/core/src/main/scala/spark/util/ByteBufferInputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/ByteBufferInputStream.scala @@ -15,11 +15,11 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.InputStream import java.nio.ByteBuffer -import spark.storage.BlockManager +import org.apache.spark.storage.BlockManager /** * Reads data from a ByteBuffer, and optionally cleans it up using BlockManager.dispose() diff --git a/core/src/main/scala/org/apache/spark/util/Clock.scala b/core/src/main/scala/org/apache/spark/util/Clock.scala new file mode 100644 index 0000000000..97c2b45aab --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/Clock.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +/** + * An interface to represent clocks, so that they can be mocked out in unit tests. + */ +private[spark] trait Clock { + def getTime(): Long +} + +private[spark] object SystemClock extends Clock { + def getTime(): Long = System.currentTimeMillis() +} diff --git a/core/src/main/scala/spark/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 8b39241095..7108595e3e 100644 --- a/core/src/main/scala/spark/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -15,7 +15,7 @@ * limitations under the License. 
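The new Clock trait above exists so that time can be substituted in unit tests. A minimal sketch of such a test double; ManualClock here is hypothetical, not part of this patch:

    package org.apache.spark.util

    // Hypothetical manually advanced clock: tests control time explicitly
    // instead of depending on System.currentTimeMillis().
    private[spark] class ManualClock(private var now: Long = 0L) extends Clock {
      def getTime(): Long = now
      def advance(millis: Long): Unit = { now += millis }
    }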
*/ -package spark +package org.apache.spark.util import java.lang.reflect.Field @@ -25,6 +25,7 @@ import scala.collection.mutable.Set import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor, Type} import org.objectweb.asm.Opcodes._ import java.io.{InputStream, IOException, ByteArrayOutputStream, ByteArrayInputStream, BufferedInputStream} +import org.apache.spark.Logging private[spark] object ClosureCleaner extends Logging { // Get an ASM class reader for a given class from the JAR that loaded it diff --git a/core/src/main/scala/spark/util/CompletionIterator.scala b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala index 210450892b..dc15a38b29 100644 --- a/core/src/main/scala/spark/util/CompletionIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/CompletionIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * Wrapper around an iterator which calls a completion method after it successfully iterates through all the elements diff --git a/core/src/main/scala/spark/util/Distribution.scala b/core/src/main/scala/org/apache/spark/util/Distribution.scala index 5d4d7a6c50..33bf3562fe 100644 --- a/core/src/main/scala/spark/util/Distribution.scala +++ b/core/src/main/scala/org/apache/spark/util/Distribution.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.io.PrintStream diff --git a/core/src/main/scala/spark/util/IdGenerator.scala b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala index 3422280559..17e55f7996 100644 --- a/core/src/main/scala/spark/util/IdGenerator.scala +++ b/core/src/main/scala/org/apache/spark/util/IdGenerator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.util.concurrent.atomic.AtomicInteger diff --git a/core/src/main/scala/spark/util/IntParam.scala b/core/src/main/scala/org/apache/spark/util/IntParam.scala index daf0d58fa2..626bb49eea 100644 --- a/core/src/main/scala/spark/util/IntParam.scala +++ b/core/src/main/scala/org/apache/spark/util/IntParam.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * An extractor object for parsing strings into integers. diff --git a/core/src/main/scala/spark/util/MemoryParam.scala b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala index 298562323a..4869c9897a 100644 --- a/core/src/main/scala/spark/util/MemoryParam.scala +++ b/core/src/main/scala/org/apache/spark/util/MemoryParam.scala @@ -15,9 +15,7 @@ * limitations under the License. */ -package spark.util - -import spark.Utils +package org.apache.spark.util /** * An extractor object for parsing JVM memory strings, such as "10g", into an Int representing diff --git a/core/src/main/scala/spark/util/MetadataCleaner.scala b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala index 92909e0959..a430a75451 100644 --- a/core/src/main/scala/spark/util/MetadataCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/MetadataCleaner.scala @@ -15,11 +15,11 @@ * limitations under the License. 
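The IntParam and MemoryParam extractor objects above are intended for pattern-matching command-line arguments. A sketch of the intended use, with illustrative argument values:

    // Hypothetical argument parsing: "7077" matches IntParam as 7077, and
    // "10g" matches MemoryParam as 10240 (an Int number of megabytes).
    def parseArgs(args: Array[String]): (String, Int, Int) = args match {
      case Array(host, IntParam(port), MemoryParam(memoryMb)) =>
        (host, port, memoryMb)
      case _ =>
        sys.error("usage: <host> <port> <memory>")
    }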
*/ -package spark.util +package org.apache.spark.util import java.util.concurrent.{TimeUnit, ScheduledFuture, Executors} import java.util.{TimerTask, Timer} -import spark.Logging +import org.apache.spark.Logging /** diff --git a/core/src/main/scala/spark/package.scala b/core/src/main/scala/org/apache/spark/util/MutablePair.scala index b244bfbf06..34f1f6606f 100644 --- a/core/src/main/scala/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/util/MutablePair.scala @@ -15,18 +15,22 @@ * limitations under the License. */ +package org.apache.spark.util + + /** - * Core Spark functionality. [[spark.SparkContext]] serves as the main entry point to Spark, while - * [[spark.RDD]] is the data type representing a distributed collection, and provides most - * parallel operations. + * A tuple of 2 elements. This can be used as an alternative to Scala's Tuple2 when we want to + * minimize object allocation. * - * In addition, [[spark.PairRDDFunctions]] contains operations available only on RDDs of key-value - * pairs, such as `groupByKey` and `join`; [[spark.DoubleRDDFunctions]] contains operations - * available only on RDDs of Doubles; and [[spark.SequenceFileRDDFunctions]] contains operations - * available on RDDs that can be saved as SequenceFiles. These operations are automatically - * available on any RDD of the right type (e.g. RDD[(Int, Int)] through implicit conversions when - * you `import spark.SparkContext._`. + * @param _1 Element 1 of this MutablePair + * @param _2 Element 2 of this MutablePair */ -package object spark { - // For package docs only +case class MutablePair[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T1, + @specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T2] + (var _1: T1, var _2: T2) + extends Product2[T1, T2] +{ + override def toString = "(" + _1 + "," + _2 + ")" + + override def canEqual(that: Any): Boolean = that.isInstanceOf[MutablePair[_,_]] } diff --git a/core/src/main/scala/spark/util/NextIterator.scala b/core/src/main/scala/org/apache/spark/util/NextIterator.scala index 22163ece8d..8266e5e495 100644 --- a/core/src/main/scala/spark/util/NextIterator.scala +++ b/core/src/main/scala/org/apache/spark/util/NextIterator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** Provides a basic/boilerplate Iterator implementation. */ private[spark] abstract class NextIterator[U] extends Iterator[U] { diff --git a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala index 00f782bbe7..47e1b45004 100644 --- a/core/src/main/scala/spark/util/RateLimitedOutputStream.scala +++ b/core/src/main/scala/org/apache/spark/util/RateLimitedOutputStream.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import scala.annotation.tailrec diff --git a/core/src/main/scala/spark/util/SerializableBuffer.scala b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala index 7e6842628a..f2b1ad7d0e 100644 --- a/core/src/main/scala/spark/util/SerializableBuffer.scala +++ b/core/src/main/scala/org/apache/spark/util/SerializableBuffer.scala @@ -15,7 +15,7 @@ * limitations under the License. 
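MutablePair's point is allocation avoidance: one instance is mutated and re-emitted instead of building a fresh Tuple2 per record. A short usage sketch (the tag helper is illustrative):

    // Reuse a single pair across calls; the caller must consume each result
    // before the next call, since the same object is mutated in place.
    val pair = new MutablePair[Int, Long](0, 0L)
    def tag(record: Int): Product2[Int, Long] = {
      pair._1 = record
      pair._2 = System.nanoTime()
      pair
    }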
*/ -package spark.util +package org.apache.spark.util import java.nio.ByteBuffer import java.io.{IOException, ObjectOutputStream, EOFException, ObjectInputStream} diff --git a/core/src/main/scala/spark/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 6cc57566d7..a25b37a2a9 100644 --- a/core/src/main/scala/spark/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.util import java.lang.reflect.Field import java.lang.reflect.Modifier @@ -30,6 +30,7 @@ import java.lang.management.ManagementFactory import scala.collection.mutable.ArrayBuffer import it.unimi.dsi.fastutil.ints.IntOpenHashSet +import org.apache.spark.Logging /** * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in diff --git a/core/src/main/scala/spark/util/StatCounter.scala b/core/src/main/scala/org/apache/spark/util/StatCounter.scala index 76358d4151..020d5edba9 100644 --- a/core/src/main/scala/spark/util/StatCounter.scala +++ b/core/src/main/scala/org/apache/spark/util/StatCounter.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util /** * A class for tracking the statistics of a set of numbers (count, mean and variance) in a diff --git a/core/src/main/scala/spark/util/TimeStampedHashMap.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala index cc7909194a..277de2f8a6 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashMap.scala +++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashMap.scala @@ -15,12 +15,14 @@ * limitations under the License. */ -package spark.util +package org.apache.spark.util import java.util.concurrent.ConcurrentHashMap import scala.collection.JavaConversions import scala.collection.mutable.Map -import spark.scheduler.MapStatus +import scala.collection.immutable +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.Logging /** * This is a custom implementation of scala.collection.mutable.Map which stores the insertion @@ -28,7 +30,7 @@ import spark.scheduler.MapStatus * threshold time can then be removed using the clearOldValues method. This is intended to be a drop-in * replacement for scala.collection.mutable.HashMap. */ -class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { +class TimeStampedHashMap[A, B] extends Map[A, B]() with Logging { val internalMap = new ConcurrentHashMap[A, (B, Long)]() def get(key: A): Option[B] = { @@ -99,6 +101,8 @@ class TimeStampedHashMap[A, B] extends Map[A, B]() with spark.Logging { } } + def toMap: immutable.Map[A, B] = iterator.toMap + /** * Removes old key-value pairs that have timestamp earlier than `threshTime` */ diff --git a/core/src/main/scala/spark/util/TimeStampedHashSet.scala b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala index 41e3fd8cba..26983138ff 100644 --- a/core/src/main/scala/spark/util/TimeStampedHashSet.scala +++ b/core/src/main/scala/org/apache/spark/util/TimeStampedHashSet.scala @@ -15,7 +15,7 @@ * limitations under the License. 
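A sketch of the timestamp-based eviction that TimeStampedHashMap above provides, including the newly added toMap view; the threshold value is illustrative:

    // Each entry records its insertion time; clearOldValues(t) drops entries
    // inserted before t.
    val map = new TimeStampedHashMap[String, Int]
    map("shuffle_0") = 42
    val threshTime = System.currentTimeMillis() - 60 * 60 * 1000L  // one hour ago
    map.clearOldValues(threshTime)
    val snapshot: scala.collection.immutable.Map[String, Int] = map.toMap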
*/ -package spark.util +package org.apache.spark.util import scala.collection.mutable.Set import scala.collection.JavaConversions diff --git a/core/src/main/scala/spark/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e6a96a5ec1..bb47fc0a2c 100644 --- a/core/src/main/scala/spark/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package spark +package org.apache.spark.util import java.io._ import java.net.{InetAddress, URL, URI, NetworkInterface, Inet4Address, ServerSocket} @@ -33,14 +33,16 @@ import com.google.common.util.concurrent.ThreadFactoryBuilder import org.apache.hadoop.fs.{Path, FileSystem, FileUtil} -import spark.serializer.SerializerInstance -import spark.deploy.SparkHadoopUtil +import org.apache.spark.serializer.{DeserializationStream, SerializationStream, SerializerInstance} +import org.apache.spark.deploy.SparkHadoopUtil +import java.nio.ByteBuffer +import org.apache.spark.{SparkEnv, SparkException, Logging} /** * Various utility methods used by Spark. */ -private object Utils extends Logging { +private[spark] object Utils extends Logging { /** Serialize an object using Java serialization */ def serialize[T](o: T): Array[Byte] = { @@ -68,6 +70,47 @@ private object Utils extends Logging { return ois.readObject.asInstanceOf[T] } + /** Serialize via nested stream using specific serializer */ + def serializeViaNestedStream(os: OutputStream, ser: SerializerInstance)(f: SerializationStream => Unit) = { + val osWrapper = ser.serializeStream(new OutputStream { + def write(b: Int) = os.write(b) + + override def write(b: Array[Byte], off: Int, len: Int) = os.write(b, off, len) + }) + try { + f(osWrapper) + } finally { + osWrapper.close() + } + } + + /** Deserialize via nested stream using specific serializer */ + def deserializeViaNestedStream(is: InputStream, ser: SerializerInstance)(f: DeserializationStream => Unit) = { + val isWrapper = ser.deserializeStream(new InputStream { + def read(): Int = is.read() + + override def read(b: Array[Byte], off: Int, len: Int): Int = is.read(b, off, len) + }) + try { + f(isWrapper) + } finally { + isWrapper.close() + } + } + + /** + * Primitive often used when writing {@link java.nio.ByteBuffer} to {@link java.io.DataOutput}. + */ + def writeByteBuffer(bb: ByteBuffer, out: ObjectOutput) = { + if (bb.hasArray) { + out.write(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining()) + } else { + val bbval = new Array[Byte](bb.remaining()) + bb.get(bbval) + out.write(bbval) + } + } + def isAlpha(c: Char): Boolean = { (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') } @@ -224,8 +267,9 @@ private object Utils extends Logging { } case _ => // Use the Hadoop filesystem library, which supports file://, hdfs://, s3://, and others + val env = SparkEnv.get val uri = new URI(url) - val conf = SparkHadoopUtil.newConfiguration() + val conf = env.hadoop.newConfiguration() val fs = FileSystem.get(uri, conf) val in = fs.open(new Path(uri)) val out = new FileOutputStream(tempFile) @@ -351,48 +395,17 @@ private object Utils extends Logging { retval } -/* - // Used by DEBUG code : remove when all testing done - private val ipPattern = Pattern.compile("^[0-9]+(\\.[0-9]+)*$") def checkHost(host: String, message: String = "") { - // Currently catches only ipv4 pattern, this is just a debugging tool - not rigourous ! 
- // if (host.matches("^[0-9]+(\\.[0-9]+)*$")) { - if (ipPattern.matcher(host).matches()) { - Utils.logErrorWithStack("Unexpected to have host " + host + " which matches IP pattern. Message " + message) - } - if (Utils.parseHostPort(host)._2 != 0){ - Utils.logErrorWithStack("Unexpected to have host " + host + " which has port in it. Message " + message) - } + assert(host.indexOf(':') == -1, message) } - // Used by DEBUG code : remove when all testing done def checkHostPort(hostPort: String, message: String = "") { - val (host, port) = Utils.parseHostPort(hostPort) - checkHost(host) - if (port <= 0){ - Utils.logErrorWithStack("Unexpected to have port " + port + " which is not valid in " + hostPort + ". Message " + message) - } + assert(hostPort.indexOf(':') != -1, message) } // Used by DEBUG code : remove when all testing done def logErrorWithStack(msg: String) { try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - // temp code for debug - System.exit(-1) - } -*/ - - // Once testing is complete in various modes, replace with this ? - def checkHost(host: String, message: String = "") {} - def checkHostPort(hostPort: String, message: String = "") {} - - // Used by DEBUG code : remove when all testing done - def logErrorWithStack(msg: String) { - try { throw new Exception } catch { case ex: Exception => { logError(msg, ex) } } - } - - def getUserNameFromEnvironment(): String = { - SparkHadoopUtil.getUserNameFromEnvironment } // Typically, this will be of order of number of nodes in cluster @@ -479,9 +492,9 @@ private object Utils extends Logging { } /** - * Convert a memory quantity in bytes to a human-readable string such as "4.0 MB". + * Convert a quantity in bytes to a human-readable string such as "4.0 MB". */ - def memoryBytesToString(size: Long): String = { + def bytesToString(size: Long): String = { val TB = 1L << 40 val GB = 1L << 30 val MB = 1L << 20 @@ -524,10 +537,10 @@ private object Utils extends Logging { } /** - * Convert a memory quantity in megabytes to a human-readable string such as "4.0 MB". + * Convert a quantity in megabytes to a human-readable string such as "4.0 MB". */ - def memoryMegabytesToString(megabytes: Long): String = { - memoryBytesToString(megabytes * 1024L * 1024L) + def megabytesToString(megabytes: Long): String = { + bytesToString(megabytes * 1024L * 1024L) } /** @@ -596,7 +609,7 @@ private object Utils extends Logging { output.toString } - /** + /** * A regular expression to match classes of the "core" Spark API that we want to skip when * finding the call site of a method. */ @@ -756,4 +769,13 @@ private object Utils extends Logging { } return buf } + + /* Calculates 'x' modulo 'mod', taking into consideration the sign of x, + * i.e. if 'x' is negative, then 'x' % 'mod' is negative too, + * so the function returns (x % mod) + mod in that case. + */ + def nonNegativeMod(x: Int, mod: Int): Int = { + val rawMod = x % mod + rawMod + (if (rawMod < 0) mod else 0) + } } diff --git a/core/src/main/scala/spark/util/Vector.scala b/core/src/main/scala/org/apache/spark/util/Vector.scala index ed49386f18..fe710c58ac 100644 --- a/core/src/main/scala/spark/util/Vector.scala +++ b/core/src/main/scala/org/apache/spark/util/Vector.scala @@ -15,7 +15,7 @@ * limitations under the License. 
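Worked examples for the two helpers touched above: hash codes can be negative, which is why nonNegativeMod exists, and bytesToString is the renamed memoryBytesToString:

    Utils.nonNegativeMod(7, 5)    // 7 % 5 = 2, already non-negative   -> 2
    Utils.nonNegativeMod(-7, 5)   // -7 % 5 = -2 on the JVM, so -2 + 5 -> 3
    Utils.bytesToString(4L * 1024 * 1024)  // -> "4.0 MB"
    Utils.megabytesToString(4L)            // -> "4.0 MB" as well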
*/ -package spark.util +package org.apache.spark.util class Vector(val elements: Array[Double]) extends Serializable { def length = elements.length @@ -73,7 +73,6 @@ class Vector(val elements: Array[Double]) extends Serializable { def += (other: Vector): Vector = { if (length != other.length) throw new IllegalArgumentException("Vectors of different length") - var ans = 0.0 var i = 0 while (i < length) { elements(i) += other(i) @@ -117,9 +116,7 @@ object Vector { def apply(elements: Double*) = new Vector(elements.toArray) def apply(length: Int, initializer: Int => Double): Vector = { - val elements = new Array[Double](length) - for (i <- 0 until length) - elements(i) = initializer(i) + val elements: Array[Double] = Array.tabulate(length)(initializer) return new Vector(elements) } @@ -133,7 +130,7 @@ object Vector { implicit def doubleToMultiplier(num: Double) = new Multiplier(num) - implicit object VectorAccumParam extends spark.AccumulatorParam[Vector] { + implicit object VectorAccumParam extends org.apache.spark.AccumulatorParam[Vector] { def addInPlace(t1: Vector, t2: Vector) = t1 + t2 def zero(initialValue: Vector) = Vector.zeros(initialValue.length) diff --git a/core/src/main/scala/spark/Cache.scala b/core/src/main/scala/spark/Cache.scala deleted file mode 100644 index b0c83ce59d..0000000000 --- a/core/src/main/scala/spark/Cache.scala +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark - -import java.util.concurrent.atomic.AtomicInteger - -private[spark] sealed trait CachePutResponse -private[spark] case class CachePutSuccess(size: Long) extends CachePutResponse -private[spark] case class CachePutFailure() extends CachePutResponse - -/** - * An interface for caches in Spark, to allow for multiple implementations. Caches are used to store - * both partitions of cached RDDs and broadcast variables on Spark executors. Caches are also aware - * of which entries are part of the same dataset (for example, partitions in the same RDD). The key - * for each value in a cache is a (datasetID, partition) pair. - * - * A single Cache instance gets created on each machine and is shared by all caches (i.e. both the - * RDD split cache and the broadcast variable cache), to enable global replacement policies. - * However, because these several independent modules all perform caching, it is important to give - * them separate key namespaces, so that an RDD and a broadcast variable (for example) do not use - * the same key. For this purpose, Cache has the notion of KeySpaces. Each client module must first - * ask for a KeySpace, and then call get() and put() on that space using its own keys. 
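A sketch of the accumulator hook at the bottom of Vector.scala above: VectorAccumParam supplies the zero and addInPlace operations, so Vectors can be summed across tasks. The sumVectors helper is illustrative and assumes an already-constructed SparkContext:

    import org.apache.spark.SparkContext
    import org.apache.spark.util.Vector

    def sumVectors(sc: SparkContext): Vector = {
      // Pass the implicit AccumulatorParam defined in object Vector explicitly:
      val acc = sc.accumulator(Vector.zeros(2))(Vector.VectorAccumParam)
      sc.parallelize(1 to 4).foreach(i => acc += Vector(i.toDouble, 2.0 * i))
      acc.value  // (10.0, 20.0)
    }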
- * - * This abstract class handles the creation of key spaces, so that subclasses need only deal with - * keys that are unique across modules. - */ -private[spark] abstract class Cache { - private val nextKeySpaceId = new AtomicInteger(0) - private def newKeySpaceId() = nextKeySpaceId.getAndIncrement() - - def newKeySpace() = new KeySpace(this, newKeySpaceId()) - - /** - * Get the value for a given (datasetId, partition), or null if it is not - * found. - */ - def get(datasetId: Any, partition: Int): Any - - /** - * Attempt to put a value in the cache; returns CachePutFailure if this was - * not successful (e.g. because the cache replacement policy forbids it), and - * CachePutSuccess if successful. If size estimation is available, the cache - * implementation should set the size field in CachePutSuccess. - */ - def put(datasetId: Any, partition: Int, value: Any): CachePutResponse - - /** - * Report the capacity of the cache partition. By default this just reports - * zero. Specific implementations can choose to provide the capacity number. - */ - def getCapacity: Long = 0L -} - -/** - * A key namespace in a Cache. - */ -private[spark] class KeySpace(cache: Cache, val keySpaceId: Int) { - def get(datasetId: Any, partition: Int): Any = - cache.get((keySpaceId, datasetId), partition) - - def put(datasetId: Any, partition: Int, value: Any): CachePutResponse = - cache.put((keySpaceId, datasetId), partition, value) - - def getCapacity: Long = cache.getCapacity -} diff --git a/core/src/main/scala/spark/KryoSerializer.scala b/core/src/main/scala/spark/KryoSerializer.scala deleted file mode 100644 index ee37da7948..0000000000 --- a/core/src/main/scala/spark/KryoSerializer.scala +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
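For reference while reading the removal above, a sketch of how the Cache/KeySpace API was consumed; the cachePartition helper is hypothetical:

    // Each module obtains its own KeySpace so dataset IDs cannot collide.
    def cachePartition(cache: Cache, value: AnyRef): Unit = {
      val space = cache.newKeySpace()
      space.put("rdd_0", 3, value) match {
        case CachePutSuccess(size) => ()  // stored; size is the estimated byte count
        case CachePutFailure()     => ()  // rejected, e.g. by the replacement policy
      }
      val hit = space.get("rdd_0", 3)     // null when absent
    }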
- */ - -package spark - -import java.io._ -import java.nio.ByteBuffer -import java.nio.channels.Channels - -import scala.collection.immutable -import scala.collection.mutable - -import com.esotericsoftware.kryo._ -import com.esotericsoftware.kryo.{Serializer => KSerializer} -import com.esotericsoftware.kryo.io.{Input => KryoInput, Output => KryoOutput} -import com.esotericsoftware.kryo.serializers.{JavaSerializer => KryoJavaSerializer} -import de.javakaffee.kryoserializers.KryoReflectionFactorySupport - -import serializer.{SerializerInstance, DeserializationStream, SerializationStream} -import spark.broadcast._ -import spark.storage._ - -private[spark] -class KryoSerializationStream(kryo: Kryo, outStream: OutputStream) extends SerializationStream { - - val output = new KryoOutput(outStream) - - def writeObject[T](t: T): SerializationStream = { - kryo.writeClassAndObject(output, t) - this - } - - def flush() { output.flush() } - def close() { output.close() } -} - -private[spark] -class KryoDeserializationStream(kryo: Kryo, inStream: InputStream) extends DeserializationStream { - - val input = new KryoInput(inStream) - - def readObject[T](): T = { - try { - kryo.readClassAndObject(input).asInstanceOf[T] - } catch { - // DeserializationStream uses the EOF exception to indicate stopping condition. - case e: com.esotericsoftware.kryo.KryoException => throw new java.io.EOFException - } - } - - def close() { - // Kryo's Input automatically closes the input stream it is using. - input.close() - } -} - -private[spark] class KryoSerializerInstance(ks: KryoSerializer) extends SerializerInstance { - - val kryo = ks.kryo.get() - val output = ks.output.get() - val input = ks.input.get() - - def serialize[T](t: T): ByteBuffer = { - output.clear() - kryo.writeClassAndObject(output, t) - ByteBuffer.wrap(output.toBytes) - } - - def deserialize[T](bytes: ByteBuffer): T = { - input.setBuffer(bytes.array) - kryo.readClassAndObject(input).asInstanceOf[T] - } - - def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = { - val oldClassLoader = kryo.getClassLoader - kryo.setClassLoader(loader) - input.setBuffer(bytes.array) - val obj = kryo.readClassAndObject(input).asInstanceOf[T] - kryo.setClassLoader(oldClassLoader) - obj - } - - def serializeStream(s: OutputStream): SerializationStream = { - new KryoSerializationStream(kryo, s) - } - - def deserializeStream(s: InputStream): DeserializationStream = { - new KryoDeserializationStream(kryo, s) - } -} - -/** - * Interface implemented by clients to register their classes with Kryo when using Kryo - * serialization. - */ -trait KryoRegistrator { - def registerClasses(kryo: Kryo): Unit -} - -/** - * A Spark serializer that uses the [[http://code.google.com/p/kryo/wiki/V1Documentation Kryo 1.x library]]. 
- */ -class KryoSerializer extends spark.serializer.Serializer with Logging { - - val bufferSize = System.getProperty("spark.kryoserializer.buffer.mb", "2").toInt * 1024 * 1024 - - val kryo = new ThreadLocal[Kryo] { - override def initialValue = createKryo() - } - - val output = new ThreadLocal[KryoOutput] { - override def initialValue = new KryoOutput(bufferSize) - } - - val input = new ThreadLocal[KryoInput] { - override def initialValue = new KryoInput(bufferSize) - } - - def createKryo(): Kryo = { - val kryo = new KryoReflectionFactorySupport() - - // Register some commonly used classes - val toRegister: Seq[AnyRef] = Seq( - // Arrays - Array(1), Array(1.0), Array(1.0f), Array(1L), Array(""), Array(("", "")), - Array(new java.lang.Object), Array(1.toByte), Array(true), Array('c'), - // Specialized Tuple2s - ("", ""), ("", 1), (1, 1), (1.0, 1.0), (1L, 1L), - (1, 1.0), (1.0, 1), (1L, 1.0), (1.0, 1L), (1, 1L), (1L, 1), - // Scala collections - List(1), mutable.ArrayBuffer(1), - // Options and Either - Some(1), Left(1), Right(1), - // Higher-dimensional tuples - (1, 1, 1), (1, 1, 1, 1), (1, 1, 1, 1, 1), - None, - ByteBuffer.allocate(1), - StorageLevel.MEMORY_ONLY, - PutBlock("1", ByteBuffer.allocate(1), StorageLevel.MEMORY_ONLY), - GotBlock("1", ByteBuffer.allocate(1)), - GetBlock("1") - ) - for (obj <- toRegister) { - kryo.register(obj.getClass) - } - - // Allow sending SerializableWritable - kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) - kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) - - // Register some commonly used Scala singleton objects. Because these - // are singletons, we must return the exact same local object when we - // deserialize rather than returning a clone as FieldSerializer would. 
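A sketch of the client-side hook this removed file defined: user code implements KryoRegistrator and names it via the spark.kryo.registrator property that createKryo() below reads. MyRecord and MyRegistrator are hypothetical user classes, in the old spark.* namespace this file used:

    import com.esotericsoftware.kryo.Kryo

    case class MyRecord(id: Int, payload: Array[Byte])

    class MyRegistrator extends spark.KryoRegistrator {
      override def registerClasses(kryo: Kryo) {
        kryo.register(classOf[MyRecord])  // spares Kryo writing full class names per object
      }
    }

    // Selected through system properties before the SparkContext is created:
    //   System.setProperty("spark.serializer", "spark.KryoSerializer")
    //   System.setProperty("spark.kryo.registrator", "MyRegistrator")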
- class SingletonSerializer[T](obj: T) extends KSerializer[T] { - override def write(kryo: Kryo, output: KryoOutput, obj: T) {} - override def read(kryo: Kryo, input: KryoInput, cls: java.lang.Class[T]): T = obj - } - kryo.register(None.getClass, new SingletonSerializer[AnyRef](None)) - kryo.register(Nil.getClass, new SingletonSerializer[AnyRef](Nil)) - - // Register maps with a special serializer since they have complex internal structure - class ScalaMapSerializer(buildMap: Array[(Any, Any)] => scala.collection.Map[Any, Any]) - extends KSerializer[Array[(Any, Any)] => scala.collection.Map[Any, Any]] { - - //hack, look at https://groups.google.com/forum/#!msg/kryo-users/Eu5V4bxCfws/k-8UQ22y59AJ - private final val FAKE_REFERENCE = new Object() - override def write( - kryo: Kryo, - output: KryoOutput, - obj: Array[(Any, Any)] => scala.collection.Map[Any, Any]) { - val map = obj.asInstanceOf[scala.collection.Map[Any, Any]] - output.writeInt(map.size) - for ((k, v) <- map) { - kryo.writeClassAndObject(output, k) - kryo.writeClassAndObject(output, v) - } - } - override def read ( - kryo: Kryo, - input: KryoInput, - cls: Class[Array[(Any, Any)] => scala.collection.Map[Any, Any]]) - : Array[(Any, Any)] => scala.collection.Map[Any, Any] = { - kryo.reference(FAKE_REFERENCE) - val size = input.readInt() - val elems = new Array[(Any, Any)](size) - for (i <- 0 until size) { - val k = kryo.readClassAndObject(input) - val v = kryo.readClassAndObject(input) - elems(i)=(k,v) - } - buildMap(elems).asInstanceOf[Array[(Any, Any)] => scala.collection.Map[Any, Any]] - } - } - kryo.register(mutable.HashMap().getClass, new ScalaMapSerializer(mutable.HashMap() ++ _)) - // TODO: add support for immutable maps too; this is more annoying because there are many - // subclasses of immutable.Map for small maps (with <= 4 entries) - val map1 = Map[Any, Any](1 -> 1) - val map2 = Map[Any, Any](1 -> 1, 2 -> 2) - val map3 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3) - val map4 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4) - val map5 = Map[Any, Any](1 -> 1, 2 -> 2, 3 -> 3, 4 -> 4, 5 -> 5) - kryo.register(map1.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map2.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map3.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map4.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - kryo.register(map5.getClass, new ScalaMapSerializer(mutable.HashMap() ++ _ toMap)) - - // Allow the user to register their own classes by setting spark.kryo.registrator - val regCls = System.getProperty("spark.kryo.registrator") - if (regCls != null) { - logInfo("Running user registrator: " + regCls) - val classLoader = Thread.currentThread.getContextClassLoader - val reg = Class.forName(regCls, true, classLoader).newInstance().asInstanceOf[KryoRegistrator] - reg.registerClasses(kryo) - } - - // Allow disabling Kryo reference tracking if user knows their object graphs don't have loops - kryo.setReferences(System.getProperty("spark.kryo.referenceTracking", "true").toBoolean) - - kryo - } - - def newInstance(): SerializerInstance = { - this.kryo.get().setClassLoader(Thread.currentThread().getContextClassLoader) - new KryoSerializerInstance(this) - } -} diff --git a/core/src/main/scala/spark/deploy/DeployMessage.scala b/core/src/main/scala/spark/deploy/DeployMessage.scala deleted file mode 100644 index e1f8aff6f5..0000000000 --- a/core/src/main/scala/spark/deploy/DeployMessage.scala +++ /dev/null @@ -1,125 
+0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.deploy - -import spark.deploy.ExecutorState.ExecutorState -import spark.deploy.master.{WorkerInfo, ApplicationInfo} -import spark.deploy.worker.ExecutorRunner -import scala.collection.immutable.List -import spark.Utils - - -private[spark] sealed trait DeployMessage extends Serializable - -// Worker to Master - -private[spark] -case class RegisterWorker( - id: String, - host: String, - port: Int, - cores: Int, - memory: Int, - webUiPort: Int, - publicAddress: String) - extends DeployMessage { - Utils.checkHost(host, "Required hostname") - assert (port > 0) -} - -private[spark] -case class ExecutorStateChanged( - appId: String, - execId: Int, - state: ExecutorState, - message: Option[String], - exitStatus: Option[Int]) - extends DeployMessage - -private[spark] case class Heartbeat(workerId: String) extends DeployMessage - -// Master to Worker - -private[spark] case class RegisteredWorker(masterWebUiUrl: String) extends DeployMessage -private[spark] case class RegisterWorkerFailed(message: String) extends DeployMessage -private[spark] case class KillExecutor(appId: String, execId: Int) extends DeployMessage - -private[spark] case class LaunchExecutor( - appId: String, - execId: Int, - appDesc: ApplicationDescription, - cores: Int, - memory: Int, - sparkHome: String) - extends DeployMessage - -// Client to Master - -private[spark] case class RegisterApplication(appDescription: ApplicationDescription) - extends DeployMessage - -// Master to Client - -private[spark] -case class RegisteredApplication(appId: String) extends DeployMessage - -private[spark] -case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { - Utils.checkHostPort(hostPort, "Required hostport") -} - -private[spark] -case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String], - exitStatus: Option[Int]) - -private[spark] -case class ApplicationRemoved(message: String) - -// Internal message in Client - -private[spark] case object StopClient - -// MasterWebUI To Master - -private[spark] case object RequestMasterState - -// Master to MasterWebUI - -private[spark] -case class MasterState(host: String, port: Int, workers: Array[WorkerInfo], - activeApps: Array[ApplicationInfo], completedApps: Array[ApplicationInfo]) { - - Utils.checkHost(host, "Required hostname") - assert (port > 0) - - def uri = "spark://" + host + ":" + port -} - -// WorkerWebUI to Worker -private[spark] case object RequestWorkerState - -// Worker to WorkerWebUI - -private[spark] -case class WorkerState(host: String, port: Int, workerId: String, executors: List[ExecutorRunner], - finishedExecutors: List[ExecutorRunner], masterUrl: String, cores: Int, memory: Int, - coresUsed: Int, 
memoryUsed: Int, masterWebUiUrl: String) { - - Utils.checkHost(host, "Required hostname") - assert (port > 0) -} diff --git a/core/src/main/scala/spark/rdd/CoalescedRDD.scala b/core/src/main/scala/spark/rdd/CoalescedRDD.scala deleted file mode 100644 index 2b5bf18541..0000000000 --- a/core/src/main/scala/spark/rdd/CoalescedRDD.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.rdd - -import spark.{Dependency, OneToOneDependency, NarrowDependency, RDD, Partition, TaskContext} -import java.io.{ObjectOutputStream, IOException} - -private[spark] case class CoalescedRDDPartition( - index: Int, - @transient rdd: RDD[_], - parentsIndices: Array[Int] - ) extends Partition { - var parents: Seq[Partition] = parentsIndices.map(rdd.partitions(_)) - - @throws(classOf[IOException]) - private def writeObject(oos: ObjectOutputStream) { - // Update the reference to parent split at the time of task serialization - parents = parentsIndices.map(rdd.partitions(_)) - oos.defaultWriteObject() - } -} - -/** - * Coalesce the partitions of a parent RDD (`prev`) into fewer partitions, so that each partition of - * this RDD computes one or more of the parent ones. Will produce exactly `maxPartitions` if the - * parent had more than this many partitions, or fewer if the parent had fewer. - * - * This transformation is useful when an RDD with many partitions gets filtered into a smaller one, - * or to avoid having a large number of small tasks when processing a directory with many files. 
- */ -class CoalescedRDD[T: ClassManifest]( - @transient var prev: RDD[T], - maxPartitions: Int) - extends RDD[T](prev.context, Nil) { // Nil since we implement getDependencies - - override def getPartitions: Array[Partition] = { - val prevSplits = prev.partitions - if (prevSplits.length < maxPartitions) { - prevSplits.map(_.index).map{idx => new CoalescedRDDPartition(idx, prev, Array(idx)) } - } else { - (0 until maxPartitions).map { i => - val rangeStart = ((i.toLong * prevSplits.length) / maxPartitions).toInt - val rangeEnd = (((i.toLong + 1) * prevSplits.length) / maxPartitions).toInt - new CoalescedRDDPartition(i, prev, (rangeStart until rangeEnd).toArray) - }.toArray - } - } - - override def compute(split: Partition, context: TaskContext): Iterator[T] = { - split.asInstanceOf[CoalescedRDDPartition].parents.iterator.flatMap { parentSplit => - firstParent[T].iterator(parentSplit, context) - } - } - - override def getDependencies: Seq[Dependency[_]] = { - Seq(new NarrowDependency(prev) { - def getParents(id: Int): Seq[Int] = - partitions(id).asInstanceOf[CoalescedRDDPartition].parentsIndices - }) - } - - override def clearDependencies() { - super.clearDependencies() - prev = null - } -} diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala deleted file mode 100644 index 7c10074dc7..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterScheduler.scala +++ /dev/null @@ -1,631 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.scheduler.cluster - -import java.lang.{Boolean => JBoolean} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet - -import spark._ -import spark.TaskState.TaskState -import spark.scheduler._ -import java.nio.ByteBuffer -import java.util.concurrent.atomic.AtomicLong -import java.util.{TimerTask, Timer} - -/** - * The main TaskScheduler implementation, for running tasks on a cluster. Clients should first call - * start(), then submit task sets through the runTasks method. 
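The range arithmetic in getPartitions above packs parent partitions into contiguous groups. A worked example, coalescing 10 parent partitions down to maxPartitions = 3:

    // rangeStart = (i * 10) / 3, rangeEnd = ((i + 1) * 10) / 3, integer division:
    //   i = 0 -> parents 0 until 3   (3 parents)
    //   i = 1 -> parents 3 until 6   (3 parents)
    //   i = 2 -> parents 6 until 10  (4 parents)
    // Reached from user code via the coalesce transformation, e.g. (illustrative):
    //   val coarse = sc.textFile("hdfs://host/logs").filter(_.nonEmpty).coalesce(3)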
- */ -private[spark] class ClusterScheduler(val sc: SparkContext) - extends TaskScheduler - with Logging { - - // How often to check for speculative tasks - val SPECULATION_INTERVAL = System.getProperty("spark.speculation.interval", "100").toLong - // Threshold above which we warn user initial TaskSet may be starved - val STARVATION_TIMEOUT = System.getProperty("spark.starvation.timeout", "15000").toLong - // How often to revive offers in case there are pending tasks - that is how often to try to get - // tasks scheduled in case there are nodes available : default 0 is to disable it - to preserve existing behavior - // Note that this is required due to delayed scheduling due to data locality waits, etc. - // TODO: rename property ? - val TASK_REVIVAL_INTERVAL = System.getProperty("spark.tasks.revive.interval", "0").toLong - - /* - This property controls how aggressive we should be to modulate waiting for node local task scheduling. - To elaborate, currently there is a time limit (3 sec def) to ensure that spark attempts to wait for node locality of tasks before - scheduling on other nodes. We have modified this in yarn branch such that offers to task set happen in prioritized order : - node-local, rack-local and then others - But once all available node local (and no pref) tasks are scheduled, instead of waiting for 3 sec before - scheduling to other nodes (which degrades performance for time sensitive tasks and on larger clusters), we can - modulate that : to also allow rack local nodes or any node. The default is still set to HOST - so that previous behavior is - maintained. This is to allow tuning the tension between pulling rdd data off node and scheduling computation asap. - - TODO: rename property ? The value is one of - - NODE_LOCAL (default, no change w.r.t current behavior), - - RACK_LOCAL and - - ANY - - Note that this property makes more sense when used in conjugation with spark.tasks.revive.interval > 0 : else it is not very effective. - - Additional Note: For non trivial clusters, there is a 4x - 5x reduction in running time (in some of our experiments) based on whether - it is left at default NODE_LOCAL, RACK_LOCAL (if cluster is configured to be rack aware) or ANY. - If cluster is rack aware, then setting it to RACK_LOCAL gives best tradeoff and a 3x - 4x performance improvement while minimizing IO impact. - Also, it brings down the variance in running time drastically. - */ - val TASK_SCHEDULING_AGGRESSION = TaskLocality.parse(System.getProperty("spark.tasks.schedule.aggression", "NODE_LOCAL")) - - val activeTaskSets = new HashMap[String, TaskSetManager] - - val taskIdToTaskSetId = new HashMap[Long, String] - val taskIdToExecutorId = new HashMap[Long, String] - val taskSetTaskIds = new HashMap[String, HashSet[Long]] - - @volatile private var hasReceivedTask = false - @volatile private var hasLaunchedTask = false - private val starvationTimer = new Timer(true) - - // Incrementing Mesos task IDs - val nextTaskId = new AtomicLong(0) - - // Which executor IDs we have executors on - val activeExecutorIds = new HashSet[String] - - // TODO: We might want to remove this and merge it with execId datastructures - but later. - // Which hosts in the cluster are alive (contains hostPort's) - used for process local and node local task locality. 
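The long comment above amounts to a pair of tuning knobs. A sketch of a rack-aware configuration, set before the SparkContext (and hence this scheduler) is created; the values are illustrative:

    object TuneLocality {
      def main(args: Array[String]) {
        // Default "0" disables periodic offer revival entirely:
        System.setProperty("spark.tasks.revive.interval", "1000")
        // Once node-local tasks are exhausted, also accept rack-local placement:
        System.setProperty("spark.tasks.schedule.aggression", "RACK_LOCAL")
        // ... then construct the SparkContext as usual.
      }
    }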
- private val hostPortsAlive = new HashSet[String] - private val hostToAliveHostPorts = new HashMap[String, HashSet[String]] - - // The set of executors we have on each host; this is used to compute hostsAlive, which - // in turn is used to decide when we can attain data locality on a given host - private val executorsByHostPort = new HashMap[String, HashSet[String]] - - private val executorIdToHostPort = new HashMap[String, String] - - // JAR server, if any JARs were added by the user to the SparkContext - var jarServer: HttpServer = null - - // URIs of JARs to pass to executor - var jarUris: String = "" - - // Listener object to pass upcalls into - var listener: TaskSchedulerListener = null - - var backend: SchedulerBackend = null - - val mapOutputTracker = SparkEnv.get.mapOutputTracker - - var schedulableBuilder: SchedulableBuilder = null - var rootPool: Pool = null - - override def setListener(listener: TaskSchedulerListener) { - this.listener = listener - } - - def initialize(context: SchedulerBackend) { - backend = context - //default scheduler is FIFO - val schedulingMode = System.getProperty("spark.cluster.schedulingmode", "FIFO") - //temporarily set rootPool name to empty - rootPool = new Pool("", SchedulingMode.withName(schedulingMode), 0, 0) - schedulableBuilder = { - schedulingMode match { - case "FIFO" => - new FIFOSchedulableBuilder(rootPool) - case "FAIR" => - new FairSchedulableBuilder(rootPool) - } - } - schedulableBuilder.buildPools() - // resolve executorId to hostPort mapping. - def executorToHostPort(executorId: String, defaultHostPort: String): String = { - executorIdToHostPort.getOrElse(executorId, defaultHostPort) - } - - // Unfortunately, this means that SparkEnv is indirectly referencing ClusterScheduler - // Will that be a design violation ? - SparkEnv.get.executorIdToHostPort = Some(executorToHostPort) - } - - - def newTaskId(): Long = nextTaskId.getAndIncrement() - - override def start() { - backend.start() - - if (JBoolean.getBoolean("spark.speculation")) { - new Thread("ClusterScheduler speculation check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative execution thread") - while (true) { - try { - Thread.sleep(SPECULATION_INTERVAL) - } catch { - case e: InterruptedException => {} - } - checkSpeculatableTasks() - } - } - }.start() - } - - - // Change to always run with some default if TASK_REVIVAL_INTERVAL <= 0 ? 
- if (TASK_REVIVAL_INTERVAL > 0) { - new Thread("ClusterScheduler task offer revival check") { - setDaemon(true) - - override def run() { - logInfo("Starting speculative task offer revival thread") - while (true) { - try { - Thread.sleep(TASK_REVIVAL_INTERVAL) - } catch { - case e: InterruptedException => {} - } - - if (hasPendingTasks()) backend.reviveOffers() - } - } - }.start() - } - } - - override def submitTasks(taskSet: TaskSet) { - val tasks = taskSet.tasks - logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks") - this.synchronized { - val manager = new ClusterTaskSetManager(this, taskSet) - activeTaskSets(taskSet.id) = manager - schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties) - taskSetTaskIds(taskSet.id) = new HashSet[Long]() - - if (hasReceivedTask == false) { - starvationTimer.scheduleAtFixedRate(new TimerTask() { - override def run() { - if (!hasLaunchedTask) { - logWarning("Initial job has not accepted any resources; " + - "check your cluster UI to ensure that workers are registered") - } else { - this.cancel() - } - } - }, STARVATION_TIMEOUT, STARVATION_TIMEOUT) - } - hasReceivedTask = true; - } - backend.reviveOffers() - } - - def taskSetFinished(manager: TaskSetManager) { - this.synchronized { - activeTaskSets -= manager.taskSet.id - manager.parent.removeSchedulable(manager) - logInfo("Remove TaskSet %s from pool %s".format(manager.taskSet.id, manager.parent.name)) - taskIdToTaskSetId --= taskSetTaskIds(manager.taskSet.id) - taskIdToExecutorId --= taskSetTaskIds(manager.taskSet.id) - taskSetTaskIds.remove(manager.taskSet.id) - } - } - - /** - * Called by cluster manager to offer resources on slaves. We respond by asking our active task - * sets for tasks in order of priority. We fill each node with tasks in a round-robin manner so - * that tasks are balanced across the cluster. - */ - def resourceOffers(offers: Seq[WorkerOffer]): Seq[Seq[TaskDescription]] = { - synchronized { - SparkEnv.set(sc.env) - // Mark each slave as alive and remember its hostname - for (o <- offers) { - // DEBUG Code - Utils.checkHostPort(o.hostPort) - - executorIdToHostPort(o.executorId) = o.hostPort - if (! executorsByHostPort.contains(o.hostPort)) { - executorsByHostPort(o.hostPort) = new HashSet[String]() - } - - hostPortsAlive += o.hostPort - hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(o.hostPort)._1, new HashSet[String]).add(o.hostPort) - executorGained(o.executorId, o.hostPort) - } - // Build a list of tasks to assign to each slave - val tasks = offers.map(o => new ArrayBuffer[TaskDescription](o.cores)) - // merge availableCpus into nodeToAvailableCpus block ? - val availableCpus = offers.map(o => o.cores).toArray - val nodeToAvailableCpus = { - val map = new HashMap[String, Int]() - for (offer <- offers) { - val hostPort = offer.hostPort - val cores = offer.cores - // DEBUG code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - - map.put(host, map.getOrElse(host, 0) + cores) - } - - map - } - var launchedTask = false - val sortedTaskSetQueue = rootPool.getSortedTaskSetQueue() - for (manager <- sortedTaskSetQueue) - { - logInfo("parentName:%s,name:%s,runningTasks:%s".format(manager.parent.name, manager.name, manager.runningTasks)) - } - for (manager <- sortedTaskSetQueue) { - - // Split offers based on node local, rack local and off-rack tasks. 
- val processLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val nodeLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val rackLocalOffers = new HashMap[String, ArrayBuffer[Int]]() - val otherOffers = new HashMap[String, ArrayBuffer[Int]]() - - for (i <- 0 until offers.size) { - val hostPort = offers(i).hostPort - // DEBUG code - Utils.checkHostPort(hostPort) - - val numProcessLocalTasks = math.max(0, math.min(manager.numPendingTasksForHostPort(hostPort), availableCpus(i))) - if (numProcessLocalTasks > 0){ - val list = processLocalOffers.getOrElseUpdate(hostPort, new ArrayBuffer[Int]) - for (j <- 0 until numProcessLocalTasks) list += i - } - - val host = Utils.parseHostPort(hostPort)._1 - val numNodeLocalTasks = math.max(0, - // Remove process local tasks (which are also host local btw !) from this - math.min(manager.numPendingTasksForHost(hostPort) - numProcessLocalTasks, nodeToAvailableCpus(host))) - if (numNodeLocalTasks > 0){ - val list = nodeLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numNodeLocalTasks) list += i - } - - val numRackLocalTasks = math.max(0, - // Remove node local tasks (which are also rack local btw !) from this - math.min(manager.numRackLocalPendingTasksForHost(hostPort) - numProcessLocalTasks - numNodeLocalTasks, nodeToAvailableCpus(host))) - if (numRackLocalTasks > 0){ - val list = rackLocalOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - for (j <- 0 until numRackLocalTasks) list += i - } - if (numNodeLocalTasks <= 0 && numRackLocalTasks <= 0){ - // add to others list - spread even this across cluster. - val list = otherOffers.getOrElseUpdate(host, new ArrayBuffer[Int]) - list += i - } - } - - val offersPriorityList = new ArrayBuffer[Int]( - processLocalOffers.size + nodeLocalOffers.size + rackLocalOffers.size + otherOffers.size) - - // First process local, then host local, then rack, then others - - // numNodeLocalOffers contains count of both process local and host offers. - val numNodeLocalOffers = { - val processLocalPriorityList = ClusterScheduler.prioritizeContainers(processLocalOffers) - offersPriorityList ++= processLocalPriorityList - - val nodeLocalPriorityList = ClusterScheduler.prioritizeContainers(nodeLocalOffers) - offersPriorityList ++= nodeLocalPriorityList - - processLocalPriorityList.size + nodeLocalPriorityList.size - } - val numRackLocalOffers = { - val rackLocalPriorityList = ClusterScheduler.prioritizeContainers(rackLocalOffers) - offersPriorityList ++= rackLocalPriorityList - rackLocalPriorityList.size - } - offersPriorityList ++= ClusterScheduler.prioritizeContainers(otherOffers) - - var lastLoop = false - val lastLoopIndex = TASK_SCHEDULING_AGGRESSION match { - case TaskLocality.NODE_LOCAL => numNodeLocalOffers - case TaskLocality.RACK_LOCAL => numRackLocalOffers + numNodeLocalOffers - case TaskLocality.ANY => offersPriorityList.size - } - - do { - launchedTask = false - var loopCount = 0 - for (i <- offersPriorityList) { - val execId = offers(i).executorId - val hostPort = offers(i).hostPort - - // If last loop and within the lastLoopIndex, expand scope - else use null (which will use default/existing) - val overrideLocality = if (lastLoop && loopCount < lastLoopIndex) TASK_SCHEDULING_AGGRESSION else null - - // If last loop, override waiting for host locality - we scheduled all local tasks already and there might be more available ... 
- loopCount += 1 - - manager.slaveOffer(execId, hostPort, availableCpus(i), overrideLocality) match { - case Some(task) => - tasks(i) += task - val tid = task.taskId - taskIdToTaskSetId(tid) = manager.taskSet.id - taskSetTaskIds(manager.taskSet.id) += tid - taskIdToExecutorId(tid) = execId - activeExecutorIds += execId - executorsByHostPort(hostPort) += execId - availableCpus(i) -= 1 - launchedTask = true - - case None => {} - } - } - // Loop once more - when lastLoop = true, then we try to schedule task on all nodes irrespective of - // data locality (we still go in order of priority : but that would not change anything since - // if data local tasks had been available, we would have scheduled them already) - if (lastLoop) { - // prevent more looping - launchedTask = false - } else if (!lastLoop && !launchedTask) { - // Do this only if TASK_SCHEDULING_AGGRESSION != NODE_LOCAL - if (TASK_SCHEDULING_AGGRESSION != TaskLocality.NODE_LOCAL) { - // fudge launchedTask to ensure we loop once more - launchedTask = true - // dont loop anymore - lastLoop = true - } - } - } while (launchedTask) - } - - if (tasks.size > 0) { - hasLaunchedTask = true - } - return tasks - } - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { - var taskSetToUpdate: Option[TaskSetManager] = None - var failedExecutor: Option[String] = None - var taskFailed = false - synchronized { - try { - if (state == TaskState.LOST && taskIdToExecutorId.contains(tid)) { - // We lost this entire executor, so remember that it's gone - val execId = taskIdToExecutorId(tid) - if (activeExecutorIds.contains(execId)) { - removeExecutor(execId) - failedExecutor = Some(execId) - } - } - taskIdToTaskSetId.get(tid) match { - case Some(taskSetId) => - if (activeTaskSets.contains(taskSetId)) { - taskSetToUpdate = Some(activeTaskSets(taskSetId)) - } - if (TaskState.isFinished(state)) { - taskIdToTaskSetId.remove(tid) - if (taskSetTaskIds.contains(taskSetId)) { - taskSetTaskIds(taskSetId) -= tid - } - taskIdToExecutorId.remove(tid) - } - if (state == TaskState.FAILED) { - taskFailed = true - } - case None => - logInfo("Ignoring update from TID " + tid + " because its task set is gone") - } - } catch { - case e: Exception => logError("Exception in statusUpdate", e) - } - } - // Update the task set and DAGScheduler without holding a lock on this, since that can deadlock - if (taskSetToUpdate != None) { - taskSetToUpdate.get.statusUpdate(tid, state, serializedData) - } - if (failedExecutor != None) { - listener.executorLost(failedExecutor.get) - backend.reviveOffers() - } - if (taskFailed) { - - // Also revive offers if a task had failed for some reason other than host lost - backend.reviveOffers() - } - } - - def error(message: String) { - synchronized { - if (activeTaskSets.size > 0) { - // Have each task set throw a SparkException with the error - for ((taskSetId, manager) <- activeTaskSets) { - try { - manager.error(message) - } catch { - case e: Exception => logError("Exception in error callback", e) - } - } - } else { - // No task sets are active but we still got an error. Just exit since this - // must mean the error is during registration. - // It might be good to do something smarter here in the future. - logError("Exiting due to error from cluster scheduler: " + message) - System.exit(1) - } - } - } - - override def stop() { - if (backend != null) { - backend.stop() - } - if (jarServer != null) { - jarServer.stop() - } - - // sleeping for an arbitrary 5 seconds : to ensure that messages are sent out. 
- // TODO: Do something better! - Thread.sleep(5000L) - } - - override def defaultParallelism() = backend.defaultParallelism() - - - // Check for speculatable tasks in all our active jobs. - def checkSpeculatableTasks() { - var shouldRevive = false - synchronized { - shouldRevive = rootPool.checkSpeculatableTasks() - } - if (shouldRevive) { - backend.reviveOffers() - } - } - - // Check for pending tasks in all our active jobs. - def hasPendingTasks(): Boolean = { - synchronized { - rootPool.hasPendingTasks() - } - } - - def executorLost(executorId: String, reason: ExecutorLossReason) { - var failedExecutor: Option[String] = None - - synchronized { - if (activeExecutorIds.contains(executorId)) { - val hostPort = executorIdToHostPort(executorId) - logError("Lost executor %s on %s: %s".format(executorId, hostPort, reason)) - removeExecutor(executorId) - failedExecutor = Some(executorId) - } else { - // We may get multiple executorLost() calls with different loss reasons. For example, one - // may be triggered by a dropped connection from the slave while another may be a report - // of executor termination from Mesos. We produce log messages for both so we eventually - // report the termination reason. - logError("Lost an executor " + executorId + " (already removed): " + reason) - } - } - // Call listener.executorLost without holding the lock on this to prevent deadlock - if (failedExecutor != None) { - listener.executorLost(failedExecutor.get) - backend.reviveOffers() - } - } - - /** Remove an executor from all our data structures and mark it as lost */ - private def removeExecutor(executorId: String) { - activeExecutorIds -= executorId - val hostPort = executorIdToHostPort(executorId) - if (hostPortsAlive.contains(hostPort)) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - hostPortsAlive -= hostPort - hostToAliveHostPorts.getOrElseUpdate(Utils.parseHostPort(hostPort)._1, new HashSet[String]).remove(hostPort) - } - - val execs = executorsByHostPort.getOrElse(hostPort, new HashSet) - execs -= executorId - if (execs.isEmpty) { - executorsByHostPort -= hostPort - } - executorIdToHostPort -= executorId - rootPool.executorLost(executorId, hostPort) - } - - def executorGained(execId: String, hostPort: String) { - listener.executorGained(execId, hostPort) - } - - def getExecutorsAliveOnHost(host: String): Option[Set[String]] = { - Utils.checkHost(host) - - val retval = hostToAliveHostPorts.get(host) - if (retval.isDefined) { - return Some(retval.get.toSet) - } - - None - } - - def isExecutorAliveOnHostPort(hostPort: String): Boolean = { - // Even if hostPort is a host, it does not matter - it is just a specific check. - // But we do have to ensure that only hostPorts get into hostPortsAlive! - // So no check against Utils.checkHostPort - hostPortsAlive.contains(hostPort) - } - - // By default, the rack is unknown - def getRackForHost(value: String): Option[String] = None - - // By default, (cached) hosts for a rack are unknown - def getCachedHostsForRack(rack: String): Option[Set[String]] = None -} - - -object ClusterScheduler { - - // Used to 'spray' available containers across the available set to ensure that too many containers on the same host - // are not used up. Used in YARN mode and in task scheduling (when there are multiple containers available - // to execute a task). - // For example: YARN can return more containers than we requested under ANY; this method - prioritizes how to use the allocated containers.
- // Flatten the map such that the array buffer entries are spread out across the returned value. - // Given <host, list[container]> == <h1, [c1 .. c5]>, <h2, [c1 .. c3]>, <h3, [c1, c2]>, <h4, c1>, <h5, c1>, - // the return value would be something like: h1c1, h2c1, h3c1, h4c1, h5c1, h1c2, h2c2, h3c2, h1c3, h2c3, h1c4, h1c5 - // We then 'use' the containers in this order (consuming only the top K from this list where - // K = number to be used). This is to ensure that if we have multiple eligible allocations, - // they don't end up allocating all containers on a small number of hosts - increasing the probability of - // multiple container failures when a host goes down. - // Note, there is a bias for keys with a higher number of entries in value to be picked first (by design). - // Also note that invocation of this method is expected to have containers of the same 'type' - // (host-local, rack-local, off-rack) and not across types: the reordering is simply a better ordering of - // the available list - everything else being the same. - // That is, we first consume data local, then rack local and finally off rack nodes. So the - // prioritization from this method applies within each category - def prioritizeContainers[K, T] (map: HashMap[K, ArrayBuffer[T]]): List[T] = { - val _keyList = new ArrayBuffer[K](map.size) - _keyList ++= map.keys - - // order keyList based on population of value in map - val keyList = _keyList.sortWith( - (left, right) => map.get(left).getOrElse(Set()).size > map.get(right).getOrElse(Set()).size - ) - - val retval = new ArrayBuffer[T](keyList.size * 2) - var index = 0 - var found = true - - while (found) { - found = false - for (key <- keyList) { - val containerList: ArrayBuffer[T] = map.get(key).getOrElse(null) - assert(containerList != null) - // Get the index'th entry for this host - if present - if (index < containerList.size) { - retval += containerList.apply(index) - found = true - } - } - index += 1 - } - - retval.toList - } -} diff --git a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala b/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala deleted file mode 100644 index 3d06520675..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/ClusterTaskSetManager.scala +++ /dev/null @@ -1,765 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
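To make the interleaving described above concrete, here is a minimal, self-contained Scala sketch of the same round-robin flattening. The method name interleave and the sample data are illustrative only; prioritizeContainers above is the real implementation.

    import scala.collection.mutable.{ArrayBuffer, HashMap}

    // Round-robin flatten: hosts sorted by container count, then drained
    // one entry per host per pass, as prioritizeContainers does.
    def interleave[K, T](map: HashMap[K, ArrayBuffer[T]]): List[T] = {
      val keys = map.keys.toSeq.sortBy(k => -map(k).size) // most-loaded hosts first
      val out = new ArrayBuffer[T]
      var index = 0
      var found = true
      while (found) {
        found = false
        for (key <- keys) {
          val list = map(key)
          if (index < list.size) { out += list(index); found = true }
        }
        index += 1
      }
      out.toList
    }

    // <h1,[c1..c3]>, <h2,[c1,c2]>, <h3,[c1]>  =>  h1c1, h2c1, h3c1, h1c2, h2c2, h1c3
    val offers = HashMap(
      "h1" -> ArrayBuffer("h1c1", "h1c2", "h1c3"),
      "h2" -> ArrayBuffer("h2c1", "h2c2"),
      "h3" -> ArrayBuffer("h3c1"))
    assert(interleave(offers) == List("h1c1", "h2c1", "h3c1", "h1c2", "h2c2", "h1c3"))

The bias towards hosts holding more containers falls out of the sort, so the head of the list is both spread across hosts and weighted towards the largest allocations.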
- */ - -package spark.scheduler.cluster - -import java.util.{HashMap => JHashMap, NoSuchElementException, Arrays} - -import scala.collection.mutable.ArrayBuffer -import scala.collection.mutable.HashMap -import scala.collection.mutable.HashSet -import scala.math.max -import scala.math.min - -import spark._ -import spark.scheduler._ -import spark.TaskState.TaskState -import java.nio.ByteBuffer - -private[spark] object TaskLocality extends Enumeration("PROCESS_LOCAL", "NODE_LOCAL", "RACK_LOCAL", "ANY") with Logging { - - // process local is expected to be used ONLY within tasksetmanager for now. - val PROCESS_LOCAL, NODE_LOCAL, RACK_LOCAL, ANY = Value - - type TaskLocality = Value - - def isAllowed(constraint: TaskLocality, condition: TaskLocality): Boolean = { - - // Must not be the constraint. - assert (constraint != TaskLocality.PROCESS_LOCAL) - - constraint match { - case TaskLocality.NODE_LOCAL => condition == TaskLocality.NODE_LOCAL - case TaskLocality.RACK_LOCAL => condition == TaskLocality.NODE_LOCAL || condition == TaskLocality.RACK_LOCAL - // For anything else, allow - case _ => true - } - } - - def parse(str: String): TaskLocality = { - // better way to do this ? - try { - val retval = TaskLocality.withName(str) - // Must not specify PROCESS_LOCAL ! - assert (retval != TaskLocality.PROCESS_LOCAL) - - retval - } catch { - case nEx: NoSuchElementException => { - logWarning("Invalid task locality specified '" + str + "', defaulting to NODE_LOCAL"); - // default to preserve earlier behavior - NODE_LOCAL - } - } - } -} - -/** - * Schedules the tasks within a single TaskSet in the ClusterScheduler. - */ -private[spark] class ClusterTaskSetManager( - sched: ClusterScheduler, - val taskSet: TaskSet) - extends TaskSetManager - with Logging { - - // Maximum time to wait to run a task in a preferred location (in ms) - val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "3000").toLong - - // CPUs to request per task - val CPUS_PER_TASK = System.getProperty("spark.task.cpus", "1").toDouble - - // Maximum times a task is allowed to fail before failing the job - val MAX_TASK_FAILURES = 4 - - // Quantile of tasks at which to start speculation - val SPECULATION_QUANTILE = System.getProperty("spark.speculation.quantile", "0.75").toDouble - val SPECULATION_MULTIPLIER = System.getProperty("spark.speculation.multiplier", "1.5").toDouble - - // Serializer for closures and tasks. - val ser = SparkEnv.get.closureSerializer.newInstance() - - val tasks = taskSet.tasks - val numTasks = tasks.length - val copiesRunning = new Array[Int](numTasks) - val finished = new Array[Boolean](numTasks) - val numFailures = new Array[Int](numTasks) - val taskAttempts = Array.fill[List[TaskInfo]](numTasks)(Nil) - var tasksFinished = 0 - - var weight = 1 - var minShare = 0 - var runningTasks = 0 - var priority = taskSet.priority - var stageId = taskSet.stageId - var name = "TaskSet_"+taskSet.stageId.toString - var parent:Schedulable = null - - // Last time when we launched a preferred task (for delay scheduling) - var lastPreferredLaunchTime = System.currentTimeMillis - - // List of pending tasks for each node (process local to container). These collections are actually - // treated as stacks, in which new tasks are added to the end of the - // ArrayBuffer and removed from the end. This makes it faster to detect - // tasks that repeatedly fail because whenever a task failed, it is put - // back at the head of the stack. 
They are also only cleaned up lazily; - when a task is launched, it remains in all the pending lists except - the one that it was launched from, but gets removed from them later. - private val pendingTasksForHostPort = new HashMap[String, ArrayBuffer[Int]] - - // List of pending tasks for each node. - // Essentially, similar to pendingTasksForHostPort, except at host level - private val pendingTasksForHost = new HashMap[String, ArrayBuffer[Int]] - - // List of pending tasks for each node based on rack locality. - // Essentially, similar to pendingTasksForHost, except at rack level - private val pendingRackLocalTasksForHost = new HashMap[String, ArrayBuffer[Int]] - - // List containing pending tasks with no locality preferences - val pendingTasksWithNoPrefs = new ArrayBuffer[Int] - - // List containing all pending tasks (also used as a stack, as above) - val allPendingTasks = new ArrayBuffer[Int] - - // Tasks that can be speculated. Since these will be a small fraction of total - // tasks, we'll just hold them in a HashSet. - val speculatableTasks = new HashSet[Int] - - // Task index, start and finish time for each task attempt (indexed by task ID) - val taskInfos = new HashMap[Long, TaskInfo] - - // Did the job fail? - var failed = false - var causeOfFailure = "" - - // How frequently to reprint duplicate exceptions in full, in milliseconds - val EXCEPTION_PRINT_INTERVAL = - System.getProperty("spark.logging.exceptionPrintInterval", "10000").toLong - // Map of recent exceptions (identified by string representation and - // top stack frame) to duplicate count (how many times the same - // exception has appeared) and time the full exception was - // printed. This should ideally be an LRU map that can drop old - // exceptions automatically. - val recentExceptions = HashMap[String, (Int, Long)]() - - // Figure out the current map output tracker generation and set it on all tasks - val generation = sched.mapOutputTracker.getGeneration - logDebug("Generation for " + taskSet.id + ": " + generation) - for (t <- tasks) { - t.generation = generation - } - - // Add all our tasks to the pending lists. We do this in reverse order - // of task index so that tasks with low indices get launched first. - for (i <- (0 until numTasks).reverse) { - addPendingTask(i) - } - - // Note that it follows the hierarchy: - // if we search for NODE_LOCAL, the output will include PROCESS_LOCAL, and - // if we search for RACK_LOCAL, it will include PROCESS_LOCAL & NODE_LOCAL - private def findPreferredLocations(_taskPreferredLocations: Seq[String], scheduler: ClusterScheduler, - taskLocality: TaskLocality.TaskLocality): HashSet[String] = { - - if (TaskLocality.PROCESS_LOCAL == taskLocality) { - // Straightforward comparison! Special-case it. - val retval = new HashSet[String]() - scheduler.synchronized { - for (location <- _taskPreferredLocations) { - if (scheduler.isExecutorAliveOnHostPort(location)) { - retval += location - } - } - } - - return retval - } - - val taskPreferredLocations = - if (TaskLocality.NODE_LOCAL == taskLocality) { - _taskPreferredLocations - } else { - assert (TaskLocality.RACK_LOCAL == taskLocality) - // Expand set to include all 'seen' rack local hosts. - // This works since container allocation/management happens within the master - so any rack locality information is updated in the master. - // Best-effort, and maybe sort of a kludge for now ... rework it later?
- val hosts = new HashSet[String] - _taskPreferredLocations.foreach(h => { - val rackOpt = scheduler.getRackForHost(h) - if (rackOpt.isDefined) { - val hostsOpt = scheduler.getCachedHostsForRack(rackOpt.get) - if (hostsOpt.isDefined) { - hosts ++= hostsOpt.get - } - } - - // Ensure that irrespective of what scheduler says, host is always added ! - hosts += h - }) - - hosts - } - - val retval = new HashSet[String] - scheduler.synchronized { - for (prefLocation <- taskPreferredLocations) { - val aliveLocationsOpt = scheduler.getExecutorsAliveOnHost(Utils.parseHostPort(prefLocation)._1) - if (aliveLocationsOpt.isDefined) { - retval ++= aliveLocationsOpt.get - } - } - } - - retval - } - - // Add a task to all the pending-task lists that it should be on. - private def addPendingTask(index: Int) { - // We can infer hostLocalLocations from rackLocalLocations by joining it against tasks(index).preferredLocations (with appropriate - // hostPort <-> host conversion). But not doing it for simplicity sake. If this becomes a performance issue, modify it. - val processLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.PROCESS_LOCAL) - val hostLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - val rackLocalLocations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL) - - if (rackLocalLocations.size == 0) { - // Current impl ensures this. - assert (processLocalLocations.size == 0) - assert (hostLocalLocations.size == 0) - pendingTasksWithNoPrefs += index - } else { - - // process local locality - for (hostPort <- processLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val hostPortList = pendingTasksForHostPort.getOrElseUpdate(hostPort, ArrayBuffer()) - hostPortList += index - } - - // host locality (includes process local) - for (hostPort <- hostLocalLocations) { - // DEBUG Code - Utils.checkHostPort(hostPort) - - val host = Utils.parseHostPort(hostPort)._1 - val hostList = pendingTasksForHost.getOrElseUpdate(host, ArrayBuffer()) - hostList += index - } - - // rack locality (includes process local and host local) - for (rackLocalHostPort <- rackLocalLocations) { - // DEBUG Code - Utils.checkHostPort(rackLocalHostPort) - - val rackLocalHost = Utils.parseHostPort(rackLocalHostPort)._1 - val list = pendingRackLocalTasksForHost.getOrElseUpdate(rackLocalHost, ArrayBuffer()) - list += index - } - } - - allPendingTasks += index - } - - // Return the pending tasks list for a given host port (process local), or an empty list if - // there is no map entry for that host - private def getPendingTasksForHostPort(hostPort: String): ArrayBuffer[Int] = { - // DEBUG Code - Utils.checkHostPort(hostPort) - pendingTasksForHostPort.getOrElse(hostPort, ArrayBuffer()) - } - - // Return the pending tasks list for a given host, or an empty list if - // there is no map entry for that host - private def getPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 - pendingTasksForHost.getOrElse(host, ArrayBuffer()) - } - - // Return the pending tasks (rack level) list for a given host, or an empty list if - // there is no map entry for that host - private def getRackLocalPendingTasksForHost(hostPort: String): ArrayBuffer[Int] = { - val host = Utils.parseHostPort(hostPort)._1 - pendingRackLocalTasksForHost.getOrElse(host, ArrayBuffer()) - } - - // Number of pending tasks for a given host Port (which would be process local) - def 
numPendingTasksForHostPort(hostPort: String): Int = { - getPendingTasksForHostPort(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - // Number of pending tasks for a given host (which would be data local) - def numPendingTasksForHost(hostPort: String): Int = { - getPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - // Number of pending rack local tasks for a given host - def numRackLocalPendingTasksForHost(hostPort: String): Int = { - getRackLocalPendingTasksForHost(hostPort).count( index => copiesRunning(index) == 0 && !finished(index) ) - } - - - // Dequeue a pending task from the given list and return its index. - // Return None if the list is empty. - // This method also cleans up any tasks in the list that have already - // been launched, since we want that to happen lazily. - private def findTaskFromList(list: ArrayBuffer[Int]): Option[Int] = { - while (!list.isEmpty) { - val index = list.last - list.trimEnd(1) - if (copiesRunning(index) == 0 && !finished(index)) { - return Some(index) - } - } - return None - } - - // Return a speculative task for a given host if any are available. The task should not have an - // attempt running on this host, in case the host is slow. In addition, if locality is set, the - // task must have a preference for this host/rack/no preferred locations at all. - private def findSpeculativeTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { - - assert (TaskLocality.isAllowed(locality, TaskLocality.NODE_LOCAL)) - speculatableTasks.retain(index => !finished(index)) // Remove finished tasks from set - - if (speculatableTasks.size > 0) { - val localTask = speculatableTasks.find { - index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - (locations.size == 0 || locations.contains(hostPort)) && !attemptLocs.contains(hostPort) - } - - if (localTask != None) { - speculatableTasks -= localTask.get - return localTask - } - - // check for rack locality - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackTask = speculatableTasks.find { - index => - val locations = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL) - val attemptLocs = taskAttempts(index).map(_.hostPort) - locations.contains(hostPort) && !attemptLocs.contains(hostPort) - } - - if (rackTask != None) { - speculatableTasks -= rackTask.get - return rackTask - } - } - - // Any task ... - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - // Check for attemptLocs also ? - val nonLocalTask = speculatableTasks.find(i => !taskAttempts(i).map(_.hostPort).contains(hostPort)) - if (nonLocalTask != None) { - speculatableTasks -= nonLocalTask.get - return nonLocalTask - } - } - } - return None - } - - // Dequeue a pending task for a given node and return its index. - // If localOnly is set to false, allow non-local tasks as well. 
- private def findTask(hostPort: String, locality: TaskLocality.TaskLocality): Option[Int] = { - val processLocalTask = findTaskFromList(getPendingTasksForHostPort(hostPort)) - if (processLocalTask != None) { - return processLocalTask - } - - val localTask = findTaskFromList(getPendingTasksForHost(hostPort)) - if (localTask != None) { - return localTask - } - - if (TaskLocality.isAllowed(locality, TaskLocality.RACK_LOCAL)) { - val rackLocalTask = findTaskFromList(getRackLocalPendingTasksForHost(hostPort)) - if (rackLocalTask != None) { - return rackLocalTask - } - } - - // Look for no pref tasks AFTER rack local tasks - this has side effect that we will get to failed tasks later rather than sooner. - // TODO: That code path needs to be revisited (adding to no prefs list when host:port goes down). - val noPrefTask = findTaskFromList(pendingTasksWithNoPrefs) - if (noPrefTask != None) { - return noPrefTask - } - - if (TaskLocality.isAllowed(locality, TaskLocality.ANY)) { - val nonLocalTask = findTaskFromList(allPendingTasks) - if (nonLocalTask != None) { - return nonLocalTask - } - } - - // Finally, if all else has failed, find a speculative task - return findSpeculativeTask(hostPort, locality) - } - - private def isProcessLocalLocation(task: Task[_], hostPort: String): Boolean = { - Utils.checkHostPort(hostPort) - - val locs = task.preferredLocations - - locs.contains(hostPort) - } - - private def isHostLocalLocation(task: Task[_], hostPort: String): Boolean = { - val locs = task.preferredLocations - - // If no preference, consider it as host local - if (locs.isEmpty) return true - - val host = Utils.parseHostPort(hostPort)._1 - locs.find(h => Utils.parseHostPort(h)._1 == host).isDefined - } - - // Does a host count as a rack local preferred location for a task? (assumes host is NOT preferred location). - // This is true if either the task has preferred locations and this host is one, or it has - // no preferred locations (in which we still count the launch as preferred). - private def isRackLocalLocation(task: Task[_], hostPort: String): Boolean = { - - val locs = task.preferredLocations - - val preferredRacks = new HashSet[String]() - for (preferredHost <- locs) { - val rack = sched.getRackForHost(preferredHost) - if (None != rack) preferredRacks += rack.get - } - - if (preferredRacks.isEmpty) return false - - val hostRack = sched.getRackForHost(hostPort) - - return None != hostRack && preferredRacks.contains(hostRack.get) - } - - // Respond to an offer of a single slave from the scheduler by finding a task - def slaveOffer(execId: String, hostPort: String, availableCpus: Double, overrideLocality: TaskLocality.TaskLocality = null): Option[TaskDescription] = { - - if (tasksFinished < numTasks && availableCpus >= CPUS_PER_TASK) { - // If explicitly specified, use that - val locality = if (overrideLocality != null) overrideLocality else { - // expand only if we have waited for more than LOCALITY_WAIT for a host local task ... 
- val time = System.currentTimeMillis - if (time - lastPreferredLaunchTime < LOCALITY_WAIT) TaskLocality.NODE_LOCAL else TaskLocality.ANY - } - - findTask(hostPort, locality) match { - case Some(index) => { - // Found a task; do some bookkeeping and return a Mesos task for it - val task = tasks(index) - val taskId = sched.newTaskId() - // Figure out whether this should count as a preferred launch - val taskLocality = - if (isProcessLocalLocation(task, hostPort)) TaskLocality.PROCESS_LOCAL - else if (isHostLocalLocation(task, hostPort)) TaskLocality.NODE_LOCAL - else if (isRackLocalLocation(task, hostPort)) TaskLocality.RACK_LOCAL - else TaskLocality.ANY - val prefStr = taskLocality.toString - logInfo("Starting task %s:%d as TID %s on slave %s: %s (%s)".format( - taskSet.id, index, taskId, execId, hostPort, prefStr)) - // Do various bookkeeping - copiesRunning(index) += 1 - val time = System.currentTimeMillis - val info = new TaskInfo(taskId, index, time, execId, hostPort, taskLocality) - taskInfos(taskId) = info - taskAttempts(index) = info :: taskAttempts(index) - if (taskLocality == TaskLocality.PROCESS_LOCAL || taskLocality == TaskLocality.NODE_LOCAL) { - lastPreferredLaunchTime = time - } - // Serialize and return the task - val startTime = System.currentTimeMillis - val serializedTask = Task.serializeWithDependencies( - task, sched.sc.addedFiles, sched.sc.addedJars, ser) - val timeTaken = System.currentTimeMillis - startTime - increaseRunningTasks(1) - logInfo("Serialized task %s:%d as %d bytes in %d ms".format( - taskSet.id, index, serializedTask.limit, timeTaken)) - val taskName = "task %s:%d".format(taskSet.id, index) - return Some(new TaskDescription(taskId, execId, taskName, serializedTask)) - } - case _ => - } - } - return None - } - - def statusUpdate(tid: Long, state: TaskState, serializedData: ByteBuffer) { - state match { - case TaskState.FINISHED => - taskFinished(tid, state, serializedData) - case TaskState.LOST => - taskLost(tid, state, serializedData) - case TaskState.FAILED => - taskLost(tid, state, serializedData) - case TaskState.KILLED => - taskLost(tid, state, serializedData) - case _ => - } - } - - def taskFinished(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - if (info.failed) { - // We might get two task-lost messages for the same task in coarse-grained Mesos mode, - // or even from Mesos itself when acks get delayed. 
- return - } - val index = info.index - info.markSuccessful() - decreaseRunningTasks(1) - if (!finished(index)) { - tasksFinished += 1 - logInfo("Finished TID %s in %d ms (progress: %d/%d)".format( - tid, info.duration, tasksFinished, numTasks)) - // Deserialize task result and pass it to the scheduler - try { - val result = ser.deserialize[TaskResult[_]](serializedData) - result.metrics.resultSize = serializedData.limit() - sched.listener.taskEnded(tasks(index), Success, result.value, result.accumUpdates, info, result.metrics) - } catch { - case cnf: ClassNotFoundException => - val loader = Thread.currentThread().getContextClassLoader - throw new SparkException("ClassNotFound with classloader: " + loader, cnf) - case ex => throw ex - } - // Mark finished and stop if we've finished all the tasks - finished(index) = true - if (tasksFinished == numTasks) { - sched.taskSetFinished(this) - } - } else { - logInfo("Ignoring task-finished event for TID " + tid + - " because task " + index + " is already finished") - } - } - - def taskLost(tid: Long, state: TaskState, serializedData: ByteBuffer) { - val info = taskInfos(tid) - if (info.failed) { - // We might get two task-lost messages for the same task in coarse-grained Mesos mode, - // or even from Mesos itself when acks get delayed. - return - } - val index = info.index - info.markFailed() - decreaseRunningTasks(1) - if (!finished(index)) { - logInfo("Lost TID %s (task %s:%d)".format(tid, taskSet.id, index)) - copiesRunning(index) -= 1 - // Check if the problem is a map output fetch failure. In that case, this - // task will never succeed on any node, so tell the scheduler about it. - if (serializedData != null && serializedData.limit() > 0) { - val reason = ser.deserialize[TaskEndReason](serializedData, getClass.getClassLoader) - reason match { - case fetchFailed: FetchFailed => - logInfo("Loss was due to fetch failure from " + fetchFailed.bmAddress) - sched.listener.taskEnded(tasks(index), fetchFailed, null, null, info, null) - finished(index) = true - tasksFinished += 1 - sched.taskSetFinished(this) - decreaseRunningTasks(runningTasks) - return - - case taskResultTooBig: TaskResultTooBigFailure => - logInfo("Loss was due to task %s result exceeding Akka frame size; " + - "aborting job".format(tid)) - abort("Task %s result exceeded Akka frame size".format(tid)) - return - - case ef: ExceptionFailure => - sched.listener.taskEnded(tasks(index), ef, null, null, info, ef.metrics.getOrElse(null)) - val key = ef.description - val now = System.currentTimeMillis - val (printFull, dupCount) = { - if (recentExceptions.contains(key)) { - val (dupCount, printTime) = recentExceptions(key) - if (now - printTime > EXCEPTION_PRINT_INTERVAL) { - recentExceptions(key) = (0, now) - (true, 0) - } else { - recentExceptions(key) = (dupCount + 1, printTime) - (false, dupCount + 1) - } - } else { - recentExceptions(key) = (0, now) - (true, 0) - } - } - if (printFull) { - val locs = ef.stackTrace.map(loc => "\tat %s".format(loc.toString)) - logInfo("Loss was due to %s\n%s\n%s".format( - ef.className, ef.description, locs.mkString("\n"))) - } else { - logInfo("Loss was due to %s [duplicate %d]".format(ef.description, dupCount)) - } - - case _ => {} - } - } - // On non-fetch failures, re-enqueue the task as pending for a max number of retries - addPendingTask(index) - // Count failed attempts only on FAILED and LOST state (not on KILLED) - if (state == TaskState.FAILED || state == TaskState.LOST) { - numFailures(index) += 1 - if (numFailures(index) > MAX_TASK_FAILURES) 
{ - logError("Task %s:%d failed more than %d times; aborting job".format( - taskSet.id, index, MAX_TASK_FAILURES)) - abort("Task %s:%d failed more than %d times".format(taskSet.id, index, MAX_TASK_FAILURES)) - } - } - } else { - logInfo("Ignoring task-lost event for TID " + tid + - " because task " + index + " is already finished") - } - } - - def error(message: String) { - // Save the error message - abort("Error: " + message) - } - - def abort(message: String) { - failed = true - causeOfFailure = message - // TODO: Kill running tasks if we were not terminated due to a Mesos error - sched.listener.taskSetFailed(taskSet, message) - decreaseRunningTasks(runningTasks) - sched.taskSetFinished(this) - } - - override def increaseRunningTasks(taskNum: Int) { - runningTasks += taskNum - if (parent != null) { - parent.increaseRunningTasks(taskNum) - } - } - - override def decreaseRunningTasks(taskNum: Int) { - runningTasks -= taskNum - if (parent != null) { - parent.decreaseRunningTasks(taskNum) - } - } - - // TODO: For now we just find a Pool, not a TaskSetManager; we can extend this function in the future if needed - override def getSchedulableByName(name: String): Schedulable = { - return null - } - - override def addSchedulable(schedulable: Schedulable) { - // nothing - } - - override def removeSchedulable(schedulable: Schedulable) { - // nothing - } - - override def getSortedTaskSetQueue(): ArrayBuffer[TaskSetManager] = { - var sortedTaskSetQueue = new ArrayBuffer[TaskSetManager] - sortedTaskSetQueue += this - return sortedTaskSetQueue - } - - override def executorLost(execId: String, hostPort: String) { - logInfo("Re-queueing tasks for " + execId + " from TaskSet " + taskSet.id) - - // If some task has preferred locations only on hostname, and there are no more executors there, - // put it in the no-prefs list to avoid the wait from delay scheduling - - // Host local tasks - should we push these to the rack-local or no-pref list? For now, preserving behavior and moving to the - // no-prefs list. Note, this was done due to implications related to 'waiting' for data local tasks, etc. - // Note: NOT checking the process local list - since the host local list is a superset of it. We need to add to no-prefs only if - // there is no host local node for the task (not if there is no process local node for the task) - for (index <- getPendingTasksForHost(Utils.parseHostPort(hostPort)._1)) { - // val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.RACK_LOCAL) - val newLocs = findPreferredLocations(tasks(index).preferredLocations, sched, TaskLocality.NODE_LOCAL) - if (newLocs.isEmpty) { - pendingTasksWithNoPrefs += index - } - } - - // Re-enqueue any tasks that ran on the failed executor if this is a shuffle map stage - if (tasks(0).isInstanceOf[ShuffleMapTask]) { - for ((tid, info) <- taskInfos if info.executorId == execId) { - val index = taskInfos(tid).index - if (finished(index)) { - finished(index) = false - copiesRunning(index) -= 1 - tasksFinished -= 1 - addPendingTask(index) - // Tell the DAGScheduler that this task was resubmitted so that it doesn't think our - // stage finishes when a total of tasks.size tasks finish. - sched.listener.taskEnded(tasks(index), Resubmitted, null, null, info, null) - } - } - } - // Also re-enqueue any tasks that were running on the node - for ((tid, info) <- taskInfos if info.running && info.executorId == execId) { - taskLost(tid, TaskState.KILLED, null) - } - } - - /** - * Check for tasks to be speculated and return true if there are any.
This is called periodically - * by the ClusterScheduler. - * - * TODO: To make this scale to large jobs, we need to maintain a list of running tasks, so that - * we don't scan the whole task set. It might also help to make this sorted by launch time. - */ - override def checkSpeculatableTasks(): Boolean = { - // Can't speculate if we only have one task, or if all tasks have finished. - if (numTasks == 1 || tasksFinished == numTasks) { - return false - } - var foundTasks = false - val minFinishedForSpeculation = (SPECULATION_QUANTILE * numTasks).floor.toInt - logDebug("Checking for speculative tasks: minFinished = " + minFinishedForSpeculation) - if (tasksFinished >= minFinishedForSpeculation) { - val time = System.currentTimeMillis() - val durations = taskInfos.values.filter(_.successful).map(_.duration).toArray - Arrays.sort(durations) - val medianDuration = durations(min((0.5 * numTasks).round.toInt, durations.size - 1)) - val threshold = max(SPECULATION_MULTIPLIER * medianDuration, 100) - // TODO: Threshold should also look at standard deviation of task durations and have a lower - // bound based on that. - logDebug("Task length threshold for speculation: " + threshold) - for ((tid, info) <- taskInfos) { - val index = info.index - if (!finished(index) && copiesRunning(index) == 1 && info.timeRunning(time) > threshold && - !speculatableTasks.contains(index)) { - logInfo( - "Marking task %s:%d (on %s) as speculatable because it ran more than %.0f ms".format( - taskSet.id, index, info.hostPort, threshold)) - speculatableTasks += index - foundTasks = true - } - } - } - return foundTasks - } - - override def hasPendingTasks(): Boolean = { - numTasks > 0 && tasksFinished < numTasks - } -} diff --git a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala b/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala deleted file mode 100644 index ac9e5ef94d..0000000000 --- a/core/src/main/scala/spark/scheduler/cluster/StandaloneClusterMessage.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
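The checkSpeculatableTasks method that closes the file above reduces to a quantile gate plus a median-based threshold. A small hedged sketch with made-up durations, using the default spark.speculation.quantile (0.75) and spark.speculation.multiplier (1.5) defined earlier:

    import java.util.Arrays

    val numTasks = 8
    val quantile = 0.75   // SPECULATION_QUANTILE default
    val multiplier = 1.5  // SPECULATION_MULTIPLIER default

    // Gate: speculation starts only once floor(0.75 * 8) = 6 tasks have succeeded.
    val minFinishedForSpeculation = (quantile * numTasks).floor.toInt

    val durations = Array(200L, 220L, 240L, 260L, 300L, 900L) // 6 successful tasks
    Arrays.sort(durations)
    // Note the original indexes by numTasks, not by durations.size.
    val medianDuration = durations(math.min((0.5 * numTasks).round.toInt, durations.length - 1))
    val threshold = math.max(multiplier * medianDuration, 100)
    // medianDuration = 300, threshold = 450: any task whose single running copy
    // has been running longer than 450 ms becomes a speculation candidate.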
- */ - -package spark.scheduler.cluster - -import spark.TaskState.TaskState -import java.nio.ByteBuffer -import spark.util.SerializableBuffer -import spark.Utils - -private[spark] sealed trait StandaloneClusterMessage extends Serializable - -// Driver to executors -private[spark] -case class LaunchTask(task: TaskDescription) extends StandaloneClusterMessage - -private[spark] -case class RegisteredExecutor(sparkProperties: Seq[(String, String)]) - extends StandaloneClusterMessage - -private[spark] -case class RegisterExecutorFailed(message: String) extends StandaloneClusterMessage - -// Executors to driver -private[spark] -case class RegisterExecutor(executorId: String, hostPort: String, cores: Int) - extends StandaloneClusterMessage { - Utils.checkHostPort(hostPort, "Expected host port") -} - -private[spark] -case class StatusUpdate(executorId: String, taskId: Long, state: TaskState, data: SerializableBuffer) - extends StandaloneClusterMessage - -private[spark] -object StatusUpdate { - /** Alternate factory method that takes a ByteBuffer directly for the data field */ - def apply(executorId: String, taskId: Long, state: TaskState, data: ByteBuffer): StatusUpdate = { - StatusUpdate(executorId, taskId, state, new SerializableBuffer(data)) - } -} - -// Internal messages in driver -private[spark] case object ReviveOffers extends StandaloneClusterMessage -private[spark] case object StopDriver extends StandaloneClusterMessage - -private[spark] case class RemoveExecutor(executorId: String, reason: String) - extends StandaloneClusterMessage diff --git a/core/src/main/scala/spark/storage/BlockManagerMessages.scala b/core/src/main/scala/spark/storage/BlockManagerMessages.scala deleted file mode 100644 index 01de4ccb8f..0000000000 --- a/core/src/main/scala/spark/storage/BlockManagerMessages.scala +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.storage - -import java.io.{Externalizable, ObjectInput, ObjectOutput} - -import akka.actor.ActorRef - - -////////////////////////////////////////////////////////////////////////////////// -// Messages from the master to slaves. -////////////////////////////////////////////////////////////////////////////////// -private[spark] -sealed trait ToBlockManagerSlave - -// Remove a block from the slaves that have it. This can only be used to remove -// blocks that the master knows about. -private[spark] -case class RemoveBlock(blockId: String) extends ToBlockManagerSlave - -// Remove all blocks belonging to a specific RDD. -private[spark] case class RemoveRdd(rddId: Int) extends ToBlockManagerSlave - - -////////////////////////////////////////////////////////////////////////////////// -// Messages from slaves to the master. 
-////////////////////////////////////////////////////////////////////////////////// -private[spark] -sealed trait ToBlockManagerMaster - -private[spark] -case class RegisterBlockManager( - blockManagerId: BlockManagerId, - maxMemSize: Long, - sender: ActorRef) - extends ToBlockManagerMaster - -private[spark] -case class HeartBeat(blockManagerId: BlockManagerId) extends ToBlockManagerMaster - -private[spark] -class UpdateBlockInfo( - var blockManagerId: BlockManagerId, - var blockId: String, - var storageLevel: StorageLevel, - var memSize: Long, - var diskSize: Long) - extends ToBlockManagerMaster - with Externalizable { - - def this() = this(null, null, null, 0, 0) // For deserialization only - - override def writeExternal(out: ObjectOutput) { - blockManagerId.writeExternal(out) - out.writeUTF(blockId) - storageLevel.writeExternal(out) - out.writeLong(memSize) - out.writeLong(diskSize) - } - - override def readExternal(in: ObjectInput) { - blockManagerId = BlockManagerId(in) - blockId = in.readUTF() - storageLevel = StorageLevel(in) - memSize = in.readLong() - diskSize = in.readLong() - } -} - -private[spark] -object UpdateBlockInfo { - def apply(blockManagerId: BlockManagerId, - blockId: String, - storageLevel: StorageLevel, - memSize: Long, - diskSize: Long): UpdateBlockInfo = { - new UpdateBlockInfo(blockManagerId, blockId, storageLevel, memSize, diskSize) - } - - // For pattern-matching - def unapply(h: UpdateBlockInfo): Option[(BlockManagerId, String, StorageLevel, Long, Long)] = { - Some((h.blockManagerId, h.blockId, h.storageLevel, h.memSize, h.diskSize)) - } -} - -private[spark] -case class GetLocations(blockId: String) extends ToBlockManagerMaster - -private[spark] -case class GetLocationsMultipleBlockIds(blockIds: Array[String]) extends ToBlockManagerMaster - -private[spark] -case class GetPeers(blockManagerId: BlockManagerId, size: Int) extends ToBlockManagerMaster - -private[spark] -case class RemoveExecutor(execId: String) extends ToBlockManagerMaster - -private[spark] -case object StopBlockManagerMaster extends ToBlockManagerMaster - -private[spark] -case object GetMemoryStatus extends ToBlockManagerMaster - -private[spark] -case object ExpireDeadHosts extends ToBlockManagerMaster - -private[spark] -case object GetStorageStatus extends ToBlockManagerMaster diff --git a/core/src/main/scala/spark/ui/jobs/IndexPage.scala b/core/src/main/scala/spark/ui/jobs/IndexPage.scala deleted file mode 100644 index f31af3cda6..0000000000 --- a/core/src/main/scala/spark/ui/jobs/IndexPage.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
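UpdateBlockInfo above pairs hand-rolled Externalizable serialization with a companion unapply so that a plain (non-case) class stays pattern-matchable. A minimal sketch of the same idiom with a hypothetical two-field message:

    import java.io.{Externalizable, ObjectInput, ObjectOutput}

    class BlockSize(var blockId: String, var size: Long) extends Externalizable {
      def this() = this(null, 0L) // no-arg constructor is required for deserialization

      override def writeExternal(out: ObjectOutput) {
        out.writeUTF(blockId)
        out.writeLong(size)
      }

      override def readExternal(in: ObjectInput) {
        blockId = in.readUTF()
        size = in.readLong()
      }
    }

    object BlockSize {
      // The companion unapply restores `case BlockSize(id, n) =>` pattern matching.
      def unapply(b: BlockSize): Option[(String, Long)] = Some((b.blockId, b.size))
    }

    // new BlockSize("rdd_0_1", 1024L) match { case BlockSize(id, n) => ... }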
- */ - -package spark.ui.jobs - -import java.util.Date - -import javax.servlet.http.HttpServletRequest - -import scala.Some -import scala.xml.{NodeSeq, Node} - -import spark.scheduler.Stage -import spark.ui.UIUtils._ -import spark.ui.Page._ -import spark.storage.StorageLevel - -/** Page showing list of all ongoing and recently finished stages */ -private[spark] class IndexPage(parent: JobProgressUI) { - def listener = parent.listener - val dateFmt = parent.dateFmt - - def render(request: HttpServletRequest): Seq[Node] = { - val activeStages = listener.activeStages.toSeq - val completedStages = listener.completedStages.reverse.toSeq - val failedStages = listener.failedStages.reverse.toSeq - - /** Special table which merges two header cells. */ - def stageTable[T](makeRow: T => Seq[Node], rows: Seq[T]): Seq[Node] = { - <table class="table table-bordered table-striped table-condensed sortable"> - <thead> - <th>Stage Id</th> - <th>Origin</th> - <th>Submitted</th> - <th>Duration</th> - <th colspan="2">Tasks: Complete/Total</th> - <th>Shuffle Activity</th> - <th>Stored RDD</th> - </thead> - <tbody> - {rows.map(r => makeRow(r))} - </tbody> - </table> - } - - val activeStageTable: NodeSeq = stageTable(stageRow, activeStages) - val completedStageTable = stageTable(stageRow, completedStages) - val failedStageTable: NodeSeq = stageTable(stageRow, failedStages) - - val content = <h2>Active Stages</h2> ++ activeStageTable ++ - <h2>Completed Stages</h2> ++ completedStageTable ++ - <h2>Failed Stages</h2> ++ failedStageTable - - headerSparkPage(content, parent.sc, "Spark Stages", Jobs) - } - - def getElapsedTime(submitted: Option[Long], completed: Long): String = { - submitted match { - case Some(t) => parent.formatDuration(completed - t) - case _ => "Unknown" - } - } - - def makeProgressBar(completed: Int, total: Int): Seq[Node] = { - val width=130 - val height=15 - val completeWidth = (completed.toDouble / total) * width - - <svg width={width.toString} height={height.toString}> - <rect width={width.toString} height={height.toString} - fill="white" stroke="rgb(51,51,51)" stroke-width="1" /> - <rect width={completeWidth.toString} height={height.toString} - fill="rgb(0,136,204)" stroke="black" stroke-width="1" /> - </svg> - } - - - def stageRow(s: Stage): Seq[Node] = { - val submissionTime = s.submissionTime match { - case Some(t) => dateFmt.format(new Date(t)) - case None => "Unknown" - } - val (read, write) = (listener.hasShuffleRead(s.id), listener.hasShuffleWrite(s.id)) - val shuffleInfo = (read, write) match { - case (true, true) => "Read/Write" - case (true, false) => "Read" - case (false, true) => "Write" - case _ => "" - } - val completedTasks = listener.stageToTasksComplete.getOrElse(s.id, 0) - val totalTasks = s.numPartitions - - <tr> - <td>{s.id}</td> - <td><a href={"/stages/stage?id=%s".format(s.id)}>{s.name}</a></td> - <td>{submissionTime}</td> - <td>{getElapsedTime(s.submissionTime, - s.completionTime.getOrElse(System.currentTimeMillis()))}</td> - <td class="progress-cell">{makeProgressBar(completedTasks, totalTasks)}</td> - <td style="border-left: 0; text-align: center;">{completedTasks} / {totalTasks} - {listener.stageToTasksFailed.getOrElse(s.id, 0) match { - case f if f > 0 => "(%s failed)".format(f) - case _ => - }} - </td> - <td>{shuffleInfo}</td> - <td>{if (s.rdd.getStorageLevel != StorageLevel.NONE) { - <a href={"/storage/rdd?id=%s".format(s.rdd.id)}> - {Option(s.rdd.name).getOrElse(s.rdd.id)} - </a> - }} - </td> - </tr> - } -} diff --git 
a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala b/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala deleted file mode 100644 index 44dcf82d11..0000000000 --- a/core/src/main/scala/spark/ui/jobs/JobProgressUI.scala +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package spark.ui.jobs - -import akka.util.Duration - -import java.text.SimpleDateFormat - -import javax.servlet.http.HttpServletRequest - -import org.eclipse.jetty.server.Handler - -import scala.Seq -import scala.collection.mutable.{HashSet, ListBuffer, HashMap, ArrayBuffer} - -import spark.ui.JettyUtils._ -import spark.{ExceptionFailure, SparkContext, Success, Utils} -import spark.scheduler._ -import spark.scheduler.cluster.TaskInfo -import spark.executor.TaskMetrics -import collection.mutable - -/** Web UI showing progress status of all jobs in the given SparkContext. */ -private[spark] class JobProgressUI(val sc: SparkContext) { - private var _listener: Option[JobProgressListener] = None - def listener = _listener.get - val dateFmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") - - private val indexPage = new IndexPage(this) - private val stagePage = new StagePage(this) - - def start() { - _listener = Some(new JobProgressListener) - sc.addSparkListener(listener) - } - - def formatDuration(ms: Long) = Utils.msDurationToString(ms) - - def getHandlers = Seq[(String, Handler)]( - ("/stages/stage", (request: HttpServletRequest) => stagePage.render(request)), - ("/stages", (request: HttpServletRequest) => indexPage.render(request)) - ) -} - -private[spark] class JobProgressListener extends SparkListener { - // How many stages to remember - val RETAINED_STAGES = System.getProperty("spark.ui.retained_stages", "1000").toInt - - val activeStages = HashSet[Stage]() - val completedStages = ListBuffer[Stage]() - val failedStages = ListBuffer[Stage]() - - val stageToTasksComplete = HashMap[Int, Int]() - val stageToTasksFailed = HashMap[Int, Int]() - val stageToTaskInfos = - HashMap[Int, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]]() - - override def onJobStart(jobStart: SparkListenerJobStart) {} - - override def onStageCompleted(stageCompleted: StageCompleted) = { - val stage = stageCompleted.stageInfo.stage - activeStages -= stage - completedStages += stage - trimIfNecessary(completedStages) - } - - /** If stages is too large, remove and garbage collect old stages */ - def trimIfNecessary(stages: ListBuffer[Stage]) { - if (stages.size > RETAINED_STAGES) { - val toRemove = RETAINED_STAGES / 10 - stages.takeRight(toRemove).foreach( s => { - stageToTaskInfos.remove(s.id) - }) - stages.trimEnd(toRemove) - } - } - - override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) = - activeStages += 
stageSubmitted.stage - - override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { - val sid = taskEnd.task.stageId - val (failureInfo, metrics): (Option[ExceptionFailure], Option[TaskMetrics]) = - taskEnd.reason match { - case e: ExceptionFailure => - stageToTasksFailed(sid) = stageToTasksFailed.getOrElse(sid, 0) + 1 - (Some(e), e.metrics) - case _ => - stageToTasksComplete(sid) = stageToTasksComplete.getOrElse(sid, 0) + 1 - (None, Some(taskEnd.taskMetrics)) - } - val taskList = stageToTaskInfos.getOrElse( - sid, ArrayBuffer[(TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])]()) - taskList += ((taskEnd.taskInfo, metrics, failureInfo)) - stageToTaskInfos(sid) = taskList - } - - override def onJobEnd(jobEnd: SparkListenerJobEnd) { - jobEnd match { - case end: SparkListenerJobEnd => - end.jobResult match { - case JobFailed(ex, Some(stage)) => - activeStages -= stage - failedStages += stage - trimIfNecessary(failedStages) - case _ => - } - case _ => - } - } - - /** Is this stage's input from a shuffle read. */ - def hasShuffleRead(stageID: Int): Boolean = { - // This is written in a slightly complicated way to avoid having to scan all tasks - for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) { - if (s._2 != null) return s._2.flatMap(m => m.shuffleReadMetrics).isDefined - } - return false // No tasks have finished for this stage - } - - /** Is this stage's output to a shuffle write. */ - def hasShuffleWrite(stageID: Int): Boolean = { - // This is written in a slightly complicated way to avoid having to scan all tasks - for (s <- stageToTaskInfos.get(stageID).getOrElse(Seq())) { - if (s._2 != null) return s._2.flatMap(m => m.shuffleWriteMetrics).isDefined - } - return false // No tasks have finished for this stage - } -} diff --git a/core/src/main/scala/spark/ui/jobs/StagePage.scala b/core/src/main/scala/spark/ui/jobs/StagePage.scala deleted file mode 100644 index 292966f23a..0000000000 --- a/core/src/main/scala/spark/ui/jobs/StagePage.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
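JobProgressListener above favours cheap incremental bookkeeping over rescanning: per-stage counters are bumped on each event, and history is trimmed once it exceeds RETAINED_STAGES. A hedged sketch of those two idioms (recordSuccess and trim are illustrative names, not part of the original file):

    import scala.collection.mutable.{HashMap, ListBuffer}

    val stageToTasksComplete = HashMap[Int, Int]()

    // Increment-or-initialise, as in onTaskEnd above.
    def recordSuccess(stageId: Int) {
      stageToTasksComplete(stageId) = stageToTasksComplete.getOrElse(stageId, 0) + 1
    }

    recordSuccess(3); recordSuccess(3)
    assert(stageToTasksComplete(3) == 2)

    // Bounded history, as in trimIfNecessary above: once the buffer exceeds
    // the retention limit, drop a tenth of it from the tail.
    def trim[T](stages: ListBuffer[T], retained: Int) {
      if (stages.size > retained) stages.trimEnd(retained / 10)
    }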
- */ - -package spark.ui.jobs - -import java.util.Date - -import javax.servlet.http.HttpServletRequest - -import scala.xml.Node - -import spark.ui.UIUtils._ -import spark.ui.Page._ -import spark.util.Distribution -import spark.{ExceptionFailure, Utils} -import spark.scheduler.cluster.TaskInfo -import spark.executor.TaskMetrics - -/** Page showing statistics and task list for a given stage */ -private[spark] class StagePage(parent: JobProgressUI) { - def listener = parent.listener - val dateFmt = parent.dateFmt - - def render(request: HttpServletRequest): Seq[Node] = { - val stageId = request.getParameter("id").toInt - - if (!listener.stageToTaskInfos.contains(stageId)) { - val content = - <div> - <h2>Summary Metrics</h2> No tasks have finished yet - <h2>Tasks</h2> No tasks have finished yet - </div> - return headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs) - } - - val tasks = listener.stageToTaskInfos(stageId) - - val shuffleRead = listener.hasShuffleRead(stageId) - val shuffleWrite = listener.hasShuffleWrite(stageId) - - val taskHeaders: Seq[String] = - Seq("Task ID", "Duration", "Locality Level", "Worker", "Launch Time") ++ - {if (shuffleRead) Seq("Shuffle Read") else Nil} ++ - {if (shuffleWrite) Seq("Shuffle Write") else Nil} ++ - Seq("Details") - - val taskTable = listingTable(taskHeaders, taskRow, tasks) - - // Excludes tasks which failed and have incomplete metrics - val validTasks = tasks.filter(t => Option(t._2).isDefined) - - val summaryTable: Option[Seq[Node]] = - if (validTasks.size == 0) { - None - } - else { - val serviceTimes = validTasks.map{case (info, metrics, exception) => - metrics.get.executorRunTime.toDouble} - val serviceQuantiles = "Duration" +: Distribution(serviceTimes).get.getQuantiles().map( - ms => parent.formatDuration(ms.toLong)) - - def getQuantileCols(data: Seq[Double]) = - Distribution(data).get.getQuantiles().map(d => Utils.memoryBytesToString(d.toLong)) - - val shuffleReadSizes = validTasks.map { - case(info, metrics, exception) => - metrics.get.shuffleReadMetrics.map(_.remoteBytesRead).getOrElse(0L).toDouble - } - val shuffleReadQuantiles = "Shuffle Read (Remote)" +: getQuantileCols(shuffleReadSizes) - - val shuffleWriteSizes = validTasks.map { - case(info, metrics, exception) => - metrics.get.shuffleWriteMetrics.map(_.shuffleBytesWritten).getOrElse(0L).toDouble - } - val shuffleWriteQuantiles = "Shuffle Write" +: getQuantileCols(shuffleWriteSizes) - - val listings: Seq[Seq[String]] = Seq(serviceQuantiles, - if (shuffleRead) shuffleReadQuantiles else Nil, - if (shuffleWrite) shuffleWriteQuantiles else Nil) - - val quantileHeaders = Seq("Metric", "Min", "25%", "50%", "75%", "Max") - def quantileRow(data: Seq[String]): Seq[Node] = <tr> {data.map(d => <td>{d}</td>)} </tr> - Some(listingTable(quantileHeaders, quantileRow, listings)) - } - - val content = - <h2>Summary Metrics</h2> ++ summaryTable.getOrElse(Nil) ++ <h2>Tasks</h2> ++ taskTable; - - headerSparkPage(content, parent.sc, "Stage Details: %s".format(stageId), Jobs) - } - - - def taskRow(taskData: (TaskInfo, Option[TaskMetrics], Option[ExceptionFailure])): Seq[Node] = { - def fmtStackTrace(trace: Seq[StackTraceElement]): Seq[Node] = - trace.map(e => <span style="display:block;">{e.toString}</span>) - val (info, metrics, exception) = taskData - <tr> - <td>{info.taskId}</td> - <td sorttable_customkey={metrics.map{m => m.executorRunTime.toString}.getOrElse("1")}> - {metrics.map{m => parent.formatDuration(m.executorRunTime)}.getOrElse("")} - </td> - 
<td>{info.taskLocality}</td> - <td>{info.hostPort}</td> - <td>{dateFmt.format(new Date(info.launchTime))}</td> - {metrics.flatMap{m => m.shuffleReadMetrics}.map{s => - <td>{Utils.memoryBytesToString(s.remoteBytesRead)}</td>}.getOrElse("")} - {metrics.flatMap{m => m.shuffleWriteMetrics}.map{s => - <td>{Utils.memoryBytesToString(s.shuffleBytesWritten)}</td>}.getOrElse("")} - <td>{exception.map(e => - <span> - {e.className} ({e.description})<br/> - {fmtStackTrace(e.stackTrace)} - </span>).getOrElse("")} - </td> - </tr> - } -}
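Finally, StagePage's summary table relies on spark.util.Distribution for its five-number summaries. A standalone approximation of that quantile row, under the assumption that a simple index-based quantile is close enough for illustration (the probabilities mirror the Min/25%/50%/75%/Max headers above; quantiles is a hypothetical helper, not the actual Distribution API):

    // Simple index-based quantiles over task durations.
    def quantiles(data: Seq[Double],
                  probs: Seq[Double] = Seq(0.0, 0.25, 0.5, 0.75, 1.0)): Seq[Double] = {
      val sorted = data.sorted
      probs.map(p => sorted(math.min((p * sorted.size).toInt, sorted.size - 1)))
    }

    val serviceTimes = Seq(120.0, 180.0, 240.0, 300.0, 600.0)
    assert(quantiles(serviceTimes) == Seq(120.0, 180.0, 240.0, 300.0, 600.0))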