From 81c4d19c612208b932f8579427cb895385336c6e Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Tue, 19 Feb 2013 12:43:13 -0800 Subject: Maven and sbt build changes for SparkGraph. --- project/SparkBuild.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'project/SparkBuild.scala') diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 03b8094f7d..3e383b1300 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -17,7 +17,7 @@ object SparkBuild extends Build { //val HADOOP_VERSION = "2.0.0-mr1-cdh4.1.1" //val HADOOP_MAJOR_VERSION = "2" - lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, streaming) + lazy val root = Project("root", file("."), settings = rootSettings) aggregate(core, repl, examples, bagel, graph, streaming) lazy val core = Project("core", file("core"), settings = coreSettings) @@ -27,6 +27,8 @@ object SparkBuild extends Build { lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core) + lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn (core) + lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core) // A configuration to set an alternative publishLocalConfiguration @@ -161,6 +163,8 @@ object SparkBuild extends Build { def bagelSettings = sharedSettings ++ Seq(name := "spark-bagel") + def graphSettings = sharedSettings ++ Seq(name := "spark-graph") + def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", libraryDependencies ++= Seq( -- cgit v1.2.3 From 3728e1bc408a5666ee0bc0af208b296abdc92427 Mon Sep 17 00:00:00 2001 From: Reynold Xin Date: Sun, 7 Apr 2013 15:05:46 +0800 Subject: Code to run bagel vs graph experiments. 
--- .../scala/spark/bagel/examples/PageRankUtils.scala | 6 +- graph/src/main/scala/spark/graph/Analytics.scala | 16 ++++- graph/src/main/scala/spark/graph/BagelTest.scala | 71 ++++++++++++++++++++++ graph/src/main/scala/spark/graph/GraphLab.scala | 47 +++++++------- project/SparkBuild.scala | 2 +- 5 files changed, 112 insertions(+), 30 deletions(-) create mode 100644 graph/src/main/scala/spark/graph/BagelTest.scala (limited to 'project/SparkBuild.scala') diff --git a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala b/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala index b97d786ed4..df63000150 100644 --- a/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala +++ b/bagel/src/main/scala/spark/bagel/examples/PageRankUtils.scala @@ -13,16 +13,16 @@ import java.io.{InputStream, OutputStream, DataInputStream, DataOutputStream} import com.esotericsoftware.kryo._ class PageRankUtils extends Serializable { - def computeWithCombiner(numVertices: Long, epsilon: Double)( + def computeWithCombiner(numVertices: Long, epsilon: Double, terminateSteps: Int = 10)( self: PRVertex, messageSum: Option[Double], superstep: Int ): (PRVertex, Array[PRMessage]) = { val newValue = messageSum match { case Some(msgSum) if msgSum != 0 => - 0.15 / numVertices + 0.85 * msgSum + 0.15 + 0.85 * msgSum case _ => self.value } - val terminate = superstep >= 10 + val terminate = superstep >= terminateSteps val outbox: Array[PRMessage] = if (!terminate) diff --git a/graph/src/main/scala/spark/graph/Analytics.scala b/graph/src/main/scala/spark/graph/Analytics.scala index 8f2844f34f..4a7449ba4f 100644 --- a/graph/src/main/scala/spark/graph/Analytics.scala +++ b/graph/src/main/scala/spark/graph/Analytics.scala @@ -10,15 +10,27 @@ object Analytics { /** * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD */ + // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { + // // Compute the out degree of each vertex + // val pagerankGraph = graph.updateVertices[Int, (Int, Float)](graph.outDegrees, + // (vertex, deg) => (deg.getOrElse(0), 1.0F) + // ) + // GraphLab.iterateGA[(Int, Float), ED, Float](pagerankGraph)( + // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather + // (a: Float, b: Float) => a + b, // merge + // (vertex, a: Option[Float]) => (vertex.data._1, (0.15F + 0.85F * a.getOrElse(0F))), // apply + // numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } + // } def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { // Compute the out degree of each vertex val pagerankGraph = graph.updateVertices[Int, (Int, Float)](graph.outDegrees, (vertex, deg) => (deg.getOrElse(0), 1.0F) ) - GraphLab.iterateGA[(Int, Float), ED, Float](pagerankGraph)( + GraphLab.iterateGA2[(Int, Float), ED, Float](pagerankGraph)( (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather (a: Float, b: Float) => a + b, // merge - (vertex, a: Option[Float]) => (vertex.data._1, (0.15F + 0.85F * a.getOrElse(0F))), // apply + 0.0F, // default + (vertex, a: Float) => (vertex.data._1, (0.15F + 0.85F * a)), // apply numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } } diff --git a/graph/src/main/scala/spark/graph/BagelTest.scala b/graph/src/main/scala/spark/graph/BagelTest.scala new file mode 100644 index 0000000000..eee53bd6f6 --- /dev/null +++ b/graph/src/main/scala/spark/graph/BagelTest.scala @@ -0,0 +1,71 @@ +package spark.graph + +import spark._ +import spark.SparkContext._ +import 
spark.bagel.Bagel +import spark.bagel.examples._ + + +object BagelTest { + + def main(args: Array[String]) { + val host = args(0) + val taskType = args(1) + val fname = args(2) + val options = args.drop(3).map { arg => + arg.dropWhile(_ == '-').split('=') match { + case Array(opt, v) => (opt -> v) + case _ => throw new IllegalArgumentException("Invalid argument: " + arg) + } + } + + System.setProperty("spark.serializer", "spark.KryoSerializer") + //System.setProperty("spark.shuffle.compress", "false") + System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") + + var numIter = Int.MaxValue + var isDynamic = false + var tol:Float = 0.001F + var outFname = "" + var numVPart = 4 + var numEPart = 4 + + options.foreach{ + case ("numIter", v) => numIter = v.toInt + case ("dynamic", v) => isDynamic = v.toBoolean + case ("tol", v) => tol = v.toFloat + case ("output", v) => outFname = v + case ("numVPart", v) => numVPart = v.toInt + case ("numEPart", v) => numEPart = v.toInt + case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + } + + val sc = new SparkContext(host, "PageRank(" + fname + ")") + val g = Graph.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() + val startTime = System.currentTimeMillis + + val numVertices = g.vertices.count() + + val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => + (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) + } + + // Do the computation + val epsilon = 0.01 / numVertices + val messages = sc.parallelize(Array[(String, PRMessage)]()) + val utils = new PageRankUtils + val result = + Bagel.run( + sc, vertices, messages, combiner = new PRCombiner(), + numPartitions = numVPart)( + utils.computeWithCombiner(numVertices, epsilon, numIter)) + + println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) + if (!outFname.isEmpty) { + println("Saving pageranks of pages to " + outFname) + result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) + } + println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") + sc.stop() + } +} diff --git a/graph/src/main/scala/spark/graph/GraphLab.scala b/graph/src/main/scala/spark/graph/GraphLab.scala index b0efdadce9..4de453663d 100644 --- a/graph/src/main/scala/spark/graph/GraphLab.scala +++ b/graph/src/main/scala/spark/graph/GraphLab.scala @@ -6,30 +6,29 @@ import spark.RDD object GraphLab { - // def iterateGA[VD: ClassManifest, ED: ClassManifest, A: ClassManifest]( - // rawGraph: Graph[VD, ED])( - // gather: (Vid, EdgeWithVertices[VD, ED]) => A, - // merge: (A, A) => A, - // default: A, - // apply: (Vertex[VD], A) => VD, - // numIter: Int, - // gatherDirection: EdgeDirection.EdgeDirection = EdgeDirection.In) : Graph[VD, ED] = { - - // var graph = rawGraph.cache() - - // var i = 0 - // while (i < numIter) { - - // val accUpdates: RDD[(Vid, A)] = - // graph.mapReduceNeighborhood(gather, merge, default, gatherDirection) - - // def applyFunc(v: Vertex[VD], update: Option[A]): VD = { apply(v, update.get) } - // graph = graph.updateVertices(accUpdates, applyFunc).cache() - - // i += 1 - // } - // graph - // } + def iterateGA2[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( + gather: (Vid, EdgeWithVertices[VD, ED]) => A, + merge: (A, A) => A, + default: A, + apply: (Vertex[VD], A) => VD, + numIter: Int, + gatherDirection: EdgeDirection = EdgeDirection.In) : Graph[VD, ED] = { + + var g = graph.cache() + + var i = 0 + while (i < 
numIter) { + + val accUpdates: RDD[(Vid, A)] = + g.mapReduceNeighborhood(gather, merge, default, gatherDirection) + + def applyFunc(v: Vertex[VD], update: Option[A]): VD = { apply(v, update.get) } + g = g.updateVertices(accUpdates, applyFunc).cache() + + i += 1 + } + g + } def iterateGA[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( gatherFunc: (Vid, EdgeWithVertices[VD, ED]) => A, diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 6e6c72517a..56610e4385 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -27,7 +27,7 @@ object SparkBuild extends Build { lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn (core) - lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn (core) + lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn (core, bagel) lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn (core) -- cgit v1.2.3 From 55696e258456798d73325655428899c5b4931730 Mon Sep 17 00:00:00 2001 From: "Joseph E. Gonzalez" Date: Tue, 17 Sep 2013 22:42:12 -0700 Subject: GraphX now builds with all merged changes. --- .../scala/org/apache/spark/graph/Analytics.scala | 793 +++++++++++++++++++++ .../main/scala/org/apache/spark/graph/Edge.scala | 13 + .../org/apache/spark/graph/EdgeDirection.scala | 32 + .../scala/org/apache/spark/graph/EdgeTriplet.scala | 53 ++ .../main/scala/org/apache/spark/graph/Graph.scala | 395 ++++++++++ .../apache/spark/graph/GraphKryoRegistrator.scala | 24 + .../scala/org/apache/spark/graph/GraphLab.scala | 127 ++++ .../scala/org/apache/spark/graph/GraphLoader.scala | 54 ++ .../scala/org/apache/spark/graph/GraphOps.scala | 30 + .../main/scala/org/apache/spark/graph/Pregel.scala | 36 + .../main/scala/org/apache/spark/graph/Vertex.scala | 15 + .../apache/spark/graph/impl/EdgePartition.scala | 53 ++ .../apache/spark/graph/impl/EdgeTripletRDD.scala | 87 +++ .../org/apache/spark/graph/impl/GraphImpl.scala | 441 ++++++++++++ .../scala/org/apache/spark/graph/package.scala | 23 + .../org/apache/spark/graph/perf/BagelTest.scala | 76 ++ .../org/apache/spark/graph/perf/SparkTest.scala | 75 ++ .../apache/spark/graph/util/BytecodeUtils.scala | 112 +++ .../org/apache/spark/graph/util/HashUtils.scala | 21 + graph/src/main/scala/spark/graph/Analytics.scala | 793 --------------------- graph/src/main/scala/spark/graph/Edge.scala | 13 - .../src/main/scala/spark/graph/EdgeDirection.scala | 32 - graph/src/main/scala/spark/graph/EdgeTriplet.scala | 53 -- graph/src/main/scala/spark/graph/Graph.scala | 394 ---------- .../scala/spark/graph/GraphKryoRegistrator.scala | 24 - graph/src/main/scala/spark/graph/GraphLab.scala | 127 ---- graph/src/main/scala/spark/graph/GraphLoader.scala | 54 -- graph/src/main/scala/spark/graph/GraphOps.scala | 30 - graph/src/main/scala/spark/graph/Pregel.scala | 36 - graph/src/main/scala/spark/graph/Vertex.scala | 15 - .../scala/spark/graph/impl/EdgePartition.scala | 53 -- .../scala/spark/graph/impl/EdgeTripletRDD.scala | 81 --- .../main/scala/spark/graph/impl/GraphImpl.scala | 437 ------------ graph/src/main/scala/spark/graph/package.scala | 23 - .../main/scala/spark/graph/perf/BagelTest.scala | 72 -- .../main/scala/spark/graph/perf/SparkTest.scala | 72 -- .../scala/spark/graph/util/BytecodeUtils.scala | 113 --- .../main/scala/spark/graph/util/HashUtils.scala | 21 - project/SparkBuild.scala | 16 +- 39 files changed, 2469 insertions(+), 2450 deletions(-) create mode 
100644 graph/src/main/scala/org/apache/spark/graph/Analytics.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/Edge.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/Graph.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphLab.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphOps.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/Pregel.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/Vertex.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletRDD.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/package.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala create mode 100644 graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala delete mode 100644 graph/src/main/scala/spark/graph/Analytics.scala delete mode 100644 graph/src/main/scala/spark/graph/Edge.scala delete mode 100644 graph/src/main/scala/spark/graph/EdgeDirection.scala delete mode 100644 graph/src/main/scala/spark/graph/EdgeTriplet.scala delete mode 100644 graph/src/main/scala/spark/graph/Graph.scala delete mode 100644 graph/src/main/scala/spark/graph/GraphKryoRegistrator.scala delete mode 100644 graph/src/main/scala/spark/graph/GraphLab.scala delete mode 100644 graph/src/main/scala/spark/graph/GraphLoader.scala delete mode 100644 graph/src/main/scala/spark/graph/GraphOps.scala delete mode 100644 graph/src/main/scala/spark/graph/Pregel.scala delete mode 100644 graph/src/main/scala/spark/graph/Vertex.scala delete mode 100644 graph/src/main/scala/spark/graph/impl/EdgePartition.scala delete mode 100644 graph/src/main/scala/spark/graph/impl/EdgeTripletRDD.scala delete mode 100644 graph/src/main/scala/spark/graph/impl/GraphImpl.scala delete mode 100644 graph/src/main/scala/spark/graph/package.scala delete mode 100644 graph/src/main/scala/spark/graph/perf/BagelTest.scala delete mode 100644 graph/src/main/scala/spark/graph/perf/SparkTest.scala delete mode 100644 graph/src/main/scala/spark/graph/util/BytecodeUtils.scala delete mode 100644 graph/src/main/scala/spark/graph/util/HashUtils.scala (limited to 'project/SparkBuild.scala') diff --git a/graph/src/main/scala/org/apache/spark/graph/Analytics.scala b/graph/src/main/scala/org/apache/spark/graph/Analytics.scala new file mode 100644 index 0000000000..09cf81eeeb --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/Analytics.scala @@ -0,0 +1,793 @@ +package org.apache.spark.graph + +import org.apache.spark._ + + + +object Analytics extends Logging { + +// def main(args: Array[String]) { +// //pregelPagerank() +// } + + // /** + // * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD + // */ + // // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], 
numIter: Int) = { + // // // Compute the out degree of each vertex + // // val pagerankGraph = graph.updateVertices[Int, (Int, Float)](graph.outDegrees, + // // (vertex, deg) => (deg.getOrElse(0), 1.0F) + // // ) + // // GraphLab.iterateGA[(Int, Float), ED, Float](pagerankGraph)( + // // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather + // // (a: Float, b: Float) => a + b, // merge + // // (vertex, a: Option[Float]) => (vertex.data._1, (0.15F + 0.85F * a.getOrElse(0F))), // apply + // // numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } + // // } + // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { + // // Compute the out degree of each vertex + // val pagerankGraph = graph.updateVertices[Int, (Int, Double)](graph.outDegrees, + // (vertex, deg) => (deg.getOrElse(0), 1.0) + // ) + // GraphLab.iterateGA2[(Int, Double), ED, Double](pagerankGraph)( + // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather + // (a: Double, b: Double) => a + b, // merge + // 0.0, // default + // (vertex, a: Double) => (vertex.data._1, (0.15 + 0.85 * a)), // apply + // numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } + // } + + /** + * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD + */ + def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + numIter: Int, + resetProb: Double = 0.15) = { + // Compute the out degree of each vertex + val pagerankGraph = graph.leftJoinVertices[Int, (Int, Double)](graph.outDegrees, + (vertex, deg) => (deg.getOrElse(0), 1.0) + ) + Pregel.iterate[(Int, Double), ED, Double](pagerankGraph)( + (vertex, a: Double) => (vertex.data._1, (resetProb + (1.0 - resetProb) * a)), // apply + (me_id, edge) => Some(edge.src.data._2 / edge.src.data._1), // gather + (a: Double, b: Double) => a + b, // merge + 1.0, + numIter).mapVertices{ case Vertex(id, (outDeg, r)) => r } + } + + /** + * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD + */ + def dynamicPagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + tol: Float, + maxIter: Int = Integer.MAX_VALUE, + resetProb: Double = 0.15) = { + // Compute the out degree of each vertex + val pagerankGraph = graph.leftJoinVertices[Int, (Int, Double, Double)](graph.outDegrees, + (vertex, degIter) => (degIter.sum, 1.0, 1.0) + ) + + // Run PageRank + GraphLab.iterate(pagerankGraph)( + (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather + (a: Double, b: Double) => a + b, + (vertex, a: Option[Double]) => + (vertex.data._1, (resetProb + (1.0 - resetProb) * a.getOrElse(0.0)), vertex.data._2), // apply + (me_id, edge) => math.abs(edge.src.data._3 - edge.src.data._2) > tol, // scatter + maxIter).mapVertices { case Vertex(vid, data) => data._2 } + } + + + /** + * Compute the connected component membership of each vertex + * and return an RDD with the vertex value containing the + * lowest vertex id in the connected component containing + * that vertex. 
+ */ + def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]) = { + val ccGraph = graph.mapVertices { case Vertex(vid, _) => vid } + + GraphLab.iterate(ccGraph)( + (me_id, edge) => edge.otherVertex(me_id).data, // gather + (a: Vid, b: Vid) => math.min(a, b), // merge + (v, a: Option[Vid]) => math.min(v.data, a.getOrElse(Long.MaxValue)), // apply + (me_id, edge) => (edge.vertex(me_id).data < edge.otherVertex(me_id).data), // scatter + gatherDirection = EdgeDirection.Both, scatterDirection = EdgeDirection.Both + ) + } + + // /** + // * Compute the shortest path to a set of markers + // */ + // def shortestPath[VD: Manifest](graph: Graph[VD, Float], sources: List[Int], numIter: Int) = { + // val sourceSet = sources.toSet + // val spGraph = graph.mapVertices { + // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue)) + // } + // GraphLab.iterateGA[Float, Float, Float](spGraph)( + // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // (a: Float, b: Float) => math.min(a, b), // merge + // (v, a: Option[Float]) => math.min(v.data, a.getOrElse(Float.MaxValue)), // apply + // numIter, + // gatherDirection = EdgeDirection.In) + // } + + // // /** + // // * Compute the connected component membership of each vertex + // // * and return an RDD with the vertex value containing the + // // * lowest vertex id in the connected component containing + // // * that vertex. + // // */ + // // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + // // numIter: Int = Int.MaxValue) = { + + // // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) + // // val edges = graph.edges // .mapValues(v => None) + // // val ccGraph = new Graph(vertices, edges) + + // // ccGraph.iterateDynamic( + // // (me_id, edge) => edge.otherVertex(me_id).data, // gather + // // (a: Int, b: Int) => math.min(a, b), // merge + // // Integer.MAX_VALUE, + // // (v, a: Int) => math.min(v.data, a), // apply + // // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter + // // numIter, + // // gatherEdges = EdgeDirection.Both, + // // scatterEdges = EdgeDirection.Both).vertices + // // // + // // // graph_ret.vertices.collect.foreach(println) + // // // graph_ret.edges.take(10).foreach(println) + // // } + + + // // /** + // // * Compute the shortest path to a set of markers + // // */ + // // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Float], + // // sources: List[Int], numIter: Int) = { + // // val sourceSet = sources.toSet + // // val vertices = graph.vertices.mapPartitions( + // // iter => iter.map { + // // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue) ) + // // }); + + // // val edges = graph.edges // .mapValues(v => None) + // // val spGraph = new Graph(vertices, edges) + + // // val niterations = Int.MaxValue + // // spGraph.iterateDynamic( + // // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // // (a: Float, b: Float) => math.min(a, b), // merge + // // Float.MaxValue, + // // (v, a: Float) => math.min(v.data, a), // apply + // // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter + // // numIter, + // // gatherEdges = EdgeDirection.In, + // // scatterEdges = EdgeDirection.Out).vertices + // // } + + + // // /** + // // * + // // */ + // // def alternatingLeastSquares[VD: ClassManifest, ED: ClassManifest](graph: Graph[VD, Double], 
+ // // latentK: Int, lambda: Double, numIter: Int) = { + // // val vertices = graph.vertices.mapPartitions( _.map { + // // case (vid, _) => (vid, Array.fill(latentK){ scala.util.Random.nextDouble() } ) + // // }).cache + // // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // // val edges = graph.edges // .mapValues(v => None) + // // val alsGraph = new Graph(vertices, edges) + // // alsGraph.numVPart = graph.numVPart + // // alsGraph.numEPart = graph.numEPart + + // // val niterations = Int.MaxValue + // // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( + // // (me_id, edge) => { // gather + // // val X = edge.otherVertex(me_id).data + // // val y = edge.data + // // val Xy = X.map(_ * y) + // // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray + // // (Xy, XtX) + // // }, + // // (a, b) => { + // // // The difference between the while loop and the zip is a FACTOR OF TWO in overall + // // // runtime + // // var i = 0 + // // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } + // // i = 0 + // // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } + // // a + // // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) + // // }, + // // (Array.empty[Double], Array.empty[Double]), // default value is empty + // // (vertex, accum) => { // apply + // // val XyArray = accum._1 + // // val XtXArray = accum._2 + // // if(XyArray.isEmpty) vertex.data // no neighbors + // // else { + // // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => + // // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + + // // (if(i == j) lambda else 1.0F) //regularization + // // } + // // val Xy = DenseMatrix.create(latentK,1,XyArray) + // // val w = XtX \ Xy + // // w.data + // // } + // // }, + // // (me_id, edge) => true, + // // numIter, + // // gatherEdges = EdgeDirection.Both, + // // scatterEdges = EdgeDirection.Both, + // // vertex => vertex.id < maxUser).vertices + // // } + + // def main(args: Array[String]) = { + // val host = args(0) + // val taskType = args(1) + // val fname = args(2) + // val options = args.drop(3).map { arg => + // arg.dropWhile(_ == '-').split('=') match { + // case Array(opt, v) => (opt -> v) + // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) + // } + // } + + // System.setProperty("spark.serializer", "spark.KryoSerializer") + // //System.setProperty("spark.shuffle.compress", "false") + // System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") + + // taskType match { + // case "pagerank" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + // var tol:Float = 0.001F + // var outFname = "" + // var numVPart = 4 + // var numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("tol", v) => tol = v.toFloat + // case ("output", v) => outFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| PageRank |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // if(isDynamic) println(" \t |-> Tolerance: " + tol) + 
// println(" \tNumIter: " + numIter) + // println("======================================") + + // val sc = new SparkContext(host, "PageRank(" + fname + ")") + + // val graph = Graph.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() + + // val startTime = System.currentTimeMillis + // logInfo("GRAPHX: starting tasks") + // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) + // logInfo("GRAPHX: Number of edges " + graph.edges.count) + + // val pr = Analytics.pagerank(graph, numIter) + // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) + // // else Analytics.pagerank(graph, numIter) + // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) + // if (!outFname.isEmpty) { + // println("Saving pageranks of pages to " + outFname) + // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) + // } + // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") + // sc.stop() + // } + + // case "cc" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| Connected Components |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // println("======================================") + + // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") + // val graph = Graph.textFile(sc, fname, a => 1.0F) + // val cc = Analytics.connectedComponents(graph, numIter) + // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) + // // else Analytics.connectedComponents(graph, numIter) + // println("Components: " + cc.vertices.map(_.data).distinct()) + + // sc.stop() + // } + + // case "shortestpath" => { + + // var numIter = Int.MaxValue + // var isDynamic = true + // var sources: List[Int] = List.empty + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("source", v) => sources ++= List(v.toInt) + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + + // if(sources.isEmpty) { + // println("No sources provided!") + // sys.exit(1) + // } + + // println("======================================") + // println("| Shortest Path |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // println(" \tSources: [" + sources.mkString(", ") + "]") + // println("======================================") + + // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") + // val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) ) + // val sp = Analytics.shortestPath(graph, sources, numIter) + // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) + // // else Analytics.shortestPath(graph, sources, numIter) + // 
println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) + + // sc.stop() + // } + + + // // case "als" => { + + // // var numIter = 5 + // // var lambda = 0.01 + // // var latentK = 10 + // // var usersFname = "usersFactors.tsv" + // // var moviesFname = "moviesFname.tsv" + // // var numVPart = 4 + // // var numEPart = 4 + + // // options.foreach{ + // // case ("numIter", v) => numIter = v.toInt + // // case ("lambda", v) => lambda = v.toDouble + // // case ("latentK", v) => latentK = v.toInt + // // case ("usersFname", v) => usersFname = v + // // case ("moviesFname", v) => moviesFname = v + // // case ("numVPart", v) => numVPart = v.toInt + // // case ("numEPart", v) => numEPart = v.toInt + // // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // // } + + // // println("======================================") + // // println("| Alternating Least Squares |") + // // println("--------------------------------------") + // // println(" Using parameters:") + // // println(" \tNumIter: " + numIter) + // // println(" \tLambda: " + lambda) + // // println(" \tLatentK: " + latentK) + // // println(" \tusersFname: " + usersFname) + // // println(" \tmoviesFname: " + moviesFname) + // // println("======================================") + + // // val sc = new SparkContext(host, "ALS(" + fname + ")") + // // val graph = Graph.textFile(sc, fname, a => a(0).toDouble ) + // // graph.numVPart = numVPart + // // graph.numEPart = numEPart + + // // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) + // // assert(maxUser < minMovie) + + // // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache + // // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t")) + // // .saveAsTextFile(usersFname) + // // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) + // // .saveAsTextFile(moviesFname) + + // // sc.stop() + // // } + + + // case _ => { + // println("Invalid task type.") + // } + // } + // } + + // /** + // * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD + // */ + // def dynamicPagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + // tol: Double, maxIter: Int = 10) = { + // // Compute the out degree of each vertex + // val pagerankGraph = graph.updateVertices[Int, (Int, Double, Double)](graph.outDegrees, + // (vertex, degIter) => (degIter.sum, 1.0, 1.0) + // ) + + // // Run PageRank + // GraphLab.iterateGAS(pagerankGraph)( + // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather + // (a: Double, b: Double) => a + b, + // (vertex, a: Option[Double]) => + // (vertex.data._1, (0.15 + 0.85 * a.getOrElse(0.0)), vertex.data._2), // apply + // (me_id, edge) => math.abs(edge.src.data._2 - edge.dst.data._1) > tol, // scatter + // maxIter).mapVertices { case Vertex(vid, data) => Vertex(vid, data._2) } + // } + + // /** + // * Compute the connected component membership of each vertex + // * and return an RDD with the vertex value containing the + // * lowest vertex id in the connected component containing + // * that vertex. 
+ // */ + // def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { + // val ccGraph = graph.mapVertices { case Vertex(vid, _) => Vertex(vid, vid) } + // GraphLab.iterateGA[Int, ED, Int](ccGraph)( + // (me_id, edge) => edge.otherVertex(me_id).data, // gather + // (a: Int, b: Int) => math.min(a, b), // merge + // (v, a: Option[Int]) => math.min(v.data, a.getOrElse(Integer.MAX_VALUE)), // apply + // numIter, + // gatherDirection = EdgeDirection.Both) + // } + + // /** + // * Compute the shortest path to a set of markers + // */ + // def shortestPath[VD: Manifest](graph: Graph[VD, Double], sources: List[Int], numIter: Int) = { + // val sourceSet = sources.toSet + // val spGraph = graph.mapVertices { + // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0 else Double.MaxValue)) + // } + // GraphLab.iterateGA[Double, Double, Double](spGraph)( + // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // (a: Double, b: Double) => math.min(a, b), // merge + // (v, a: Option[Double]) => math.min(v.data, a.getOrElse(Double.MaxValue)), // apply + // numIter, + // gatherDirection = EdgeDirection.In) + // } + + // /** + // * Compute the connected component membership of each vertex + // * and return an RDD with the vertex value containing the + // * lowest vertex id in the connected component containing + // * that vertex. + // */ + // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + // numIter: Int = Int.MaxValue) = { + + // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) + // val edges = graph.edges // .mapValues(v => None) + // val ccGraph = new Graph(vertices, edges) + + // ccGraph.iterateDynamic( + // (me_id, edge) => edge.otherVertex(me_id).data, // gather + // (a: Int, b: Int) => math.min(a, b), // merge + // Integer.MAX_VALUE, + // (v, a: Int) => math.min(v.data, a), // apply + // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter + // numIter, + // gatherEdges = EdgeDirection.Both, + // scatterEdges = EdgeDirection.Both).vertices + // // + // // graph_ret.vertices.collect.foreach(println) + // // graph_ret.edges.take(10).foreach(println) + // } + + + // /** + // * Compute the shortest path to a set of markers + // */ + // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Double], + // sources: List[Int], numIter: Int) = { + // val sourceSet = sources.toSet + // val vertices = graph.vertices.mapPartitions( + // iter => iter.map { + // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Double.MaxValue) ) + // }); + + // val edges = graph.edges // .mapValues(v => None) + // val spGraph = new Graph(vertices, edges) + + // val niterations = Int.MaxValue + // spGraph.iterateDynamic( + // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // (a: Double, b: Double) => math.min(a, b), // merge + // Double.MaxValue, + // (v, a: Double) => math.min(v.data, a), // apply + // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter + // numIter, + // gatherEdges = EdgeDirection.In, + // scatterEdges = EdgeDirection.Out).vertices + // } + + + // /** + // * + // */ + // def alternatingLeastSquares[VD: ClassManifest, ED: ClassManifest](graph: Graph[VD, Double], + // latentK: Int, lambda: Double, numIter: Int) = { + // val vertices = graph.vertices.mapPartitions( _.map { + // case (vid, _) => (vid, Array.fill(latentK){ 
scala.util.Random.nextDouble() } ) + // }).cache + // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // val edges = graph.edges // .mapValues(v => None) + // val alsGraph = new Graph(vertices, edges) + // alsGraph.numVPart = graph.numVPart + // alsGraph.numEPart = graph.numEPart + + // val niterations = Int.MaxValue + // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( + // (me_id, edge) => { // gather + // val X = edge.otherVertex(me_id).data + // val y = edge.data + // val Xy = X.map(_ * y) + // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray + // (Xy, XtX) + // }, + // (a, b) => { + // // The difference between the while loop and the zip is a FACTOR OF TWO in overall + // // runtime + // var i = 0 + // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } + // i = 0 + // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } + // a + // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) + // }, + // (Array.empty[Double], Array.empty[Double]), // default value is empty + // (vertex, accum) => { // apply + // val XyArray = accum._1 + // val XtXArray = accum._2 + // if(XyArray.isEmpty) vertex.data // no neighbors + // else { + // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => + // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + + // (if(i == j) lambda else 1.0F) //regularization + // } + // val Xy = DenseMatrix.create(latentK,1,XyArray) + // val w = XtX \ Xy + // w.data + // } + // }, + // (me_id, edge) => true, + // numIter, + // gatherEdges = EdgeDirection.Both, + // scatterEdges = EdgeDirection.Both, + // vertex => vertex.id < maxUser).vertices + // } + + // def main(args: Array[String]) = { + // val host = args(0) + // val taskType = args(1) + // val fname = args(2) + // val options = args.drop(3).map { arg => + // arg.dropWhile(_ == '-').split('=') match { + // case Array(opt, v) => (opt -> v) + // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) + // } + // } + + // System.setProperty("spark.serializer", "spark.KryoSerializer") + // //System.setProperty("spark.shuffle.compress", "false") + // System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") + + // taskType match { + // case "pagerank" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + // var tol:Double = 0.001 + // var outFname = "" + // var numVPart = 4 + // var numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("tol", v) => tol = v.toDouble + // case ("output", v) => outFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| PageRank |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // if(isDynamic) println(" \t |-> Tolerance: " + tol) + // println(" \tNumIter: " + numIter) + // println("======================================") + + // val sc = new SparkContext(host, "PageRank(" + fname + ")") + + // val graph = Graph.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache() + + // val startTime = System.currentTimeMillis + 
// logInfo("GRAPHX: starting tasks") + // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) + // logInfo("GRAPHX: Number of edges " + graph.edges.count) + + // val pr = Analytics.pagerank(graph, numIter) + // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) + // // else Analytics.pagerank(graph, numIter) + // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) + // if (!outFname.isEmpty) { + // println("Saving pageranks of pages to " + outFname) + // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) + // } + // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") + // sc.stop() + // } + + // case "cc" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| Connected Components |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // println("======================================") + + // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") + // val graph = Graph.textFile(sc, fname, a => 1.0) + // val cc = Analytics.connectedComponents(graph, numIter) + // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) + // // else Analytics.connectedComponents(graph, numIter) + // println("Components: " + cc.vertices.map(_.data).distinct()) + + // sc.stop() + // } + + // case "shortestpath" => { + + // var numIter = Int.MaxValue + // var isDynamic = true + // var sources: List[Int] = List.empty + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("source", v) => sources ++= List(v.toInt) + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + + // if(sources.isEmpty) { + // println("No sources provided!") + // sys.exit(1) + // } + + // println("======================================") + // println("| Shortest Path |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // println(" \tSources: [" + sources.mkString(", ") + "]") + // println("======================================") + + // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") + // val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) ) + // val sp = Analytics.shortestPath(graph, sources, numIter) + // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) + // // else Analytics.shortestPath(graph, sources, numIter) + // println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) + + // sc.stop() + // } + + + // case "als" => { + + // var numIter = 5 + // var lambda = 0.01 + // var latentK = 10 + // var usersFname = "usersFactors.tsv" + // var moviesFname = "moviesFname.tsv" + // var numVPart = 4 + // var 
numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("lambda", v) => lambda = v.toDouble + // case ("latentK", v) => latentK = v.toInt + // case ("usersFname", v) => usersFname = v + // case ("moviesFname", v) => moviesFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // println("======================================") + // println("| Alternating Least Squares |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tNumIter: " + numIter) + // println(" \tLambda: " + lambda) + // println(" \tLatentK: " + latentK) + // println(" \tusersFname: " + usersFname) + // println(" \tmoviesFname: " + moviesFname) + // println("======================================") + + // val sc = new SparkContext(host, "ALS(" + fname + ")") + // val graph = Graph.textFile(sc, fname, a => a(0).toDouble ) + // graph.numVPart = numVPart + // graph.numEPart = numEPart + + // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) + // assert(maxUser < minMovie) + + // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache + // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(usersFname) + // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(moviesFname) + + // sc.stop() + // } + + + // case _ => { + // println("Invalid task type.") + // } + // } + // } + +} diff --git a/graph/src/main/scala/org/apache/spark/graph/Edge.scala b/graph/src/main/scala/org/apache/spark/graph/Edge.scala new file mode 100644 index 0000000000..20539b8af0 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/Edge.scala @@ -0,0 +1,13 @@ +package org.apache.spark.graph + + +/** + * A single directed edge consisting of a source id, target id, + * and the data associated with the Edgee. + * + * @tparam ED type of the edge attribute + */ +case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] ( + var src: Vid = 0, + var dst: Vid = 0, + var data: ED = nullValue[ED]) diff --git a/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala b/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala new file mode 100644 index 0000000000..99af2d5458 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala @@ -0,0 +1,32 @@ +package org.apache.spark.graph + + +/** + * The direction of directed edge relative to a vertex used to select + * the set of adjacent neighbors when running a neighborhood query. + */ +sealed abstract class EdgeDirection { + def reverse: EdgeDirection = this match { + case EdgeDirection.In => EdgeDirection.In + case EdgeDirection.Out => EdgeDirection.Out + case EdgeDirection.Both => EdgeDirection.Both + } +} + + +object EdgeDirection { + /** + * Edges arriving at a vertex. 
+ */ + case object In extends EdgeDirection + + /** + * Edges originating from a vertex + */ + case object Out extends EdgeDirection + + /** + * All edges adjacent to a vertex + */ + case object Both extends EdgeDirection +} diff --git a/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala b/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala new file mode 100644 index 0000000000..4ade1d7333 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala @@ -0,0 +1,53 @@ +package org.apache.spark.graph + +/** + * An edge triplet represents two vertices and edge along with their attributes. + * + * @tparam VD the type of the vertex attribute. + * @tparam ED the type of the edge attribute + */ +class EdgeTriplet[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD, + @specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] { + /** + * The vertex (id and attribute) corresponding to the source vertex. + */ + var src: Vertex[VD] = _ + + /** + * The vertex (id and attribute) corresponding to the target vertex. + */ + var dst: Vertex[VD] = _ + + /** + * The attribute associated with the edge. + */ + var data: ED = _ + + /** + * Given one vertex in the edge return the other vertex. + * + * @param vid the id one of the two vertices on the edge. + * @return the other vertex on the edge. + */ + def otherVertex(vid: Vid): Vertex[VD] = + if (src.id == vid) dst else { assert(dst.id == vid); src } + + /** + * Get the vertex object for the given vertex in the edge. + * + * @param vid the id of one of the two vertices on the edge + * @return the vertex object with that id. + */ + def vertex(vid: Vid): Vertex[VD] = + if (src.id == vid) src else { assert(dst.id == vid); dst } + + /** + * Return the relative direction of the edge to the corresponding vertex. + * + * @param vid the id of one of the two vertices in the edge. + * @return the relative direction of the edge to the corresponding vertex. + */ + def relativeDirection(vid: Vid): EdgeDirection = + if (vid == src.id) EdgeDirection.Out else { assert(vid == dst.id); EdgeDirection.In } + +} diff --git a/graph/src/main/scala/org/apache/spark/graph/Graph.scala b/graph/src/main/scala/org/apache/spark/graph/Graph.scala new file mode 100644 index 0000000000..1fb22c56ff --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/Graph.scala @@ -0,0 +1,395 @@ +package org.apache.spark.graph + + +import org.apache.spark.rdd.RDD + + + +/** + * The Graph abstractly represents a graph with arbitrary objects associated + * with vertices and edges. The graph provides basic operations to access and + * manipulate the data associated with vertices and edges as well as the + * underlying structure. Like Spark RDDs, the graph is a functional + * data-structure in which mutating operations return new graphs. + * + * @tparam VD The type of object associated with each vertex. + * + * @tparam ED The type of object associated with each edge + */ +abstract class Graph[VD: ClassManifest, ED: ClassManifest] { + + /** + * Get the vertices and their data. + * + * @return An RDD containing the vertices in this graph + * + * @see Vertex for the vertex type. + * + * @todo should vertices return tuples instead of vertex objects? + */ + def vertices: RDD[Vertex[VD]] + + /** + * Get the Edges and their data as an RDD. The entries in the RDD contain + * just the source id and target id along with the edge data. + * + * @return An RDD containing the edges in this graph + * + * @see Edge for the edge type. 
+ * @see edgesWithVertices to get an RDD which contains all the edges along + * with their vertex data. + * + * @todo Should edges return 3 tuples instead of Edge objects? In this case + * we could rename EdgeTriplet to Edge? + */ + def edges: RDD[Edge[ED]] + + /** + * Get the edges with the vertex data associated with the adjacent pair of + * vertices. + * + * @return An RDD containing edge triplets. + * + * @example This operation might be used to evaluate a graph coloring where + * we would like to check that both vertices are a different color. + * {{{ + * type Color = Int + * val graph: Graph[Color, Int] = Graph.textFile("hdfs://file.tsv") + * val numInvalid = graph.edgesWithVertices() + * .map(e => if(e.src.data == e.dst.data) 1 else 0).sum + * }}} + * + * @see edges() If only the edge data and adjacent vertex ids are required. + * + */ + def triplets: RDD[EdgeTriplet[VD, ED]] + + /** + * Return a graph that is cached when first created. This is used to pin a + * graph in memory enabling multiple queries to reuse the same construction + * process. + * + * @see RDD.cache() for a more detailed explanation of caching. + */ + def cache(): Graph[VD, ED] + + /** + * Construct a new graph where each vertex value has been transformed by the + * map function. + * + * @note This graph is not changed and that the new graph has the same + * structure. As a consequence the underlying index structures can be + * reused. + * + * @param map the function from a vertex object to a new vertex value. + * + * @tparam VD2 the new vertex data type + * + * @example We might use this operation to change the vertex values from one + * type to another to initialize an algorithm. + * {{{ + * val rawGraph: Graph[(), ()] = Graph.textFile("hdfs://file") + * val root = 42 + * var bfsGraph = rawGraph + * .mapVertices[Int](v => if(v.id == 0) 0 else Math.MaxValue) + * }}} + * + */ + def mapVertices[VD2: ClassManifest](map: Vertex[VD] => VD2): Graph[VD2, ED] + + /** + * Construct a new graph where each the value of each edge is transformed by + * the map operation. This function is not passed the vertex value for the + * vertices adjacent to the edge. If vertex values are desired use the + * mapEdgesWithVertices function. + * + * @note This graph is not changed and that the new graph has the same + * structure. As a consequence the underlying index structures can be + * reused. + * + * @param map the function from an edge object to a new edge value. + * + * @tparam ED2 the new edge data type + * + * @example This function might be used to initialize edge attributes. + * + */ + def mapEdges[ED2: ClassManifest](map: Edge[ED] => ED2): Graph[VD, ED2] + + /** + * Construct a new graph where each the value of each edge is transformed by + * the map operation. This function passes vertex values for the adjacent + * vertices to the map function. If adjacent vertex values are not required, + * consider using the mapEdges function instead. + * + * @note This graph is not changed and that the new graph has the same + * structure. As a consequence the underlying index structures can be + * reused. + * + * @param map the function from an edge object to a new edge value. + * + * @tparam ED2 the new edge data type + * + * @example This function might be used to initialize edge attributes based + * on the attributes associated with each vertex. 
+ * {{{ + * val rawGraph: Graph[Int, Int] = someLoadFunction() + * val graph = rawGraph.mapEdgesWithVertices[Int]( edge => + * edge.src.data - edge.dst.data) + * }}} + * + */ + def mapTriplets[ED2: ClassManifest]( + map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] + + + /** + * Remove edges conntecting vertices that are not in the graph. + * + * @todo remove this function and ensure that for a graph G=(V,E): + * if (u,v) in E then u in V and v in V + */ + def correctEdges(): Graph[VD, ED] + + /** + * Construct a new graph with all the edges reversed. If this graph contains + * an edge from a to b then the returned graph contains an edge from b to a. + * + */ + def reverse: Graph[VD, ED] + + + /** + * This function takes a vertex and edge predicate and constructs the subgraph + * that consists of vertices and edges that satisfy the predict. The resulting + * graph contains the vertices and edges that satisfy: + * + * V' = {v : for all v in V where vpred(v)} + * E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)} + * + * @param epred the edge predicate which takes a triplet and evaluates to true + * if the edge is to remain in the subgraph. Note that only edges in which both + * vertices satisfy the vertex predicate are considered. + * + * @param vpred the vertex predicate which takes a vertex object and evaluates + * to true if the vertex is to be included in the subgraph + * + * @return the subgraph containing only the vertices and edges that satisfy the + * predicates. + */ + def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (_ => true), + vpred: Vertex[VD] => Boolean = (_ => true) ): Graph[VD, ED] + + + // /** + // * Combine the attrributes of edges connecting the same vertices. + // * + // * @todo Do we want to support this function + // */ + // def combineEdges(reduce: (ED, ED) => ED): Graph[VD, ED] + + + /** + * This function is used to compute a statistic for the neighborhood of each + * vertex. + * + * This is one of the core functions in the Graph API in that enables + * neighborhood level computation. For example this function can be used to + * count neighbors satisfying a predicate or implement PageRank. + * + * @note The returned RDD may contain fewer entries than their are vertices + * in the graph. This is because some vertices may not have neighbors or the + * map function may return None for all neighbors. + * + * @param mapFunc the function applied to each edge adjacent to each vertex. + * The mapFunc can optionally return None in which case it does not + * contribute to the final sum. + * @param mergeFunc the function used to merge the results of each map + * operation. + * @param direction the direction of edges to consider (e.g., In, Out, Both). + * @tparam VD2 The returned type of the aggregation operation. + * + * @return A Spark.RDD containing tuples of vertex identifiers and thee + * resulting value. Note that the returned RDD may contain fewer vertices + * than in the original graph since some vertices may not have neighbors or + * the map function could return None for all neighbors. 
+ * + * @example We can use this function to compute the average follower age for + * each user + * {{{ + * val graph: Graph[Int,Int] = loadGraph() + * val averageFollowerAge: RDD[(Int, Int)] = + * graph.aggregateNeigbhros[(Int,Double)]( + * (vid, edge) => (edge.otherVertex(vid).data, 1), + * (a, b) => (a._1 + b._1, a._2 + b._2), + * EdgeDirection.In) + * .mapValues{ case (sum,followers) => sum.toDouble / followers} + * }}} + * + */ + def aggregateNeighbors[VD2: ClassManifest]( + mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], + mergeFunc: (VD2, VD2) => VD2, + direction: EdgeDirection) + : RDD[(Vid, VD2)] + + + /** + * This function is used to compute a statistic for the neighborhood of each + * vertex and returns a value for all vertices (including those without + * neighbors). + * + * This is one of the core functions in the Graph API in that enables + * neighborhood level computation. For example this function can be used to + * count neighbors satisfying a predicate or implement PageRank. + * + * @note Because the a default value is provided all vertices will have a + * corresponding entry in the returned RDD. + * + * @param mapFunc the function applied to each edge adjacent to each vertex. + * The mapFunc can optionally return None in which case it does not + * contribute to the final sum. + * @param reduceFunc the function used to merge the results of each map + * operation. + * @param default the default value to use for each vertex if it has no + * neighbors or the map function repeatedly evaluates to none + * @param direction the direction of edges to consider (e.g., In, Out, Both). + * @tparam VD2 The returned type of the aggregation operation. + * + * @return A Spark.RDD containing tuples of vertex identifiers and + * their resulting value. There will be exactly one entry for ever vertex in + * the original graph. + * + * @example We can use this function to compute the average follower age + * for each user + * {{{ + * val graph: Graph[Int,Int] = loadGraph() + * val averageFollowerAge: RDD[(Int, Int)] = + * graph.aggregateNeigbhros[(Int,Double)]( + * (vid, edge) => (edge.otherVertex(vid).data, 1), + * (a, b) => (a._1 + b._1, a._2 + b._2), + * -1, + * EdgeDirection.In) + * .mapValues{ case (sum,followers) => sum.toDouble / followers} + * }}} + * + * @todo Should this return a graph with the new vertex values? + * + */ + def aggregateNeighbors[VD2: ClassManifest]( + mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], + reduceFunc: (VD2, VD2) => VD2, + default: VD2, // Should this be a function or a value? + direction: EdgeDirection) + : RDD[(Vid, VD2)] + + + /** + * Join the vertices with an RDD and then apply a function from the the + * vertex and RDD entry to a new vertex value and type. The input table should + * contain at most one entry for each vertex. If no entry is provided the + * map function is invoked passing none. + * + * @tparam U the type of entry in the table of updates + * @tparam VD2 the new vertex value type + * + * @param table the table to join with the vertices in the graph. The table + * should contain at most one entry for each vertex. + * @param mapFunc the function used to compute the new vertex values. The + * map function is invoked for all vertices, even those that do not have a + * corresponding entry in the table. + * + * @example This function is used to update the vertices with new values + * based on external data. 
+   * For example, we could add the out-degree to each vertex record:
+   * {{{
+   * val rawGraph: Graph[(), ()] = Graph.textFile("webgraph")
+   * val outDeg: RDD[(Vid, Int)] = rawGraph.outDegrees
+   * val graph = rawGraph.leftJoinVertices[Int, Int](outDeg,
+   *   (v, deg) => deg.getOrElse(0) )
+   * }}}
+   *
+   * @todo Should this function be curried to enable type inference? For
+   * example
+   * {{{
+   * graph.leftJoinVertices(tbl)( (v, row) => row.getOrElse(0) )
+   * }}}
+   * @todo Is leftJoinVertices the right name?
+   */
+  def leftJoinVertices[U: ClassManifest, VD2: ClassManifest](
+    table: RDD[(Vid, U)],
+    mapFunc: (Vertex[VD], Option[U]) => VD2)
+    : Graph[VD2, ED]
+
+  /**
+   * Join the vertices with an RDD and then apply a function from the
+   * vertex and RDD entry to a new vertex value. The input table should
+   * contain at most one entry for each vertex. If no entry is provided the
+   * map function is skipped and the old value is used.
+   *
+   * @tparam U the type of entry in the table of updates
+   * @param table the table to join with the vertices in the graph. The table
+   * should contain at most one entry for each vertex.
+   * @param mapFunc the function used to compute the new vertex values. The
+   * map function is invoked only for vertices with a corresponding entry in
+   * the table; otherwise the old vertex value is used.
+   *
+   * @note For small tables this function can be much more efficient than
+   * leftJoinVertices.
+   *
+   * @example This function is used to update the vertices with new values
+   * based on external data. For example, we could add the out-degree to each
+   * vertex record:
+   * {{{
+   * val rawGraph: Graph[Int, ()] = Graph.textFile("webgraph")
+   *   .mapVertices(v => 0)
+   * val outDeg: RDD[(Vid, Int)] = rawGraph.outDegrees
+   * val graph = rawGraph.joinVertices[Int](outDeg,
+   *   (v, deg) => deg )
+   * }}}
+   *
+   * @todo Should this function be curried to enable type inference? For
+   * example
+   * {{{
+   * graph.joinVertices(tbl)( (v, row) => row )
+   * }}}
+   */
+  def joinVertices[U: ClassManifest](
+    table: RDD[(Vid, U)],
+    mapFunc: (Vertex[VD], U) => VD)
+    : Graph[VD, ED]
+
+  // Save a copy of the GraphOps object so there is always one unique GraphOps object
+  // for a given Graph object, and thus the lazy vals in GraphOps would work as intended.
+  val ops = new GraphOps(this)
+}
+
+
+object Graph {
+
+  import org.apache.spark.graph.impl._
+  import org.apache.spark.SparkContext._
+
+  def apply(rawEdges: RDD[(Vid, Vid)], uniqueEdges: Boolean = true): Graph[Int, Int] = {
+    // Reduce to unique edges.
+    val edges: RDD[Edge[Int]] =
+      if (uniqueEdges) {
+        rawEdges.map((_, 1)).reduceByKey(_ + _).map { case ((s, t), cnt) => Edge(s, t, cnt) }
+      } else {
+        rawEdges.map { case (s, t) => Edge(s, t, 1) }
+      }
+    // Determine unique vertices
+    val vertices: RDD[Vertex[Int]] = edges.flatMap{ case Edge(s, t, cnt) => Array((s, 1), (t, 1)) }
+      .reduceByKey(_ + _)
+      .map{ case (id, deg) => Vertex(id, deg) }
+    // Return graph
+    new GraphImpl(vertices, edges)
+  }
+
+  def apply[VD: ClassManifest, ED: ClassManifest](
+    vertices: RDD[Vertex[VD]], edges: RDD[Edge[ED]]): Graph[VD, ED] = {
+    new GraphImpl(vertices, edges)
+  }
+
+  implicit def graphToGraphOps[VD: ClassManifest, ED: ClassManifest](g: Graph[VD, ED]) = g.ops
+}
diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala b/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala
new file mode 100644
index 0000000000..13a22f9051
--- /dev/null
+++ b/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala
@@ -0,0 +1,24 @@
+package org.apache.spark.graph
+
+import com.esotericsoftware.kryo.Kryo
+
+import org.apache.spark.serializer.KryoRegistrator
+
+
+class GraphKryoRegistrator extends KryoRegistrator {
+
+  def registerClasses(kryo: Kryo) {
+    //kryo.register(classOf[(Int, Float, Float)])
+    registerClass[Int, Int, Int](kryo)
+
+    // This avoids a large number of hash table lookups.
+    kryo.setReferences(false)
+  }
+
+  private def registerClass[VD: Manifest, ED: Manifest, VD2: Manifest](kryo: Kryo) {
+    kryo.register(classOf[Vertex[VD]])
+    kryo.register(classOf[Edge[ED]])
+    kryo.register(classOf[MutableTuple2[VD, VD2]])
+    kryo.register(classOf[(Vid, VD2)])
+  }
+}
diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala b/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala
new file mode 100644
index 0000000000..1dba813e91
--- /dev/null
+++ b/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala
@@ -0,0 +1,127 @@
+package org.apache.spark.graph
+
+import scala.collection.JavaConversions._
+import org.apache.spark.rdd.RDD
+
+/**
+ * This object implements the GraphLab gather-apply-scatter API.
+ */
+object GraphLab {
+
+  /**
+   * Execute the GraphLab Gather-Apply-Scatter API.
+   *
+   * @todo finish documenting the GraphLab Gather-Apply-Scatter API
+   *
+   * @param graph The graph on which to execute the GraphLab API
+   * @param gatherFunc The gather function is executed on each edge triplet
+   *                   adjacent to a vertex and returns an accumulator which
+   *                   is then merged using the merge function.
+   * @param mergeFunc An associative operation used to combine the results
+   *                  of the gather phase.
+   * @param applyFunc Takes a vertex and the final result of the merge operations
+   *                  on the adjacent edges and returns a new vertex value.
+   * @param scatterFunc Executed after the apply function, the scatter function takes
+   *                    a triplet and signals whether the neighboring vertex program
+   *                    must be recomputed.
+   * @param numIter The maximum number of iterations to run.
+ * @param gatherDirection The direction of edges to consider during the gather phase + * @param scatterDirection The direction of edges to consider during the scatter phase + * + * @tparam VD The graph vertex attribute type + * @tparam ED The graph edge attribute type + * @tparam A The type accumulated during the gather phase + * @return the resulting graph after the algorithm converges + */ + def iterate[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( + gatherFunc: (Vid, EdgeTriplet[VD, ED]) => A, + mergeFunc: (A, A) => A, + applyFunc: (Vertex[VD], Option[A]) => VD, + scatterFunc: (Vid, EdgeTriplet[VD, ED]) => Boolean, + numIter: Int = Integer.MAX_VALUE, + gatherDirection: EdgeDirection = EdgeDirection.In, + scatterDirection: EdgeDirection = EdgeDirection.Out): Graph[VD, ED] = { + + + // Add an active attribute to all vertices to track convergence. + var activeGraph = graph.mapVertices { + case Vertex(id, data) => (true, data) + }.cache() + + // The gather function wrapper strips the active attribute and + // only invokes the gather function on active vertices + def gather(vid: Vid, e: EdgeTriplet[(Boolean, VD), ED]) = { + if (e.vertex(vid).data._1) { + val edge = new EdgeTriplet[VD,ED] + edge.src = Vertex(e.src.id, e.src.data._2) + edge.dst = Vertex(e.dst.id, e.dst.data._2) + edge.data = e.data + Some(gatherFunc(vid, edge)) + } else { + None + } + } + + // The apply function wrapper strips the vertex of the active attribute + // and only invokes the apply function on active vertices + def apply(v: Vertex[(Boolean, VD)], accum: Option[A]) = { + if (v.data._1) (true, applyFunc(Vertex(v.id, v.data._2), accum)) + else (false, v.data._2) + } + + // The scatter function wrapper strips the vertex of the active attribute + // and only invokes the scatter function on active vertices + def scatter(rawVid: Vid, e: EdgeTriplet[(Boolean, VD), ED]) = { + val vid = e.otherVertex(rawVid).id + if (e.vertex(vid).data._1) { + val edge = new EdgeTriplet[VD,ED] + edge.src = Vertex(e.src.id, e.src.data._2) + edge.dst = Vertex(e.dst.id, e.dst.data._2) + edge.data = e.data +// val src = Vertex(e.src.id, e.src.data._2) +// val dst = Vertex(e.dst.id, e.dst.data._2) +// val edge = new EdgeTriplet[VD,ED](src, dst, e.data) + Some(scatterFunc(vid, edge)) + } else { + None + } + } + + // Used to set the active status of vertices for the next round + def applyActive(v: Vertex[(Boolean, VD)], accum: Option[Boolean]) = + (accum.getOrElse(false), v.data._2) + + // Main Loop --------------------------------------------------------------------- + var i = 0 + var numActive = activeGraph.numVertices + while (i < numIter && numActive > 0) { + + val accUpdates: RDD[(Vid, A)] = + activeGraph.aggregateNeighbors(gather, mergeFunc, gatherDirection) + + activeGraph = activeGraph.leftJoinVertices(accUpdates, apply).cache() + + // Scatter is basically a gather in the opposite direction so we reverse the edge direction + val activeVertices: RDD[(Vid, Boolean)] = + activeGraph.aggregateNeighbors(scatter, _ || _, scatterDirection.reverse) + + activeGraph = activeGraph.leftJoinVertices(activeVertices, applyActive).cache() + + numActive = activeGraph.vertices.map(v => if (v.data._1) 1 else 0).reduce(_ + _) + println("Number active vertices: " + numActive) + i += 1 + } + + // Remove the active attribute from the vertex data before returning the graph + activeGraph.mapVertices(v => v.data._2) + } +} + + + + + + + + + diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala 
b/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala new file mode 100644 index 0000000000..4d7ca1268d --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala @@ -0,0 +1,54 @@ +package org.apache.spark.graph + +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graph.impl.GraphImpl + + +object GraphLoader { + + /** + * Load an edge list from file initializing the Graph RDD + */ + def textFile[ED: ClassManifest]( + sc: SparkContext, + path: String, + edgeParser: Array[String] => ED, + minEdgePartitions: Int = 1, + minVertexPartitions: Int = 1) + : GraphImpl[Int, ED] = { + + // Parse the edge data table + val edges = sc.textFile(path).flatMap { line => + if (!line.isEmpty && line(0) != '#') { + val lineArray = line.split("\\s+") + if(lineArray.length < 2) { + println("Invalid line: " + line) + assert(false) + } + val source = lineArray(0) + val target = lineArray(1) + val tail = lineArray.drop(2) + val edata = edgeParser(tail) + Array(Edge(source.trim.toInt, target.trim.toInt, edata)) + } else { + Array.empty[Edge[ED]] + } + }.cache() + + val graph = fromEdges(edges) + // println("Loaded graph:" + + // "\n\t#edges: " + graph.numEdges + + // "\n\t#vertices: " + graph.numVertices) + + graph + } + + def fromEdges[ED: ClassManifest](edges: RDD[Edge[ED]]): GraphImpl[Int, ED] = { + val vertices = edges.flatMap { edge => List((edge.src, 1), (edge.dst, 1)) } + .reduceByKey(_ + _) + .map{ case (vid, degree) => Vertex(vid, degree) } + new GraphImpl[Int, ED](vertices, edges) + } +} diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala b/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala new file mode 100644 index 0000000000..8de96680b8 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala @@ -0,0 +1,30 @@ +package org.apache.spark.graph + +import org.apache.spark.rdd.RDD + + +class GraphOps[VD: ClassManifest, ED: ClassManifest](g: Graph[VD, ED]) { + + lazy val numEdges: Long = g.edges.count() + + lazy val numVertices: Long = g.vertices.count() + + lazy val inDegrees: RDD[(Vid, Int)] = { + g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.In) + } + + lazy val outDegrees: RDD[(Vid, Int)] = { + g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.Out) + } + + lazy val degrees: RDD[(Vid, Int)] = { + g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.Both) + } + + def collectNeighborIds(edgeDirection: EdgeDirection) : RDD[(Vid, Array[Vid])] = { + g.aggregateNeighbors( + (vid, edge) => Some(Array(edge.otherVertex(vid).id)), + (a, b) => a ++ b, + edgeDirection) + } +} diff --git a/graph/src/main/scala/org/apache/spark/graph/Pregel.scala b/graph/src/main/scala/org/apache/spark/graph/Pregel.scala new file mode 100644 index 0000000000..27b75a7988 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/Pregel.scala @@ -0,0 +1,36 @@ +package org.apache.spark.graph + +import org.apache.spark.rdd.RDD + + +object Pregel { + + def iterate[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( + vprog: (Vertex[VD], A) => VD, + sendMsg: (Vid, EdgeTriplet[VD, ED]) => Option[A], + mergeMsg: (A, A) => A, + initialMsg: A, + numIter: Int) + : Graph[VD, ED] = { + + var g = graph + //var g = graph.cache() + var i = 0 + + def mapF(vid: Vid, edge: EdgeTriplet[VD,ED]) = sendMsg(edge.otherVertex(vid).id, edge) + + def runProg(v: Vertex[VD], msg: Option[A]): VD = { + if (msg.isEmpty) v.data 
else vprog(v, msg.get) + } + + var msgs: RDD[(Vid, A)] = g.vertices.map{ v => (v.id, initialMsg) } + + while (i < numIter) { + g = g.leftJoinVertices(msgs, runProg).cache() + msgs = g.aggregateNeighbors(mapF, mergeMsg, EdgeDirection.In) + i += 1 + } + g + } + +} diff --git a/graph/src/main/scala/org/apache/spark/graph/Vertex.scala b/graph/src/main/scala/org/apache/spark/graph/Vertex.scala new file mode 100644 index 0000000000..c8671b7f13 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/Vertex.scala @@ -0,0 +1,15 @@ +package org.apache.spark.graph + +/** + * A graph vertex consists of a vertex id and attribute. + * + * @tparam VD the type of the vertex attribute. + */ +case class Vertex[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD] ( + var id: Vid = 0, + var data: VD = nullValue[VD]) { + + def this(tuple: (Vid, VD)) = this(tuple._1, tuple._2) + + def tuple = (id, data) +} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala b/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala new file mode 100644 index 0000000000..3d218f27b1 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala @@ -0,0 +1,53 @@ +package org.apache.spark.graph.impl + +import scala.collection.mutable.ArrayBuilder + +import it.unimi.dsi.fastutil.ints.IntArrayList + +import org.apache.spark.graph._ + + +/** + * A partition of edges in 3 large columnar arrays. + */ +private[graph] +class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassManifest] { + + private var _data: Array[ED] = _ + private var _dataBuilder = ArrayBuilder.make[ED] + + val srcIds = new VertexArrayList + val dstIds = new VertexArrayList + + def data: Array[ED] = _data + + /** Add a new edge to the partition. */ + def add(src: Vid, dst: Vid, d: ED) { + srcIds.add(src) + dstIds.add(dst) + _dataBuilder += d + } + + def trim() { + srcIds.trim() + dstIds.trim() + _data = _dataBuilder.result() + } + + def size: Int = srcIds.size + + def iterator = new Iterator[Edge[ED]] { + private val edge = new Edge[ED] + private var pos = 0 + + override def hasNext: Boolean = pos < EdgePartition.this.size + + override def next(): Edge[ED] = { + edge.src = srcIds.get(pos) + edge.dst = dstIds.get(pos) + edge.data = _data(pos) + pos += 1 + edge + } + } +} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletRDD.scala b/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletRDD.scala new file mode 100644 index 0000000000..18d5d2b5aa --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletRDD.scala @@ -0,0 +1,87 @@ +package org.apache.spark.graph.impl + +import org.apache.spark.Aggregator +import org.apache.spark.Partition +import org.apache.spark.SparkEnv +import org.apache.spark.TaskContext +import org.apache.spark.rdd.RDD +import org.apache.spark.Dependency +import org.apache.spark.OneToOneDependency +import org.apache.spark.ShuffleDependency +import org.apache.spark.SparkContext._ +import org.apache.spark.graph._ + + +private[graph] +class EdgeTripletPartition(idx: Int, val vPart: Partition, val ePart: Partition) + extends Partition { + override val index: Int = idx + override def hashCode(): Int = idx +} + + +/** + * A RDD that brings together edge data with its associated vertex data. 
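+ *
+ * As a rough orientation (an internal detail, not a public API), GraphImpl
+ * builds its triplets view on top of this RDD; a sketch of that use:
+ * {{{
+ * new EdgeTripletRDD(vTableReplicated, eTable).mapPartitions { part => part.next()._2 }
+ * }}}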
+ */ +private[graph] +class EdgeTripletRDD[VD: ClassManifest, ED: ClassManifest]( + vTableReplicated: RDD[(Vid, VD)], + eTable: RDD[(Pid, EdgePartition[ED])]) + extends RDD[(VertexHashMap[VD], Iterator[EdgeTriplet[VD, ED]])](eTable.context, Nil) { + + println(vTableReplicated.partitioner.get.numPartitions) + println(eTable.partitioner.get.numPartitions) + + assert(vTableReplicated.partitioner == eTable.partitioner) + + override def getDependencies: List[Dependency[_]] = { + List(new OneToOneDependency(eTable), new OneToOneDependency(vTableReplicated)) + } + + override def getPartitions = Array.tabulate[Partition](eTable.partitions.size) { + i => new EdgeTripletPartition(i, eTable.partitions(i), vTableReplicated.partitions(i)) + } + + override val partitioner = eTable.partitioner + + override def getPreferredLocations(s: Partition) = + eTable.preferredLocations(s.asInstanceOf[EdgeTripletPartition].ePart) + + override def compute(s: Partition, context: TaskContext) + : Iterator[(VertexHashMap[VD], Iterator[EdgeTriplet[VD, ED]])] = { + + val split = s.asInstanceOf[EdgeTripletPartition] + + // Fetch the vertices and put them in a hashmap. + // TODO: use primitive hashmaps for primitive VD types. + val vmap = new VertexHashMap[VD]//(1000000) + vTableReplicated.iterator(split.vPart, context).foreach { v => vmap.put(v._1, v._2) } + + val (pid, edgePartition) = eTable.iterator(split.ePart, context).next() + .asInstanceOf[(Pid, EdgePartition[ED])] + + // Return an iterator that looks up the hash map to find matching vertices for each edge. + val iter = new Iterator[EdgeTriplet[VD, ED]] { + private var pos = 0 + private val e = new EdgeTriplet[VD, ED] + e.src = new Vertex[VD] + e.dst = new Vertex[VD] + + override def hasNext: Boolean = pos < edgePartition.size + override def next() = { + e.src.id = edgePartition.srcIds.getLong(pos) + // assert(vmap.containsKey(e.src.id)) + e.src.data = vmap.get(e.src.id) + + e.dst.id = edgePartition.dstIds.getLong(pos) + // assert(vmap.containsKey(e.dst.id)) + e.dst.data = vmap.get(e.dst.id) + + e.data = edgePartition.data(pos) + pos += 1 + e + } + } + Iterator((vmap, iter)) + } +} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala b/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala new file mode 100644 index 0000000000..68ac9f724c --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala @@ -0,0 +1,441 @@ +package org.apache.spark.graph.impl + +import scala.collection.JavaConversions._ + +import org.apache.spark.SparkContext._ +import org.apache.spark.Partitioner +import org.apache.spark.HashPartitioner +import org.apache.spark.util.ClosureCleaner + +import org.apache.spark.rdd.RDD + +import org.apache.spark.graph._ +import org.apache.spark.graph.impl.GraphImpl._ + + + + + +/** + * A Graph RDD that supports computation on graphs. 
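+ *
+ * A minimal construction sketch (assuming the vertex and edge RDDs have
+ * already been built, e.g. by GraphLoader or the Graph companion object):
+ * {{{
+ * val g: Graph[Int, Int] = new GraphImpl(vertices, edges)
+ * g.cache()
+ * }}}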
+ */ +class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected ( + val numVertexPartitions: Int, + val numEdgePartitions: Int, + _rawVertices: RDD[Vertex[VD]], + _rawEdges: RDD[Edge[ED]], + _rawVTable: RDD[(Vid, (VD, Array[Pid]))], + _rawETable: RDD[(Pid, EdgePartition[ED])]) + extends Graph[VD, ED] { + + def this(vertices: RDD[Vertex[VD]], edges: RDD[Edge[ED]]) = { + this(vertices.partitions.size, edges.partitions.size, vertices, edges, null, null) + } + + def withPartitioner(numVertexPartitions: Int, numEdgePartitions: Int): Graph[VD, ED] = { + if (_cached) { + new GraphImpl(numVertexPartitions, numEdgePartitions, null, null, _rawVTable, _rawETable) + .cache() + } else { + new GraphImpl(numVertexPartitions, numEdgePartitions, _rawVertices, _rawEdges, null, null) + } + } + + def withVertexPartitioner(numVertexPartitions: Int) = { + withPartitioner(numVertexPartitions, numEdgePartitions) + } + + def withEdgePartitioner(numEdgePartitions: Int) = { + withPartitioner(numVertexPartitions, numEdgePartitions) + } + + protected var _cached = false + + override def cache(): Graph[VD, ED] = { + eTable.cache() + vTable.cache() + _cached = true + this + } + + override def reverse: Graph[VD, ED] = { + newGraph(vertices, edges.map{ case Edge(s, t, e) => Edge(t, s, e) }) + } + + /** Return a RDD of vertices. */ + override def vertices: RDD[Vertex[VD]] = { + if (!_cached && _rawVertices != null) { + _rawVertices + } else { + vTable.map { case(vid, (data, pids)) => new Vertex(vid, data) } + } + } + + /** Return a RDD of edges. */ + override def edges: RDD[Edge[ED]] = { + if (!_cached && _rawEdges != null) { + _rawEdges + } else { + eTable.mapPartitions { iter => iter.next()._2.iterator } + } + } + + /** Return a RDD that brings edges with its source and destination vertices together. */ + override def triplets: RDD[EdgeTriplet[VD, ED]] = { + new EdgeTripletRDD(vTableReplicated, eTable).mapPartitions { part => part.next()._2 } + } + + override def mapVertices[VD2: ClassManifest](f: Vertex[VD] => VD2): Graph[VD2, ED] = { + newGraph(vertices.map(v => Vertex(v.id, f(v))), edges) + } + + override def mapEdges[ED2: ClassManifest](f: Edge[ED] => ED2): Graph[VD, ED2] = { + newGraph(vertices, edges.map(e => Edge(e.src, e.dst, f(e)))) + } + + override def mapTriplets[ED2: ClassManifest](f: EdgeTriplet[VD, ED] => ED2): + Graph[VD, ED2] = { + newGraph(vertices, triplets.map(e => Edge(e.src.id, e.dst.id, f(e)))) + } + + override def correctEdges(): Graph[VD, ED] = { + val sc = vertices.context + val vset = sc.broadcast(vertices.map(_.id).collect().toSet) + val newEdges = edges.filter(e => vset.value.contains(e.src) && vset.value.contains(e.dst)) + Graph(vertices, newEdges) + } + + + override def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (_ => true), + vpred: Vertex[VD] => Boolean = (_ => true) ): Graph[VD, ED] = { + + // Restrict the set of vertices to those that satisfy the vertex predicate + val newVertices = vertices.filter(vpred) + // Restrict the set of edges to those that satisfy the vertex and the edge predicate. 
+    val newEdges = triplets.filter(t => vpred(t.src) && vpred(t.dst) && epred(t))
+      .map( t => Edge(t.src.id, t.dst.id, t.data) )
+
+    new GraphImpl(newVertices, newEdges)
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////
+  // Lower level transformation methods
+  //////////////////////////////////////////////////////////////////////////////////////////////////
+
+  override def aggregateNeighbors[VD2: ClassManifest](
+    mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2],
+    reduceFunc: (VD2, VD2) => VD2,
+    default: VD2,
+    gatherDirection: EdgeDirection)
+    : RDD[(Vid, VD2)] = {
+
+    ClosureCleaner.clean(mapFunc)
+    ClosureCleaner.clean(reduceFunc)
+
+    val newVTable = vTableReplicated.mapPartitions({ part =>
+      part.map { v => (v._1, MutableTuple2(v._2, Option.empty[VD2])) }
+    }, preservesPartitioning = true)
+
+    new EdgeTripletRDD[MutableTuple2[VD, Option[VD2]], ED](newVTable, eTable)
+      .mapPartitions { part =>
+        val (vmap, edges) = part.next()
+        val edgeSansAcc = new EdgeTriplet[VD, ED]()
+        edgeSansAcc.src = new Vertex[VD]
+        edgeSansAcc.dst = new Vertex[VD]
+        edges.foreach { e: EdgeTriplet[MutableTuple2[VD, Option[VD2]], ED] =>
+          edgeSansAcc.data = e.data
+          edgeSansAcc.src.data = e.src.data._1
+          edgeSansAcc.dst.data = e.dst.data._1
+          edgeSansAcc.src.id = e.src.id
+          edgeSansAcc.dst.id = e.dst.id
+          if (gatherDirection == EdgeDirection.In || gatherDirection == EdgeDirection.Both) {
+            // Accumulate into the destination vertex for in-edges.
+            e.dst.data._2 =
+              if (e.dst.data._2.isEmpty) {
+                mapFunc(edgeSansAcc.dst.id, edgeSansAcc)
+              } else {
+                val tmp = mapFunc(edgeSansAcc.dst.id, edgeSansAcc)
+                if (!tmp.isEmpty) Some(reduceFunc(e.dst.data._2.get, tmp.get)) else e.dst.data._2
+              }
+          }
+          if (gatherDirection == EdgeDirection.Out || gatherDirection == EdgeDirection.Both) {
+            // Accumulate into the source vertex for out-edges (mirrors the In case above).
+            e.src.data._2 =
+              if (e.src.data._2.isEmpty) {
+                mapFunc(edgeSansAcc.src.id, edgeSansAcc)
+              } else {
+                val tmp = mapFunc(edgeSansAcc.src.id, edgeSansAcc)
+                if (!tmp.isEmpty) Some(reduceFunc(e.src.data._2.get, tmp.get)) else e.src.data._2
+              }
+          }
+        }
+        vmap.long2ObjectEntrySet().fastIterator().filter(!_.getValue()._2.isEmpty).map{ entry =>
+          (entry.getLongKey(), entry.getValue()._2)
+        }
+      }
+      .map{ case (vid, aOpt) => (vid, aOpt.get) }
+      .combineByKey((v: VD2) => v, reduceFunc, null, vertexPartitioner, false)
+  }
+
+  /**
+   * Same as aggregateNeighbors, but the map function can return None and there is no default
+   * value. As a consequence, the resulting table may be much smaller than the set of vertices.
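+   *
+   * For instance, a sketch of collecting neighbor ids with this overload
+   * (mirroring GraphOps.collectNeighborIds):
+   * {{{
+   * graph.aggregateNeighbors(
+   *   (vid, edge) => Some(Array(edge.otherVertex(vid).id)),
+   *   (a, b) => a ++ b,
+   *   EdgeDirection.Both)
+   * }}}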
+ */ + override def aggregateNeighbors[VD2: ClassManifest]( + mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], + reduceFunc: (VD2, VD2) => VD2, + gatherDirection: EdgeDirection): RDD[(Vid, VD2)] = { + + ClosureCleaner.clean(mapFunc) + ClosureCleaner.clean(reduceFunc) + + val newVTable = vTableReplicated.mapPartitions({ part => + part.map { v => (v._1, MutableTuple2(v._2, Option.empty[VD2])) } + }, preservesPartitioning = true) + + new EdgeTripletRDD[MutableTuple2[VD, Option[VD2]], ED](newVTable, eTable) + .mapPartitions { part => + val (vmap, edges) = part.next() + val edgeSansAcc = new EdgeTriplet[VD, ED]() + edgeSansAcc.src = new Vertex[VD] + edgeSansAcc.dst = new Vertex[VD] + edges.foreach { e: EdgeTriplet[MutableTuple2[VD, Option[VD2]], ED] => + edgeSansAcc.data = e.data + edgeSansAcc.src.data = e.src.data._1 + edgeSansAcc.dst.data = e.dst.data._1 + edgeSansAcc.src.id = e.src.id + edgeSansAcc.dst.id = e.dst.id + if (gatherDirection == EdgeDirection.In || gatherDirection == EdgeDirection.Both) { + e.dst.data._2 = + if (e.dst.data._2.isEmpty) { + mapFunc(edgeSansAcc.dst.id, edgeSansAcc) + } else { + val tmp = mapFunc(edgeSansAcc.dst.id, edgeSansAcc) + if (!tmp.isEmpty) Some(reduceFunc(e.dst.data._2.get, tmp.get)) else e.dst.data._2 + } + } + if (gatherDirection == EdgeDirection.Out || gatherDirection == EdgeDirection.Both) { + e.src.data._2 = + if (e.src.data._2.isEmpty) { + mapFunc(edgeSansAcc.src.id, edgeSansAcc) + } else { + val tmp = mapFunc(edgeSansAcc.src.id, edgeSansAcc) + if (!tmp.isEmpty) Some(reduceFunc(e.src.data._2.get, tmp.get)) else e.src.data._2 + } + } + } + vmap.long2ObjectEntrySet().fastIterator().filter(!_.getValue()._2.isEmpty).map{ entry => + (entry.getLongKey(), entry.getValue()._2) + } + } + .map{ case (vid, aOpt) => (vid, aOpt.get) } + .combineByKey((v: VD2) => v, reduceFunc, null, vertexPartitioner, false) + } + + override def leftJoinVertices[U: ClassManifest, VD2: ClassManifest]( + updates: RDD[(Vid, U)], + updateF: (Vertex[VD], Option[U]) => VD2) + : Graph[VD2, ED] = { + + ClosureCleaner.clean(updateF) + + val newVTable = vTable.leftOuterJoin(updates).mapPartitions({ iter => + iter.map { case (vid, ((vdata, pids), update)) => + val newVdata = updateF(Vertex(vid, vdata), update) + (vid, (newVdata, pids)) + } + }, preservesPartitioning = true).cache() + + new GraphImpl(newVTable.partitions.length, eTable.partitions.length, null, null, newVTable, eTable) + } + + override def joinVertices[U: ClassManifest]( + updates: RDD[(Vid, U)], + updateF: (Vertex[VD], U) => VD) + : Graph[VD, ED] = { + + ClosureCleaner.clean(updateF) + + val newVTable = vTable.leftOuterJoin(updates).mapPartitions({ iter => + iter.map { case (vid, ((vdata, pids), update)) => + if (update.isDefined) { + val newVdata = updateF(Vertex(vid, vdata), update.get) + (vid, (newVdata, pids)) + } else { + (vid, (vdata, pids)) + } + } + }, preservesPartitioning = true).cache() + + new GraphImpl(newVTable.partitions.length, eTable.partitions.length, null, null, newVTable, eTable) + } + + + ////////////////////////////////////////////////////////////////////////////////////////////////// + // Internals hidden from callers + ////////////////////////////////////////////////////////////////////////////////////////////////// + + // TODO: Support non-hash partitioning schemes. + protected val vertexPartitioner = new HashPartitioner(numVertexPartitions) + protected val edgePartitioner = new HashPartitioner(numEdgePartitions) + + /** Create a new graph but keep the current partitioning scheme. 
*/
+  protected def newGraph[VD2: ClassManifest, ED2: ClassManifest](
+    vertices: RDD[Vertex[VD2]], edges: RDD[Edge[ED2]]): Graph[VD2, ED2] = {
+    (new GraphImpl[VD2, ED2](vertices, edges)).withPartitioner(numVertexPartitions, numEdgePartitions)
+  }
+
+  protected lazy val eTable: RDD[(Pid, EdgePartition[ED])] = {
+    if (_rawETable == null) {
+      createETable(_rawEdges, numEdgePartitions)
+    } else {
+      _rawETable
+    }
+  }
+
+  protected lazy val vTable: RDD[(Vid, (VD, Array[Pid]))] = {
+    if (_rawVTable == null) {
+      createVTable(_rawVertices, eTable, numVertexPartitions)
+    } else {
+      _rawVTable
+    }
+  }
+
+  protected lazy val vTableReplicated: RDD[(Vid, VD)] = {
+    // Join vid2pid and vTable, generate a shuffle dependency on the joined result, and get
+    // the shuffle id so we can use it on the slave.
+    vTable
+      .flatMap { case (vid, (vdata, pids)) => pids.iterator.map { pid => (pid, (vid, vdata)) } }
+      .partitionBy(edgePartitioner)
+      .mapPartitions(
+        { part => part.map { case(pid, (vid, vdata)) => (vid, vdata) } },
+        preservesPartitioning = true)
+  }
+}
+
+
+object GraphImpl {
+
+
+  protected def edgePartitionFunction1D(src: Vid, dst: Vid, numParts: Pid): Pid = {
+    val mixingPrime: Vid = 1125899906842597L
+    (math.abs(src) * mixingPrime).toInt % numParts
+  }
+
+
+
+  /**
+   * This function implements a classic 2D partitioning of a sparse matrix.
+   * Suppose we have a graph with 12 vertices that we want to partition
+   * over 9 machines. We can use the following sparse matrix representation:
+   *
+   *        __________________________________
+   *  v0   | P0 *     | P1       | P2 *     |
+   *  v1   | ****     | *        |          |
+   *  v2   | *******  | **       | ****     |
+   *  v3   | *****    | * *      | *        |
+   *        ----------------------------------
+   *  v4   | P3 *     | P4 ***   | P5 ** *  |
+   *  v5   | * *      | *        |          |
+   *  v6   | *        | **       | ****     |
+   *  v7   | * * *    | * *      | *        |
+   *        ----------------------------------
+   *  v8   | P6 *     | P7 *     | P8 * *   |
+   *  v9   | *        | * *      |          |
+   *  v10  | *        | **       | * *      |
+   *  v11  | * <-E    | ***      | **       |
+   *        ----------------------------------
+   *
+   * The edge denoted by E connects v11 with v1 and is assigned to
+   * processor P6. To get the processor number we divide the matrix
+   * into sqrt(numProc) by sqrt(numProc) blocks. Notice that edges
+   * adjacent to v11 can only be in the first column of
+   * blocks (P0, P3, P6) or the last row of blocks (P6, P7, P8).
+   * As a consequence we can guarantee that v11 will need to be
+   * replicated to at most 2 * sqrt(numProc) machines.
+   *
+   * Notice that P0 has many edges and as a consequence this
+   * partitioning would lead to poor work balance. To improve
+   * balance we first multiply each vertex id by a large prime
+   * to effectively shuffle the vertex locations.
+   *
+   * One of the limitations of this approach is that the number of
+   * machines must be a perfect square. We partially address
+   * this limitation by computing the machine assignment to the next
+   * largest perfect square and then mapping back down to the actual
+   * number of machines. Unfortunately, this can also lead to work
+   * imbalance and so it is suggested that a perfect square is used.
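+   *
+   * As a concrete sketch (with hypothetical numbers), for numParts = 9 the
+   * block grid is 3 x 3 (ceilSqrtNumParts = 3), and an edge (src, dst) lands in:
+   * {{{
+   * val col  = ((math.abs(src) * mixingPrime) % 3).toInt
+   * val row  = ((math.abs(dst) * mixingPrime) % 3).toInt
+   * val part = (col * 3 + row) % 9
+   * }}}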
+ * + * + */ + protected def edgePartitionFunction2D(src: Vid, dst: Vid, + numParts: Pid, ceilSqrtNumParts: Pid): Pid = { + val mixingPrime: Vid = 1125899906842597L + val col: Pid = ((math.abs(src) * mixingPrime) % ceilSqrtNumParts).toInt + val row: Pid = ((math.abs(dst) * mixingPrime) % ceilSqrtNumParts).toInt + (col * ceilSqrtNumParts + row) % numParts + } + + + /** + * Create the edge table RDD, which is much more efficient for Java heap storage than the + * normal edges data structure (RDD[(Vid, Vid, ED)]). + * + * The edge table contains multiple partitions, and each partition contains only one RDD + * key-value pair: the key is the partition id, and the value is an EdgePartition object + * containing all the edges in a partition. + */ + protected def createETable[ED: ClassManifest](edges: RDD[Edge[ED]], numPartitions: Int) + : RDD[(Pid, EdgePartition[ED])] = { + val ceilSqrt: Pid = math.ceil(math.sqrt(numPartitions)).toInt + + edges + .map { e => + // Random partitioning based on the source vertex id. + // val part: Pid = edgePartitionFunction1D(e.src, e.dst, numPartitions) + val part: Pid = edgePartitionFunction2D(e.src, e.dst, numPartitions, ceilSqrt) + + // Should we be using 3-tuple or an optimized class + (part, (e.src, e.dst, e.data)) + // (math.abs(e.src) % numPartitions, (e.src, e.dst, e.data)) + + } + .partitionBy(new HashPartitioner(numPartitions)) + .mapPartitionsWithIndex({ (pid, iter) => + val edgePartition = new EdgePartition[ED] + iter.foreach { case (_, (src, dst, data)) => edgePartition.add(src, dst, data) } + edgePartition.trim() + Iterator((pid, edgePartition)) + }, preservesPartitioning = true) + } + + protected def createVTable[VD: ClassManifest, ED: ClassManifest]( + vertices: RDD[Vertex[VD]], + eTable: RDD[(Pid, EdgePartition[ED])], + numPartitions: Int) + : RDD[(Vid, (VD, Array[Pid]))] = { + val partitioner = new HashPartitioner(numPartitions) + + // A key-value RDD. The key is a vertex id, and the value is a list of + // partitions that contains edges referencing the vertex. + val vid2pid : RDD[(Vid, Seq[Pid])] = eTable.mapPartitions { iter => + val (pid, edgePartition) = iter.next() + val vSet = new VertexSet + var i = 0 + while (i < edgePartition.srcIds.size) { + vSet.add(edgePartition.srcIds.getLong(i)) + vSet.add(edgePartition.dstIds.getLong(i)) + i += 1 + } + vSet.iterator.map { vid => (vid.toLong, pid) } + }.groupByKey(partitioner) + + vertices + .map { v => (v.id, v.data) } + .partitionBy(partitioner) + .leftOuterJoin(vid2pid) + .mapValues { + case (vdata, None) => (vdata, Array.empty[Pid]) + case (vdata, Some(pids)) => (vdata, pids.toArray) + } + } +} + diff --git a/graph/src/main/scala/org/apache/spark/graph/package.scala b/graph/src/main/scala/org/apache/spark/graph/package.scala new file mode 100644 index 0000000000..474ace520f --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/package.scala @@ -0,0 +1,23 @@ +package org.apache.spark + +package object graph { + + type Vid = Long + type Pid = Int + + type VertexHashMap[T] = it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap[T] + type VertexSet = it.unimi.dsi.fastutil.longs.LongOpenHashSet + type VertexArrayList = it.unimi.dsi.fastutil.longs.LongArrayList + + /** + * Return the default null-like value for a data type T. 
+ */ + def nullValue[T] = null.asInstanceOf[T] + + + private[graph] + case class MutableTuple2[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) U, + @specialized(Char, Int, Boolean, Byte, Long, Float, Double) V]( + var _1: U, var _2: V) + +} diff --git a/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala b/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala new file mode 100644 index 0000000000..eaff27a33e --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala @@ -0,0 +1,76 @@ +///// This file creates circular dependencies between examples bagle and graph + +// package org.apache.spark.graph.perf + +// import org.apache.spark._ +// import org.apache.spark.SparkContext._ +// import org.apache.spark.bagel.Bagel + +// import org.apache.spark.examples.bagel +// //import org.apache.spark.bagel.examples._ +// import org.apache.spark.graph._ + + +// object BagelTest { + +// def main(args: Array[String]) { +// val host = args(0) +// val taskType = args(1) +// val fname = args(2) +// val options = args.drop(3).map { arg => +// arg.dropWhile(_ == '-').split('=') match { +// case Array(opt, v) => (opt -> v) +// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) +// } +// } + +// System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") +// //System.setProperty("spark.shuffle.compress", "false") +// System.setProperty("spark.kryo.registrator", "org.apache.spark.bagel.examples.PRKryoRegistrator") + +// var numIter = Int.MaxValue +// var isDynamic = false +// var tol:Float = 0.001F +// var outFname = "" +// var numVPart = 4 +// var numEPart = 4 + +// options.foreach{ +// case ("numIter", v) => numIter = v.toInt +// case ("dynamic", v) => isDynamic = v.toBoolean +// case ("tol", v) => tol = v.toFloat +// case ("output", v) => outFname = v +// case ("numVPart", v) => numVPart = v.toInt +// case ("numEPart", v) => numEPart = v.toInt +// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) +// } + +// val sc = new SparkContext(host, "PageRank(" + fname + ")") +// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() +// val startTime = System.currentTimeMillis + +// val numVertices = g.vertices.count() + +// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => +// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) +// } + +// // Do the computation +// val epsilon = 0.01 / numVertices +// val messages = sc.parallelize(Array[(String, PRMessage)]()) +// val utils = new PageRankUtils +// val result = +// Bagel.run( +// sc, vertices, messages, combiner = new PRCombiner(), +// numPartitions = numVPart)( +// utils.computeWithCombiner(numVertices, epsilon, numIter)) + +// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) +// if (!outFname.isEmpty) { +// println("Saving pageranks of pages to " + outFname) +// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) +// } +// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") +// sc.stop() +// } +// } diff --git a/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala b/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala new file mode 100644 index 0000000000..01bd968550 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala @@ -0,0 +1,75 @@ +///// This file creates circular dependencies between examples bagle and 
graph + + +// package org.apache.spark.graph.perf + +// import org.apache.spark._ +// import org.apache.spark.SparkContext._ +// import org.apache.spark.bagel.Bagel +// import org.apache.spark.bagel.examples._ +// import org.apache.spark.graph._ + + +// object SparkTest { + +// def main(args: Array[String]) { +// val host = args(0) +// val taskType = args(1) +// val fname = args(2) +// val options = args.drop(3).map { arg => +// arg.dropWhile(_ == '-').split('=') match { +// case Array(opt, v) => (opt -> v) +// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) +// } +// } + +// System.setProperty("spark.serializer", "org.apache.spark.KryoSerializer") +// //System.setProperty("spark.shuffle.compress", "false") +// System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") + +// var numIter = Int.MaxValue +// var isDynamic = false +// var tol:Float = 0.001F +// var outFname = "" +// var numVPart = 4 +// var numEPart = 4 + +// options.foreach{ +// case ("numIter", v) => numIter = v.toInt +// case ("dynamic", v) => isDynamic = v.toBoolean +// case ("tol", v) => tol = v.toFloat +// case ("output", v) => outFname = v +// case ("numVPart", v) => numVPart = v.toInt +// case ("numEPart", v) => numEPart = v.toInt +// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) +// } + +// val sc = new SparkContext(host, "PageRank(" + fname + ")") +// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() +// val startTime = System.currentTimeMillis + +// val numVertices = g.vertices.count() + +// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => +// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) +// } + +// // Do the computation +// val epsilon = 0.01 / numVertices +// val messages = sc.parallelize(Array[(String, PRMessage)]()) +// val utils = new PageRankUtils +// val result = +// Bagel.run( +// sc, vertices, messages, combiner = new PRCombiner(), +// numPartitions = numVPart)( +// utils.computeWithCombiner(numVertices, epsilon, numIter)) + +// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) +// if (!outFname.isEmpty) { +// println("Saving pageranks of pages to " + outFname) +// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) +// } +// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") +// sc.stop() +// } +// } diff --git a/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala b/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala new file mode 100644 index 0000000000..5db13fe3bc --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala @@ -0,0 +1,112 @@ +package org.apache.spark.util + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +import scala.collection.mutable.HashSet + +import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} +import org.objectweb.asm.Opcodes._ + + + +private[spark] object BytecodeUtils { + + /** + * Test whether the given closure invokes the specified method in the specified class. 
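+   *
+   * A usage sketch (the closure and target below are purely illustrative):
+   * {{{
+   * val f = (v: Vertex[Int]) => v.data + 1
+   * val callsData = BytecodeUtils.invokedMethod(f, classOf[Vertex[Int]], "data")
+   * }}}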
+   */
+  def invokedMethod(closure: AnyRef, targetClass: Class[_], targetMethod: String): Boolean = {
+    if (_invokedMethod(closure.getClass, "apply", targetClass, targetMethod)) {
+      true
+    } else {
+      // look at closures enclosed in this closure
+      for (f <- closure.getClass.getDeclaredFields
+           if f.getType.getName.startsWith("scala.Function")) {
+        f.setAccessible(true)
+        if (invokedMethod(f.get(closure), targetClass, targetMethod)) {
+          return true
+        }
+      }
+      return false
+    }
+  }
+
+  private def _invokedMethod(cls: Class[_], method: String,
+      targetClass: Class[_], targetMethod: String): Boolean = {
+
+    val seen = new HashSet[(Class[_], String)]
+    var stack = List[(Class[_], String)]((cls, method))
+
+    while (stack.nonEmpty) {
+      val (c, m) = stack.head
+      stack = stack.tail
+      seen.add((c, m))
+      val finder = new MethodInvocationFinder(c.getName, m)
+      getClassReader(c).accept(finder, 0)
+      for (classMethod <- finder.methodsInvoked) {
+        //println(classMethod)
+        if (classMethod._1 == targetClass && classMethod._2 == targetMethod) {
+          return true
+        } else if (!seen.contains(classMethod)) {
+          stack = classMethod :: stack
+        }
+      }
+    }
+    return false
+  }
+
+  /**
+   * Get an ASM class reader for a given class from the JAR that loaded it.
+   */
+  private def getClassReader(cls: Class[_]): ClassReader = {
+    // Copy data over, before delegating to ClassReader - else we can run out of open file handles.
+    val className = cls.getName.replaceFirst("^.*\\.", "") + ".class"
+    val resourceStream = cls.getResourceAsStream(className)
+    // todo: Fixme - continuing with earlier behavior ...
+    if (resourceStream == null) return new ClassReader(resourceStream)
+
+    val baos = new ByteArrayOutputStream(128)
+    Utils.copyStream(resourceStream, baos, true)
+    new ClassReader(new ByteArrayInputStream(baos.toByteArray))
+  }
+
+  /**
+   * Given the class name, return whether we should look into the class or not. This is used to
+   * skip examining a large quantity of Java or Scala classes that we know for sure wouldn't access
+   * the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of ".").
+   */
+  private def skipClass(className: String): Boolean = {
+    val c = className
+    c.startsWith("java/") || c.startsWith("scala/") || c.startsWith("javax/")
+  }
+
+  /**
+   * Find the set of methods invoked by the specified method in the specified class.
+   * For example, after running the visitor,
+   *   MethodInvocationFinder("spark/graph/Foo", "test")
+   * its methodsInvoked variable will contain the set of methods invoked directly by
+   * Foo.test(). Interface invocations are not returned as part of the result set because we cannot
+   * determine the actual method invoked by inspecting the bytecode.
+ */ + private class MethodInvocationFinder(className: String, methodName: String) + extends ClassVisitor(ASM4) { + + val methodsInvoked = new HashSet[(Class[_], String)] + + override def visitMethod(access: Int, name: String, desc: String, + sig: String, exceptions: Array[String]): MethodVisitor = { + if (name == methodName) { + new MethodVisitor(ASM4) { + override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { + if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { + if (!skipClass(owner)) { + methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) + } + } + } + } + } else { + null + } + } + } +} diff --git a/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala b/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala new file mode 100644 index 0000000000..cb18ef3d26 --- /dev/null +++ b/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala @@ -0,0 +1,21 @@ +package org.apache.spark.graph.util + + +object HashUtils { + + /** + * Compute a 64-bit hash value for the given string. + * See http://stackoverflow.com/questions/1660501/what-is-a-good-64bit-hash-function-in-java-for-textual-strings + */ + def hash(str: String): Long = { + var h = 1125899906842597L + val len = str.length + var i = 0 + + while (i < len) { + h = 31 * h + str(i) + i += 1 + } + h + } +} diff --git a/graph/src/main/scala/spark/graph/Analytics.scala b/graph/src/main/scala/spark/graph/Analytics.scala deleted file mode 100644 index 601a0785e1..0000000000 --- a/graph/src/main/scala/spark/graph/Analytics.scala +++ /dev/null @@ -1,793 +0,0 @@ -package spark.graph - -import spark._ - - - -object Analytics extends Logging { - -// def main(args: Array[String]) { -// //pregelPagerank() -// } - - // /** - // * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD - // */ - // // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { - // // // Compute the out degree of each vertex - // // val pagerankGraph = graph.updateVertices[Int, (Int, Float)](graph.outDegrees, - // // (vertex, deg) => (deg.getOrElse(0), 1.0F) - // // ) - // // GraphLab.iterateGA[(Int, Float), ED, Float](pagerankGraph)( - // // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather - // // (a: Float, b: Float) => a + b, // merge - // // (vertex, a: Option[Float]) => (vertex.data._1, (0.15F + 0.85F * a.getOrElse(0F))), // apply - // // numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } - // // } - // def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { - // // Compute the out degree of each vertex - // val pagerankGraph = graph.updateVertices[Int, (Int, Double)](graph.outDegrees, - // (vertex, deg) => (deg.getOrElse(0), 1.0) - // ) - // GraphLab.iterateGA2[(Int, Double), ED, Double](pagerankGraph)( - // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather - // (a: Double, b: Double) => a + b, // merge - // 0.0, // default - // (vertex, a: Double) => (vertex.data._1, (0.15 + 0.85 * a)), // apply - // numIter).mapVertices{ case Vertex(id, (outDeg, r)) => Vertex(id, r) } - // } - - /** - * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD - */ - def pagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - numIter: Int, - resetProb: Double = 0.15) = { - // Compute the out degree of each vertex - val pagerankGraph = graph.leftJoinVertices[Int, (Int, Double)](graph.outDegrees, - (vertex, deg) => (deg.getOrElse(0), 1.0) - ) - 
Pregel.iterate[(Int, Double), ED, Double](pagerankGraph)( - (vertex, a: Double) => (vertex.data._1, (resetProb + (1.0 - resetProb) * a)), // apply - (me_id, edge) => Some(edge.src.data._2 / edge.src.data._1), // gather - (a: Double, b: Double) => a + b, // merge - 1.0, - numIter).mapVertices{ case Vertex(id, (outDeg, r)) => r } - } - - /** - * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD - */ - def dynamicPagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - tol: Float, - maxIter: Int = Integer.MAX_VALUE, - resetProb: Double = 0.15) = { - // Compute the out degree of each vertex - val pagerankGraph = graph.leftJoinVertices[Int, (Int, Double, Double)](graph.outDegrees, - (vertex, degIter) => (degIter.sum, 1.0, 1.0) - ) - - // Run PageRank - GraphLab.iterate(pagerankGraph)( - (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather - (a: Double, b: Double) => a + b, - (vertex, a: Option[Double]) => - (vertex.data._1, (resetProb + (1.0 - resetProb) * a.getOrElse(0.0)), vertex.data._2), // apply - (me_id, edge) => math.abs(edge.src.data._3 - edge.src.data._2) > tol, // scatter - maxIter).mapVertices { case Vertex(vid, data) => data._2 } - } - - - /** - * Compute the connected component membership of each vertex - * and return an RDD with the vertex value containing the - * lowest vertex id in the connected component containing - * that vertex. - */ - def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]) = { - val ccGraph = graph.mapVertices { case Vertex(vid, _) => vid } - - GraphLab.iterate(ccGraph)( - (me_id, edge) => edge.otherVertex(me_id).data, // gather - (a: Vid, b: Vid) => math.min(a, b), // merge - (v, a: Option[Vid]) => math.min(v.data, a.getOrElse(Long.MaxValue)), // apply - (me_id, edge) => (edge.vertex(me_id).data < edge.otherVertex(me_id).data), // scatter - gatherDirection = EdgeDirection.Both, scatterDirection = EdgeDirection.Both - ) - } - - // /** - // * Compute the shortest path to a set of markers - // */ - // def shortestPath[VD: Manifest](graph: Graph[VD, Float], sources: List[Int], numIter: Int) = { - // val sourceSet = sources.toSet - // val spGraph = graph.mapVertices { - // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue)) - // } - // GraphLab.iterateGA[Float, Float, Float](spGraph)( - // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // (a: Float, b: Float) => math.min(a, b), // merge - // (v, a: Option[Float]) => math.min(v.data, a.getOrElse(Float.MaxValue)), // apply - // numIter, - // gatherDirection = EdgeDirection.In) - // } - - // // /** - // // * Compute the connected component membership of each vertex - // // * and return an RDD with the vertex value containing the - // // * lowest vertex id in the connected component containing - // // * that vertex. 
- // // */ - // // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - // // numIter: Int = Int.MaxValue) = { - - // // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) - // // val edges = graph.edges // .mapValues(v => None) - // // val ccGraph = new Graph(vertices, edges) - - // // ccGraph.iterateDynamic( - // // (me_id, edge) => edge.otherVertex(me_id).data, // gather - // // (a: Int, b: Int) => math.min(a, b), // merge - // // Integer.MAX_VALUE, - // // (v, a: Int) => math.min(v.data, a), // apply - // // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter - // // numIter, - // // gatherEdges = EdgeDirection.Both, - // // scatterEdges = EdgeDirection.Both).vertices - // // // - // // // graph_ret.vertices.collect.foreach(println) - // // // graph_ret.edges.take(10).foreach(println) - // // } - - - // // /** - // // * Compute the shortest path to a set of markers - // // */ - // // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Float], - // // sources: List[Int], numIter: Int) = { - // // val sourceSet = sources.toSet - // // val vertices = graph.vertices.mapPartitions( - // // iter => iter.map { - // // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Float.MaxValue) ) - // // }); - - // // val edges = graph.edges // .mapValues(v => None) - // // val spGraph = new Graph(vertices, edges) - - // // val niterations = Int.MaxValue - // // spGraph.iterateDynamic( - // // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // // (a: Float, b: Float) => math.min(a, b), // merge - // // Float.MaxValue, - // // (v, a: Float) => math.min(v.data, a), // apply - // // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter - // // numIter, - // // gatherEdges = EdgeDirection.In, - // // scatterEdges = EdgeDirection.Out).vertices - // // } - - - // // /** - // // * - // // */ - // // def alternatingLeastSquares[VD: ClassManifest, ED: ClassManifest](graph: Graph[VD, Double], - // // latentK: Int, lambda: Double, numIter: Int) = { - // // val vertices = graph.vertices.mapPartitions( _.map { - // // case (vid, _) => (vid, Array.fill(latentK){ scala.util.Random.nextDouble() } ) - // // }).cache - // // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // // val edges = graph.edges // .mapValues(v => None) - // // val alsGraph = new Graph(vertices, edges) - // // alsGraph.numVPart = graph.numVPart - // // alsGraph.numEPart = graph.numEPart - - // // val niterations = Int.MaxValue - // // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( - // // (me_id, edge) => { // gather - // // val X = edge.otherVertex(me_id).data - // // val y = edge.data - // // val Xy = X.map(_ * y) - // // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray - // // (Xy, XtX) - // // }, - // // (a, b) => { - // // // The difference between the while loop and the zip is a FACTOR OF TWO in overall - // // // runtime - // // var i = 0 - // // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } - // // i = 0 - // // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } - // // a - // // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) - // // }, - // // (Array.empty[Double], Array.empty[Double]), // default value is empty - // // (vertex, accum) => { // apply - // // val XyArray = accum._1 - // // val XtXArray = accum._2 - // // 
if(XyArray.isEmpty) vertex.data // no neighbors - // // else { - // // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => - // // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + - // // (if(i == j) lambda else 1.0F) //regularization - // // } - // // val Xy = DenseMatrix.create(latentK,1,XyArray) - // // val w = XtX \ Xy - // // w.data - // // } - // // }, - // // (me_id, edge) => true, - // // numIter, - // // gatherEdges = EdgeDirection.Both, - // // scatterEdges = EdgeDirection.Both, - // // vertex => vertex.id < maxUser).vertices - // // } - - // def main(args: Array[String]) = { - // val host = args(0) - // val taskType = args(1) - // val fname = args(2) - // val options = args.drop(3).map { arg => - // arg.dropWhile(_ == '-').split('=') match { - // case Array(opt, v) => (opt -> v) - // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - // } - // } - - // System.setProperty("spark.serializer", "spark.KryoSerializer") - // //System.setProperty("spark.shuffle.compress", "false") - // System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") - - // taskType match { - // case "pagerank" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - // var tol:Float = 0.001F - // var outFname = "" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("tol", v) => tol = v.toFloat - // case ("output", v) => outFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - // println("======================================") - // println("| PageRank |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // if(isDynamic) println(" \t |-> Tolerance: " + tol) - // println(" \tNumIter: " + numIter) - // println("======================================") - - // val sc = new SparkContext(host, "PageRank(" + fname + ")") - - // val graph = Graph.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() - - // val startTime = System.currentTimeMillis - // logInfo("GRAPHX: starting tasks") - // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) - // logInfo("GRAPHX: Number of edges " + graph.edges.count) - - // val pr = Analytics.pagerank(graph, numIter) - // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) - // // else Analytics.pagerank(graph, numIter) - // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) - // if (!outFname.isEmpty) { - // println("Saving pageranks of pages to " + outFname) - // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) - // } - // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") - // sc.stop() - // } - - // case "cc" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // 
sys.exit(1) - // } - // println("======================================") - // println("| Connected Components |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // println("======================================") - - // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") - // val graph = Graph.textFile(sc, fname, a => 1.0F) - // val cc = Analytics.connectedComponents(graph, numIter) - // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) - // // else Analytics.connectedComponents(graph, numIter) - // println("Components: " + cc.vertices.map(_.data).distinct()) - - // sc.stop() - // } - - // case "shortestpath" => { - - // var numIter = Int.MaxValue - // var isDynamic = true - // var sources: List[Int] = List.empty - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("source", v) => sources ++= List(v.toInt) - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - - // if(sources.isEmpty) { - // println("No sources provided!") - // sys.exit(1) - // } - - // println("======================================") - // println("| Shortest Path |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // println(" \tSources: [" + sources.mkString(", ") + "]") - // println("======================================") - - // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") - // val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) ) - // val sp = Analytics.shortestPath(graph, sources, numIter) - // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) - // // else Analytics.shortestPath(graph, sources, numIter) - // println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) - - // sc.stop() - // } - - - // // case "als" => { - - // // var numIter = 5 - // // var lambda = 0.01 - // // var latentK = 10 - // // var usersFname = "usersFactors.tsv" - // // var moviesFname = "moviesFname.tsv" - // // var numVPart = 4 - // // var numEPart = 4 - - // // options.foreach{ - // // case ("numIter", v) => numIter = v.toInt - // // case ("lambda", v) => lambda = v.toDouble - // // case ("latentK", v) => latentK = v.toInt - // // case ("usersFname", v) => usersFname = v - // // case ("moviesFname", v) => moviesFname = v - // // case ("numVPart", v) => numVPart = v.toInt - // // case ("numEPart", v) => numEPart = v.toInt - // // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // // } - - // // println("======================================") - // // println("| Alternating Least Squares |") - // // println("--------------------------------------") - // // println(" Using parameters:") - // // println(" \tNumIter: " + numIter) - // // println(" \tLambda: " + lambda) - // // println(" \tLatentK: " + latentK) - // // println(" \tusersFname: " + usersFname) - // // println(" \tmoviesFname: " + moviesFname) - // // println("======================================") - - // // val sc = new SparkContext(host, "ALS(" + fname + ")") - // // val graph = Graph.textFile(sc, fname, a => a(0).toDouble ) - 
// // graph.numVPart = numVPart - // // graph.numEPart = numEPart - - // // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) - // // assert(maxUser < minMovie) - - // // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache - // // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t")) - // // .saveAsTextFile(usersFname) - // // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) - // // .saveAsTextFile(moviesFname) - - // // sc.stop() - // // } - - - // case _ => { - // println("Invalid task type.") - // } - // } - // } - - // /** - // * Compute the PageRank of a graph returning the pagerank of each vertex as an RDD - // */ - // def dynamicPagerank[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - // tol: Double, maxIter: Int = 10) = { - // // Compute the out degree of each vertex - // val pagerankGraph = graph.updateVertices[Int, (Int, Double, Double)](graph.outDegrees, - // (vertex, degIter) => (degIter.sum, 1.0, 1.0) - // ) - - // // Run PageRank - // GraphLab.iterateGAS(pagerankGraph)( - // (me_id, edge) => edge.src.data._2 / edge.src.data._1, // gather - // (a: Double, b: Double) => a + b, - // (vertex, a: Option[Double]) => - // (vertex.data._1, (0.15 + 0.85 * a.getOrElse(0.0)), vertex.data._2), // apply - // (me_id, edge) => math.abs(edge.src.data._2 - edge.dst.data._1) > tol, // scatter - // maxIter).mapVertices { case Vertex(vid, data) => Vertex(vid, data._2) } - // } - - // /** - // * Compute the connected component membership of each vertex - // * and return an RDD with the vertex value containing the - // * lowest vertex id in the connected component containing - // * that vertex. - // */ - // def connectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int) = { - // val ccGraph = graph.mapVertices { case Vertex(vid, _) => Vertex(vid, vid) } - // GraphLab.iterateGA[Int, ED, Int](ccGraph)( - // (me_id, edge) => edge.otherVertex(me_id).data, // gather - // (a: Int, b: Int) => math.min(a, b), // merge - // (v, a: Option[Int]) => math.min(v.data, a.getOrElse(Integer.MAX_VALUE)), // apply - // numIter, - // gatherDirection = EdgeDirection.Both) - // } - - // /** - // * Compute the shortest path to a set of markers - // */ - // def shortestPath[VD: Manifest](graph: Graph[VD, Double], sources: List[Int], numIter: Int) = { - // val sourceSet = sources.toSet - // val spGraph = graph.mapVertices { - // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0 else Double.MaxValue)) - // } - // GraphLab.iterateGA[Double, Double, Double](spGraph)( - // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // (a: Double, b: Double) => math.min(a, b), // merge - // (v, a: Option[Double]) => math.min(v.data, a.getOrElse(Double.MaxValue)), // apply - // numIter, - // gatherDirection = EdgeDirection.In) - // } - - // /** - // * Compute the connected component membership of each vertex - // * and return an RDD with the vertex value containing the - // * lowest vertex id in the connected component containing - // * that vertex. 
- // */ - // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - // numIter: Int = Int.MaxValue) = { - - // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) - // val edges = graph.edges // .mapValues(v => None) - // val ccGraph = new Graph(vertices, edges) - - // ccGraph.iterateDynamic( - // (me_id, edge) => edge.otherVertex(me_id).data, // gather - // (a: Int, b: Int) => math.min(a, b), // merge - // Integer.MAX_VALUE, - // (v, a: Int) => math.min(v.data, a), // apply - // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter - // numIter, - // gatherEdges = EdgeDirection.Both, - // scatterEdges = EdgeDirection.Both).vertices - // // - // // graph_ret.vertices.collect.foreach(println) - // // graph_ret.edges.take(10).foreach(println) - // } - - - // /** - // * Compute the shortest path to a set of markers - // */ - // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Double], - // sources: List[Int], numIter: Int) = { - // val sourceSet = sources.toSet - // val vertices = graph.vertices.mapPartitions( - // iter => iter.map { - // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Double.MaxValue) ) - // }); - - // val edges = graph.edges // .mapValues(v => None) - // val spGraph = new Graph(vertices, edges) - - // val niterations = Int.MaxValue - // spGraph.iterateDynamic( - // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // (a: Double, b: Double) => math.min(a, b), // merge - // Double.MaxValue, - // (v, a: Double) => math.min(v.data, a), // apply - // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter - // numIter, - // gatherEdges = EdgeDirection.In, - // scatterEdges = EdgeDirection.Out).vertices - // } - - - // /** - // * - // */ - // def alternatingLeastSquares[VD: ClassManifest, ED: ClassManifest](graph: Graph[VD, Double], - // latentK: Int, lambda: Double, numIter: Int) = { - // val vertices = graph.vertices.mapPartitions( _.map { - // case (vid, _) => (vid, Array.fill(latentK){ scala.util.Random.nextDouble() } ) - // }).cache - // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // val edges = graph.edges // .mapValues(v => None) - // val alsGraph = new Graph(vertices, edges) - // alsGraph.numVPart = graph.numVPart - // alsGraph.numEPart = graph.numEPart - - // val niterations = Int.MaxValue - // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( - // (me_id, edge) => { // gather - // val X = edge.otherVertex(me_id).data - // val y = edge.data - // val Xy = X.map(_ * y) - // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray - // (Xy, XtX) - // }, - // (a, b) => { - // // The difference between the while loop and the zip is a FACTOR OF TWO in overall - // // runtime - // var i = 0 - // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } - // i = 0 - // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } - // a - // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) - // }, - // (Array.empty[Double], Array.empty[Double]), // default value is empty - // (vertex, accum) => { // apply - // val XyArray = accum._1 - // val XtXArray = accum._2 - // if(XyArray.isEmpty) vertex.data // no neighbors - // else { - // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => - // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + - // (if(i == j) lambda else 1.0F) //regularization - 
// } - // val Xy = DenseMatrix.create(latentK,1,XyArray) - // val w = XtX \ Xy - // w.data - // } - // }, - // (me_id, edge) => true, - // numIter, - // gatherEdges = EdgeDirection.Both, - // scatterEdges = EdgeDirection.Both, - // vertex => vertex.id < maxUser).vertices - // } - - // def main(args: Array[String]) = { - // val host = args(0) - // val taskType = args(1) - // val fname = args(2) - // val options = args.drop(3).map { arg => - // arg.dropWhile(_ == '-').split('=') match { - // case Array(opt, v) => (opt -> v) - // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - // } - // } - - // System.setProperty("spark.serializer", "spark.KryoSerializer") - // //System.setProperty("spark.shuffle.compress", "false") - // System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") - - // taskType match { - // case "pagerank" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - // var tol:Double = 0.001 - // var outFname = "" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("tol", v) => tol = v.toDouble - // case ("output", v) => outFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - // println("======================================") - // println("| PageRank |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // if(isDynamic) println(" \t |-> Tolerance: " + tol) - // println(" \tNumIter: " + numIter) - // println("======================================") - - // val sc = new SparkContext(host, "PageRank(" + fname + ")") - - // val graph = Graph.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache() - - // val startTime = System.currentTimeMillis - // logInfo("GRAPHX: starting tasks") - // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) - // logInfo("GRAPHX: Number of edges " + graph.edges.count) - - // val pr = Analytics.pagerank(graph, numIter) - // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) - // // else Analytics.pagerank(graph, numIter) - // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) - // if (!outFname.isEmpty) { - // println("Saving pageranks of pages to " + outFname) - // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) - // } - // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") - // sc.stop() - // } - - // case "cc" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - // println("======================================") - // println("| Connected Components |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // 
println("======================================") - - // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") - // val graph = Graph.textFile(sc, fname, a => 1.0) - // val cc = Analytics.connectedComponents(graph, numIter) - // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) - // // else Analytics.connectedComponents(graph, numIter) - // println("Components: " + cc.vertices.map(_.data).distinct()) - - // sc.stop() - // } - - // case "shortestpath" => { - - // var numIter = Int.MaxValue - // var isDynamic = true - // var sources: List[Int] = List.empty - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("source", v) => sources ++= List(v.toInt) - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - - // if(sources.isEmpty) { - // println("No sources provided!") - // sys.exit(1) - // } - - // println("======================================") - // println("| Shortest Path |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // println(" \tSources: [" + sources.mkString(", ") + "]") - // println("======================================") - - // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") - // val graph = Graph.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) ) - // val sp = Analytics.shortestPath(graph, sources, numIter) - // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) - // // else Analytics.shortestPath(graph, sources, numIter) - // println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) - - // sc.stop() - // } - - - // case "als" => { - - // var numIter = 5 - // var lambda = 0.01 - // var latentK = 10 - // var usersFname = "usersFactors.tsv" - // var moviesFname = "moviesFname.tsv" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("lambda", v) => lambda = v.toDouble - // case ("latentK", v) => latentK = v.toInt - // case ("usersFname", v) => usersFname = v - // case ("moviesFname", v) => moviesFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // println("======================================") - // println("| Alternating Least Squares |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tNumIter: " + numIter) - // println(" \tLambda: " + lambda) - // println(" \tLatentK: " + latentK) - // println(" \tusersFname: " + usersFname) - // println(" \tmoviesFname: " + moviesFname) - // println("======================================") - - // val sc = new SparkContext(host, "ALS(" + fname + ")") - // val graph = Graph.textFile(sc, fname, a => a(0).toDouble ) - // graph.numVPart = numVPart - // graph.numEPart = numEPart - - // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) - // assert(maxUser < minMovie) - - // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache - // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + 
r._2.mkString("\t")) - // .saveAsTextFile(usersFname) - // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) - // .saveAsTextFile(moviesFname) - - // sc.stop() - // } - - - // case _ => { - // println("Invalid task type.") - // } - // } - // } - -} diff --git a/graph/src/main/scala/spark/graph/Edge.scala b/graph/src/main/scala/spark/graph/Edge.scala deleted file mode 100644 index cb057a467a..0000000000 --- a/graph/src/main/scala/spark/graph/Edge.scala +++ /dev/null @@ -1,13 +0,0 @@ -package spark.graph - - -/** - * A single directed edge consisting of a source id, target id, - * and the data associated with the Edgee. - * - * @tparam ED type of the edge attribute - */ -case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] ( - var src: Vid = 0, - var dst: Vid = 0, - var data: ED = nullValue[ED]) diff --git a/graph/src/main/scala/spark/graph/EdgeDirection.scala b/graph/src/main/scala/spark/graph/EdgeDirection.scala deleted file mode 100644 index 38caac44d6..0000000000 --- a/graph/src/main/scala/spark/graph/EdgeDirection.scala +++ /dev/null @@ -1,32 +0,0 @@ -package spark.graph - - -/** - * The direction of directed edge relative to a vertex used to select - * the set of adjacent neighbors when running a neighborhood query. - */ -sealed abstract class EdgeDirection { - def reverse: EdgeDirection = this match { - case EdgeDirection.In => EdgeDirection.In - case EdgeDirection.Out => EdgeDirection.Out - case EdgeDirection.Both => EdgeDirection.Both - } -} - - -object EdgeDirection { - /** - * Edges arriving at a vertex. - */ - case object In extends EdgeDirection - - /** - * Edges originating from a vertex - */ - case object Out extends EdgeDirection - - /** - * All edges adjacent to a vertex - */ - case object Both extends EdgeDirection -} diff --git a/graph/src/main/scala/spark/graph/EdgeTriplet.scala b/graph/src/main/scala/spark/graph/EdgeTriplet.scala deleted file mode 100644 index 3ed8052794..0000000000 --- a/graph/src/main/scala/spark/graph/EdgeTriplet.scala +++ /dev/null @@ -1,53 +0,0 @@ -package spark.graph - -/** - * An edge triplet represents two vertices and edge along with their attributes. - * - * @tparam VD the type of the vertex attribute. - * @tparam ED the type of the edge attribute - */ -class EdgeTriplet[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD, - @specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] { - /** - * The vertex (id and attribute) corresponding to the source vertex. - */ - var src: Vertex[VD] = _ - - /** - * The vertex (id and attribute) corresponding to the target vertex. - */ - var dst: Vertex[VD] = _ - - /** - * The attribute associated with the edge. - */ - var data: ED = _ - - /** - * Given one vertex in the edge return the other vertex. - * - * @param vid the id one of the two vertices on the edge. - * @return the other vertex on the edge. - */ - def otherVertex(vid: Vid): Vertex[VD] = - if (src.id == vid) dst else { assert(dst.id == vid); src } - - /** - * Get the vertex object for the given vertex in the edge. - * - * @param vid the id of one of the two vertices on the edge - * @return the vertex object with that id. - */ - def vertex(vid: Vid): Vertex[VD] = - if (src.id == vid) src else { assert(dst.id == vid); dst } - - /** - * Return the relative direction of the edge to the corresponding vertex. - * - * @param vid the id of one of the two vertices in the edge. - * @return the relative direction of the edge to the corresponding vertex. 
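// Illustrative sketch (the helper name `describeEdge` and the Int/Double attribute types
// are assumptions for this example only): how vertex, otherVertex and relativeDirection
// combine when code is handed one endpoint id plus a full triplet.
import spark.graph._

def describeEdge(vid: Vid, et: EdgeTriplet[Int, Double]): String = {
  val self  = et.vertex(vid)              // the endpoint whose id is `vid`
  val other = et.otherVertex(vid)         // the opposite endpoint
  val dir   = et.relativeDirection(vid)   // EdgeDirection.Out when `vid` is the source
  "vertex " + self.id + " touches " + other.id + " via " + dir + " edge (data = " + et.data + ")"
}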
- */ - def relativeDirection(vid: Vid): EdgeDirection = - if (vid == src.id) EdgeDirection.Out else { assert(vid == dst.id); EdgeDirection.In } - -} diff --git a/graph/src/main/scala/spark/graph/Graph.scala b/graph/src/main/scala/spark/graph/Graph.scala deleted file mode 100644 index 594b3b5495..0000000000 --- a/graph/src/main/scala/spark/graph/Graph.scala +++ /dev/null @@ -1,394 +0,0 @@ -package spark.graph - -import spark.RDD - - - -/** - * The Graph abstractly represents a graph with arbitrary objects associated - * with vertices and edges. The graph provides basic operations to access and - * manipulate the data associated with vertices and edges as well as the - * underlying structure. Like Spark RDDs, the graph is a functional - * data-structure in which mutating operations return new graphs. - * - * @tparam VD The type of object associated with each vertex. - * - * @tparam ED The type of object associated with each edge - */ -abstract class Graph[VD: ClassManifest, ED: ClassManifest] { - - /** - * Get the vertices and their data. - * - * @return An RDD containing the vertices in this graph - * - * @see Vertex for the vertex type. - * - * @todo should vertices return tuples instead of vertex objects? - */ - def vertices: RDD[Vertex[VD]] - - /** - * Get the Edges and their data as an RDD. The entries in the RDD contain - * just the source id and target id along with the edge data. - * - * @return An RDD containing the edges in this graph - * - * @see Edge for the edge type. - * @see edgesWithVertices to get an RDD which contains all the edges along - * with their vertex data. - * - * @todo Should edges return 3 tuples instead of Edge objects? In this case - * we could rename EdgeTriplet to Edge? - */ - def edges: RDD[Edge[ED]] - - /** - * Get the edges with the vertex data associated with the adjacent pair of - * vertices. - * - * @return An RDD containing edge triplets. - * - * @example This operation might be used to evaluate a graph coloring where - * we would like to check that both vertices are a different color. - * {{{ - * type Color = Int - * val graph: Graph[Color, Int] = Graph.textFile("hdfs://file.tsv") - * val numInvalid = graph.edgesWithVertices() - * .map(e => if(e.src.data == e.dst.data) 1 else 0).sum - * }}} - * - * @see edges() If only the edge data and adjacent vertex ids are required. - * - */ - def triplets: RDD[EdgeTriplet[VD, ED]] - - /** - * Return a graph that is cached when first created. This is used to pin a - * graph in memory enabling multiple queries to reuse the same construction - * process. - * - * @see RDD.cache() for a more detailed explanation of caching. - */ - def cache(): Graph[VD, ED] - - /** - * Construct a new graph where each vertex value has been transformed by the - * map function. - * - * @note This graph is not changed and that the new graph has the same - * structure. As a consequence the underlying index structures can be - * reused. - * - * @param map the function from a vertex object to a new vertex value. - * - * @tparam VD2 the new vertex data type - * - * @example We might use this operation to change the vertex values from one - * type to another to initialize an algorithm. 
- * {{{ - * val rawGraph: Graph[(), ()] = Graph.textFile("hdfs://file") - * val root = 42 - * var bfsGraph = rawGraph - * .mapVertices[Int](v => if(v.id == 0) 0 else Math.MaxValue) - * }}} - * - */ - def mapVertices[VD2: ClassManifest](map: Vertex[VD] => VD2): Graph[VD2, ED] - - /** - * Construct a new graph where each the value of each edge is transformed by - * the map operation. This function is not passed the vertex value for the - * vertices adjacent to the edge. If vertex values are desired use the - * mapEdgesWithVertices function. - * - * @note This graph is not changed and that the new graph has the same - * structure. As a consequence the underlying index structures can be - * reused. - * - * @param map the function from an edge object to a new edge value. - * - * @tparam ED2 the new edge data type - * - * @example This function might be used to initialize edge attributes. - * - */ - def mapEdges[ED2: ClassManifest](map: Edge[ED] => ED2): Graph[VD, ED2] - - /** - * Construct a new graph where each the value of each edge is transformed by - * the map operation. This function passes vertex values for the adjacent - * vertices to the map function. If adjacent vertex values are not required, - * consider using the mapEdges function instead. - * - * @note This graph is not changed and that the new graph has the same - * structure. As a consequence the underlying index structures can be - * reused. - * - * @param map the function from an edge object to a new edge value. - * - * @tparam ED2 the new edge data type - * - * @example This function might be used to initialize edge attributes based - * on the attributes associated with each vertex. - * {{{ - * val rawGraph: Graph[Int, Int] = someLoadFunction() - * val graph = rawGraph.mapEdgesWithVertices[Int]( edge => - * edge.src.data - edge.dst.data) - * }}} - * - */ - def mapTriplets[ED2: ClassManifest]( - map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] - - - /** - * Remove edges conntecting vertices that are not in the graph. - * - * @todo remove this function and ensure that for a graph G=(V,E): - * if (u,v) in E then u in V and v in V - */ - def correctEdges(): Graph[VD, ED] - - /** - * Construct a new graph with all the edges reversed. If this graph contains - * an edge from a to b then the returned graph contains an edge from b to a. - * - */ - def reverse: Graph[VD, ED] - - - /** - * This function takes a vertex and edge predicate and constructs the subgraph - * that consists of vertices and edges that satisfy the predict. The resulting - * graph contains the vertices and edges that satisfy: - * - * V' = {v : for all v in V where vpred(v)} - * E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)} - * - * @param epred the edge predicate which takes a triplet and evaluates to true - * if the edge is to remain in the subgraph. Note that only edges in which both - * vertices satisfy the vertex predicate are considered. - * - * @param vpred the vertex predicate which takes a vertex object and evaluates - * to true if the vertex is to be included in the subgraph - * - * @return the subgraph containing only the vertices and edges that satisfy the - * predicates. - */ - def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (_ => true), - vpred: Vertex[VD] => Boolean = (_ => true) ): Graph[VD, ED] - - - // /** - // * Combine the attrributes of edges connecting the same vertices. 
- // * - // * @todo Do we want to support this function - // */ - // def combineEdges(reduce: (ED, ED) => ED): Graph[VD, ED] - - - /** - * This function is used to compute a statistic for the neighborhood of each - * vertex. - * - * This is one of the core functions in the Graph API in that enables - * neighborhood level computation. For example this function can be used to - * count neighbors satisfying a predicate or implement PageRank. - * - * @note The returned RDD may contain fewer entries than their are vertices - * in the graph. This is because some vertices may not have neighbors or the - * map function may return None for all neighbors. - * - * @param mapFunc the function applied to each edge adjacent to each vertex. - * The mapFunc can optionally return None in which case it does not - * contribute to the final sum. - * @param mergeFunc the function used to merge the results of each map - * operation. - * @param direction the direction of edges to consider (e.g., In, Out, Both). - * @tparam VD2 The returned type of the aggregation operation. - * - * @return A Spark.RDD containing tuples of vertex identifiers and thee - * resulting value. Note that the returned RDD may contain fewer vertices - * than in the original graph since some vertices may not have neighbors or - * the map function could return None for all neighbors. - * - * @example We can use this function to compute the average follower age for - * each user - * {{{ - * val graph: Graph[Int,Int] = loadGraph() - * val averageFollowerAge: RDD[(Int, Int)] = - * graph.aggregateNeigbhros[(Int,Double)]( - * (vid, edge) => (edge.otherVertex(vid).data, 1), - * (a, b) => (a._1 + b._1, a._2 + b._2), - * EdgeDirection.In) - * .mapValues{ case (sum,followers) => sum.toDouble / followers} - * }}} - * - */ - def aggregateNeighbors[VD2: ClassManifest]( - mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], - mergeFunc: (VD2, VD2) => VD2, - direction: EdgeDirection) - : RDD[(Vid, VD2)] - - - /** - * This function is used to compute a statistic for the neighborhood of each - * vertex and returns a value for all vertices (including those without - * neighbors). - * - * This is one of the core functions in the Graph API in that enables - * neighborhood level computation. For example this function can be used to - * count neighbors satisfying a predicate or implement PageRank. - * - * @note Because the a default value is provided all vertices will have a - * corresponding entry in the returned RDD. - * - * @param mapFunc the function applied to each edge adjacent to each vertex. - * The mapFunc can optionally return None in which case it does not - * contribute to the final sum. - * @param reduceFunc the function used to merge the results of each map - * operation. - * @param default the default value to use for each vertex if it has no - * neighbors or the map function repeatedly evaluates to none - * @param direction the direction of edges to consider (e.g., In, Out, Both). - * @tparam VD2 The returned type of the aggregation operation. - * - * @return A Spark.RDD containing tuples of vertex identifiers and - * their resulting value. There will be exactly one entry for ever vertex in - * the original graph. 
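// Hedged usage sketch (the graph value `g: Graph[String, Double]` is assumed to be in
// scope): because a default is supplied, vertices with no incoming edges still appear
// in the result, with count 0.
import spark.RDD
import spark.graph._

val inDegreesWithZeros: RDD[(Vid, Int)] =
  g.aggregateNeighbors[Int](
    (vid, et) => Some(1),   // every in-edge contributes one
    _ + _,                  // merge per-edge counts
    0,                      // default for vertices that gather nothing
    EdgeDirection.In)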
- * - * @example We can use this function to compute the average follower age - * for each user - * {{{ - * val graph: Graph[Int,Int] = loadGraph() - * val averageFollowerAge: RDD[(Int, Int)] = - * graph.aggregateNeigbhros[(Int,Double)]( - * (vid, edge) => (edge.otherVertex(vid).data, 1), - * (a, b) => (a._1 + b._1, a._2 + b._2), - * -1, - * EdgeDirection.In) - * .mapValues{ case (sum,followers) => sum.toDouble / followers} - * }}} - * - * @todo Should this return a graph with the new vertex values? - * - */ - def aggregateNeighbors[VD2: ClassManifest]( - mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], - reduceFunc: (VD2, VD2) => VD2, - default: VD2, // Should this be a function or a value? - direction: EdgeDirection) - : RDD[(Vid, VD2)] - - - /** - * Join the vertices with an RDD and then apply a function from the the - * vertex and RDD entry to a new vertex value and type. The input table should - * contain at most one entry for each vertex. If no entry is provided the - * map function is invoked passing none. - * - * @tparam U the type of entry in the table of updates - * @tparam VD2 the new vertex value type - * - * @param table the table to join with the vertices in the graph. The table - * should contain at most one entry for each vertex. - * @param mapFunc the function used to compute the new vertex values. The - * map function is invoked for all vertices, even those that do not have a - * corresponding entry in the table. - * - * @example This function is used to update the vertices with new values - * based on external data. For example we could add the out degree to each - * vertex record - * {{{ - * val rawGraph: Graph[(),()] = Graph.textFile("webgraph") - * val outDeg: RDD[(Int, Int)] = rawGraph.outDegrees() - * val graph = rawGraph.leftJoinVertices[Int,Int](outDeg, - * (v, deg) => deg.getOrElse(0) ) - * }}} - * - * @todo Should this function be curried to enable type inference? For - * example - * {{{ - * graph.leftJoinVertices(tbl)( (v, row) => row.getOrElse(0) ) - * }}} - * @todo Is leftJoinVertices the right name? - */ - def leftJoinVertices[U: ClassManifest, VD2: ClassManifest]( - table: RDD[(Vid, U)], - mapFunc: (Vertex[VD], Option[U]) => VD2) - : Graph[VD2, ED] - - /** - * Join the vertices with an RDD and then apply a function from the the - * vertex and RDD entry to a new vertex value. The input table should - * contain at most one entry for each vertex. If no entry is provided the - * map function is skipped and the old value is used. - * - * @tparam U the type of entry in the table of updates - * @param table the table to join with the vertices in the graph. The table - * should contain at most one entry for each vertex. - * @param mapFunc the function used to compute the new vertex values. The - * map function is invoked only for vertices with a corresponding entry in - * the table otherwise the old vertex value is used. - * - * @note for small tables this function can be much more efficient than - * leftJoinVertices - * - * @example This function is used to update the vertices with new values - * based on external data. For example we could add the out degree to each - * vertex record - * {{{ - * val rawGraph: Graph[Int,()] = Graph.textFile("webgraph") - * .mapVertices(v => 0) - * val outDeg: RDD[(Int, Int)] = rawGraph.outDegrees() - * val graph = rawGraph.leftJoinVertices[Int,Int](outDeg, - * (v, deg) => deg ) - * }}} - * - * @todo Should this function be curried to enable type inference? 
For - * example - * {{{ - * graph.joinVertices(tbl)( (v, row) => row ) - * }}} - */ - def joinVertices[U: ClassManifest]( - table: RDD[(Vid, U)], - mapFunc: (Vertex[VD], U) => VD) - : Graph[VD, ED] - - // Save a copy of the GraphOps object so there is always one unique GraphOps object - // for a given Graph object, and thus the lazy vals in GraphOps would work as intended. - val ops = new GraphOps(this) -} - - -object Graph { - - import spark.graph.impl._ - import spark.SparkContext._ - - def apply(rawEdges: RDD[(Vid, Vid)], uniqueEdges: Boolean = true): Graph[Int, Int] = { - // Reduce to unique edges. - val edges: RDD[Edge[Int]] = - if (uniqueEdges) { - rawEdges.map((_, 1)).reduceByKey(_ + _).map { case ((s, t), cnt) => Edge(s, t, cnt) } - } else { - rawEdges.map { case (s, t) => Edge(s, t, 1) } - } - // Determine unique vertices - val vertices: RDD[Vertex[Int]] = edges.flatMap{ case Edge(s, t, cnt) => Array((s, 1), (t, 1)) } - .reduceByKey(_ + _) - .map{ case (id, deg) => Vertex(id, deg) } - // Return graph - new GraphImpl(vertices, edges) - } - - def apply[VD: ClassManifest, ED: ClassManifest]( - vertices: RDD[Vertex[VD]], edges: RDD[Edge[ED]]): Graph[VD, ED] = { - new GraphImpl(vertices, edges) - } - - implicit def graphToGraphOps[VD: ClassManifest, ED: ClassManifest](g: Graph[VD, ED]) = g.ops -} diff --git a/graph/src/main/scala/spark/graph/GraphKryoRegistrator.scala b/graph/src/main/scala/spark/graph/GraphKryoRegistrator.scala deleted file mode 100644 index e1cb77f114..0000000000 --- a/graph/src/main/scala/spark/graph/GraphKryoRegistrator.scala +++ /dev/null @@ -1,24 +0,0 @@ -package spark.graph - -import com.esotericsoftware.kryo.Kryo - -import spark.KryoRegistrator - - -class GraphKryoRegistrator extends KryoRegistrator { - - def registerClasses(kryo: Kryo) { - //kryo.register(classOf[(Int, Float, Float)]) - registerClass[Int, Int, Int](kryo) - - // This avoids a large number of hash table lookups. - kryo.setReferences(false) - } - - private def registerClass[VD: Manifest, ED: Manifest, VD2: Manifest](kryo: Kryo) { - kryo.register(classOf[Vertex[VD]]) - kryo.register(classOf[Edge[ED]]) - kryo.register(classOf[MutableTuple2[VD, VD2]]) - kryo.register(classOf[(Vid, VD2)]) - } -} diff --git a/graph/src/main/scala/spark/graph/GraphLab.scala b/graph/src/main/scala/spark/graph/GraphLab.scala deleted file mode 100644 index f89c2a39d7..0000000000 --- a/graph/src/main/scala/spark/graph/GraphLab.scala +++ /dev/null @@ -1,127 +0,0 @@ -package spark.graph - -import scala.collection.JavaConversions._ -import spark.RDD - -/** - * This object implement the graphlab gather-apply-scatter api. - */ -object GraphLab { - - /** - * Execute the GraphLab Gather-Apply-Scatter API - * - * @todo finish documenting GraphLab Gather-Apply-Scatter API - * - * @param graph The graph on which to execute the GraphLab API - * @param gatherFunc The gather function is executed on each edge triplet - * adjacent to a vertex and returns an accumulator which - * is then merged using the merge function. - * @param mergeFunc An accumulative associative operation on the result of - * the gather type. - * @param applyFunc Takes a vertex and the final result of the merge operations - * on the adjacent edges and returns a new vertex value. - * @param scatterFunc Executed after the apply function the scatter function takes - * a triplet and signals whether the neighboring vertex program - * must be recomputed. - * @param numIter The maximum number of iterations to run. 
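// Hedged end-to-end sketch (the graph value `g: Graph[Int, Double]` and the 30-iteration
// cap are assumptions): propagate the minimum vertex id through the gather-apply-scatter
// loop, i.e. compute connected components.
import spark.graph._

val ccInput = g.mapVertices { case Vertex(vid, _) => vid }        // each label starts as own id
val cc = GraphLab.iterate[Vid, Double, Vid](ccInput)(
  (vid, et) => et.otherVertex(vid).data,                          // gather: neighbor's label
  (a, b) => math.min(a, b),                                       // merge: keep the smallest
  (v, acc) => math.min(v.data, acc.getOrElse(Long.MaxValue)),     // apply: shrink own label
  (vid, et) => et.otherVertex(vid).data > et.vertex(vid).data,    // scatter: neighbor may still improve
  numIter = 30,
  gatherDirection = EdgeDirection.Both,
  scatterDirection = EdgeDirection.Both)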
- * @param gatherDirection The direction of edges to consider during the gather phase - * @param scatterDirection The direction of edges to consider during the scatter phase - * - * @tparam VD The graph vertex attribute type - * @tparam ED The graph edge attribute type - * @tparam A The type accumulated during the gather phase - * @return the resulting graph after the algorithm converges - */ - def iterate[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( - gatherFunc: (Vid, EdgeTriplet[VD, ED]) => A, - mergeFunc: (A, A) => A, - applyFunc: (Vertex[VD], Option[A]) => VD, - scatterFunc: (Vid, EdgeTriplet[VD, ED]) => Boolean, - numIter: Int = Integer.MAX_VALUE, - gatherDirection: EdgeDirection = EdgeDirection.In, - scatterDirection: EdgeDirection = EdgeDirection.Out): Graph[VD, ED] = { - - - // Add an active attribute to all vertices to track convergence. - var activeGraph = graph.mapVertices { - case Vertex(id, data) => (true, data) - }.cache() - - // The gather function wrapper strips the active attribute and - // only invokes the gather function on active vertices - def gather(vid: Vid, e: EdgeTriplet[(Boolean, VD), ED]) = { - if (e.vertex(vid).data._1) { - val edge = new EdgeTriplet[VD,ED] - edge.src = Vertex(e.src.id, e.src.data._2) - edge.dst = Vertex(e.dst.id, e.dst.data._2) - edge.data = e.data - Some(gatherFunc(vid, edge)) - } else { - None - } - } - - // The apply function wrapper strips the vertex of the active attribute - // and only invokes the apply function on active vertices - def apply(v: Vertex[(Boolean, VD)], accum: Option[A]) = { - if (v.data._1) (true, applyFunc(Vertex(v.id, v.data._2), accum)) - else (false, v.data._2) - } - - // The scatter function wrapper strips the vertex of the active attribute - // and only invokes the scatter function on active vertices - def scatter(rawVid: Vid, e: EdgeTriplet[(Boolean, VD), ED]) = { - val vid = e.otherVertex(rawVid).id - if (e.vertex(vid).data._1) { - val edge = new EdgeTriplet[VD,ED] - edge.src = Vertex(e.src.id, e.src.data._2) - edge.dst = Vertex(e.dst.id, e.dst.data._2) - edge.data = e.data -// val src = Vertex(e.src.id, e.src.data._2) -// val dst = Vertex(e.dst.id, e.dst.data._2) -// val edge = new EdgeTriplet[VD,ED](src, dst, e.data) - Some(scatterFunc(vid, edge)) - } else { - None - } - } - - // Used to set the active status of vertices for the next round - def applyActive(v: Vertex[(Boolean, VD)], accum: Option[Boolean]) = - (accum.getOrElse(false), v.data._2) - - // Main Loop --------------------------------------------------------------------- - var i = 0 - var numActive = activeGraph.numVertices - while (i < numIter && numActive > 0) { - - val accUpdates: RDD[(Vid, A)] = - activeGraph.aggregateNeighbors(gather, mergeFunc, gatherDirection) - - activeGraph = activeGraph.leftJoinVertices(accUpdates, apply).cache() - - // Scatter is basically a gather in the opposite direction so we reverse the edge direction - val activeVertices: RDD[(Vid, Boolean)] = - activeGraph.aggregateNeighbors(scatter, _ || _, scatterDirection.reverse) - - activeGraph = activeGraph.leftJoinVertices(activeVertices, applyActive).cache() - - numActive = activeGraph.vertices.map(v => if (v.data._1) 1 else 0).reduce(_ + _) - println("Number active vertices: " + numActive) - i += 1 - } - - // Remove the active attribute from the vertex data before returning the graph - activeGraph.mapVertices(v => v.data._2) - } -} - - - - - - - - - diff --git a/graph/src/main/scala/spark/graph/GraphLoader.scala 
b/graph/src/main/scala/spark/graph/GraphLoader.scala deleted file mode 100644 index 7e1a054413..0000000000 --- a/graph/src/main/scala/spark/graph/GraphLoader.scala +++ /dev/null @@ -1,54 +0,0 @@ -package spark.graph - -import spark.RDD -import spark.SparkContext -import spark.SparkContext._ -import spark.graph.impl.GraphImpl - - -object GraphLoader { - - /** - * Load an edge list from file initializing the Graph RDD - */ - def textFile[ED: ClassManifest]( - sc: SparkContext, - path: String, - edgeParser: Array[String] => ED, - minEdgePartitions: Int = 1, - minVertexPartitions: Int = 1) - : GraphImpl[Int, ED] = { - - // Parse the edge data table - val edges = sc.textFile(path).flatMap { line => - if (!line.isEmpty && line(0) != '#') { - val lineArray = line.split("\\s+") - if(lineArray.length < 2) { - println("Invalid line: " + line) - assert(false) - } - val source = lineArray(0) - val target = lineArray(1) - val tail = lineArray.drop(2) - val edata = edgeParser(tail) - Array(Edge(source.trim.toInt, target.trim.toInt, edata)) - } else { - Array.empty[Edge[ED]] - } - }.cache() - - val graph = fromEdges(edges) - // println("Loaded graph:" + - // "\n\t#edges: " + graph.numEdges + - // "\n\t#vertices: " + graph.numVertices) - - graph - } - - def fromEdges[ED: ClassManifest](edges: RDD[Edge[ED]]): GraphImpl[Int, ED] = { - val vertices = edges.flatMap { edge => List((edge.src, 1), (edge.dst, 1)) } - .reduceByKey(_ + _) - .map{ case (vid, degree) => Vertex(vid, degree) } - new GraphImpl[Int, ED](vertices, edges) - } -} diff --git a/graph/src/main/scala/spark/graph/GraphOps.scala b/graph/src/main/scala/spark/graph/GraphOps.scala deleted file mode 100644 index d98cd8d44c..0000000000 --- a/graph/src/main/scala/spark/graph/GraphOps.scala +++ /dev/null @@ -1,30 +0,0 @@ -package spark.graph - -import spark.RDD - - -class GraphOps[VD: ClassManifest, ED: ClassManifest](g: Graph[VD, ED]) { - - lazy val numEdges: Long = g.edges.count() - - lazy val numVertices: Long = g.vertices.count() - - lazy val inDegrees: RDD[(Vid, Int)] = { - g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.In) - } - - lazy val outDegrees: RDD[(Vid, Int)] = { - g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.Out) - } - - lazy val degrees: RDD[(Vid, Int)] = { - g.aggregateNeighbors((vid, edge) => Some(1), _+_, EdgeDirection.Both) - } - - def collectNeighborIds(edgeDirection: EdgeDirection) : RDD[(Vid, Array[Vid])] = { - g.aggregateNeighbors( - (vid, edge) => Some(Array(edge.otherVertex(vid).id)), - (a, b) => a ++ b, - edgeDirection) - } -} diff --git a/graph/src/main/scala/spark/graph/Pregel.scala b/graph/src/main/scala/spark/graph/Pregel.scala deleted file mode 100644 index 0a564b8041..0000000000 --- a/graph/src/main/scala/spark/graph/Pregel.scala +++ /dev/null @@ -1,36 +0,0 @@ -package spark.graph - -import spark.RDD - - -object Pregel { - - def iterate[VD: ClassManifest, ED: ClassManifest, A: ClassManifest](graph: Graph[VD, ED])( - vprog: (Vertex[VD], A) => VD, - sendMsg: (Vid, EdgeTriplet[VD, ED]) => Option[A], - mergeMsg: (A, A) => A, - initialMsg: A, - numIter: Int) - : Graph[VD, ED] = { - - var g = graph - //var g = graph.cache() - var i = 0 - - def mapF(vid: Vid, edge: EdgeTriplet[VD,ED]) = sendMsg(edge.otherVertex(vid).id, edge) - - def runProg(v: Vertex[VD], msg: Option[A]): VD = { - if (msg.isEmpty) v.data else vprog(v, msg.get) - } - - var msgs: RDD[(Vid, A)] = g.vertices.map{ v => (v.id, initialMsg) } - - while (i < numIter) { - g = g.leftJoinVertices(msgs, runProg).cache() - msgs = 
g.aggregateNeighbors(mapF, mergeMsg, EdgeDirection.In) - i += 1 - } - g - } - -} diff --git a/graph/src/main/scala/spark/graph/Vertex.scala b/graph/src/main/scala/spark/graph/Vertex.scala deleted file mode 100644 index 32653571f7..0000000000 --- a/graph/src/main/scala/spark/graph/Vertex.scala +++ /dev/null @@ -1,15 +0,0 @@ -package spark.graph - -/** - * A graph vertex consists of a vertex id and attribute. - * - * @tparam VD the type of the vertex attribute. - */ -case class Vertex[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD] ( - var id: Vid = 0, - var data: VD = nullValue[VD]) { - - def this(tuple: (Vid, VD)) = this(tuple._1, tuple._2) - - def tuple = (id, data) -} diff --git a/graph/src/main/scala/spark/graph/impl/EdgePartition.scala b/graph/src/main/scala/spark/graph/impl/EdgePartition.scala deleted file mode 100644 index 4e0d5f41b9..0000000000 --- a/graph/src/main/scala/spark/graph/impl/EdgePartition.scala +++ /dev/null @@ -1,53 +0,0 @@ -package spark.graph.impl - -import scala.collection.mutable.ArrayBuilder - -import it.unimi.dsi.fastutil.ints.IntArrayList - -import spark.graph._ - - -/** - * A partition of edges in 3 large columnar arrays. - */ -private[graph] -class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassManifest] { - - private var _data: Array[ED] = _ - private var _dataBuilder = ArrayBuilder.make[ED] - - val srcIds = new VertexArrayList - val dstIds = new VertexArrayList - - def data: Array[ED] = _data - - /** Add a new edge to the partition. */ - def add(src: Vid, dst: Vid, d: ED) { - srcIds.add(src) - dstIds.add(dst) - _dataBuilder += d - } - - def trim() { - srcIds.trim() - dstIds.trim() - _data = _dataBuilder.result() - } - - def size: Int = srcIds.size - - def iterator = new Iterator[Edge[ED]] { - private val edge = new Edge[ED] - private var pos = 0 - - override def hasNext: Boolean = pos < EdgePartition.this.size - - override def next(): Edge[ED] = { - edge.src = srcIds.get(pos) - edge.dst = dstIds.get(pos) - edge.data = _data(pos) - pos += 1 - edge - } - } -} diff --git a/graph/src/main/scala/spark/graph/impl/EdgeTripletRDD.scala b/graph/src/main/scala/spark/graph/impl/EdgeTripletRDD.scala deleted file mode 100644 index f6de8e59af..0000000000 --- a/graph/src/main/scala/spark/graph/impl/EdgeTripletRDD.scala +++ /dev/null @@ -1,81 +0,0 @@ -package spark.graph.impl - -import spark.{Aggregator, HashPartitioner, Partition, RDD, SparkEnv, TaskContext} -import spark.{Dependency, OneToOneDependency, ShuffleDependency} -import spark.SparkContext._ -import spark.graph._ - - -private[graph] -class EdgeTripletPartition(idx: Int, val vPart: Partition, val ePart: Partition) - extends Partition { - override val index: Int = idx - override def hashCode(): Int = idx -} - - -/** - * A RDD that brings together edge data with its associated vertex data. 
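// Hedged sketch (a cached graph value `g: Graph[String, Double]` is assumed): the triplet
// view that this RDD backs lets edge-level code see both endpoint attributes at once,
// e.g. keeping only the edges whose endpoints carry different labels.
val crossLabelEdges = g.triplets.filter(et => et.src.data != et.dst.data)
val numCrossLabel = crossLabelEdges.count()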
- */ -private[graph] -class EdgeTripletRDD[VD: ClassManifest, ED: ClassManifest]( - vTableReplicated: RDD[(Vid, VD)], - eTable: RDD[(Pid, EdgePartition[ED])]) - extends RDD[(VertexHashMap[VD], Iterator[EdgeTriplet[VD, ED]])](eTable.context, Nil) { - - println(vTableReplicated.partitioner.get.numPartitions) - println(eTable.partitioner.get.numPartitions) - - assert(vTableReplicated.partitioner == eTable.partitioner) - - override def getDependencies: List[Dependency[_]] = { - List(new OneToOneDependency(eTable), new OneToOneDependency(vTableReplicated)) - } - - override def getPartitions = Array.tabulate[Partition](eTable.partitions.size) { - i => new EdgeTripletPartition(i, eTable.partitions(i), vTableReplicated.partitions(i)) - } - - override val partitioner = eTable.partitioner - - override def getPreferredLocations(s: Partition) = - eTable.preferredLocations(s.asInstanceOf[EdgeTripletPartition].ePart) - - override def compute(s: Partition, context: TaskContext) - : Iterator[(VertexHashMap[VD], Iterator[EdgeTriplet[VD, ED]])] = { - - val split = s.asInstanceOf[EdgeTripletPartition] - - // Fetch the vertices and put them in a hashmap. - // TODO: use primitive hashmaps for primitive VD types. - val vmap = new VertexHashMap[VD]//(1000000) - vTableReplicated.iterator(split.vPart, context).foreach { v => vmap.put(v._1, v._2) } - - val (pid, edgePartition) = eTable.iterator(split.ePart, context).next() - .asInstanceOf[(Pid, EdgePartition[ED])] - - // Return an iterator that looks up the hash map to find matching vertices for each edge. - val iter = new Iterator[EdgeTriplet[VD, ED]] { - private var pos = 0 - private val e = new EdgeTriplet[VD, ED] - e.src = new Vertex[VD] - e.dst = new Vertex[VD] - - override def hasNext: Boolean = pos < edgePartition.size - override def next() = { - e.src.id = edgePartition.srcIds.getLong(pos) - // assert(vmap.containsKey(e.src.id)) - e.src.data = vmap.get(e.src.id) - - e.dst.id = edgePartition.dstIds.getLong(pos) - // assert(vmap.containsKey(e.dst.id)) - e.dst.data = vmap.get(e.dst.id) - - e.data = edgePartition.data(pos) - pos += 1 - e - } - } - Iterator((vmap, iter)) - } -} diff --git a/graph/src/main/scala/spark/graph/impl/GraphImpl.scala b/graph/src/main/scala/spark/graph/impl/GraphImpl.scala deleted file mode 100644 index 08fc016a43..0000000000 --- a/graph/src/main/scala/spark/graph/impl/GraphImpl.scala +++ /dev/null @@ -1,437 +0,0 @@ -package spark.graph.impl - -import scala.collection.JavaConversions._ - -import spark.{ClosureCleaner, Partitioner, HashPartitioner, RDD} -import spark.SparkContext._ - -import spark.graph._ -import spark.graph.impl.GraphImpl._ - - - - - -/** - * A Graph RDD that supports computation on graphs. 
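// Hedged construction sketch (the master string, app name, sample pairs and the file path
// are placeholders, not values from this patch): the two entry points for building a graph
// backed by GraphImpl -- from a raw RDD of vertex-id pairs, or from an edge-list text file.
import spark.SparkContext
import spark.graph._

val sc = new SparkContext("local[4]", "graphimpl-sketch")
val rawEdges = sc.parallelize(Seq((1L, 2L), (2L, 3L), (3L, 1L), (1L, 2L)))
val fromPairs = Graph(rawEdges)                           // duplicate pairs collapse into edge counts
val fromFile  = GraphLoader.textFile(sc, "hdfs:///tmp/edges.tsv", fields => 1.0F)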
- */ -class GraphImpl[VD: ClassManifest, ED: ClassManifest] protected ( - val numVertexPartitions: Int, - val numEdgePartitions: Int, - _rawVertices: RDD[Vertex[VD]], - _rawEdges: RDD[Edge[ED]], - _rawVTable: RDD[(Vid, (VD, Array[Pid]))], - _rawETable: RDD[(Pid, EdgePartition[ED])]) - extends Graph[VD, ED] { - - def this(vertices: RDD[Vertex[VD]], edges: RDD[Edge[ED]]) = { - this(vertices.partitions.size, edges.partitions.size, vertices, edges, null, null) - } - - def withPartitioner(numVertexPartitions: Int, numEdgePartitions: Int): Graph[VD, ED] = { - if (_cached) { - new GraphImpl(numVertexPartitions, numEdgePartitions, null, null, _rawVTable, _rawETable) - .cache() - } else { - new GraphImpl(numVertexPartitions, numEdgePartitions, _rawVertices, _rawEdges, null, null) - } - } - - def withVertexPartitioner(numVertexPartitions: Int) = { - withPartitioner(numVertexPartitions, numEdgePartitions) - } - - def withEdgePartitioner(numEdgePartitions: Int) = { - withPartitioner(numVertexPartitions, numEdgePartitions) - } - - protected var _cached = false - - override def cache(): Graph[VD, ED] = { - eTable.cache() - vTable.cache() - _cached = true - this - } - - override def reverse: Graph[VD, ED] = { - newGraph(vertices, edges.map{ case Edge(s, t, e) => Edge(t, s, e) }) - } - - /** Return a RDD of vertices. */ - override def vertices: RDD[Vertex[VD]] = { - if (!_cached && _rawVertices != null) { - _rawVertices - } else { - vTable.map { case(vid, (data, pids)) => new Vertex(vid, data) } - } - } - - /** Return a RDD of edges. */ - override def edges: RDD[Edge[ED]] = { - if (!_cached && _rawEdges != null) { - _rawEdges - } else { - eTable.mapPartitions { iter => iter.next()._2.iterator } - } - } - - /** Return a RDD that brings edges with its source and destination vertices together. */ - override def triplets: RDD[EdgeTriplet[VD, ED]] = { - new EdgeTripletRDD(vTableReplicated, eTable).mapPartitions { part => part.next()._2 } - } - - override def mapVertices[VD2: ClassManifest](f: Vertex[VD] => VD2): Graph[VD2, ED] = { - newGraph(vertices.map(v => Vertex(v.id, f(v))), edges) - } - - override def mapEdges[ED2: ClassManifest](f: Edge[ED] => ED2): Graph[VD, ED2] = { - newGraph(vertices, edges.map(e => Edge(e.src, e.dst, f(e)))) - } - - override def mapTriplets[ED2: ClassManifest](f: EdgeTriplet[VD, ED] => ED2): - Graph[VD, ED2] = { - newGraph(vertices, triplets.map(e => Edge(e.src.id, e.dst.id, f(e)))) - } - - override def correctEdges(): Graph[VD, ED] = { - val sc = vertices.context - val vset = sc.broadcast(vertices.map(_.id).collect().toSet) - val newEdges = edges.filter(e => vset.value.contains(e.src) && vset.value.contains(e.dst)) - Graph(vertices, newEdges) - } - - - override def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (_ => true), - vpred: Vertex[VD] => Boolean = (_ => true) ): Graph[VD, ED] = { - - // Restrict the set of vertices to those that satisfy the vertex predicate - val newVertices = vertices.filter(vpred) - // Restrict the set of edges to those that satisfy the vertex and the edge predicate. 
- val newEdges = triplets.filter(t => vpred(t.src) && vpred(t.dst) && epred(t)) - .map( t => Edge(t.src.id, t.dst.id, t.data) ) - - new GraphImpl(newVertices, newEdges) - } - - ////////////////////////////////////////////////////////////////////////////////////////////////// - // Lower level transformation methods - ////////////////////////////////////////////////////////////////////////////////////////////////// - - override def aggregateNeighbors[VD2: ClassManifest]( - mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], - reduceFunc: (VD2, VD2) => VD2, - default: VD2, - gatherDirection: EdgeDirection) - : RDD[(Vid, VD2)] = { - - ClosureCleaner.clean(mapFunc) - ClosureCleaner.clean(reduceFunc) - - val newVTable = vTableReplicated.mapPartitions({ part => - part.map { v => (v._1, MutableTuple2(v._2, Option.empty[VD2])) } - }, preservesPartitioning = true) - - new EdgeTripletRDD[MutableTuple2[VD, Option[VD2]], ED](newVTable, eTable) - .mapPartitions { part => - val (vmap, edges) = part.next() - val edgeSansAcc = new EdgeTriplet[VD, ED]() - edgeSansAcc.src = new Vertex[VD] - edgeSansAcc.dst = new Vertex[VD] - edges.foreach { e: EdgeTriplet[MutableTuple2[VD, Option[VD2]], ED] => - edgeSansAcc.data = e.data - edgeSansAcc.src.data = e.src.data._1 - edgeSansAcc.dst.data = e.dst.data._1 - edgeSansAcc.src.id = e.src.id - edgeSansAcc.dst.id = e.dst.id - if (gatherDirection == EdgeDirection.In || gatherDirection == EdgeDirection.Both) { - e.dst.data._2 = - if (e.dst.data._2.isEmpty) { - mapFunc(edgeSansAcc.dst.id, edgeSansAcc) - } else { - val tmp = mapFunc(edgeSansAcc.dst.id, edgeSansAcc) - if (!tmp.isEmpty) Some(reduceFunc(e.dst.data._2.get, tmp.get)) else e.dst.data._2 - } - } - if (gatherDirection == EdgeDirection.Out || gatherDirection == EdgeDirection.Both) { - e.dst.data._2 = - if (e.dst.data._2.isEmpty) { - mapFunc(edgeSansAcc.src.id, edgeSansAcc) - } else { - val tmp = mapFunc(edgeSansAcc.src.id, edgeSansAcc) - if (!tmp.isEmpty) Some(reduceFunc(e.src.data._2.get, tmp.get)) else e.src.data._2 - } - } - } - vmap.long2ObjectEntrySet().fastIterator().filter(!_.getValue()._2.isEmpty).map{ entry => - (entry.getLongKey(), entry.getValue()._2) - } - } - .map{ case (vid, aOpt) => (vid, aOpt.get) } - .combineByKey((v: VD2) => v, reduceFunc, null, vertexPartitioner, false) - } - - /** - * Same as aggregateNeighbors but map function can return none and there is no default value. - * As a consequence, the resulting table may be much smaller than the set of vertices. 
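// Hedged usage sketch (the graph value `g: Graph[Int, Double]` is assumed): with no default
// value, a vertex that has no out-edges simply produces no entry in the returned RDD.
import spark.RDD
import spark.graph._

val largestOutNeighbor: RDD[(Vid, Int)] =
  g.aggregateNeighbors[Int](
    (vid, et) => Some(et.otherVertex(vid).data),   // attribute of each out-neighbor
    (a, b) => math.max(a, b),                      // keep the largest seen
    EdgeDirection.Out)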
- */ - override def aggregateNeighbors[VD2: ClassManifest]( - mapFunc: (Vid, EdgeTriplet[VD, ED]) => Option[VD2], - reduceFunc: (VD2, VD2) => VD2, - gatherDirection: EdgeDirection): RDD[(Vid, VD2)] = { - - ClosureCleaner.clean(mapFunc) - ClosureCleaner.clean(reduceFunc) - - val newVTable = vTableReplicated.mapPartitions({ part => - part.map { v => (v._1, MutableTuple2(v._2, Option.empty[VD2])) } - }, preservesPartitioning = true) - - new EdgeTripletRDD[MutableTuple2[VD, Option[VD2]], ED](newVTable, eTable) - .mapPartitions { part => - val (vmap, edges) = part.next() - val edgeSansAcc = new EdgeTriplet[VD, ED]() - edgeSansAcc.src = new Vertex[VD] - edgeSansAcc.dst = new Vertex[VD] - edges.foreach { e: EdgeTriplet[MutableTuple2[VD, Option[VD2]], ED] => - edgeSansAcc.data = e.data - edgeSansAcc.src.data = e.src.data._1 - edgeSansAcc.dst.data = e.dst.data._1 - edgeSansAcc.src.id = e.src.id - edgeSansAcc.dst.id = e.dst.id - if (gatherDirection == EdgeDirection.In || gatherDirection == EdgeDirection.Both) { - e.dst.data._2 = - if (e.dst.data._2.isEmpty) { - mapFunc(edgeSansAcc.dst.id, edgeSansAcc) - } else { - val tmp = mapFunc(edgeSansAcc.dst.id, edgeSansAcc) - if (!tmp.isEmpty) Some(reduceFunc(e.dst.data._2.get, tmp.get)) else e.dst.data._2 - } - } - if (gatherDirection == EdgeDirection.Out || gatherDirection == EdgeDirection.Both) { - e.src.data._2 = - if (e.src.data._2.isEmpty) { - mapFunc(edgeSansAcc.src.id, edgeSansAcc) - } else { - val tmp = mapFunc(edgeSansAcc.src.id, edgeSansAcc) - if (!tmp.isEmpty) Some(reduceFunc(e.src.data._2.get, tmp.get)) else e.src.data._2 - } - } - } - vmap.long2ObjectEntrySet().fastIterator().filter(!_.getValue()._2.isEmpty).map{ entry => - (entry.getLongKey(), entry.getValue()._2) - } - } - .map{ case (vid, aOpt) => (vid, aOpt.get) } - .combineByKey((v: VD2) => v, reduceFunc, null, vertexPartitioner, false) - } - - override def leftJoinVertices[U: ClassManifest, VD2: ClassManifest]( - updates: RDD[(Vid, U)], - updateF: (Vertex[VD], Option[U]) => VD2) - : Graph[VD2, ED] = { - - ClosureCleaner.clean(updateF) - - val newVTable = vTable.leftOuterJoin(updates).mapPartitions({ iter => - iter.map { case (vid, ((vdata, pids), update)) => - val newVdata = updateF(Vertex(vid, vdata), update) - (vid, (newVdata, pids)) - } - }, preservesPartitioning = true).cache() - - new GraphImpl(newVTable.partitions.length, eTable.partitions.length, null, null, newVTable, eTable) - } - - override def joinVertices[U: ClassManifest]( - updates: RDD[(Vid, U)], - updateF: (Vertex[VD], U) => VD) - : Graph[VD, ED] = { - - ClosureCleaner.clean(updateF) - - val newVTable = vTable.leftOuterJoin(updates).mapPartitions({ iter => - iter.map { case (vid, ((vdata, pids), update)) => - if (update.isDefined) { - val newVdata = updateF(Vertex(vid, vdata), update.get) - (vid, (newVdata, pids)) - } else { - (vid, (vdata, pids)) - } - } - }, preservesPartitioning = true).cache() - - new GraphImpl(newVTable.partitions.length, eTable.partitions.length, null, null, newVTable, eTable) - } - - - ////////////////////////////////////////////////////////////////////////////////////////////////// - // Internals hidden from callers - ////////////////////////////////////////////////////////////////////////////////////////////////// - - // TODO: Support non-hash partitioning schemes. - protected val vertexPartitioner = new HashPartitioner(numVertexPartitions) - protected val edgePartitioner = new HashPartitioner(numEdgePartitions) - - /** Create a new graph but keep the current partitioning scheme. 
*/ - protected def newGraph[VD2: ClassManifest, ED2: ClassManifest]( - vertices: RDD[Vertex[VD2]], edges: RDD[Edge[ED2]]): Graph[VD2, ED2] = { - (new GraphImpl[VD2, ED2](vertices, edges)).withPartitioner(numVertexPartitions, numEdgePartitions) - } - - protected lazy val eTable: RDD[(Pid, EdgePartition[ED])] = { - if (_rawETable == null) { - createETable(_rawEdges, numEdgePartitions) - } else { - _rawETable - } - } - - protected lazy val vTable: RDD[(Vid, (VD, Array[Pid]))] = { - if (_rawVTable == null) { - createVTable(_rawVertices, eTable, numVertexPartitions) - } else { - _rawVTable - } - } - - protected lazy val vTableReplicated: RDD[(Vid, VD)] = { - // Join vid2pid and vTable, generate a shuffle dependency on the joined result, and get - // the shuffle id so we can use it on the slave. - vTable - .flatMap { case (vid, (vdata, pids)) => pids.iterator.map { pid => (pid, (vid, vdata)) } } - .partitionBy(edgePartitioner) - .mapPartitions( - { part => part.map { case(pid, (vid, vdata)) => (vid, vdata) } }, - preservesPartitioning = true) - } -} - - -object GraphImpl { - - - protected def edgePartitionFunction1D(src: Vid, dst: Vid, numParts: Pid): Pid = { - val mixingPrime: Vid = 1125899906842597L - (math.abs(src) * mixingPrime).toInt % numParts - } - - - - /** - * This function implements a classic 2D-Partitioning of a sparse matrix. - * Suppose we have a graph with 11 vertices that we want to partition - * over 9 machines. We can use the following sparse matrix representation: - * - * __________________________________ - * v0 | P0 * | P1 | P2 * | - * v1 | **** | * | | - * v2 | ******* | ** | **** | - * v3 | ***** | * * | * | - * ---------------------------------- - * v4 | P3 * | P4 *** | P5 ** * | - * v5 | * * | * | | - * v6 | * | ** | **** | - * v7 | * * * | * * | * | - * ---------------------------------- - * v8 | P6 * | P7 * | P8 * *| - * v9 | * | * * | | - * v10 | * | ** | * * | - * v11 | * <-E | *** | ** | - * ---------------------------------- - * - * The edge denoted by E connects v11 with v1 and is assigned to - * processor P6. To get the processor number we divide the matrix - * into sqrt(numProc) by sqrt(numProc) blocks. Notice that edges - * adjacent to v11 can only be in the first colum of - * blocks (P0, P3, P6) or the last row of blocks (P6, P7, P8). - * As a consequence we can guarantee that v11 will need to be - * replicated to at most 2 * sqrt(numProc) machines. - * - * Notice that P0 has many edges and as a consequence this - * partitioning would lead to poor work balance. To improve - * balance we first multiply each vertex id by a large prime - * to effectively suffle the vertex locations. - * - * One of the limitations of this approach is that the number of - * machines must either be a perfect square. We partially address - * this limitation by computing the machine assignment to the next - * largest perfect square and then mapping back down to the actual - * number of machines. Unfortunately, this can also lead to work - * imbalance and so it is suggested that a perfect square is used. 
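// Editorial sketch, not part of the patch: the arithmetic behind the 2D scheme described
// above, inlined here because edgePartitionFunction2D is protected. The edge (1L, 2L) and
// the 9-partition grid are illustrative values.
val mixingPrime = 1125899906842597L
val (src, dst, numParts, ceilSqrt) = (1L, 2L, 9, 3)          // 9 partitions => a 3 x 3 block grid
val col = ((math.abs(src) * mixingPrime) % ceilSqrt).toInt   // source id picks the block column (1 here)
val row = ((math.abs(dst) * mixingPrime) % ceilSqrt).toInt   // destination id picks the block row (2 here)
val pid = (col * ceilSqrt + row) % numParts                  // => partition 5 of 9
// Every edge of a given vertex lands in one block row plus one block column, which is what
// bounds its replication at roughly 2 * sqrt(numParts) machines.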
- * - * - */ - protected def edgePartitionFunction2D(src: Vid, dst: Vid, - numParts: Pid, ceilSqrtNumParts: Pid): Pid = { - val mixingPrime: Vid = 1125899906842597L - val col: Pid = ((math.abs(src) * mixingPrime) % ceilSqrtNumParts).toInt - val row: Pid = ((math.abs(dst) * mixingPrime) % ceilSqrtNumParts).toInt - (col * ceilSqrtNumParts + row) % numParts - } - - - /** - * Create the edge table RDD, which is much more efficient for Java heap storage than the - * normal edges data structure (RDD[(Vid, Vid, ED)]). - * - * The edge table contains multiple partitions, and each partition contains only one RDD - * key-value pair: the key is the partition id, and the value is an EdgePartition object - * containing all the edges in a partition. - */ - protected def createETable[ED: ClassManifest](edges: RDD[Edge[ED]], numPartitions: Int) - : RDD[(Pid, EdgePartition[ED])] = { - val ceilSqrt: Pid = math.ceil(math.sqrt(numPartitions)).toInt - - edges - .map { e => - // Random partitioning based on the source vertex id. - // val part: Pid = edgePartitionFunction1D(e.src, e.dst, numPartitions) - val part: Pid = edgePartitionFunction2D(e.src, e.dst, numPartitions, ceilSqrt) - - // Should we be using 3-tuple or an optimized class - (part, (e.src, e.dst, e.data)) - // (math.abs(e.src) % numPartitions, (e.src, e.dst, e.data)) - - } - .partitionBy(new HashPartitioner(numPartitions)) - .mapPartitionsWithIndex({ (pid, iter) => - val edgePartition = new EdgePartition[ED] - iter.foreach { case (_, (src, dst, data)) => edgePartition.add(src, dst, data) } - edgePartition.trim() - Iterator((pid, edgePartition)) - }, preservesPartitioning = true) - } - - protected def createVTable[VD: ClassManifest, ED: ClassManifest]( - vertices: RDD[Vertex[VD]], - eTable: RDD[(Pid, EdgePartition[ED])], - numPartitions: Int) - : RDD[(Vid, (VD, Array[Pid]))] = { - val partitioner = new HashPartitioner(numPartitions) - - // A key-value RDD. The key is a vertex id, and the value is a list of - // partitions that contains edges referencing the vertex. - val vid2pid : RDD[(Vid, Seq[Pid])] = eTable.mapPartitions { iter => - val (pid, edgePartition) = iter.next() - val vSet = new VertexSet - var i = 0 - while (i < edgePartition.srcIds.size) { - vSet.add(edgePartition.srcIds.getLong(i)) - vSet.add(edgePartition.dstIds.getLong(i)) - i += 1 - } - vSet.iterator.map { vid => (vid.toLong, pid) } - }.groupByKey(partitioner) - - vertices - .map { v => (v.id, v.data) } - .partitionBy(partitioner) - .leftOuterJoin(vid2pid) - .mapValues { - case (vdata, None) => (vdata, Array.empty[Pid]) - case (vdata, Some(pids)) => (vdata, pids.toArray) - } - } -} - diff --git a/graph/src/main/scala/spark/graph/package.scala b/graph/src/main/scala/spark/graph/package.scala deleted file mode 100644 index d95dcdce08..0000000000 --- a/graph/src/main/scala/spark/graph/package.scala +++ /dev/null @@ -1,23 +0,0 @@ -package spark - -package object graph { - - type Vid = Long - type Pid = Int - - type VertexHashMap[T] = it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap[T] - type VertexSet = it.unimi.dsi.fastutil.longs.LongOpenHashSet - type VertexArrayList = it.unimi.dsi.fastutil.longs.LongArrayList - - /** - * Return the default null-like value for a data type T. 
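// Editorial sketch, not part of the patch: what the nullValue helper defined just below
// evaluates to for a primitive versus a reference type.
val zeroInt: Int = nullValue[Int]         // 0: the boxed null unboxes to the numeric zero
val noString: String = nullValue[String]  // null: reference types keep the real null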
- */ - def nullValue[T] = null.asInstanceOf[T] - - - private[graph] - case class MutableTuple2[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) U, - @specialized(Char, Int, Boolean, Byte, Long, Float, Double) V]( - var _1: U, var _2: V) - -} diff --git a/graph/src/main/scala/spark/graph/perf/BagelTest.scala b/graph/src/main/scala/spark/graph/perf/BagelTest.scala deleted file mode 100644 index 7547292500..0000000000 --- a/graph/src/main/scala/spark/graph/perf/BagelTest.scala +++ /dev/null @@ -1,72 +0,0 @@ -package spark.graph.perf - -import spark._ -import spark.SparkContext._ -import spark.bagel.Bagel -import spark.bagel.examples._ -import spark.graph._ - - -object BagelTest { - - def main(args: Array[String]) { - val host = args(0) - val taskType = args(1) - val fname = args(2) - val options = args.drop(3).map { arg => - arg.dropWhile(_ == '-').split('=') match { - case Array(opt, v) => (opt -> v) - case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - } - } - - System.setProperty("spark.serializer", "spark.KryoSerializer") - //System.setProperty("spark.shuffle.compress", "false") - System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") - - var numIter = Int.MaxValue - var isDynamic = false - var tol:Float = 0.001F - var outFname = "" - var numVPart = 4 - var numEPart = 4 - - options.foreach{ - case ("numIter", v) => numIter = v.toInt - case ("dynamic", v) => isDynamic = v.toBoolean - case ("tol", v) => tol = v.toFloat - case ("output", v) => outFname = v - case ("numVPart", v) => numVPart = v.toInt - case ("numEPart", v) => numEPart = v.toInt - case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - } - - val sc = new SparkContext(host, "PageRank(" + fname + ")") - val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() - val startTime = System.currentTimeMillis - - val numVertices = g.vertices.count() - - val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => - (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) - } - - // Do the computation - val epsilon = 0.01 / numVertices - val messages = sc.parallelize(Array[(String, PRMessage)]()) - val utils = new PageRankUtils - val result = - Bagel.run( - sc, vertices, messages, combiner = new PRCombiner(), - numPartitions = numVPart)( - utils.computeWithCombiner(numVertices, epsilon, numIter)) - - println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) - if (!outFname.isEmpty) { - println("Saving pageranks of pages to " + outFname) - result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) - } - println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") - sc.stop() - } -} diff --git a/graph/src/main/scala/spark/graph/perf/SparkTest.scala b/graph/src/main/scala/spark/graph/perf/SparkTest.scala deleted file mode 100644 index 85ebd14bcb..0000000000 --- a/graph/src/main/scala/spark/graph/perf/SparkTest.scala +++ /dev/null @@ -1,72 +0,0 @@ -package spark.graph.perf - -import spark._ -import spark.SparkContext._ -import spark.bagel.Bagel -import spark.bagel.examples._ -import spark.graph._ - - -object SparkTest { - - def main(args: Array[String]) { - val host = args(0) - val taskType = args(1) - val fname = args(2) - val options = args.drop(3).map { arg => - arg.dropWhile(_ == '-').split('=') match { - case Array(opt, v) => (opt -> v) - case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - } - } - - 
System.setProperty("spark.serializer", "spark.KryoSerializer") - //System.setProperty("spark.shuffle.compress", "false") - System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") - - var numIter = Int.MaxValue - var isDynamic = false - var tol:Float = 0.001F - var outFname = "" - var numVPart = 4 - var numEPart = 4 - - options.foreach{ - case ("numIter", v) => numIter = v.toInt - case ("dynamic", v) => isDynamic = v.toBoolean - case ("tol", v) => tol = v.toFloat - case ("output", v) => outFname = v - case ("numVPart", v) => numVPart = v.toInt - case ("numEPart", v) => numEPart = v.toInt - case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - } - - val sc = new SparkContext(host, "PageRank(" + fname + ")") - val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() - val startTime = System.currentTimeMillis - - val numVertices = g.vertices.count() - - val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => - (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) - } - - // Do the computation - val epsilon = 0.01 / numVertices - val messages = sc.parallelize(Array[(String, PRMessage)]()) - val utils = new PageRankUtils - val result = - Bagel.run( - sc, vertices, messages, combiner = new PRCombiner(), - numPartitions = numVPart)( - utils.computeWithCombiner(numVertices, epsilon, numIter)) - - println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) - if (!outFname.isEmpty) { - println("Saving pageranks of pages to " + outFname) - result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) - } - println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") - sc.stop() - } -} diff --git a/graph/src/main/scala/spark/graph/util/BytecodeUtils.scala b/graph/src/main/scala/spark/graph/util/BytecodeUtils.scala deleted file mode 100644 index ac3a1fb957..0000000000 --- a/graph/src/main/scala/spark/graph/util/BytecodeUtils.scala +++ /dev/null @@ -1,113 +0,0 @@ -package spark.graph.util - -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - -import scala.collection.mutable.HashSet - -import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} -import org.objectweb.asm.Opcodes._ - -import spark.Utils - - -private[graph] object BytecodeUtils { - - /** - * Test whether the given closure invokes the specified method in the specified class. 
- */ - def invokedMethod(closure: AnyRef, targetClass: Class[_], targetMethod: String): Boolean = { - if (_invokedMethod(closure.getClass, "apply", targetClass, targetMethod)) { - true - } else { - // look at closures enclosed in this closure - for (f <- closure.getClass.getDeclaredFields - if f.getType.getName.startsWith("scala.Function")) { - f.setAccessible(true) - if (invokedMethod(f.get(closure), targetClass, targetMethod)) { - return true - } - } - return false - } - } - - private def _invokedMethod(cls: Class[_], method: String, - targetClass: Class[_], targetMethod: String): Boolean = { - - val seen = new HashSet[(Class[_], String)] - var stack = List[(Class[_], String)]((cls, method)) - - while (stack.nonEmpty) { - val (c, m) = stack.head - stack = stack.tail - seen.add((c, m)) - val finder = new MethodInvocationFinder(c.getName, m) - getClassReader(c).accept(finder, 0) - for (classMethod <- finder.methodsInvoked) { - //println(classMethod) - if (classMethod._1 == targetClass && classMethod._2 == targetMethod) { - return true - } else if (!seen.contains(classMethod)) { - stack = classMethod :: stack - } - } - } - return false - } - - /** - * Get an ASM class reader for a given class from the JAR that loaded it. - */ - private def getClassReader(cls: Class[_]): ClassReader = { - // Copy data over, before delegating to ClassReader - else we can run out of open file handles. - val className = cls.getName.replaceFirst("^.*\\.", "") + ".class" - val resourceStream = cls.getResourceAsStream(className) - // todo: Fixme - continuing with earlier behavior ... - if (resourceStream == null) return new ClassReader(resourceStream) - - val baos = new ByteArrayOutputStream(128) - Utils.copyStream(resourceStream, baos, true) - new ClassReader(new ByteArrayInputStream(baos.toByteArray)) - } - - /** - * Given the class name, return whether we should look into the class or not. This is used to - * skip examing a large quantity of Java or Scala classes that we know for sure wouldn't access - * the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of "."). - */ - private def skipClass(className: String): Boolean = { - val c = className - c.startsWith("java/") || c.startsWith("scala/") || c.startsWith("javax/") - } - - /** - * Find the set of methods invoked by the specified method in the specified class. - * For example, after running the visitor, - * MethodInvocationFinder("spark/graph/Foo", "test") - * its methodsInvoked variable will contain the set of methods invoked directly by - * Foo.test(). Interface invocations are not returned as part of the result set because we cannot - * determine the actual metod invoked by inspecting the bytecode. 
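// Editorial usage sketch, not part of the patch: how invokedMethod above could be called from
// inside the spark.graph package (the object is private[graph]). The closure and the choice of
// EdgeTriplet.src as the target are illustrative.
val readsSrc = (t: EdgeTriplet[Int, Double]) => t.src.data + 1
val touchesSrc = BytecodeUtils.invokedMethod(readsSrc, classOf[EdgeTriplet[_, _]], "src")
// A `true` result means the closure's bytecode invokes EdgeTriplet.src somewhere, either
// directly or through a nested closure, which is the kind of fact a caller could use to decide
// which vertex data actually needs to be shipped with the closure.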
- */ - private class MethodInvocationFinder(className: String, methodName: String) - extends ClassVisitor(ASM4) { - - val methodsInvoked = new HashSet[(Class[_], String)] - - override def visitMethod(access: Int, name: String, desc: String, - sig: String, exceptions: Array[String]): MethodVisitor = { - if (name == methodName) { - new MethodVisitor(ASM4) { - override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { - if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { - if (!skipClass(owner)) { - methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) - } - } - } - } - } else { - null - } - } - } -} diff --git a/graph/src/main/scala/spark/graph/util/HashUtils.scala b/graph/src/main/scala/spark/graph/util/HashUtils.scala deleted file mode 100644 index 0dfaef4c48..0000000000 --- a/graph/src/main/scala/spark/graph/util/HashUtils.scala +++ /dev/null @@ -1,21 +0,0 @@ -package spark.graph.util - - -object HashUtils { - - /** - * Compute a 64-bit hash value for the given string. - * See http://stackoverflow.com/questions/1660501/what-is-a-good-64bit-hash-function-in-java-for-textual-strings - */ - def hash(str: String): Long = { - var h = 1125899906842597L - val len = str.length - var i = 0 - - while (i < len) { - h = 31 * h + str(i) - i += 1 - } - h - } -} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7598060cb9..7dc6c58401 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -42,16 +42,16 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(core, bagel, mllib) + .dependsOn(core, graph, bagel, mllib) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, bagel, streaming) + .dependsOn(core, mllib, graph, bagel, streaming) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core) - lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn(core) + lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn(core) lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core) @@ -60,7 +60,7 @@ object SparkBuild extends Build { lazy val yarn = Project("yarn", file("yarn"), settings = yarnSettings) dependsOn(core) lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) - .dependsOn(core, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) + .dependsOn(core, graph, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) // A configuration to set an alternative publishLocalConfiguration lazy val MavenCompile = config("m2r") extend(Compile) @@ -77,7 +77,7 @@ object SparkBuild extends Build { lazy val maybeYarn = if(isYarnEnabled) Seq[ClasspathDependency](yarn) else Seq[ClasspathDependency]() lazy val maybeYarnRef = if(isYarnEnabled) Seq[ProjectReference](yarn) else Seq[ProjectReference]() lazy val allProjects = Seq[ProjectReference]( - core, repl, examples, bagel, streaming, mllib, tools, assemblyProj) ++ maybeYarnRef + core, repl, examples, graph, bagel, streaming, mllib, tools, assemblyProj) ++ maybeYarnRef def sharedSettings = Defaults.defaultSettings ++ Seq( organization := "org.apache.spark", @@ -254,6 +254,10 @@ object SparkBuild 
extends Build { name := "spark-tools" ) + def graphSettings = sharedSettings ++ Seq( + name := "spark-graphx" + ) + def bagelSettings = sharedSettings ++ Seq( name := "spark-bagel" ) @@ -265,8 +269,6 @@ object SparkBuild extends Build { ) ) - def graphSettings = sharedSettings ++ Seq(name := "spark-graph") - def streamingSettings = sharedSettings ++ Seq( name := "spark-streaming", resolvers ++= Seq( -- cgit v1.2.3 From 731f56f309914e3fc7c22c8ef1c8cb9dd40d42c1 Mon Sep 17 00:00:00 2001 From: Ankur Dave Date: Thu, 9 Jan 2014 14:31:33 -0800 Subject: graph -> graphx --- graph/pom.xml | 129 ----- .../scala/org/apache/spark/graph/Analytics.scala | 593 --------------------- .../main/scala/org/apache/spark/graph/Edge.scala | 50 -- .../org/apache/spark/graph/EdgeDirection.scala | 36 -- .../scala/org/apache/spark/graph/EdgeRDD.scala | 73 --- .../scala/org/apache/spark/graph/EdgeTriplet.scala | 63 --- .../main/scala/org/apache/spark/graph/Graph.scala | 437 --------------- .../apache/spark/graph/GraphKryoRegistrator.scala | 28 - .../scala/org/apache/spark/graph/GraphLab.scala | 134 ----- .../scala/org/apache/spark/graph/GraphLoader.scala | 113 ---- .../scala/org/apache/spark/graph/GraphOps.scala | 277 ---------- .../org/apache/spark/graph/PartitionStrategy.scala | 94 ---- .../main/scala/org/apache/spark/graph/Pregel.scala | 122 ----- .../scala/org/apache/spark/graph/VertexRDD.scala | 361 ------------- .../graph/algorithms/ConnectedComponents.scala | 37 -- .../apache/spark/graph/algorithms/PageRank.scala | 205 ------- .../spark/graph/algorithms/SVDPlusPlus.scala | 103 ---- .../algorithms/StronglyConnectedComponents.scala | 87 --- .../spark/graph/algorithms/TriangleCount.scala | 78 --- .../apache/spark/graph/impl/EdgePartition.scala | 220 -------- .../spark/graph/impl/EdgePartitionBuilder.scala | 46 -- .../spark/graph/impl/EdgeTripletIterator.scala | 43 -- .../org/apache/spark/graph/impl/GraphImpl.scala | 422 --------------- .../spark/graph/impl/MessageToPartition.scala | 93 ---- .../spark/graph/impl/ReplicatedVertexView.scala | 182 ------- .../org/apache/spark/graph/impl/RoutingTable.scala | 64 --- .../org/apache/spark/graph/impl/Serializers.scala | 386 -------------- .../apache/spark/graph/impl/VertexPartition.scala | 262 --------- .../scala/org/apache/spark/graph/package.scala | 22 - .../org/apache/spark/graph/perf/BagelTest.scala | 76 --- .../org/apache/spark/graph/perf/SparkTest.scala | 75 --- .../apache/spark/graph/util/BytecodeUtils.scala | 114 ---- .../apache/spark/graph/util/GraphGenerators.scala | 282 ---------- .../org/apache/spark/graph/util/HashUtils.scala | 21 - graph/src/test/resources/log4j.properties | 28 - .../org/apache/spark/graph/GraphOpsSuite.scala | 92 ---- .../scala/org/apache/spark/graph/GraphSuite.scala | 272 ---------- .../org/apache/spark/graph/LocalSparkContext.scala | 28 - .../scala/org/apache/spark/graph/PregelSuite.scala | 41 -- .../org/apache/spark/graph/SerializerSuite.scala | 183 ------- .../org/apache/spark/graph/VertexRDDSuite.scala | 85 --- .../algorithms/ConnectedComponentsSuite.scala | 83 --- .../spark/graph/algorithms/PageRankSuite.scala | 126 ----- .../spark/graph/algorithms/SVDPlusPlusSuite.scala | 30 -- .../StronglyConnectedComponentsSuite.scala | 57 -- .../graph/algorithms/TriangleCountSuite.scala | 73 --- .../spark/graph/impl/EdgePartitionSuite.scala | 76 --- .../spark/graph/impl/VertexPartitionSuite.scala | 113 ---- .../spark/graph/util/BytecodeUtilsSuite.scala | 93 ---- graphx/pom.xml | 129 +++++ .../scala/org/apache/spark/graphx/Analytics.scala | 593 
+++++++++++++++++++++ .../main/scala/org/apache/spark/graphx/Edge.scala | 50 ++ .../org/apache/spark/graphx/EdgeDirection.scala | 36 ++ .../scala/org/apache/spark/graphx/EdgeRDD.scala | 73 +++ .../org/apache/spark/graphx/EdgeTriplet.scala | 63 +++ .../main/scala/org/apache/spark/graphx/Graph.scala | 437 +++++++++++++++ .../apache/spark/graphx/GraphKryoRegistrator.scala | 28 + .../scala/org/apache/spark/graphx/GraphLab.scala | 134 +++++ .../org/apache/spark/graphx/GraphLoader.scala | 113 ++++ .../scala/org/apache/spark/graphx/GraphOps.scala | 277 ++++++++++ .../apache/spark/graphx/PartitionStrategy.scala | 94 ++++ .../scala/org/apache/spark/graphx/Pregel.scala | 122 +++++ .../scala/org/apache/spark/graphx/VertexRDD.scala | 361 +++++++++++++ .../graphx/algorithms/ConnectedComponents.scala | 37 ++ .../apache/spark/graphx/algorithms/PageRank.scala | 205 +++++++ .../spark/graphx/algorithms/SVDPlusPlus.scala | 103 ++++ .../algorithms/StronglyConnectedComponents.scala | 87 +++ .../spark/graphx/algorithms/TriangleCount.scala | 78 +++ .../apache/spark/graphx/impl/EdgePartition.scala | 220 ++++++++ .../spark/graphx/impl/EdgePartitionBuilder.scala | 46 ++ .../spark/graphx/impl/EdgeTripletIterator.scala | 43 ++ .../org/apache/spark/graphx/impl/GraphImpl.scala | 422 +++++++++++++++ .../spark/graphx/impl/MessageToPartition.scala | 93 ++++ .../spark/graphx/impl/ReplicatedVertexView.scala | 182 +++++++ .../apache/spark/graphx/impl/RoutingTable.scala | 64 +++ .../org/apache/spark/graphx/impl/Serializers.scala | 386 ++++++++++++++ .../apache/spark/graphx/impl/VertexPartition.scala | 262 +++++++++ .../scala/org/apache/spark/graphx/package.scala | 22 + .../org/apache/spark/graphx/perf/BagelTest.scala | 76 +++ .../org/apache/spark/graphx/perf/SparkTest.scala | 75 +++ .../apache/spark/graphx/util/BytecodeUtils.scala | 114 ++++ .../apache/spark/graphx/util/GraphGenerators.scala | 282 ++++++++++ .../org/apache/spark/graphx/util/HashUtils.scala | 21 + graphx/src/test/resources/log4j.properties | 28 + .../org/apache/spark/graphx/GraphOpsSuite.scala | 92 ++++ .../scala/org/apache/spark/graphx/GraphSuite.scala | 272 ++++++++++ .../apache/spark/graphx/LocalSparkContext.scala | 28 + .../org/apache/spark/graphx/PregelSuite.scala | 41 ++ .../org/apache/spark/graphx/SerializerSuite.scala | 183 +++++++ .../org/apache/spark/graphx/VertexRDDSuite.scala | 85 +++ .../algorithms/ConnectedComponentsSuite.scala | 83 +++ .../spark/graphx/algorithms/PageRankSuite.scala | 126 +++++ .../spark/graphx/algorithms/SVDPlusPlusSuite.scala | 30 ++ .../StronglyConnectedComponentsSuite.scala | 57 ++ .../graphx/algorithms/TriangleCountSuite.scala | 73 +++ .../spark/graphx/impl/EdgePartitionSuite.scala | 76 +++ .../spark/graphx/impl/VertexPartitionSuite.scala | 113 ++++ .../spark/graphx/util/BytecodeUtilsSuite.scala | 93 ++++ project/SparkBuild.scala | 12 +- 99 files changed, 6714 insertions(+), 6714 deletions(-) delete mode 100644 graph/pom.xml delete mode 100644 graph/src/main/scala/org/apache/spark/graph/Analytics.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/Edge.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/EdgeRDD.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/Graph.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala delete mode 100644 
graph/src/main/scala/org/apache/spark/graph/GraphLab.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/GraphOps.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/PartitionStrategy.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/Pregel.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/VertexRDD.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/algorithms/ConnectedComponents.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/algorithms/PageRank.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/algorithms/SVDPlusPlus.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponents.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/algorithms/TriangleCount.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/EdgePartitionBuilder.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletIterator.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/MessageToPartition.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/ReplicatedVertexView.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/RoutingTable.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/Serializers.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/impl/VertexPartition.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/package.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala delete mode 100644 graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala delete mode 100644 graph/src/test/resources/log4j.properties delete mode 100644 graph/src/test/scala/org/apache/spark/graph/GraphOpsSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/GraphSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/LocalSparkContext.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/PregelSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/SerializerSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/VertexRDDSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/algorithms/ConnectedComponentsSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/algorithms/PageRankSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/algorithms/SVDPlusPlusSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponentsSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/algorithms/TriangleCountSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/impl/EdgePartitionSuite.scala delete mode 100644 graph/src/test/scala/org/apache/spark/graph/impl/VertexPartitionSuite.scala delete mode 100644 
graph/src/test/scala/org/apache/spark/graph/util/BytecodeUtilsSuite.scala create mode 100644 graphx/pom.xml create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/Analytics.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/Edge.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/Graph.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/algorithms/ConnectedComponents.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/algorithms/PageRank.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/algorithms/SVDPlusPlus.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponents.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/algorithms/TriangleCount.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/package.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/perf/BagelTest.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/perf/SparkTest.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala create mode 100644 graphx/src/main/scala/org/apache/spark/graphx/util/HashUtils.scala create mode 100644 graphx/src/test/resources/log4j.properties create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala create mode 100644 
graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/algorithms/ConnectedComponentsSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/algorithms/PageRankSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/algorithms/SVDPlusPlusSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponentsSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/algorithms/TriangleCountSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala create mode 100644 graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala (limited to 'project/SparkBuild.scala') diff --git a/graph/pom.xml b/graph/pom.xml deleted file mode 100644 index fd3dcaad7c..0000000000 --- a/graph/pom.xml +++ /dev/null @@ -1,129 +0,0 @@ - - - - - 4.0.0 - - org.apache.spark - spark-parent - 0.9.0-incubating-SNAPSHOT - ../pom.xml - - - org.apache.spark - spark-graph_2.9.3 - jar - Spark Graph - http://spark-project.org/ - - - - org.apache.spark - spark-core_2.9.3 - ${project.version} - provided - - - org.eclipse.jetty - jetty-server - - - - org.scalatest - scalatest_${scala.version} - test - - - org.scalacheck - scalacheck_${scala.version} - test - - - - target/scala-${scala.version}/classes - target/scala-${scala.version}/test-classes - - - org.scalatest - scalatest-maven-plugin - - - - - - - hadoop1 - - - org.spark-project - spark-core - ${project.version} - hadoop1 - - - org.apache.hadoop - hadoop-core - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop1 - - - - - - - hadoop2 - - - org.spark-project - spark-core - ${project.version} - hadoop2 - - - org.apache.hadoop - hadoop-core - provided - - - org.apache.hadoop - hadoop-client - provided - - - - - - org.apache.maven.plugins - maven-jar-plugin - - hadoop2 - - - - - - - diff --git a/graph/src/main/scala/org/apache/spark/graph/Analytics.scala b/graph/src/main/scala/org/apache/spark/graph/Analytics.scala deleted file mode 100644 index 14b9be73f1..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/Analytics.scala +++ /dev/null @@ -1,593 +0,0 @@ -package org.apache.spark.graph - -import org.apache.spark._ -import org.apache.spark.graph.algorithms._ - - -/** - * The Analytics object contains a collection of basic graph analytics - * algorithms that operate largely on the graph structure. - * - * In addition the Analytics object contains a driver `main` which can - * be used to apply the various functions to graphs in standard - * formats. 
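// Editorial sketch, not part of the patch: the argument shape the driver `main` below expects,
// based on the option parsing visible in the code. All concrete values here are illustrative.
Analytics.main(Array(
  "local[4]",                      // args(0): Spark master / host
  "pagerank",                      // args(1): task type
  "hdfs:///tmp/edges.txt",         // args(2): edge list file (hypothetical path)
  "-tol=0.01",                     // remaining args are parsed as -option=value pairs
  "-numEPart=8",
  "-partStrategy=EdgePartition2D"))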
- */ -object Analytics extends Logging { - - def main(args: Array[String]) = { - val host = args(0) - val taskType = args(1) - val fname = args(2) - val options = args.drop(3).map { arg => - arg.dropWhile(_ == '-').split('=') match { - case Array(opt, v) => (opt -> v) - case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - } - } - - def setLogLevels(level: org.apache.log4j.Level, loggers: TraversableOnce[String]) = { - loggers.map{ - loggerName => - val logger = org.apache.log4j.Logger.getLogger(loggerName) - val prevLevel = logger.getLevel() - logger.setLevel(level) - loggerName -> prevLevel - }.toMap - } - - def pickPartitioner(v: String): PartitionStrategy = { - v match { - case "RandomVertexCut" => RandomVertexCut - case "EdgePartition1D" => EdgePartition1D - case "EdgePartition2D" => EdgePartition2D - case "CanonicalRandomVertexCut" => CanonicalRandomVertexCut - case _ => throw new IllegalArgumentException("Invalid Partition Strategy: " + v) - } - } -// setLogLevels(org.apache.log4j.Level.DEBUG, Seq("org.apache.spark")) - - val serializer = "org.apache.spark.serializer.KryoSerializer" - System.setProperty("spark.serializer", serializer) - //System.setProperty("spark.shuffle.compress", "false") - System.setProperty("spark.kryo.registrator", "org.apache.spark.graph.GraphKryoRegistrator") - - taskType match { - case "pagerank" => { - - var tol:Float = 0.001F - var outFname = "" - var numVPart = 4 - var numEPart = 4 - var partitionStrategy: Option[PartitionStrategy] = None - - options.foreach{ - case ("tol", v) => tol = v.toFloat - case ("output", v) => outFname = v - case ("numVPart", v) => numVPart = v.toInt - case ("numEPart", v) => numEPart = v.toInt - case ("partStrategy", v) => partitionStrategy = Some(pickPartitioner(v)) - case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - } - - println("======================================") - println("| PageRank |") - println("======================================") - - val sc = new SparkContext(host, "PageRank(" + fname + ")") - - val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname, - minEdgePartitions = numEPart).cache() - val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)) - - println("GRAPHX: Number of vertices " + graph.vertices.count) - println("GRAPHX: Number of edges " + graph.edges.count) - - //val pr = Analytics.pagerank(graph, numIter) - val pr = PageRank.runStandalone(graph, tol) - - println("GRAPHX: Total rank: " + pr.map(_._2).reduce(_+_)) - - if (!outFname.isEmpty) { - logWarning("Saving pageranks of pages to " + outFname) - pr.map{case (id, r) => id + "\t" + r}.saveAsTextFile(outFname) - } - - sc.stop() - } - - case "cc" => { - - var numIter = Int.MaxValue - var numVPart = 4 - var numEPart = 4 - var isDynamic = false - var partitionStrategy: Option[PartitionStrategy] = None - - options.foreach{ - case ("numIter", v) => numIter = v.toInt - case ("dynamic", v) => isDynamic = v.toBoolean - case ("numEPart", v) => numEPart = v.toInt - case ("numVPart", v) => numVPart = v.toInt - case ("partStrategy", v) => partitionStrategy = Some(pickPartitioner(v)) - case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - } - - if(!isDynamic && numIter == Int.MaxValue) { - println("Set number of iterations!") - sys.exit(1) - } - println("======================================") - println("| Connected Components |") - println("--------------------------------------") - println(" Using parameters:") - println(" \tDynamic: " + isDynamic) - 
println(" \tNumIter: " + numIter) - println("======================================") - - val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") - val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname, - minEdgePartitions = numEPart).cache() - val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)) - - val cc = ConnectedComponents.run(graph) - println("Components: " + cc.vertices.map{ case (vid,data) => data}.distinct()) - sc.stop() - } - - case "triangles" => { - var numVPart = 4 - var numEPart = 4 - // TriangleCount requires the graph to be partitioned - var partitionStrategy: PartitionStrategy = RandomVertexCut - - options.foreach{ - case ("numEPart", v) => numEPart = v.toInt - case ("numVPart", v) => numVPart = v.toInt - case ("partStrategy", v) => partitionStrategy = pickPartitioner(v) - case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - } - println("======================================") - println("| Triangle Count |") - println("--------------------------------------") - val sc = new SparkContext(host, "TriangleCount(" + fname + ")") - val graph = GraphLoader.edgeListFile(sc, fname, canonicalOrientation = true, - minEdgePartitions = numEPart).partitionBy(partitionStrategy).cache() - val triangles = TriangleCount.run(graph) - println("Triangles: " + triangles.vertices.map { - case (vid,data) => data.toLong - }.reduce(_+_) / 3) - sc.stop() - } - -// -// case "shortestpath" => { -// -// var numIter = Int.MaxValue -// var isDynamic = true -// var sources: List[Int] = List.empty -// -// options.foreach{ -// case ("numIter", v) => numIter = v.toInt -// case ("dynamic", v) => isDynamic = v.toBoolean -// case ("source", v) => sources ++= List(v.toInt) -// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) -// } -// -// -// if(!isDynamic && numIter == Int.MaxValue) { -// println("Set number of iterations!") -// sys.exit(1) -// } -// -// if(sources.isEmpty) { -// println("No sources provided!") -// sys.exit(1) -// } -// -// println("======================================") -// println("| Shortest Path |") -// println("--------------------------------------") -// println(" Using parameters:") -// println(" \tDynamic: " + isDynamic) -// println(" \tNumIter: " + numIter) -// println(" \tSources: [" + sources.mkString(", ") + "]") -// println("======================================") -// -// val sc = new SparkContext(host, "ShortestPath(" + fname + ")") -// val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) ) -// //val sp = Analytics.shortestPath(graph, sources, numIter) -// // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) -// // else Analytics.shortestPath(graph, sources, numIter) -// println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) -// -// sc.stop() -// } - - - // case "als" => { - - // var numIter = 5 - // var lambda = 0.01 - // var latentK = 10 - // var usersFname = "usersFactors.tsv" - // var moviesFname = "moviesFname.tsv" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("lambda", v) => lambda = v.toDouble - // case ("latentK", v) => latentK = v.toInt - // case ("usersFname", v) => usersFname = v - // case ("moviesFname", v) => moviesFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // 
} - - // println("======================================") - // println("| Alternating Least Squares |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tNumIter: " + numIter) - // println(" \tLambda: " + lambda) - // println(" \tLatentK: " + latentK) - // println(" \tusersFname: " + usersFname) - // println(" \tmoviesFname: " + moviesFname) - // println("======================================") - - // val sc = new SparkContext(host, "ALS(" + fname + ")") - // val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble ) - // graph.numVPart = numVPart - // graph.numEPart = numEPart - - // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) - // assert(maxUser < minMovie) - - // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache - // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t")) - // .saveAsTextFile(usersFname) - // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) - // .saveAsTextFile(moviesFname) - - // sc.stop() - // } - - - case _ => { - println("Invalid task type.") - } - } - } - - // /** - // * Compute the shortest path to a set of markers - // */ - // def shortestPath[VD: Manifest](graph: Graph[VD, Double], sources: List[Int], numIter: Int) = { - // val sourceSet = sources.toSet - // val spGraph = graph.mapVertices { - // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0 else Double.MaxValue)) - // } - // GraphLab.iterateGA[Double, Double, Double](spGraph)( - // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // (a: Double, b: Double) => math.min(a, b), // merge - // (v, a: Option[Double]) => math.min(v.data, a.getOrElse(Double.MaxValue)), // apply - // numIter, - // gatherDirection = EdgeDirection.In) - // } - - // /** - // * Compute the connected component membership of each vertex - // * and return an RDD with the vertex value containing the - // * lowest vertex id in the connected component containing - // * that vertex. 
- // */ - // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], - // numIter: Int = Int.MaxValue) = { - - // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) - // val edges = graph.edges // .mapValues(v => None) - // val ccGraph = new Graph(vertices, edges) - - // ccGraph.iterateDynamic( - // (me_id, edge) => edge.otherVertex(me_id).data, // gather - // (a: Int, b: Int) => math.min(a, b), // merge - // Integer.MAX_VALUE, - // (v, a: Int) => math.min(v.data, a), // apply - // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter - // numIter, - // gatherEdges = EdgeDirection.Both, - // scatterEdges = EdgeDirection.Both).vertices - // // - // // graph_ret.vertices.collect.foreach(println) - // // graph_ret.edges.take(10).foreach(println) - // } - - - // /** - // * Compute the shortest path to a set of markers - // */ - // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Double], - // sources: List[Int], numIter: Int) = { - // val sourceSet = sources.toSet - // val vertices = graph.vertices.mapPartitions( - // iter => iter.map { - // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Double.MaxValue) ) - // }); - - // val edges = graph.edges // .mapValues(v => None) - // val spGraph = new Graph(vertices, edges) - - // val niterations = Int.MaxValue - // spGraph.iterateDynamic( - // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather - // (a: Double, b: Double) => math.min(a, b), // merge - // Double.MaxValue, - // (v, a: Double) => math.min(v.data, a), // apply - // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter - // numIter, - // gatherEdges = EdgeDirection.In, - // scatterEdges = EdgeDirection.Out).vertices - // } - - - // /** - // * - // */ - // def alternatingLeastSquares[VD: ClassTag, ED: ClassTag](graph: Graph[VD, Double], - // latentK: Int, lambda: Double, numIter: Int) = { - // val vertices = graph.vertices.mapPartitions( _.map { - // case (vid, _) => (vid, Array.fill(latentK){ scala.util.Random.nextDouble() } ) - // }).cache - // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // val edges = graph.edges // .mapValues(v => None) - // val alsGraph = new Graph(vertices, edges) - // alsGraph.numVPart = graph.numVPart - // alsGraph.numEPart = graph.numEPart - - // val niterations = Int.MaxValue - // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( - // (me_id, edge) => { // gather - // val X = edge.otherVertex(me_id).data - // val y = edge.data - // val Xy = X.map(_ * y) - // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray - // (Xy, XtX) - // }, - // (a, b) => { - // // The difference between the while loop and the zip is a FACTOR OF TWO in overall - // // runtime - // var i = 0 - // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } - // i = 0 - // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } - // a - // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) - // }, - // (Array.empty[Double], Array.empty[Double]), // default value is empty - // (vertex, accum) => { // apply - // val XyArray = accum._1 - // val XtXArray = accum._2 - // if(XyArray.isEmpty) vertex.data // no neighbors - // else { - // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => - // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + - // (if(i == j) lambda else 1.0F) //regularization - // } - // 
val Xy = DenseMatrix.create(latentK,1,XyArray) - // val w = XtX \ Xy - // w.data - // } - // }, - // (me_id, edge) => true, - // numIter, - // gatherEdges = EdgeDirection.Both, - // scatterEdges = EdgeDirection.Both, - // vertex => vertex.id < maxUser).vertices - // } - - // def main(args: Array[String]) = { - // val host = args(0) - // val taskType = args(1) - // val fname = args(2) - // val options = args.drop(3).map { arg => - // arg.dropWhile(_ == '-').split('=') match { - // case Array(opt, v) => (opt -> v) - // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) - // } - // } - - // System.setProperty("spark.serializer", "spark.KryoSerializer") - // //System.setProperty("spark.shuffle.compress", "false") - // System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") - - // taskType match { - // case "pagerank" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - // var tol:Double = 0.001 - // var outFname = "" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("tol", v) => tol = v.toDouble - // case ("output", v) => outFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - // println("======================================") - // println("| PageRank |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // if(isDynamic) println(" \t |-> Tolerance: " + tol) - // println(" \tNumIter: " + numIter) - // println("======================================") - - // val sc = new SparkContext(host, "PageRank(" + fname + ")") - - // val graph = GraphLoader.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache() - - // val startTime = System.currentTimeMillis - // logInfo("GRAPHX: starting tasks") - // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) - // logInfo("GRAPHX: Number of edges " + graph.edges.count) - - // val pr = Analytics.pagerank(graph, numIter) - // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) - // // else Analytics.pagerank(graph, numIter) - // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) - // if (!outFname.isEmpty) { - // println("Saving pageranks of pages to " + outFname) - // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) - // } - // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") - // sc.stop() - // } - - // case "cc" => { - - // var numIter = Int.MaxValue - // var isDynamic = false - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - // println("======================================") - // println("| Connected Components |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // 
println("======================================") - - // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") - // val graph = GraphLoader.textFile(sc, fname, a => 1.0) - // val cc = Analytics.connectedComponents(graph, numIter) - // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) - // // else Analytics.connectedComponents(graph, numIter) - // println("Components: " + cc.vertices.map(_.data).distinct()) - - // sc.stop() - // } - - // case "shortestpath" => { - - // var numIter = Int.MaxValue - // var isDynamic = true - // var sources: List[Int] = List.empty - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("dynamic", v) => isDynamic = v.toBoolean - // case ("source", v) => sources ++= List(v.toInt) - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - - // if(!isDynamic && numIter == Int.MaxValue) { - // println("Set number of iterations!") - // sys.exit(1) - // } - - // if(sources.isEmpty) { - // println("No sources provided!") - // sys.exit(1) - // } - - // println("======================================") - // println("| Shortest Path |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tDynamic: " + isDynamic) - // println(" \tNumIter: " + numIter) - // println(" \tSources: [" + sources.mkString(", ") + "]") - // println("======================================") - - // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") - // val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) ) - // val sp = Analytics.shortestPath(graph, sources, numIter) - // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) - // // else Analytics.shortestPath(graph, sources, numIter) - // println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) - - // sc.stop() - // } - - - // case "als" => { - - // var numIter = 5 - // var lambda = 0.01 - // var latentK = 10 - // var usersFname = "usersFactors.tsv" - // var moviesFname = "moviesFname.tsv" - // var numVPart = 4 - // var numEPart = 4 - - // options.foreach{ - // case ("numIter", v) => numIter = v.toInt - // case ("lambda", v) => lambda = v.toDouble - // case ("latentK", v) => latentK = v.toInt - // case ("usersFname", v) => usersFname = v - // case ("moviesFname", v) => moviesFname = v - // case ("numVPart", v) => numVPart = v.toInt - // case ("numEPart", v) => numEPart = v.toInt - // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) - // } - - // println("======================================") - // println("| Alternating Least Squares |") - // println("--------------------------------------") - // println(" Using parameters:") - // println(" \tNumIter: " + numIter) - // println(" \tLambda: " + lambda) - // println(" \tLatentK: " + latentK) - // println(" \tusersFname: " + usersFname) - // println(" \tmoviesFname: " + moviesFname) - // println("======================================") - - // val sc = new SparkContext(host, "ALS(" + fname + ")") - // val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble ) - // graph.numVPart = numVPart - // graph.numEPart = numEPart - - // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) - // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) - // assert(maxUser < minMovie) - - // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache - // factors.filter(_._1 <= maxUser).map(r => 
r._1 + "\t" + r._2.mkString("\t")) - // .saveAsTextFile(usersFname) - // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) - // .saveAsTextFile(moviesFname) - - // sc.stop() - // } - - - // case _ => { - // println("Invalid task type.") - // } - // } - // } - -} diff --git a/graph/src/main/scala/org/apache/spark/graph/Edge.scala b/graph/src/main/scala/org/apache/spark/graph/Edge.scala deleted file mode 100644 index 19c28bea68..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/Edge.scala +++ /dev/null @@ -1,50 +0,0 @@ -package org.apache.spark.graph - - -/** - * A single directed edge consisting of a source id, target id, - * and the data associated with the Edgee. - * - * @tparam ED type of the edge attribute - */ -case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] ( - /** - * The vertex id of the source vertex - */ - var srcId: VertexID = 0, - /** - * The vertex id of the target vertex. - */ - var dstId: VertexID = 0, - /** - * The attribute associated with the edge. - */ - var attr: ED = nullValue[ED]) extends Serializable { - - /** - * Given one vertex in the edge return the other vertex. - * - * @param vid the id one of the two vertices on the edge. - * @return the id of the other vertex on the edge. - */ - def otherVertexId(vid: VertexID): VertexID = - if (srcId == vid) dstId else { assert(dstId == vid); srcId } - - /** - * Return the relative direction of the edge to the corresponding - * vertex. - * - * @param vid the id of one of the two vertices in the edge. - * @return the relative direction of the edge to the corresponding - * vertex. - */ - def relativeDirection(vid: VertexID): EdgeDirection = - if (vid == srcId) EdgeDirection.Out else { assert(vid == dstId); EdgeDirection.In } -} - -object Edge { - def lexicographicOrdering[ED] = new Ordering[Edge[ED]] { - override def compare(a: Edge[ED], b: Edge[ED]): Int = - Ordering[(VertexID, VertexID)].compare((a.srcId, a.dstId), (b.srcId, b.dstId)) - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala b/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala deleted file mode 100644 index a1468a152b..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/EdgeDirection.scala +++ /dev/null @@ -1,36 +0,0 @@ -package org.apache.spark.graph - - -/** - * The direction of directed edge relative to a vertex used to select - * the set of adjacent neighbors when running a neighborhood query. - */ -sealed abstract class EdgeDirection { - /** - * Reverse the direction of an edge. An in becomes out, - * out becomes in and both remains both. - */ - def reverse: EdgeDirection = this match { - case EdgeDirection.In => EdgeDirection.Out - case EdgeDirection.Out => EdgeDirection.In - case EdgeDirection.Both => EdgeDirection.Both - } -} - - -object EdgeDirection { - /** - * Edges arriving at a vertex. 
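// Editorial sketch, not part of the patch: the Edge helpers above applied to a toy edge.
val e = Edge(srcId = 1L, dstId = 2L, attr = "follows")
e.otherVertexId(1L)       // 2L, the id at the opposite end of the edge
e.relativeDirection(2L)   // EdgeDirection.In, because the edge arrives at vertex 2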
- */ - case object In extends EdgeDirection - - /** - * Edges originating from a vertex - */ - case object Out extends EdgeDirection - - /** - * All edges adjacent to a vertex - */ - case object Both extends EdgeDirection -} diff --git a/graph/src/main/scala/org/apache/spark/graph/EdgeRDD.scala b/graph/src/main/scala/org/apache/spark/graph/EdgeRDD.scala deleted file mode 100644 index 78821bf568..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/EdgeRDD.scala +++ /dev/null @@ -1,73 +0,0 @@ -package org.apache.spark.graph - -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.{OneToOneDependency, Partition, Partitioner, TaskContext} -import org.apache.spark.graph.impl.EdgePartition -import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel - - -class EdgeRDD[@specialized ED: ClassTag]( - val partitionsRDD: RDD[(PartitionID, EdgePartition[ED])]) - extends RDD[Edge[ED]](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { - - partitionsRDD.setName("EdgeRDD") - - override protected def getPartitions: Array[Partition] = partitionsRDD.partitions - - /** - * If partitionsRDD already has a partitioner, use it. Otherwise assume that the PartitionIDs in - * partitionsRDD correspond to the actual partitions and create a new partitioner that allows - * co-partitioning with partitionsRDD. - */ - override val partitioner = - partitionsRDD.partitioner.orElse(Some(Partitioner.defaultPartitioner(partitionsRDD))) - - override def compute(part: Partition, context: TaskContext): Iterator[Edge[ED]] = { - firstParent[(PartitionID, EdgePartition[ED])].iterator(part, context).next._2.iterator - } - - override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect() - - /** - * Caching a VertexRDD causes the index and values to be cached separately. - */ - override def persist(newLevel: StorageLevel): EdgeRDD[ED] = { - partitionsRDD.persist(newLevel) - this - } - - /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ - override def persist(): EdgeRDD[ED] = persist(StorageLevel.MEMORY_ONLY) - - /** Persist this RDD with the default storage level (`MEMORY_ONLY`). 
*/ - override def cache(): EdgeRDD[ED] = persist() - - def mapEdgePartitions[ED2: ClassTag](f: (PartitionID, EdgePartition[ED]) => EdgePartition[ED2]) - : EdgeRDD[ED2] = { -// iter => iter.map { case (pid, ep) => (pid, f(ep)) } - new EdgeRDD[ED2](partitionsRDD.mapPartitions({ iter => - val (pid, ep) = iter.next() - Iterator(Tuple2(pid, f(pid, ep))) - }, preservesPartitioning = true)) - } - - def innerJoin[ED2: ClassTag, ED3: ClassTag] - (other: EdgeRDD[ED2]) - (f: (VertexID, VertexID, ED, ED2) => ED3): EdgeRDD[ED3] = { - val ed2Tag = classTag[ED2] - val ed3Tag = classTag[ED3] - new EdgeRDD[ED3](partitionsRDD.zipPartitions(other.partitionsRDD, true) { - (thisIter, otherIter) => - val (pid, thisEPart) = thisIter.next() - val (_, otherEPart) = otherIter.next() - Iterator(Tuple2(pid, thisEPart.innerJoin(otherEPart)(f)(ed2Tag, ed3Tag))) - }) - } - - def collectVertexIDs(): RDD[VertexID] = { - partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) } - } - -} diff --git a/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala b/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala deleted file mode 100644 index a5103ed3cb..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/EdgeTriplet.scala +++ /dev/null @@ -1,63 +0,0 @@ -package org.apache.spark.graph - -import org.apache.spark.graph.impl.VertexPartition - -/** - * An edge triplet represents two vertices and edge along with their - * attributes. - * - * @tparam VD the type of the vertex attribute. - * @tparam ED the type of the edge attribute - * - * @todo specialize edge triplet for basic types, though when I last - * tried specializing I got a warning about inherenting from a type - * that is not a trait. - */ -class EdgeTriplet[VD, ED] extends Edge[ED] { -// class EdgeTriplet[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD: ClassTag, -// @specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassTag] extends Edge[ED] { - - - /** - * The source vertex attribute - */ - var srcAttr: VD = _ //nullValue[VD] - - /** - * The destination vertex attribute - */ - var dstAttr: VD = _ //nullValue[VD] - - var srcStale: Boolean = false - var dstStale: Boolean = false - - /** - * Set the edge properties of this triplet. - */ - protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD,ED] = { - srcId = other.srcId - dstId = other.dstId - attr = other.attr - this - } - - /** - * Given one vertex in the edge return the other vertex. - * - * @param vid the id one of the two vertices on the edge. - * @return the attribute for the other vertex on the edge. - */ - def otherVertexAttr(vid: VertexID): VD = - if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr } - - /** - * Get the vertex object for the given vertex in the edge. - * - * @param vid the id of one of the two vertices on the edge - * @return the attr for the vertex with that id. 
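// A minimal sketch, not from the original sources: reading endpoint attributes off an
// EdgeTriplet, e.g. inside Graph.mapTriplets. Assumes a graph: Graph[String, Int] built
// elsewhere with this API.
val sameLabel: Graph[String, Boolean] = graph.mapTriplets { et =>
  // otherVertexAttr(et.srcId) is the destination attribute, per the contract above
  et.srcAttr == et.otherVertexAttr(et.srcId)
}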
- */ - def vertexAttr(vid: VertexID): VD = - if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr } - - override def toString() = ((srcId, srcAttr), (dstId, dstAttr), attr).toString() -} diff --git a/graph/src/main/scala/org/apache/spark/graph/Graph.scala b/graph/src/main/scala/org/apache/spark/graph/Graph.scala deleted file mode 100644 index 86282e607e..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/Graph.scala +++ /dev/null @@ -1,437 +0,0 @@ -package org.apache.spark.graph - -import scala.reflect.ClassTag - -import org.apache.spark.graph.impl._ -import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel - - -/** - * The Graph abstractly represents a graph with arbitrary objects - * associated with vertices and edges. The graph provides basic - * operations to access and manipulate the data associated with - * vertices and edges as well as the underlying structure. Like Spark - * RDDs, the graph is a functional data-structure in which mutating - * operations return new graphs. - * - * @see GraphOps for additional graph member functions. - * - * @note The majority of the graph operations are implemented in - * `GraphOps`. All the convenience operations are defined in the - * `GraphOps` class which may be shared across multiple graph - * implementations. - * - * @tparam VD the vertex attribute type - * @tparam ED the edge attribute type - */ -abstract class Graph[VD: ClassTag, ED: ClassTag] { - - /** - * Get the vertices and their data. - * - * @note vertex ids are unique. - * @return An RDD containing the vertices in this graph - * - * @see Vertex for the vertex type. - * - */ - val vertices: VertexRDD[VD] - - /** - * Get the Edges and their data as an RDD. The entries in the RDD - * contain just the source id and target id along with the edge - * data. - * - * @return An RDD containing the edges in this graph - * - * @see Edge for the edge type. - * @see edgesWithVertices to get an RDD which contains all the edges - * along with their vertex data. - * - */ - val edges: EdgeRDD[ED] - - /** - * Get the edges with the vertex data associated with the adjacent - * pair of vertices. - * - * @return An RDD containing edge triplets. - * - * @example This operation might be used to evaluate a graph - * coloring where we would like to check that both vertices are a - * different color. - * {{{ - * type Color = Int - * val graph: Graph[Color, Int] = Graph.textFile("hdfs://file.tsv") - * val numInvalid = graph.edgesWithVertices() - * .map(e => if (e.src.data == e.dst.data) 1 else 0).sum - * }}} - * - * @see edges() If only the edge data and adjacent vertex ids are - * required. - * - */ - val triplets: RDD[EdgeTriplet[VD, ED]] - - /** - * Cache the vertices and edges associated with this graph. - * - * @param newLevel the level at which to cache the graph. - - * @return A reference to this graph for convenience. - * - */ - def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED] - - /** - * Return a graph that is cached when first created. This is used to - * pin a graph in memory enabling multiple queries to reuse the same - * construction process. - * - * @see RDD.cache() for a more detailed explanation of caching. - */ - def cache(): Graph[VD, ED] - - /** - * Repartition the edges in the graph according to partitionStrategy. - */ - def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] - - /** - * Compute statistics describing the graph representation. 
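// A minimal sketch, not from the original sources: pinning a graph in memory and
// repartitioning its edges with the members declared above. Assumes graph: Graph[Int, Int];
// EdgePartition2D is the 2D strategy defined later in this patch.
val pinned: Graph[Int, Int] = graph.partitionBy(EdgePartition2D).cache()
println(pinned.statistics)   // implementation-defined summary of the representation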
- */ - def statistics: Map[String, Any] - - /** - * Construct a new graph where each vertex value has been - * transformed by the map function. - * - * @note This graph is not changed and that the new graph has the - * same structure. As a consequence the underlying index structures - * can be reused. - * - * @param map the function from a vertex object to a new vertex value. - * - * @tparam VD2 the new vertex data type - * - * @example We might use this operation to change the vertex values - * from one type to another to initialize an algorithm. - * {{{ - * val rawGraph: Graph[(), ()] = Graph.textFile("hdfs://file") - * val root = 42 - * var bfsGraph = rawGraph - * .mapVertices[Int]((vid, data) => if (vid == root) 0 else Math.MaxValue) - * }}} - * - */ - def mapVertices[VD2: ClassTag](map: (VertexID, VD) => VD2): Graph[VD2, ED] - - /** - * Construct a new graph where the value of each edge is - * transformed by the map operation. This function is not passed - * the vertex value for the vertices adjacent to the edge. If - * vertex values are desired use the mapTriplets function. - * - * @note This graph is not changed and that the new graph has the - * same structure. As a consequence the underlying index structures - * can be reused. - * - * @param map the function from an edge object to a new edge value. - * - * @tparam ED2 the new edge data type - * - * @example This function might be used to initialize edge - * attributes. - * - */ - def mapEdges[ED2: ClassTag](map: Edge[ED] => ED2): Graph[VD, ED2] = { - mapEdges((pid, iter) => iter.map(map)) - } - - /** - * Construct a new graph transforming the value of each edge using - * the user defined iterator transform. The iterator transform is - * given an iterator over edge triplets within a logical partition - * and should yield a new iterator over the new values of each edge - * in the order in which they are provided to the iterator transform - * If adjacent vertex values are not required, consider using the - * mapEdges function instead. - * - * @note This that this does not change the structure of the - * graph or modify the values of this graph. As a consequence - * the underlying index structures can be reused. - * - * @param map the function which takes a partition id and an iterator - * over all the edges in the partition and must return an iterator over - * the new values for each edge in the order of the input iterator. - * - * @tparam ED2 the new edge data type - * - */ - def mapEdges[ED2: ClassTag]( - map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] - - /** - * Construct a new graph where the value of each edge is - * transformed by the map operation. This function passes vertex - * values for the adjacent vertices to the map function. If - * adjacent vertex values are not required, consider using the - * mapEdges function instead. - * - * @note This that this does not change the structure of the - * graph or modify the values of this graph. As a consequence - * the underlying index structures can be reused. - * - * @param map the function from an edge object to a new edge value. - * - * @tparam ED2 the new edge data type - * - * @example This function might be used to initialize edge - * attributes based on the attributes associated with each vertex. 
- * {{{ - * val rawGraph: Graph[Int, Int] = someLoadFunction() - * val graph = rawGraph.mapTriplets[Int]( edge => - * edge.src.data - edge.dst.data) - * }}} - * - */ - def mapTriplets[ED2: ClassTag](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] = { - mapTriplets((pid, iter) => iter.map(map)) - } - - /** - * Construct a new graph transforming the value of each edge using - * the user defined iterator transform. The iterator transform is - * given an iterator over edge triplets within a logical partition - * and should yield a new iterator over the new values of each edge - * in the order in which they are provided to the iterator transform - * If adjacent vertex values are not required, consider using the - * mapEdges function instead. - * - * @note This that this does not change the structure of the - * graph or modify the values of this graph. As a consequence - * the underlying index structures can be reused. - * - * @param map the function which takes a partition id and an iterator - * over all the edges in the partition and must return an iterator over - * the new values for each edge in the order of the input iterator. - * - * @tparam ED2 the new edge data type - * - */ - def mapTriplets[ED2: ClassTag]( - map: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2]): - Graph[VD, ED2] - - /** - * Construct a new graph with all the edges reversed. If this graph - * contains an edge from a to b then the returned graph contains an - * edge from b to a. - */ - def reverse: Graph[VD, ED] - - /** - * This function takes a vertex and edge predicate and constructs - * the subgraph that consists of vertices and edges that satisfy the - * predict. The resulting graph contains the vertices and edges - * that satisfy: - * - * {{{ - * V' = {v : for all v in V where vpred(v)} - * E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)} - * }}} - * - * @param epred the edge predicate which takes a triplet and - * evaluates to true if the edge is to remain in the subgraph. Note - * that only edges in which both vertices satisfy the vertex - * predicate are considered. - * - * @param vpred the vertex predicate which takes a vertex object and - * evaluates to true if the vertex is to be included in the subgraph - * - * @return the subgraph containing only the vertices and edges that - * satisfy the predicates. - */ - def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (x => true), - vpred: (VertexID, VD) => Boolean = ((v,d) => true) ): Graph[VD, ED] - - /** - * Subgraph of this graph with only vertices and edges from the other graph. - * @param other the graph to project this graph onto - * @return a graph with vertices and edges that exists in both the current graph and other, - * with vertex and edge data from the current graph. - */ - def mask[VD2: ClassTag, ED2: ClassTag](other: Graph[VD2, ED2]): Graph[VD, ED] - - /** - * This function merges multiple edges between two vertices into a single Edge. For correct - * results, the graph must have been partitioned using partitionBy. - * - * @tparam ED2 the type of the resulting edge data after grouping. - * - * @param f the user supplied commutative associative function to merge edge attributes for - * duplicate edges. - * - * @return Graph[VD,ED2] The resulting graph with a single Edge for each source, dest vertex pair. - */ - def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED] - - /** - * The mapReduceTriplets function is used to compute statistics - * about the neighboring edges and vertices of each vertex. 
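// A minimal sketch, not from the original sources: the structural operators above.
// Assumes graph: Graph[Int, Int] whose vertex attribute is an age, purely for
// illustration; RandomVertexCut is the strategy defined later in this patch.
val adults: Graph[Int, Int] = graph.subgraph(vpred = (vid, age) => age >= 18)
val deduped: Graph[Int, Int] =
  graph.partitionBy(RandomVertexCut).groupEdges((a, b) => a + b)   // merge duplicate edges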
The - * user supplied `mapFunc` function is invoked on each edge of the - * graph generating 0 or more "messages" to be "sent" to either - * vertex in the edge. The `reduceFunc` is then used to combine the - * output of the map phase destined to each vertex. - * - * @tparam A the type of "message" to be sent to each vertex - * - * @param mapFunc the user defined map function which returns 0 or - * more messages to neighboring vertices. - * - * @param reduceFunc the user defined reduce function which should - * be commutative and assosciative and is used to combine the output - * of the map phase. - * - * @param activeSet optionally, a set of "active" vertices and a direction of edges to consider - * when running `mapFunc`. For example, if the direction is Out, `mapFunc` will only be run on - * edges originating from vertices in the active set. `activeSet` must have the same index as the - * graph's vertices. - * - * @example We can use this function to compute the inDegree of each - * vertex - * {{{ - * val rawGraph: Graph[(),()] = Graph.textFile("twittergraph") - * val inDeg: RDD[(VertexID, Int)] = - * mapReduceTriplets[Int](et => Array((et.dst.id, 1)), _ + _) - * }}} - * - * @note By expressing computation at the edge level we achieve - * maximum parallelism. This is one of the core functions in the - * Graph API in that enables neighborhood level computation. For - * example this function can be used to count neighbors satisfying a - * predicate or implement PageRank. - * - */ - def mapReduceTriplets[A: ClassTag]( - mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)], - reduceFunc: (A, A) => A, - activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None) - : VertexRDD[A] - - /** - * Join the vertices with an RDD and then apply a function from the - * the vertex and RDD entry to a new vertex value and type. The - * input table should contain at most one entry for each vertex. If - * no entry is provided the map function is invoked passing none. - * - * @tparam U the type of entry in the table of updates - * @tparam VD2 the new vertex value type - * - * @param table the table to join with the vertices in the graph. - * The table should contain at most one entry for each vertex. - * - * @param mapFunc the function used to compute the new vertex - * values. The map function is invoked for all vertices, even those - * that do not have a corresponding entry in the table. - * - * @example This function is used to update the vertices with new - * values based on external data. For example we could add the out - * degree to each vertex record - * - * {{{ - * val rawGraph: Graph[(),()] = Graph.textFile("webgraph") - * val outDeg: RDD[(VertexID, Int)] = rawGraph.outDegrees() - * val graph = rawGraph.outerJoinVertices(outDeg) { - * (vid, data, optDeg) => optDeg.getOrElse(0) - * } - * }}} - * - */ - def outerJoinVertices[U: ClassTag, VD2: ClassTag](table: RDD[(VertexID, U)]) - (mapFunc: (VertexID, VD, Option[U]) => VD2) - : Graph[VD2, ED] - - // Save a copy of the GraphOps object so there is always one unique GraphOps object - // for a given Graph object, and thus the lazy vals in GraphOps would work as intended. - val ops = new GraphOps(this) -} // end of Graph - - - - -/** - * The Graph object contains a collection of routines used to construct graphs from RDDs. - */ -object Graph { - - /** - * Construct a graph from a collection of edges encoded as vertex id pairs. - * - * @param rawEdges a collection of edges in (src,dst) form. 
- * @param uniqueEdges if multiple identical edges are found they are combined and the edge - * attribute is set to the sum. Otherwise duplicate edges are treated as separate. To enable - * uniqueEdges, a [[PartitionStrategy]] must be provided. - * - * @return a graph with edge attributes containing either the count of duplicate edges or 1 - * (if `uniqueEdges=None`) and vertex attributes containing the total degree of each vertex. - */ - def fromEdgeTuples[VD: ClassTag]( - rawEdges: RDD[(VertexID, VertexID)], - defaultValue: VD, - uniqueEdges: Option[PartitionStrategy] = None): Graph[VD, Int] = { - val edges = rawEdges.map(p => Edge(p._1, p._2, 1)) - val graph = GraphImpl(edges, defaultValue) - uniqueEdges match { - case Some(p) => graph.partitionBy(p).groupEdges((a, b) => a + b) - case None => graph - } - } - - /** - * Construct a graph from a collection of edges. - * - * @param edges the RDD containing the set of edges in the graph - * @param defaultValue the default vertex attribute to use for each vertex - * - * @return a graph with edge attributes described by `edges` and vertices - * given by all vertices in `edges` with value `defaultValue` - */ - def fromEdges[VD: ClassTag, ED: ClassTag]( - edges: RDD[Edge[ED]], - defaultValue: VD): Graph[VD, ED] = { - GraphImpl(edges, defaultValue) - } - - /** - * Construct a graph from a collection attributed vertices and - * edges. Duplicate vertices are picked arbitrarily and - * vertices found in the edge collection but not in the input - * vertices are the default attribute. - * - * @tparam VD the vertex attribute type - * @tparam ED the edge attribute type - * @param vertices the "set" of vertices and their attributes - * @param edges the collection of edges in the graph - * @param defaultVertexAttr the default vertex attribute to use for - * vertices that are mentioned in edges but not in vertices - * @param partitionStrategy the partition strategy to use when - * partitioning the edges. - */ - def apply[VD: ClassTag, ED: ClassTag]( - vertices: RDD[(VertexID, VD)], - edges: RDD[Edge[ED]], - defaultVertexAttr: VD = null.asInstanceOf[VD]): Graph[VD, ED] = { - GraphImpl(vertices, edges, defaultVertexAttr) - } - - /** - * The implicit graphToGraphOPs function extracts the GraphOps member from a graph. - * - * To improve modularity the Graph type only contains a small set of basic operations. All the - * convenience operations are defined in the GraphOps class which may be shared across multiple - * graph implementations. 
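// A minimal sketch, not from the original sources: the three construction routes this
// object declares, assuming an existing SparkContext `sc` and the Long-backed VertexID alias.
import org.apache.spark.graph._
import org.apache.spark.rdd.RDD

val rawEdges: RDD[(VertexID, VertexID)] = sc.parallelize(Seq((1L, 2L), (2L, 3L), (1L, 2L)))
// Supplying a partition strategy merges the duplicate (1,2) edge and counts it in the attribute.
val g1: Graph[Int, Int] =
  Graph.fromEdgeTuples(rawEdges, defaultValue = 1, uniqueEdges = Some(RandomVertexCut))

val edges: RDD[Edge[String]] = sc.parallelize(Seq(Edge(1L, 2L, "follows"), Edge(2L, 3L, "likes")))
val g2: Graph[Int, String] = Graph.fromEdges(edges, defaultValue = 1)

val vertices: RDD[(VertexID, String)] = sc.parallelize(Seq((1L, "alice"), (2L, "bob")))
val g3: Graph[String, String] = Graph(vertices, edges, defaultVertexAttr = "unknown")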
- */ - implicit def graphToGraphOps[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED]) = g.ops -} // end of Graph object diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala b/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala deleted file mode 100644 index 296f3848f1..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/GraphKryoRegistrator.scala +++ /dev/null @@ -1,28 +0,0 @@ -package org.apache.spark.graph - -import com.esotericsoftware.kryo.Kryo - -import org.apache.spark.graph.impl._ -import org.apache.spark.serializer.KryoRegistrator -import org.apache.spark.util.collection.BitSet -import org.apache.spark.util.BoundedPriorityQueue - - -class GraphKryoRegistrator extends KryoRegistrator { - - def registerClasses(kryo: Kryo) { - kryo.register(classOf[Edge[Object]]) - kryo.register(classOf[MessageToPartition[Object]]) - kryo.register(classOf[VertexBroadcastMsg[Object]]) - kryo.register(classOf[(VertexID, Object)]) - kryo.register(classOf[EdgePartition[Object]]) - kryo.register(classOf[BitSet]) - kryo.register(classOf[VertexIdToIndexMap]) - kryo.register(classOf[VertexAttributeBlock[Object]]) - kryo.register(classOf[PartitionStrategy]) - kryo.register(classOf[BoundedPriorityQueue[Object]]) - - // This avoids a large number of hash table lookups. - kryo.setReferences(false) - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala b/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala deleted file mode 100644 index 22f4854019..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/GraphLab.scala +++ /dev/null @@ -1,134 +0,0 @@ -package org.apache.spark.graph - -import scala.reflect.ClassTag - -import org.apache.spark.Logging -import scala.collection.JavaConversions._ -import org.apache.spark.rdd.RDD - -/** - * This object implements the GraphLab gather-apply-scatter api. - */ -object GraphLab extends Logging { - - /** - * Execute the GraphLab Gather-Apply-Scatter API - * - * @todo finish documenting GraphLab Gather-Apply-Scatter API - * - * @param graph The graph on which to execute the GraphLab API - * @param gatherFunc The gather function is executed on each edge triplet - * adjacent to a vertex and returns an accumulator which - * is then merged using the merge function. - * @param mergeFunc An accumulative associative operation on the result of - * the gather type. - * @param applyFunc Takes a vertex and the final result of the merge operations - * on the adjacent edges and returns a new vertex value. - * @param scatterFunc Executed after the apply function the scatter function takes - * a triplet and signals whether the neighboring vertex program - * must be recomputed. - * @param startVertices predicate to determine which vertices to start the computation on. - * these will be the active vertices in the first iteration. - * @param numIter The maximum number of iterations to run. 
- * @param gatherDirection The direction of edges to consider during the gather phase - * @param scatterDirection The direction of edges to consider during the scatter phase - * - * @tparam VD The graph vertex attribute type - * @tparam ED The graph edge attribute type - * @tparam A The type accumulated during the gather phase - * @return the resulting graph after the algorithm converges - */ - def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] - (graph: Graph[VD, ED], numIter: Int, - gatherDirection: EdgeDirection = EdgeDirection.In, - scatterDirection: EdgeDirection = EdgeDirection.Out) - (gatherFunc: (VertexID, EdgeTriplet[VD, ED]) => A, - mergeFunc: (A, A) => A, - applyFunc: (VertexID, VD, Option[A]) => VD, - scatterFunc: (VertexID, EdgeTriplet[VD, ED]) => Boolean, - startVertices: (VertexID, VD) => Boolean = (vid: VertexID, data: VD) => true) - : Graph[VD, ED] = { - - - // Add an active attribute to all vertices to track convergence. - var activeGraph: Graph[(Boolean, VD), ED] = graph.mapVertices { - case (id, data) => (startVertices(id, data), data) - }.cache() - - // The gather function wrapper strips the active attribute and - // only invokes the gather function on active vertices - def gather(vid: VertexID, e: EdgeTriplet[(Boolean, VD), ED]): Option[A] = { - if (e.vertexAttr(vid)._1) { - val edgeTriplet = new EdgeTriplet[VD,ED] - edgeTriplet.set(e) - edgeTriplet.srcAttr = e.srcAttr._2 - edgeTriplet.dstAttr = e.dstAttr._2 - Some(gatherFunc(vid, edgeTriplet)) - } else { - None - } - } - - // The apply function wrapper strips the vertex of the active attribute - // and only invokes the apply function on active vertices - def apply(vid: VertexID, data: (Boolean, VD), accum: Option[A]): (Boolean, VD) = { - val (active, vData) = data - if (active) (true, applyFunc(vid, vData, accum)) - else (false, vData) - } - - // The scatter function wrapper strips the vertex of the active attribute - // and only invokes the scatter function on active vertices - def scatter(rawVertexID: VertexID, e: EdgeTriplet[(Boolean, VD), ED]): Option[Boolean] = { - val vid = e.otherVertexId(rawVertexID) - if (e.vertexAttr(vid)._1) { - val edgeTriplet = new EdgeTriplet[VD,ED] - edgeTriplet.set(e) - edgeTriplet.srcAttr = e.srcAttr._2 - edgeTriplet.dstAttr = e.dstAttr._2 - Some(scatterFunc(vid, edgeTriplet)) - } else { - None - } - } - - // Used to set the active status of vertices for the next round - def applyActive( - vid: VertexID, data: (Boolean, VD), newActiveOpt: Option[Boolean]): (Boolean, VD) = { - val (prevActive, vData) = data - (newActiveOpt.getOrElse(false), vData) - } - - // Main Loop --------------------------------------------------------------------- - var i = 0 - var numActive = activeGraph.numVertices - while (i < numIter && numActive > 0) { - - // Gather - val gathered: RDD[(VertexID, A)] = - activeGraph.aggregateNeighbors(gather, mergeFunc, gatherDirection) - - // Apply - activeGraph = activeGraph.outerJoinVertices(gathered)(apply).cache() - - - - // Scatter is basically a gather in the opposite direction so we reverse the edge direction - // activeGraph: Graph[(Boolean, VD), ED] - val scattered: RDD[(VertexID, Boolean)] = - activeGraph.aggregateNeighbors(scatter, _ || _, scatterDirection.reverse) - - activeGraph = activeGraph.outerJoinVertices(scattered)(applyActive).cache() - - // Calculate the number of active vertices - numActive = activeGraph.vertices.map{ - case (vid, data) => if (data._1) 1 else 0 - }.reduce(_ + _) - logInfo("Number active vertices: " + numActive) - i += 1 - } - - // 
Remove the active attribute from the vertex data before returning the graph - activeGraph.mapVertices{case (vid, data) => data._2 } - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala b/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala deleted file mode 100644 index 7daac4fcc5..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/GraphLoader.scala +++ /dev/null @@ -1,113 +0,0 @@ -package org.apache.spark.graph - -import java.util.{Arrays => JArrays} -import scala.reflect.ClassTag - -import org.apache.spark.graph.impl.EdgePartitionBuilder -import org.apache.spark.{Logging, SparkContext} -import org.apache.spark.graph.impl.{EdgePartition, GraphImpl} -import org.apache.spark.util.collection.PrimitiveVector - - -object GraphLoader extends Logging { - - /** - * Load an edge list from file initializing the Graph - * - * @tparam ED the type of the edge data of the resulting Graph - * - * @param sc the SparkContext used to construct RDDs - * @param path the path to the text file containing the edge list - * @param edgeParser a function that takes an array of strings and - * returns an ED object - * @param minEdgePartitions the number of partitions for the - * the Edge RDD - * - */ - def textFile[ED: ClassTag]( - sc: SparkContext, - path: String, - edgeParser: Array[String] => ED, - minEdgePartitions: Int = 1): - Graph[Int, ED] = { - // Parse the edge data table - val edges = sc.textFile(path, minEdgePartitions).mapPartitions( iter => - iter.filter(line => !line.isEmpty && line(0) != '#').map { line => - val lineArray = line.split("\\s+") - if(lineArray.length < 2) { - println("Invalid line: " + line) - assert(false) - } - val source = lineArray(0).trim.toLong - val target = lineArray(1).trim.toLong - val tail = lineArray.drop(2) - val edata = edgeParser(tail) - Edge(source, target, edata) - }) - val defaultVertexAttr = 1 - Graph.fromEdges(edges, defaultVertexAttr) - } - - /** - * Load a graph from an edge list formatted file with each line containing - * two integers: a source Id and a target Id. - * - * @example A file in the following format: - * {{{ - * # Comment Line - * # Source Id <\t> Target Id - * 1 -5 - * 1 2 - * 2 7 - * 1 8 - * }}} - * - * If desired the edges can be automatically oriented in the positive - * direction (source Id < target Id) by setting `canonicalOrientation` to - * true - * - * @param sc - * @param path the path to the file (e.g., /Home/data/file or hdfs://file) - * @param canonicalOrientation whether to orient edges in the positive - * direction. 
- * @param minEdgePartitions the number of partitions for the - * the Edge RDD - * @tparam ED - * @return - */ - def edgeListFile( - sc: SparkContext, - path: String, - canonicalOrientation: Boolean = false, - minEdgePartitions: Int = 1): - Graph[Int, Int] = { - val startTime = System.currentTimeMillis - - // Parse the edge data table directly into edge partitions - val edges = sc.textFile(path, minEdgePartitions).mapPartitionsWithIndex { (pid, iter) => - val builder = new EdgePartitionBuilder[Int] - iter.foreach { line => - if (!line.isEmpty && line(0) != '#') { - val lineArray = line.split("\\s+") - if (lineArray.length < 2) { - logWarning("Invalid line: " + line) - } - val srcId = lineArray(0).toLong - val dstId = lineArray(1).toLong - if (canonicalOrientation && dstId > srcId) { - builder.add(dstId, srcId, 1) - } else { - builder.add(srcId, dstId, 1) - } - } - } - Iterator((pid, builder.toEdgePartition)) - }.cache() - edges.count() - - logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime)) - - GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1) - } // end of edgeListFile - -} diff --git a/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala b/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala deleted file mode 100644 index e41287c1ed..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/GraphOps.scala +++ /dev/null @@ -1,277 +0,0 @@ -package org.apache.spark.graph - -import scala.reflect.ClassTag - -import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext._ -import org.apache.spark.SparkException - - -/** - * `GraphOps` contains additional functionality (syntatic sugar) for - * the graph type and is implicitly constructed for each Graph object. - * All operations in `GraphOps` are expressed in terms of the - * efficient GraphX API. - * - * @tparam VD the vertex attribute type - * @tparam ED the edge attribute type - * - */ -class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) { - - /** - * Compute the number of edges in the graph. - */ - lazy val numEdges: Long = graph.edges.count() - - - /** - * Compute the number of vertices in the graph. - */ - lazy val numVertices: Long = graph.vertices.count() - - - /** - * Compute the in-degree of each vertex in the Graph returning an - * RDD. - * @note Vertices with no in edges are not returned in the resulting RDD. - */ - lazy val inDegrees: VertexRDD[Int] = degreesRDD(EdgeDirection.In) - - - /** - * Compute the out-degree of each vertex in the Graph returning an RDD. - * @note Vertices with no out edges are not returned in the resulting RDD. - */ - lazy val outDegrees: VertexRDD[Int] = degreesRDD(EdgeDirection.Out) - - - /** - * Compute the degrees of each vertex in the Graph returning an RDD. - * @note Vertices with no edges are not returned in the resulting - * RDD. - */ - lazy val degrees: VertexRDD[Int] = degreesRDD(EdgeDirection.Both) - - - /** - * Compute the neighboring vertex degrees. - * - * @param edgeDirection the direction along which to collect - * neighboring vertex attributes. 
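// A minimal sketch, not from the original sources: loading a whitespace-separated
// "srcId dstId" edge list with the loader above, then using the GraphOps degree members
// declared here. The file path and the SparkContext `sc` are assumptions.
val graph: Graph[Int, Int] =
  GraphLoader.edgeListFile(sc, "hdfs:///data/edges.txt", canonicalOrientation = true)
val inDeg: VertexRDD[Int] = graph.inDegrees
val maxDegree: Int = graph.degrees.map(_._2).reduce((a, b) => math.max(a, b))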
- */ - private def degreesRDD(edgeDirection: EdgeDirection): VertexRDD[Int] = { - if (edgeDirection == EdgeDirection.In) { - graph.mapReduceTriplets(et => Iterator((et.dstId,1)), _ + _) - } else if (edgeDirection == EdgeDirection.Out) { - graph.mapReduceTriplets(et => Iterator((et.srcId,1)), _ + _) - } else { // EdgeDirection.both - graph.mapReduceTriplets(et => Iterator((et.srcId,1), (et.dstId,1)), _ + _) - } - } - - - /** - * This function is used to compute a statistic for the neighborhood - * of each vertex and returns a value for all vertices (including - * those without neighbors). - * - * @note Because the a default value is provided all vertices will - * have a corresponding entry in the returned RDD. - * - * @param mapFunc the function applied to each edge adjacent to each - * vertex. The mapFunc can optionally return None in which case it - * does not contribute to the final sum. - * @param reduceFunc the function used to merge the results of each - * map operation. - * @param default the default value to use for each vertex if it has - * no neighbors or the map function repeatedly evaluates to none - * @param direction the direction of edges to consider (e.g., In, - * Out, Both). - * @tparam VD2 The returned type of the aggregation operation. - * - * @return A Spark.RDD containing tuples of vertex identifiers and - * their resulting value. There will be exactly one entry for ever - * vertex in the original graph. - * - * @example We can use this function to compute the average follower - * age for each user - * - * {{{ - * val graph: Graph[Int,Int] = loadGraph() - * val averageFollowerAge: RDD[(Int, Int)] = - * graph.aggregateNeighbors[(Int,Double)]( - * (vid, edge) => (edge.otherVertex(vid).data, 1), - * (a, b) => (a._1 + b._1, a._2 + b._2), - * -1, - * EdgeDirection.In) - * .mapValues{ case (sum,followers) => sum.toDouble / followers} - * }}} - * - * @todo Should this return a graph with the new vertex values? - * - */ - def aggregateNeighbors[A: ClassTag]( - mapFunc: (VertexID, EdgeTriplet[VD, ED]) => Option[A], - reduceFunc: (A, A) => A, - dir: EdgeDirection) - : VertexRDD[A] = { - - // Define a new map function over edge triplets - val mf = (et: EdgeTriplet[VD,ED]) => { - // Compute the message to the dst vertex - val dst = - if (dir == EdgeDirection.In || dir == EdgeDirection.Both) { - mapFunc(et.dstId, et) - } else { Option.empty[A] } - // Compute the message to the source vertex - val src = - if (dir == EdgeDirection.Out || dir == EdgeDirection.Both) { - mapFunc(et.srcId, et) - } else { Option.empty[A] } - // construct the return array - (src, dst) match { - case (None, None) => Iterator.empty - case (Some(srcA),None) => Iterator((et.srcId, srcA)) - case (None, Some(dstA)) => Iterator((et.dstId, dstA)) - case (Some(srcA), Some(dstA)) => Iterator((et.srcId, srcA), (et.dstId, dstA)) - } - } - - graph.mapReduceTriplets(mf, reduceFunc) - } // end of aggregateNeighbors - - - /** - * Return the Ids of the neighboring vertices. - * - * @param edgeDirection the direction along which to collect - * neighboring vertices - * - * @return the vertex set of neighboring ids for each vertex. 
- */ - def collectNeighborIds(edgeDirection: EdgeDirection) : - VertexRDD[Array[VertexID]] = { - val nbrs = - if (edgeDirection == EdgeDirection.Both) { - graph.mapReduceTriplets[Array[VertexID]]( - mapFunc = et => Iterator((et.srcId, Array(et.dstId)), (et.dstId, Array(et.srcId))), - reduceFunc = _ ++ _ - ) - } else if (edgeDirection == EdgeDirection.Out) { - graph.mapReduceTriplets[Array[VertexID]]( - mapFunc = et => Iterator((et.srcId, Array(et.dstId))), - reduceFunc = _ ++ _) - } else if (edgeDirection == EdgeDirection.In) { - graph.mapReduceTriplets[Array[VertexID]]( - mapFunc = et => Iterator((et.dstId, Array(et.srcId))), - reduceFunc = _ ++ _) - } else { - throw new SparkException("It doesn't make sense to collect neighbor ids without a direction.") - } - graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) => - nbrsOpt.getOrElse(Array.empty[VertexID]) - } - } // end of collectNeighborIds - - - /** - * Collect the neighbor vertex attributes for each vertex. - * - * @note This function could be highly inefficient on power-law - * graphs where high degree vertices may force a large ammount of - * information to be collected to a single location. - * - * @param edgeDirection the direction along which to collect - * neighboring vertices - * - * @return the vertex set of neighboring vertex attributes for each - * vertex. - */ - def collectNeighbors(edgeDirection: EdgeDirection) : - VertexRDD[ Array[(VertexID, VD)] ] = { - val nbrs = graph.aggregateNeighbors[Array[(VertexID,VD)]]( - (vid, edge) => - Some(Array( (edge.otherVertexId(vid), edge.otherVertexAttr(vid)) )), - (a, b) => a ++ b, - edgeDirection) - - graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) => - nbrsOpt.getOrElse(Array.empty[(VertexID, VD)]) - } - } // end of collectNeighbor - - - /** - * Join the vertices with an RDD and then apply a function from the - * the vertex and RDD entry to a new vertex value. The input table - * should contain at most one entry for each vertex. If no entry is - * provided the map function is skipped and the old value is used. - * - * @tparam U the type of entry in the table of updates - * @param table the table to join with the vertices in the graph. - * The table should contain at most one entry for each vertex. - * @param mapFunc the function used to compute the new vertex - * values. The map function is invoked only for vertices with a - * corresponding entry in the table otherwise the old vertex value - * is used. - * - * @note for small tables this function can be much more efficient - * than leftJoinVertices - * - * @example This function is used to update the vertices with new - * values based on external data. For example we could add the out - * degree to each vertex record - * - * {{{ - * val rawGraph: Graph[Int,()] = Graph.textFile("webgraph") - * .mapVertices(v => 0) - * val outDeg: RDD[(Int, Int)] = rawGraph.outDegrees() - * val graph = rawGraph.leftJoinVertices[Int,Int](outDeg, - * (v, deg) => deg ) - * }}} - * - */ - def joinVertices[U: ClassTag](table: RDD[(VertexID, U)])(mapFunc: (VertexID, VD, U) => VD) - : Graph[VD, ED] = { - val uf = (id: VertexID, data: VD, o: Option[U]) => { - o match { - case Some(u) => mapFunc(id, data, u) - case None => data - } - } - graph.outerJoinVertices(table)(uf) - } - - /** - * Filter the graph by computing some values to filter on, and applying the predicates. 
- * - * @param preprocess a function to compute new vertex and edge data before filtering - * @param epred edge pred to filter on after preprocess, see more details under Graph#subgraph - * @param vpred vertex pred to filter on after prerocess, see more details under Graph#subgraph - * @tparam VD2 vertex type the vpred operates on - * @tparam ED2 edge type the epred operates on - * @return a subgraph of the orginal graph, with its data unchanged - * - * @example This function can be used to filter the graph based on some property, without - * changing the vertex and edge values in your program. For example, we could remove the vertices - * in a graph with 0 outdegree - * - * {{{ - * graph.filter( - * graph => { - * val degrees: VertexSetRDD[Int] = graph.outDegrees - * graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)} - * }, - * vpred = (vid: VertexID, deg:Int) => deg > 0 - * ) - * }}} - * - */ - def filter[VD2: ClassTag, ED2: ClassTag]( - preprocess: Graph[VD, ED] => Graph[VD2, ED2], - epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true, - vpred: (VertexID, VD2) => Boolean = (v:VertexID, d:VD2) => true): Graph[VD, ED] = { - graph.mask(preprocess(graph).subgraph(epred, vpred)) - } -} // end of GraphOps diff --git a/graph/src/main/scala/org/apache/spark/graph/PartitionStrategy.scala b/graph/src/main/scala/org/apache/spark/graph/PartitionStrategy.scala deleted file mode 100644 index bc05fb812c..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/PartitionStrategy.scala +++ /dev/null @@ -1,94 +0,0 @@ -package org.apache.spark.graph - - -sealed trait PartitionStrategy extends Serializable { - def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID -} - - -/** - * This function implements a classic 2D-Partitioning of a sparse matrix. - * Suppose we have a graph with 11 vertices that we want to partition - * over 9 machines. We can use the following sparse matrix representation: - * - * __________________________________ - * v0 | P0 * | P1 | P2 * | - * v1 | **** | * | | - * v2 | ******* | ** | **** | - * v3 | ***** | * * | * | - * ---------------------------------- - * v4 | P3 * | P4 *** | P5 ** * | - * v5 | * * | * | | - * v6 | * | ** | **** | - * v7 | * * * | * * | * | - * ---------------------------------- - * v8 | P6 * | P7 * | P8 * *| - * v9 | * | * * | | - * v10 | * | ** | * * | - * v11 | * <-E | *** | ** | - * ---------------------------------- - * - * The edge denoted by E connects v11 with v1 and is assigned to - * processor P6. To get the processor number we divide the matrix - * into sqrt(numProc) by sqrt(numProc) blocks. Notice that edges - * adjacent to v11 can only be in the first colum of - * blocks (P0, P3, P6) or the last row of blocks (P6, P7, P8). - * As a consequence we can guarantee that v11 will need to be - * replicated to at most 2 * sqrt(numProc) machines. - * - * Notice that P0 has many edges and as a consequence this - * partitioning would lead to poor work balance. To improve - * balance we first multiply each vertex id by a large prime - * to effectively shuffle the vertex locations. - * - * One of the limitations of this approach is that the number of - * machines must either be a perfect square. We partially address - * this limitation by computing the machine assignment to the next - * largest perfect square and then mapping back down to the actual - * number of machines. Unfortunately, this can also lead to work - * imbalance and so it is suggested that a perfect square is used. 
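// A minimal sketch, not from the original sources: PartitionStrategy is just a function
// from an edge's endpoints to a partition id, so a custom strategy is a few lines.
// SourceIdHash is a hypothetical name, shown only to illustrate the trait; graph is assumed.
case object SourceIdHash extends PartitionStrategy {
  override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID =
    (math.abs(src) % numParts).toInt   // co-locate all out-edges of a source vertex
}
val bySource = graph.partitionBy(SourceIdHash)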
- * - * - */ -case object EdgePartition2D extends PartitionStrategy { - override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { - val ceilSqrtNumParts: PartitionID = math.ceil(math.sqrt(numParts)).toInt - val mixingPrime: VertexID = 1125899906842597L - val col: PartitionID = ((math.abs(src) * mixingPrime) % ceilSqrtNumParts).toInt - val row: PartitionID = ((math.abs(dst) * mixingPrime) % ceilSqrtNumParts).toInt - (col * ceilSqrtNumParts + row) % numParts - } -} - - -case object EdgePartition1D extends PartitionStrategy { - override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { - val mixingPrime: VertexID = 1125899906842597L - (math.abs(src) * mixingPrime).toInt % numParts - } -} - - -/** - * Assign edges to an aribtrary machine corresponding to a - * random vertex cut. - */ -case object RandomVertexCut extends PartitionStrategy { - override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { - math.abs((src, dst).hashCode()) % numParts - } -} - - -/** - * Assign edges to an arbitrary machine corresponding to a random vertex cut. This - * function ensures that edges of opposite direction between the same two vertices - * will end up on the same partition. - */ -case object CanonicalRandomVertexCut extends PartitionStrategy { - override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { - val lower = math.min(src, dst) - val higher = math.max(src, dst) - math.abs((lower, higher).hashCode()) % numParts - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/Pregel.scala b/graph/src/main/scala/org/apache/spark/graph/Pregel.scala deleted file mode 100644 index 3b84e2e5e4..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/Pregel.scala +++ /dev/null @@ -1,122 +0,0 @@ -package org.apache.spark.graph - -import scala.reflect.ClassTag - - -/** - * This object implements a Pregel-like bulk-synchronous - * message-passing API. However, unlike the original Pregel API the - * GraphX pregel API factors the sendMessage computation over edges, - * enables the message sending computation to read both vertex - * attributes, and finally constrains messages to the graph structure. - * These changes allow for substantially more efficient distributed - * execution while also exposing greater flexibility for graph based - * computation. - * - * @example We can use the Pregel abstraction to implement PageRank - * {{{ - * val pagerankGraph: Graph[Double, Double] = graph - * // Associate the degree with each vertex - * .outerJoinVertices(graph.outDegrees){ - * (vid, vdata, deg) => deg.getOrElse(0) - * } - * // Set the weight on the edges based on the degree - * .mapTriplets( e => 1.0 / e.srcAttr ) - * // Set the vertex attributes to the initial pagerank values - * .mapVertices( (id, attr) => 1.0 ) - * - * def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double = - * resetProb + (1.0 - resetProb) * msgSum - * def sendMessage(id: VertexID, edge: EdgeTriplet[Double, Double]): Option[Double] = - * Some(edge.srcAttr * edge.attr) - * def messageCombiner(a: Double, b: Double): Double = a + b - * val initialMessage = 0.0 - * // Execute pregel for a fixed number of iterations. - * Pregel(pagerankGraph, initialMessage, numIter)( - * vertexProgram, sendMessage, messageCombiner) - * }}} - * - */ -object Pregel { - - /** - * Execute a Pregel-like iterative vertex-parallel abstraction. 
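// A minimal sketch, not from the original sources: single-source shortest paths written
// against the Pregel operator documented here. Assumes graph: Graph[Int, Double] with
// non-negative edge weights and a chosen source vertex id.
val sourceId: VertexID = 1L
val init: Graph[Double, Double] =
  graph.mapVertices((id, _) => if (id == sourceId) 0.0 else Double.PositiveInfinity)
val sssp: Graph[Double, Double] = Pregel(init, Double.PositiveInfinity)(
  vprog = (id, dist, msg) => math.min(dist, msg),
  sendMsg = et =>
    if (et.srcAttr + et.attr < et.dstAttr) Iterator((et.dstId, et.srcAttr + et.attr))
    else Iterator.empty,
  mergeMsg = (a, b) => math.min(a, b))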
The - * user-defined vertex-program `vprog` is executed in parallel on - * each vertex receiving any inbound messages and computing a new - * value for the vertex. The `sendMsg` function is then invoked on - * all out-edges and is used to compute an optional message to the - * destination vertex. The `mergeMsg` function is a commutative - * associative function used to combine messages destined to the - * same vertex. - * - * On the first iteration all vertices receive the `initialMsg` and - * on subsequent iterations if a vertex does not receive a message - * then the vertex-program is not invoked. - * - * This function iterates until there are no remaining messages, or - * for maxIterations iterations. - * - * @tparam VD the vertex data type - * @tparam ED the edge data type - * @tparam A the Pregel message type - * - * @param graph the input graph. - * - * @param initialMsg the message each vertex will receive at the on - * the first iteration. - * - * @param maxIterations the maximum number of iterations to run for. - * - * @param vprog the user-defined vertex program which runs on each - * vertex and receives the inbound message and computes a new vertex - * value. On the first iteration the vertex program is invoked on - * all vertices and is passed the default message. On subsequent - * iterations the vertex program is only invoked on those vertices - * that receive messages. - * - * @param sendMsg a user supplied function that is applied to out - * edges of vertices that received messages in the current - * iteration. - * - * @param mergeMsg a user supplied function that takes two incoming - * messages of type A and merges them into a single message of type - * A. ''This function must be commutative and associative and - * ideally the size of A should not increase.'' - * - * @return the resulting graph at the end of the computation - * - */ - def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] - (graph: Graph[VD, ED], initialMsg: A, maxIterations: Int = Int.MaxValue)( - vprog: (VertexID, VD, A) => VD, - sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID,A)], - mergeMsg: (A, A) => A) - : Graph[VD, ED] = { - - var g = graph.mapVertices( (vid, vdata) => vprog(vid, vdata, initialMsg) ) - // compute the messages - var messages = g.mapReduceTriplets(sendMsg, mergeMsg).cache() - var activeMessages = messages.count() - // Loop - var i = 0 - while (activeMessages > 0 && i < maxIterations) { - // Receive the messages. Vertices that didn't get any messages do not appear in newVerts. - val newVerts = g.vertices.innerJoin(messages)(vprog).cache() - // Update the graph with the new vertices. - g = g.outerJoinVertices(newVerts) { (vid, old, newOpt) => newOpt.getOrElse(old) } - - val oldMessages = messages - // Send new messages. Vertices that didn't get any messages don't appear in newVerts, so don't - // get to send messages. 
- messages = g.mapReduceTriplets(sendMsg, mergeMsg, Some((newVerts, EdgeDirection.Out))).cache() - activeMessages = messages.count() - // after counting we can unpersist the old messages - oldMessages.unpersist(blocking=false) - // count the iteration - i += 1 - } - - g - } // end of apply - -} // end of class Pregel diff --git a/graph/src/main/scala/org/apache/spark/graph/VertexRDD.scala b/graph/src/main/scala/org/apache/spark/graph/VertexRDD.scala deleted file mode 100644 index 25b0aed85a..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/VertexRDD.scala +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.graph - -import scala.reflect.ClassTag - -import org.apache.spark._ -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd._ -import org.apache.spark.storage.StorageLevel - -import org.apache.spark.graph.impl.MsgRDDFunctions -import org.apache.spark.graph.impl.VertexPartition - - -/** - * A `VertexRDD[VD]` extends the `RDD[(VertexID, VD)]` by ensuring that there is - * only one entry for each vertex and by pre-indexing the entries for fast, - * efficient joins. - * - * @tparam VD the vertex attribute associated with each vertex in the set. - * - * To construct a `VertexRDD` use the singleton object: - * - * @example Construct a `VertexRDD` from a plain RDD - * {{{ - * // Construct an intial vertex set - * val someData: RDD[(VertexID, SomeType)] = loadData(someFile) - * val vset = VertexRDD(someData) - * // If there were redundant values in someData we would use a reduceFunc - * val vset2 = VertexRDD(someData, reduceFunc) - * // Finally we can use the VertexRDD to index another dataset - * val otherData: RDD[(VertexID, OtherType)] = loadData(otherFile) - * val vset3 = VertexRDD(otherData, vset.index) - * // Now we can construct very fast joins between the two sets - * val vset4: VertexRDD[(SomeType, OtherType)] = vset.leftJoin(vset3) - * }}} - * - */ -class VertexRDD[@specialized VD: ClassTag]( - val partitionsRDD: RDD[VertexPartition[VD]]) - extends RDD[(VertexID, VD)](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { - - require(partitionsRDD.partitioner.isDefined) - - partitionsRDD.setName("VertexRDD") - - /** - * Construct a new VertexRDD that is indexed by only the keys in the RDD. - * The resulting VertexRDD will be based on a different index and can - * no longer be quickly joined with this RDD. - */ - def reindex(): VertexRDD[VD] = new VertexRDD(partitionsRDD.map(_.reindex())) - - /** - * The partitioner is defined by the index. - */ - override val partitioner = partitionsRDD.partitioner - - /** - * The actual partitions are defined by the tuples. 
- */ - override protected def getPartitions: Array[Partition] = partitionsRDD.partitions - - /** - * The preferred locations are computed based on the preferred - * locations of the tuples. - */ - override protected def getPreferredLocations(s: Partition): Seq[String] = - partitionsRDD.preferredLocations(s) - - /** - * Caching a VertexRDD causes the index and values to be cached separately. - */ - override def persist(newLevel: StorageLevel): VertexRDD[VD] = { - partitionsRDD.persist(newLevel) - this - } - - /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ - override def persist(): VertexRDD[VD] = persist(StorageLevel.MEMORY_ONLY) - - /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ - override def cache(): VertexRDD[VD] = persist() - - /** Return the number of vertices in this set. */ - override def count(): Long = { - partitionsRDD.map(_.size).reduce(_ + _) - } - - /** - * Provide the `RDD[(VertexID, VD)]` equivalent output. - */ - override def compute(part: Partition, context: TaskContext): Iterator[(VertexID, VD)] = { - firstParent[VertexPartition[VD]].iterator(part, context).next.iterator - } - - /** - * Return a new VertexRDD by applying a function to each VertexPartition of this RDD. - */ - def mapVertexPartitions[VD2: ClassTag](f: VertexPartition[VD] => VertexPartition[VD2]) - : VertexRDD[VD2] = { - val newPartitionsRDD = partitionsRDD.mapPartitions(_.map(f), preservesPartitioning = true) - new VertexRDD(newPartitionsRDD) - } - - - /** - * Restrict the vertex set to the set of vertices satisfying the - * given predicate. - * - * @param pred the user defined predicate, which takes a tuple to conform to - * the RDD[(VertexID, VD)] interface - * - * @note The vertex set preserves the original index structure - * which means that the returned RDD can be easily joined with - * the original vertex-set. Furthermore, the filter only - * modifies the bitmap index and so no new values are allocated. - */ - override def filter(pred: Tuple2[VertexID, VD] => Boolean): VertexRDD[VD] = - this.mapVertexPartitions(_.filter(Function.untupled(pred))) - - /** - * Pass each vertex attribute through a map function and retain the - * original RDD's partitioning and index. - * - * @tparam VD2 the type returned by the map function - * - * @param f the function applied to each value in the RDD - * @return a new VertexRDD with values obtained by applying `f` to - * each of the entries in the original VertexRDD. The resulting - * VertexRDD retains the same index. - */ - def mapValues[VD2: ClassTag](f: VD => VD2): VertexRDD[VD2] = - this.mapVertexPartitions(_.map((vid, attr) => f(attr))) - - /** - * Pass each vertex attribute through a map function and retain the - * original RDD's partitioning and index. - * - * @tparam VD2 the type returned by the map function - * - * @param f the function applied to each value in the RDD - * @return a new VertexRDD with values obtained by applying `f` to - * each of the entries in the original VertexRDD. The resulting - * VertexRDD retains the same index. - */ - def mapValues[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexRDD[VD2] = - this.mapVertexPartitions(_.map(f)) - - /** - * Hides vertices that are the same between this and other. For vertices that are different, keeps - * the values from `other`. 
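// A minimal sketch, not from the original sources: the index-preserving transforms above
// keep the result joinable with the graph it came from. Assumes graph: Graph[String, Int].
val ranks: VertexRDD[Double] = graph.vertices.mapValues((vid: VertexID, name: String) => 1.0)
val positive: VertexRDD[Double] = ranks.filter { case (vid, r) => r > 0.0 }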
- */ - def diff(other: VertexRDD[VD]): VertexRDD[VD] = { - val newPartitionsRDD = partitionsRDD.zipPartitions( - other.partitionsRDD, preservesPartitioning = true - ) { (thisIter, otherIter) => - val thisPart = thisIter.next() - val otherPart = otherIter.next() - Iterator(thisPart.diff(otherPart)) - } - new VertexRDD(newPartitionsRDD) - } - - /** - * Left join this VertexSet with another VertexSet which has the - * same Index. This function will fail if both VertexSets do not - * share the same index. The resulting vertex set contains an entry - * for each vertex in this set. If the other VertexSet is missing - * any vertex in this VertexSet then a `None` attribute is generated - * - * @tparam VD2 the attribute type of the other VertexSet - * @tparam VD3 the attribute type of the resulting VertexSet - * - * @param other the other VertexSet with which to join. - * @param f the function mapping a vertex id and its attributes in - * this and the other vertex set to a new vertex attribute. - * @return a VertexRDD containing all the vertices in this - * VertexSet with `None` attributes used for Vertices missing in the - * other VertexSet. - * - */ - def leftZipJoin[VD2: ClassTag, VD3: ClassTag] - (other: VertexRDD[VD2])(f: (VertexID, VD, Option[VD2]) => VD3): VertexRDD[VD3] = { - val newPartitionsRDD = partitionsRDD.zipPartitions( - other.partitionsRDD, preservesPartitioning = true - ) { (thisIter, otherIter) => - val thisPart = thisIter.next() - val otherPart = otherIter.next() - Iterator(thisPart.leftJoin(otherPart)(f)) - } - new VertexRDD(newPartitionsRDD) - } - - /** - * Left join this VertexRDD with an RDD containing vertex attribute - * pairs. If the other RDD is backed by a VertexRDD with the same - * index than the efficient leftZipJoin implementation is used. The - * resulting vertex set contains an entry for each vertex in this - * set. If the other VertexRDD is missing any vertex in this - * VertexRDD then a `None` attribute is generated. - * - * If there are duplicates, the vertex is picked at random. - * - * @tparam VD2 the attribute type of the other VertexRDD - * @tparam VD3 the attribute type of the resulting VertexRDD - * - * @param other the other VertexRDD with which to join. - * @param f the function mapping a vertex id and its attributes in - * this and the other vertex set to a new vertex attribute. - * @return a VertexRDD containing all the vertices in this - * VertexRDD with the attribute emitted by f. - */ - def leftJoin[VD2: ClassTag, VD3: ClassTag] - (other: RDD[(VertexID, VD2)]) - (f: (VertexID, VD, Option[VD2]) => VD3) - : VertexRDD[VD3] = - { - // Test if the other vertex is a VertexRDD to choose the optimal join strategy. - // If the other set is a VertexRDD then we use the much more efficient leftZipJoin - other match { - case other: VertexRDD[_] => - leftZipJoin(other)(f) - case _ => - new VertexRDD[VD3]( - partitionsRDD.zipPartitions( - other.partitionBy(this.partitioner.get), preservesPartitioning = true) - { (part, msgs) => - val vertexPartition: VertexPartition[VD] = part.next() - Iterator(vertexPartition.leftJoin(msgs)(f)) - } - ) - } - } - - /** - * Same effect as leftJoin(other) { (vid, a, bOpt) => bOpt.getOrElse(a) }, but `this` and `other` - * must have the same index. 
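// A minimal sketch, not from the original sources: leftJoin keeps every vertex on the left
// and passes None where the right side has no entry. Assumes graph: Graph[String, Int].
val outDeg: VertexRDD[Int] = graph.outDegrees
val withDeg: VertexRDD[(String, Int)] =
  graph.vertices.leftJoin(outDeg) { (vid, name, degOpt) => (name, degOpt.getOrElse(0)) }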
- */ - def innerZipJoin[U: ClassTag, VD2: ClassTag](other: VertexRDD[U]) - (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = { - val newPartitionsRDD = partitionsRDD.zipPartitions( - other.partitionsRDD, preservesPartitioning = true - ) { (thisIter, otherIter) => - val thisPart = thisIter.next() - val otherPart = otherIter.next() - Iterator(thisPart.innerJoin(otherPart)(f)) - } - new VertexRDD(newPartitionsRDD) - } - - /** - * Replace vertices with corresponding vertices in `other`, and drop vertices without a - * corresponding vertex in `other`. - */ - def innerJoin[U: ClassTag, VD2: ClassTag](other: RDD[(VertexID, U)]) - (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = { - // Test if the other vertex is a VertexRDD to choose the optimal join strategy. - // If the other set is a VertexRDD then we use the much more efficient innerZipJoin - other match { - case other: VertexRDD[_] => - innerZipJoin(other)(f) - case _ => - new VertexRDD( - partitionsRDD.zipPartitions( - other.partitionBy(this.partitioner.get), preservesPartitioning = true) - { (part, msgs) => - val vertexPartition: VertexPartition[VD] = part.next() - Iterator(vertexPartition.innerJoin(msgs)(f)) - } - ) - } - } - - /** - * Aggregate messages with the same ids using `reduceFunc`, returning a VertexRDD that is - * co-indexed with this one. - */ - def aggregateUsingIndex[VD2: ClassTag]( - messages: RDD[(VertexID, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2] = - { - val shuffled = MsgRDDFunctions.partitionForAggregation(messages, this.partitioner.get) - val parts = partitionsRDD.zipPartitions(shuffled, true) { (thisIter, msgIter) => - val vertexPartition: VertexPartition[VD] = thisIter.next() - Iterator(vertexPartition.aggregateUsingIndex(msgIter, reduceFunc)) - } - new VertexRDD[VD2](parts) - } - -} // end of VertexRDD - - -/** - * The VertexRDD singleton is used to construct VertexRDDs - */ -object VertexRDD { - - /** - * Construct a vertex set from an RDD of vertex-attribute pairs. - * Duplicate entries are removed arbitrarily. - * - * @tparam VD the vertex attribute type - * - * @param rdd the collection of vertex-attribute pairs - */ - def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)]): VertexRDD[VD] = { - val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match { - case Some(p) => rdd - case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size)) - } - val vertexPartitions = partitioned.mapPartitions( - iter => Iterator(VertexPartition(iter)), - preservesPartitioning = true) - new VertexRDD(vertexPartitions) - } - - /** - * Construct a vertex set from an RDD of vertex-attribute pairs. - * Duplicate entries are merged using mergeFunc. - * - * @tparam VD the vertex attribute type - * - * @param rdd the collection of vertex-attribute pairs - * @param mergeFunc the associative, commutative merge function. 
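aggregateUsingIndex above is the same mechanism mapReduceTriplets uses for its final reduction: messages keyed by vertex id are combined with reduceFunc into a VertexRDD that reuses this set's index. An illustrative helper (names assumed, not from the patch):

import org.apache.spark.rdd.RDD
import org.apache.spark.graph._

// Sum integer messages per vertex; the result is co-indexed with `verts` and can be zip-joined cheaply.
def sumMessages[VD](verts: VertexRDD[VD], msgs: RDD[(VertexID, Int)]): VertexRDD[Int] =
  verts.aggregateUsingIndex(msgs, (a: Int, b: Int) => a + b)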
- */ - def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)], mergeFunc: (VD, VD) => VD): VertexRDD[VD] = - { - val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match { - case Some(p) => rdd - case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size)) - } - val vertexPartitions = partitioned.mapPartitions( - iter => Iterator(VertexPartition(iter)), - preservesPartitioning = true) - new VertexRDD(vertexPartitions) - } - - def apply[VD: ClassTag](vids: RDD[VertexID], rdd: RDD[(VertexID, VD)], defaultVal: VD) - : VertexRDD[VD] = - { - VertexRDD(vids.map(vid => (vid, defaultVal))).leftJoin(rdd) { (vid, default, value) => - value.getOrElse(default) - } - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/algorithms/ConnectedComponents.scala b/graph/src/main/scala/org/apache/spark/graph/algorithms/ConnectedComponents.scala deleted file mode 100644 index 2a6b8c0999..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/algorithms/ConnectedComponents.scala +++ /dev/null @@ -1,37 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.apache.spark.graph._ - - -object ConnectedComponents { - /** - * Compute the connected component membership of each vertex and return an RDD with the vertex - * value containing the lowest vertex id in the connected component containing that vertex. - * - * @tparam VD the vertex attribute type (discarded in the computation) - * @tparam ED the edge attribute type (preserved in the computation) - * - * @param graph the graph for which to compute the connected components - * - * @return a graph with vertex attributes containing the smallest vertex in each - * connected component - */ - def run[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]): Graph[VertexID, ED] = { - val ccGraph = graph.mapVertices { case (vid, _) => vid } - - def sendMessage(edge: EdgeTriplet[VertexID, ED]) = { - if (edge.srcAttr < edge.dstAttr) { - Iterator((edge.dstId, edge.srcAttr)) - } else if (edge.srcAttr > edge.dstAttr) { - Iterator((edge.srcId, edge.dstAttr)) - } else { - Iterator.empty - } - } - val initialMessage = Long.MaxValue - Pregel(ccGraph, initialMessage)( - vprog = (id, attr, msg) => math.min(attr, msg), - sendMsg = sendMessage, - mergeMsg = (a, b) => math.min(a, b)) - } // end of connectedComponents -} diff --git a/graph/src/main/scala/org/apache/spark/graph/algorithms/PageRank.scala b/graph/src/main/scala/org/apache/spark/graph/algorithms/PageRank.scala deleted file mode 100644 index 26b8dc5ab6..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/algorithms/PageRank.scala +++ /dev/null @@ -1,205 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.apache.spark.Logging -import org.apache.spark.graph._ - - -object PageRank extends Logging { - - /** - * Run PageRank for a fixed number of iterations returning a graph - * with vertex attributes containing the PageRank and edge - * attributes the normalized edge weight. - * - * The following PageRank fixed point is computed for each vertex. - * - * {{{ - * var PR = Array.fill(n)( 1.0 ) - * val oldPR = Array.fill(n)( 1.0 ) - * for( iter <- 0 until numIter ) { - * swap(oldPR, PR) - * for( i <- 0 until n ) { - * PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum - * } - * } - * }}} - * - * where `alpha` is the random reset probability (typically 0.15), - * `inNbrs[i]` is the set of neighbors whick link to `i` and - * `outDeg[j]` is the out degree of vertex `j`. 
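The fixed point in the scaladoc above is easy to sanity-check locally before running the distributed version. Below is a plain, non-distributed Scala transcription of that pseudocode (purely illustrative); inNbrs(i) lists the in-neighbours of vertex i and outDeg(j) is the out-degree of j:

def localPageRank(inNbrs: Array[Array[Int]], outDeg: Array[Int],
                  numIter: Int, alpha: Double = 0.15): Array[Double] = {
  val n = inNbrs.length
  var pr = Array.fill(n)(1.0)
  var oldPr = Array.fill(n)(1.0)
  for (iter <- 0 until numIter) {
    val tmp = oldPr; oldPr = pr; pr = tmp                    // swap(oldPR, PR)
    for (i <- 0 until n) {
      // PR[i] = alpha + (1 - alpha) * sum over in-neighbours j of oldPR[j] / outDeg[j]
      pr(i) = alpha + (1.0 - alpha) * inNbrs(i).map(j => oldPr(j) / outDeg(j)).sum
    }
  }
  pr
}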
- * - * Note that this is not the "normalized" PageRank and as a consequence pages that have no - * inlinks will have a PageRank of alpha. - * - * @tparam VD the original vertex attribute (not used) - * @tparam ED the original edge attribute (not used) - * - * @param graph the graph on which to compute PageRank - * @param numIter the number of iterations of PageRank to run - * @param resetProb the random reset probability (alpha) - * - * @return the graph containing with each vertex containing the PageRank and each edge - * containing the normalized weight. - * - */ - def run[VD: Manifest, ED: Manifest]( - graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = - { - - /** - * Initialize the pagerankGraph with each edge attribute having - * weight 1/outDegree and each vertex with attribute 1.0. - */ - val pagerankGraph: Graph[Double, Double] = graph - // Associate the degree with each vertex - .outerJoinVertices(graph.outDegrees){ - (vid, vdata, deg) => deg.getOrElse(0) - } - // Set the weight on the edges based on the degree - .mapTriplets( e => 1.0 / e.srcAttr ) - // Set the vertex attributes to the initial pagerank values - .mapVertices( (id, attr) => 1.0 ) - - // Display statistics about pagerank - logInfo(pagerankGraph.statistics.toString) - - // Define the three functions needed to implement PageRank in the GraphX - // version of Pregel - def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double = - resetProb + (1.0 - resetProb) * msgSum - def sendMessage(edge: EdgeTriplet[Double, Double]) = - Iterator((edge.dstId, edge.srcAttr * edge.attr)) - def messageCombiner(a: Double, b: Double): Double = a + b - // The initial message received by all vertices in PageRank - val initialMessage = 0.0 - - // Execute pregel for a fixed number of iterations. - Pregel(pagerankGraph, initialMessage, numIter)( - vertexProgram, sendMessage, messageCombiner) - } - - /** - * Run a dynamic version of PageRank returning a graph with vertex attributes containing the - * PageRank and edge attributes containing the normalized edge weight. - * - * {{{ - * var PR = Array.fill(n)( 1.0 ) - * val oldPR = Array.fill(n)( 0.0 ) - * while( max(abs(PR - oldPr)) > tol ) { - * swap(oldPR, PR) - * for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) { - * PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum - * } - * } - * }}} - * - * where `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of - * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`. - * - * Note that this is not the "normalized" PageRank and as a consequence pages that have no - * inlinks will have a PageRank of alpha. - * - * @tparam VD the original vertex attribute (not used) - * @tparam ED the original edge attribute (not used) - * - * @param graph the graph on which to compute PageRank - * @param tol the tolerance allowed at convergence (smaller => more * accurate). - * @param resetProb the random reset probability (alpha) - * - * @return the graph containing with each vertex containing the PageRank and each edge - * containing the normalized weight. - */ - def runUntillConvergence[VD: Manifest, ED: Manifest]( - graph: Graph[VD, ED], tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = - { - // Initialize the pagerankGraph with each edge attribute - // having weight 1/outDegree and each vertex with attribute 1.0. 
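Both entry points in this file return a Graph[Double, Double] whose vertex attribute is the rank and whose edge attribute is the normalized weight. A usage sketch against the API shown here (the input graph is assumed; the method name runUntillConvergence is spelled as in the source):

import org.apache.spark.graph._
import org.apache.spark.graph.algorithms.PageRank

def fixedIterRanks[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]): Graph[Double, Double] =
  PageRank.run(graph, numIter = 20, resetProb = 0.15)

def toleranceRanks[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]): Graph[Double, Double] =
  PageRank.runUntillConvergence(graph, tol = 0.0001, resetProb = 0.15)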
- val pagerankGraph: Graph[(Double, Double), Double] = graph - // Associate the degree with each vertex - .outerJoinVertices(graph.outDegrees) { - (vid, vdata, deg) => deg.getOrElse(0) - } - // Set the weight on the edges based on the degree - .mapTriplets( e => 1.0 / e.srcAttr ) - // Set the vertex attributes to (initalPR, delta = 0) - .mapVertices( (id, attr) => (0.0, 0.0) ) - - // Display statistics about pagerank - logInfo(pagerankGraph.statistics.toString) - - // Define the three functions needed to implement PageRank in the GraphX - // version of Pregel - def vertexProgram(id: VertexID, attr: (Double, Double), msgSum: Double): (Double, Double) = { - val (oldPR, lastDelta) = attr - val newPR = oldPR + (1.0 - resetProb) * msgSum - (newPR, newPR - oldPR) - } - - def sendMessage(edge: EdgeTriplet[(Double, Double), Double]) = { - if (edge.srcAttr._2 > tol) { - Iterator((edge.dstId, edge.srcAttr._2 * edge.attr)) - } else { - Iterator.empty - } - } - - def messageCombiner(a: Double, b: Double): Double = a + b - - // The initial message received by all vertices in PageRank - val initialMessage = resetProb / (1.0 - resetProb) - - // Execute a dynamic version of Pregel. - Pregel(pagerankGraph, initialMessage)(vertexProgram, sendMessage, messageCombiner) - .mapVertices((vid, attr) => attr._1) - } // end of deltaPageRank - - def runStandalone[VD: Manifest, ED: Manifest]( - graph: Graph[VD, ED], tol: Double, resetProb: Double = 0.15): VertexRDD[Double] = { - - // Initialize the ranks - var ranks: VertexRDD[Double] = graph.vertices.mapValues((vid, attr) => resetProb).cache() - - // Initialize the delta graph where each vertex stores its delta and each edge knows its weight - var deltaGraph: Graph[Double, Double] = - graph.outerJoinVertices(graph.outDegrees)((vid, vdata, deg) => deg.getOrElse(0)) - .mapTriplets(e => 1.0 / e.srcAttr) - .mapVertices((vid, degree) => resetProb).cache() - var numDeltas: Long = ranks.count() - - var prevDeltas: Option[VertexRDD[Double]] = None - - var i = 0 - val weight = (1.0 - resetProb) - while (numDeltas > 0) { - // Compute new deltas. Only deltas that existed in the last round (i.e., were greater than - // `tol`) get to send messages; those that were less than `tol` would send messages less than - // `tol` as well. 
- val deltas = deltaGraph - .mapReduceTriplets[Double]( - et => Iterator((et.dstId, et.srcAttr * et.attr * weight)), - _ + _, - prevDeltas.map((_, EdgeDirection.Out))) - .filter { case (vid, delta) => delta > tol } - .cache() - prevDeltas = Some(deltas) - numDeltas = deltas.count() - logInfo("Standalone PageRank: iter %d has %d deltas".format(i, numDeltas)) - - // Update deltaGraph with the deltas - deltaGraph = deltaGraph.outerJoinVertices(deltas) { (vid, old, newOpt) => - newOpt.getOrElse(old) - }.cache() - - // Update ranks - ranks = ranks.leftZipJoin(deltas) { (vid, oldRank, deltaOpt) => - oldRank + deltaOpt.getOrElse(0.0) - } - ranks.foreach(x => {}) // force the iteration for ease of debugging - - i += 1 - } - - ranks - } - -} diff --git a/graph/src/main/scala/org/apache/spark/graph/algorithms/SVDPlusPlus.scala b/graph/src/main/scala/org/apache/spark/graph/algorithms/SVDPlusPlus.scala deleted file mode 100644 index 083aa30538..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/algorithms/SVDPlusPlus.scala +++ /dev/null @@ -1,103 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.apache.spark.rdd._ -import org.apache.spark.graph._ -import scala.util.Random -import org.apache.commons.math.linear._ - -class SVDPlusPlusConf( // SVDPlusPlus parameters - var rank: Int, - var maxIters: Int, - var minVal: Double, - var maxVal: Double, - var gamma1: Double, - var gamma2: Double, - var gamma6: Double, - var gamma7: Double) extends Serializable - -object SVDPlusPlus { - /** - * Implement SVD++ based on "Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model", - * paper is available at [[http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf]]. - * The prediction rule is rui = u + bu + bi + qi*(pu + |N(u)|^(-0.5)*sum(y)), see the details on page 6. 
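The prediction rule quoted above can be written out directly. The sketch below mirrors mapTrainF/mapTestF further down in this file, taking the four-tuple vertex attribute (factor vector, y-sum vector, bias, |N(u)|^(-0.5)) and the global mean u explicitly; it is illustrative, not part of the patch:

import org.apache.commons.math.linear.RealVector

def predictRating(u: Double,
                  usr: (RealVector, RealVector, Double, Double),
                  itm: (RealVector, RealVector, Double, Double),
                  minVal: Double, maxVal: Double): Double = {
  // r_ui = u + b_u + b_i + q_i . (p_u + |N(u)|^(-0.5) * sum(y)), clamped to [minVal, maxVal]
  val raw = u + usr._3 + itm._3 + itm._1.dotProduct(usr._2)
  math.min(math.max(raw, minVal), maxVal)
}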
- * - * @param edges edges for constructing the graph - * - * @param conf SVDPlusPlus parameters - * - * @return a graph with vertex attributes containing the trained model - */ - - def run(edges: RDD[Edge[Double]], conf: SVDPlusPlusConf): (Graph[(RealVector, RealVector, Double, Double), Double], Double) = { - - // generate default vertex attribute - def defaultF(rank: Int): (RealVector, RealVector, Double, Double) = { - val v1 = new ArrayRealVector(rank) - val v2 = new ArrayRealVector(rank) - for (i <- 0 until rank) { - v1.setEntry(i, Random.nextDouble) - v2.setEntry(i, Random.nextDouble) - } - (v1, v2, 0.0, 0.0) - } - - // calculate global rating mean - val (rs, rc) = edges.map(e => (e.attr, 1L)).reduce((a, b) => (a._1 + b._1, a._2 + b._2)) - val u = rs / rc - - // construct graph - var g = Graph.fromEdges(edges, defaultF(conf.rank)).cache() - - // calculate initial bias and norm - var t0 = g.mapReduceTriplets(et => - Iterator((et.srcId, (1L, et.attr)), (et.dstId, (1L, et.attr))), (g1: (Long, Double), g2: (Long, Double)) => (g1._1 + g2._1, g1._2 + g2._2)) - g = g.outerJoinVertices(t0) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[(Long, Double)]) => - (vd._1, vd._2, msg.get._2 / msg.get._1, 1.0 / scala.math.sqrt(msg.get._1)) - } - - def mapTrainF(conf: SVDPlusPlusConf, u: Double)(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double]) - : Iterator[(VertexID, (RealVector, RealVector, Double))] = { - val (usr, itm) = (et.srcAttr, et.dstAttr) - val (p, q) = (usr._1, itm._1) - var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2) - pred = math.max(pred, conf.minVal) - pred = math.min(pred, conf.maxVal) - val err = et.attr - pred - val updateP = ((q.mapMultiply(err)).subtract(p.mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) - val updateQ = ((usr._2.mapMultiply(err)).subtract(q.mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) - val updateY = ((q.mapMultiply(err * usr._4)).subtract((itm._2).mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) - Iterator((et.srcId, (updateP, updateY, (err - conf.gamma6 * usr._3) * conf.gamma1)), - (et.dstId, (updateQ, updateY, (err - conf.gamma6 * itm._3) * conf.gamma1))) - } - - for (i <- 0 until conf.maxIters) { - // phase 1, calculate pu + |N(u)|^(-0.5)*sum(y) for user nodes - var t1 = g.mapReduceTriplets(et => Iterator((et.srcId, et.dstAttr._2)), (g1: RealVector, g2: RealVector) => g1.add(g2)) - g = g.outerJoinVertices(t1) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[RealVector]) => - if (msg.isDefined) (vd._1, vd._1.add(msg.get.mapMultiply(vd._4)), vd._3, vd._4) else vd - } - // phase 2, update p for user nodes and q, y for item nodes - val t2 = g.mapReduceTriplets(mapTrainF(conf, u), (g1: (RealVector, RealVector, Double), g2: (RealVector, RealVector, Double)) => - (g1._1.add(g2._1), g1._2.add(g2._2), g1._3 + g2._3)) - g = g.outerJoinVertices(t2) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[(RealVector, RealVector, Double)]) => - (vd._1.add(msg.get._1), vd._2.add(msg.get._2), vd._3 + msg.get._3, vd._4) - } - } - - // calculate error on training set - def mapTestF(conf: SVDPlusPlusConf, u: Double)(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double]): Iterator[(VertexID, Double)] = { - val (usr, itm) = (et.srcAttr, et.dstAttr) - val (p, q) = (usr._1, itm._1) - var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2) - pred = math.max(pred, conf.minVal) - pred = math.min(pred, conf.maxVal) - val err = (et.attr - pred) * (et.attr - pred) - 
Iterator((et.dstId, err)) - } - val t3 = g.mapReduceTriplets(mapTestF(conf, u), (g1: Double, g2: Double) => g1 + g2) - g = g.outerJoinVertices(t3) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[Double]) => - if (msg.isDefined) (vd._1, vd._2, vd._3, msg.get) else vd - } - (g, u) - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponents.scala b/graph/src/main/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponents.scala deleted file mode 100644 index 8031aa10ce..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponents.scala +++ /dev/null @@ -1,87 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.apache.spark.graph._ - -object StronglyConnectedComponents { - - /** - * Compute the strongly connected component (SCC) of each vertex and return an RDD with the vertex - * value containing the lowest vertex id in the SCC containing that vertex. - * - * @tparam VD the vertex attribute type (discarded in the computation) - * @tparam ED the edge attribute type (preserved in the computation) - * - * @param graph the graph for which to compute the SCC - * - * @return a graph with vertex attributes containing the smallest vertex id in each SCC - */ - def run[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int): Graph[VertexID, ED] = { - - // the graph we update with final SCC ids, and the graph we return at the end - var sccGraph = graph.mapVertices { case (vid, _) => vid } - // graph we are going to work with in our iterations - var sccWorkGraph = graph.mapVertices { case (vid, _) => (vid, false) } - - var numVertices = sccWorkGraph.numVertices - var iter = 0 - while (sccWorkGraph.numVertices > 0 && iter < numIter) { - iter += 1 - do { - numVertices = sccWorkGraph.numVertices - sccWorkGraph = sccWorkGraph.outerJoinVertices(sccWorkGraph.outDegrees) { - (vid, data, degreeOpt) => if (degreeOpt.isDefined) data else (vid, true) - } - sccWorkGraph = sccWorkGraph.outerJoinVertices(sccWorkGraph.inDegrees) { - (vid, data, degreeOpt) => if (degreeOpt.isDefined) data else (vid, true) - } - - // get all vertices to be removed - val finalVertices = sccWorkGraph.vertices - .filter { case (vid, (scc, isFinal)) => isFinal} - .mapValues { (vid, data) => data._1} - - // write values to sccGraph - sccGraph = sccGraph.outerJoinVertices(finalVertices) { - (vid, scc, opt) => opt.getOrElse(scc) - } - // only keep vertices that are not final - sccWorkGraph = sccWorkGraph.subgraph(vpred = (vid, data) => !data._2) - } while (sccWorkGraph.numVertices < numVertices) - - sccWorkGraph = sccWorkGraph.mapVertices{ case (vid, (color, isFinal)) => (vid, isFinal) } - - // collect min of all my neighbor's scc values, update if it's smaller than mine - // then notify any neighbors with scc values larger than mine - sccWorkGraph = GraphLab[(VertexID, Boolean), ED, VertexID](sccWorkGraph, Integer.MAX_VALUE)( - (vid, e) => e.otherVertexAttr(vid)._1, - (vid1, vid2) => math.min(vid1, vid2), - (vid, scc, optScc) => - (math.min(scc._1, optScc.getOrElse(scc._1)), scc._2), - (vid, e) => e.vertexAttr(vid)._1 < e.otherVertexAttr(vid)._1 - ) - - // start at root of SCCs. Traverse values in reverse, notify all my neighbors - // do not propagate if colors do not match! 
- sccWorkGraph = GraphLab[(VertexID, Boolean), ED, Boolean]( - sccWorkGraph, - Integer.MAX_VALUE, - EdgeDirection.Out, - EdgeDirection.In - )( - // vertex is final if it is the root of a color - // or it has the same color as a neighbor that is final - (vid, e) => (vid == e.vertexAttr(vid)._1) || (e.vertexAttr(vid)._1 == e.otherVertexAttr(vid)._1), - (final1, final2) => final1 || final2, - (vid, scc, optFinal) => - (scc._1, scc._2 || optFinal.getOrElse(false)), - // activate neighbor if they are not final, you are, and you have the same color - (vid, e) => e.vertexAttr(vid)._2 && - !e.otherVertexAttr(vid)._2 && (e.vertexAttr(vid)._1 == e.otherVertexAttr(vid)._1), - // start at root of colors - (vid, data) => vid == data._1 - ) - } - sccGraph - } - -} diff --git a/graph/src/main/scala/org/apache/spark/graph/algorithms/TriangleCount.scala b/graph/src/main/scala/org/apache/spark/graph/algorithms/TriangleCount.scala deleted file mode 100644 index 81774d52e4..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/algorithms/TriangleCount.scala +++ /dev/null @@ -1,78 +0,0 @@ -package org.apache.spark.graph.algorithms - -import scala.reflect.ClassTag - -import org.apache.spark.graph._ - - -object TriangleCount { - /** - * Compute the number of triangles passing through each vertex. - * - * The algorithm is relatively straightforward and can be computed in three steps: - * - * 1) Compute the set of neighbors for each vertex - * 2) For each edge compute the intersection of the sets and send the - * count to both vertices. - * 3) Compute the sum at each vertex and divide by two since each - * triangle is counted twice. - * - * - * @param graph a graph with `sourceId` less than `destId`. The graph must have been partitioned - * using Graph.partitionBy. - * - * @return - */ - def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD,ED]): Graph[Int, ED] = { - // Remove redundant edges - val g = graph.groupEdges((a, b) => a).cache - - // Construct set representations of the neighborhoods - val nbrSets: VertexRDD[VertexSet] = - g.collectNeighborIds(EdgeDirection.Both).mapValues { (vid, nbrs) => - val set = new VertexSet(4) - var i = 0 - while (i < nbrs.size) { - // prevent self cycle - if(nbrs(i) != vid) { - set.add(nbrs(i)) - } - i += 1 - } - set - } - // join the sets with the graph - val setGraph: Graph[VertexSet, ED] = g.outerJoinVertices(nbrSets) { - (vid, _, optSet) => optSet.getOrElse(null) - } - // Edge function computes intersection of smaller vertex with larger vertex - def edgeFunc(et: EdgeTriplet[VertexSet, ED]): Iterator[(VertexID, Int)] = { - assert(et.srcAttr != null) - assert(et.dstAttr != null) - val (smallSet, largeSet) = if (et.srcAttr.size < et.dstAttr.size) { - (et.srcAttr, et.dstAttr) - } else { - (et.dstAttr, et.srcAttr) - } - val iter = smallSet.iterator - var counter: Int = 0 - while (iter.hasNext) { - val vid = iter.next - if (vid != et.srcId && vid != et.dstId && largeSet.contains(vid)) { counter += 1 } - } - Iterator((et.srcId, counter), (et.dstId, counter)) - } - // compute the intersection along edges - val counters: VertexRDD[Int] = setGraph.mapReduceTriplets(edgeFunc, _ + _) - // Merge counters with the graph and divide by two since each triangle is counted twice - g.outerJoinVertices(counters) { - (vid, _, optCounter: Option[Int]) => - val dblCount = optCounter.getOrElse(0) - // double count should be even (divisible by two) - assert((dblCount & 1) == 0) - dblCount / 2 - } - - } // end of TriangleCount - -} diff --git 
a/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala b/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala deleted file mode 100644 index b4311fa9f8..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartition.scala +++ /dev/null @@ -1,220 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.ClassTag - -import org.apache.spark.graph._ -import org.apache.spark.util.collection.PrimitiveKeyOpenHashMap - -/** - * A collection of edges stored in 3 large columnar arrays (src, dst, attribute). The arrays are - * clustered by src. - * - * @param srcIds the source vertex id of each edge - * @param dstIds the destination vertex id of each edge - * @param data the attribute associated with each edge - * @param index a clustered index on source vertex id - * @tparam ED the edge attribute type. - */ -class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassTag]( - val srcIds: Array[VertexID], - val dstIds: Array[VertexID], - val data: Array[ED], - val index: PrimitiveKeyOpenHashMap[VertexID, Int]) extends Serializable { - - /** - * Reverse all the edges in this partition. - * - * @return a new edge partition with all edges reversed. - */ - def reverse: EdgePartition[ED] = { - val builder = new EdgePartitionBuilder(size) - for (e <- iterator) { - builder.add(e.dstId, e.srcId, e.attr) - } - builder.toEdgePartition - } - - /** - * Construct a new edge partition by applying the function f to all - * edges in this partition. - * - * @param f a function from an edge to a new attribute - * @tparam ED2 the type of the new attribute - * @return a new edge partition with the result of the function `f` - * applied to each edge - */ - def map[ED2: ClassTag](f: Edge[ED] => ED2): EdgePartition[ED2] = { - val newData = new Array[ED2](data.size) - val edge = new Edge[ED]() - val size = data.size - var i = 0 - while (i < size) { - edge.srcId = srcIds(i) - edge.dstId = dstIds(i) - edge.attr = data(i) - newData(i) = f(edge) - i += 1 - } - new EdgePartition(srcIds, dstIds, newData, index) - } - - /** - * Construct a new edge partition by using the edge attributes - * contained in the iterator. - * - * @note The input iterator should return edge attributes in the - * order of the edges returned by `EdgePartition.iterator` and - * should return attributes equal to the number of edges. - * - * @param f a function from an edge to a new attribute - * @tparam ED2 the type of the new attribute - * @return a new edge partition with the result of the function `f` - * applied to each edge - */ - def map[ED2: ClassTag](iter: Iterator[ED2]): EdgePartition[ED2] = { - val newData = new Array[ED2](data.size) - var i = 0 - while (iter.hasNext) { - newData(i) = iter.next() - i += 1 - } - assert(newData.size == i) - new EdgePartition(srcIds, dstIds, newData, index) - } - - /** - * Apply the function f to all edges in this partition. - * - * @param f an external state mutating user defined function. 
- */ - def foreach(f: Edge[ED] => Unit) { - iterator.foreach(f) - } - - /** - * Merge all the edges with the same src and dest id into a single - * edge using the `merge` function - * - * @param merge a commutative associative merge operation - * @return a new edge partition without duplicate edges - */ - def groupEdges(merge: (ED, ED) => ED): EdgePartition[ED] = { - val builder = new EdgePartitionBuilder[ED] - var firstIter: Boolean = true - var currSrcId: VertexID = nullValue[VertexID] - var currDstId: VertexID = nullValue[VertexID] - var currAttr: ED = nullValue[ED] - var i = 0 - while (i < size) { - if (i > 0 && currSrcId == srcIds(i) && currDstId == dstIds(i)) { - currAttr = merge(currAttr, data(i)) - } else { - if (i > 0) { - builder.add(currSrcId, currDstId, currAttr) - } - currSrcId = srcIds(i) - currDstId = dstIds(i) - currAttr = data(i) - } - i += 1 - } - if (size > 0) { - builder.add(currSrcId, currDstId, currAttr) - } - builder.toEdgePartition - } - - /** - * Apply `f` to all edges present in both `this` and `other` and return a new EdgePartition - * containing the resulting edges. - * - * If there are multiple edges with the same src and dst in `this`, `f` will be invoked once for - * each edge, but each time it may be invoked on any corresponding edge in `other`. - * - * If there are multiple edges with the same src and dst in `other`, `f` will only be invoked - * once. - */ - def innerJoin[ED2: ClassTag, ED3: ClassTag] - (other: EdgePartition[ED2]) - (f: (VertexID, VertexID, ED, ED2) => ED3): EdgePartition[ED3] = { - val builder = new EdgePartitionBuilder[ED3] - var i = 0 - var j = 0 - // For i = index of each edge in `this`... - while (i < size && j < other.size) { - val srcId = this.srcIds(i) - val dstId = this.dstIds(i) - // ... forward j to the index of the corresponding edge in `other`, and... - while (j < other.size && other.srcIds(j) < srcId) { j += 1 } - if (j < other.size && other.srcIds(j) == srcId) { - while (j < other.size && other.srcIds(j) == srcId && other.dstIds(j) < dstId) { j += 1 } - if (j < other.size && other.srcIds(j) == srcId && other.dstIds(j) == dstId) { - // ... run `f` on the matching edge - builder.add(srcId, dstId, f(srcId, dstId, this.data(i), other.data(j))) - } - } - i += 1 - } - builder.toEdgePartition - } - - /** - * The number of edges in this partition - * - * @return size of the partition - */ - def size: Int = srcIds.size - - /** The number of unique source vertices in the partition. */ - def indexSize: Int = index.size - - /** - * Get an iterator over the edges in this partition. - * - * @return an iterator over edges in the partition - */ - def iterator = new Iterator[Edge[ED]] { - private[this] val edge = new Edge[ED] - private[this] var pos = 0 - - override def hasNext: Boolean = pos < EdgePartition.this.size - - override def next(): Edge[ED] = { - edge.srcId = srcIds(pos) - edge.dstId = dstIds(pos) - edge.attr = data(pos) - pos += 1 - edge - } - } - - /** - * Get an iterator over the edges in this partition whose source vertex ids match srcIdPred. The - * iterator is generated using an index scan, so it is efficient at skipping edges that don't - * match srcIdPred. - */ - def indexIterator(srcIdPred: VertexID => Boolean): Iterator[Edge[ED]] = - index.iterator.filter(kv => srcIdPred(kv._1)).flatMap(Function.tupled(clusterIterator)) - - /** - * Get an iterator over the cluster of edges in this partition with source vertex id `srcId`. The - * cluster must start at position `index`. 
- */ - private def clusterIterator(srcId: VertexID, index: Int) = new Iterator[Edge[ED]] { - private[this] val edge = new Edge[ED] - private[this] var pos = index - - override def hasNext: Boolean = { - pos >= 0 && pos < EdgePartition.this.size && srcIds(pos) == srcId - } - - override def next(): Edge[ED] = { - assert(srcIds(pos) == srcId) - edge.srcId = srcIds(pos) - edge.dstId = dstIds(pos) - edge.attr = data(pos) - pos += 1 - edge - } - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartitionBuilder.scala b/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartitionBuilder.scala deleted file mode 100644 index 56624ef60a..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/EdgePartitionBuilder.scala +++ /dev/null @@ -1,46 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.ClassTag -import scala.util.Sorting - -import org.apache.spark.graph._ -import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} - - -//private[graph] -class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag](size: Int = 64) { - - var edges = new PrimitiveVector[Edge[ED]](size) - - /** Add a new edge to the partition. */ - def add(src: VertexID, dst: VertexID, d: ED) { - edges += Edge(src, dst, d) - } - - def toEdgePartition: EdgePartition[ED] = { - val edgeArray = edges.trim().array - Sorting.quickSort(edgeArray)(Edge.lexicographicOrdering) - val srcIds = new Array[VertexID](edgeArray.size) - val dstIds = new Array[VertexID](edgeArray.size) - val data = new Array[ED](edgeArray.size) - val index = new PrimitiveKeyOpenHashMap[VertexID, Int] - // Copy edges into columnar structures, tracking the beginnings of source vertex id clusters and - // adding them to the index - if (edgeArray.length > 0) { - index.update(srcIds(0), 0) - var currSrcId: VertexID = srcIds(0) - var i = 0 - while (i < edgeArray.size) { - srcIds(i) = edgeArray(i).srcId - dstIds(i) = edgeArray(i).dstId - data(i) = edgeArray(i).attr - if (edgeArray(i).srcId != currSrcId) { - currSrcId = edgeArray(i).srcId - index.update(currSrcId, i) - } - i += 1 - } - } - new EdgePartition(srcIds, dstIds, data, index) - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletIterator.scala b/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletIterator.scala deleted file mode 100644 index e95d79e3d6..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/EdgeTripletIterator.scala +++ /dev/null @@ -1,43 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.ClassTag - -import org.apache.spark.graph._ -import org.apache.spark.util.collection.PrimitiveKeyOpenHashMap - - -/** - * The Iterator type returned when constructing edge triplets. This class technically could be - * an anonymous class in GraphImpl.triplets, but we name it here explicitly so it is easier to - * debug / profile. - */ -private[impl] -class EdgeTripletIterator[VD: ClassTag, ED: ClassTag]( - val vidToIndex: VertexIdToIndexMap, - val vertexArray: Array[VD], - val edgePartition: EdgePartition[ED]) - extends Iterator[EdgeTriplet[VD, ED]] { - - // Current position in the array. - private var pos = 0 - - // A triplet object that this iterator.next() call returns. We reuse this object to avoid - // allocating too many temporary Java objects. 
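Together, EdgePartitionBuilder and EdgePartition above form a small columnar pipeline: edges are buffered in any order, sorted and clustered by source id in toEdgePartition, and then exposed through a reusable Edge object. A local sketch with made-up values (not part of the patch):

import org.apache.spark.graph._
import org.apache.spark.graph.impl.{EdgePartition, EdgePartitionBuilder}

object EdgePartitionSketch {
  def main(args: Array[String]) {
    val builder = new EdgePartitionBuilder[Double]()
    builder.add(2L, 3L, 0.5)                                   // edges may be added out of order
    builder.add(1L, 2L, 1.0)
    builder.add(1L, 3L, 2.0)
    val part: EdgePartition[Double] = builder.toEdgePartition  // clustered by srcId, index built
    val doubled = part.map((e: Edge[Double]) => e.attr * 2.0)  // new attribute column, same index
    part.reverse.iterator.foreach(e => println(e.srcId + " -> " + e.dstId))
    println(doubled.size)                                      // number of edges is unchanged
  }
}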
- private val triplet = new EdgeTriplet[VD, ED] - - private val vmap = new PrimitiveKeyOpenHashMap[VertexID, VD](vidToIndex, vertexArray) - - override def hasNext: Boolean = pos < edgePartition.size - - override def next() = { - triplet.srcId = edgePartition.srcIds(pos) - // assert(vmap.containsKey(e.src.id)) - triplet.srcAttr = vmap(triplet.srcId) - triplet.dstId = edgePartition.dstIds(pos) - // assert(vmap.containsKey(e.dst.id)) - triplet.dstAttr = vmap(triplet.dstId) - triplet.attr = edgePartition.data(pos) - pos += 1 - triplet - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala b/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala deleted file mode 100644 index 8f42e7d592..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/GraphImpl.scala +++ /dev/null @@ -1,422 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.util.collection.PrimitiveVector -import org.apache.spark.{HashPartitioner, Partitioner} -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.impl.GraphImpl._ -import org.apache.spark.graph.impl.MsgRDDFunctions._ -import org.apache.spark.graph.util.BytecodeUtils -import org.apache.spark.rdd.{ShuffledRDD, RDD} -import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.ClosureCleaner - - -/** - * A Graph RDD that supports computation on graphs. - * - * Graphs are represented using two classes of data: vertex-partitioned and - * edge-partitioned. `vertices` contains vertex attributes, which are vertex-partitioned. `edges` - * contains edge attributes, which are edge-partitioned. For operations on vertex neighborhoods, - * vertex attributes are replicated to the edge partitions where they appear as sources or - * destinations. `routingTable` stores the routing information for shipping vertex attributes to - * edge partitions. `replicatedVertexView` stores a view of the replicated vertex attributes created - * using the routing table. - */ -class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( - @transient val vertices: VertexRDD[VD], - @transient val edges: EdgeRDD[ED], - @transient val routingTable: RoutingTable, - @transient val replicatedVertexView: ReplicatedVertexView[VD]) - extends Graph[VD, ED] with Serializable { - - def this( - vertices: VertexRDD[VD], - edges: EdgeRDD[ED], - routingTable: RoutingTable) = { - this(vertices, edges, routingTable, new ReplicatedVertexView(vertices, edges, routingTable)) - } - - def this( - vertices: VertexRDD[VD], - edges: EdgeRDD[ED]) = { - this(vertices, edges, new RoutingTable(edges, vertices)) - } - - /** Return a RDD that brings edges together with their source and destination vertices. 
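The triplet view assembled by this iterator is what user code ultimately consumes. A usage sketch (graph contents and attribute types are assumptions): each EdgeTriplet exposes both endpoint attributes next to the edge attribute.

import org.apache.spark.rdd.RDD
import org.apache.spark.graph._

def describeEdges(graph: Graph[String, Double]): RDD[String] =
  graph.triplets.map(t => t.srcAttr + " -[" + t.attr + "]-> " + t.dstAttr)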
*/ - @transient override val triplets: RDD[EdgeTriplet[VD, ED]] = { - val vdTag = classTag[VD] - val edTag = classTag[ED] - edges.partitionsRDD.zipPartitions( - replicatedVertexView.get(true, true), true) { (ePartIter, vPartIter) => - val (pid, ePart) = ePartIter.next() - val (_, vPart) = vPartIter.next() - new EdgeTripletIterator(vPart.index, vPart.values, ePart)(vdTag, edTag) - } - } - - override def persist(newLevel: StorageLevel): Graph[VD, ED] = { - vertices.persist(newLevel) - edges.persist(newLevel) - this - } - - override def cache(): Graph[VD, ED] = persist(StorageLevel.MEMORY_ONLY) - - override def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] = { - val numPartitions = edges.partitions.size - val edTag = classTag[ED] - val newEdges = new EdgeRDD(edges.map { e => - val part: PartitionID = partitionStrategy.getPartition(e.srcId, e.dstId, numPartitions) - - // Should we be using 3-tuple or an optimized class - new MessageToPartition(part, (e.srcId, e.dstId, e.attr)) - } - .partitionBy(new HashPartitioner(numPartitions)) - .mapPartitionsWithIndex( { (pid, iter) => - val builder = new EdgePartitionBuilder[ED]()(edTag) - iter.foreach { message => - val data = message.data - builder.add(data._1, data._2, data._3) - } - val edgePartition = builder.toEdgePartition - Iterator((pid, edgePartition)) - }, preservesPartitioning = true).cache()) - new GraphImpl(vertices, newEdges) - } - - override def statistics: Map[String, Any] = { - // Get the total number of vertices after replication, used to compute the replication ratio. - def numReplicatedVertices(vid2pids: RDD[Array[Array[VertexID]]]): Double = { - vid2pids.map(_.map(_.size).sum.toLong).reduce(_ + _).toDouble - } - - val numVertices = this.ops.numVertices - val numEdges = this.ops.numEdges - val replicationRatioBoth = numReplicatedVertices(routingTable.bothAttrs) / numVertices - val replicationRatioSrcOnly = numReplicatedVertices(routingTable.srcAttrOnly) / numVertices - val replicationRatioDstOnly = numReplicatedVertices(routingTable.dstAttrOnly) / numVertices - // One entry for each partition, indicate the total number of edges on that partition. - val loadArray = edges.partitionsRDD.map(_._2.size).collect().map(_.toDouble / numEdges) - val minLoad = loadArray.min - val maxLoad = loadArray.max - Map( - "Num Vertices" -> numVertices, - "Num Edges" -> numEdges, - "Replication (both)" -> replicationRatioBoth, - "Replication (src only)" -> replicationRatioSrcOnly, - "Replication (dest only)" -> replicationRatioDstOnly, - "Load Array" -> loadArray, - "Min Load" -> minLoad, - "Max Load" -> maxLoad) - } - - /** - * Display the lineage information for this graph. 
- */ - def printLineage() = { - def traverseLineage( - rdd: RDD[_], - indent: String = "", - visited: Map[Int, String] = Map.empty[Int, String]) { - if (visited.contains(rdd.id)) { - println(indent + visited(rdd.id)) - println(indent) - } else { - val locs = rdd.partitions.map( p => rdd.preferredLocations(p) ) - val cacheLevel = rdd.getStorageLevel - val name = rdd.id - val deps = rdd.dependencies - val partitioner = rdd.partitioner - val numparts = partitioner match { case Some(p) => p.numPartitions; case None => 0} - println(indent + name + ": " + cacheLevel.description + " (partitioner: " + partitioner + - ", " + numparts +")") - println(indent + " |---> Deps: " + deps.map(d => (d, d.rdd.id) ).toString) - println(indent + " |---> PrefLoc: " + locs.map(x=> x.toString).mkString(", ")) - deps.foreach(d => traverseLineage(d.rdd, indent + " | ", visited)) - } - } - println("edges ------------------------------------------") - traverseLineage(edges, " ") - var visited = Map(edges.id -> "edges") - println("\n\nvertices ------------------------------------------") - traverseLineage(vertices, " ", visited) - visited += (vertices.id -> "vertices") - println("\n\nroutingTable.bothAttrs -------------------------------") - traverseLineage(routingTable.bothAttrs, " ", visited) - visited += (routingTable.bothAttrs.id -> "routingTable.bothAttrs") - println("\n\ntriplets ----------------------------------------") - traverseLineage(triplets, " ", visited) - println(visited) - } // end of printLineage - - override def reverse: Graph[VD, ED] = { - val newETable = edges.mapEdgePartitions((pid, part) => part.reverse) - new GraphImpl(vertices, newETable, routingTable, replicatedVertexView) - } - - override def mapVertices[VD2: ClassTag](f: (VertexID, VD) => VD2): Graph[VD2, ED] = { - if (classTag[VD] equals classTag[VD2]) { - // The map preserves type, so we can use incremental replication - val newVerts = vertices.mapVertexPartitions(_.map(f)) - val changedVerts = vertices.asInstanceOf[VertexRDD[VD2]].diff(newVerts) - val newReplicatedVertexView = new ReplicatedVertexView[VD2]( - changedVerts, edges, routingTable, - Some(replicatedVertexView.asInstanceOf[ReplicatedVertexView[VD2]])) - new GraphImpl(newVerts, edges, routingTable, newReplicatedVertexView) - } else { - // The map does not preserve type, so we must re-replicate all vertices - new GraphImpl(vertices.mapVertexPartitions(_.map(f)), edges, routingTable) - } - } - - override def mapEdges[ED2: ClassTag]( - f: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] = { - val newETable = edges.mapEdgePartitions((pid, part) => part.map(f(pid, part.iterator))) - new GraphImpl(vertices, newETable , routingTable, replicatedVertexView) - } - - override def mapTriplets[ED2: ClassTag]( - f: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2]): Graph[VD, ED2] = { - // Use an explicit manifest in PrimitiveKeyOpenHashMap init so we don't pull in the implicit - // manifest from GraphImpl (which would require serializing GraphImpl). 
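As the classTag check in mapVertices above shows, a map that preserves the vertex attribute type only re-ships the changed vertices (via diff), while a type-changing map forces full re-replication. An illustrative pair of calls (names assumed, not from the patch):

import org.apache.spark.graph._

def normalizeRanks(g: Graph[Double, Double], total: Double): Graph[Double, Double] =
  g.mapVertices((vid, rank) => rank / total)          // same attribute type: incremental path
def rankLabels(g: Graph[Double, Double]): Graph[String, Double] =
  g.mapVertices((vid, rank) => "%.4f".format(rank))   // new attribute type: full re-replication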
- val vdTag = classTag[VD] - val newEdgePartitions = - edges.partitionsRDD.zipPartitions(replicatedVertexView.get(true, true), true) { - (ePartIter, vTableReplicatedIter) => - val (ePid, edgePartition) = ePartIter.next() - val (vPid, vPart) = vTableReplicatedIter.next() - assert(!vTableReplicatedIter.hasNext) - assert(ePid == vPid) - val et = new EdgeTriplet[VD, ED] - val inputIterator = edgePartition.iterator.map { e => - et.set(e) - et.srcAttr = vPart(e.srcId) - et.dstAttr = vPart(e.dstId) - et - } - // Apply the user function to the vertex partition - val outputIter = f(ePid, inputIterator) - // Consume the iterator to update the edge attributes - val newEdgePartition = edgePartition.map(outputIter) - Iterator((ePid, newEdgePartition)) - } - new GraphImpl(vertices, new EdgeRDD(newEdgePartitions), routingTable, replicatedVertexView) - } - - override def subgraph( - epred: EdgeTriplet[VD, ED] => Boolean = x => true, - vpred: (VertexID, VD) => Boolean = (a, b) => true): Graph[VD, ED] = { - // Filter the vertices, reusing the partitioner and the index from this graph - val newVerts = vertices.mapVertexPartitions(_.filter(vpred)) - - // Filter the edges - val edTag = classTag[ED] - val newEdges = new EdgeRDD[ED](triplets.filter { et => - vpred(et.srcId, et.srcAttr) && vpred(et.dstId, et.dstAttr) && epred(et) - }.mapPartitionsWithIndex( { (pid, iter) => - val builder = new EdgePartitionBuilder[ED]()(edTag) - iter.foreach { et => builder.add(et.srcId, et.dstId, et.attr) } - val edgePartition = builder.toEdgePartition - Iterator((pid, edgePartition)) - }, preservesPartitioning = true)).cache() - - // Reuse the previous ReplicatedVertexView unmodified. The replicated vertices that have been - // removed will be ignored, since we only refer to replicated vertices when they are adjacent to - // an edge. - new GraphImpl(newVerts, newEdges, new RoutingTable(newEdges, newVerts), replicatedVertexView) - } // end of subgraph - - override def mask[VD2: ClassTag, ED2: ClassTag] ( - other: Graph[VD2, ED2]): Graph[VD, ED] = { - val newVerts = vertices.innerJoin(other.vertices) { (vid, v, w) => v } - val newEdges = edges.innerJoin(other.edges) { (src, dst, v, w) => v } - // Reuse the previous ReplicatedVertexView unmodified. The replicated vertices that have been - // removed will be ignored, since we only refer to replicated vertices when they are adjacent to - // an edge. - new GraphImpl(newVerts, newEdges, routingTable, replicatedVertexView) - } - - override def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED] = { - ClosureCleaner.clean(merge) - val newETable = edges.mapEdgePartitions((pid, part) => part.groupEdges(merge)) - new GraphImpl(vertices, newETable, routingTable, replicatedVertexView) - } - - ////////////////////////////////////////////////////////////////////////////////////////////////// - // Lower level transformation methods - ////////////////////////////////////////////////////////////////////////////////////////////////// - - override def mapReduceTriplets[A: ClassTag]( - mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)], - reduceFunc: (A, A) => A, - activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None) = { - - ClosureCleaner.clean(mapFunc) - ClosureCleaner.clean(reduceFunc) - - // For each vertex, replicate its attribute only to partitions where it is - // in the relevant position in an edge. 
- val mapUsesSrcAttr = accessesVertexAttr[VD, ED](mapFunc, "srcAttr") - val mapUsesDstAttr = accessesVertexAttr[VD, ED](mapFunc, "dstAttr") - val vs = activeSetOpt match { - case Some((activeSet, _)) => - replicatedVertexView.get(mapUsesSrcAttr, mapUsesDstAttr, activeSet) - case None => - replicatedVertexView.get(mapUsesSrcAttr, mapUsesDstAttr) - } - val activeDirectionOpt = activeSetOpt.map(_._2) - - // Map and combine. - val preAgg = edges.partitionsRDD.zipPartitions(vs, true) { (ePartIter, vPartIter) => - val (ePid, edgePartition) = ePartIter.next() - val (vPid, vPart) = vPartIter.next() - assert(!vPartIter.hasNext) - assert(ePid == vPid) - // Choose scan method - val activeFraction = vPart.numActives.getOrElse(0) / edgePartition.indexSize.toFloat - val edgeIter = activeDirectionOpt match { - case Some(EdgeDirection.Both) => - if (activeFraction < 0.8) { - edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID)) - .filter(e => vPart.isActive(e.dstId)) - } else { - edgePartition.iterator.filter(e => vPart.isActive(e.srcId) && vPart.isActive(e.dstId)) - } - case Some(EdgeDirection.Out) => - if (activeFraction < 0.8) { - edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID)) - } else { - edgePartition.iterator.filter(e => vPart.isActive(e.srcId)) - } - case Some(EdgeDirection.In) => - edgePartition.iterator.filter(e => vPart.isActive(e.dstId)) - case None => - edgePartition.iterator - } - - // Scan edges and run the map function - val et = new EdgeTriplet[VD, ED] - val mapOutputs = edgeIter.flatMap { e => - et.set(e) - if (mapUsesSrcAttr) { - et.srcAttr = vPart(e.srcId) - } - if (mapUsesDstAttr) { - et.dstAttr = vPart(e.dstId) - } - mapFunc(et) - } - // Note: This doesn't allow users to send messages to arbitrary vertices. 
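mapReduceTriplets, whose implementation continues below, is the neighborhood-aggregation primitive the rest of GraphImpl builds on: the map side runs over triplets in each edge partition and the reduce side folds messages into a co-indexed VertexRDD. A classic use, sketched against the signature shown here (helper name assumed): counting in-degrees by sending 1 to each destination.

import org.apache.spark.graph._

def inDegreeCounts[VD, ED](graph: Graph[VD, ED]): VertexRDD[Int] =
  graph.mapReduceTriplets[Int](et => Iterator((et.dstId, 1)), _ + _)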
- vPart.aggregateUsingIndex(mapOutputs, reduceFunc).iterator - } - - // do the final reduction reusing the index map - vertices.aggregateUsingIndex(preAgg, reduceFunc) - } // end of mapReduceTriplets - - override def outerJoinVertices[U: ClassTag, VD2: ClassTag] - (updates: RDD[(VertexID, U)])(updateF: (VertexID, VD, Option[U]) => VD2): Graph[VD2, ED] = { - if (classTag[VD] equals classTag[VD2]) { - // updateF preserves type, so we can use incremental replication - val newVerts = vertices.leftJoin(updates)(updateF) - val changedVerts = vertices.asInstanceOf[VertexRDD[VD2]].diff(newVerts) - val newReplicatedVertexView = new ReplicatedVertexView[VD2]( - changedVerts, edges, routingTable, - Some(replicatedVertexView.asInstanceOf[ReplicatedVertexView[VD2]])) - new GraphImpl(newVerts, edges, routingTable, newReplicatedVertexView) - } else { - // updateF does not preserve type, so we must re-replicate all vertices - val newVerts = vertices.leftJoin(updates)(updateF) - new GraphImpl(newVerts, edges, routingTable) - } - } - - private def accessesVertexAttr[VD, ED](closure: AnyRef, attrName: String): Boolean = { - try { - BytecodeUtils.invokedMethod(closure, classOf[EdgeTriplet[VD, ED]], attrName) - } catch { - case _: ClassNotFoundException => true // if we don't know, be conservative - } - } -} // end of class GraphImpl - - -object GraphImpl { - - def apply[VD: ClassTag, ED: ClassTag]( - edges: RDD[Edge[ED]], - defaultVertexAttr: VD): GraphImpl[VD, ED] = - { - fromEdgeRDD(createEdgeRDD(edges), defaultVertexAttr) - } - - def fromEdgePartitions[VD: ClassTag, ED: ClassTag]( - edgePartitions: RDD[(PartitionID, EdgePartition[ED])], - defaultVertexAttr: VD): GraphImpl[VD, ED] = { - fromEdgeRDD(new EdgeRDD(edgePartitions), defaultVertexAttr) - } - - def apply[VD: ClassTag, ED: ClassTag]( - vertices: RDD[(VertexID, VD)], - edges: RDD[Edge[ED]], - defaultVertexAttr: VD): GraphImpl[VD, ED] = - { - val edgeRDD = createEdgeRDD(edges).cache() - - // Get the set of all vids - val partitioner = Partitioner.defaultPartitioner(vertices) - val vPartitioned = vertices.partitionBy(partitioner) - val vidsFromEdges = collectVertexIDsFromEdges(edgeRDD, partitioner) - val vids = vPartitioned.zipPartitions(vidsFromEdges) { (vertexIter, vidsFromEdgesIter) => - vertexIter.map(_._1) ++ vidsFromEdgesIter.map(_._1) - } - - val vertexRDD = VertexRDD(vids, vPartitioned, defaultVertexAttr) - - new GraphImpl(vertexRDD, edgeRDD) - } - - /** - * Create the edge RDD, which is much more efficient for Java heap storage than the normal edges - * data structure (RDD[(VertexID, VertexID, ED)]). - * - * The edge RDD contains multiple partitions, and each partition contains only one RDD key-value - * pair: the key is the partition id, and the value is an EdgePartition object containing all the - * edges in a partition. - */ - private def createEdgeRDD[ED: ClassTag]( - edges: RDD[Edge[ED]]): EdgeRDD[ED] = { - val edgePartitions = edges.mapPartitionsWithIndex { (pid, iter) => - val builder = new EdgePartitionBuilder[ED] - iter.foreach { e => - builder.add(e.srcId, e.dstId, e.attr) - } - Iterator((pid, builder.toEdgePartition)) - } - new EdgeRDD(edgePartitions) - } - - private def fromEdgeRDD[VD: ClassTag, ED: ClassTag]( - edges: EdgeRDD[ED], - defaultVertexAttr: VD): GraphImpl[VD, ED] = { - edges.cache() - // Get the set of all vids - val vids = collectVertexIDsFromEdges(edges, new HashPartitioner(edges.partitions.size)) - // Create the VertexRDD. 
- val vertices = VertexRDD(vids.mapValues(x => defaultVertexAttr)) - new GraphImpl(vertices, edges) - } - - /** Collects all vids mentioned in edges and partitions them by partitioner. */ - private def collectVertexIDsFromEdges( - edges: EdgeRDD[_], - partitioner: Partitioner): RDD[(VertexID, Int)] = { - // TODO: Consider doing map side distinct before shuffle. - new ShuffledRDD[VertexID, Int, (VertexID, Int)]( - edges.collectVertexIDs.map(vid => (vid, 0)), partitioner) - .setSerializer(classOf[VertexIDMsgSerializer].getName) - } -} // end of object GraphImpl diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/MessageToPartition.scala b/graph/src/main/scala/org/apache/spark/graph/impl/MessageToPartition.scala deleted file mode 100644 index b2fa728482..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/MessageToPartition.scala +++ /dev/null @@ -1,93 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.Partitioner -import org.apache.spark.graph.{PartitionID, VertexID} -import org.apache.spark.rdd.{ShuffledRDD, RDD} - - -class VertexBroadcastMsg[@specialized(Int, Long, Double, Boolean) T]( - @transient var partition: PartitionID, - var vid: VertexID, - var data: T) - extends Product2[PartitionID, (VertexID, T)] with Serializable { - - override def _1 = partition - - override def _2 = (vid, data) - - override def canEqual(that: Any): Boolean = that.isInstanceOf[VertexBroadcastMsg[_]] -} - - -/** - * A message used to send a specific value to a partition. - * @param partition index of the target partition. - * @param data value to send - */ -class MessageToPartition[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T]( - @transient var partition: PartitionID, - var data: T) - extends Product2[PartitionID, T] with Serializable { - - override def _1 = partition - - override def _2 = data - - override def canEqual(that: Any): Boolean = that.isInstanceOf[MessageToPartition[_]] -} - - -class VertexBroadcastMsgRDDFunctions[T: ClassTag](self: RDD[VertexBroadcastMsg[T]]) { - def partitionBy(partitioner: Partitioner): RDD[VertexBroadcastMsg[T]] = { - val rdd = new ShuffledRDD[PartitionID, (VertexID, T), VertexBroadcastMsg[T]](self, partitioner) - - // Set a custom serializer if the data is of int or double type. - if (classTag[T] == ClassTag.Int) { - rdd.setSerializer(classOf[IntVertexBroadcastMsgSerializer].getName) - } else if (classTag[T] == ClassTag.Long) { - rdd.setSerializer(classOf[LongVertexBroadcastMsgSerializer].getName) - } else if (classTag[T] == ClassTag.Double) { - rdd.setSerializer(classOf[DoubleVertexBroadcastMsgSerializer].getName) - } - rdd - } -} - - -class MsgRDDFunctions[T: ClassTag](self: RDD[MessageToPartition[T]]) { - - /** - * Return a copy of the RDD partitioned using the specified partitioner. 
- */ - def partitionBy(partitioner: Partitioner): RDD[MessageToPartition[T]] = { - new ShuffledRDD[PartitionID, T, MessageToPartition[T]](self, partitioner) - } - -} - - -object MsgRDDFunctions { - implicit def rdd2PartitionRDDFunctions[T: ClassTag](rdd: RDD[MessageToPartition[T]]) = { - new MsgRDDFunctions(rdd) - } - - implicit def rdd2vertexMessageRDDFunctions[T: ClassTag](rdd: RDD[VertexBroadcastMsg[T]]) = { - new VertexBroadcastMsgRDDFunctions(rdd) - } - - def partitionForAggregation[T: ClassTag](msgs: RDD[(VertexID, T)], partitioner: Partitioner) = { - val rdd = new ShuffledRDD[VertexID, T, (VertexID, T)](msgs, partitioner) - - // Set a custom serializer if the data is of int or double type. - if (classTag[T] == ClassTag.Int) { - rdd.setSerializer(classOf[IntAggMsgSerializer].getName) - } else if (classTag[T] == ClassTag.Long) { - rdd.setSerializer(classOf[LongAggMsgSerializer].getName) - } else if (classTag[T] == ClassTag.Double) { - rdd.setSerializer(classOf[DoubleAggMsgSerializer].getName) - } - rdd - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/ReplicatedVertexView.scala b/graph/src/main/scala/org/apache/spark/graph/impl/ReplicatedVertexView.scala deleted file mode 100644 index 7d29861db1..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/ReplicatedVertexView.scala +++ /dev/null @@ -1,182 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.{classTag, ClassTag} - -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd.RDD -import org.apache.spark.util.collection.{PrimitiveVector, OpenHashSet} - -import org.apache.spark.graph._ - -/** - * A view of the vertices after they are shipped to the join sites specified in - * `vertexPlacement`. The resulting view is co-partitioned with `edges`. If `prevViewOpt` is - * specified, `updatedVerts` are treated as incremental updates to the previous view. Otherwise, a - * fresh view is created. - * - * The view is always cached (i.e., once it is created, it remains materialized). This avoids - * constructing it twice if the user calls graph.triplets followed by graph.mapReduceTriplets, for - * example. - */ -private[impl] -class ReplicatedVertexView[VD: ClassTag]( - updatedVerts: VertexRDD[VD], - edges: EdgeRDD[_], - routingTable: RoutingTable, - prevViewOpt: Option[ReplicatedVertexView[VD]] = None) { - - /** - * Within each edge partition, create a local map from vid to an index into the attribute - * array. Each map contains a superset of the vertices that it will receive, because it stores - * vids from both the source and destination of edges. It must always include both source and - * destination vids because some operations, such as GraphImpl.mapReduceTriplets, rely on this. 
- */ - private val localVertexIDMap: RDD[(Int, VertexIdToIndexMap)] = prevViewOpt match { - case Some(prevView) => - prevView.localVertexIDMap - case None => - edges.partitionsRDD.mapPartitions(_.map { - case (pid, epart) => - val vidToIndex = new VertexIdToIndexMap - epart.foreach { e => - vidToIndex.add(e.srcId) - vidToIndex.add(e.dstId) - } - (pid, vidToIndex) - }, preservesPartitioning = true).cache().setName("ReplicatedVertexView localVertexIDMap") - } - - private lazy val bothAttrs: RDD[(PartitionID, VertexPartition[VD])] = create(true, true) - private lazy val srcAttrOnly: RDD[(PartitionID, VertexPartition[VD])] = create(true, false) - private lazy val dstAttrOnly: RDD[(PartitionID, VertexPartition[VD])] = create(false, true) - private lazy val noAttrs: RDD[(PartitionID, VertexPartition[VD])] = create(false, false) - - def get(includeSrc: Boolean, includeDst: Boolean): RDD[(PartitionID, VertexPartition[VD])] = { - (includeSrc, includeDst) match { - case (true, true) => bothAttrs - case (true, false) => srcAttrOnly - case (false, true) => dstAttrOnly - case (false, false) => noAttrs - } - } - - def get( - includeSrc: Boolean, - includeDst: Boolean, - actives: VertexRDD[_]): RDD[(PartitionID, VertexPartition[VD])] = { - // Ship active sets to edge partitions using vertexPlacement, but ignoring includeSrc and - // includeDst. These flags govern attribute shipping, but the activeness of a vertex must be - // shipped to all edges mentioning that vertex, regardless of whether the vertex attribute is - // also shipped there. - val shippedActives = routingTable.get(true, true) - .zipPartitions(actives.partitionsRDD)(ReplicatedVertexView.buildActiveBuffer(_, _)) - .partitionBy(edges.partitioner.get) - // Update the view with shippedActives, setting activeness flags in the resulting - // VertexPartitions - get(includeSrc, includeDst).zipPartitions(shippedActives) { (viewIter, shippedActivesIter) => - val (pid, vPart) = viewIter.next() - val newPart = vPart.replaceActives(shippedActivesIter.flatMap(_._2.iterator)) - Iterator((pid, newPart)) - } - } - - private def create(includeSrc: Boolean, includeDst: Boolean) - : RDD[(PartitionID, VertexPartition[VD])] = { - val vdTag = classTag[VD] - - // Ship vertex attributes to edge partitions according to vertexPlacement - val verts = updatedVerts.partitionsRDD - val shippedVerts = routingTable.get(includeSrc, includeDst) - .zipPartitions(verts)(ReplicatedVertexView.buildBuffer(_, _)(vdTag)) - .partitionBy(edges.partitioner.get) - // TODO: Consider using a specialized shuffler. 
- - prevViewOpt match { - case Some(prevView) => - // Update prevView with shippedVerts, setting staleness flags in the resulting - // VertexPartitions - prevView.get(includeSrc, includeDst).zipPartitions(shippedVerts) { - (prevViewIter, shippedVertsIter) => - val (pid, prevVPart) = prevViewIter.next() - val newVPart = prevVPart.innerJoinKeepLeft(shippedVertsIter.flatMap(_._2.iterator)) - Iterator((pid, newVPart)) - }.cache().setName("ReplicatedVertexView delta %s %s".format(includeSrc, includeDst)) - - case None => - // Within each edge partition, place the shipped vertex attributes into the correct - // locations specified in localVertexIDMap - localVertexIDMap.zipPartitions(shippedVerts) { (mapIter, shippedVertsIter) => - val (pid, vidToIndex) = mapIter.next() - assert(!mapIter.hasNext) - // Populate the vertex array using the vidToIndex map - val vertexArray = vdTag.newArray(vidToIndex.capacity) - for ((_, block) <- shippedVertsIter) { - for (i <- 0 until block.vids.size) { - val vid = block.vids(i) - val attr = block.attrs(i) - val ind = vidToIndex.getPos(vid) - vertexArray(ind) = attr - } - } - val newVPart = new VertexPartition( - vidToIndex, vertexArray, vidToIndex.getBitSet)(vdTag) - Iterator((pid, newVPart)) - }.cache().setName("ReplicatedVertexView %s %s".format(includeSrc, includeDst)) - } - } -} - -object ReplicatedVertexView { - protected def buildBuffer[VD: ClassTag]( - pid2vidIter: Iterator[Array[Array[VertexID]]], - vertexPartIter: Iterator[VertexPartition[VD]]) = { - val pid2vid: Array[Array[VertexID]] = pid2vidIter.next() - val vertexPart: VertexPartition[VD] = vertexPartIter.next() - - Iterator.tabulate(pid2vid.size) { pid => - val vidsCandidate = pid2vid(pid) - val size = vidsCandidate.length - val vids = new PrimitiveVector[VertexID](pid2vid(pid).size) - val attrs = new PrimitiveVector[VD](pid2vid(pid).size) - var i = 0 - while (i < size) { - val vid = vidsCandidate(i) - if (vertexPart.isDefined(vid)) { - vids += vid - attrs += vertexPart(vid) - } - i += 1 - } - (pid, new VertexAttributeBlock(vids.trim().array, attrs.trim().array)) - } - } - - protected def buildActiveBuffer( - pid2vidIter: Iterator[Array[Array[VertexID]]], - activePartIter: Iterator[VertexPartition[_]]) - : Iterator[(Int, Array[VertexID])] = { - val pid2vid: Array[Array[VertexID]] = pid2vidIter.next() - val activePart: VertexPartition[_] = activePartIter.next() - - Iterator.tabulate(pid2vid.size) { pid => - val vidsCandidate = pid2vid(pid) - val size = vidsCandidate.length - val actives = new PrimitiveVector[VertexID](vidsCandidate.size) - var i = 0 - while (i < size) { - val vid = vidsCandidate(i) - if (activePart.isDefined(vid)) { - actives += vid - } - i += 1 - } - (pid, actives.trim().array) - } - } -} - -class VertexAttributeBlock[VD: ClassTag](val vids: Array[VertexID], val attrs: Array[VD]) - extends Serializable { - def iterator: Iterator[(VertexID, VD)] = - (0 until vids.size).iterator.map { i => (vids(i), attrs(i)) } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/RoutingTable.scala b/graph/src/main/scala/org/apache/spark/graph/impl/RoutingTable.scala deleted file mode 100644 index 96d9e9d7f8..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/RoutingTable.scala +++ /dev/null @@ -1,64 +0,0 @@ -package org.apache.spark.graph.impl - -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.rdd.RDD -import org.apache.spark.storage.StorageLevel -import org.apache.spark.util.collection.PrimitiveVector - -/** - * Stores the 
locations of edge-partition join sites for each vertex attribute; that is, the routing - * information for shipping vertex attributes to edge partitions. This is always cached because it - * may be used multiple times in ReplicatedVertexView -- once to ship the vertex attributes and - * (possibly) once to ship the active-set information. - */ -class RoutingTable(edges: EdgeRDD[_], vertices: VertexRDD[_]) { - - val bothAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(true, true) - val srcAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(true, false) - val dstAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(false, true) - val noAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(false, false) - - def get(includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] = - (includeSrcAttr, includeDstAttr) match { - case (true, true) => bothAttrs - case (true, false) => srcAttrOnly - case (false, true) => dstAttrOnly - case (false, false) => noAttrs - } - - private def createPid2Vid( - includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] = { - // Determine which vertices each edge partition needs by creating a mapping from vid to pid. - val vid2pid: RDD[(VertexID, PartitionID)] = edges.partitionsRDD.mapPartitions { iter => - val (pid: PartitionID, edgePartition: EdgePartition[_]) = iter.next() - val numEdges = edgePartition.size - val vSet = new VertexSet - if (includeSrcAttr) { // Add src vertices to the set. - var i = 0 - while (i < numEdges) { - vSet.add(edgePartition.srcIds(i)) - i += 1 - } - } - if (includeDstAttr) { // Add dst vertices to the set. - var i = 0 - while (i < numEdges) { - vSet.add(edgePartition.dstIds(i)) - i += 1 - } - } - vSet.iterator.map { vid => (vid, pid) } - } - - val numPartitions = vertices.partitions.size - vid2pid.partitionBy(vertices.partitioner.get).mapPartitions { iter => - val pid2vid = Array.fill(numPartitions)(new PrimitiveVector[VertexID]) - for ((vid, pid) <- iter) { - pid2vid(pid) += vid - } - - Iterator(pid2vid.map(_.trim().array)) - }.cache().setName("RoutingTable %s %s".format(includeSrcAttr, includeDstAttr)) - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/Serializers.scala b/graph/src/main/scala/org/apache/spark/graph/impl/Serializers.scala deleted file mode 100644 index a3b0ea7689..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/Serializers.scala +++ /dev/null @@ -1,386 +0,0 @@ -package org.apache.spark.graph.impl - -import java.io.{EOFException, InputStream, OutputStream} -import java.nio.ByteBuffer - -import org.apache.spark.SparkConf -import org.apache.spark.graph._ -import org.apache.spark.serializer._ - -class VertexIDMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[(VertexID, _)] - writeVarLong(msg._1, optimizePositive = false) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - override def readObject[T](): T = { - (readVarLong(optimizePositive = false), null).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for VertexBroadcastMessage[Int]. 
*/ -class IntVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[VertexBroadcastMsg[Int]] - writeVarLong(msg.vid, optimizePositive = false) - writeInt(msg.data) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - override def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readInt() - new VertexBroadcastMsg[Int](0, a, b).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for VertexBroadcastMessage[Long]. */ -class LongVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[VertexBroadcastMsg[Long]] - writeVarLong(msg.vid, optimizePositive = false) - writeLong(msg.data) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - override def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readLong() - new VertexBroadcastMsg[Long](0, a, b).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for VertexBroadcastMessage[Double]. */ -class DoubleVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[VertexBroadcastMsg[Double]] - writeVarLong(msg.vid, optimizePositive = false) - writeDouble(msg.data) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readDouble() - new VertexBroadcastMsg[Double](0, a, b).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for AggregationMessage[Int]. */ -class IntAggMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[(VertexID, Int)] - writeVarLong(msg._1, optimizePositive = false) - writeUnsignedVarInt(msg._2) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - override def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readUnsignedVarInt() - (a, b).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for AggregationMessage[Long]. 
*/ -class LongAggMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[(VertexID, Long)] - writeVarLong(msg._1, optimizePositive = false) - writeVarLong(msg._2, optimizePositive = true) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - override def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readVarLong(optimizePositive = true) - (a, b).asInstanceOf[T] - } - } - } -} - -/** A special shuffle serializer for AggregationMessage[Double]. */ -class DoubleAggMsgSerializer(conf: SparkConf) extends Serializer { - override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { - - override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { - def writeObject[T](t: T) = { - val msg = t.asInstanceOf[(VertexID, Double)] - writeVarLong(msg._1, optimizePositive = false) - writeDouble(msg._2) - this - } - } - - override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { - def readObject[T](): T = { - val a = readVarLong(optimizePositive = false) - val b = readDouble() - (a, b).asInstanceOf[T] - } - } - } -} - -//////////////////////////////////////////////////////////////////////////////// -// Helper classes to shorten the implementation of those special serializers. -//////////////////////////////////////////////////////////////////////////////// - -abstract class ShuffleSerializationStream(s: OutputStream) extends SerializationStream { - // The implementation should override this one. 
- def writeObject[T](t: T): SerializationStream - - def writeInt(v: Int) { - s.write(v >> 24) - s.write(v >> 16) - s.write(v >> 8) - s.write(v) - } - - def writeUnsignedVarInt(value: Int) { - if ((value >>> 7) == 0) { - s.write(value.toInt) - } else if ((value >>> 14) == 0) { - s.write((value & 0x7F) | 0x80) - s.write(value >>> 7) - } else if ((value >>> 21) == 0) { - s.write((value & 0x7F) | 0x80) - s.write(value >>> 7 | 0x80) - s.write(value >>> 14) - } else if ((value >>> 28) == 0) { - s.write((value & 0x7F) | 0x80) - s.write(value >>> 7 | 0x80) - s.write(value >>> 14 | 0x80) - s.write(value >>> 21) - } else { - s.write((value & 0x7F) | 0x80) - s.write(value >>> 7 | 0x80) - s.write(value >>> 14 | 0x80) - s.write(value >>> 21 | 0x80) - s.write(value >>> 28) - } - } - - def writeVarLong(value: Long, optimizePositive: Boolean) { - val v = if (!optimizePositive) (value << 1) ^ (value >> 63) else value - if ((v >>> 7) == 0) { - s.write(v.toInt) - } else if ((v >>> 14) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7).toInt) - } else if ((v >>> 21) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14).toInt) - } else if ((v >>> 28) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21).toInt) - } else if ((v >>> 35) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21 | 0x80).toInt) - s.write((v >>> 28).toInt) - } else if ((v >>> 42) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21 | 0x80).toInt) - s.write((v >>> 28 | 0x80).toInt) - s.write((v >>> 35).toInt) - } else if ((v >>> 49) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21 | 0x80).toInt) - s.write((v >>> 28 | 0x80).toInt) - s.write((v >>> 35 | 0x80).toInt) - s.write((v >>> 42).toInt) - } else if ((v >>> 56) == 0) { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21 | 0x80).toInt) - s.write((v >>> 28 | 0x80).toInt) - s.write((v >>> 35 | 0x80).toInt) - s.write((v >>> 42 | 0x80).toInt) - s.write((v >>> 49).toInt) - } else { - s.write(((v & 0x7F) | 0x80).toInt) - s.write((v >>> 7 | 0x80).toInt) - s.write((v >>> 14 | 0x80).toInt) - s.write((v >>> 21 | 0x80).toInt) - s.write((v >>> 28 | 0x80).toInt) - s.write((v >>> 35 | 0x80).toInt) - s.write((v >>> 42 | 0x80).toInt) - s.write((v >>> 49 | 0x80).toInt) - s.write((v >>> 56).toInt) - } - } - - def writeLong(v: Long) { - s.write((v >>> 56).toInt) - s.write((v >>> 48).toInt) - s.write((v >>> 40).toInt) - s.write((v >>> 32).toInt) - s.write((v >>> 24).toInt) - s.write((v >>> 16).toInt) - s.write((v >>> 8).toInt) - s.write(v.toInt) - } - - //def writeDouble(v: Double): Unit = writeUnsignedVarLong(java.lang.Double.doubleToLongBits(v)) - def writeDouble(v: Double): Unit = writeLong(java.lang.Double.doubleToLongBits(v)) - - override def flush(): Unit = s.flush() - - override def close(): Unit = s.close() -} - -abstract class ShuffleDeserializationStream(s: InputStream) extends DeserializationStream { - // The implementation should override this one. 
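The unrolled writeVarLong above, together with the readVarLong that follows, is a zig-zag, 7-bits-per-byte variable-length encoding: a small vertex ID costs one or two bytes instead of eight, a continuation bit marks every byte except the last, and a ninth byte, when needed, carries the top eight bits verbatim. A compact loop-based reconstruction of the same format (a sketch assumed equivalent to the unrolled code, not code from this patch):

    import java.io.{EOFException, InputStream, OutputStream}

    object VarLongCodec {
      def writeVarLong(out: OutputStream, value: Long, optimizePositive: Boolean): Unit = {
        // Zig-zag maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ... so small magnitudes stay small.
        var v = if (optimizePositive) value else (value << 1) ^ (value >> 63)
        var groups = 0
        while (groups < 8 && (v >>> 7) != 0) {
          out.write(((v & 0x7F) | 0x80).toInt) // low seven bits plus continuation flag
          v >>>= 7
          groups += 1
        }
        out.write(v.toInt) // final byte: seven bits, or the full top eight bits after 8 groups
      }

      def readVarLong(in: InputStream, optimizePositive: Boolean): Long = {
        def next(): Int = { val b = in.read(); if (b < 0) throw new EOFException; b }
        var b = next()
        var ret: Long = b & 0x7F
        var shift = 7
        while (shift < 56 && (b & 0x80) != 0) {
          b = next()
          ret |= (b & 0x7F).toLong << shift
          shift += 7
        }
        if ((b & 0x80) != 0) ret |= next().toLong << 56 // ninth byte: top eight bits verbatim
        if (optimizePositive) ret else (ret >>> 1) ^ -(ret & 1) // undo zig-zag
      }
    }

With optimizePositive = false, writing -1L emits the single byte 0x01 (zig-zag maps -1 to 1); without zig-zag the same value would need all nine bytes.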
- def readObject[T](): T - - def readInt(): Int = { - val first = s.read() - if (first < 0) throw new EOFException - (first & 0xFF) << 24 | (s.read() & 0xFF) << 16 | (s.read() & 0xFF) << 8 | (s.read() & 0xFF) - } - - def readUnsignedVarInt(): Int = { - var value: Int = 0 - var i: Int = 0 - def readOrThrow(): Int = { - val in = s.read() - if (in < 0) throw new EOFException - in & 0xFF - } - var b: Int = readOrThrow() - while ((b & 0x80) != 0) { - value |= (b & 0x7F) << i - i += 7 - if (i > 35) throw new IllegalArgumentException("Variable length quantity is too long") - b = readOrThrow() - } - value | (b << i) - } - - def readVarLong(optimizePositive: Boolean): Long = { - def readOrThrow(): Int = { - val in = s.read() - if (in < 0) throw new EOFException - in & 0xFF - } - var b = readOrThrow() - var ret: Long = b & 0x7F - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F) << 7 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F) << 14 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F) << 21 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F).toLong << 28 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F).toLong << 35 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F).toLong << 42 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= (b & 0x7F).toLong << 49 - if ((b & 0x80) != 0) { - b = readOrThrow() - ret |= b.toLong << 56 - } - } - } - } - } - } - } - } - if (!optimizePositive) (ret >>> 1) ^ -(ret & 1) else ret - } - - def readLong(): Long = { - val first = s.read() - if (first < 0) throw new EOFException() - (first.toLong << 56) | - (s.read() & 0xFF).toLong << 48 | - (s.read() & 0xFF).toLong << 40 | - (s.read() & 0xFF).toLong << 32 | - (s.read() & 0xFF).toLong << 24 | - (s.read() & 0xFF) << 16 | - (s.read() & 0xFF) << 8 | - (s.read() & 0xFF) - } - - //def readDouble(): Double = java.lang.Double.longBitsToDouble(readUnsignedVarLong()) - def readDouble(): Double = java.lang.Double.longBitsToDouble(readLong()) - - override def close(): Unit = s.close() -} - -sealed trait ShuffleSerializerInstance extends SerializerInstance { - - override def serialize[T](t: T): ByteBuffer = throw new UnsupportedOperationException - - override def deserialize[T](bytes: ByteBuffer): T = throw new UnsupportedOperationException - - override def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = - throw new UnsupportedOperationException - - // The implementation should override the following two. 
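Putting the pieces together, each specialized serializer above just streams a var-long vertex ID followed by a fixed- or variable-length payload. A hypothetical round trip through one of them (assumes access to the org.apache.spark.graph.impl classes in this patch; VertexID is an alias for Long):

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
    import org.apache.spark.SparkConf

    val ser = new IntAggMsgSerializer(new SparkConf()).newInstance()

    // Write one (vertexId, count) aggregation message: two bytes total for (5L, 7).
    val buffer = new ByteArrayOutputStream()
    val out = ser.serializeStream(buffer)
    out.writeObject((5L, 7))
    out.flush()

    // Read it back through the matching deserialization stream.
    val in = ser.deserializeStream(new ByteArrayInputStream(buffer.toByteArray))
    assert(in.readObject[(Long, Int)]() == (5L, 7))

The Int/Long/Double specializations exist so that the common aggregation and broadcast message shapes never pass through generic Java or Kryo serialization on the shuffle path.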
- override def serializeStream(s: OutputStream): SerializationStream - override def deserializeStream(s: InputStream): DeserializationStream -} diff --git a/graph/src/main/scala/org/apache/spark/graph/impl/VertexPartition.scala b/graph/src/main/scala/org/apache/spark/graph/impl/VertexPartition.scala deleted file mode 100644 index 91244daa54..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/impl/VertexPartition.scala +++ /dev/null @@ -1,262 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.ClassTag - -import org.apache.spark.util.collection.{BitSet, PrimitiveKeyOpenHashMap} - -import org.apache.spark.Logging -import org.apache.spark.graph._ - - -private[graph] object VertexPartition { - - def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)]): VertexPartition[VD] = { - val map = new PrimitiveKeyOpenHashMap[VertexID, VD] - iter.foreach { case (k, v) => - map(k) = v - } - new VertexPartition(map.keySet, map._values, map.keySet.getBitSet) - } - - def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)], mergeFunc: (VD, VD) => VD) - : VertexPartition[VD] = - { - val map = new PrimitiveKeyOpenHashMap[VertexID, VD] - iter.foreach { case (k, v) => - map.setMerge(k, v, mergeFunc) - } - new VertexPartition(map.keySet, map._values, map.keySet.getBitSet) - } -} - - -private[graph] -class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag]( - val index: VertexIdToIndexMap, - val values: Array[VD], - val mask: BitSet, - /** A set of vids of active vertices. May contain vids not in index due to join rewrite. */ - private val activeSet: Option[VertexSet] = None) - extends Logging { - - val capacity: Int = index.capacity - - def size: Int = mask.cardinality() - - /** Return the vertex attribute for the given vertex ID. */ - def apply(vid: VertexID): VD = values(index.getPos(vid)) - - def isDefined(vid: VertexID): Boolean = { - val pos = index.getPos(vid) - pos >= 0 && mask.get(pos) - } - - /** Look up vid in activeSet, throwing an exception if it is None. */ - def isActive(vid: VertexID): Boolean = { - activeSet.get.contains(vid) - } - - /** The number of active vertices, if any exist. */ - def numActives: Option[Int] = activeSet.map(_.size) - - /** - * Pass each vertex attribute along with the vertex id through a map - * function and retain the original RDD's partitioning and index. - * - * @tparam VD2 the type returned by the map function - * - * @param f the function applied to each vertex id and vertex - * attribute in the RDD - * - * @return a new VertexPartition with values obtained by applying `f` to - * each of the entries in the original VertexRDD. The resulting - * VertexPartition retains the same index. - */ - def map[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexPartition[VD2] = { - // Construct a view of the map transformation - val newValues = new Array[VD2](capacity) - var i = mask.nextSetBit(0) - while (i >= 0) { - newValues(i) = f(index.getValue(i), values(i)) - i = mask.nextSetBit(i + 1) - } - new VertexPartition[VD2](index, newValues, mask) - } - - /** - * Restrict the vertex set to the set of vertices satisfying the given predicate. - * - * @param pred the user defined predicate - * - * @note The vertex set preserves the original index structure which means that the returned - * RDD can be easily joined with the original vertex-set. Furthermore, the filter only - * modifies the bitmap index and so no new values are allocated. 
- */ - def filter(pred: (VertexID, VD) => Boolean): VertexPartition[VD] = { - // Allocate the array to store the results into - val newMask = new BitSet(capacity) - // Iterate over the active bits in the old mask and evaluate the predicate - var i = mask.nextSetBit(0) - while (i >= 0) { - if (pred(index.getValue(i), values(i))) { - newMask.set(i) - } - i = mask.nextSetBit(i + 1) - } - new VertexPartition(index, values, newMask) - } - - /** - * Hides vertices that are the same between this and other. For vertices that are different, keeps - * the values from `other`. The indices of `this` and `other` must be the same. - */ - def diff(other: VertexPartition[VD]): VertexPartition[VD] = { - if (index != other.index) { - logWarning("Diffing two VertexPartitions with different indexes is slow.") - diff(createUsingIndex(other.iterator)) - } else { - val newMask = mask & other.mask - var i = newMask.nextSetBit(0) - while (i >= 0) { - if (values(i) == other.values(i)) { - newMask.unset(i) - } - i = newMask.nextSetBit(i + 1) - } - new VertexPartition(index, other.values, newMask) - } - } - - /** Left outer join another VertexPartition. */ - def leftJoin[VD2: ClassTag, VD3: ClassTag] - (other: VertexPartition[VD2]) - (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = { - if (index != other.index) { - logWarning("Joining two VertexPartitions with different indexes is slow.") - leftJoin(createUsingIndex(other.iterator))(f) - } else { - val newValues = new Array[VD3](capacity) - - var i = mask.nextSetBit(0) - while (i >= 0) { - val otherV: Option[VD2] = if (other.mask.get(i)) Some(other.values(i)) else None - newValues(i) = f(index.getValue(i), values(i), otherV) - i = mask.nextSetBit(i + 1) - } - new VertexPartition(index, newValues, mask) - } - } - - /** Left outer join another iterator of messages. */ - def leftJoin[VD2: ClassTag, VD3: ClassTag] - (other: Iterator[(VertexID, VD2)]) - (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = { - leftJoin(createUsingIndex(other))(f) - } - - /** Inner join another VertexPartition. */ - def innerJoin[U: ClassTag, VD2: ClassTag](other: VertexPartition[U]) - (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = { - if (index != other.index) { - logWarning("Joining two VertexPartitions with different indexes is slow.") - innerJoin(createUsingIndex(other.iterator))(f) - } else { - val newMask = mask & other.mask - val newValues = new Array[VD2](capacity) - var i = newMask.nextSetBit(0) - while (i >= 0) { - newValues(i) = f(index.getValue(i), values(i), other.values(i)) - i = newMask.nextSetBit(i + 1) - } - new VertexPartition(index, newValues, newMask) - } - } - - /** - * Inner join an iterator of messages. - */ - def innerJoin[U: ClassTag, VD2: ClassTag] - (iter: Iterator[Product2[VertexID, U]]) - (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = { - innerJoin(createUsingIndex(iter))(f) - } - - /** - * Similar effect as aggregateUsingIndex((a, b) => a) - */ - def createUsingIndex[VD2: ClassTag](iter: Iterator[Product2[VertexID, VD2]]) - : VertexPartition[VD2] = { - val newMask = new BitSet(capacity) - val newValues = new Array[VD2](capacity) - iter.foreach { case (vid, vdata) => - val pos = index.getPos(vid) - if (pos >= 0) { - newMask.set(pos) - newValues(pos) = vdata - } - } - new VertexPartition[VD2](index, newValues, newMask) - } - - /** - * Similar to innerJoin, but vertices from the left side that don't appear in iter will remain in - * the partition, hidden by the bitmask. 
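All of these operations lean on the same representation: a shared vid-to-index hash set, a flat values array, and a bitmask recording which slots are currently defined, so most of them allocate only a new mask or a new values array, never a new index. A toy standalone version of the idea (hypothetical names, java.util.BitSet standing in for Spark's BitSet):

    import java.util.BitSet

    // index/values/mask in miniature: filtering flips mask bits and shares everything else.
    final class ToyVertexPartition(
        val ids: Array[Long],       // position -> vertex id (the shared "index")
        val values: Array[Double],  // position -> attribute
        val mask: BitSet) {         // which positions are currently defined

      def size: Int = mask.cardinality()

      def filter(pred: (Long, Double) => Boolean): ToyVertexPartition = {
        val newMask = new BitSet(ids.length)
        var i = mask.nextSetBit(0)
        while (i >= 0) {
          if (pred(ids(i), values(i))) newMask.set(i)
          i = mask.nextSetBit(i + 1)
        }
        new ToyVertexPartition(ids, values, newMask) // no ids or values copied
      }
    }

In the real class, filter and diff build only a new mask; operations that change the attribute type (map and the joins) allocate a fresh values array but still reuse the shared index, which is what keeps the "different indexes is slow" warning paths exceptional.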
- */ - def innerJoinKeepLeft(iter: Iterator[Product2[VertexID, VD]]): VertexPartition[VD] = { - val newMask = new BitSet(capacity) - val newValues = new Array[VD](capacity) - System.arraycopy(values, 0, newValues, 0, newValues.length) - iter.foreach { case (vid, vdata) => - val pos = index.getPos(vid) - if (pos >= 0) { - newMask.set(pos) - newValues(pos) = vdata - } - } - new VertexPartition(index, newValues, newMask) - } - - def aggregateUsingIndex[VD2: ClassTag]( - iter: Iterator[Product2[VertexID, VD2]], - reduceFunc: (VD2, VD2) => VD2): VertexPartition[VD2] = { - val newMask = new BitSet(capacity) - val newValues = new Array[VD2](capacity) - iter.foreach { product => - val vid = product._1 - val vdata = product._2 - val pos = index.getPos(vid) - if (pos >= 0) { - if (newMask.get(pos)) { - newValues(pos) = reduceFunc(newValues(pos), vdata) - } else { // otherwise just store the new value - newMask.set(pos) - newValues(pos) = vdata - } - } - } - new VertexPartition[VD2](index, newValues, newMask) - } - - def replaceActives(iter: Iterator[VertexID]): VertexPartition[VD] = { - val newActiveSet = new VertexSet - iter.foreach(newActiveSet.add(_)) - new VertexPartition(index, values, mask, Some(newActiveSet)) - } - - /** - * Construct a new VertexPartition whose index contains only the vertices in the mask. - */ - def reindex(): VertexPartition[VD] = { - val hashMap = new PrimitiveKeyOpenHashMap[VertexID, VD] - val arbitraryMerge = (a: VD, b: VD) => a - for ((k, v) <- this.iterator) { - hashMap.setMerge(k, v, arbitraryMerge) - } - new VertexPartition(hashMap.keySet, hashMap._values, hashMap.keySet.getBitSet) - } - - def iterator: Iterator[(VertexID, VD)] = - mask.iterator.map(ind => (index.getValue(ind), values(ind))) - - def vidIterator: Iterator[VertexID] = mask.iterator.map(ind => index.getValue(ind)) -} diff --git a/graph/src/main/scala/org/apache/spark/graph/package.scala b/graph/src/main/scala/org/apache/spark/graph/package.scala deleted file mode 100644 index b98a11b918..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/package.scala +++ /dev/null @@ -1,22 +0,0 @@ -package org.apache.spark - -import org.apache.spark.util.collection.OpenHashSet - - -package object graph { - - type VertexID = Long - - // TODO: Consider using Char. - type PartitionID = Int - - type VertexSet = OpenHashSet[VertexID] - - // type VertexIdToIndexMap = it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap - type VertexIdToIndexMap = OpenHashSet[VertexID] - - /** - * Return the default null-like value for a data type T. 
- */ - def nullValue[T] = null.asInstanceOf[T] -} diff --git a/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala b/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala deleted file mode 100644 index eaff27a33e..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/perf/BagelTest.scala +++ /dev/null @@ -1,76 +0,0 @@ -///// This file creates circular dependencies between examples bagle and graph - -// package org.apache.spark.graph.perf - -// import org.apache.spark._ -// import org.apache.spark.SparkContext._ -// import org.apache.spark.bagel.Bagel - -// import org.apache.spark.examples.bagel -// //import org.apache.spark.bagel.examples._ -// import org.apache.spark.graph._ - - -// object BagelTest { - -// def main(args: Array[String]) { -// val host = args(0) -// val taskType = args(1) -// val fname = args(2) -// val options = args.drop(3).map { arg => -// arg.dropWhile(_ == '-').split('=') match { -// case Array(opt, v) => (opt -> v) -// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) -// } -// } - -// System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") -// //System.setProperty("spark.shuffle.compress", "false") -// System.setProperty("spark.kryo.registrator", "org.apache.spark.bagel.examples.PRKryoRegistrator") - -// var numIter = Int.MaxValue -// var isDynamic = false -// var tol:Float = 0.001F -// var outFname = "" -// var numVPart = 4 -// var numEPart = 4 - -// options.foreach{ -// case ("numIter", v) => numIter = v.toInt -// case ("dynamic", v) => isDynamic = v.toBoolean -// case ("tol", v) => tol = v.toFloat -// case ("output", v) => outFname = v -// case ("numVPart", v) => numVPart = v.toInt -// case ("numEPart", v) => numEPart = v.toInt -// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) -// } - -// val sc = new SparkContext(host, "PageRank(" + fname + ")") -// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() -// val startTime = System.currentTimeMillis - -// val numVertices = g.vertices.count() - -// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => -// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) -// } - -// // Do the computation -// val epsilon = 0.01 / numVertices -// val messages = sc.parallelize(Array[(String, PRMessage)]()) -// val utils = new PageRankUtils -// val result = -// Bagel.run( -// sc, vertices, messages, combiner = new PRCombiner(), -// numPartitions = numVPart)( -// utils.computeWithCombiner(numVertices, epsilon, numIter)) - -// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) -// if (!outFname.isEmpty) { -// println("Saving pageranks of pages to " + outFname) -// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) -// } -// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") -// sc.stop() -// } -// } diff --git a/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala b/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala deleted file mode 100644 index 01bd968550..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/perf/SparkTest.scala +++ /dev/null @@ -1,75 +0,0 @@ -///// This file creates circular dependencies between examples bagle and graph - - -// package org.apache.spark.graph.perf - -// import org.apache.spark._ -// import org.apache.spark.SparkContext._ -// import org.apache.spark.bagel.Bagel -// import 
org.apache.spark.bagel.examples._ -// import org.apache.spark.graph._ - - -// object SparkTest { - -// def main(args: Array[String]) { -// val host = args(0) -// val taskType = args(1) -// val fname = args(2) -// val options = args.drop(3).map { arg => -// arg.dropWhile(_ == '-').split('=') match { -// case Array(opt, v) => (opt -> v) -// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) -// } -// } - -// System.setProperty("spark.serializer", "org.apache.spark.KryoSerializer") -// //System.setProperty("spark.shuffle.compress", "false") -// System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") - -// var numIter = Int.MaxValue -// var isDynamic = false -// var tol:Float = 0.001F -// var outFname = "" -// var numVPart = 4 -// var numEPart = 4 - -// options.foreach{ -// case ("numIter", v) => numIter = v.toInt -// case ("dynamic", v) => isDynamic = v.toBoolean -// case ("tol", v) => tol = v.toFloat -// case ("output", v) => outFname = v -// case ("numVPart", v) => numVPart = v.toInt -// case ("numEPart", v) => numEPart = v.toInt -// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) -// } - -// val sc = new SparkContext(host, "PageRank(" + fname + ")") -// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() -// val startTime = System.currentTimeMillis - -// val numVertices = g.vertices.count() - -// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => -// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) -// } - -// // Do the computation -// val epsilon = 0.01 / numVertices -// val messages = sc.parallelize(Array[(String, PRMessage)]()) -// val utils = new PageRankUtils -// val result = -// Bagel.run( -// sc, vertices, messages, combiner = new PRCombiner(), -// numPartitions = numVPart)( -// utils.computeWithCombiner(numVertices, epsilon, numIter)) - -// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) -// if (!outFname.isEmpty) { -// println("Saving pageranks of pages to " + outFname) -// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) -// } -// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") -// sc.stop() -// } -// } diff --git a/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala b/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala deleted file mode 100644 index bc00ce2151..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/util/BytecodeUtils.scala +++ /dev/null @@ -1,114 +0,0 @@ -package org.apache.spark.graph.util - -import java.io.{ByteArrayInputStream, ByteArrayOutputStream} - -import scala.collection.mutable.HashSet - -import org.apache.spark.util.Utils - -import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} -import org.objectweb.asm.Opcodes._ - - - -private[spark] object BytecodeUtils { - - /** - * Test whether the given closure invokes the specified method in the specified class. 
- */ - def invokedMethod(closure: AnyRef, targetClass: Class[_], targetMethod: String): Boolean = { - if (_invokedMethod(closure.getClass, "apply", targetClass, targetMethod)) { - true - } else { - // look at closures enclosed in this closure - for (f <- closure.getClass.getDeclaredFields - if f.getType.getName.startsWith("scala.Function")) { - f.setAccessible(true) - if (invokedMethod(f.get(closure), targetClass, targetMethod)) { - return true - } - } - return false - } - } - - private def _invokedMethod(cls: Class[_], method: String, - targetClass: Class[_], targetMethod: String): Boolean = { - - val seen = new HashSet[(Class[_], String)] - var stack = List[(Class[_], String)]((cls, method)) - - while (stack.nonEmpty) { - val (c, m) = stack.head - stack = stack.tail - seen.add((c, m)) - val finder = new MethodInvocationFinder(c.getName, m) - getClassReader(c).accept(finder, 0) - for (classMethod <- finder.methodsInvoked) { - //println(classMethod) - if (classMethod._1 == targetClass && classMethod._2 == targetMethod) { - return true - } else if (!seen.contains(classMethod)) { - stack = classMethod :: stack - } - } - } - return false - } - - /** - * Get an ASM class reader for a given class from the JAR that loaded it. - */ - private def getClassReader(cls: Class[_]): ClassReader = { - // Copy data over, before delegating to ClassReader - else we can run out of open file handles. - val className = cls.getName.replaceFirst("^.*\\.", "") + ".class" - val resourceStream = cls.getResourceAsStream(className) - // todo: Fixme - continuing with earlier behavior ... - if (resourceStream == null) return new ClassReader(resourceStream) - - val baos = new ByteArrayOutputStream(128) - Utils.copyStream(resourceStream, baos, true) - new ClassReader(new ByteArrayInputStream(baos.toByteArray)) - } - - /** - * Given the class name, return whether we should look into the class or not. This is used to - * skip examing a large quantity of Java or Scala classes that we know for sure wouldn't access - * the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of "."). - */ - private def skipClass(className: String): Boolean = { - val c = className - c.startsWith("java/") || c.startsWith("scala/") || c.startsWith("javax/") - } - - /** - * Find the set of methods invoked by the specified method in the specified class. - * For example, after running the visitor, - * MethodInvocationFinder("spark/graph/Foo", "test") - * its methodsInvoked variable will contain the set of methods invoked directly by - * Foo.test(). Interface invocations are not returned as part of the result set because we cannot - * determine the actual metod invoked by inspecting the bytecode. 
- */ - private class MethodInvocationFinder(className: String, methodName: String) - extends ClassVisitor(ASM4) { - - val methodsInvoked = new HashSet[(Class[_], String)] - - override def visitMethod(access: Int, name: String, desc: String, - sig: String, exceptions: Array[String]): MethodVisitor = { - if (name == methodName) { - new MethodVisitor(ASM4) { - override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { - if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { - if (!skipClass(owner)) { - methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) - } - } - } - } - } else { - null - } - } - } -} diff --git a/graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala b/graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala deleted file mode 100644 index 51f45cb892..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/util/GraphGenerators.scala +++ /dev/null @@ -1,282 +0,0 @@ -package org.apache.spark.graph.util - -import scala.annotation.tailrec -import scala.math._ -import scala.reflect.ClassTag -import scala.util._ - -import org.apache.spark._ -import org.apache.spark.serializer._ -import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.Graph -import org.apache.spark.graph.Edge -import org.apache.spark.graph.impl.GraphImpl - -/** - * @todo cleanup and modularize code - */ -object GraphGenerators { - - val RMATa = 0.45 - val RMATb = 0.15 - val RMATc = 0.15 - val RMATd = 0.25 - - def main(args: Array[String]) { - - - val serializer = "org.apache.spark.serializer.KryoSerializer" - System.setProperty("spark.serializer", serializer) - //System.setProperty("spark.shuffle.compress", "false") - System.setProperty("spark.kryo.registrator", "spark.graph.GraphKryoRegistrator") - val host = "local[4]" - val sc = new SparkContext(host, "Lognormal graph generator") - - val lnGraph = logNormalGraph(sc, 10000) - - val rmat = rmatGraph(sc, 1000, 3000) - - //for (v <- lnGraph.vertices) { - // println(v.id + ":\t" + v.data) - //} - - val times = 100000 - //val nums = (1 to times).flatMap { n => List(sampleLogNormal(4.0, 1.3, times)) }.toList - //val avg = nums.sum / nums.length - //val sumSquares = nums.foldLeft(0.0) {(total, next) => - // (total + math.pow((next - avg), 2)) } - //val stdev = math.sqrt(sumSquares/(nums.length - 1)) - - //println("avg: " + avg + "+-" + stdev) - - - //for (i <- 1 to 1000) { - // println(sampleLogNormal(4.0, 1.3, 1000)) - //} - - sc.stop() - - } - - - // Right now it just generates a bunch of edges where - // the edge data is the weight (default 1) - def logNormalGraph(sc: SparkContext, numVertices: Int): Graph[Int, Int] = { - // based on Pregel settings - val mu = 4 - val sigma = 1.3 - //val vertsAndEdges = (0 until numVertices).flatMap { src => { - - val vertices: RDD[(VertexID, Int)] = sc.parallelize(0 until numVertices).map{ - src => (src, sampleLogNormal(mu, sigma, numVertices)) - } - - val edges = vertices.flatMap{ - v => generateRandomEdges(v._1.toInt, v._2, numVertices) - } - - Graph(vertices, edges, 0) - //println("Vertices:") - //for (v <- vertices) { - // println(v.id) - //} - - //println("Edges") - //for (e <- edges) { - // println(e.src, e.dst, e.data) - //} - - } - - - def generateRandomEdges(src: Int, numEdges: Int, maxVertexID: Int): Array[Edge[Int]] = { - val rand = new Random() - var dsts: Set[Int] = Set() - while (dsts.size < numEdges) { - val 
nextDst = rand.nextInt(maxVertexID) - if (nextDst != src) { - dsts += nextDst - } - } - dsts.map {dst => Edge[Int](src, dst, 1) }.toArray - } - - - /** - * Randomly samples from a log normal distribution - * whose corresponding normal distribution has the - * the given mean and standard deviation. It uses - * the formula X = exp(m+s*Z) where m, s are the - * mean, standard deviation of the lognormal distribution - * and Z~N(0, 1). In this function, - * m = e^(mu+sigma^2/2) and - * s = sqrt[(e^(sigma^2) - 1)(e^(2*mu+sigma^2))]. - * - * @param mu the mean of the normal distribution - * @param sigma the standard deviation of the normal distribution - * @param macVal exclusive upper bound on the value of the sample - */ - def sampleLogNormal(mu: Double, sigma: Double, maxVal: Int): Int = { - val rand = new Random() - val m = math.exp(mu+(sigma*sigma)/2.0) - val s = math.sqrt((math.exp(sigma*sigma) - 1) * math.exp(2*mu + sigma*sigma)) - // Z ~ N(0, 1) - var X: Double = maxVal - - while (X >= maxVal) { - val Z = rand.nextGaussian() - //X = math.exp((m + s*Z)) - X = math.exp((mu + sigma*Z)) - } - math.round(X.toFloat) - } - - - - def rmatGraph(sc: SparkContext, requestedNumVertices: Int, numEdges: Int): Graph[Int, Int] = { - // let N = requestedNumVertices - // the number of vertices is 2^n where n=ceil(log2[N]) - // This ensures that the 4 quadrants are the same size at all recursion levels - val numVertices = math.round(math.pow(2.0, math.ceil(math.log(requestedNumVertices)/math.log(2.0)))).toInt - var edges: Set[Edge[Int]] = Set() - while (edges.size < numEdges) { - if (edges.size % 100 == 0) { - println(edges.size + " edges") - } - edges += addEdge(numVertices) - - } - val graph = outDegreeFromEdges(sc.parallelize(edges.toList)) - graph - - } - - def outDegreeFromEdges[ED: ClassTag](edges: RDD[Edge[ED]]): Graph[Int, ED] = { - - val vertices = edges.flatMap { edge => List((edge.srcId, 1)) } - .reduceByKey(_ + _) - .map{ case (vid, degree) => (vid, degree) } - Graph(vertices, edges, 0) - } - - /** - * @param numVertices Specifies the total number of vertices in the graph (used to get - * the dimensions of the adjacency matrix - */ - def addEdge(numVertices: Int): Edge[Int] = { - //val (src, dst) = chooseCell(numVertices/2.0, numVertices/2.0, numVertices/2.0) - val v = math.round(numVertices.toFloat/2.0).toInt - - val (src, dst) = chooseCell(v, v, v) - Edge[Int](src, dst, 1) - } - - - /** - * This method recursively subdivides the the adjacency matrix into quadrants - * until it picks a single cell. The naming conventions in this paper match - * those of the R-MAT paper. There are a power of 2 number of nodes in the graph. - * The adjacency matrix looks like: - * - * dst -> - * (x,y) *************** _ - * | | | | - * | a | b | | - * src | | | | - * | *************** | T - * \|/ | | | | - * | c | d | | - * | | | | - * *************** - - * - * where this represents the subquadrant of the adj matrix currently being - * subdivided. (x,y) represent the upper left hand corner of the subquadrant, - * and T represents the side length (guaranteed to be a power of 2). 
- * - * After choosing the next level subquadrant, we get the resulting sets - * of parameters: - * quad = a, x'=x, y'=y, T'=T/2 - * quad = b, x'=x+T/2, y'=y, T'=T/2 - * quad = c, x'=x, y'=y+T/2, T'=T/2 - * quad = d, x'=x+T/2, y'=y+T/2, T'=T/2 - * - * @param src is the - */ - @tailrec - def chooseCell(x: Int, y: Int, t: Int): (Int, Int) = { - if (t <= 1) - (x,y) - else { - val newT = math.round(t.toFloat/2.0).toInt - pickQuadrant(RMATa, RMATb, RMATc, RMATd) match { - case 0 => chooseCell(x, y, newT) - case 1 => chooseCell(x+newT, y, newT) - case 2 => chooseCell(x, y+newT, newT) - case 3 => chooseCell(x+newT, y+newT, newT) - } - } - } - - // TODO(crankshaw) turn result into an enum (or case class for pattern matching} - def pickQuadrant(a: Double, b: Double, c: Double, d: Double): Int = { - if (a+b+c+d != 1.0) { - throw new IllegalArgumentException("R-MAT probability parameters sum to " + (a+b+c+d) + ", should sum to 1.0") - } - val rand = new Random() - val result = rand.nextDouble() - result match { - case x if x < a => 0 // 0 corresponds to quadrant a - case x if (x >= a && x < a+b) => 1 // 1 corresponds to b - case x if (x >= a+b && x < a+b+c) => 2 // 2 corresponds to c - case _ => 3 // 3 corresponds to d - } - } - - - - /** - * Create `rows` by `cols` grid graph with each vertex connected to its - * row+1 and col+1 neighbors. Vertex ids are assigned in row major - * order. - * - * @param sc the spark context in which to construct the graph - * @param rows the number of rows - * @param cols the number of columns - * - * @return A graph containing vertices with the row and column ids - * as their attributes and edge values as 1.0. - */ - def gridGraph(sc: SparkContext, rows: Int, cols: Int): Graph[(Int,Int), Double] = { - // Convert row column address into vertex ids (row major order) - def sub2ind(r: Int, c: Int): VertexID = r * cols + c - - val vertices: RDD[(VertexID, (Int,Int))] = - sc.parallelize(0 until rows).flatMap( r => (0 until cols).map( c => (sub2ind(r,c), (r,c)) ) ) - val edges: RDD[Edge[Double]] = - vertices.flatMap{ case (vid, (r,c)) => - (if (r+1 < rows) { Seq( (sub2ind(r, c), sub2ind(r+1, c))) } else { Seq.empty }) ++ - (if (c+1 < cols) { Seq( (sub2ind(r, c), sub2ind(r, c+1))) } else { Seq.empty }) - }.map{ case (src, dst) => Edge(src, dst, 1.0) } - Graph(vertices, edges) - } // end of gridGraph - - /** - * Create a star graph with vertex 0 being the center. - * - * @param sc the spark context in which to construct the graph - * @param nverts the number of vertices in the star - * - * @return A star graph containing `nverts` vertices with vertex 0 - * being the center vertex. - */ - def starGraph(sc: SparkContext, nverts: Int): Graph[Int, Int] = { - val edges: RDD[(VertexID, VertexID)] = sc.parallelize(1 until nverts).map(vid => (vid, 0)) - Graph.fromEdgeTuples(edges, 1) - } // end of starGraph - - - -} // end of Graph Generators diff --git a/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala b/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala deleted file mode 100644 index cb18ef3d26..0000000000 --- a/graph/src/main/scala/org/apache/spark/graph/util/HashUtils.scala +++ /dev/null @@ -1,21 +0,0 @@ -package org.apache.spark.graph.util - - -object HashUtils { - - /** - * Compute a 64-bit hash value for the given string. 
- * See http://stackoverflow.com/questions/1660501/what-is-a-good-64bit-hash-function-in-java-for-textual-strings - */ - def hash(str: String): Long = { - var h = 1125899906842597L - val len = str.length - var i = 0 - - while (i < len) { - h = 31 * h + str(i) - i += 1 - } - h - } -} diff --git a/graph/src/test/resources/log4j.properties b/graph/src/test/resources/log4j.properties deleted file mode 100644 index 896936d8c4..0000000000 --- a/graph/src/test/resources/log4j.properties +++ /dev/null @@ -1,28 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Set everything to be logged to the file core/target/unit-tests.log -log4j.rootCategory=INFO, file -log4j.appender.file=org.apache.log4j.FileAppender -log4j.appender.file.append=false -log4j.appender.file.file=graph/target/unit-tests.log -log4j.appender.file.layout=org.apache.log4j.PatternLayout -log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n - -# Ignore messages below warning level from Jetty, because it's a bit verbose -log4j.logger.org.eclipse.jetty=WARN -org.eclipse.jetty.LEVEL=WARN diff --git a/graph/src/test/scala/org/apache/spark/graph/GraphOpsSuite.scala b/graph/src/test/scala/org/apache/spark/graph/GraphOpsSuite.scala deleted file mode 100644 index 132e6be24a..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/GraphOpsSuite.scala +++ /dev/null @@ -1,92 +0,0 @@ -package org.apache.spark.graph - -import org.apache.spark.SparkContext -import org.apache.spark.graph.Graph._ -import org.apache.spark.graph.impl.EdgePartition -import org.apache.spark.rdd._ -import org.scalatest.FunSuite - -class GraphOpsSuite extends FunSuite with LocalSparkContext { - - test("aggregateNeighbors") { - withSpark { sc => - val n = 3 - val star = - Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID))), 1) - - val indegrees = star.aggregateNeighbors( - (vid, edge) => Some(1), - (a: Int, b: Int) => a + b, - EdgeDirection.In) - assert(indegrees.collect().toSet === (1 to n).map(x => (x, 1)).toSet) - - val outdegrees = star.aggregateNeighbors( - (vid, edge) => Some(1), - (a: Int, b: Int) => a + b, - EdgeDirection.Out) - assert(outdegrees.collect().toSet === Set((0, n))) - - val noVertexValues = star.aggregateNeighbors[Int]( - (vid: VertexID, edge: EdgeTriplet[Int, Int]) => None, - (a: Int, b: Int) => throw new Exception("reduceFunc called unexpectedly"), - EdgeDirection.In) - assert(noVertexValues.collect().toSet === Set.empty[(VertexID, Int)]) - } - } - - test("joinVertices") { - withSpark { sc => - val vertices = - sc.parallelize(Seq[(VertexID, String)]((1, "one"), (2, "two"), (3, "three")), 2) - val edges = sc.parallelize((Seq(Edge(1, 2, "onetwo")))) - val g: Graph[String, String] = Graph(vertices, edges) - - val tbl = 
sc.parallelize(Seq[(VertexID, Int)]((1, 10), (2, 20))) - val g1 = g.joinVertices(tbl) { (vid: VertexID, attr: String, u: Int) => attr + u } - - val v = g1.vertices.collect().toSet - assert(v === Set((1, "one10"), (2, "two20"), (3, "three"))) - } - } - - test("collectNeighborIds") { - withSpark { sc => - val chain = (0 until 100).map(x => (x, (x+1)%100) ) - val rawEdges = sc.parallelize(chain, 3).map { case (s,d) => (s.toLong, d.toLong) } - val graph = Graph.fromEdgeTuples(rawEdges, 1.0) - val nbrs = graph.collectNeighborIds(EdgeDirection.Both) - assert(nbrs.count === chain.size) - assert(graph.numVertices === nbrs.count) - nbrs.collect.foreach { case (vid, nbrs) => assert(nbrs.size === 2) } - nbrs.collect.foreach { case (vid, nbrs) => - val s = nbrs.toSet - assert(s.contains((vid + 1) % 100)) - assert(s.contains(if (vid > 0) vid - 1 else 99 )) - } - } - } - - test ("filter") { - withSpark { sc => - val n = 5 - val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x))) - val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x))) - val graph: Graph[Int, Int] = Graph(vertices, edges) - val filteredGraph = graph.filter( - graph => { - val degrees: VertexRDD[Int] = graph.outDegrees - graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)} - }, - vpred = (vid: VertexID, deg:Int) => deg > 0 - ) - - val v = filteredGraph.vertices.collect().toSet - assert(v === Set((0,0))) - - // the map is necessary because of object-reuse in the edge iterator - val e = filteredGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet - assert(e.isEmpty) - } - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/GraphSuite.scala b/graph/src/test/scala/org/apache/spark/graph/GraphSuite.scala deleted file mode 100644 index 41f3a8311d..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/GraphSuite.scala +++ /dev/null @@ -1,272 +0,0 @@ -package org.apache.spark.graph - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.graph.Graph._ -import org.apache.spark.rdd._ - -class GraphSuite extends FunSuite with LocalSparkContext { - - def starGraph(sc: SparkContext, n: Int): Graph[String, Int] = { - Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID)), 3), "v") - } - - test("Graph.fromEdgeTuples") { - withSpark { sc => - val ring = (0L to 100L).zip((1L to 99L) :+ 0L) - val doubleRing = ring ++ ring - val graph = Graph.fromEdgeTuples(sc.parallelize(doubleRing), 1) - assert(graph.edges.count() === doubleRing.size) - assert(graph.edges.collect.forall(e => e.attr == 1)) - - // uniqueEdges option should uniquify edges and store duplicate count in edge attributes - val uniqueGraph = Graph.fromEdgeTuples(sc.parallelize(doubleRing), 1, Some(RandomVertexCut)) - assert(uniqueGraph.edges.count() === ring.size) - assert(uniqueGraph.edges.collect.forall(e => e.attr == 2)) - } - } - - test("Graph.fromEdges") { - withSpark { sc => - val ring = (0L to 100L).zip((1L to 99L) :+ 0L).map { case (a, b) => Edge(a, b, 1) } - val graph = Graph.fromEdges(sc.parallelize(ring), 1.0F) - assert(graph.edges.count() === ring.size) - } - } - - test("Graph.apply") { - withSpark { sc => - val rawEdges = (0L to 98L).zip((1L to 99L) :+ 0L) - val edges: RDD[Edge[Int]] = sc.parallelize(rawEdges).map { case (s, t) => Edge(s, t, 1) } - val vertices: RDD[(VertexID, Boolean)] = sc.parallelize((0L until 10L).map(id => (id, true))) - val graph = Graph(vertices, edges, false) - assert( graph.edges.count() === rawEdges.size ) - // Vertices 
not explicitly provided but referenced by edges should be created automatically - assert( graph.vertices.count() === 100) - graph.triplets.map { et => - assert((et.srcId < 10 && et.srcAttr) || (et.srcId >= 10 && !et.srcAttr)) - assert((et.dstId < 10 && et.dstAttr) || (et.dstId >= 10 && !et.dstAttr)) - } - } - } - - test("triplets") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n) - assert(star.triplets.map(et => (et.srcId, et.dstId, et.srcAttr, et.dstAttr)).collect.toSet === - (1 to n).map(x => (0: VertexID, x: VertexID, "v", "v")).toSet) - } - } - - test("partitionBy") { - withSpark { sc => - def mkGraph(edges: List[(Long, Long)]) = Graph.fromEdgeTuples(sc.parallelize(edges, 2), 0) - def nonemptyParts(graph: Graph[Int, Int]) = { - graph.edges.partitionsRDD.mapPartitions { iter => - Iterator(iter.next()._2.iterator.toList) - }.filter(_.nonEmpty) - } - val identicalEdges = List((0L, 1L), (0L, 1L)) - val canonicalEdges = List((0L, 1L), (1L, 0L)) - val sameSrcEdges = List((0L, 1L), (0L, 2L)) - - // The two edges start out in different partitions - for (edges <- List(identicalEdges, canonicalEdges, sameSrcEdges)) { - assert(nonemptyParts(mkGraph(edges)).count === 2) - } - // partitionBy(RandomVertexCut) puts identical edges in the same partition - assert(nonemptyParts(mkGraph(identicalEdges).partitionBy(RandomVertexCut)).count === 1) - // partitionBy(EdgePartition1D) puts same-source edges in the same partition - assert(nonemptyParts(mkGraph(sameSrcEdges).partitionBy(EdgePartition1D)).count === 1) - // partitionBy(CanonicalRandomVertexCut) puts edges that are identical modulo direction into - // the same partition - assert(nonemptyParts(mkGraph(canonicalEdges).partitionBy(CanonicalRandomVertexCut)).count === 1) - // partitionBy(EdgePartition2D) puts identical edges in the same partition - assert(nonemptyParts(mkGraph(identicalEdges).partitionBy(EdgePartition2D)).count === 1) - - // partitionBy(EdgePartition2D) ensures that vertices need only be replicated to 2 * sqrt(p) - // partitions - val n = 100 - val p = 100 - val verts = 1 to n - val graph = Graph.fromEdgeTuples(sc.parallelize(verts.flatMap(x => - verts.filter(y => y % x == 0).map(y => (x: VertexID, y: VertexID))), p), 0) - assert(graph.edges.partitions.length === p) - val partitionedGraph = graph.partitionBy(EdgePartition2D) - assert(graph.edges.partitions.length === p) - val bound = 2 * math.sqrt(p) - // Each vertex should be replicated to at most 2 * sqrt(p) partitions - val partitionSets = partitionedGraph.edges.partitionsRDD.mapPartitions { iter => - val part = iter.next()._2 - Iterator((part.srcIds ++ part.dstIds).toSet) - }.collect - assert(verts.forall(id => partitionSets.count(_.contains(id)) <= bound)) - // This should not be true for the default hash partitioning - val partitionSetsUnpartitioned = graph.edges.partitionsRDD.mapPartitions { iter => - val part = iter.next()._2 - Iterator((part.srcIds ++ part.dstIds).toSet) - }.collect - assert(verts.exists(id => partitionSetsUnpartitioned.count(_.contains(id)) > bound)) - } - } - - test("mapVertices") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n) - // mapVertices preserving type - val mappedVAttrs = star.mapVertices((vid, attr) => attr + "2") - assert(mappedVAttrs.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, "v2")).toSet) - // mapVertices changing type - val mappedVAttrs2 = star.mapVertices((vid, attr) => attr.length) - assert(mappedVAttrs2.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, 1)).toSet) - } - } - - 
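The remaining tests reuse the same handful of API calls; as a condensed usage sketch of those calls (not taken from the suite; assumes an existing SparkContext sc set up as in LocalSparkContext further below):

    import org.apache.spark.SparkContext._
    import org.apache.spark.graph._

    // A directed ring 0 -> 1 -> ... -> 4 -> 0, every edge attribute set to 1,
    // built the same way the fixtures above are built.
    val n = 5
    val ring = Graph.fromEdgeTuples(
      sc.parallelize((0L until n.toLong).map(i => (i, (i + 1) % n)), 3), 1)

    // In-degrees via mapReduceTriplets: send a 1 to every destination, then sum per vertex.
    val inDegrees = ring.mapReduceTriplets(
      et => Iterator((et.dstId, 1)),
      (a: Int, b: Int) => a + b)
    assert(inDegrees.collect().toSet == (0L until n.toLong).map(v => (v, 1)).toSet)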
test("mapEdges") { - withSpark { sc => - val n = 3 - val star = starGraph(sc, n) - val starWithEdgeAttrs = star.mapEdges(e => e.dstId) - - val edges = starWithEdgeAttrs.edges.collect() - assert(edges.size === n) - assert(edges.toSet === (1 to n).map(x => Edge(0, x, x)).toSet) - } - } - - test("mapTriplets") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n) - assert(star.mapTriplets(et => et.srcAttr + et.dstAttr).edges.collect.toSet === - (1L to n).map(x => Edge(0, x, "vv")).toSet) - } - } - - test("reverse") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n) - assert(star.reverse.outDegrees.collect.toSet === (1 to n).map(x => (x: VertexID, 1)).toSet) - } - } - - test("subgraph") { - withSpark { sc => - // Create a star graph with 10 leaf vertices. - val n = 10 - val star = starGraph(sc, n) - // Take only vertices whose vids are even - val subgraph = star.subgraph(vpred = (vid, attr) => vid % 2 == 0) - - // We should keep the 6 even-numbered vertices. - assert(subgraph.vertices.collect().toSet === (0 to n by 2).map(x => (x, "v")).toSet) - - // And 5 edges. - assert(subgraph.edges.map(_.copy()).collect().toSet === (2 to n by 2).map(x => Edge(0, x, 1)).toSet) - } - } - - test("mask") { - withSpark { sc => - val n = 5 - val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x))) - val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x))) - val graph: Graph[Int, Int] = Graph(vertices, edges) - - val subgraph = graph.subgraph( - e => e.dstId != 4L, - (vid, vdata) => vid != 3L - ).mapVertices((vid, vdata) => -1).mapEdges(e => -1) - - val projectedGraph = graph.mask(subgraph) - - val v = projectedGraph.vertices.collect().toSet - assert(v === Set((0,0), (1,1), (2,2), (4,4), (5,5))) - - // the map is necessary because of object-reuse in the edge iterator - val e = projectedGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet - assert(e === Set(Edge(0,1,1), Edge(0,2,2), Edge(0,5,5))) - - } - } - - test("groupEdges") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n) - val doubleStar = Graph.fromEdgeTuples( - sc.parallelize((1 to n).flatMap(x => - List((0: VertexID, x: VertexID), (0: VertexID, x: VertexID))), 1), "v") - val star2 = doubleStar.groupEdges { (a, b) => a} - assert(star2.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int]) === - star.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int])) - assert(star2.vertices.collect.toSet === star.vertices.collect.toSet) - } - } - - test("mapReduceTriplets") { - withSpark { sc => - val n = 5 - val star = starGraph(sc, n).mapVertices { (_, _) => 0 } - val starDeg = star.joinVertices(star.degrees){ (vid, oldV, deg) => deg } - val neighborDegreeSums = starDeg.mapReduceTriplets( - edge => Iterator((edge.srcId, edge.dstAttr), (edge.dstId, edge.srcAttr)), - (a: Int, b: Int) => a + b) - assert(neighborDegreeSums.collect().toSet === (0 to n).map(x => (x, n)).toSet) - - // activeSetOpt - val allPairs = for (x <- 1 to n; y <- 1 to n) yield (x: VertexID, y: VertexID) - val complete = Graph.fromEdgeTuples(sc.parallelize(allPairs, 3), 0) - val vids = complete.mapVertices((vid, attr) => vid).cache() - val active = vids.vertices.filter { case (vid, attr) => attr % 2 == 0 } - val numEvenNeighbors = vids.mapReduceTriplets(et => { - // Map function should only run on edges with destination in the active set - if (et.dstId % 2 != 0) { - throw new Exception("map ran on edge with dst vid %d, which is odd".format(et.dstId)) - } - Iterator((et.srcId, 1)) - }, (a: Int, b: Int) => a + b, Some((active, 
EdgeDirection.In))).collect.toSet - assert(numEvenNeighbors === (1 to n).map(x => (x: VertexID, n / 2)).toSet) - - // outerJoinVertices followed by mapReduceTriplets(activeSetOpt) - val ringEdges = sc.parallelize((0 until n).map(x => (x: VertexID, (x+1) % n: VertexID)), 3) - val ring = Graph.fromEdgeTuples(ringEdges, 0) .mapVertices((vid, attr) => vid).cache() - val changed = ring.vertices.filter { case (vid, attr) => attr % 2 == 1 }.mapValues(-_) - val changedGraph = ring.outerJoinVertices(changed) { (vid, old, newOpt) => newOpt.getOrElse(old) } - val numOddNeighbors = changedGraph.mapReduceTriplets(et => { - // Map function should only run on edges with source in the active set - if (et.srcId % 2 != 1) { - throw new Exception("map ran on edge with src vid %d, which is even".format(et.dstId)) - } - Iterator((et.dstId, 1)) - }, (a: Int, b: Int) => a + b, Some(changed, EdgeDirection.Out)).collect.toSet - assert(numOddNeighbors === (2 to n by 2).map(x => (x: VertexID, 1)).toSet) - - } - } - - test("outerJoinVertices") { - withSpark { sc => - val n = 5 - val reverseStar = starGraph(sc, n).reverse - // outerJoinVertices changing type - val reverseStarDegrees = - reverseStar.outerJoinVertices(reverseStar.outDegrees) { (vid, a, bOpt) => bOpt.getOrElse(0) } - val neighborDegreeSums = reverseStarDegrees.mapReduceTriplets( - et => Iterator((et.srcId, et.dstAttr), (et.dstId, et.srcAttr)), - (a: Int, b: Int) => a + b).collect.toSet - assert(neighborDegreeSums === Set((0: VertexID, n)) ++ (1 to n).map(x => (x: VertexID, 0))) - // outerJoinVertices preserving type - val messages = reverseStar.vertices.mapValues { (vid, attr) => vid.toString } - val newReverseStar = - reverseStar.outerJoinVertices(messages) { (vid, a, bOpt) => a + bOpt.getOrElse("") } - assert(newReverseStar.vertices.map(_._2).collect.toSet === - (0 to n).map(x => "v%d".format(x)).toSet) - } - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/LocalSparkContext.scala b/graph/src/test/scala/org/apache/spark/graph/LocalSparkContext.scala deleted file mode 100644 index 5c20d559aa..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/LocalSparkContext.scala +++ /dev/null @@ -1,28 +0,0 @@ -package org.apache.spark.graph - -import org.scalatest.Suite -import org.scalatest.BeforeAndAfterEach - -import org.apache.spark.SparkContext - - -/** - * Provides a method to run tests against a {@link SparkContext} variable that is correctly stopped - * after each test. -*/ -trait LocalSparkContext { - System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") - System.setProperty("spark.kryo.registrator", "org.apache.spark.graph.GraphKryoRegistrator") - - /** Runs `f` on a new SparkContext and ensures that it is stopped afterwards. 
*/ - def withSpark[T](f: SparkContext => T) = { - val sc = new SparkContext("local", "test") - try { - f(sc) - } finally { - sc.stop() - // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown - System.clearProperty("spark.driver.port") - } - } -} diff --git a/graph/src/test/scala/org/apache/spark/graph/PregelSuite.scala b/graph/src/test/scala/org/apache/spark/graph/PregelSuite.scala deleted file mode 100644 index de7e3872ca..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/PregelSuite.scala +++ /dev/null @@ -1,41 +0,0 @@ -package org.apache.spark.graph - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.rdd._ - -class PregelSuite extends FunSuite with LocalSparkContext { - - test("1 iteration") { - withSpark { sc => - val n = 5 - val star = - Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID)), 3), "v") - val result = Pregel(star, 0)( - (vid, attr, msg) => attr, - et => Iterator.empty, - (a: Int, b: Int) => throw new Exception("mergeMsg run unexpectedly")) - assert(result.vertices.collect.toSet === star.vertices.collect.toSet) - } - } - - test("chain propagation") { - withSpark { sc => - val n = 5 - val chain = Graph.fromEdgeTuples( - sc.parallelize((1 until n).map(x => (x: VertexID, x + 1: VertexID)), 3), - 0).cache() - assert(chain.vertices.collect.toSet === (1 to n).map(x => (x: VertexID, 0)).toSet) - val chainWithSeed = chain.mapVertices { (vid, attr) => if (vid == 1) 1 else 0 } - assert(chainWithSeed.vertices.collect.toSet === - Set((1: VertexID, 1)) ++ (2 to n).map(x => (x: VertexID, 0)).toSet) - val result = Pregel(chainWithSeed, 0)( - (vid, attr, msg) => math.max(msg, attr), - et => Iterator((et.dstId, et.srcAttr)), - (a: Int, b: Int) => math.max(a, b)) - assert(result.vertices.collect.toSet === - chain.vertices.mapValues { (vid, attr) => attr + 1 }.collect.toSet) - } - } -} diff --git a/graph/src/test/scala/org/apache/spark/graph/SerializerSuite.scala b/graph/src/test/scala/org/apache/spark/graph/SerializerSuite.scala deleted file mode 100644 index 2864ffd1ca..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/SerializerSuite.scala +++ /dev/null @@ -1,183 +0,0 @@ -package org.apache.spark.graph - -import java.io.{EOFException, ByteArrayInputStream, ByteArrayOutputStream} - -import scala.util.Random - -import org.scalatest.FunSuite - -import org.apache.spark._ -import org.apache.spark.graph.impl._ -import org.apache.spark.graph.impl.MsgRDDFunctions._ -import org.apache.spark.serializer.SerializationStream - - -class SerializerSuite extends FunSuite with LocalSparkContext { - - test("IntVertexBroadcastMsgSerializer") { - val conf = new SparkConf(false) - val outMsg = new VertexBroadcastMsg[Int](3, 4, 5) - val bout = new ByteArrayOutputStream - val outStrm = new IntVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new IntVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: VertexBroadcastMsg[Int] = inStrm.readObject() - val inMsg2: VertexBroadcastMsg[Int] = inStrm.readObject() - assert(outMsg.vid === inMsg1.vid) - assert(outMsg.vid === inMsg2.vid) - assert(outMsg.data === inMsg1.data) - assert(outMsg.data === inMsg2.data) - - intercept[EOFException] { - inStrm.readObject() - } - } - - test("LongVertexBroadcastMsgSerializer") { - val conf = new 
SparkConf(false) - val outMsg = new VertexBroadcastMsg[Long](3, 4, 5) - val bout = new ByteArrayOutputStream - val outStrm = new LongVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new LongVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: VertexBroadcastMsg[Long] = inStrm.readObject() - val inMsg2: VertexBroadcastMsg[Long] = inStrm.readObject() - assert(outMsg.vid === inMsg1.vid) - assert(outMsg.vid === inMsg2.vid) - assert(outMsg.data === inMsg1.data) - assert(outMsg.data === inMsg2.data) - - intercept[EOFException] { - inStrm.readObject() - } - } - - test("DoubleVertexBroadcastMsgSerializer") { - val conf = new SparkConf(false) - val outMsg = new VertexBroadcastMsg[Double](3, 4, 5.0) - val bout = new ByteArrayOutputStream - val outStrm = new DoubleVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new DoubleVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: VertexBroadcastMsg[Double] = inStrm.readObject() - val inMsg2: VertexBroadcastMsg[Double] = inStrm.readObject() - assert(outMsg.vid === inMsg1.vid) - assert(outMsg.vid === inMsg2.vid) - assert(outMsg.data === inMsg1.data) - assert(outMsg.data === inMsg2.data) - - intercept[EOFException] { - inStrm.readObject() - } - } - - test("IntAggMsgSerializer") { - val conf = new SparkConf(false) - val outMsg = (4: VertexID, 5) - val bout = new ByteArrayOutputStream - val outStrm = new IntAggMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new IntAggMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: (VertexID, Int) = inStrm.readObject() - val inMsg2: (VertexID, Int) = inStrm.readObject() - assert(outMsg === inMsg1) - assert(outMsg === inMsg2) - - intercept[EOFException] { - inStrm.readObject() - } - } - - test("LongAggMsgSerializer") { - val conf = new SparkConf(false) - val outMsg = (4: VertexID, 1L << 32) - val bout = new ByteArrayOutputStream - val outStrm = new LongAggMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new LongAggMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: (VertexID, Long) = inStrm.readObject() - val inMsg2: (VertexID, Long) = inStrm.readObject() - assert(outMsg === inMsg1) - assert(outMsg === inMsg2) - - intercept[EOFException] { - inStrm.readObject() - } - } - - test("DoubleAggMsgSerializer") { - val conf = new SparkConf(false) - val outMsg = (4: VertexID, 5.0) - val bout = new ByteArrayOutputStream - val outStrm = new DoubleAggMsgSerializer(conf).newInstance().serializeStream(bout) - outStrm.writeObject(outMsg) - outStrm.writeObject(outMsg) - bout.flush() - val bin = new ByteArrayInputStream(bout.toByteArray) - val inStrm = new DoubleAggMsgSerializer(conf).newInstance().deserializeStream(bin) - val inMsg1: (VertexID, Double) = inStrm.readObject() - val inMsg2: (VertexID, Double) = inStrm.readObject() - assert(outMsg === inMsg1) - assert(outMsg === inMsg2) - - intercept[EOFException] { 
- inStrm.readObject() - } - } - - test("TestShuffleVertexBroadcastMsg") { - withSpark { sc => - val bmsgs = sc.parallelize(0 until 100, 10).map { pid => - new VertexBroadcastMsg[Int](pid, pid, pid) - } - bmsgs.partitionBy(new HashPartitioner(3)).collect() - } - } - - test("variable long encoding") { - def testVarLongEncoding(v: Long, optimizePositive: Boolean) { - val bout = new ByteArrayOutputStream - val stream = new ShuffleSerializationStream(bout) { - def writeObject[T](t: T): SerializationStream = { - writeVarLong(t.asInstanceOf[Long], optimizePositive = optimizePositive) - this - } - } - stream.writeObject(v) - - val bin = new ByteArrayInputStream(bout.toByteArray) - val dstream = new ShuffleDeserializationStream(bin) { - def readObject[T](): T = { - readVarLong(optimizePositive).asInstanceOf[T] - } - } - val read = dstream.readObject[Long]() - assert(read === v) - } - - // Test all variable encoding code path (each branch uses 7 bits, i.e. 1L << 7 difference) - val d = Random.nextLong() % 128 - Seq[Long](0, 1L << 0 + d, 1L << 7 + d, 1L << 14 + d, 1L << 21 + d, 1L << 28 + d, 1L << 35 + d, - 1L << 42 + d, 1L << 49 + d, 1L << 56 + d, 1L << 63 + d).foreach { number => - testVarLongEncoding(number, optimizePositive = false) - testVarLongEncoding(number, optimizePositive = true) - testVarLongEncoding(-number, optimizePositive = false) - testVarLongEncoding(-number, optimizePositive = true) - } - } -} diff --git a/graph/src/test/scala/org/apache/spark/graph/VertexRDDSuite.scala b/graph/src/test/scala/org/apache/spark/graph/VertexRDDSuite.scala deleted file mode 100644 index e876b8e4e8..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/VertexRDDSuite.scala +++ /dev/null @@ -1,85 +0,0 @@ -package org.apache.spark.graph - -import org.apache.spark.SparkContext -import org.apache.spark.graph.Graph._ -import org.apache.spark.graph.impl.EdgePartition -import org.apache.spark.rdd._ -import org.scalatest.FunSuite - -class VertexRDDSuite extends FunSuite with LocalSparkContext { - - def vertices(sc: SparkContext, n: Int) = { - VertexRDD(sc.parallelize((0 to n).map(x => (x.toLong, x)), 5)) - } - - test("filter") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val evens = verts.filter(q => ((q._2 % 2) == 0)) - assert(evens.count === (0 to n).filter(_ % 2 == 0).size) - } - } - - test("mapValues") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val negatives = verts.mapValues(x => -x).cache() // Allow joining b with a derived RDD of b - assert(negatives.count === n + 1) - } - } - - test("diff") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val flipEvens = verts.mapValues(x => if (x % 2 == 0) -x else x) - // diff should keep only the changed vertices - assert(verts.diff(flipEvens).map(_._2).collect().toSet === (2 to n by 2).map(-_).toSet) - // diff should keep the vertex values from `other` - assert(flipEvens.diff(verts).map(_._2).collect().toSet === (2 to n by 2).toSet) - } - } - - test("leftJoin") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val evens = verts.filter(q => ((q._2 % 2) == 0)) - // leftJoin with another VertexRDD - assert(verts.leftJoin(evens) { (id, a, bOpt) => a - bOpt.getOrElse(0) }.collect.toSet === - (0 to n by 2).map(x => (x.toLong, 0)).toSet ++ (1 to n by 2).map(x => (x.toLong, x)).toSet) - // leftJoin with an RDD - val evensRDD = evens.map(identity) - assert(verts.leftJoin(evensRDD) { (id, a, bOpt) => a - bOpt.getOrElse(0) }.collect.toSet === - (0 to n by 2).map(x => (x.toLong, 0)).toSet 
++ (1 to n by 2).map(x => (x.toLong, x)).toSet) - } - } - - test("innerJoin") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val evens = verts.filter(q => ((q._2 % 2) == 0)) - // innerJoin with another VertexRDD - assert(verts.innerJoin(evens) { (id, a, b) => a - b }.collect.toSet === - (0 to n by 2).map(x => (x.toLong, 0)).toSet) - // innerJoin with an RDD - val evensRDD = evens.map(identity) - assert(verts.innerJoin(evensRDD) { (id, a, b) => a - b }.collect.toSet === - (0 to n by 2).map(x => (x.toLong, 0)).toSet) } - } - - test("aggregateUsingIndex") { - withSpark { sc => - val n = 100 - val verts = vertices(sc, n) - val messageTargets = (0 to n) ++ (0 to n by 2) - val messages = sc.parallelize(messageTargets.map(x => (x.toLong, 1))) - assert(verts.aggregateUsingIndex[Int](messages, _ + _).collect.toSet === - (0 to n).map(x => (x.toLong, if (x % 2 == 0) 2 else 1)).toSet) - } - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/algorithms/ConnectedComponentsSuite.scala b/graph/src/test/scala/org/apache/spark/graph/algorithms/ConnectedComponentsSuite.scala deleted file mode 100644 index 81a1b7337f..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/algorithms/ConnectedComponentsSuite.scala +++ /dev/null @@ -1,83 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.util.GraphGenerators -import org.apache.spark.rdd._ - - -class ConnectedComponentsSuite extends FunSuite with LocalSparkContext { - - test("Grid Connected Components") { - withSpark { sc => - val gridGraph = GraphGenerators.gridGraph(sc, 10, 10).cache() - val ccGraph = ConnectedComponents.run(gridGraph).cache() - val maxCCid = ccGraph.vertices.map { case (vid, ccId) => ccId }.sum - assert(maxCCid === 0) - } - } // end of Grid connected components - - - test("Reverse Grid Connected Components") { - withSpark { sc => - val gridGraph = GraphGenerators.gridGraph(sc, 10, 10).reverse.cache() - val ccGraph = ConnectedComponents.run(gridGraph).cache() - val maxCCid = ccGraph.vertices.map { case (vid, ccId) => ccId }.sum - assert(maxCCid === 0) - } - } // end of Grid connected components - - - test("Chain Connected Components") { - withSpark { sc => - val chain1 = (0 until 9).map(x => (x, x+1) ) - val chain2 = (10 until 20).map(x => (x, x+1) ) - val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } - val twoChains = Graph.fromEdgeTuples(rawEdges, 1.0).cache() - val ccGraph = ConnectedComponents.run(twoChains).cache() - val vertices = ccGraph.vertices.collect() - for ( (id, cc) <- vertices ) { - if(id < 10) { assert(cc === 0) } - else { assert(cc === 10) } - } - val ccMap = vertices.toMap - for (id <- 0 until 20) { - if (id < 10) { - assert(ccMap(id) === 0) - } else { - assert(ccMap(id) === 10) - } - } - } - } // end of chain connected components - - test("Reverse Chain Connected Components") { - withSpark { sc => - val chain1 = (0 until 9).map(x => (x, x+1) ) - val chain2 = (10 until 20).map(x => (x, x+1) ) - val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } - val twoChains = Graph.fromEdgeTuples(rawEdges, true).reverse.cache() - val ccGraph = ConnectedComponents.run(twoChains).cache() - val vertices = ccGraph.vertices.collect - for ( (id, cc) <- vertices ) { - if (id < 10) { - assert(cc === 0) - } else { - assert(cc === 10) - } - } - val 
ccMap = vertices.toMap - for ( id <- 0 until 20 ) { - if (id < 10) { - assert(ccMap(id) === 0) - } else { - assert(ccMap(id) === 10) - } - } - } - } // end of reverse chain connected components - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/algorithms/PageRankSuite.scala b/graph/src/test/scala/org/apache/spark/graph/algorithms/PageRankSuite.scala deleted file mode 100644 index 81d82a5a6b..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/algorithms/PageRankSuite.scala +++ /dev/null @@ -1,126 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.scalatest.FunSuite - -import org.apache.spark.graph._ -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.rdd._ - -import org.apache.spark.graph.util.GraphGenerators - - -object GridPageRank { - def apply(nRows: Int, nCols: Int, nIter: Int, resetProb: Double) = { - val inNbrs = Array.fill(nRows * nCols)(collection.mutable.MutableList.empty[Int]) - val outDegree = Array.fill(nRows * nCols)(0) - // Convert row column address into vertex ids (row major order) - def sub2ind(r: Int, c: Int): Int = r * nCols + c - // Make the grid graph - for (r <- 0 until nRows; c <- 0 until nCols) { - val ind = sub2ind(r,c) - if (r+1 < nRows) { - outDegree(ind) += 1 - inNbrs(sub2ind(r+1,c)) += ind - } - if (c+1 < nCols) { - outDegree(ind) += 1 - inNbrs(sub2ind(r,c+1)) += ind - } - } - // compute the pagerank - var pr = Array.fill(nRows * nCols)(resetProb) - for (iter <- 0 until nIter) { - val oldPr = pr - pr = new Array[Double](nRows * nCols) - for (ind <- 0 until (nRows * nCols)) { - pr(ind) = resetProb + (1.0 - resetProb) * - inNbrs(ind).map( nbr => oldPr(nbr) / outDegree(nbr)).sum - } - } - (0L until (nRows * nCols)).zip(pr) - } - -} - - -class PageRankSuite extends FunSuite with LocalSparkContext { - - def compareRanks(a: VertexRDD[Double], b: VertexRDD[Double]): Double = { - a.leftJoin(b) { case (id, a, bOpt) => (a - bOpt.getOrElse(0.0)) * (a - bOpt.getOrElse(0.0)) } - .map { case (id, error) => error }.sum - } - - test("Star PageRank") { - withSpark { sc => - val nVertices = 100 - val starGraph = GraphGenerators.starGraph(sc, nVertices).cache() - val resetProb = 0.15 - val errorTol = 1.0e-5 - - val staticRanks1 = PageRank.run(starGraph, numIter = 1, resetProb).vertices.cache() - val staticRanks2 = PageRank.run(starGraph, numIter = 2, resetProb).vertices.cache() - - // Static PageRank should only take 2 iterations to converge - val notMatching = staticRanks1.innerZipJoin(staticRanks2) { (vid, pr1, pr2) => - if (pr1 != pr2) 1 else 0 - }.map { case (vid, test) => test }.sum - assert(notMatching === 0) - - val staticErrors = staticRanks2.map { case (vid, pr) => - val correct = (vid > 0 && pr == resetProb) || - (vid == 0 && math.abs(pr - (resetProb + (1.0 - resetProb) * (resetProb * (nVertices - 1)) )) < 1.0E-5) - if (!correct) 1 else 0 - } - assert(staticErrors.sum === 0) - - val dynamicRanks = PageRank.runUntillConvergence(starGraph, 0, resetProb).vertices.cache() - val standaloneRanks = PageRank.runStandalone(starGraph, 0, resetProb).cache() - assert(compareRanks(staticRanks2, dynamicRanks) < errorTol) - assert(compareRanks(staticRanks2, standaloneRanks) < errorTol) - } - } // end of test Star PageRank - - - - test("Grid PageRank") { - withSpark { sc => - val rows = 10 - val cols = 10 - val resetProb = 0.15 - val tol = 0.0001 - val numIter = 50 - val errorTol = 1.0e-5 - val gridGraph = GraphGenerators.gridGraph(sc, rows, cols).cache() - - val staticRanks = PageRank.run(gridGraph, 
numIter, resetProb).vertices.cache() - val dynamicRanks = PageRank.runUntillConvergence(gridGraph, tol, resetProb).vertices.cache() - val standaloneRanks = PageRank.runStandalone(gridGraph, tol, resetProb).cache() - val referenceRanks = VertexRDD(sc.parallelize(GridPageRank(rows, cols, numIter, resetProb))) - - assert(compareRanks(staticRanks, referenceRanks) < errorTol) - assert(compareRanks(dynamicRanks, referenceRanks) < errorTol) - assert(compareRanks(standaloneRanks, referenceRanks) < errorTol) - } - } // end of Grid PageRank - - - test("Chain PageRank") { - withSpark { sc => - val chain1 = (0 until 9).map(x => (x, x+1) ) - val rawEdges = sc.parallelize(chain1, 1).map { case (s,d) => (s.toLong, d.toLong) } - val chain = Graph.fromEdgeTuples(rawEdges, 1.0).cache() - val resetProb = 0.15 - val tol = 0.0001 - val numIter = 10 - val errorTol = 1.0e-5 - - val staticRanks = PageRank.run(chain, numIter, resetProb).vertices.cache() - val dynamicRanks = PageRank.runUntillConvergence(chain, tol, resetProb).vertices.cache() - val standaloneRanks = PageRank.runStandalone(chain, tol, resetProb).cache() - - assert(compareRanks(staticRanks, dynamicRanks) < errorTol) - assert(compareRanks(dynamicRanks, standaloneRanks) < errorTol) - } - } -} diff --git a/graph/src/test/scala/org/apache/spark/graph/algorithms/SVDPlusPlusSuite.scala b/graph/src/test/scala/org/apache/spark/graph/algorithms/SVDPlusPlusSuite.scala deleted file mode 100644 index a0a6eb33e3..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/algorithms/SVDPlusPlusSuite.scala +++ /dev/null @@ -1,30 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.util.GraphGenerators -import org.apache.spark.rdd._ - - -class SVDPlusPlusSuite extends FunSuite with LocalSparkContext { - - test("Test SVD++ with mean square error on training set") { - withSpark { sc => - val svdppErr = 8.0 - val edges = sc.textFile("mllib/data/als/test.data").map { line => - val fields = line.split(",") - Edge(fields(0).toLong * 2, fields(1).toLong * 2 + 1, fields(2).toDouble) - } - val conf = new SVDPlusPlusConf(10, 2, 0.0, 5.0, 0.007, 0.007, 0.005, 0.015) // 2 iterations - var (graph, u) = SVDPlusPlus.run(edges, conf) - val err = graph.vertices.collect.map{ case (vid, vd) => - if (vid % 2 == 1) vd._4 else 0.0 - }.reduce(_ + _) / graph.triplets.collect.size - assert(err <= svdppErr) - } - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponentsSuite.scala b/graph/src/test/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponentsSuite.scala deleted file mode 100644 index 4afb158a68..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/algorithms/StronglyConnectedComponentsSuite.scala +++ /dev/null @@ -1,57 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.util.GraphGenerators -import org.apache.spark.rdd._ - - -class StronglyConnectedComponentsSuite extends FunSuite with LocalSparkContext { - - test("Island Strongly Connected Components") { - withSpark { sc => - val vertices = sc.parallelize((1L to 5L).map(x => (x, -1))) - val edges = sc.parallelize(Seq.empty[Edge[Int]]) - val graph = Graph(vertices, edges) - val sccGraph = 
StronglyConnectedComponents.run(graph, 5) - for ((id, scc) <- sccGraph.vertices.collect) { - assert(id == scc) - } - } - } - - test("Cycle Strongly Connected Components") { - withSpark { sc => - val rawEdges = sc.parallelize((0L to 6L).map(x => (x, (x + 1) % 7))) - val graph = Graph.fromEdgeTuples(rawEdges, -1) - val sccGraph = StronglyConnectedComponents.run(graph, 20) - for ((id, scc) <- sccGraph.vertices.collect) { - assert(0L == scc) - } - } - } - - test("2 Cycle Strongly Connected Components") { - withSpark { sc => - val edges = - Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ - Array(3L -> 4L, 4L -> 5L, 5L -> 3L) ++ - Array(6L -> 0L, 5L -> 7L) - val rawEdges = sc.parallelize(edges) - val graph = Graph.fromEdgeTuples(rawEdges, -1) - val sccGraph = StronglyConnectedComponents.run(graph, 20) - for ((id, scc) <- sccGraph.vertices.collect) { - if (id < 3) - assert(0L == scc) - else if (id < 6) - assert(3L == scc) - else - assert(id == scc) - } - } - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/algorithms/TriangleCountSuite.scala b/graph/src/test/scala/org/apache/spark/graph/algorithms/TriangleCountSuite.scala deleted file mode 100644 index 274ab11f0c..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/algorithms/TriangleCountSuite.scala +++ /dev/null @@ -1,73 +0,0 @@ -package org.apache.spark.graph.algorithms - -import org.scalatest.FunSuite - -import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ -import org.apache.spark.graph._ -import org.apache.spark.graph.util.GraphGenerators -import org.apache.spark.rdd._ - - -class TriangleCountSuite extends FunSuite with LocalSparkContext { - - test("Count a single triangle") { - withSpark { sc => - val rawEdges = sc.parallelize(Array( 0L->1L, 1L->2L, 2L->0L ), 2) - val graph = Graph.fromEdgeTuples(rawEdges, true).cache() - val triangleCount = TriangleCount.run(graph) - val verts = triangleCount.vertices - verts.collect.foreach { case (vid, count) => assert(count === 1) } - } - } - - test("Count two triangles") { - withSpark { sc => - val triangles = Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ - Array(0L -> -1L, -1L -> -2L, -2L -> 0L) - val rawEdges = sc.parallelize(triangles, 2) - val graph = Graph.fromEdgeTuples(rawEdges, true).cache() - val triangleCount = TriangleCount.run(graph) - val verts = triangleCount.vertices - verts.collect().foreach { case (vid, count) => - if (vid == 0) { - assert(count === 2) - } else { - assert(count === 1) - } - } - } - } - - test("Count two triangles with bi-directed edges") { - withSpark { sc => - val triangles = - Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ - Array(0L -> -1L, -1L -> -2L, -2L -> 0L) - val revTriangles = triangles.map { case (a,b) => (b,a) } - val rawEdges = sc.parallelize(triangles ++ revTriangles, 2) - val graph = Graph.fromEdgeTuples(rawEdges, true).cache() - val triangleCount = TriangleCount.run(graph) - val verts = triangleCount.vertices - verts.collect().foreach { case (vid, count) => - if (vid == 0) { - assert(count === 4) - } else { - assert(count === 2) - } - } - } - } - - test("Count a single triangle with duplicate edges") { - withSpark { sc => - val rawEdges = sc.parallelize(Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ - Array(0L -> 1L, 1L -> 2L, 2L -> 0L), 2) - val graph = Graph.fromEdgeTuples(rawEdges, true, uniqueEdges = Some(RandomVertexCut)).cache() - val triangleCount = TriangleCount.run(graph) - val verts = triangleCount.vertices - verts.collect.foreach { case (vid, count) => assert(count === 1) } - } - } - -} diff --git 
a/graph/src/test/scala/org/apache/spark/graph/impl/EdgePartitionSuite.scala b/graph/src/test/scala/org/apache/spark/graph/impl/EdgePartitionSuite.scala deleted file mode 100644 index fd0beee2f6..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/impl/EdgePartitionSuite.scala +++ /dev/null @@ -1,76 +0,0 @@ -package org.apache.spark.graph.impl - -import scala.reflect.ClassTag -import scala.util.Random - -import org.scalatest.FunSuite - -import org.apache.spark.graph._ - -class EdgePartitionSuite extends FunSuite { - - test("reverse") { - val edges = List(Edge(0, 1, 0), Edge(1, 2, 0), Edge(2, 0, 0)) - val reversedEdges = List(Edge(0, 2, 0), Edge(1, 0, 0), Edge(2, 1, 0)) - val builder = new EdgePartitionBuilder[Int] - for (e <- edges) { - builder.add(e.srcId, e.dstId, e.attr) - } - val edgePartition = builder.toEdgePartition - assert(edgePartition.reverse.iterator.map(_.copy()).toList === reversedEdges) - assert(edgePartition.reverse.reverse.iterator.map(_.copy()).toList === edges) - } - - test("map") { - val edges = List(Edge(0, 1, 0), Edge(1, 2, 0), Edge(2, 0, 0)) - val builder = new EdgePartitionBuilder[Int] - for (e <- edges) { - builder.add(e.srcId, e.dstId, e.attr) - } - val edgePartition = builder.toEdgePartition - assert(edgePartition.map(e => e.srcId + e.dstId).iterator.map(_.copy()).toList === - edges.map(e => e.copy(attr = e.srcId + e.dstId))) - } - - test("groupEdges") { - val edges = List( - Edge(0, 1, 1), Edge(1, 2, 2), Edge(2, 0, 4), Edge(0, 1, 8), Edge(1, 2, 16), Edge(2, 0, 32)) - val groupedEdges = List(Edge(0, 1, 9), Edge(1, 2, 18), Edge(2, 0, 36)) - val builder = new EdgePartitionBuilder[Int] - for (e <- edges) { - builder.add(e.srcId, e.dstId, e.attr) - } - val edgePartition = builder.toEdgePartition - assert(edgePartition.groupEdges(_ + _).iterator.map(_.copy()).toList === groupedEdges) - } - - test("indexIterator") { - val edgesFrom0 = List(Edge(0, 1, 0)) - val edgesFrom1 = List(Edge(1, 0, 0), Edge(1, 2, 0)) - val sortedEdges = edgesFrom0 ++ edgesFrom1 - val builder = new EdgePartitionBuilder[Int] - for (e <- Random.shuffle(sortedEdges)) { - builder.add(e.srcId, e.dstId, e.attr) - } - - val edgePartition = builder.toEdgePartition - assert(edgePartition.iterator.map(_.copy()).toList === sortedEdges) - assert(edgePartition.indexIterator(_ == 0).map(_.copy()).toList === edgesFrom0) - assert(edgePartition.indexIterator(_ == 1).map(_.copy()).toList === edgesFrom1) - } - - test("innerJoin") { - def makeEdgePartition[A: ClassTag](xs: Iterable[(Int, Int, A)]): EdgePartition[A] = { - val builder = new EdgePartitionBuilder[A] - for ((src, dst, attr) <- xs) { builder.add(src: VertexID, dst: VertexID, attr) } - builder.toEdgePartition - } - val aList = List((0, 1, 0), (1, 0, 0), (1, 2, 0), (5, 4, 0), (5, 5, 0)) - val bList = List((0, 1, 0), (1, 0, 0), (1, 1, 0), (3, 4, 0), (5, 5, 0)) - val a = makeEdgePartition(aList) - val b = makeEdgePartition(bList) - - assert(a.innerJoin(b) { (src, dst, a, b) => a }.iterator.map(_.copy()).toList === - List(Edge(0, 1, 0), Edge(1, 0, 0), Edge(5, 5, 0))) - } -} diff --git a/graph/src/test/scala/org/apache/spark/graph/impl/VertexPartitionSuite.scala b/graph/src/test/scala/org/apache/spark/graph/impl/VertexPartitionSuite.scala deleted file mode 100644 index 72579a48c2..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/impl/VertexPartitionSuite.scala +++ /dev/null @@ -1,113 +0,0 @@ -package org.apache.spark.graph.impl - -import org.apache.spark.graph._ -import org.scalatest.FunSuite - -class VertexPartitionSuite extends FunSuite { 
- - test("isDefined, filter") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1))).filter { (vid, attr) => vid == 0 } - assert(vp.isDefined(0)) - assert(!vp.isDefined(1)) - assert(!vp.isDefined(2)) - assert(!vp.isDefined(-1)) - } - - test("isActive, numActives, replaceActives") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1))) - .filter { (vid, attr) => vid == 0 } - .replaceActives(Iterator(0, 2, 0)) - assert(vp.isActive(0)) - assert(!vp.isActive(1)) - assert(vp.isActive(2)) - assert(!vp.isActive(-1)) - assert(vp.numActives == Some(2)) - } - - test("map") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1))).map { (vid, attr) => 2 } - assert(vp(0) === 2) - } - - test("diff") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val vp2 = vp.filter { (vid, attr) => vid <= 1 } - val vp3a = vp.map { (vid, attr) => 2 } - val vp3b = VertexPartition(vp3a.iterator) - // diff with same index - val diff1 = vp2.diff(vp3a) - assert(diff1(0) === 2) - assert(diff1(1) === 2) - assert(diff1(2) === 2) - assert(!diff1.isDefined(2)) - // diff with different indexes - val diff2 = vp2.diff(vp3b) - assert(diff2(0) === 2) - assert(diff2(1) === 2) - assert(diff2(2) === 2) - assert(!diff2.isDefined(2)) - } - - test("leftJoin") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val vp2a = vp.filter { (vid, attr) => vid <= 1 }.map { (vid, attr) => 2 } - val vp2b = VertexPartition(vp2a.iterator) - // leftJoin with same index - val join1 = vp.leftJoin(vp2a) { (vid, a, bOpt) => bOpt.getOrElse(a) } - assert(join1.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) - // leftJoin with different indexes - val join2 = vp.leftJoin(vp2b) { (vid, a, bOpt) => bOpt.getOrElse(a) } - assert(join2.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) - // leftJoin an iterator - val join3 = vp.leftJoin(vp2a.iterator) { (vid, a, bOpt) => bOpt.getOrElse(a) } - assert(join3.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) - } - - test("innerJoin") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val vp2a = vp.filter { (vid, attr) => vid <= 1 }.map { (vid, attr) => 2 } - val vp2b = VertexPartition(vp2a.iterator) - // innerJoin with same index - val join1 = vp.innerJoin(vp2a) { (vid, a, b) => b } - assert(join1.iterator.toSet === Set((0L, 2), (1L, 2))) - // innerJoin with different indexes - val join2 = vp.innerJoin(vp2b) { (vid, a, b) => b } - assert(join2.iterator.toSet === Set((0L, 2), (1L, 2))) - // innerJoin an iterator - val join3 = vp.innerJoin(vp2a.iterator) { (vid, a, b) => b } - assert(join3.iterator.toSet === Set((0L, 2), (1L, 2))) - } - - test("createUsingIndex") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val elems = List((0L, 2), (2L, 2), (3L, 2)) - val vp2 = vp.createUsingIndex(elems.iterator) - assert(vp2.iterator.toSet === Set((0L, 2), (2L, 2))) - assert(vp.index === vp2.index) - } - - test("innerJoinKeepLeft") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val elems = List((0L, 2), (2L, 2), (3L, 2)) - val vp2 = vp.innerJoinKeepLeft(elems.iterator) - assert(vp2.iterator.toSet === Set((0L, 2), (2L, 2))) - assert(vp2(1) === 1) - } - - test("aggregateUsingIndex") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val messages = List((0L, "a"), (2L, "b"), (0L, "c"), (3L, "d")) - val vp2 = vp.aggregateUsingIndex[String](messages.iterator, _ + _) - assert(vp2.iterator.toSet === Set((0L, "ac"), (2L, "b"))) - } - - test("reindex") { - val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) - val vp2 
= vp.filter { (vid, attr) => vid <= 1 } - val vp3 = vp2.reindex() - assert(vp2.iterator.toSet === vp3.iterator.toSet) - assert(vp2(2) === 1) - assert(vp3.index.getPos(2) === -1) - } - -} diff --git a/graph/src/test/scala/org/apache/spark/graph/util/BytecodeUtilsSuite.scala b/graph/src/test/scala/org/apache/spark/graph/util/BytecodeUtilsSuite.scala deleted file mode 100644 index d85e877ddf..0000000000 --- a/graph/src/test/scala/org/apache/spark/graph/util/BytecodeUtilsSuite.scala +++ /dev/null @@ -1,93 +0,0 @@ -package org.apache.spark.graph.util - -import org.scalatest.FunSuite - - -class BytecodeUtilsSuite extends FunSuite { - - import BytecodeUtilsSuite.TestClass - - test("closure invokes a method") { - val c1 = {e: TestClass => println(e.foo); println(e.bar); println(e.baz); } - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) - - val c2 = {e: TestClass => println(e.foo); println(e.bar); } - assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "foo")) - assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "bar")) - assert(!BytecodeUtils.invokedMethod(c2, classOf[TestClass], "baz")) - - val c3 = {e: TestClass => println(e.foo); } - assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "foo")) - assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "bar")) - assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "baz")) - } - - test("closure inside a closure invokes a method") { - val c1 = {e: TestClass => println(e.foo); println(e.bar); println(e.baz); } - val c2 = {e: TestClass => c1(e); println(e.foo); } - assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "foo")) - assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "baz")) - } - - test("closure inside a closure inside a closure invokes a method") { - val c1 = {e: TestClass => println(e.baz); } - val c2 = {e: TestClass => c1(e); println(e.foo); } - val c3 = {e: TestClass => c2(e) } - assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "foo")) - assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "baz")) - } - - test("closure calling a function that invokes a method") { - def zoo(e: TestClass) { - println(e.baz) - } - val c1 = {e: TestClass => zoo(e)} - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) - } - - test("closure calling a function that invokes a method which uses another closure") { - val c2 = {e: TestClass => println(e.baz)} - def zoo(e: TestClass) { - c2(e) - } - val c1 = {e: TestClass => zoo(e)} - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) - } - - test("nested closure") { - val c2 = {e: TestClass => println(e.baz)} - def zoo(e: TestClass, c: TestClass => Unit) { - c(e) - } - val c1 = {e: TestClass => zoo(e, c2)} - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) - assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) - assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) - } - - // The following doesn't work yet, 
because the byte code doesn't contain any information - // about what exactly "c" is. -// test("invoke interface") { -// val c1 = {e: TestClass => c(e)} -// assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) -// assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) -// assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) -// } - - private val c = {e: TestClass => println(e.baz)} -} - - -object BytecodeUtilsSuite { - class TestClass(val foo: Int, val bar: Long) { - def baz: Boolean = false - } -} diff --git a/graphx/pom.xml b/graphx/pom.xml new file mode 100644 index 0000000000..fd3dcaad7c --- /dev/null +++ b/graphx/pom.xml @@ -0,0 +1,129 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent + 0.9.0-incubating-SNAPSHOT + ../pom.xml + + + org.apache.spark + spark-graph_2.9.3 + jar + Spark Graph + http://spark-project.org/ + + + + org.apache.spark + spark-core_2.9.3 + ${project.version} + provided + + + org.eclipse.jetty + jetty-server + + + + org.scalatest + scalatest_${scala.version} + test + + + org.scalacheck + scalacheck_${scala.version} + test + + + + target/scala-${scala.version}/classes + target/scala-${scala.version}/test-classes + + + org.scalatest + scalatest-maven-plugin + + + + + + + hadoop1 + + + org.spark-project + spark-core + ${project.version} + hadoop1 + + + org.apache.hadoop + hadoop-core + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop1 + + + + + + + hadoop2 + + + org.spark-project + spark-core + ${project.version} + hadoop2 + + + org.apache.hadoop + hadoop-core + provided + + + org.apache.hadoop + hadoop-client + provided + + + + + + org.apache.maven.plugins + maven-jar-plugin + + hadoop2 + + + + + + + diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Analytics.scala b/graphx/src/main/scala/org/apache/spark/graphx/Analytics.scala new file mode 100644 index 0000000000..0cafc3fdf9 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/Analytics.scala @@ -0,0 +1,593 @@ +package org.apache.spark.graphx + +import org.apache.spark._ +import org.apache.spark.graphx.algorithms._ + + +/** + * The Analytics object contains a collection of basic graph analytics + * algorithms that operate largely on the graph structure. + * + * In addition the Analytics object contains a driver `main` which can + * be used to apply the various functions to graphs in standard + * formats. 
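+ *
+ * An illustrative invocation (the task name and option names follow the option parsing in
+ * `main` below; the master, input path, and output path are placeholders):
+ * {{{
+ *   Analytics local pagerank hdfs:///data/edge-list.txt \
+ *     --tol=0.001 --numEPart=8 --partStrategy=RandomVertexCut --output=ranks.out
+ * }}}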
+ */ +object Analytics extends Logging { + + def main(args: Array[String]) = { + val host = args(0) + val taskType = args(1) + val fname = args(2) + val options = args.drop(3).map { arg => + arg.dropWhile(_ == '-').split('=') match { + case Array(opt, v) => (opt -> v) + case _ => throw new IllegalArgumentException("Invalid argument: " + arg) + } + } + + def setLogLevels(level: org.apache.log4j.Level, loggers: TraversableOnce[String]) = { + loggers.map{ + loggerName => + val logger = org.apache.log4j.Logger.getLogger(loggerName) + val prevLevel = logger.getLevel() + logger.setLevel(level) + loggerName -> prevLevel + }.toMap + } + + def pickPartitioner(v: String): PartitionStrategy = { + v match { + case "RandomVertexCut" => RandomVertexCut + case "EdgePartition1D" => EdgePartition1D + case "EdgePartition2D" => EdgePartition2D + case "CanonicalRandomVertexCut" => CanonicalRandomVertexCut + case _ => throw new IllegalArgumentException("Invalid Partition Strategy: " + v) + } + } +// setLogLevels(org.apache.log4j.Level.DEBUG, Seq("org.apache.spark")) + + val serializer = "org.apache.spark.serializer.KryoSerializer" + System.setProperty("spark.serializer", serializer) + //System.setProperty("spark.shuffle.compress", "false") + System.setProperty("spark.kryo.registrator", "org.apache.spark.graphx.GraphKryoRegistrator") + + taskType match { + case "pagerank" => { + + var tol:Float = 0.001F + var outFname = "" + var numVPart = 4 + var numEPart = 4 + var partitionStrategy: Option[PartitionStrategy] = None + + options.foreach{ + case ("tol", v) => tol = v.toFloat + case ("output", v) => outFname = v + case ("numVPart", v) => numVPart = v.toInt + case ("numEPart", v) => numEPart = v.toInt + case ("partStrategy", v) => partitionStrategy = Some(pickPartitioner(v)) + case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + } + + println("======================================") + println("| PageRank |") + println("======================================") + + val sc = new SparkContext(host, "PageRank(" + fname + ")") + + val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname, + minEdgePartitions = numEPart).cache() + val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)) + + println("GRAPHX: Number of vertices " + graph.vertices.count) + println("GRAPHX: Number of edges " + graph.edges.count) + + //val pr = Analytics.pagerank(graph, numIter) + val pr = PageRank.runStandalone(graph, tol) + + println("GRAPHX: Total rank: " + pr.map(_._2).reduce(_+_)) + + if (!outFname.isEmpty) { + logWarning("Saving pageranks of pages to " + outFname) + pr.map{case (id, r) => id + "\t" + r}.saveAsTextFile(outFname) + } + + sc.stop() + } + + case "cc" => { + + var numIter = Int.MaxValue + var numVPart = 4 + var numEPart = 4 + var isDynamic = false + var partitionStrategy: Option[PartitionStrategy] = None + + options.foreach{ + case ("numIter", v) => numIter = v.toInt + case ("dynamic", v) => isDynamic = v.toBoolean + case ("numEPart", v) => numEPart = v.toInt + case ("numVPart", v) => numVPart = v.toInt + case ("partStrategy", v) => partitionStrategy = Some(pickPartitioner(v)) + case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + } + + if(!isDynamic && numIter == Int.MaxValue) { + println("Set number of iterations!") + sys.exit(1) + } + println("======================================") + println("| Connected Components |") + println("--------------------------------------") + println(" Using parameters:") + println(" \tDynamic: " + isDynamic) + 
println(" \tNumIter: " + numIter) + println("======================================") + + val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") + val unpartitionedGraph = GraphLoader.edgeListFile(sc, fname, + minEdgePartitions = numEPart).cache() + val graph = partitionStrategy.foldLeft(unpartitionedGraph)(_.partitionBy(_)) + + val cc = ConnectedComponents.run(graph) + println("Components: " + cc.vertices.map{ case (vid,data) => data}.distinct()) + sc.stop() + } + + case "triangles" => { + var numVPart = 4 + var numEPart = 4 + // TriangleCount requires the graph to be partitioned + var partitionStrategy: PartitionStrategy = RandomVertexCut + + options.foreach{ + case ("numEPart", v) => numEPart = v.toInt + case ("numVPart", v) => numVPart = v.toInt + case ("partStrategy", v) => partitionStrategy = pickPartitioner(v) + case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + } + println("======================================") + println("| Triangle Count |") + println("--------------------------------------") + val sc = new SparkContext(host, "TriangleCount(" + fname + ")") + val graph = GraphLoader.edgeListFile(sc, fname, canonicalOrientation = true, + minEdgePartitions = numEPart).partitionBy(partitionStrategy).cache() + val triangles = TriangleCount.run(graph) + println("Triangles: " + triangles.vertices.map { + case (vid,data) => data.toLong + }.reduce(_+_) / 3) + sc.stop() + } + +// +// case "shortestpath" => { +// +// var numIter = Int.MaxValue +// var isDynamic = true +// var sources: List[Int] = List.empty +// +// options.foreach{ +// case ("numIter", v) => numIter = v.toInt +// case ("dynamic", v) => isDynamic = v.toBoolean +// case ("source", v) => sources ++= List(v.toInt) +// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) +// } +// +// +// if(!isDynamic && numIter == Int.MaxValue) { +// println("Set number of iterations!") +// sys.exit(1) +// } +// +// if(sources.isEmpty) { +// println("No sources provided!") +// sys.exit(1) +// } +// +// println("======================================") +// println("| Shortest Path |") +// println("--------------------------------------") +// println(" Using parameters:") +// println(" \tDynamic: " + isDynamic) +// println(" \tNumIter: " + numIter) +// println(" \tSources: [" + sources.mkString(", ") + "]") +// println("======================================") +// +// val sc = new SparkContext(host, "ShortestPath(" + fname + ")") +// val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0F else a(0).toFloat ) ) +// //val sp = Analytics.shortestPath(graph, sources, numIter) +// // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) +// // else Analytics.shortestPath(graph, sources, numIter) +// println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) +// +// sc.stop() +// } + + + // case "als" => { + + // var numIter = 5 + // var lambda = 0.01 + // var latentK = 10 + // var usersFname = "usersFactors.tsv" + // var moviesFname = "moviesFname.tsv" + // var numVPart = 4 + // var numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("lambda", v) => lambda = v.toDouble + // case ("latentK", v) => latentK = v.toInt + // case ("usersFname", v) => usersFname = v + // case ("moviesFname", v) => moviesFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // 
} + + // println("======================================") + // println("| Alternating Least Squares |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tNumIter: " + numIter) + // println(" \tLambda: " + lambda) + // println(" \tLatentK: " + latentK) + // println(" \tusersFname: " + usersFname) + // println(" \tmoviesFname: " + moviesFname) + // println("======================================") + + // val sc = new SparkContext(host, "ALS(" + fname + ")") + // val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble ) + // graph.numVPart = numVPart + // graph.numEPart = numEPart + + // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) + // assert(maxUser < minMovie) + + // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache + // factors.filter(_._1 <= maxUser).map(r => r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(usersFname) + // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(moviesFname) + + // sc.stop() + // } + + + case _ => { + println("Invalid task type.") + } + } + } + + // /** + // * Compute the shortest path to a set of markers + // */ + // def shortestPath[VD: Manifest](graph: Graph[VD, Double], sources: List[Int], numIter: Int) = { + // val sourceSet = sources.toSet + // val spGraph = graph.mapVertices { + // case Vertex(vid, _) => Vertex(vid, (if(sourceSet.contains(vid)) 0.0 else Double.MaxValue)) + // } + // GraphLab.iterateGA[Double, Double, Double](spGraph)( + // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // (a: Double, b: Double) => math.min(a, b), // merge + // (v, a: Option[Double]) => math.min(v.data, a.getOrElse(Double.MaxValue)), // apply + // numIter, + // gatherDirection = EdgeDirection.In) + // } + + // /** + // * Compute the connected component membership of each vertex + // * and return an RDD with the vertex value containing the + // * lowest vertex id in the connected component containing + // * that vertex. 
+ // */ + // def dynamicConnectedComponents[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], + // numIter: Int = Int.MaxValue) = { + + // val vertices = graph.vertices.mapPartitions(iter => iter.map { case (vid, _) => (vid, vid) }) + // val edges = graph.edges // .mapValues(v => None) + // val ccGraph = new Graph(vertices, edges) + + // ccGraph.iterateDynamic( + // (me_id, edge) => edge.otherVertex(me_id).data, // gather + // (a: Int, b: Int) => math.min(a, b), // merge + // Integer.MAX_VALUE, + // (v, a: Int) => math.min(v.data, a), // apply + // (me_id, edge) => edge.otherVertex(me_id).data > edge.vertex(me_id).data, // scatter + // numIter, + // gatherEdges = EdgeDirection.Both, + // scatterEdges = EdgeDirection.Both).vertices + // // + // // graph_ret.vertices.collect.foreach(println) + // // graph_ret.edges.take(10).foreach(println) + // } + + + // /** + // * Compute the shortest path to a set of markers + // */ + // def dynamicShortestPath[VD: Manifest, ED: Manifest](graph: Graph[VD, Double], + // sources: List[Int], numIter: Int) = { + // val sourceSet = sources.toSet + // val vertices = graph.vertices.mapPartitions( + // iter => iter.map { + // case (vid, _) => (vid, (if(sourceSet.contains(vid)) 0.0F else Double.MaxValue) ) + // }); + + // val edges = graph.edges // .mapValues(v => None) + // val spGraph = new Graph(vertices, edges) + + // val niterations = Int.MaxValue + // spGraph.iterateDynamic( + // (me_id, edge) => edge.otherVertex(me_id).data + edge.data, // gather + // (a: Double, b: Double) => math.min(a, b), // merge + // Double.MaxValue, + // (v, a: Double) => math.min(v.data, a), // apply + // (me_id, edge) => edge.vertex(me_id).data + edge.data < edge.otherVertex(me_id).data, // scatter + // numIter, + // gatherEdges = EdgeDirection.In, + // scatterEdges = EdgeDirection.Out).vertices + // } + + + // /** + // * + // */ + // def alternatingLeastSquares[VD: ClassTag, ED: ClassTag](graph: Graph[VD, Double], + // latentK: Int, lambda: Double, numIter: Int) = { + // val vertices = graph.vertices.mapPartitions( _.map { + // case (vid, _) => (vid, Array.fill(latentK){ scala.util.Random.nextDouble() } ) + // }).cache + // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // val edges = graph.edges // .mapValues(v => None) + // val alsGraph = new Graph(vertices, edges) + // alsGraph.numVPart = graph.numVPart + // alsGraph.numEPart = graph.numEPart + + // val niterations = Int.MaxValue + // alsGraph.iterateDynamic[(Array[Double], Array[Double])]( + // (me_id, edge) => { // gather + // val X = edge.otherVertex(me_id).data + // val y = edge.data + // val Xy = X.map(_ * y) + // val XtX = (for(i <- 0 until latentK; j <- i until latentK) yield(X(i) * X(j))).toArray + // (Xy, XtX) + // }, + // (a, b) => { + // // The difference between the while loop and the zip is a FACTOR OF TWO in overall + // // runtime + // var i = 0 + // while(i < a._1.length) { a._1(i) += b._1(i); i += 1 } + // i = 0 + // while(i < a._2.length) { a._2(i) += b._2(i); i += 1 } + // a + // // (a._1.zip(b._1).map{ case (q,r) => q+r }, a._2.zip(b._2).map{ case (q,r) => q+r }) + // }, + // (Array.empty[Double], Array.empty[Double]), // default value is empty + // (vertex, accum) => { // apply + // val XyArray = accum._1 + // val XtXArray = accum._2 + // if(XyArray.isEmpty) vertex.data // no neighbors + // else { + // val XtX = DenseMatrix.tabulate(latentK,latentK){ (i,j) => + // (if(i < j) XtXArray(i + (j+1)*j/2) else XtXArray(i + (j+1)*j/2)) + + // (if(i == j) lambda else 1.0F) //regularization + // } + // 
val Xy = DenseMatrix.create(latentK,1,XyArray) + // val w = XtX \ Xy + // w.data + // } + // }, + // (me_id, edge) => true, + // numIter, + // gatherEdges = EdgeDirection.Both, + // scatterEdges = EdgeDirection.Both, + // vertex => vertex.id < maxUser).vertices + // } + + // def main(args: Array[String]) = { + // val host = args(0) + // val taskType = args(1) + // val fname = args(2) + // val options = args.drop(3).map { arg => + // arg.dropWhile(_ == '-').split('=') match { + // case Array(opt, v) => (opt -> v) + // case _ => throw new IllegalArgumentException("Invalid argument: " + arg) + // } + // } + + // System.setProperty("spark.serializer", "spark.KryoSerializer") + // //System.setProperty("spark.shuffle.compress", "false") + // System.setProperty("spark.kryo.registrator", "spark.graphx.GraphKryoRegistrator") + + // taskType match { + // case "pagerank" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + // var tol:Double = 0.001 + // var outFname = "" + // var numVPart = 4 + // var numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("tol", v) => tol = v.toDouble + // case ("output", v) => outFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| PageRank |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // if(isDynamic) println(" \t |-> Tolerance: " + tol) + // println(" \tNumIter: " + numIter) + // println("======================================") + + // val sc = new SparkContext(host, "PageRank(" + fname + ")") + + // val graph = GraphLoader.textFile(sc, fname, a => 1.0).withPartitioner(numVPart, numEPart).cache() + + // val startTime = System.currentTimeMillis + // logInfo("GRAPHX: starting tasks") + // logInfo("GRAPHX: Number of vertices " + graph.vertices.count) + // logInfo("GRAPHX: Number of edges " + graph.edges.count) + + // val pr = Analytics.pagerank(graph, numIter) + // // val pr = if(isDynamic) Analytics.dynamicPagerank(graph, tol, numIter) + // // else Analytics.pagerank(graph, numIter) + // logInfo("GRAPHX: Total rank: " + pr.vertices.map{ case Vertex(id,r) => r }.reduce(_+_) ) + // if (!outFname.isEmpty) { + // println("Saving pageranks of pages to " + outFname) + // pr.vertices.map{case Vertex(id, r) => id + "\t" + r}.saveAsTextFile(outFname) + // } + // logInfo("GRAPHX: Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") + // sc.stop() + // } + + // case "cc" => { + + // var numIter = Int.MaxValue + // var isDynamic = false + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + // println("======================================") + // println("| Connected Components |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // 
println("======================================") + + // val sc = new SparkContext(host, "ConnectedComponents(" + fname + ")") + // val graph = GraphLoader.textFile(sc, fname, a => 1.0) + // val cc = Analytics.connectedComponents(graph, numIter) + // // val cc = if(isDynamic) Analytics.dynamicConnectedComponents(graph, numIter) + // // else Analytics.connectedComponents(graph, numIter) + // println("Components: " + cc.vertices.map(_.data).distinct()) + + // sc.stop() + // } + + // case "shortestpath" => { + + // var numIter = Int.MaxValue + // var isDynamic = true + // var sources: List[Int] = List.empty + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("dynamic", v) => isDynamic = v.toBoolean + // case ("source", v) => sources ++= List(v.toInt) + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + + // if(!isDynamic && numIter == Int.MaxValue) { + // println("Set number of iterations!") + // sys.exit(1) + // } + + // if(sources.isEmpty) { + // println("No sources provided!") + // sys.exit(1) + // } + + // println("======================================") + // println("| Shortest Path |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tDynamic: " + isDynamic) + // println(" \tNumIter: " + numIter) + // println(" \tSources: [" + sources.mkString(", ") + "]") + // println("======================================") + + // val sc = new SparkContext(host, "ShortestPath(" + fname + ")") + // val graph = GraphLoader.textFile(sc, fname, a => (if(a.isEmpty) 1.0 else a(0).toDouble ) ) + // val sp = Analytics.shortestPath(graph, sources, numIter) + // // val cc = if(isDynamic) Analytics.dynamicShortestPath(graph, sources, numIter) + // // else Analytics.shortestPath(graph, sources, numIter) + // println("Longest Path: " + sp.vertices.map(_.data).reduce(math.max(_,_))) + + // sc.stop() + // } + + + // case "als" => { + + // var numIter = 5 + // var lambda = 0.01 + // var latentK = 10 + // var usersFname = "usersFactors.tsv" + // var moviesFname = "moviesFname.tsv" + // var numVPart = 4 + // var numEPart = 4 + + // options.foreach{ + // case ("numIter", v) => numIter = v.toInt + // case ("lambda", v) => lambda = v.toDouble + // case ("latentK", v) => latentK = v.toInt + // case ("usersFname", v) => usersFname = v + // case ("moviesFname", v) => moviesFname = v + // case ("numVPart", v) => numVPart = v.toInt + // case ("numEPart", v) => numEPart = v.toInt + // case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) + // } + + // println("======================================") + // println("| Alternating Least Squares |") + // println("--------------------------------------") + // println(" Using parameters:") + // println(" \tNumIter: " + numIter) + // println(" \tLambda: " + lambda) + // println(" \tLatentK: " + latentK) + // println(" \tusersFname: " + usersFname) + // println(" \tmoviesFname: " + moviesFname) + // println("======================================") + + // val sc = new SparkContext(host, "ALS(" + fname + ")") + // val graph = GraphLoader.textFile(sc, fname, a => a(0).toDouble ) + // graph.numVPart = numVPart + // graph.numEPart = numEPart + + // val maxUser = graph.edges.map(_._1).reduce(math.max(_,_)) + // val minMovie = graph.edges.map(_._2).reduce(math.min(_,_)) + // assert(maxUser < minMovie) + + // val factors = Analytics.alternatingLeastSquares(graph, latentK, lambda, numIter).cache + // factors.filter(_._1 <= maxUser).map(r => 
r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(usersFname) + // factors.filter(_._1 >= minMovie).map(r => r._1 + "\t" + r._2.mkString("\t")) + // .saveAsTextFile(moviesFname) + + // sc.stop() + // } + + + // case _ => { + // println("Invalid task type.") + // } + // } + // } + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala new file mode 100644 index 0000000000..29b46674f1 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/Edge.scala @@ -0,0 +1,50 @@ +package org.apache.spark.graphx + + +/** + * A single directed edge consisting of a source id, target id, + * and the data associated with the Edgee. + * + * @tparam ED type of the edge attribute + */ +case class Edge[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED] ( + /** + * The vertex id of the source vertex + */ + var srcId: VertexID = 0, + /** + * The vertex id of the target vertex. + */ + var dstId: VertexID = 0, + /** + * The attribute associated with the edge. + */ + var attr: ED = nullValue[ED]) extends Serializable { + + /** + * Given one vertex in the edge return the other vertex. + * + * @param vid the id one of the two vertices on the edge. + * @return the id of the other vertex on the edge. + */ + def otherVertexId(vid: VertexID): VertexID = + if (srcId == vid) dstId else { assert(dstId == vid); srcId } + + /** + * Return the relative direction of the edge to the corresponding + * vertex. + * + * @param vid the id of one of the two vertices in the edge. + * @return the relative direction of the edge to the corresponding + * vertex. + */ + def relativeDirection(vid: VertexID): EdgeDirection = + if (vid == srcId) EdgeDirection.Out else { assert(vid == dstId); EdgeDirection.In } +} + +object Edge { + def lexicographicOrdering[ED] = new Ordering[Edge[ED]] { + override def compare(a: Edge[ED], b: Edge[ED]): Int = + Ordering[(VertexID, VertexID)].compare((a.srcId, a.dstId), (b.srcId, b.dstId)) + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala new file mode 100644 index 0000000000..785f941650 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala @@ -0,0 +1,36 @@ +package org.apache.spark.graphx + + +/** + * The direction of directed edge relative to a vertex used to select + * the set of adjacent neighbors when running a neighborhood query. + */ +sealed abstract class EdgeDirection { + /** + * Reverse the direction of an edge. An in becomes out, + * out becomes in and both remains both. + */ + def reverse: EdgeDirection = this match { + case EdgeDirection.In => EdgeDirection.Out + case EdgeDirection.Out => EdgeDirection.In + case EdgeDirection.Both => EdgeDirection.Both + } +} + + +object EdgeDirection { + /** + * Edges arriving at a vertex. 
+ */ + case object In extends EdgeDirection + + /** + * Edges originating from a vertex + */ + case object Out extends EdgeDirection + + /** + * All edges adjacent to a vertex + */ + case object Both extends EdgeDirection +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala new file mode 100644 index 0000000000..e4ef460e6f --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala @@ -0,0 +1,73 @@ +package org.apache.spark.graphx + +import scala.reflect.{classTag, ClassTag} + +import org.apache.spark.{OneToOneDependency, Partition, Partitioner, TaskContext} +import org.apache.spark.graphx.impl.EdgePartition +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel + + +class EdgeRDD[@specialized ED: ClassTag]( + val partitionsRDD: RDD[(PartitionID, EdgePartition[ED])]) + extends RDD[Edge[ED]](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { + + partitionsRDD.setName("EdgeRDD") + + override protected def getPartitions: Array[Partition] = partitionsRDD.partitions + + /** + * If partitionsRDD already has a partitioner, use it. Otherwise assume that the PartitionIDs in + * partitionsRDD correspond to the actual partitions and create a new partitioner that allows + * co-partitioning with partitionsRDD. + */ + override val partitioner = + partitionsRDD.partitioner.orElse(Some(Partitioner.defaultPartitioner(partitionsRDD))) + + override def compute(part: Partition, context: TaskContext): Iterator[Edge[ED]] = { + firstParent[(PartitionID, EdgePartition[ED])].iterator(part, context).next._2.iterator + } + + override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect() + + /** + * Caching a VertexRDD causes the index and values to be cached separately. + */ + override def persist(newLevel: StorageLevel): EdgeRDD[ED] = { + partitionsRDD.persist(newLevel) + this + } + + /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ + override def persist(): EdgeRDD[ED] = persist(StorageLevel.MEMORY_ONLY) + + /** Persist this RDD with the default storage level (`MEMORY_ONLY`). 
*/ + override def cache(): EdgeRDD[ED] = persist() + + def mapEdgePartitions[ED2: ClassTag](f: (PartitionID, EdgePartition[ED]) => EdgePartition[ED2]) + : EdgeRDD[ED2] = { +// iter => iter.map { case (pid, ep) => (pid, f(ep)) } + new EdgeRDD[ED2](partitionsRDD.mapPartitions({ iter => + val (pid, ep) = iter.next() + Iterator(Tuple2(pid, f(pid, ep))) + }, preservesPartitioning = true)) + } + + def innerJoin[ED2: ClassTag, ED3: ClassTag] + (other: EdgeRDD[ED2]) + (f: (VertexID, VertexID, ED, ED2) => ED3): EdgeRDD[ED3] = { + val ed2Tag = classTag[ED2] + val ed3Tag = classTag[ED3] + new EdgeRDD[ED3](partitionsRDD.zipPartitions(other.partitionsRDD, true) { + (thisIter, otherIter) => + val (pid, thisEPart) = thisIter.next() + val (_, otherEPart) = otherIter.next() + Iterator(Tuple2(pid, thisEPart.innerJoin(otherEPart)(f)(ed2Tag, ed3Tag))) + }) + } + + def collectVertexIDs(): RDD[VertexID] = { + partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) } + } + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala new file mode 100644 index 0000000000..b0565b7e0e --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala @@ -0,0 +1,63 @@ +package org.apache.spark.graphx + +import org.apache.spark.graphx.impl.VertexPartition + +/** + * An edge triplet represents two vertices and edge along with their + * attributes. + * + * @tparam VD the type of the vertex attribute. + * @tparam ED the type of the edge attribute + * + * @todo specialize edge triplet for basic types, though when I last + * tried specializing I got a warning about inherenting from a type + * that is not a trait. + */ +class EdgeTriplet[VD, ED] extends Edge[ED] { +// class EdgeTriplet[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) VD: ClassTag, +// @specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassTag] extends Edge[ED] { + + + /** + * The source vertex attribute + */ + var srcAttr: VD = _ //nullValue[VD] + + /** + * The destination vertex attribute + */ + var dstAttr: VD = _ //nullValue[VD] + + var srcStale: Boolean = false + var dstStale: Boolean = false + + /** + * Set the edge properties of this triplet. + */ + protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD,ED] = { + srcId = other.srcId + dstId = other.dstId + attr = other.attr + this + } + + /** + * Given one vertex in the edge return the other vertex. + * + * @param vid the id one of the two vertices on the edge. + * @return the attribute for the other vertex on the edge. + */ + def otherVertexAttr(vid: VertexID): VD = + if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr } + + /** + * Get the vertex object for the given vertex in the edge. + * + * @param vid the id of one of the two vertices on the edge + * @return the attr for the vertex with that id. 
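+ *
+ * @example A minimal sketch, assuming a triplet of type EdgeTriplet[String, Double]
+ * is in scope:
+ * {{{
+ * // attribute stored on the source side of the triplet
+ * val srcName: String = triplet.vertexAttr(triplet.srcId)
+ * // attribute of the vertex opposite the source
+ * val dstName: String = triplet.otherVertexAttr(triplet.srcId)
+ * }}}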
+ */ + def vertexAttr(vid: VertexID): VD = + if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr } + + override def toString() = ((srcId, srcAttr), (dstId, dstAttr), attr).toString() +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala new file mode 100644 index 0000000000..2b7c0a2583 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -0,0 +1,437 @@ +package org.apache.spark.graphx + +import scala.reflect.ClassTag + +import org.apache.spark.graphx.impl._ +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel + + +/** + * The Graph abstractly represents a graph with arbitrary objects + * associated with vertices and edges. The graph provides basic + * operations to access and manipulate the data associated with + * vertices and edges as well as the underlying structure. Like Spark + * RDDs, the graph is a functional data-structure in which mutating + * operations return new graphs. + * + * @see GraphOps for additional graph member functions. + * + * @note The majority of the graph operations are implemented in + * `GraphOps`. All the convenience operations are defined in the + * `GraphOps` class which may be shared across multiple graph + * implementations. + * + * @tparam VD the vertex attribute type + * @tparam ED the edge attribute type + */ +abstract class Graph[VD: ClassTag, ED: ClassTag] { + + /** + * Get the vertices and their data. + * + * @note vertex ids are unique. + * @return An RDD containing the vertices in this graph + * + * @see Vertex for the vertex type. + * + */ + val vertices: VertexRDD[VD] + + /** + * Get the Edges and their data as an RDD. The entries in the RDD + * contain just the source id and target id along with the edge + * data. + * + * @return An RDD containing the edges in this graph + * + * @see Edge for the edge type. + * @see edgesWithVertices to get an RDD which contains all the edges + * along with their vertex data. + * + */ + val edges: EdgeRDD[ED] + + /** + * Get the edges with the vertex data associated with the adjacent + * pair of vertices. + * + * @return An RDD containing edge triplets. + * + * @example This operation might be used to evaluate a graph + * coloring where we would like to check that both vertices are a + * different color. + * {{{ + * type Color = Int + * val graph: Graph[Color, Int] = Graph.textFile("hdfs://file.tsv") + * val numInvalid = graph.edgesWithVertices() + * .map(e => if (e.src.data == e.dst.data) 1 else 0).sum + * }}} + * + * @see edges() If only the edge data and adjacent vertex ids are + * required. + * + */ + val triplets: RDD[EdgeTriplet[VD, ED]] + + /** + * Cache the vertices and edges associated with this graph. + * + * @param newLevel the level at which to cache the graph. + + * @return A reference to this graph for convenience. + * + */ + def persist(newLevel: StorageLevel = StorageLevel.MEMORY_ONLY): Graph[VD, ED] + + /** + * Return a graph that is cached when first created. This is used to + * pin a graph in memory enabling multiple queries to reuse the same + * construction process. + * + * @see RDD.cache() for a more detailed explanation of caching. + */ + def cache(): Graph[VD, ED] + + /** + * Repartition the edges in the graph according to partitionStrategy. + */ + def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] + + /** + * Compute statistics describing the graph representation. 
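+ *
+ * @example A minimal sketch of inspecting the returned statistics:
+ * {{{
+ * val stats: Map[String, Any] = graph.statistics
+ * stats.foreach { case (k, v) => println(k + ": " + v) }
+ * }}}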
+ */ + def statistics: Map[String, Any] + + /** + * Construct a new graph where each vertex value has been + * transformed by the map function. + * + * @note This graph is not changed and that the new graph has the + * same structure. As a consequence the underlying index structures + * can be reused. + * + * @param map the function from a vertex object to a new vertex value. + * + * @tparam VD2 the new vertex data type + * + * @example We might use this operation to change the vertex values + * from one type to another to initialize an algorithm. + * {{{ + * val rawGraph: Graph[(), ()] = Graph.textFile("hdfs://file") + * val root = 42 + * var bfsGraph = rawGraph + * .mapVertices[Int]((vid, data) => if (vid == root) 0 else Math.MaxValue) + * }}} + * + */ + def mapVertices[VD2: ClassTag](map: (VertexID, VD) => VD2): Graph[VD2, ED] + + /** + * Construct a new graph where the value of each edge is + * transformed by the map operation. This function is not passed + * the vertex value for the vertices adjacent to the edge. If + * vertex values are desired use the mapTriplets function. + * + * @note This graph is not changed and that the new graph has the + * same structure. As a consequence the underlying index structures + * can be reused. + * + * @param map the function from an edge object to a new edge value. + * + * @tparam ED2 the new edge data type + * + * @example This function might be used to initialize edge + * attributes. + * + */ + def mapEdges[ED2: ClassTag](map: Edge[ED] => ED2): Graph[VD, ED2] = { + mapEdges((pid, iter) => iter.map(map)) + } + + /** + * Construct a new graph transforming the value of each edge using + * the user defined iterator transform. The iterator transform is + * given an iterator over edge triplets within a logical partition + * and should yield a new iterator over the new values of each edge + * in the order in which they are provided to the iterator transform + * If adjacent vertex values are not required, consider using the + * mapEdges function instead. + * + * @note This that this does not change the structure of the + * graph or modify the values of this graph. As a consequence + * the underlying index structures can be reused. + * + * @param map the function which takes a partition id and an iterator + * over all the edges in the partition and must return an iterator over + * the new values for each edge in the order of the input iterator. + * + * @tparam ED2 the new edge data type + * + */ + def mapEdges[ED2: ClassTag]( + map: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] + + /** + * Construct a new graph where the value of each edge is + * transformed by the map operation. This function passes vertex + * values for the adjacent vertices to the map function. If + * adjacent vertex values are not required, consider using the + * mapEdges function instead. + * + * @note This that this does not change the structure of the + * graph or modify the values of this graph. As a consequence + * the underlying index structures can be reused. + * + * @param map the function from an edge object to a new edge value. + * + * @tparam ED2 the new edge data type + * + * @example This function might be used to initialize edge + * attributes based on the attributes associated with each vertex. 
+ * {{{ + * val rawGraph: Graph[Int, Int] = someLoadFunction() + * val graph = rawGraph.mapTriplets[Int]( edge => + * edge.src.data - edge.dst.data) + * }}} + * + */ + def mapTriplets[ED2: ClassTag](map: EdgeTriplet[VD, ED] => ED2): Graph[VD, ED2] = { + mapTriplets((pid, iter) => iter.map(map)) + } + + /** + * Construct a new graph transforming the value of each edge using + * the user defined iterator transform. The iterator transform is + * given an iterator over edge triplets within a logical partition + * and should yield a new iterator over the new values of each edge + * in the order in which they are provided to the iterator transform + * If adjacent vertex values are not required, consider using the + * mapEdges function instead. + * + * @note This that this does not change the structure of the + * graph or modify the values of this graph. As a consequence + * the underlying index structures can be reused. + * + * @param map the function which takes a partition id and an iterator + * over all the edges in the partition and must return an iterator over + * the new values for each edge in the order of the input iterator. + * + * @tparam ED2 the new edge data type + * + */ + def mapTriplets[ED2: ClassTag]( + map: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2]): + Graph[VD, ED2] + + /** + * Construct a new graph with all the edges reversed. If this graph + * contains an edge from a to b then the returned graph contains an + * edge from b to a. + */ + def reverse: Graph[VD, ED] + + /** + * This function takes a vertex and edge predicate and constructs + * the subgraph that consists of vertices and edges that satisfy the + * predict. The resulting graph contains the vertices and edges + * that satisfy: + * + * {{{ + * V' = {v : for all v in V where vpred(v)} + * E' = {(u,v): for all (u,v) in E where epred((u,v)) && vpred(u) && vpred(v)} + * }}} + * + * @param epred the edge predicate which takes a triplet and + * evaluates to true if the edge is to remain in the subgraph. Note + * that only edges in which both vertices satisfy the vertex + * predicate are considered. + * + * @param vpred the vertex predicate which takes a vertex object and + * evaluates to true if the vertex is to be included in the subgraph + * + * @return the subgraph containing only the vertices and edges that + * satisfy the predicates. + */ + def subgraph(epred: EdgeTriplet[VD,ED] => Boolean = (x => true), + vpred: (VertexID, VD) => Boolean = ((v,d) => true) ): Graph[VD, ED] + + /** + * Subgraph of this graph with only vertices and edges from the other graph. + * @param other the graph to project this graph onto + * @return a graph with vertices and edges that exists in both the current graph and other, + * with vertex and edge data from the current graph. + */ + def mask[VD2: ClassTag, ED2: ClassTag](other: Graph[VD2, ED2]): Graph[VD, ED] + + /** + * This function merges multiple edges between two vertices into a single Edge. For correct + * results, the graph must have been partitioned using partitionBy. + * + * @tparam ED2 the type of the resulting edge data after grouping. + * + * @param f the user supplied commutative associative function to merge edge attributes for + * duplicate edges. + * + * @return Graph[VD,ED2] The resulting graph with a single Edge for each source, dest vertex pair. + */ + def groupEdges(merge: (ED, ED) => ED): Graph[VD,ED] + + /** + * The mapReduceTriplets function is used to compute statistics + * about the neighboring edges and vertices of each vertex. 
The + * user supplied `mapFunc` function is invoked on each edge of the + * graph generating 0 or more "messages" to be "sent" to either + * vertex in the edge. The `reduceFunc` is then used to combine the + * output of the map phase destined to each vertex. + * + * @tparam A the type of "message" to be sent to each vertex + * + * @param mapFunc the user defined map function which returns 0 or + * more messages to neighboring vertices. + * + * @param reduceFunc the user defined reduce function which should + * be commutative and assosciative and is used to combine the output + * of the map phase. + * + * @param activeSet optionally, a set of "active" vertices and a direction of edges to consider + * when running `mapFunc`. For example, if the direction is Out, `mapFunc` will only be run on + * edges originating from vertices in the active set. `activeSet` must have the same index as the + * graph's vertices. + * + * @example We can use this function to compute the inDegree of each + * vertex + * {{{ + * val rawGraph: Graph[(),()] = Graph.textFile("twittergraph") + * val inDeg: RDD[(VertexID, Int)] = + * mapReduceTriplets[Int](et => Array((et.dst.id, 1)), _ + _) + * }}} + * + * @note By expressing computation at the edge level we achieve + * maximum parallelism. This is one of the core functions in the + * Graph API in that enables neighborhood level computation. For + * example this function can be used to count neighbors satisfying a + * predicate or implement PageRank. + * + */ + def mapReduceTriplets[A: ClassTag]( + mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)], + reduceFunc: (A, A) => A, + activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None) + : VertexRDD[A] + + /** + * Join the vertices with an RDD and then apply a function from the + * the vertex and RDD entry to a new vertex value and type. The + * input table should contain at most one entry for each vertex. If + * no entry is provided the map function is invoked passing none. + * + * @tparam U the type of entry in the table of updates + * @tparam VD2 the new vertex value type + * + * @param table the table to join with the vertices in the graph. + * The table should contain at most one entry for each vertex. + * + * @param mapFunc the function used to compute the new vertex + * values. The map function is invoked for all vertices, even those + * that do not have a corresponding entry in the table. + * + * @example This function is used to update the vertices with new + * values based on external data. For example we could add the out + * degree to each vertex record + * + * {{{ + * val rawGraph: Graph[(),()] = Graph.textFile("webgraph") + * val outDeg: RDD[(VertexID, Int)] = rawGraph.outDegrees() + * val graph = rawGraph.outerJoinVertices(outDeg) { + * (vid, data, optDeg) => optDeg.getOrElse(0) + * } + * }}} + * + */ + def outerJoinVertices[U: ClassTag, VD2: ClassTag](table: RDD[(VertexID, U)]) + (mapFunc: (VertexID, VD, Option[U]) => VD2) + : Graph[VD2, ED] + + // Save a copy of the GraphOps object so there is always one unique GraphOps object + // for a given Graph object, and thus the lazy vals in GraphOps would work as intended. + val ops = new GraphOps(this) +} // end of Graph + + + + +/** + * The Graph object contains a collection of routines used to construct graphs from RDDs. + */ +object Graph { + + /** + * Construct a graph from a collection of edges encoded as vertex id pairs. + * + * @param rawEdges a collection of edges in (src,dst) form. 
+ * @param uniqueEdges if multiple identical edges are found they are combined and the edge + * attribute is set to the sum. Otherwise duplicate edges are treated as separate. To enable + * uniqueEdges, a [[PartitionStrategy]] must be provided. + * + * @return a graph with edge attributes containing either the count of duplicate edges or 1 + * (if `uniqueEdges=None`) and vertex attributes containing the total degree of each vertex. + */ + def fromEdgeTuples[VD: ClassTag]( + rawEdges: RDD[(VertexID, VertexID)], + defaultValue: VD, + uniqueEdges: Option[PartitionStrategy] = None): Graph[VD, Int] = { + val edges = rawEdges.map(p => Edge(p._1, p._2, 1)) + val graph = GraphImpl(edges, defaultValue) + uniqueEdges match { + case Some(p) => graph.partitionBy(p).groupEdges((a, b) => a + b) + case None => graph + } + } + + /** + * Construct a graph from a collection of edges. + * + * @param edges the RDD containing the set of edges in the graph + * @param defaultValue the default vertex attribute to use for each vertex + * + * @return a graph with edge attributes described by `edges` and vertices + * given by all vertices in `edges` with value `defaultValue` + */ + def fromEdges[VD: ClassTag, ED: ClassTag]( + edges: RDD[Edge[ED]], + defaultValue: VD): Graph[VD, ED] = { + GraphImpl(edges, defaultValue) + } + + /** + * Construct a graph from a collection attributed vertices and + * edges. Duplicate vertices are picked arbitrarily and + * vertices found in the edge collection but not in the input + * vertices are the default attribute. + * + * @tparam VD the vertex attribute type + * @tparam ED the edge attribute type + * @param vertices the "set" of vertices and their attributes + * @param edges the collection of edges in the graph + * @param defaultVertexAttr the default vertex attribute to use for + * vertices that are mentioned in edges but not in vertices + * @param partitionStrategy the partition strategy to use when + * partitioning the edges. + */ + def apply[VD: ClassTag, ED: ClassTag]( + vertices: RDD[(VertexID, VD)], + edges: RDD[Edge[ED]], + defaultVertexAttr: VD = null.asInstanceOf[VD]): Graph[VD, ED] = { + GraphImpl(vertices, edges, defaultVertexAttr) + } + + /** + * The implicit graphToGraphOPs function extracts the GraphOps member from a graph. + * + * To improve modularity the Graph type only contains a small set of basic operations. All the + * convenience operations are defined in the GraphOps class which may be shared across multiple + * graph implementations. 
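+ *
+ * @example A minimal sketch: with the implicit in scope, `GraphOps` members read
+ * as if they were defined on `Graph` itself (`rawEdges` is assumed to be an
+ * RDD[(VertexID, VertexID)]):
+ * {{{
+ * val graph: Graph[Int, Int] = Graph.fromEdgeTuples(rawEdges, defaultValue = 1)
+ * val numV: Long = graph.numVertices        // resolved via graphToGraphOps
+ * val inDeg: VertexRDD[Int] = graph.inDegrees
+ * }}}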
+ */ + implicit def graphToGraphOps[VD: ClassTag, ED: ClassTag](g: Graph[VD, ED]) = g.ops +} // end of Graph object diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala new file mode 100644 index 0000000000..f8aab951f0 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala @@ -0,0 +1,28 @@ +package org.apache.spark.graphx + +import com.esotericsoftware.kryo.Kryo + +import org.apache.spark.graphx.impl._ +import org.apache.spark.serializer.KryoRegistrator +import org.apache.spark.util.collection.BitSet +import org.apache.spark.util.BoundedPriorityQueue + + +class GraphKryoRegistrator extends KryoRegistrator { + + def registerClasses(kryo: Kryo) { + kryo.register(classOf[Edge[Object]]) + kryo.register(classOf[MessageToPartition[Object]]) + kryo.register(classOf[VertexBroadcastMsg[Object]]) + kryo.register(classOf[(VertexID, Object)]) + kryo.register(classOf[EdgePartition[Object]]) + kryo.register(classOf[BitSet]) + kryo.register(classOf[VertexIdToIndexMap]) + kryo.register(classOf[VertexAttributeBlock[Object]]) + kryo.register(classOf[PartitionStrategy]) + kryo.register(classOf[BoundedPriorityQueue[Object]]) + + // This avoids a large number of hash table lookups. + kryo.setReferences(false) + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala new file mode 100644 index 0000000000..437288405f --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala @@ -0,0 +1,134 @@ +package org.apache.spark.graphx + +import scala.reflect.ClassTag + +import org.apache.spark.Logging +import scala.collection.JavaConversions._ +import org.apache.spark.rdd.RDD + +/** + * This object implements the GraphLab gather-apply-scatter api. + */ +object GraphLab extends Logging { + + /** + * Execute the GraphLab Gather-Apply-Scatter API + * + * @todo finish documenting GraphLab Gather-Apply-Scatter API + * + * @param graph The graph on which to execute the GraphLab API + * @param gatherFunc The gather function is executed on each edge triplet + * adjacent to a vertex and returns an accumulator which + * is then merged using the merge function. + * @param mergeFunc An accumulative associative operation on the result of + * the gather type. + * @param applyFunc Takes a vertex and the final result of the merge operations + * on the adjacent edges and returns a new vertex value. + * @param scatterFunc Executed after the apply function the scatter function takes + * a triplet and signals whether the neighboring vertex program + * must be recomputed. + * @param startVertices predicate to determine which vertices to start the computation on. + * these will be the active vertices in the first iteration. + * @param numIter The maximum number of iterations to run. 
+ * @param gatherDirection The direction of edges to consider during the gather phase + * @param scatterDirection The direction of edges to consider during the scatter phase + * + * @tparam VD The graph vertex attribute type + * @tparam ED The graph edge attribute type + * @tparam A The type accumulated during the gather phase + * @return the resulting graph after the algorithm converges + */ + def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] + (graph: Graph[VD, ED], numIter: Int, + gatherDirection: EdgeDirection = EdgeDirection.In, + scatterDirection: EdgeDirection = EdgeDirection.Out) + (gatherFunc: (VertexID, EdgeTriplet[VD, ED]) => A, + mergeFunc: (A, A) => A, + applyFunc: (VertexID, VD, Option[A]) => VD, + scatterFunc: (VertexID, EdgeTriplet[VD, ED]) => Boolean, + startVertices: (VertexID, VD) => Boolean = (vid: VertexID, data: VD) => true) + : Graph[VD, ED] = { + + + // Add an active attribute to all vertices to track convergence. + var activeGraph: Graph[(Boolean, VD), ED] = graph.mapVertices { + case (id, data) => (startVertices(id, data), data) + }.cache() + + // The gather function wrapper strips the active attribute and + // only invokes the gather function on active vertices + def gather(vid: VertexID, e: EdgeTriplet[(Boolean, VD), ED]): Option[A] = { + if (e.vertexAttr(vid)._1) { + val edgeTriplet = new EdgeTriplet[VD,ED] + edgeTriplet.set(e) + edgeTriplet.srcAttr = e.srcAttr._2 + edgeTriplet.dstAttr = e.dstAttr._2 + Some(gatherFunc(vid, edgeTriplet)) + } else { + None + } + } + + // The apply function wrapper strips the vertex of the active attribute + // and only invokes the apply function on active vertices + def apply(vid: VertexID, data: (Boolean, VD), accum: Option[A]): (Boolean, VD) = { + val (active, vData) = data + if (active) (true, applyFunc(vid, vData, accum)) + else (false, vData) + } + + // The scatter function wrapper strips the vertex of the active attribute + // and only invokes the scatter function on active vertices + def scatter(rawVertexID: VertexID, e: EdgeTriplet[(Boolean, VD), ED]): Option[Boolean] = { + val vid = e.otherVertexId(rawVertexID) + if (e.vertexAttr(vid)._1) { + val edgeTriplet = new EdgeTriplet[VD,ED] + edgeTriplet.set(e) + edgeTriplet.srcAttr = e.srcAttr._2 + edgeTriplet.dstAttr = e.dstAttr._2 + Some(scatterFunc(vid, edgeTriplet)) + } else { + None + } + } + + // Used to set the active status of vertices for the next round + def applyActive( + vid: VertexID, data: (Boolean, VD), newActiveOpt: Option[Boolean]): (Boolean, VD) = { + val (prevActive, vData) = data + (newActiveOpt.getOrElse(false), vData) + } + + // Main Loop --------------------------------------------------------------------- + var i = 0 + var numActive = activeGraph.numVertices + while (i < numIter && numActive > 0) { + + // Gather + val gathered: RDD[(VertexID, A)] = + activeGraph.aggregateNeighbors(gather, mergeFunc, gatherDirection) + + // Apply + activeGraph = activeGraph.outerJoinVertices(gathered)(apply).cache() + + + + // Scatter is basically a gather in the opposite direction so we reverse the edge direction + // activeGraph: Graph[(Boolean, VD), ED] + val scattered: RDD[(VertexID, Boolean)] = + activeGraph.aggregateNeighbors(scatter, _ || _, scatterDirection.reverse) + + activeGraph = activeGraph.outerJoinVertices(scattered)(applyActive).cache() + + // Calculate the number of active vertices + numActive = activeGraph.vertices.map{ + case (vid, data) => if (data._1) 1 else 0 + }.reduce(_ + _) + logInfo("Number active vertices: " + numActive) + i += 1 + } + + // 
Remove the active attribute from the vertex data before returning the graph + activeGraph.mapVertices{case (vid, data) => data._2 } + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala new file mode 100644 index 0000000000..473cfb18cf --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala @@ -0,0 +1,113 @@ +package org.apache.spark.graphx + +import java.util.{Arrays => JArrays} +import scala.reflect.ClassTag + +import org.apache.spark.graphx.impl.EdgePartitionBuilder +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.graphx.impl.{EdgePartition, GraphImpl} +import org.apache.spark.util.collection.PrimitiveVector + + +object GraphLoader extends Logging { + + /** + * Load an edge list from file initializing the Graph + * + * @tparam ED the type of the edge data of the resulting Graph + * + * @param sc the SparkContext used to construct RDDs + * @param path the path to the text file containing the edge list + * @param edgeParser a function that takes an array of strings and + * returns an ED object + * @param minEdgePartitions the number of partitions for the + * the Edge RDD + * + */ + def textFile[ED: ClassTag]( + sc: SparkContext, + path: String, + edgeParser: Array[String] => ED, + minEdgePartitions: Int = 1): + Graph[Int, ED] = { + // Parse the edge data table + val edges = sc.textFile(path, minEdgePartitions).mapPartitions( iter => + iter.filter(line => !line.isEmpty && line(0) != '#').map { line => + val lineArray = line.split("\\s+") + if(lineArray.length < 2) { + println("Invalid line: " + line) + assert(false) + } + val source = lineArray(0).trim.toLong + val target = lineArray(1).trim.toLong + val tail = lineArray.drop(2) + val edata = edgeParser(tail) + Edge(source, target, edata) + }) + val defaultVertexAttr = 1 + Graph.fromEdges(edges, defaultVertexAttr) + } + + /** + * Load a graph from an edge list formatted file with each line containing + * two integers: a source Id and a target Id. + * + * @example A file in the following format: + * {{{ + * # Comment Line + * # Source Id <\t> Target Id + * 1 -5 + * 1 2 + * 2 7 + * 1 8 + * }}} + * + * If desired the edges can be automatically oriented in the positive + * direction (source Id < target Id) by setting `canonicalOrientation` to + * true + * + * @param sc + * @param path the path to the file (e.g., /Home/data/file or hdfs://file) + * @param canonicalOrientation whether to orient edges in the positive + * direction. 
+ * @param minEdgePartitions the number of partitions for the + * the Edge RDD + * @tparam ED + * @return + */ + def edgeListFile( + sc: SparkContext, + path: String, + canonicalOrientation: Boolean = false, + minEdgePartitions: Int = 1): + Graph[Int, Int] = { + val startTime = System.currentTimeMillis + + // Parse the edge data table directly into edge partitions + val edges = sc.textFile(path, minEdgePartitions).mapPartitionsWithIndex { (pid, iter) => + val builder = new EdgePartitionBuilder[Int] + iter.foreach { line => + if (!line.isEmpty && line(0) != '#') { + val lineArray = line.split("\\s+") + if (lineArray.length < 2) { + logWarning("Invalid line: " + line) + } + val srcId = lineArray(0).toLong + val dstId = lineArray(1).toLong + if (canonicalOrientation && dstId > srcId) { + builder.add(dstId, srcId, 1) + } else { + builder.add(srcId, dstId, 1) + } + } + } + Iterator((pid, builder.toEdgePartition)) + }.cache() + edges.count() + + logInfo("It took %d ms to load the edges".format(System.currentTimeMillis - startTime)) + + GraphImpl.fromEdgePartitions(edges, defaultVertexAttr = 1) + } // end of edgeListFile + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala new file mode 100644 index 0000000000..cacfcb1c90 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -0,0 +1,277 @@ +package org.apache.spark.graphx + +import scala.reflect.ClassTag + +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import org.apache.spark.SparkException + + +/** + * `GraphOps` contains additional functionality (syntatic sugar) for + * the graph type and is implicitly constructed for each Graph object. + * All operations in `GraphOps` are expressed in terms of the + * efficient GraphX API. + * + * @tparam VD the vertex attribute type + * @tparam ED the edge attribute type + * + */ +class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) { + + /** + * Compute the number of edges in the graph. + */ + lazy val numEdges: Long = graph.edges.count() + + + /** + * Compute the number of vertices in the graph. + */ + lazy val numVertices: Long = graph.vertices.count() + + + /** + * Compute the in-degree of each vertex in the Graph returning an + * RDD. + * @note Vertices with no in edges are not returned in the resulting RDD. + */ + lazy val inDegrees: VertexRDD[Int] = degreesRDD(EdgeDirection.In) + + + /** + * Compute the out-degree of each vertex in the Graph returning an RDD. + * @note Vertices with no out edges are not returned in the resulting RDD. + */ + lazy val outDegrees: VertexRDD[Int] = degreesRDD(EdgeDirection.Out) + + + /** + * Compute the degrees of each vertex in the Graph returning an RDD. + * @note Vertices with no edges are not returned in the resulting + * RDD. + */ + lazy val degrees: VertexRDD[Int] = degreesRDD(EdgeDirection.Both) + + + /** + * Compute the neighboring vertex degrees. + * + * @param edgeDirection the direction along which to collect + * neighboring vertex attributes. 
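+ *
+ * @example degreesRDD backs the public degree members defined above; a minimal
+ * sketch of their use:
+ * {{{
+ * val inDeg: VertexRDD[Int] = graph.inDegrees    // EdgeDirection.In
+ * val outDeg: VertexRDD[Int] = graph.outDegrees  // EdgeDirection.Out
+ * val deg: VertexRDD[Int] = graph.degrees        // EdgeDirection.Both
+ * }}}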
+ */ + private def degreesRDD(edgeDirection: EdgeDirection): VertexRDD[Int] = { + if (edgeDirection == EdgeDirection.In) { + graph.mapReduceTriplets(et => Iterator((et.dstId,1)), _ + _) + } else if (edgeDirection == EdgeDirection.Out) { + graph.mapReduceTriplets(et => Iterator((et.srcId,1)), _ + _) + } else { // EdgeDirection.both + graph.mapReduceTriplets(et => Iterator((et.srcId,1), (et.dstId,1)), _ + _) + } + } + + + /** + * This function is used to compute a statistic for the neighborhood + * of each vertex and returns a value for all vertices (including + * those without neighbors). + * + * @note Because the a default value is provided all vertices will + * have a corresponding entry in the returned RDD. + * + * @param mapFunc the function applied to each edge adjacent to each + * vertex. The mapFunc can optionally return None in which case it + * does not contribute to the final sum. + * @param reduceFunc the function used to merge the results of each + * map operation. + * @param default the default value to use for each vertex if it has + * no neighbors or the map function repeatedly evaluates to none + * @param direction the direction of edges to consider (e.g., In, + * Out, Both). + * @tparam VD2 The returned type of the aggregation operation. + * + * @return A Spark.RDD containing tuples of vertex identifiers and + * their resulting value. There will be exactly one entry for ever + * vertex in the original graph. + * + * @example We can use this function to compute the average follower + * age for each user + * + * {{{ + * val graph: Graph[Int,Int] = loadGraph() + * val averageFollowerAge: RDD[(Int, Int)] = + * graph.aggregateNeighbors[(Int,Double)]( + * (vid, edge) => (edge.otherVertex(vid).data, 1), + * (a, b) => (a._1 + b._1, a._2 + b._2), + * -1, + * EdgeDirection.In) + * .mapValues{ case (sum,followers) => sum.toDouble / followers} + * }}} + * + * @todo Should this return a graph with the new vertex values? + * + */ + def aggregateNeighbors[A: ClassTag]( + mapFunc: (VertexID, EdgeTriplet[VD, ED]) => Option[A], + reduceFunc: (A, A) => A, + dir: EdgeDirection) + : VertexRDD[A] = { + + // Define a new map function over edge triplets + val mf = (et: EdgeTriplet[VD,ED]) => { + // Compute the message to the dst vertex + val dst = + if (dir == EdgeDirection.In || dir == EdgeDirection.Both) { + mapFunc(et.dstId, et) + } else { Option.empty[A] } + // Compute the message to the source vertex + val src = + if (dir == EdgeDirection.Out || dir == EdgeDirection.Both) { + mapFunc(et.srcId, et) + } else { Option.empty[A] } + // construct the return array + (src, dst) match { + case (None, None) => Iterator.empty + case (Some(srcA),None) => Iterator((et.srcId, srcA)) + case (None, Some(dstA)) => Iterator((et.dstId, dstA)) + case (Some(srcA), Some(dstA)) => Iterator((et.srcId, srcA), (et.dstId, dstA)) + } + } + + graph.mapReduceTriplets(mf, reduceFunc) + } // end of aggregateNeighbors + + + /** + * Return the Ids of the neighboring vertices. + * + * @param edgeDirection the direction along which to collect + * neighboring vertices + * + * @return the vertex set of neighboring ids for each vertex. 
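+ *
+ * @example Collecting the ids of the out-neighbors of every vertex
+ * (a minimal sketch):
+ * {{{
+ * val outNbrs: VertexRDD[Array[VertexID]] =
+ *   graph.collectNeighborIds(EdgeDirection.Out)
+ * }}}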
+ */ + def collectNeighborIds(edgeDirection: EdgeDirection) : + VertexRDD[Array[VertexID]] = { + val nbrs = + if (edgeDirection == EdgeDirection.Both) { + graph.mapReduceTriplets[Array[VertexID]]( + mapFunc = et => Iterator((et.srcId, Array(et.dstId)), (et.dstId, Array(et.srcId))), + reduceFunc = _ ++ _ + ) + } else if (edgeDirection == EdgeDirection.Out) { + graph.mapReduceTriplets[Array[VertexID]]( + mapFunc = et => Iterator((et.srcId, Array(et.dstId))), + reduceFunc = _ ++ _) + } else if (edgeDirection == EdgeDirection.In) { + graph.mapReduceTriplets[Array[VertexID]]( + mapFunc = et => Iterator((et.dstId, Array(et.srcId))), + reduceFunc = _ ++ _) + } else { + throw new SparkException("It doesn't make sense to collect neighbor ids without a direction.") + } + graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) => + nbrsOpt.getOrElse(Array.empty[VertexID]) + } + } // end of collectNeighborIds + + + /** + * Collect the neighbor vertex attributes for each vertex. + * + * @note This function could be highly inefficient on power-law + * graphs where high degree vertices may force a large ammount of + * information to be collected to a single location. + * + * @param edgeDirection the direction along which to collect + * neighboring vertices + * + * @return the vertex set of neighboring vertex attributes for each + * vertex. + */ + def collectNeighbors(edgeDirection: EdgeDirection) : + VertexRDD[ Array[(VertexID, VD)] ] = { + val nbrs = graph.aggregateNeighbors[Array[(VertexID,VD)]]( + (vid, edge) => + Some(Array( (edge.otherVertexId(vid), edge.otherVertexAttr(vid)) )), + (a, b) => a ++ b, + edgeDirection) + + graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) => + nbrsOpt.getOrElse(Array.empty[(VertexID, VD)]) + } + } // end of collectNeighbor + + + /** + * Join the vertices with an RDD and then apply a function from the + * the vertex and RDD entry to a new vertex value. The input table + * should contain at most one entry for each vertex. If no entry is + * provided the map function is skipped and the old value is used. + * + * @tparam U the type of entry in the table of updates + * @param table the table to join with the vertices in the graph. + * The table should contain at most one entry for each vertex. + * @param mapFunc the function used to compute the new vertex + * values. The map function is invoked only for vertices with a + * corresponding entry in the table otherwise the old vertex value + * is used. + * + * @note for small tables this function can be much more efficient + * than leftJoinVertices + * + * @example This function is used to update the vertices with new + * values based on external data. For example we could add the out + * degree to each vertex record + * + * {{{ + * val rawGraph: Graph[Int,()] = Graph.textFile("webgraph") + * .mapVertices(v => 0) + * val outDeg: RDD[(Int, Int)] = rawGraph.outDegrees() + * val graph = rawGraph.leftJoinVertices[Int,Int](outDeg, + * (v, deg) => deg ) + * }}} + * + */ + def joinVertices[U: ClassTag](table: RDD[(VertexID, U)])(mapFunc: (VertexID, VD, U) => VD) + : Graph[VD, ED] = { + val uf = (id: VertexID, data: VD, o: Option[U]) => { + o match { + case Some(u) => mapFunc(id, data, u) + case None => data + } + } + graph.outerJoinVertices(table)(uf) + } + + /** + * Filter the graph by computing some values to filter on, and applying the predicates. 
+ * + * @param preprocess a function to compute new vertex and edge data before filtering + * @param epred edge pred to filter on after preprocess, see more details under Graph#subgraph + * @param vpred vertex pred to filter on after prerocess, see more details under Graph#subgraph + * @tparam VD2 vertex type the vpred operates on + * @tparam ED2 edge type the epred operates on + * @return a subgraph of the orginal graph, with its data unchanged + * + * @example This function can be used to filter the graph based on some property, without + * changing the vertex and edge values in your program. For example, we could remove the vertices + * in a graph with 0 outdegree + * + * {{{ + * graph.filter( + * graph => { + * val degrees: VertexSetRDD[Int] = graph.outDegrees + * graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)} + * }, + * vpred = (vid: VertexID, deg:Int) => deg > 0 + * ) + * }}} + * + */ + def filter[VD2: ClassTag, ED2: ClassTag]( + preprocess: Graph[VD, ED] => Graph[VD2, ED2], + epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true, + vpred: (VertexID, VD2) => Boolean = (v:VertexID, d:VD2) => true): Graph[VD, ED] = { + graph.mask(preprocess(graph).subgraph(epred, vpred)) + } +} // end of GraphOps diff --git a/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala new file mode 100644 index 0000000000..5e80a535f1 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/PartitionStrategy.scala @@ -0,0 +1,94 @@ +package org.apache.spark.graphx + + +sealed trait PartitionStrategy extends Serializable { + def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID +} + + +/** + * This function implements a classic 2D-Partitioning of a sparse matrix. + * Suppose we have a graph with 11 vertices that we want to partition + * over 9 machines. We can use the following sparse matrix representation: + * + * __________________________________ + * v0 | P0 * | P1 | P2 * | + * v1 | **** | * | | + * v2 | ******* | ** | **** | + * v3 | ***** | * * | * | + * ---------------------------------- + * v4 | P3 * | P4 *** | P5 ** * | + * v5 | * * | * | | + * v6 | * | ** | **** | + * v7 | * * * | * * | * | + * ---------------------------------- + * v8 | P6 * | P7 * | P8 * *| + * v9 | * | * * | | + * v10 | * | ** | * * | + * v11 | * <-E | *** | ** | + * ---------------------------------- + * + * The edge denoted by E connects v11 with v1 and is assigned to + * processor P6. To get the processor number we divide the matrix + * into sqrt(numProc) by sqrt(numProc) blocks. Notice that edges + * adjacent to v11 can only be in the first colum of + * blocks (P0, P3, P6) or the last row of blocks (P6, P7, P8). + * As a consequence we can guarantee that v11 will need to be + * replicated to at most 2 * sqrt(numProc) machines. + * + * Notice that P0 has many edges and as a consequence this + * partitioning would lead to poor work balance. To improve + * balance we first multiply each vertex id by a large prime + * to effectively shuffle the vertex locations. + * + * One of the limitations of this approach is that the number of + * machines must either be a perfect square. We partially address + * this limitation by computing the machine assignment to the next + * largest perfect square and then mapping back down to the actual + * number of machines. 
Unfortunately, this can also lead to work + * imbalance and so it is suggested that a perfect square is used. + * + * + */ +case object EdgePartition2D extends PartitionStrategy { + override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { + val ceilSqrtNumParts: PartitionID = math.ceil(math.sqrt(numParts)).toInt + val mixingPrime: VertexID = 1125899906842597L + val col: PartitionID = ((math.abs(src) * mixingPrime) % ceilSqrtNumParts).toInt + val row: PartitionID = ((math.abs(dst) * mixingPrime) % ceilSqrtNumParts).toInt + (col * ceilSqrtNumParts + row) % numParts + } +} + + +case object EdgePartition1D extends PartitionStrategy { + override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { + val mixingPrime: VertexID = 1125899906842597L + (math.abs(src) * mixingPrime).toInt % numParts + } +} + + +/** + * Assign edges to an aribtrary machine corresponding to a + * random vertex cut. + */ +case object RandomVertexCut extends PartitionStrategy { + override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { + math.abs((src, dst).hashCode()) % numParts + } +} + + +/** + * Assign edges to an arbitrary machine corresponding to a random vertex cut. This + * function ensures that edges of opposite direction between the same two vertices + * will end up on the same partition. + */ +case object CanonicalRandomVertexCut extends PartitionStrategy { + override def getPartition(src: VertexID, dst: VertexID, numParts: PartitionID): PartitionID = { + val lower = math.min(src, dst) + val higher = math.max(src, dst) + math.abs((lower, higher).hashCode()) % numParts + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala new file mode 100644 index 0000000000..8ddb788135 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -0,0 +1,122 @@ +package org.apache.spark.graphx + +import scala.reflect.ClassTag + + +/** + * This object implements a Pregel-like bulk-synchronous + * message-passing API. However, unlike the original Pregel API the + * GraphX pregel API factors the sendMessage computation over edges, + * enables the message sending computation to read both vertex + * attributes, and finally constrains messages to the graph structure. + * These changes allow for substantially more efficient distributed + * execution while also exposing greater flexibility for graph based + * computation. + * + * @example We can use the Pregel abstraction to implement PageRank + * {{{ + * val pagerankGraph: Graph[Double, Double] = graph + * // Associate the degree with each vertex + * .outerJoinVertices(graph.outDegrees){ + * (vid, vdata, deg) => deg.getOrElse(0) + * } + * // Set the weight on the edges based on the degree + * .mapTriplets( e => 1.0 / e.srcAttr ) + * // Set the vertex attributes to the initial pagerank values + * .mapVertices( (id, attr) => 1.0 ) + * + * def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double = + * resetProb + (1.0 - resetProb) * msgSum + * def sendMessage(id: VertexID, edge: EdgeTriplet[Double, Double]): Option[Double] = + * Some(edge.srcAttr * edge.attr) + * def messageCombiner(a: Double, b: Double): Double = a + b + * val initialMessage = 0.0 + * // Execute pregel for a fixed number of iterations. 
+ * Pregel(pagerankGraph, initialMessage, numIter)( + * vertexProgram, sendMessage, messageCombiner) + * }}} + * + */ +object Pregel { + + /** + * Execute a Pregel-like iterative vertex-parallel abstraction. The + * user-defined vertex-program `vprog` is executed in parallel on + * each vertex receiving any inbound messages and computing a new + * value for the vertex. The `sendMsg` function is then invoked on + * all out-edges and is used to compute an optional message to the + * destination vertex. The `mergeMsg` function is a commutative + * associative function used to combine messages destined to the + * same vertex. + * + * On the first iteration all vertices receive the `initialMsg` and + * on subsequent iterations if a vertex does not receive a message + * then the vertex-program is not invoked. + * + * This function iterates until there are no remaining messages, or + * for maxIterations iterations. + * + * @tparam VD the vertex data type + * @tparam ED the edge data type + * @tparam A the Pregel message type + * + * @param graph the input graph. + * + * @param initialMsg the message each vertex will receive at the on + * the first iteration. + * + * @param maxIterations the maximum number of iterations to run for. + * + * @param vprog the user-defined vertex program which runs on each + * vertex and receives the inbound message and computes a new vertex + * value. On the first iteration the vertex program is invoked on + * all vertices and is passed the default message. On subsequent + * iterations the vertex program is only invoked on those vertices + * that receive messages. + * + * @param sendMsg a user supplied function that is applied to out + * edges of vertices that received messages in the current + * iteration. + * + * @param mergeMsg a user supplied function that takes two incoming + * messages of type A and merges them into a single message of type + * A. ''This function must be commutative and associative and + * ideally the size of A should not increase.'' + * + * @return the resulting graph at the end of the computation + * + */ + def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] + (graph: Graph[VD, ED], initialMsg: A, maxIterations: Int = Int.MaxValue)( + vprog: (VertexID, VD, A) => VD, + sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexID,A)], + mergeMsg: (A, A) => A) + : Graph[VD, ED] = { + + var g = graph.mapVertices( (vid, vdata) => vprog(vid, vdata, initialMsg) ) + // compute the messages + var messages = g.mapReduceTriplets(sendMsg, mergeMsg).cache() + var activeMessages = messages.count() + // Loop + var i = 0 + while (activeMessages > 0 && i < maxIterations) { + // Receive the messages. Vertices that didn't get any messages do not appear in newVerts. + val newVerts = g.vertices.innerJoin(messages)(vprog).cache() + // Update the graph with the new vertices. + g = g.outerJoinVertices(newVerts) { (vid, old, newOpt) => newOpt.getOrElse(old) } + + val oldMessages = messages + // Send new messages. Vertices that didn't get any messages don't appear in newVerts, so don't + // get to send messages. 
+ messages = g.mapReduceTriplets(sendMsg, mergeMsg, Some((newVerts, EdgeDirection.Out))).cache() + activeMessages = messages.count() + // after counting we can unpersist the old messages + oldMessages.unpersist(blocking=false) + // count the iteration + i += 1 + } + + g + } // end of apply + +} // end of class Pregel diff --git a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala new file mode 100644 index 0000000000..cfee9b089f --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala @@ -0,0 +1,361 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.graphx + +import scala.reflect.ClassTag + +import org.apache.spark._ +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd._ +import org.apache.spark.storage.StorageLevel + +import org.apache.spark.graphx.impl.MsgRDDFunctions +import org.apache.spark.graphx.impl.VertexPartition + + +/** + * A `VertexRDD[VD]` extends the `RDD[(VertexID, VD)]` by ensuring that there is + * only one entry for each vertex and by pre-indexing the entries for fast, + * efficient joins. + * + * @tparam VD the vertex attribute associated with each vertex in the set. + * + * To construct a `VertexRDD` use the singleton object: + * + * @example Construct a `VertexRDD` from a plain RDD + * {{{ + * // Construct an intial vertex set + * val someData: RDD[(VertexID, SomeType)] = loadData(someFile) + * val vset = VertexRDD(someData) + * // If there were redundant values in someData we would use a reduceFunc + * val vset2 = VertexRDD(someData, reduceFunc) + * // Finally we can use the VertexRDD to index another dataset + * val otherData: RDD[(VertexID, OtherType)] = loadData(otherFile) + * val vset3 = VertexRDD(otherData, vset.index) + * // Now we can construct very fast joins between the two sets + * val vset4: VertexRDD[(SomeType, OtherType)] = vset.leftJoin(vset3) + * }}} + * + */ +class VertexRDD[@specialized VD: ClassTag]( + val partitionsRDD: RDD[VertexPartition[VD]]) + extends RDD[(VertexID, VD)](partitionsRDD.context, List(new OneToOneDependency(partitionsRDD))) { + + require(partitionsRDD.partitioner.isDefined) + + partitionsRDD.setName("VertexRDD") + + /** + * Construct a new VertexRDD that is indexed by only the keys in the RDD. + * The resulting VertexRDD will be based on a different index and can + * no longer be quickly joined with this RDD. + */ + def reindex(): VertexRDD[VD] = new VertexRDD(partitionsRDD.map(_.reindex())) + + /** + * The partitioner is defined by the index. + */ + override val partitioner = partitionsRDD.partitioner + + /** + * The actual partitions are defined by the tuples. 
+ */ + override protected def getPartitions: Array[Partition] = partitionsRDD.partitions + + /** + * The preferred locations are computed based on the preferred + * locations of the tuples. + */ + override protected def getPreferredLocations(s: Partition): Seq[String] = + partitionsRDD.preferredLocations(s) + + /** + * Caching a VertexRDD causes the index and values to be cached separately. + */ + override def persist(newLevel: StorageLevel): VertexRDD[VD] = { + partitionsRDD.persist(newLevel) + this + } + + /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ + override def persist(): VertexRDD[VD] = persist(StorageLevel.MEMORY_ONLY) + + /** Persist this RDD with the default storage level (`MEMORY_ONLY`). */ + override def cache(): VertexRDD[VD] = persist() + + /** Return the number of vertices in this set. */ + override def count(): Long = { + partitionsRDD.map(_.size).reduce(_ + _) + } + + /** + * Provide the `RDD[(VertexID, VD)]` equivalent output. + */ + override def compute(part: Partition, context: TaskContext): Iterator[(VertexID, VD)] = { + firstParent[VertexPartition[VD]].iterator(part, context).next.iterator + } + + /** + * Return a new VertexRDD by applying a function to each VertexPartition of this RDD. + */ + def mapVertexPartitions[VD2: ClassTag](f: VertexPartition[VD] => VertexPartition[VD2]) + : VertexRDD[VD2] = { + val newPartitionsRDD = partitionsRDD.mapPartitions(_.map(f), preservesPartitioning = true) + new VertexRDD(newPartitionsRDD) + } + + + /** + * Restrict the vertex set to the set of vertices satisfying the + * given predicate. + * + * @param pred the user defined predicate, which takes a tuple to conform to + * the RDD[(VertexID, VD)] interface + * + * @note The vertex set preserves the original index structure + * which means that the returned RDD can be easily joined with + * the original vertex-set. Furthermore, the filter only + * modifies the bitmap index and so no new values are allocated. + */ + override def filter(pred: Tuple2[VertexID, VD] => Boolean): VertexRDD[VD] = + this.mapVertexPartitions(_.filter(Function.untupled(pred))) + + /** + * Pass each vertex attribute through a map function and retain the + * original RDD's partitioning and index. + * + * @tparam VD2 the type returned by the map function + * + * @param f the function applied to each value in the RDD + * @return a new VertexRDD with values obtained by applying `f` to + * each of the entries in the original VertexRDD. The resulting + * VertexRDD retains the same index. + */ + def mapValues[VD2: ClassTag](f: VD => VD2): VertexRDD[VD2] = + this.mapVertexPartitions(_.map((vid, attr) => f(attr))) + + /** + * Pass each vertex attribute through a map function and retain the + * original RDD's partitioning and index. + * + * @tparam VD2 the type returned by the map function + * + * @param f the function applied to each value in the RDD + * @return a new VertexRDD with values obtained by applying `f` to + * each of the entries in the original VertexRDD. The resulting + * VertexRDD retains the same index. + */ + def mapValues[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexRDD[VD2] = + this.mapVertexPartitions(_.map(f)) + + /** + * Hides vertices that are the same between this and other. For vertices that are different, keeps + * the values from `other`. 
+ */
+ def diff(other: VertexRDD[VD]): VertexRDD[VD] = {
+ val newPartitionsRDD = partitionsRDD.zipPartitions(
+ other.partitionsRDD, preservesPartitioning = true
+ ) { (thisIter, otherIter) =>
+ val thisPart = thisIter.next()
+ val otherPart = otherIter.next()
+ Iterator(thisPart.diff(otherPart))
+ }
+ new VertexRDD(newPartitionsRDD)
+ }
+
+ /**
+ * Left join this VertexSet with another VertexSet which has the
+ * same index. This function will fail if the two VertexSets do not
+ * share the same index. The resulting vertex set contains an entry
+ * for each vertex in this set. If the other VertexSet is missing
+ * any vertex in this VertexSet then a `None` attribute is generated.
+ *
+ * @tparam VD2 the attribute type of the other VertexSet
+ * @tparam VD3 the attribute type of the resulting VertexSet
+ *
+ * @param other the other VertexSet with which to join.
+ * @param f the function mapping a vertex id and its attributes in
+ * this and the other vertex set to a new vertex attribute.
+ * @return a VertexRDD containing all the vertices in this
+ * VertexSet with `None` attributes used for vertices missing in the
+ * other VertexSet.
+ *
+ */
+ def leftZipJoin[VD2: ClassTag, VD3: ClassTag]
+ (other: VertexRDD[VD2])(f: (VertexID, VD, Option[VD2]) => VD3): VertexRDD[VD3] = {
+ val newPartitionsRDD = partitionsRDD.zipPartitions(
+ other.partitionsRDD, preservesPartitioning = true
+ ) { (thisIter, otherIter) =>
+ val thisPart = thisIter.next()
+ val otherPart = otherIter.next()
+ Iterator(thisPart.leftJoin(otherPart)(f))
+ }
+ new VertexRDD(newPartitionsRDD)
+ }
+
+ /**
+ * Left join this VertexRDD with an RDD containing vertex attribute
+ * pairs. If the other RDD is backed by a VertexRDD with the same
+ * index then the efficient leftZipJoin implementation is used. The
+ * resulting vertex set contains an entry for each vertex in this
+ * set. If the other VertexRDD is missing any vertex in this
+ * VertexRDD then a `None` attribute is generated.
+ *
+ * If there are duplicates, the vertex is picked at random.
+ *
+ * @tparam VD2 the attribute type of the other VertexRDD
+ * @tparam VD3 the attribute type of the resulting VertexRDD
+ *
+ * @param other the other VertexRDD with which to join.
+ * @param f the function mapping a vertex id and its attributes in
+ * this and the other vertex set to a new vertex attribute.
+ * @return a VertexRDD containing all the vertices in this
+ * VertexRDD with the attribute emitted by f.
+ */
+ def leftJoin[VD2: ClassTag, VD3: ClassTag]
+ (other: RDD[(VertexID, VD2)])
+ (f: (VertexID, VD, Option[VD2]) => VD3)
+ : VertexRDD[VD3] =
+ {
+ // Test if the other RDD is a VertexRDD to choose the optimal join strategy.
+ // If the other set is a VertexRDD then we use the much more efficient leftZipJoin
+ other match {
+ case other: VertexRDD[_] =>
+ leftZipJoin(other)(f)
+ case _ =>
+ new VertexRDD[VD3](
+ partitionsRDD.zipPartitions(
+ other.partitionBy(this.partitioner.get), preservesPartitioning = true)
+ { (part, msgs) =>
+ val vertexPartition: VertexPartition[VD] = part.next()
+ Iterator(vertexPartition.leftJoin(msgs)(f))
+ }
+ )
+ }
+ }
+
+ /**
+ * Same effect as leftJoin(other) { (vid, a, bOpt) => bOpt.getOrElse(a) }, but `this` and `other`
+ * must have the same index.
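As a usage-level sketch of the leftJoin described above; the local SparkContext, vertex ids, and attribute values here are hypothetical:

{{{
import org.apache.spark.SparkContext
import org.apache.spark.graphx._

object LeftJoinExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "leftJoinExample")
    // A vertex set of user names, and a plain RDD holding out-degrees for only some of them.
    val users = VertexRDD(sc.parallelize(Seq((1L, "alice"), (2L, "bob"), (3L, "carol"))))
    val outDegrees = sc.parallelize(Seq((1L, 5), (3L, 2)))
    // Every vertex of `users` is kept; vertices absent from `outDegrees` see None.
    val withDeg = users.leftJoin(outDegrees) { (vid, name, degOpt) => (name, degOpt.getOrElse(0)) }
    withDeg.collect().foreach(println) // vertex 2 comes out as (2,(bob,0))
  }
}
}}}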
+ */ + def innerZipJoin[U: ClassTag, VD2: ClassTag](other: VertexRDD[U]) + (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = { + val newPartitionsRDD = partitionsRDD.zipPartitions( + other.partitionsRDD, preservesPartitioning = true + ) { (thisIter, otherIter) => + val thisPart = thisIter.next() + val otherPart = otherIter.next() + Iterator(thisPart.innerJoin(otherPart)(f)) + } + new VertexRDD(newPartitionsRDD) + } + + /** + * Replace vertices with corresponding vertices in `other`, and drop vertices without a + * corresponding vertex in `other`. + */ + def innerJoin[U: ClassTag, VD2: ClassTag](other: RDD[(VertexID, U)]) + (f: (VertexID, VD, U) => VD2): VertexRDD[VD2] = { + // Test if the other vertex is a VertexRDD to choose the optimal join strategy. + // If the other set is a VertexRDD then we use the much more efficient innerZipJoin + other match { + case other: VertexRDD[_] => + innerZipJoin(other)(f) + case _ => + new VertexRDD( + partitionsRDD.zipPartitions( + other.partitionBy(this.partitioner.get), preservesPartitioning = true) + { (part, msgs) => + val vertexPartition: VertexPartition[VD] = part.next() + Iterator(vertexPartition.innerJoin(msgs)(f)) + } + ) + } + } + + /** + * Aggregate messages with the same ids using `reduceFunc`, returning a VertexRDD that is + * co-indexed with this one. + */ + def aggregateUsingIndex[VD2: ClassTag]( + messages: RDD[(VertexID, VD2)], reduceFunc: (VD2, VD2) => VD2): VertexRDD[VD2] = + { + val shuffled = MsgRDDFunctions.partitionForAggregation(messages, this.partitioner.get) + val parts = partitionsRDD.zipPartitions(shuffled, true) { (thisIter, msgIter) => + val vertexPartition: VertexPartition[VD] = thisIter.next() + Iterator(vertexPartition.aggregateUsingIndex(msgIter, reduceFunc)) + } + new VertexRDD[VD2](parts) + } + +} // end of VertexRDD + + +/** + * The VertexRDD singleton is used to construct VertexRDDs + */ +object VertexRDD { + + /** + * Construct a vertex set from an RDD of vertex-attribute pairs. + * Duplicate entries are removed arbitrarily. + * + * @tparam VD the vertex attribute type + * + * @param rdd the collection of vertex-attribute pairs + */ + def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)]): VertexRDD[VD] = { + val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match { + case Some(p) => rdd + case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size)) + } + val vertexPartitions = partitioned.mapPartitions( + iter => Iterator(VertexPartition(iter)), + preservesPartitioning = true) + new VertexRDD(vertexPartitions) + } + + /** + * Construct a vertex set from an RDD of vertex-attribute pairs. + * Duplicate entries are merged using mergeFunc. + * + * @tparam VD the vertex attribute type + * + * @param rdd the collection of vertex-attribute pairs + * @param mergeFunc the associative, commutative merge function. 
+ */ + def apply[VD: ClassTag](rdd: RDD[(VertexID, VD)], mergeFunc: (VD, VD) => VD): VertexRDD[VD] = + { + val partitioned: RDD[(VertexID, VD)] = rdd.partitioner match { + case Some(p) => rdd + case None => rdd.partitionBy(new HashPartitioner(rdd.partitions.size)) + } + val vertexPartitions = partitioned.mapPartitions( + iter => Iterator(VertexPartition(iter)), + preservesPartitioning = true) + new VertexRDD(vertexPartitions) + } + + def apply[VD: ClassTag](vids: RDD[VertexID], rdd: RDD[(VertexID, VD)], defaultVal: VD) + : VertexRDD[VD] = + { + VertexRDD(vids.map(vid => (vid, defaultVal))).leftJoin(rdd) { (vid, default, value) => + value.getOrElse(default) + } + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/algorithms/ConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/ConnectedComponents.scala new file mode 100644 index 0000000000..a0dd36da60 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/ConnectedComponents.scala @@ -0,0 +1,37 @@ +package org.apache.spark.graphx.algorithms + +import org.apache.spark.graphx._ + + +object ConnectedComponents { + /** + * Compute the connected component membership of each vertex and return an RDD with the vertex + * value containing the lowest vertex id in the connected component containing that vertex. + * + * @tparam VD the vertex attribute type (discarded in the computation) + * @tparam ED the edge attribute type (preserved in the computation) + * + * @param graph the graph for which to compute the connected components + * + * @return a graph with vertex attributes containing the smallest vertex in each + * connected component + */ + def run[VD: Manifest, ED: Manifest](graph: Graph[VD, ED]): Graph[VertexID, ED] = { + val ccGraph = graph.mapVertices { case (vid, _) => vid } + + def sendMessage(edge: EdgeTriplet[VertexID, ED]) = { + if (edge.srcAttr < edge.dstAttr) { + Iterator((edge.dstId, edge.srcAttr)) + } else if (edge.srcAttr > edge.dstAttr) { + Iterator((edge.srcId, edge.dstAttr)) + } else { + Iterator.empty + } + } + val initialMessage = Long.MaxValue + Pregel(ccGraph, initialMessage)( + vprog = (id, attr, msg) => math.min(attr, msg), + sendMsg = sendMessage, + mergeMsg = (a, b) => math.min(a, b)) + } // end of connectedComponents +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/algorithms/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/PageRank.scala new file mode 100644 index 0000000000..0292b7316d --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/PageRank.scala @@ -0,0 +1,205 @@ +package org.apache.spark.graphx.algorithms + +import org.apache.spark.Logging +import org.apache.spark.graphx._ + + +object PageRank extends Logging { + + /** + * Run PageRank for a fixed number of iterations returning a graph + * with vertex attributes containing the PageRank and edge + * attributes the normalized edge weight. + * + * The following PageRank fixed point is computed for each vertex. + * + * {{{ + * var PR = Array.fill(n)( 1.0 ) + * val oldPR = Array.fill(n)( 1.0 ) + * for( iter <- 0 until numIter ) { + * swap(oldPR, PR) + * for( i <- 0 until n ) { + * PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum + * } + * } + * }}} + * + * where `alpha` is the random reset probability (typically 0.15), + * `inNbrs[i]` is the set of neighbors whick link to `i` and + * `outDeg[j]` is the out degree of vertex `j`. 
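The fixed point above can also be written out as ordinary Scala over plain arrays, independent of GraphX; the 3-vertex graph below is made up purely for illustration. A vertex with no in-links would simply stay at alpha, matching the note that follows.

{{{
object PlainPageRank {
  def main(args: Array[String]): Unit = {
    // Tiny hypothetical graph: 0 -> 1, 0 -> 2, 1 -> 2, 2 -> 0
    val n = 3
    val inNbrs = Array(Seq(2), Seq(0), Seq(0, 1)) // inNbrs(i) = vertices linking to i
    val outDeg = Array(2.0, 1.0, 1.0)             // outDeg(j) = out degree of j
    val alpha = 0.15
    val numIter = 20

    var pr = Array.fill(n)(1.0)
    for (_ <- 0 until numIter) {
      val oldPR = pr
      pr = Array.tabulate(n) { i =>
        alpha + (1 - alpha) * inNbrs(i).map(j => oldPR(j) / outDeg(j)).sum
      }
    }
    pr.zipWithIndex.foreach { case (rank, i) => println(s"PR($i) = $rank") }
  }
}
}}}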
+ * + * Note that this is not the "normalized" PageRank and as a consequence pages that have no + * inlinks will have a PageRank of alpha. + * + * @tparam VD the original vertex attribute (not used) + * @tparam ED the original edge attribute (not used) + * + * @param graph the graph on which to compute PageRank + * @param numIter the number of iterations of PageRank to run + * @param resetProb the random reset probability (alpha) + * + * @return the graph containing with each vertex containing the PageRank and each edge + * containing the normalized weight. + * + */ + def run[VD: Manifest, ED: Manifest]( + graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = + { + + /** + * Initialize the pagerankGraph with each edge attribute having + * weight 1/outDegree and each vertex with attribute 1.0. + */ + val pagerankGraph: Graph[Double, Double] = graph + // Associate the degree with each vertex + .outerJoinVertices(graph.outDegrees){ + (vid, vdata, deg) => deg.getOrElse(0) + } + // Set the weight on the edges based on the degree + .mapTriplets( e => 1.0 / e.srcAttr ) + // Set the vertex attributes to the initial pagerank values + .mapVertices( (id, attr) => 1.0 ) + + // Display statistics about pagerank + logInfo(pagerankGraph.statistics.toString) + + // Define the three functions needed to implement PageRank in the GraphX + // version of Pregel + def vertexProgram(id: VertexID, attr: Double, msgSum: Double): Double = + resetProb + (1.0 - resetProb) * msgSum + def sendMessage(edge: EdgeTriplet[Double, Double]) = + Iterator((edge.dstId, edge.srcAttr * edge.attr)) + def messageCombiner(a: Double, b: Double): Double = a + b + // The initial message received by all vertices in PageRank + val initialMessage = 0.0 + + // Execute pregel for a fixed number of iterations. + Pregel(pagerankGraph, initialMessage, numIter)( + vertexProgram, sendMessage, messageCombiner) + } + + /** + * Run a dynamic version of PageRank returning a graph with vertex attributes containing the + * PageRank and edge attributes containing the normalized edge weight. + * + * {{{ + * var PR = Array.fill(n)( 1.0 ) + * val oldPR = Array.fill(n)( 0.0 ) + * while( max(abs(PR - oldPr)) > tol ) { + * swap(oldPR, PR) + * for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) { + * PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum + * } + * } + * }}} + * + * where `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of + * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`. + * + * Note that this is not the "normalized" PageRank and as a consequence pages that have no + * inlinks will have a PageRank of alpha. + * + * @tparam VD the original vertex attribute (not used) + * @tparam ED the original edge attribute (not used) + * + * @param graph the graph on which to compute PageRank + * @param tol the tolerance allowed at convergence (smaller => more * accurate). + * @param resetProb the random reset probability (alpha) + * + * @return the graph containing with each vertex containing the PageRank and each edge + * containing the normalized weight. + */ + def runUntillConvergence[VD: Manifest, ED: Manifest]( + graph: Graph[VD, ED], tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = + { + // Initialize the pagerankGraph with each edge attribute + // having weight 1/outDegree and each vertex with attribute 1.0. 
+ val pagerankGraph: Graph[(Double, Double), Double] = graph + // Associate the degree with each vertex + .outerJoinVertices(graph.outDegrees) { + (vid, vdata, deg) => deg.getOrElse(0) + } + // Set the weight on the edges based on the degree + .mapTriplets( e => 1.0 / e.srcAttr ) + // Set the vertex attributes to (initalPR, delta = 0) + .mapVertices( (id, attr) => (0.0, 0.0) ) + + // Display statistics about pagerank + logInfo(pagerankGraph.statistics.toString) + + // Define the three functions needed to implement PageRank in the GraphX + // version of Pregel + def vertexProgram(id: VertexID, attr: (Double, Double), msgSum: Double): (Double, Double) = { + val (oldPR, lastDelta) = attr + val newPR = oldPR + (1.0 - resetProb) * msgSum + (newPR, newPR - oldPR) + } + + def sendMessage(edge: EdgeTriplet[(Double, Double), Double]) = { + if (edge.srcAttr._2 > tol) { + Iterator((edge.dstId, edge.srcAttr._2 * edge.attr)) + } else { + Iterator.empty + } + } + + def messageCombiner(a: Double, b: Double): Double = a + b + + // The initial message received by all vertices in PageRank + val initialMessage = resetProb / (1.0 - resetProb) + + // Execute a dynamic version of Pregel. + Pregel(pagerankGraph, initialMessage)(vertexProgram, sendMessage, messageCombiner) + .mapVertices((vid, attr) => attr._1) + } // end of deltaPageRank + + def runStandalone[VD: Manifest, ED: Manifest]( + graph: Graph[VD, ED], tol: Double, resetProb: Double = 0.15): VertexRDD[Double] = { + + // Initialize the ranks + var ranks: VertexRDD[Double] = graph.vertices.mapValues((vid, attr) => resetProb).cache() + + // Initialize the delta graph where each vertex stores its delta and each edge knows its weight + var deltaGraph: Graph[Double, Double] = + graph.outerJoinVertices(graph.outDegrees)((vid, vdata, deg) => deg.getOrElse(0)) + .mapTriplets(e => 1.0 / e.srcAttr) + .mapVertices((vid, degree) => resetProb).cache() + var numDeltas: Long = ranks.count() + + var prevDeltas: Option[VertexRDD[Double]] = None + + var i = 0 + val weight = (1.0 - resetProb) + while (numDeltas > 0) { + // Compute new deltas. Only deltas that existed in the last round (i.e., were greater than + // `tol`) get to send messages; those that were less than `tol` would send messages less than + // `tol` as well. 
+ val deltas = deltaGraph + .mapReduceTriplets[Double]( + et => Iterator((et.dstId, et.srcAttr * et.attr * weight)), + _ + _, + prevDeltas.map((_, EdgeDirection.Out))) + .filter { case (vid, delta) => delta > tol } + .cache() + prevDeltas = Some(deltas) + numDeltas = deltas.count() + logInfo("Standalone PageRank: iter %d has %d deltas".format(i, numDeltas)) + + // Update deltaGraph with the deltas + deltaGraph = deltaGraph.outerJoinVertices(deltas) { (vid, old, newOpt) => + newOpt.getOrElse(old) + }.cache() + + // Update ranks + ranks = ranks.leftZipJoin(deltas) { (vid, oldRank, deltaOpt) => + oldRank + deltaOpt.getOrElse(0.0) + } + ranks.foreach(x => {}) // force the iteration for ease of debugging + + i += 1 + } + + ranks + } + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/algorithms/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/SVDPlusPlus.scala new file mode 100644 index 0000000000..8fdfa3d907 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/SVDPlusPlus.scala @@ -0,0 +1,103 @@ +package org.apache.spark.graphx.algorithms + +import org.apache.spark.rdd._ +import org.apache.spark.graphx._ +import scala.util.Random +import org.apache.commons.math.linear._ + +class SVDPlusPlusConf( // SVDPlusPlus parameters + var rank: Int, + var maxIters: Int, + var minVal: Double, + var maxVal: Double, + var gamma1: Double, + var gamma2: Double, + var gamma6: Double, + var gamma7: Double) extends Serializable + +object SVDPlusPlus { + /** + * Implement SVD++ based on "Factorization Meets the Neighborhood: a Multifaceted Collaborative Filtering Model", + * paper is available at [[http://public.research.att.com/~volinsky/netflix/kdd08koren.pdf]]. + * The prediction rule is rui = u + bu + bi + qi*(pu + |N(u)|^(-0.5)*sum(y)), see the details on page 6. 
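The prediction rule quoted above can be expressed directly with the same commons-math vectors the implementation uses. In this sketch every argument is hypothetical; in the code below the corresponding values live in the vertex attributes and `u` is the global rating mean.

{{{
import org.apache.commons.math.linear.{ArrayRealVector, RealVector}

object SVDPlusPlusPrediction {
  // rui = u + bu + bi + qi * (pu + |N(u)|^(-0.5) * sum(y))
  def predict(u: Double, bu: Double, bi: Double,
              qi: RealVector, pu: RealVector, ySum: RealVector, nu: Int): Double = {
    val blended = pu.add(ySum.mapMultiply(1.0 / math.sqrt(nu)))
    u + bu + bi + qi.dotProduct(blended)
  }

  def main(args: Array[String]): Unit = {
    val qi = new ArrayRealVector(Array(0.1, 0.2))
    val pu = new ArrayRealVector(Array(0.3, 0.1))
    val ySum = new ArrayRealVector(Array(0.2, 0.4))
    println(predict(3.5, 0.1, -0.2, qi, pu, ySum, 4))
  }
}
}}}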
+ * + * @param edges edges for constructing the graph + * + * @param conf SVDPlusPlus parameters + * + * @return a graph with vertex attributes containing the trained model + */ + + def run(edges: RDD[Edge[Double]], conf: SVDPlusPlusConf): (Graph[(RealVector, RealVector, Double, Double), Double], Double) = { + + // generate default vertex attribute + def defaultF(rank: Int): (RealVector, RealVector, Double, Double) = { + val v1 = new ArrayRealVector(rank) + val v2 = new ArrayRealVector(rank) + for (i <- 0 until rank) { + v1.setEntry(i, Random.nextDouble) + v2.setEntry(i, Random.nextDouble) + } + (v1, v2, 0.0, 0.0) + } + + // calculate global rating mean + val (rs, rc) = edges.map(e => (e.attr, 1L)).reduce((a, b) => (a._1 + b._1, a._2 + b._2)) + val u = rs / rc + + // construct graph + var g = Graph.fromEdges(edges, defaultF(conf.rank)).cache() + + // calculate initial bias and norm + var t0 = g.mapReduceTriplets(et => + Iterator((et.srcId, (1L, et.attr)), (et.dstId, (1L, et.attr))), (g1: (Long, Double), g2: (Long, Double)) => (g1._1 + g2._1, g1._2 + g2._2)) + g = g.outerJoinVertices(t0) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[(Long, Double)]) => + (vd._1, vd._2, msg.get._2 / msg.get._1, 1.0 / scala.math.sqrt(msg.get._1)) + } + + def mapTrainF(conf: SVDPlusPlusConf, u: Double)(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double]) + : Iterator[(VertexID, (RealVector, RealVector, Double))] = { + val (usr, itm) = (et.srcAttr, et.dstAttr) + val (p, q) = (usr._1, itm._1) + var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2) + pred = math.max(pred, conf.minVal) + pred = math.min(pred, conf.maxVal) + val err = et.attr - pred + val updateP = ((q.mapMultiply(err)).subtract(p.mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) + val updateQ = ((usr._2.mapMultiply(err)).subtract(q.mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) + val updateY = ((q.mapMultiply(err * usr._4)).subtract((itm._2).mapMultiply(conf.gamma7))).mapMultiply(conf.gamma2) + Iterator((et.srcId, (updateP, updateY, (err - conf.gamma6 * usr._3) * conf.gamma1)), + (et.dstId, (updateQ, updateY, (err - conf.gamma6 * itm._3) * conf.gamma1))) + } + + for (i <- 0 until conf.maxIters) { + // phase 1, calculate pu + |N(u)|^(-0.5)*sum(y) for user nodes + var t1 = g.mapReduceTriplets(et => Iterator((et.srcId, et.dstAttr._2)), (g1: RealVector, g2: RealVector) => g1.add(g2)) + g = g.outerJoinVertices(t1) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[RealVector]) => + if (msg.isDefined) (vd._1, vd._1.add(msg.get.mapMultiply(vd._4)), vd._3, vd._4) else vd + } + // phase 2, update p for user nodes and q, y for item nodes + val t2 = g.mapReduceTriplets(mapTrainF(conf, u), (g1: (RealVector, RealVector, Double), g2: (RealVector, RealVector, Double)) => + (g1._1.add(g2._1), g1._2.add(g2._2), g1._3 + g2._3)) + g = g.outerJoinVertices(t2) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[(RealVector, RealVector, Double)]) => + (vd._1.add(msg.get._1), vd._2.add(msg.get._2), vd._3 + msg.get._3, vd._4) + } + } + + // calculate error on training set + def mapTestF(conf: SVDPlusPlusConf, u: Double)(et: EdgeTriplet[(RealVector, RealVector, Double, Double), Double]): Iterator[(VertexID, Double)] = { + val (usr, itm) = (et.srcAttr, et.dstAttr) + val (p, q) = (usr._1, itm._1) + var pred = u + usr._3 + itm._3 + q.dotProduct(usr._2) + pred = math.max(pred, conf.minVal) + pred = math.min(pred, conf.maxVal) + val err = (et.attr - pred) * (et.attr - pred) + 
Iterator((et.dstId, err)) + } + val t3 = g.mapReduceTriplets(mapTestF(conf, u), (g1: Double, g2: Double) => g1 + g2) + g = g.outerJoinVertices(t3) { (vid: VertexID, vd: (RealVector, RealVector, Double, Double), msg: Option[Double]) => + if (msg.isDefined) (vd._1, vd._2, vd._3, msg.get) else vd + } + (g, u) + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponents.scala b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponents.scala new file mode 100644 index 0000000000..f64fc3ef0f --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponents.scala @@ -0,0 +1,87 @@ +package org.apache.spark.graphx.algorithms + +import org.apache.spark.graphx._ + +object StronglyConnectedComponents { + + /** + * Compute the strongly connected component (SCC) of each vertex and return an RDD with the vertex + * value containing the lowest vertex id in the SCC containing that vertex. + * + * @tparam VD the vertex attribute type (discarded in the computation) + * @tparam ED the edge attribute type (preserved in the computation) + * + * @param graph the graph for which to compute the SCC + * + * @return a graph with vertex attributes containing the smallest vertex id in each SCC + */ + def run[VD: Manifest, ED: Manifest](graph: Graph[VD, ED], numIter: Int): Graph[VertexID, ED] = { + + // the graph we update with final SCC ids, and the graph we return at the end + var sccGraph = graph.mapVertices { case (vid, _) => vid } + // graph we are going to work with in our iterations + var sccWorkGraph = graph.mapVertices { case (vid, _) => (vid, false) } + + var numVertices = sccWorkGraph.numVertices + var iter = 0 + while (sccWorkGraph.numVertices > 0 && iter < numIter) { + iter += 1 + do { + numVertices = sccWorkGraph.numVertices + sccWorkGraph = sccWorkGraph.outerJoinVertices(sccWorkGraph.outDegrees) { + (vid, data, degreeOpt) => if (degreeOpt.isDefined) data else (vid, true) + } + sccWorkGraph = sccWorkGraph.outerJoinVertices(sccWorkGraph.inDegrees) { + (vid, data, degreeOpt) => if (degreeOpt.isDefined) data else (vid, true) + } + + // get all vertices to be removed + val finalVertices = sccWorkGraph.vertices + .filter { case (vid, (scc, isFinal)) => isFinal} + .mapValues { (vid, data) => data._1} + + // write values to sccGraph + sccGraph = sccGraph.outerJoinVertices(finalVertices) { + (vid, scc, opt) => opt.getOrElse(scc) + } + // only keep vertices that are not final + sccWorkGraph = sccWorkGraph.subgraph(vpred = (vid, data) => !data._2) + } while (sccWorkGraph.numVertices < numVertices) + + sccWorkGraph = sccWorkGraph.mapVertices{ case (vid, (color, isFinal)) => (vid, isFinal) } + + // collect min of all my neighbor's scc values, update if it's smaller than mine + // then notify any neighbors with scc values larger than mine + sccWorkGraph = GraphLab[(VertexID, Boolean), ED, VertexID](sccWorkGraph, Integer.MAX_VALUE)( + (vid, e) => e.otherVertexAttr(vid)._1, + (vid1, vid2) => math.min(vid1, vid2), + (vid, scc, optScc) => + (math.min(scc._1, optScc.getOrElse(scc._1)), scc._2), + (vid, e) => e.vertexAttr(vid)._1 < e.otherVertexAttr(vid)._1 + ) + + // start at root of SCCs. Traverse values in reverse, notify all my neighbors + // do not propagate if colors do not match! 
+ sccWorkGraph = GraphLab[(VertexID, Boolean), ED, Boolean]( + sccWorkGraph, + Integer.MAX_VALUE, + EdgeDirection.Out, + EdgeDirection.In + )( + // vertex is final if it is the root of a color + // or it has the same color as a neighbor that is final + (vid, e) => (vid == e.vertexAttr(vid)._1) || (e.vertexAttr(vid)._1 == e.otherVertexAttr(vid)._1), + (final1, final2) => final1 || final2, + (vid, scc, optFinal) => + (scc._1, scc._2 || optFinal.getOrElse(false)), + // activate neighbor if they are not final, you are, and you have the same color + (vid, e) => e.vertexAttr(vid)._2 && + !e.otherVertexAttr(vid)._2 && (e.vertexAttr(vid)._1 == e.otherVertexAttr(vid)._1), + // start at root of colors + (vid, data) => vid == data._1 + ) + } + sccGraph + } + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/algorithms/TriangleCount.scala b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/TriangleCount.scala new file mode 100644 index 0000000000..b5a93c1bd1 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/algorithms/TriangleCount.scala @@ -0,0 +1,78 @@ +package org.apache.spark.graphx.algorithms + +import scala.reflect.ClassTag + +import org.apache.spark.graphx._ + + +object TriangleCount { + /** + * Compute the number of triangles passing through each vertex. + * + * The algorithm is relatively straightforward and can be computed in three steps: + * + * 1) Compute the set of neighbors for each vertex + * 2) For each edge compute the intersection of the sets and send the + * count to both vertices. + * 3) Compute the sum at each vertex and divide by two since each + * triangle is counted twice. + * + * + * @param graph a graph with `sourceId` less than `destId`. The graph must have been partitioned + * using Graph.partitionBy. 
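A usage-level sketch of the requirement stated above; the local SparkContext and the edge list are hypothetical, the edges are listed with srcId less than dstId, and the graph is explicitly partitioned before counting:

{{{
import org.apache.spark.SparkContext
import org.apache.spark.graphx._
import org.apache.spark.graphx.algorithms.TriangleCount

object TriangleCountExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "triangleCount")
    val edges = sc.parallelize(Seq(
      Edge(1L, 2L, 0), Edge(1L, 3L, 0), Edge(2L, 3L, 0), Edge(3L, 4L, 0)))
    val graph = Graph.fromEdges(edges, 0).partitionBy(RandomVertexCut)
    val triangles = TriangleCount.run(graph).vertices
    triangles.collect().foreach { case (vid, count) => println(s"vertex $vid: $count triangles") }
  }
}
}}}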
+ * + * @return + */ + def run[VD: ClassTag, ED: ClassTag](graph: Graph[VD,ED]): Graph[Int, ED] = { + // Remove redundant edges + val g = graph.groupEdges((a, b) => a).cache + + // Construct set representations of the neighborhoods + val nbrSets: VertexRDD[VertexSet] = + g.collectNeighborIds(EdgeDirection.Both).mapValues { (vid, nbrs) => + val set = new VertexSet(4) + var i = 0 + while (i < nbrs.size) { + // prevent self cycle + if(nbrs(i) != vid) { + set.add(nbrs(i)) + } + i += 1 + } + set + } + // join the sets with the graph + val setGraph: Graph[VertexSet, ED] = g.outerJoinVertices(nbrSets) { + (vid, _, optSet) => optSet.getOrElse(null) + } + // Edge function computes intersection of smaller vertex with larger vertex + def edgeFunc(et: EdgeTriplet[VertexSet, ED]): Iterator[(VertexID, Int)] = { + assert(et.srcAttr != null) + assert(et.dstAttr != null) + val (smallSet, largeSet) = if (et.srcAttr.size < et.dstAttr.size) { + (et.srcAttr, et.dstAttr) + } else { + (et.dstAttr, et.srcAttr) + } + val iter = smallSet.iterator + var counter: Int = 0 + while (iter.hasNext) { + val vid = iter.next + if (vid != et.srcId && vid != et.dstId && largeSet.contains(vid)) { counter += 1 } + } + Iterator((et.srcId, counter), (et.dstId, counter)) + } + // compute the intersection along edges + val counters: VertexRDD[Int] = setGraph.mapReduceTriplets(edgeFunc, _ + _) + // Merge counters with the graph and divide by two since each triangle is counted twice + g.outerJoinVertices(counters) { + (vid, _, optCounter: Option[Int]) => + val dblCount = optCounter.getOrElse(0) + // double count should be even (divisible by two) + assert((dblCount & 1) == 0) + dblCount / 2 + } + + } // end of TriangleCount + +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala new file mode 100644 index 0000000000..4176563d22 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartition.scala @@ -0,0 +1,220 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.ClassTag + +import org.apache.spark.graphx._ +import org.apache.spark.util.collection.PrimitiveKeyOpenHashMap + +/** + * A collection of edges stored in 3 large columnar arrays (src, dst, attribute). The arrays are + * clustered by src. + * + * @param srcIds the source vertex id of each edge + * @param dstIds the destination vertex id of each edge + * @param data the attribute associated with each edge + * @param index a clustered index on source vertex id + * @tparam ED the edge attribute type. + */ +class EdgePartition[@specialized(Char, Int, Boolean, Byte, Long, Float, Double) ED: ClassTag]( + val srcIds: Array[VertexID], + val dstIds: Array[VertexID], + val data: Array[ED], + val index: PrimitiveKeyOpenHashMap[VertexID, Int]) extends Serializable { + + /** + * Reverse all the edges in this partition. + * + * @return a new edge partition with all edges reversed. + */ + def reverse: EdgePartition[ED] = { + val builder = new EdgePartitionBuilder(size) + for (e <- iterator) { + builder.add(e.dstId, e.srcId, e.attr) + } + builder.toEdgePartition + } + + /** + * Construct a new edge partition by applying the function f to all + * edges in this partition. 
+ * + * @param f a function from an edge to a new attribute + * @tparam ED2 the type of the new attribute + * @return a new edge partition with the result of the function `f` + * applied to each edge + */ + def map[ED2: ClassTag](f: Edge[ED] => ED2): EdgePartition[ED2] = { + val newData = new Array[ED2](data.size) + val edge = new Edge[ED]() + val size = data.size + var i = 0 + while (i < size) { + edge.srcId = srcIds(i) + edge.dstId = dstIds(i) + edge.attr = data(i) + newData(i) = f(edge) + i += 1 + } + new EdgePartition(srcIds, dstIds, newData, index) + } + + /** + * Construct a new edge partition by using the edge attributes + * contained in the iterator. + * + * @note The input iterator should return edge attributes in the + * order of the edges returned by `EdgePartition.iterator` and + * should return attributes equal to the number of edges. + * + * @param f a function from an edge to a new attribute + * @tparam ED2 the type of the new attribute + * @return a new edge partition with the result of the function `f` + * applied to each edge + */ + def map[ED2: ClassTag](iter: Iterator[ED2]): EdgePartition[ED2] = { + val newData = new Array[ED2](data.size) + var i = 0 + while (iter.hasNext) { + newData(i) = iter.next() + i += 1 + } + assert(newData.size == i) + new EdgePartition(srcIds, dstIds, newData, index) + } + + /** + * Apply the function f to all edges in this partition. + * + * @param f an external state mutating user defined function. + */ + def foreach(f: Edge[ED] => Unit) { + iterator.foreach(f) + } + + /** + * Merge all the edges with the same src and dest id into a single + * edge using the `merge` function + * + * @param merge a commutative associative merge operation + * @return a new edge partition without duplicate edges + */ + def groupEdges(merge: (ED, ED) => ED): EdgePartition[ED] = { + val builder = new EdgePartitionBuilder[ED] + var firstIter: Boolean = true + var currSrcId: VertexID = nullValue[VertexID] + var currDstId: VertexID = nullValue[VertexID] + var currAttr: ED = nullValue[ED] + var i = 0 + while (i < size) { + if (i > 0 && currSrcId == srcIds(i) && currDstId == dstIds(i)) { + currAttr = merge(currAttr, data(i)) + } else { + if (i > 0) { + builder.add(currSrcId, currDstId, currAttr) + } + currSrcId = srcIds(i) + currDstId = dstIds(i) + currAttr = data(i) + } + i += 1 + } + if (size > 0) { + builder.add(currSrcId, currDstId, currAttr) + } + builder.toEdgePartition + } + + /** + * Apply `f` to all edges present in both `this` and `other` and return a new EdgePartition + * containing the resulting edges. + * + * If there are multiple edges with the same src and dst in `this`, `f` will be invoked once for + * each edge, but each time it may be invoked on any corresponding edge in `other`. + * + * If there are multiple edges with the same src and dst in `other`, `f` will only be invoked + * once. + */ + def innerJoin[ED2: ClassTag, ED3: ClassTag] + (other: EdgePartition[ED2]) + (f: (VertexID, VertexID, ED, ED2) => ED3): EdgePartition[ED3] = { + val builder = new EdgePartitionBuilder[ED3] + var i = 0 + var j = 0 + // For i = index of each edge in `this`... + while (i < size && j < other.size) { + val srcId = this.srcIds(i) + val dstId = this.dstIds(i) + // ... forward j to the index of the corresponding edge in `other`, and... 
+ while (j < other.size && other.srcIds(j) < srcId) { j += 1 } + if (j < other.size && other.srcIds(j) == srcId) { + while (j < other.size && other.srcIds(j) == srcId && other.dstIds(j) < dstId) { j += 1 } + if (j < other.size && other.srcIds(j) == srcId && other.dstIds(j) == dstId) { + // ... run `f` on the matching edge + builder.add(srcId, dstId, f(srcId, dstId, this.data(i), other.data(j))) + } + } + i += 1 + } + builder.toEdgePartition + } + + /** + * The number of edges in this partition + * + * @return size of the partition + */ + def size: Int = srcIds.size + + /** The number of unique source vertices in the partition. */ + def indexSize: Int = index.size + + /** + * Get an iterator over the edges in this partition. + * + * @return an iterator over edges in the partition + */ + def iterator = new Iterator[Edge[ED]] { + private[this] val edge = new Edge[ED] + private[this] var pos = 0 + + override def hasNext: Boolean = pos < EdgePartition.this.size + + override def next(): Edge[ED] = { + edge.srcId = srcIds(pos) + edge.dstId = dstIds(pos) + edge.attr = data(pos) + pos += 1 + edge + } + } + + /** + * Get an iterator over the edges in this partition whose source vertex ids match srcIdPred. The + * iterator is generated using an index scan, so it is efficient at skipping edges that don't + * match srcIdPred. + */ + def indexIterator(srcIdPred: VertexID => Boolean): Iterator[Edge[ED]] = + index.iterator.filter(kv => srcIdPred(kv._1)).flatMap(Function.tupled(clusterIterator)) + + /** + * Get an iterator over the cluster of edges in this partition with source vertex id `srcId`. The + * cluster must start at position `index`. + */ + private def clusterIterator(srcId: VertexID, index: Int) = new Iterator[Edge[ED]] { + private[this] val edge = new Edge[ED] + private[this] var pos = index + + override def hasNext: Boolean = { + pos >= 0 && pos < EdgePartition.this.size && srcIds(pos) == srcId + } + + override def next(): Edge[ED] = { + assert(srcIds(pos) == srcId) + edge.srcId = srcIds(pos) + edge.dstId = dstIds(pos) + edge.attr = data(pos) + pos += 1 + edge + } + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala new file mode 100644 index 0000000000..d4f08497a2 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala @@ -0,0 +1,46 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.ClassTag +import scala.util.Sorting + +import org.apache.spark.graphx._ +import org.apache.spark.util.collection.{PrimitiveKeyOpenHashMap, PrimitiveVector} + + +//private[graph] +class EdgePartitionBuilder[@specialized(Long, Int, Double) ED: ClassTag](size: Int = 64) { + + var edges = new PrimitiveVector[Edge[ED]](size) + + /** Add a new edge to the partition. 
*/ + def add(src: VertexID, dst: VertexID, d: ED) { + edges += Edge(src, dst, d) + } + + def toEdgePartition: EdgePartition[ED] = { + val edgeArray = edges.trim().array + Sorting.quickSort(edgeArray)(Edge.lexicographicOrdering) + val srcIds = new Array[VertexID](edgeArray.size) + val dstIds = new Array[VertexID](edgeArray.size) + val data = new Array[ED](edgeArray.size) + val index = new PrimitiveKeyOpenHashMap[VertexID, Int] + // Copy edges into columnar structures, tracking the beginnings of source vertex id clusters and + // adding them to the index + if (edgeArray.length > 0) { + index.update(srcIds(0), 0) + var currSrcId: VertexID = srcIds(0) + var i = 0 + while (i < edgeArray.size) { + srcIds(i) = edgeArray(i).srcId + dstIds(i) = edgeArray(i).dstId + data(i) = edgeArray(i).attr + if (edgeArray(i).srcId != currSrcId) { + currSrcId = edgeArray(i).srcId + index.update(currSrcId, i) + } + i += 1 + } + } + new EdgePartition(srcIds, dstIds, data, index) + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala new file mode 100644 index 0000000000..79fd962ffd --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeTripletIterator.scala @@ -0,0 +1,43 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.ClassTag + +import org.apache.spark.graphx._ +import org.apache.spark.util.collection.PrimitiveKeyOpenHashMap + + +/** + * The Iterator type returned when constructing edge triplets. This class technically could be + * an anonymous class in GraphImpl.triplets, but we name it here explicitly so it is easier to + * debug / profile. + */ +private[impl] +class EdgeTripletIterator[VD: ClassTag, ED: ClassTag]( + val vidToIndex: VertexIdToIndexMap, + val vertexArray: Array[VD], + val edgePartition: EdgePartition[ED]) + extends Iterator[EdgeTriplet[VD, ED]] { + + // Current position in the array. + private var pos = 0 + + // A triplet object that this iterator.next() call returns. We reuse this object to avoid + // allocating too many temporary Java objects. 
+ private val triplet = new EdgeTriplet[VD, ED] + + private val vmap = new PrimitiveKeyOpenHashMap[VertexID, VD](vidToIndex, vertexArray) + + override def hasNext: Boolean = pos < edgePartition.size + + override def next() = { + triplet.srcId = edgePartition.srcIds(pos) + // assert(vmap.containsKey(e.src.id)) + triplet.srcAttr = vmap(triplet.srcId) + triplet.dstId = edgePartition.dstIds(pos) + // assert(vmap.containsKey(e.dst.id)) + triplet.dstAttr = vmap(triplet.dstId) + triplet.attr = edgePartition.data(pos) + pos += 1 + triplet + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala new file mode 100644 index 0000000000..be9f188150 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -0,0 +1,422 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.{classTag, ClassTag} + +import org.apache.spark.util.collection.PrimitiveVector +import org.apache.spark.{HashPartitioner, Partitioner} +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.impl.GraphImpl._ +import org.apache.spark.graphx.impl.MsgRDDFunctions._ +import org.apache.spark.graphx.util.BytecodeUtils +import org.apache.spark.rdd.{ShuffledRDD, RDD} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.ClosureCleaner + + +/** + * A Graph RDD that supports computation on graphs. + * + * Graphs are represented using two classes of data: vertex-partitioned and + * edge-partitioned. `vertices` contains vertex attributes, which are vertex-partitioned. `edges` + * contains edge attributes, which are edge-partitioned. For operations on vertex neighborhoods, + * vertex attributes are replicated to the edge partitions where they appear as sources or + * destinations. `routingTable` stores the routing information for shipping vertex attributes to + * edge partitions. `replicatedVertexView` stores a view of the replicated vertex attributes created + * using the routing table. + */ +class GraphImpl[VD: ClassTag, ED: ClassTag] protected ( + @transient val vertices: VertexRDD[VD], + @transient val edges: EdgeRDD[ED], + @transient val routingTable: RoutingTable, + @transient val replicatedVertexView: ReplicatedVertexView[VD]) + extends Graph[VD, ED] with Serializable { + + def this( + vertices: VertexRDD[VD], + edges: EdgeRDD[ED], + routingTable: RoutingTable) = { + this(vertices, edges, routingTable, new ReplicatedVertexView(vertices, edges, routingTable)) + } + + def this( + vertices: VertexRDD[VD], + edges: EdgeRDD[ED]) = { + this(vertices, edges, new RoutingTable(edges, vertices)) + } + + /** Return a RDD that brings edges together with their source and destination vertices. 
*/ + @transient override val triplets: RDD[EdgeTriplet[VD, ED]] = { + val vdTag = classTag[VD] + val edTag = classTag[ED] + edges.partitionsRDD.zipPartitions( + replicatedVertexView.get(true, true), true) { (ePartIter, vPartIter) => + val (pid, ePart) = ePartIter.next() + val (_, vPart) = vPartIter.next() + new EdgeTripletIterator(vPart.index, vPart.values, ePart)(vdTag, edTag) + } + } + + override def persist(newLevel: StorageLevel): Graph[VD, ED] = { + vertices.persist(newLevel) + edges.persist(newLevel) + this + } + + override def cache(): Graph[VD, ED] = persist(StorageLevel.MEMORY_ONLY) + + override def partitionBy(partitionStrategy: PartitionStrategy): Graph[VD, ED] = { + val numPartitions = edges.partitions.size + val edTag = classTag[ED] + val newEdges = new EdgeRDD(edges.map { e => + val part: PartitionID = partitionStrategy.getPartition(e.srcId, e.dstId, numPartitions) + + // Should we be using 3-tuple or an optimized class + new MessageToPartition(part, (e.srcId, e.dstId, e.attr)) + } + .partitionBy(new HashPartitioner(numPartitions)) + .mapPartitionsWithIndex( { (pid, iter) => + val builder = new EdgePartitionBuilder[ED]()(edTag) + iter.foreach { message => + val data = message.data + builder.add(data._1, data._2, data._3) + } + val edgePartition = builder.toEdgePartition + Iterator((pid, edgePartition)) + }, preservesPartitioning = true).cache()) + new GraphImpl(vertices, newEdges) + } + + override def statistics: Map[String, Any] = { + // Get the total number of vertices after replication, used to compute the replication ratio. + def numReplicatedVertices(vid2pids: RDD[Array[Array[VertexID]]]): Double = { + vid2pids.map(_.map(_.size).sum.toLong).reduce(_ + _).toDouble + } + + val numVertices = this.ops.numVertices + val numEdges = this.ops.numEdges + val replicationRatioBoth = numReplicatedVertices(routingTable.bothAttrs) / numVertices + val replicationRatioSrcOnly = numReplicatedVertices(routingTable.srcAttrOnly) / numVertices + val replicationRatioDstOnly = numReplicatedVertices(routingTable.dstAttrOnly) / numVertices + // One entry for each partition, indicate the total number of edges on that partition. + val loadArray = edges.partitionsRDD.map(_._2.size).collect().map(_.toDouble / numEdges) + val minLoad = loadArray.min + val maxLoad = loadArray.max + Map( + "Num Vertices" -> numVertices, + "Num Edges" -> numEdges, + "Replication (both)" -> replicationRatioBoth, + "Replication (src only)" -> replicationRatioSrcOnly, + "Replication (dest only)" -> replicationRatioDstOnly, + "Load Array" -> loadArray, + "Min Load" -> minLoad, + "Max Load" -> maxLoad) + } + + /** + * Display the lineage information for this graph. 
+ */ + def printLineage() = { + def traverseLineage( + rdd: RDD[_], + indent: String = "", + visited: Map[Int, String] = Map.empty[Int, String]) { + if (visited.contains(rdd.id)) { + println(indent + visited(rdd.id)) + println(indent) + } else { + val locs = rdd.partitions.map( p => rdd.preferredLocations(p) ) + val cacheLevel = rdd.getStorageLevel + val name = rdd.id + val deps = rdd.dependencies + val partitioner = rdd.partitioner + val numparts = partitioner match { case Some(p) => p.numPartitions; case None => 0} + println(indent + name + ": " + cacheLevel.description + " (partitioner: " + partitioner + + ", " + numparts +")") + println(indent + " |---> Deps: " + deps.map(d => (d, d.rdd.id) ).toString) + println(indent + " |---> PrefLoc: " + locs.map(x=> x.toString).mkString(", ")) + deps.foreach(d => traverseLineage(d.rdd, indent + " | ", visited)) + } + } + println("edges ------------------------------------------") + traverseLineage(edges, " ") + var visited = Map(edges.id -> "edges") + println("\n\nvertices ------------------------------------------") + traverseLineage(vertices, " ", visited) + visited += (vertices.id -> "vertices") + println("\n\nroutingTable.bothAttrs -------------------------------") + traverseLineage(routingTable.bothAttrs, " ", visited) + visited += (routingTable.bothAttrs.id -> "routingTable.bothAttrs") + println("\n\ntriplets ----------------------------------------") + traverseLineage(triplets, " ", visited) + println(visited) + } // end of printLineage + + override def reverse: Graph[VD, ED] = { + val newETable = edges.mapEdgePartitions((pid, part) => part.reverse) + new GraphImpl(vertices, newETable, routingTable, replicatedVertexView) + } + + override def mapVertices[VD2: ClassTag](f: (VertexID, VD) => VD2): Graph[VD2, ED] = { + if (classTag[VD] equals classTag[VD2]) { + // The map preserves type, so we can use incremental replication + val newVerts = vertices.mapVertexPartitions(_.map(f)) + val changedVerts = vertices.asInstanceOf[VertexRDD[VD2]].diff(newVerts) + val newReplicatedVertexView = new ReplicatedVertexView[VD2]( + changedVerts, edges, routingTable, + Some(replicatedVertexView.asInstanceOf[ReplicatedVertexView[VD2]])) + new GraphImpl(newVerts, edges, routingTable, newReplicatedVertexView) + } else { + // The map does not preserve type, so we must re-replicate all vertices + new GraphImpl(vertices.mapVertexPartitions(_.map(f)), edges, routingTable) + } + } + + override def mapEdges[ED2: ClassTag]( + f: (PartitionID, Iterator[Edge[ED]]) => Iterator[ED2]): Graph[VD, ED2] = { + val newETable = edges.mapEdgePartitions((pid, part) => part.map(f(pid, part.iterator))) + new GraphImpl(vertices, newETable , routingTable, replicatedVertexView) + } + + override def mapTriplets[ED2: ClassTag]( + f: (PartitionID, Iterator[EdgeTriplet[VD, ED]]) => Iterator[ED2]): Graph[VD, ED2] = { + // Use an explicit manifest in PrimitiveKeyOpenHashMap init so we don't pull in the implicit + // manifest from GraphImpl (which would require serializing GraphImpl). 
+ val vdTag = classTag[VD] + val newEdgePartitions = + edges.partitionsRDD.zipPartitions(replicatedVertexView.get(true, true), true) { + (ePartIter, vTableReplicatedIter) => + val (ePid, edgePartition) = ePartIter.next() + val (vPid, vPart) = vTableReplicatedIter.next() + assert(!vTableReplicatedIter.hasNext) + assert(ePid == vPid) + val et = new EdgeTriplet[VD, ED] + val inputIterator = edgePartition.iterator.map { e => + et.set(e) + et.srcAttr = vPart(e.srcId) + et.dstAttr = vPart(e.dstId) + et + } + // Apply the user function to the vertex partition + val outputIter = f(ePid, inputIterator) + // Consume the iterator to update the edge attributes + val newEdgePartition = edgePartition.map(outputIter) + Iterator((ePid, newEdgePartition)) + } + new GraphImpl(vertices, new EdgeRDD(newEdgePartitions), routingTable, replicatedVertexView) + } + + override def subgraph( + epred: EdgeTriplet[VD, ED] => Boolean = x => true, + vpred: (VertexID, VD) => Boolean = (a, b) => true): Graph[VD, ED] = { + // Filter the vertices, reusing the partitioner and the index from this graph + val newVerts = vertices.mapVertexPartitions(_.filter(vpred)) + + // Filter the edges + val edTag = classTag[ED] + val newEdges = new EdgeRDD[ED](triplets.filter { et => + vpred(et.srcId, et.srcAttr) && vpred(et.dstId, et.dstAttr) && epred(et) + }.mapPartitionsWithIndex( { (pid, iter) => + val builder = new EdgePartitionBuilder[ED]()(edTag) + iter.foreach { et => builder.add(et.srcId, et.dstId, et.attr) } + val edgePartition = builder.toEdgePartition + Iterator((pid, edgePartition)) + }, preservesPartitioning = true)).cache() + + // Reuse the previous ReplicatedVertexView unmodified. The replicated vertices that have been + // removed will be ignored, since we only refer to replicated vertices when they are adjacent to + // an edge. + new GraphImpl(newVerts, newEdges, new RoutingTable(newEdges, newVerts), replicatedVertexView) + } // end of subgraph + + override def mask[VD2: ClassTag, ED2: ClassTag] ( + other: Graph[VD2, ED2]): Graph[VD, ED] = { + val newVerts = vertices.innerJoin(other.vertices) { (vid, v, w) => v } + val newEdges = edges.innerJoin(other.edges) { (src, dst, v, w) => v } + // Reuse the previous ReplicatedVertexView unmodified. The replicated vertices that have been + // removed will be ignored, since we only refer to replicated vertices when they are adjacent to + // an edge. + new GraphImpl(newVerts, newEdges, routingTable, replicatedVertexView) + } + + override def groupEdges(merge: (ED, ED) => ED): Graph[VD, ED] = { + ClosureCleaner.clean(merge) + val newETable = edges.mapEdgePartitions((pid, part) => part.groupEdges(merge)) + new GraphImpl(vertices, newETable, routingTable, replicatedVertexView) + } + + ////////////////////////////////////////////////////////////////////////////////////////////////// + // Lower level transformation methods + ////////////////////////////////////////////////////////////////////////////////////////////////// + + override def mapReduceTriplets[A: ClassTag]( + mapFunc: EdgeTriplet[VD, ED] => Iterator[(VertexID, A)], + reduceFunc: (A, A) => A, + activeSetOpt: Option[(VertexRDD[_], EdgeDirection)] = None) = { + + ClosureCleaner.clean(mapFunc) + ClosureCleaner.clean(reduceFunc) + + // For each vertex, replicate its attribute only to partitions where it is + // in the relevant position in an edge. 
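For reference, the caller-side contract implemented by this method looks like the following sketch, which computes in-degrees for an arbitrary, already constructed graph; the surrounding object and method names are hypothetical:

{{{
import scala.reflect.ClassTag
import org.apache.spark.graphx._

object InDegreeExample {
  // Each edge sends the message 1 to its destination; messages to the same vertex are summed.
  def inDegrees[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]): VertexRDD[Int] =
    graph.mapReduceTriplets[Int](
      et => Iterator((et.dstId, 1)),  // mapFunc over edge triplets
      (a, b) => a + b)                // commutative, associative reduceFunc
}
}}}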
+ val mapUsesSrcAttr = accessesVertexAttr[VD, ED](mapFunc, "srcAttr") + val mapUsesDstAttr = accessesVertexAttr[VD, ED](mapFunc, "dstAttr") + val vs = activeSetOpt match { + case Some((activeSet, _)) => + replicatedVertexView.get(mapUsesSrcAttr, mapUsesDstAttr, activeSet) + case None => + replicatedVertexView.get(mapUsesSrcAttr, mapUsesDstAttr) + } + val activeDirectionOpt = activeSetOpt.map(_._2) + + // Map and combine. + val preAgg = edges.partitionsRDD.zipPartitions(vs, true) { (ePartIter, vPartIter) => + val (ePid, edgePartition) = ePartIter.next() + val (vPid, vPart) = vPartIter.next() + assert(!vPartIter.hasNext) + assert(ePid == vPid) + // Choose scan method + val activeFraction = vPart.numActives.getOrElse(0) / edgePartition.indexSize.toFloat + val edgeIter = activeDirectionOpt match { + case Some(EdgeDirection.Both) => + if (activeFraction < 0.8) { + edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID)) + .filter(e => vPart.isActive(e.dstId)) + } else { + edgePartition.iterator.filter(e => vPart.isActive(e.srcId) && vPart.isActive(e.dstId)) + } + case Some(EdgeDirection.Out) => + if (activeFraction < 0.8) { + edgePartition.indexIterator(srcVertexID => vPart.isActive(srcVertexID)) + } else { + edgePartition.iterator.filter(e => vPart.isActive(e.srcId)) + } + case Some(EdgeDirection.In) => + edgePartition.iterator.filter(e => vPart.isActive(e.dstId)) + case None => + edgePartition.iterator + } + + // Scan edges and run the map function + val et = new EdgeTriplet[VD, ED] + val mapOutputs = edgeIter.flatMap { e => + et.set(e) + if (mapUsesSrcAttr) { + et.srcAttr = vPart(e.srcId) + } + if (mapUsesDstAttr) { + et.dstAttr = vPart(e.dstId) + } + mapFunc(et) + } + // Note: This doesn't allow users to send messages to arbitrary vertices. 
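+ // Messages are combined against this edge partition's local vertex index, so a target
+ // vertex id is only kept if it appears as the source or destination of some edge here;
+ // ids outside that index are dropped.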
+ vPart.aggregateUsingIndex(mapOutputs, reduceFunc).iterator + } + + // do the final reduction reusing the index map + vertices.aggregateUsingIndex(preAgg, reduceFunc) + } // end of mapReduceTriplets + + override def outerJoinVertices[U: ClassTag, VD2: ClassTag] + (updates: RDD[(VertexID, U)])(updateF: (VertexID, VD, Option[U]) => VD2): Graph[VD2, ED] = { + if (classTag[VD] equals classTag[VD2]) { + // updateF preserves type, so we can use incremental replication + val newVerts = vertices.leftJoin(updates)(updateF) + val changedVerts = vertices.asInstanceOf[VertexRDD[VD2]].diff(newVerts) + val newReplicatedVertexView = new ReplicatedVertexView[VD2]( + changedVerts, edges, routingTable, + Some(replicatedVertexView.asInstanceOf[ReplicatedVertexView[VD2]])) + new GraphImpl(newVerts, edges, routingTable, newReplicatedVertexView) + } else { + // updateF does not preserve type, so we must re-replicate all vertices + val newVerts = vertices.leftJoin(updates)(updateF) + new GraphImpl(newVerts, edges, routingTable) + } + } + + private def accessesVertexAttr[VD, ED](closure: AnyRef, attrName: String): Boolean = { + try { + BytecodeUtils.invokedMethod(closure, classOf[EdgeTriplet[VD, ED]], attrName) + } catch { + case _: ClassNotFoundException => true // if we don't know, be conservative + } + } +} // end of class GraphImpl + + +object GraphImpl { + + def apply[VD: ClassTag, ED: ClassTag]( + edges: RDD[Edge[ED]], + defaultVertexAttr: VD): GraphImpl[VD, ED] = + { + fromEdgeRDD(createEdgeRDD(edges), defaultVertexAttr) + } + + def fromEdgePartitions[VD: ClassTag, ED: ClassTag]( + edgePartitions: RDD[(PartitionID, EdgePartition[ED])], + defaultVertexAttr: VD): GraphImpl[VD, ED] = { + fromEdgeRDD(new EdgeRDD(edgePartitions), defaultVertexAttr) + } + + def apply[VD: ClassTag, ED: ClassTag]( + vertices: RDD[(VertexID, VD)], + edges: RDD[Edge[ED]], + defaultVertexAttr: VD): GraphImpl[VD, ED] = + { + val edgeRDD = createEdgeRDD(edges).cache() + + // Get the set of all vids + val partitioner = Partitioner.defaultPartitioner(vertices) + val vPartitioned = vertices.partitionBy(partitioner) + val vidsFromEdges = collectVertexIDsFromEdges(edgeRDD, partitioner) + val vids = vPartitioned.zipPartitions(vidsFromEdges) { (vertexIter, vidsFromEdgesIter) => + vertexIter.map(_._1) ++ vidsFromEdgesIter.map(_._1) + } + + val vertexRDD = VertexRDD(vids, vPartitioned, defaultVertexAttr) + + new GraphImpl(vertexRDD, edgeRDD) + } + + /** + * Create the edge RDD, which is much more efficient for Java heap storage than the normal edges + * data structure (RDD[(VertexID, VertexID, ED)]). + * + * The edge RDD contains multiple partitions, and each partition contains only one RDD key-value + * pair: the key is the partition id, and the value is an EdgePartition object containing all the + * edges in a partition. + */ + private def createEdgeRDD[ED: ClassTag]( + edges: RDD[Edge[ED]]): EdgeRDD[ED] = { + val edgePartitions = edges.mapPartitionsWithIndex { (pid, iter) => + val builder = new EdgePartitionBuilder[ED] + iter.foreach { e => + builder.add(e.srcId, e.dstId, e.attr) + } + Iterator((pid, builder.toEdgePartition)) + } + new EdgeRDD(edgePartitions) + } + + private def fromEdgeRDD[VD: ClassTag, ED: ClassTag]( + edges: EdgeRDD[ED], + defaultVertexAttr: VD): GraphImpl[VD, ED] = { + edges.cache() + // Get the set of all vids + val vids = collectVertexIDsFromEdges(edges, new HashPartitioner(edges.partitions.size)) + // Create the VertexRDD. 
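+ // This entry point supplies no user vertex data, so every vid found in the edges is
+ // initialized with defaultVertexAttr.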
+ val vertices = VertexRDD(vids.mapValues(x => defaultVertexAttr)) + new GraphImpl(vertices, edges) + } + + /** Collects all vids mentioned in edges and partitions them by partitioner. */ + private def collectVertexIDsFromEdges( + edges: EdgeRDD[_], + partitioner: Partitioner): RDD[(VertexID, Int)] = { + // TODO: Consider doing map side distinct before shuffle. + new ShuffledRDD[VertexID, Int, (VertexID, Int)]( + edges.collectVertexIDs.map(vid => (vid, 0)), partitioner) + .setSerializer(classOf[VertexIDMsgSerializer].getName) + } +} // end of object GraphImpl diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala new file mode 100644 index 0000000000..ad5daf8f6a --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/MessageToPartition.scala @@ -0,0 +1,93 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.{classTag, ClassTag} + +import org.apache.spark.Partitioner +import org.apache.spark.graphx.{PartitionID, VertexID} +import org.apache.spark.rdd.{ShuffledRDD, RDD} + + +class VertexBroadcastMsg[@specialized(Int, Long, Double, Boolean) T]( + @transient var partition: PartitionID, + var vid: VertexID, + var data: T) + extends Product2[PartitionID, (VertexID, T)] with Serializable { + + override def _1 = partition + + override def _2 = (vid, data) + + override def canEqual(that: Any): Boolean = that.isInstanceOf[VertexBroadcastMsg[_]] +} + + +/** + * A message used to send a specific value to a partition. + * @param partition index of the target partition. + * @param data value to send + */ +class MessageToPartition[@specialized(Int, Long, Double, Char, Boolean/*, AnyRef*/) T]( + @transient var partition: PartitionID, + var data: T) + extends Product2[PartitionID, T] with Serializable { + + override def _1 = partition + + override def _2 = data + + override def canEqual(that: Any): Boolean = that.isInstanceOf[MessageToPartition[_]] +} + + +class VertexBroadcastMsgRDDFunctions[T: ClassTag](self: RDD[VertexBroadcastMsg[T]]) { + def partitionBy(partitioner: Partitioner): RDD[VertexBroadcastMsg[T]] = { + val rdd = new ShuffledRDD[PartitionID, (VertexID, T), VertexBroadcastMsg[T]](self, partitioner) + + // Set a custom serializer if the data is of int or double type. + if (classTag[T] == ClassTag.Int) { + rdd.setSerializer(classOf[IntVertexBroadcastMsgSerializer].getName) + } else if (classTag[T] == ClassTag.Long) { + rdd.setSerializer(classOf[LongVertexBroadcastMsgSerializer].getName) + } else if (classTag[T] == ClassTag.Double) { + rdd.setSerializer(classOf[DoubleVertexBroadcastMsgSerializer].getName) + } + rdd + } +} + + +class MsgRDDFunctions[T: ClassTag](self: RDD[MessageToPartition[T]]) { + + /** + * Return a copy of the RDD partitioned using the specified partitioner. 
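+ *
+ * Hypothetical usage via the implicit conversion in MsgRDDFunctions below
+ * (numPartitions is an illustrative name):
+ *   msgs.partitionBy(new HashPartitioner(numPartitions))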
+ */ + def partitionBy(partitioner: Partitioner): RDD[MessageToPartition[T]] = { + new ShuffledRDD[PartitionID, T, MessageToPartition[T]](self, partitioner) + } + +} + + +object MsgRDDFunctions { + implicit def rdd2PartitionRDDFunctions[T: ClassTag](rdd: RDD[MessageToPartition[T]]) = { + new MsgRDDFunctions(rdd) + } + + implicit def rdd2vertexMessageRDDFunctions[T: ClassTag](rdd: RDD[VertexBroadcastMsg[T]]) = { + new VertexBroadcastMsgRDDFunctions(rdd) + } + + def partitionForAggregation[T: ClassTag](msgs: RDD[(VertexID, T)], partitioner: Partitioner) = { + val rdd = new ShuffledRDD[VertexID, T, (VertexID, T)](msgs, partitioner) + + // Set a custom serializer if the data is of int or double type. + if (classTag[T] == ClassTag.Int) { + rdd.setSerializer(classOf[IntAggMsgSerializer].getName) + } else if (classTag[T] == ClassTag.Long) { + rdd.setSerializer(classOf[LongAggMsgSerializer].getName) + } else if (classTag[T] == ClassTag.Double) { + rdd.setSerializer(classOf[DoubleAggMsgSerializer].getName) + } + rdd + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala new file mode 100644 index 0000000000..63180bc3af --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala @@ -0,0 +1,182 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.{classTag, ClassTag} + +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.apache.spark.util.collection.{PrimitiveVector, OpenHashSet} + +import org.apache.spark.graphx._ + +/** + * A view of the vertices after they are shipped to the join sites specified in + * `vertexPlacement`. The resulting view is co-partitioned with `edges`. If `prevViewOpt` is + * specified, `updatedVerts` are treated as incremental updates to the previous view. Otherwise, a + * fresh view is created. + * + * The view is always cached (i.e., once it is created, it remains materialized). This avoids + * constructing it twice if the user calls graph.triplets followed by graph.mapReduceTriplets, for + * example. + */ +private[impl] +class ReplicatedVertexView[VD: ClassTag]( + updatedVerts: VertexRDD[VD], + edges: EdgeRDD[_], + routingTable: RoutingTable, + prevViewOpt: Option[ReplicatedVertexView[VD]] = None) { + + /** + * Within each edge partition, create a local map from vid to an index into the attribute + * array. Each map contains a superset of the vertices that it will receive, because it stores + * vids from both the source and destination of edges. It must always include both source and + * destination vids because some operations, such as GraphImpl.mapReduceTriplets, rely on this. 
+ */ + private val localVertexIDMap: RDD[(Int, VertexIdToIndexMap)] = prevViewOpt match { + case Some(prevView) => + prevView.localVertexIDMap + case None => + edges.partitionsRDD.mapPartitions(_.map { + case (pid, epart) => + val vidToIndex = new VertexIdToIndexMap + epart.foreach { e => + vidToIndex.add(e.srcId) + vidToIndex.add(e.dstId) + } + (pid, vidToIndex) + }, preservesPartitioning = true).cache().setName("ReplicatedVertexView localVertexIDMap") + } + + private lazy val bothAttrs: RDD[(PartitionID, VertexPartition[VD])] = create(true, true) + private lazy val srcAttrOnly: RDD[(PartitionID, VertexPartition[VD])] = create(true, false) + private lazy val dstAttrOnly: RDD[(PartitionID, VertexPartition[VD])] = create(false, true) + private lazy val noAttrs: RDD[(PartitionID, VertexPartition[VD])] = create(false, false) + + def get(includeSrc: Boolean, includeDst: Boolean): RDD[(PartitionID, VertexPartition[VD])] = { + (includeSrc, includeDst) match { + case (true, true) => bothAttrs + case (true, false) => srcAttrOnly + case (false, true) => dstAttrOnly + case (false, false) => noAttrs + } + } + + def get( + includeSrc: Boolean, + includeDst: Boolean, + actives: VertexRDD[_]): RDD[(PartitionID, VertexPartition[VD])] = { + // Ship active sets to edge partitions using vertexPlacement, but ignoring includeSrc and + // includeDst. These flags govern attribute shipping, but the activeness of a vertex must be + // shipped to all edges mentioning that vertex, regardless of whether the vertex attribute is + // also shipped there. + val shippedActives = routingTable.get(true, true) + .zipPartitions(actives.partitionsRDD)(ReplicatedVertexView.buildActiveBuffer(_, _)) + .partitionBy(edges.partitioner.get) + // Update the view with shippedActives, setting activeness flags in the resulting + // VertexPartitions + get(includeSrc, includeDst).zipPartitions(shippedActives) { (viewIter, shippedActivesIter) => + val (pid, vPart) = viewIter.next() + val newPart = vPart.replaceActives(shippedActivesIter.flatMap(_._2.iterator)) + Iterator((pid, newPart)) + } + } + + private def create(includeSrc: Boolean, includeDst: Boolean) + : RDD[(PartitionID, VertexPartition[VD])] = { + val vdTag = classTag[VD] + + // Ship vertex attributes to edge partitions according to vertexPlacement + val verts = updatedVerts.partitionsRDD + val shippedVerts = routingTable.get(includeSrc, includeDst) + .zipPartitions(verts)(ReplicatedVertexView.buildBuffer(_, _)(vdTag)) + .partitionBy(edges.partitioner.get) + // TODO: Consider using a specialized shuffler. 
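+ // shippedVerts pairs each edge-partition id with a VertexAttributeBlock of (vid, attr)
+ // entries bound for that partition, so the partitionBy above co-locates attributes with
+ // the edge partitions that reference them (descriptive note, inferred from buildBuffer).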
+ + prevViewOpt match { + case Some(prevView) => + // Update prevView with shippedVerts, setting staleness flags in the resulting + // VertexPartitions + prevView.get(includeSrc, includeDst).zipPartitions(shippedVerts) { + (prevViewIter, shippedVertsIter) => + val (pid, prevVPart) = prevViewIter.next() + val newVPart = prevVPart.innerJoinKeepLeft(shippedVertsIter.flatMap(_._2.iterator)) + Iterator((pid, newVPart)) + }.cache().setName("ReplicatedVertexView delta %s %s".format(includeSrc, includeDst)) + + case None => + // Within each edge partition, place the shipped vertex attributes into the correct + // locations specified in localVertexIDMap + localVertexIDMap.zipPartitions(shippedVerts) { (mapIter, shippedVertsIter) => + val (pid, vidToIndex) = mapIter.next() + assert(!mapIter.hasNext) + // Populate the vertex array using the vidToIndex map + val vertexArray = vdTag.newArray(vidToIndex.capacity) + for ((_, block) <- shippedVertsIter) { + for (i <- 0 until block.vids.size) { + val vid = block.vids(i) + val attr = block.attrs(i) + val ind = vidToIndex.getPos(vid) + vertexArray(ind) = attr + } + } + val newVPart = new VertexPartition( + vidToIndex, vertexArray, vidToIndex.getBitSet)(vdTag) + Iterator((pid, newVPart)) + }.cache().setName("ReplicatedVertexView %s %s".format(includeSrc, includeDst)) + } + } +} + +object ReplicatedVertexView { + protected def buildBuffer[VD: ClassTag]( + pid2vidIter: Iterator[Array[Array[VertexID]]], + vertexPartIter: Iterator[VertexPartition[VD]]) = { + val pid2vid: Array[Array[VertexID]] = pid2vidIter.next() + val vertexPart: VertexPartition[VD] = vertexPartIter.next() + + Iterator.tabulate(pid2vid.size) { pid => + val vidsCandidate = pid2vid(pid) + val size = vidsCandidate.length + val vids = new PrimitiveVector[VertexID](pid2vid(pid).size) + val attrs = new PrimitiveVector[VD](pid2vid(pid).size) + var i = 0 + while (i < size) { + val vid = vidsCandidate(i) + if (vertexPart.isDefined(vid)) { + vids += vid + attrs += vertexPart(vid) + } + i += 1 + } + (pid, new VertexAttributeBlock(vids.trim().array, attrs.trim().array)) + } + } + + protected def buildActiveBuffer( + pid2vidIter: Iterator[Array[Array[VertexID]]], + activePartIter: Iterator[VertexPartition[_]]) + : Iterator[(Int, Array[VertexID])] = { + val pid2vid: Array[Array[VertexID]] = pid2vidIter.next() + val activePart: VertexPartition[_] = activePartIter.next() + + Iterator.tabulate(pid2vid.size) { pid => + val vidsCandidate = pid2vid(pid) + val size = vidsCandidate.length + val actives = new PrimitiveVector[VertexID](vidsCandidate.size) + var i = 0 + while (i < size) { + val vid = vidsCandidate(i) + if (activePart.isDefined(vid)) { + actives += vid + } + i += 1 + } + (pid, actives.trim().array) + } + } +} + +class VertexAttributeBlock[VD: ClassTag](val vids: Array[VertexID], val attrs: Array[VD]) + extends Serializable { + def iterator: Iterator[(VertexID, VD)] = + (0 until vids.size).iterator.map { i => (vids(i), attrs(i)) } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala new file mode 100644 index 0000000000..3bd8b24133 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTable.scala @@ -0,0 +1,64 @@ +package org.apache.spark.graphx.impl + +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.collection.PrimitiveVector + +/** + * Stores 
the locations of edge-partition join sites for each vertex attribute; that is, the routing + * information for shipping vertex attributes to edge partitions. This is always cached because it + * may be used multiple times in ReplicatedVertexView -- once to ship the vertex attributes and + * (possibly) once to ship the active-set information. + */ +class RoutingTable(edges: EdgeRDD[_], vertices: VertexRDD[_]) { + + val bothAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(true, true) + val srcAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(true, false) + val dstAttrOnly: RDD[Array[Array[VertexID]]] = createPid2Vid(false, true) + val noAttrs: RDD[Array[Array[VertexID]]] = createPid2Vid(false, false) + + def get(includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] = + (includeSrcAttr, includeDstAttr) match { + case (true, true) => bothAttrs + case (true, false) => srcAttrOnly + case (false, true) => dstAttrOnly + case (false, false) => noAttrs + } + + private def createPid2Vid( + includeSrcAttr: Boolean, includeDstAttr: Boolean): RDD[Array[Array[VertexID]]] = { + // Determine which vertices each edge partition needs by creating a mapping from vid to pid. + val vid2pid: RDD[(VertexID, PartitionID)] = edges.partitionsRDD.mapPartitions { iter => + val (pid: PartitionID, edgePartition: EdgePartition[_]) = iter.next() + val numEdges = edgePartition.size + val vSet = new VertexSet + if (includeSrcAttr) { // Add src vertices to the set. + var i = 0 + while (i < numEdges) { + vSet.add(edgePartition.srcIds(i)) + i += 1 + } + } + if (includeDstAttr) { // Add dst vertices to the set. + var i = 0 + while (i < numEdges) { + vSet.add(edgePartition.dstIds(i)) + i += 1 + } + } + vSet.iterator.map { vid => (vid, pid) } + } + + val numPartitions = vertices.partitions.size + vid2pid.partitionBy(vertices.partitioner.get).mapPartitions { iter => + val pid2vid = Array.fill(numPartitions)(new PrimitiveVector[VertexID]) + for ((vid, pid) <- iter) { + pid2vid(pid) += vid + } + + Iterator(pid2vid.map(_.trim().array)) + }.cache().setName("RoutingTable %s %s".format(includeSrcAttr, includeDstAttr)) + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala new file mode 100644 index 0000000000..1c3c87f08d --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/Serializers.scala @@ -0,0 +1,386 @@ +package org.apache.spark.graphx.impl + +import java.io.{EOFException, InputStream, OutputStream} +import java.nio.ByteBuffer + +import org.apache.spark.SparkConf +import org.apache.spark.graphx._ +import org.apache.spark.serializer._ + +class VertexIDMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[(VertexID, _)] + writeVarLong(msg._1, optimizePositive = false) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + override def readObject[T](): T = { + (readVarLong(optimizePositive = false), null).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for VertexBroadcastMessage[Int]. 
*/ +class IntVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[VertexBroadcastMsg[Int]] + writeVarLong(msg.vid, optimizePositive = false) + writeInt(msg.data) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + override def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readInt() + new VertexBroadcastMsg[Int](0, a, b).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for VertexBroadcastMessage[Long]. */ +class LongVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[VertexBroadcastMsg[Long]] + writeVarLong(msg.vid, optimizePositive = false) + writeLong(msg.data) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + override def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readLong() + new VertexBroadcastMsg[Long](0, a, b).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for VertexBroadcastMessage[Double]. */ +class DoubleVertexBroadcastMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[VertexBroadcastMsg[Double]] + writeVarLong(msg.vid, optimizePositive = false) + writeDouble(msg.data) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readDouble() + new VertexBroadcastMsg[Double](0, a, b).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for AggregationMessage[Int]. */ +class IntAggMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[(VertexID, Int)] + writeVarLong(msg._1, optimizePositive = false) + writeUnsignedVarInt(msg._2) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + override def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readUnsignedVarInt() + (a, b).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for AggregationMessage[Long]. 
*/ +class LongAggMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[(VertexID, Long)] + writeVarLong(msg._1, optimizePositive = false) + writeVarLong(msg._2, optimizePositive = true) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + override def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readVarLong(optimizePositive = true) + (a, b).asInstanceOf[T] + } + } + } +} + +/** A special shuffle serializer for AggregationMessage[Double]. */ +class DoubleAggMsgSerializer(conf: SparkConf) extends Serializer { + override def newInstance(): SerializerInstance = new ShuffleSerializerInstance { + + override def serializeStream(s: OutputStream) = new ShuffleSerializationStream(s) { + def writeObject[T](t: T) = { + val msg = t.asInstanceOf[(VertexID, Double)] + writeVarLong(msg._1, optimizePositive = false) + writeDouble(msg._2) + this + } + } + + override def deserializeStream(s: InputStream) = new ShuffleDeserializationStream(s) { + def readObject[T](): T = { + val a = readVarLong(optimizePositive = false) + val b = readDouble() + (a, b).asInstanceOf[T] + } + } + } +} + +//////////////////////////////////////////////////////////////////////////////// +// Helper classes to shorten the implementation of those special serializers. +//////////////////////////////////////////////////////////////////////////////// + +abstract class ShuffleSerializationStream(s: OutputStream) extends SerializationStream { + // The implementation should override this one. 
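+ // (Descriptive note: the helpers below emit big-endian primitives and variable-length
+ // longs; with optimizePositive = false, writeVarLong zig-zag-maps -1 to 1, so it is
+ // written as the single byte 0x01.)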
+ def writeObject[T](t: T): SerializationStream + + def writeInt(v: Int) { + s.write(v >> 24) + s.write(v >> 16) + s.write(v >> 8) + s.write(v) + } + + def writeUnsignedVarInt(value: Int) { + if ((value >>> 7) == 0) { + s.write(value.toInt) + } else if ((value >>> 14) == 0) { + s.write((value & 0x7F) | 0x80) + s.write(value >>> 7) + } else if ((value >>> 21) == 0) { + s.write((value & 0x7F) | 0x80) + s.write(value >>> 7 | 0x80) + s.write(value >>> 14) + } else if ((value >>> 28) == 0) { + s.write((value & 0x7F) | 0x80) + s.write(value >>> 7 | 0x80) + s.write(value >>> 14 | 0x80) + s.write(value >>> 21) + } else { + s.write((value & 0x7F) | 0x80) + s.write(value >>> 7 | 0x80) + s.write(value >>> 14 | 0x80) + s.write(value >>> 21 | 0x80) + s.write(value >>> 28) + } + } + + def writeVarLong(value: Long, optimizePositive: Boolean) { + val v = if (!optimizePositive) (value << 1) ^ (value >> 63) else value + if ((v >>> 7) == 0) { + s.write(v.toInt) + } else if ((v >>> 14) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7).toInt) + } else if ((v >>> 21) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14).toInt) + } else if ((v >>> 28) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21).toInt) + } else if ((v >>> 35) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21 | 0x80).toInt) + s.write((v >>> 28).toInt) + } else if ((v >>> 42) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21 | 0x80).toInt) + s.write((v >>> 28 | 0x80).toInt) + s.write((v >>> 35).toInt) + } else if ((v >>> 49) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21 | 0x80).toInt) + s.write((v >>> 28 | 0x80).toInt) + s.write((v >>> 35 | 0x80).toInt) + s.write((v >>> 42).toInt) + } else if ((v >>> 56) == 0) { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21 | 0x80).toInt) + s.write((v >>> 28 | 0x80).toInt) + s.write((v >>> 35 | 0x80).toInt) + s.write((v >>> 42 | 0x80).toInt) + s.write((v >>> 49).toInt) + } else { + s.write(((v & 0x7F) | 0x80).toInt) + s.write((v >>> 7 | 0x80).toInt) + s.write((v >>> 14 | 0x80).toInt) + s.write((v >>> 21 | 0x80).toInt) + s.write((v >>> 28 | 0x80).toInt) + s.write((v >>> 35 | 0x80).toInt) + s.write((v >>> 42 | 0x80).toInt) + s.write((v >>> 49 | 0x80).toInt) + s.write((v >>> 56).toInt) + } + } + + def writeLong(v: Long) { + s.write((v >>> 56).toInt) + s.write((v >>> 48).toInt) + s.write((v >>> 40).toInt) + s.write((v >>> 32).toInt) + s.write((v >>> 24).toInt) + s.write((v >>> 16).toInt) + s.write((v >>> 8).toInt) + s.write(v.toInt) + } + + //def writeDouble(v: Double): Unit = writeUnsignedVarLong(java.lang.Double.doubleToLongBits(v)) + def writeDouble(v: Double): Unit = writeLong(java.lang.Double.doubleToLongBits(v)) + + override def flush(): Unit = s.flush() + + override def close(): Unit = s.close() +} + +abstract class ShuffleDeserializationStream(s: InputStream) extends DeserializationStream { + // The implementation should override this one. 
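+ // (Descriptive note: readVarLong undoes the zig-zag step, so the byte 0x01 read with
+ // optimizePositive = false decodes back to -1.)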
+ def readObject[T](): T + + def readInt(): Int = { + val first = s.read() + if (first < 0) throw new EOFException + (first & 0xFF) << 24 | (s.read() & 0xFF) << 16 | (s.read() & 0xFF) << 8 | (s.read() & 0xFF) + } + + def readUnsignedVarInt(): Int = { + var value: Int = 0 + var i: Int = 0 + def readOrThrow(): Int = { + val in = s.read() + if (in < 0) throw new EOFException + in & 0xFF + } + var b: Int = readOrThrow() + while ((b & 0x80) != 0) { + value |= (b & 0x7F) << i + i += 7 + if (i > 35) throw new IllegalArgumentException("Variable length quantity is too long") + b = readOrThrow() + } + value | (b << i) + } + + def readVarLong(optimizePositive: Boolean): Long = { + def readOrThrow(): Int = { + val in = s.read() + if (in < 0) throw new EOFException + in & 0xFF + } + var b = readOrThrow() + var ret: Long = b & 0x7F + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F) << 7 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F) << 14 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F) << 21 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F).toLong << 28 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F).toLong << 35 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F).toLong << 42 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= (b & 0x7F).toLong << 49 + if ((b & 0x80) != 0) { + b = readOrThrow() + ret |= b.toLong << 56 + } + } + } + } + } + } + } + } + if (!optimizePositive) (ret >>> 1) ^ -(ret & 1) else ret + } + + def readLong(): Long = { + val first = s.read() + if (first < 0) throw new EOFException() + (first.toLong << 56) | + (s.read() & 0xFF).toLong << 48 | + (s.read() & 0xFF).toLong << 40 | + (s.read() & 0xFF).toLong << 32 | + (s.read() & 0xFF).toLong << 24 | + (s.read() & 0xFF) << 16 | + (s.read() & 0xFF) << 8 | + (s.read() & 0xFF) + } + + //def readDouble(): Double = java.lang.Double.longBitsToDouble(readUnsignedVarLong()) + def readDouble(): Double = java.lang.Double.longBitsToDouble(readLong()) + + override def close(): Unit = s.close() +} + +sealed trait ShuffleSerializerInstance extends SerializerInstance { + + override def serialize[T](t: T): ByteBuffer = throw new UnsupportedOperationException + + override def deserialize[T](bytes: ByteBuffer): T = throw new UnsupportedOperationException + + override def deserialize[T](bytes: ByteBuffer, loader: ClassLoader): T = + throw new UnsupportedOperationException + + // The implementation should override the following two. 
+ override def serializeStream(s: OutputStream): SerializationStream + override def deserializeStream(s: InputStream): DeserializationStream +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala new file mode 100644 index 0000000000..7c83497ca9 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala @@ -0,0 +1,262 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.ClassTag + +import org.apache.spark.util.collection.{BitSet, PrimitiveKeyOpenHashMap} + +import org.apache.spark.Logging +import org.apache.spark.graphx._ + + +private[graphx] object VertexPartition { + + def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)]): VertexPartition[VD] = { + val map = new PrimitiveKeyOpenHashMap[VertexID, VD] + iter.foreach { case (k, v) => + map(k) = v + } + new VertexPartition(map.keySet, map._values, map.keySet.getBitSet) + } + + def apply[VD: ClassTag](iter: Iterator[(VertexID, VD)], mergeFunc: (VD, VD) => VD) + : VertexPartition[VD] = + { + val map = new PrimitiveKeyOpenHashMap[VertexID, VD] + iter.foreach { case (k, v) => + map.setMerge(k, v, mergeFunc) + } + new VertexPartition(map.keySet, map._values, map.keySet.getBitSet) + } +} + + +private[graphx] +class VertexPartition[@specialized(Long, Int, Double) VD: ClassTag]( + val index: VertexIdToIndexMap, + val values: Array[VD], + val mask: BitSet, + /** A set of vids of active vertices. May contain vids not in index due to join rewrite. */ + private val activeSet: Option[VertexSet] = None) + extends Logging { + + val capacity: Int = index.capacity + + def size: Int = mask.cardinality() + + /** Return the vertex attribute for the given vertex ID. */ + def apply(vid: VertexID): VD = values(index.getPos(vid)) + + def isDefined(vid: VertexID): Boolean = { + val pos = index.getPos(vid) + pos >= 0 && mask.get(pos) + } + + /** Look up vid in activeSet, throwing an exception if it is None. */ + def isActive(vid: VertexID): Boolean = { + activeSet.get.contains(vid) + } + + /** The number of active vertices, if any exist. */ + def numActives: Option[Int] = activeSet.map(_.size) + + /** + * Pass each vertex attribute along with the vertex id through a map + * function and retain the original RDD's partitioning and index. + * + * @tparam VD2 the type returned by the map function + * + * @param f the function applied to each vertex id and vertex + * attribute in the RDD + * + * @return a new VertexPartition with values obtained by applying `f` to + * each of the entries in the original VertexRDD. The resulting + * VertexPartition retains the same index. + */ + def map[VD2: ClassTag](f: (VertexID, VD) => VD2): VertexPartition[VD2] = { + // Construct a view of the map transformation + val newValues = new Array[VD2](capacity) + var i = mask.nextSetBit(0) + while (i >= 0) { + newValues(i) = f(index.getValue(i), values(i)) + i = mask.nextSetBit(i + 1) + } + new VertexPartition[VD2](index, newValues, mask) + } + + /** + * Restrict the vertex set to the set of vertices satisfying the given predicate. + * + * @param pred the user defined predicate + * + * @note The vertex set preserves the original index structure which means that the returned + * RDD can be easily joined with the original vertex-set. Furthermore, the filter only + * modifies the bitmap index and so no new values are allocated. 
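+ *
+ * Illustrative example: part.filter((vid, attr) => vid % 2 == 0) reuses the index and
+ * value array and only clears the mask bits of the odd vertex ids.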
+ */ + def filter(pred: (VertexID, VD) => Boolean): VertexPartition[VD] = { + // Allocate the array to store the results into + val newMask = new BitSet(capacity) + // Iterate over the active bits in the old mask and evaluate the predicate + var i = mask.nextSetBit(0) + while (i >= 0) { + if (pred(index.getValue(i), values(i))) { + newMask.set(i) + } + i = mask.nextSetBit(i + 1) + } + new VertexPartition(index, values, newMask) + } + + /** + * Hides vertices that are the same between this and other. For vertices that are different, keeps + * the values from `other`. The indices of `this` and `other` must be the same. + */ + def diff(other: VertexPartition[VD]): VertexPartition[VD] = { + if (index != other.index) { + logWarning("Diffing two VertexPartitions with different indexes is slow.") + diff(createUsingIndex(other.iterator)) + } else { + val newMask = mask & other.mask + var i = newMask.nextSetBit(0) + while (i >= 0) { + if (values(i) == other.values(i)) { + newMask.unset(i) + } + i = newMask.nextSetBit(i + 1) + } + new VertexPartition(index, other.values, newMask) + } + } + + /** Left outer join another VertexPartition. */ + def leftJoin[VD2: ClassTag, VD3: ClassTag] + (other: VertexPartition[VD2]) + (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = { + if (index != other.index) { + logWarning("Joining two VertexPartitions with different indexes is slow.") + leftJoin(createUsingIndex(other.iterator))(f) + } else { + val newValues = new Array[VD3](capacity) + + var i = mask.nextSetBit(0) + while (i >= 0) { + val otherV: Option[VD2] = if (other.mask.get(i)) Some(other.values(i)) else None + newValues(i) = f(index.getValue(i), values(i), otherV) + i = mask.nextSetBit(i + 1) + } + new VertexPartition(index, newValues, mask) + } + } + + /** Left outer join another iterator of messages. */ + def leftJoin[VD2: ClassTag, VD3: ClassTag] + (other: Iterator[(VertexID, VD2)]) + (f: (VertexID, VD, Option[VD2]) => VD3): VertexPartition[VD3] = { + leftJoin(createUsingIndex(other))(f) + } + + /** Inner join another VertexPartition. */ + def innerJoin[U: ClassTag, VD2: ClassTag](other: VertexPartition[U]) + (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = { + if (index != other.index) { + logWarning("Joining two VertexPartitions with different indexes is slow.") + innerJoin(createUsingIndex(other.iterator))(f) + } else { + val newMask = mask & other.mask + val newValues = new Array[VD2](capacity) + var i = newMask.nextSetBit(0) + while (i >= 0) { + newValues(i) = f(index.getValue(i), values(i), other.values(i)) + i = newMask.nextSetBit(i + 1) + } + new VertexPartition(index, newValues, newMask) + } + } + + /** + * Inner join an iterator of messages. + */ + def innerJoin[U: ClassTag, VD2: ClassTag] + (iter: Iterator[Product2[VertexID, U]]) + (f: (VertexID, VD, U) => VD2): VertexPartition[VD2] = { + innerJoin(createUsingIndex(iter))(f) + } + + /** + * Similar effect as aggregateUsingIndex((a, b) => a) + */ + def createUsingIndex[VD2: ClassTag](iter: Iterator[Product2[VertexID, VD2]]) + : VertexPartition[VD2] = { + val newMask = new BitSet(capacity) + val newValues = new Array[VD2](capacity) + iter.foreach { case (vid, vdata) => + val pos = index.getPos(vid) + if (pos >= 0) { + newMask.set(pos) + newValues(pos) = vdata + } + } + new VertexPartition[VD2](index, newValues, newMask) + } + + /** + * Similar to innerJoin, but vertices from the left side that don't appear in iter will remain in + * the partition, hidden by the bitmask. 
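+ *
+ * In this patch it is used by ReplicatedVertexView to fold shipped vertex-attribute
+ * deltas into the previous view.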
+ */ + def innerJoinKeepLeft(iter: Iterator[Product2[VertexID, VD]]): VertexPartition[VD] = { + val newMask = new BitSet(capacity) + val newValues = new Array[VD](capacity) + System.arraycopy(values, 0, newValues, 0, newValues.length) + iter.foreach { case (vid, vdata) => + val pos = index.getPos(vid) + if (pos >= 0) { + newMask.set(pos) + newValues(pos) = vdata + } + } + new VertexPartition(index, newValues, newMask) + } + + def aggregateUsingIndex[VD2: ClassTag]( + iter: Iterator[Product2[VertexID, VD2]], + reduceFunc: (VD2, VD2) => VD2): VertexPartition[VD2] = { + val newMask = new BitSet(capacity) + val newValues = new Array[VD2](capacity) + iter.foreach { product => + val vid = product._1 + val vdata = product._2 + val pos = index.getPos(vid) + if (pos >= 0) { + if (newMask.get(pos)) { + newValues(pos) = reduceFunc(newValues(pos), vdata) + } else { // otherwise just store the new value + newMask.set(pos) + newValues(pos) = vdata + } + } + } + new VertexPartition[VD2](index, newValues, newMask) + } + + def replaceActives(iter: Iterator[VertexID]): VertexPartition[VD] = { + val newActiveSet = new VertexSet + iter.foreach(newActiveSet.add(_)) + new VertexPartition(index, values, mask, Some(newActiveSet)) + } + + /** + * Construct a new VertexPartition whose index contains only the vertices in the mask. + */ + def reindex(): VertexPartition[VD] = { + val hashMap = new PrimitiveKeyOpenHashMap[VertexID, VD] + val arbitraryMerge = (a: VD, b: VD) => a + for ((k, v) <- this.iterator) { + hashMap.setMerge(k, v, arbitraryMerge) + } + new VertexPartition(hashMap.keySet, hashMap._values, hashMap.keySet.getBitSet) + } + + def iterator: Iterator[(VertexID, VD)] = + mask.iterator.map(ind => (index.getValue(ind), values(ind))) + + def vidIterator: Iterator[VertexID] = mask.iterator.map(ind => index.getValue(ind)) +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/package.scala new file mode 100644 index 0000000000..96f0d91c9b --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/package.scala @@ -0,0 +1,22 @@ +package org.apache.spark + +import org.apache.spark.util.collection.OpenHashSet + + +package object graphx { + + type VertexID = Long + + // TODO: Consider using Char. + type PartitionID = Int + + type VertexSet = OpenHashSet[VertexID] + + // type VertexIdToIndexMap = it.unimi.dsi.fastutil.longs.Long2IntOpenHashMap + type VertexIdToIndexMap = OpenHashSet[VertexID] + + /** + * Return the default null-like value for a data type T. 
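+ * For example, nullValue[String] is null, while nullValue[Double] unboxes to 0.0.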
+ */ + def nullValue[T] = null.asInstanceOf[T] +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/perf/BagelTest.scala b/graphx/src/main/scala/org/apache/spark/graphx/perf/BagelTest.scala new file mode 100644 index 0000000000..81332e0800 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/perf/BagelTest.scala @@ -0,0 +1,76 @@ +///// This file creates circular dependencies between examples bagle and graph + +// package org.apache.spark.graphx.perf + +// import org.apache.spark._ +// import org.apache.spark.SparkContext._ +// import org.apache.spark.bagel.Bagel + +// import org.apache.spark.examples.bagel +// //import org.apache.spark.bagel.examples._ +// import org.apache.spark.graphx._ + + +// object BagelTest { + +// def main(args: Array[String]) { +// val host = args(0) +// val taskType = args(1) +// val fname = args(2) +// val options = args.drop(3).map { arg => +// arg.dropWhile(_ == '-').split('=') match { +// case Array(opt, v) => (opt -> v) +// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) +// } +// } + +// System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") +// //System.setProperty("spark.shuffle.compress", "false") +// System.setProperty("spark.kryo.registrator", "org.apache.spark.bagel.examples.PRKryoRegistrator") + +// var numIter = Int.MaxValue +// var isDynamic = false +// var tol:Float = 0.001F +// var outFname = "" +// var numVPart = 4 +// var numEPart = 4 + +// options.foreach{ +// case ("numIter", v) => numIter = v.toInt +// case ("dynamic", v) => isDynamic = v.toBoolean +// case ("tol", v) => tol = v.toFloat +// case ("output", v) => outFname = v +// case ("numVPart", v) => numVPart = v.toInt +// case ("numEPart", v) => numEPart = v.toInt +// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) +// } + +// val sc = new SparkContext(host, "PageRank(" + fname + ")") +// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() +// val startTime = System.currentTimeMillis + +// val numVertices = g.vertices.count() + +// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => +// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) +// } + +// // Do the computation +// val epsilon = 0.01 / numVertices +// val messages = sc.parallelize(Array[(String, PRMessage)]()) +// val utils = new PageRankUtils +// val result = +// Bagel.run( +// sc, vertices, messages, combiner = new PRCombiner(), +// numPartitions = numVPart)( +// utils.computeWithCombiner(numVertices, epsilon, numIter)) + +// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) +// if (!outFname.isEmpty) { +// println("Saving pageranks of pages to " + outFname) +// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) +// } +// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") +// sc.stop() +// } +// } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/perf/SparkTest.scala b/graphx/src/main/scala/org/apache/spark/graphx/perf/SparkTest.scala new file mode 100644 index 0000000000..24262640ab --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/perf/SparkTest.scala @@ -0,0 +1,75 @@ +///// This file creates circular dependencies between examples bagle and graph + + +// package org.apache.spark.graphx.perf + +// import org.apache.spark._ +// import org.apache.spark.SparkContext._ +// import org.apache.spark.bagel.Bagel +// import 
org.apache.spark.bagel.examples._ +// import org.apache.spark.graphx._ + + +// object SparkTest { + +// def main(args: Array[String]) { +// val host = args(0) +// val taskType = args(1) +// val fname = args(2) +// val options = args.drop(3).map { arg => +// arg.dropWhile(_ == '-').split('=') match { +// case Array(opt, v) => (opt -> v) +// case _ => throw new IllegalArgumentException("Invalid argument: " + arg) +// } +// } + +// System.setProperty("spark.serializer", "org.apache.spark.KryoSerializer") +// //System.setProperty("spark.shuffle.compress", "false") +// System.setProperty("spark.kryo.registrator", "spark.bagel.examples.PRKryoRegistrator") + +// var numIter = Int.MaxValue +// var isDynamic = false +// var tol:Float = 0.001F +// var outFname = "" +// var numVPart = 4 +// var numEPart = 4 + +// options.foreach{ +// case ("numIter", v) => numIter = v.toInt +// case ("dynamic", v) => isDynamic = v.toBoolean +// case ("tol", v) => tol = v.toFloat +// case ("output", v) => outFname = v +// case ("numVPart", v) => numVPart = v.toInt +// case ("numEPart", v) => numEPart = v.toInt +// case (opt, _) => throw new IllegalArgumentException("Invalid option: " + opt) +// } + +// val sc = new SparkContext(host, "PageRank(" + fname + ")") +// val g = GraphLoader.textFile(sc, fname, a => 1.0F).withPartitioner(numVPart, numEPart).cache() +// val startTime = System.currentTimeMillis + +// val numVertices = g.vertices.count() + +// val vertices = g.collectNeighborIds(EdgeDirection.Out).map { case (vid, neighbors) => +// (vid.toString, new PRVertex(1.0, neighbors.map(_.toString))) +// } + +// // Do the computation +// val epsilon = 0.01 / numVertices +// val messages = sc.parallelize(Array[(String, PRMessage)]()) +// val utils = new PageRankUtils +// val result = +// Bagel.run( +// sc, vertices, messages, combiner = new PRCombiner(), +// numPartitions = numVPart)( +// utils.computeWithCombiner(numVertices, epsilon, numIter)) + +// println("Total rank: " + result.map{ case (id, r) => r.value }.reduce(_+_) ) +// if (!outFname.isEmpty) { +// println("Saving pageranks of pages to " + outFname) +// result.map{ case (id, r) => id + "\t" + r.value }.saveAsTextFile(outFname) +// } +// println("Runtime: " + ((System.currentTimeMillis - startTime)/1000.0) + " seconds") +// sc.stop() +// } +// } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala new file mode 100644 index 0000000000..ec8d534333 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala @@ -0,0 +1,114 @@ +package org.apache.spark.graphx.util + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream} + +import scala.collection.mutable.HashSet + +import org.apache.spark.util.Utils + +import org.objectweb.asm.{ClassReader, ClassVisitor, MethodVisitor} +import org.objectweb.asm.Opcodes._ + + + +private[spark] object BytecodeUtils { + + /** + * Test whether the given closure invokes the specified method in the specified class. 
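+ * For example, GraphImpl.accessesVertexAttr uses this to check whether a mapReduceTriplets
+ * map function touches EdgeTriplet.srcAttr or EdgeTriplet.dstAttr before deciding which
+ * vertex attributes to ship to edge partitions.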
+ */ + def invokedMethod(closure: AnyRef, targetClass: Class[_], targetMethod: String): Boolean = { + if (_invokedMethod(closure.getClass, "apply", targetClass, targetMethod)) { + true + } else { + // look at closures enclosed in this closure + for (f <- closure.getClass.getDeclaredFields + if f.getType.getName.startsWith("scala.Function")) { + f.setAccessible(true) + if (invokedMethod(f.get(closure), targetClass, targetMethod)) { + return true + } + } + return false + } + } + + private def _invokedMethod(cls: Class[_], method: String, + targetClass: Class[_], targetMethod: String): Boolean = { + + val seen = new HashSet[(Class[_], String)] + var stack = List[(Class[_], String)]((cls, method)) + + while (stack.nonEmpty) { + val (c, m) = stack.head + stack = stack.tail + seen.add((c, m)) + val finder = new MethodInvocationFinder(c.getName, m) + getClassReader(c).accept(finder, 0) + for (classMethod <- finder.methodsInvoked) { + //println(classMethod) + if (classMethod._1 == targetClass && classMethod._2 == targetMethod) { + return true + } else if (!seen.contains(classMethod)) { + stack = classMethod :: stack + } + } + } + return false + } + + /** + * Get an ASM class reader for a given class from the JAR that loaded it. + */ + private def getClassReader(cls: Class[_]): ClassReader = { + // Copy data over, before delegating to ClassReader - else we can run out of open file handles. + val className = cls.getName.replaceFirst("^.*\\.", "") + ".class" + val resourceStream = cls.getResourceAsStream(className) + // todo: Fixme - continuing with earlier behavior ... + if (resourceStream == null) return new ClassReader(resourceStream) + + val baos = new ByteArrayOutputStream(128) + Utils.copyStream(resourceStream, baos, true) + new ClassReader(new ByteArrayInputStream(baos.toByteArray)) + } + + /** + * Given the class name, return whether we should look into the class or not. This is used to + * skip examing a large quantity of Java or Scala classes that we know for sure wouldn't access + * the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of "."). + */ + private def skipClass(className: String): Boolean = { + val c = className + c.startsWith("java/") || c.startsWith("scala/") || c.startsWith("javax/") + } + + /** + * Find the set of methods invoked by the specified method in the specified class. + * For example, after running the visitor, + * MethodInvocationFinder("spark/graph/Foo", "test") + * its methodsInvoked variable will contain the set of methods invoked directly by + * Foo.test(). Interface invocations are not returned as part of the result set because we cannot + * determine the actual metod invoked by inspecting the bytecode. 
+ */ + private class MethodInvocationFinder(className: String, methodName: String) + extends ClassVisitor(ASM4) { + + val methodsInvoked = new HashSet[(Class[_], String)] + + override def visitMethod(access: Int, name: String, desc: String, + sig: String, exceptions: Array[String]): MethodVisitor = { + if (name == methodName) { + new MethodVisitor(ASM4) { + override def visitMethodInsn(op: Int, owner: String, name: String, desc: String) { + if (op == INVOKEVIRTUAL || op == INVOKESPECIAL || op == INVOKESTATIC) { + if (!skipClass(owner)) { + methodsInvoked.add((Class.forName(owner.replace("/", ".")), name)) + } + } + } + } + } else { + null + } + } + } +} diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala new file mode 100644 index 0000000000..57117241ad --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala @@ -0,0 +1,282 @@ +package org.apache.spark.graphx.util + +import scala.annotation.tailrec +import scala.math._ +import scala.reflect.ClassTag +import scala.util._ + +import org.apache.spark._ +import org.apache.spark.serializer._ +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.Graph +import org.apache.spark.graphx.Edge +import org.apache.spark.graphx.impl.GraphImpl + +/** + * @todo cleanup and modularize code + */ +object GraphGenerators { + + val RMATa = 0.45 + val RMATb = 0.15 + val RMATc = 0.15 + val RMATd = 0.25 + + def main(args: Array[String]) { + + + val serializer = "org.apache.spark.serializer.KryoSerializer" + System.setProperty("spark.serializer", serializer) + //System.setProperty("spark.shuffle.compress", "false") + System.setProperty("spark.kryo.registrator", "spark.graphx.GraphKryoRegistrator") + val host = "local[4]" + val sc = new SparkContext(host, "Lognormal graph generator") + + val lnGraph = logNormalGraph(sc, 10000) + + val rmat = rmatGraph(sc, 1000, 3000) + + //for (v <- lnGraph.vertices) { + // println(v.id + ":\t" + v.data) + //} + + val times = 100000 + //val nums = (1 to times).flatMap { n => List(sampleLogNormal(4.0, 1.3, times)) }.toList + //val avg = nums.sum / nums.length + //val sumSquares = nums.foldLeft(0.0) {(total, next) => + // (total + math.pow((next - avg), 2)) } + //val stdev = math.sqrt(sumSquares/(nums.length - 1)) + + //println("avg: " + avg + "+-" + stdev) + + + //for (i <- 1 to 1000) { + // println(sampleLogNormal(4.0, 1.3, 1000)) + //} + + sc.stop() + + } + + + // Right now it just generates a bunch of edges where + // the edge data is the weight (default 1) + def logNormalGraph(sc: SparkContext, numVertices: Int): Graph[Int, Int] = { + // based on Pregel settings + val mu = 4 + val sigma = 1.3 + //val vertsAndEdges = (0 until numVertices).flatMap { src => { + + val vertices: RDD[(VertexID, Int)] = sc.parallelize(0 until numVertices).map{ + src => (src, sampleLogNormal(mu, sigma, numVertices)) + } + + val edges = vertices.flatMap{ + v => generateRandomEdges(v._1.toInt, v._2, numVertices) + } + + Graph(vertices, edges, 0) + //println("Vertices:") + //for (v <- vertices) { + // println(v.id) + //} + + //println("Edges") + //for (e <- edges) { + // println(e.src, e.dst, e.data) + //} + + } + + + def generateRandomEdges(src: Int, numEdges: Int, maxVertexID: Int): Array[Edge[Int]] = { + val rand = new Random() + var dsts: Set[Int] = Set() + while (dsts.size < numEdges) { + 
val nextDst = rand.nextInt(maxVertexID) + if (nextDst != src) { + dsts += nextDst + } + } + dsts.map {dst => Edge[Int](src, dst, 1) }.toArray + } + + + /** + * Randomly samples from a log normal distribution + * whose corresponding normal distribution has the + * the given mean and standard deviation. It uses + * the formula X = exp(m+s*Z) where m, s are the + * mean, standard deviation of the lognormal distribution + * and Z~N(0, 1). In this function, + * m = e^(mu+sigma^2/2) and + * s = sqrt[(e^(sigma^2) - 1)(e^(2*mu+sigma^2))]. + * + * @param mu the mean of the normal distribution + * @param sigma the standard deviation of the normal distribution + * @param macVal exclusive upper bound on the value of the sample + */ + def sampleLogNormal(mu: Double, sigma: Double, maxVal: Int): Int = { + val rand = new Random() + val m = math.exp(mu+(sigma*sigma)/2.0) + val s = math.sqrt((math.exp(sigma*sigma) - 1) * math.exp(2*mu + sigma*sigma)) + // Z ~ N(0, 1) + var X: Double = maxVal + + while (X >= maxVal) { + val Z = rand.nextGaussian() + //X = math.exp((m + s*Z)) + X = math.exp((mu + sigma*Z)) + } + math.round(X.toFloat) + } + + + + def rmatGraph(sc: SparkContext, requestedNumVertices: Int, numEdges: Int): Graph[Int, Int] = { + // let N = requestedNumVertices + // the number of vertices is 2^n where n=ceil(log2[N]) + // This ensures that the 4 quadrants are the same size at all recursion levels + val numVertices = math.round(math.pow(2.0, math.ceil(math.log(requestedNumVertices)/math.log(2.0)))).toInt + var edges: Set[Edge[Int]] = Set() + while (edges.size < numEdges) { + if (edges.size % 100 == 0) { + println(edges.size + " edges") + } + edges += addEdge(numVertices) + + } + val graph = outDegreeFromEdges(sc.parallelize(edges.toList)) + graph + + } + + def outDegreeFromEdges[ED: ClassTag](edges: RDD[Edge[ED]]): Graph[Int, ED] = { + + val vertices = edges.flatMap { edge => List((edge.srcId, 1)) } + .reduceByKey(_ + _) + .map{ case (vid, degree) => (vid, degree) } + Graph(vertices, edges, 0) + } + + /** + * @param numVertices Specifies the total number of vertices in the graph (used to get + * the dimensions of the adjacency matrix + */ + def addEdge(numVertices: Int): Edge[Int] = { + //val (src, dst) = chooseCell(numVertices/2.0, numVertices/2.0, numVertices/2.0) + val v = math.round(numVertices.toFloat/2.0).toInt + + val (src, dst) = chooseCell(v, v, v) + Edge[Int](src, dst, 1) + } + + + /** + * This method recursively subdivides the the adjacency matrix into quadrants + * until it picks a single cell. The naming conventions in this paper match + * those of the R-MAT paper. There are a power of 2 number of nodes in the graph. + * The adjacency matrix looks like: + * + * dst -> + * (x,y) *************** _ + * | | | | + * | a | b | | + * src | | | | + * | *************** | T + * \|/ | | | | + * | c | d | | + * | | | | + * *************** - + * + * where this represents the subquadrant of the adj matrix currently being + * subdivided. (x,y) represent the upper left hand corner of the subquadrant, + * and T represents the side length (guaranteed to be a power of 2). 
+ * + * After choosing the next level subquadrant, we get the resulting sets + * of parameters: + * quad = a, x'=x, y'=y, T'=T/2 + * quad = b, x'=x+T/2, y'=y, T'=T/2 + * quad = c, x'=x, y'=y+T/2, T'=T/2 + * quad = d, x'=x+T/2, y'=y+T/2, T'=T/2 + * + * @param src is the + */ + @tailrec + def chooseCell(x: Int, y: Int, t: Int): (Int, Int) = { + if (t <= 1) + (x,y) + else { + val newT = math.round(t.toFloat/2.0).toInt + pickQuadrant(RMATa, RMATb, RMATc, RMATd) match { + case 0 => chooseCell(x, y, newT) + case 1 => chooseCell(x+newT, y, newT) + case 2 => chooseCell(x, y+newT, newT) + case 3 => chooseCell(x+newT, y+newT, newT) + } + } + } + + // TODO(crankshaw) turn result into an enum (or case class for pattern matching} + def pickQuadrant(a: Double, b: Double, c: Double, d: Double): Int = { + if (a+b+c+d != 1.0) { + throw new IllegalArgumentException("R-MAT probability parameters sum to " + (a+b+c+d) + ", should sum to 1.0") + } + val rand = new Random() + val result = rand.nextDouble() + result match { + case x if x < a => 0 // 0 corresponds to quadrant a + case x if (x >= a && x < a+b) => 1 // 1 corresponds to b + case x if (x >= a+b && x < a+b+c) => 2 // 2 corresponds to c + case _ => 3 // 3 corresponds to d + } + } + + + + /** + * Create `rows` by `cols` grid graph with each vertex connected to its + * row+1 and col+1 neighbors. Vertex ids are assigned in row major + * order. + * + * @param sc the spark context in which to construct the graph + * @param rows the number of rows + * @param cols the number of columns + * + * @return A graph containing vertices with the row and column ids + * as their attributes and edge values as 1.0. + */ + def gridGraph(sc: SparkContext, rows: Int, cols: Int): Graph[(Int,Int), Double] = { + // Convert row column address into vertex ids (row major order) + def sub2ind(r: Int, c: Int): VertexID = r * cols + c + + val vertices: RDD[(VertexID, (Int,Int))] = + sc.parallelize(0 until rows).flatMap( r => (0 until cols).map( c => (sub2ind(r,c), (r,c)) ) ) + val edges: RDD[Edge[Double]] = + vertices.flatMap{ case (vid, (r,c)) => + (if (r+1 < rows) { Seq( (sub2ind(r, c), sub2ind(r+1, c))) } else { Seq.empty }) ++ + (if (c+1 < cols) { Seq( (sub2ind(r, c), sub2ind(r, c+1))) } else { Seq.empty }) + }.map{ case (src, dst) => Edge(src, dst, 1.0) } + Graph(vertices, edges) + } // end of gridGraph + + /** + * Create a star graph with vertex 0 being the center. + * + * @param sc the spark context in which to construct the graph + * @param nverts the number of vertices in the star + * + * @return A star graph containing `nverts` vertices with vertex 0 + * being the center vertex. + */ + def starGraph(sc: SparkContext, nverts: Int): Graph[Int, Int] = { + val edges: RDD[(VertexID, VertexID)] = sc.parallelize(1 until nverts).map(vid => (vid, 0)) + Graph.fromEdgeTuples(edges, 1) + } // end of starGraph + + + +} // end of Graph Generators diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/HashUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/HashUtils.scala new file mode 100644 index 0000000000..7a79d33350 --- /dev/null +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/HashUtils.scala @@ -0,0 +1,21 @@ +package org.apache.spark.graphx.util + + +object HashUtils { + + /** + * Compute a 64-bit hash value for the given string. 
+ * See http://stackoverflow.com/questions/1660501/what-is-a-good-64bit-hash-function-in-java-for-textual-strings + */ + def hash(str: String): Long = { + var h = 1125899906842597L + val len = str.length + var i = 0 + + while (i < len) { + h = 31 * h + str(i) + i += 1 + } + h + } +} diff --git a/graphx/src/test/resources/log4j.properties b/graphx/src/test/resources/log4j.properties new file mode 100644 index 0000000000..896936d8c4 --- /dev/null +++ b/graphx/src/test/resources/log4j.properties @@ -0,0 +1,28 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set everything to be logged to the file core/target/unit-tests.log +log4j.rootCategory=INFO, file +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=false +log4j.appender.file.file=graph/target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %p %c{1}: %m%n + +# Ignore messages below warning level from Jetty, because it's a bit verbose +log4j.logger.org.eclipse.jetty=WARN +org.eclipse.jetty.LEVEL=WARN diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala new file mode 100644 index 0000000000..cc281fce99 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphOpsSuite.scala @@ -0,0 +1,92 @@ +package org.apache.spark.graphx + +import org.apache.spark.SparkContext +import org.apache.spark.graphx.Graph._ +import org.apache.spark.graphx.impl.EdgePartition +import org.apache.spark.rdd._ +import org.scalatest.FunSuite + +class GraphOpsSuite extends FunSuite with LocalSparkContext { + + test("aggregateNeighbors") { + withSpark { sc => + val n = 3 + val star = + Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID))), 1) + + val indegrees = star.aggregateNeighbors( + (vid, edge) => Some(1), + (a: Int, b: Int) => a + b, + EdgeDirection.In) + assert(indegrees.collect().toSet === (1 to n).map(x => (x, 1)).toSet) + + val outdegrees = star.aggregateNeighbors( + (vid, edge) => Some(1), + (a: Int, b: Int) => a + b, + EdgeDirection.Out) + assert(outdegrees.collect().toSet === Set((0, n))) + + val noVertexValues = star.aggregateNeighbors[Int]( + (vid: VertexID, edge: EdgeTriplet[Int, Int]) => None, + (a: Int, b: Int) => throw new Exception("reduceFunc called unexpectedly"), + EdgeDirection.In) + assert(noVertexValues.collect().toSet === Set.empty[(VertexID, Int)]) + } + } + + test("joinVertices") { + withSpark { sc => + val vertices = + sc.parallelize(Seq[(VertexID, String)]((1, "one"), (2, "two"), (3, "three")), 2) + val edges = sc.parallelize((Seq(Edge(1, 2, "onetwo")))) + val g: Graph[String, String] = Graph(vertices, edges) + + val tbl = 
sc.parallelize(Seq[(VertexID, Int)]((1, 10), (2, 20))) + val g1 = g.joinVertices(tbl) { (vid: VertexID, attr: String, u: Int) => attr + u } + + val v = g1.vertices.collect().toSet + assert(v === Set((1, "one10"), (2, "two20"), (3, "three"))) + } + } + + test("collectNeighborIds") { + withSpark { sc => + val chain = (0 until 100).map(x => (x, (x+1)%100) ) + val rawEdges = sc.parallelize(chain, 3).map { case (s,d) => (s.toLong, d.toLong) } + val graph = Graph.fromEdgeTuples(rawEdges, 1.0) + val nbrs = graph.collectNeighborIds(EdgeDirection.Both) + assert(nbrs.count === chain.size) + assert(graph.numVertices === nbrs.count) + nbrs.collect.foreach { case (vid, nbrs) => assert(nbrs.size === 2) } + nbrs.collect.foreach { case (vid, nbrs) => + val s = nbrs.toSet + assert(s.contains((vid + 1) % 100)) + assert(s.contains(if (vid > 0) vid - 1 else 99 )) + } + } + } + + test ("filter") { + withSpark { sc => + val n = 5 + val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x))) + val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x))) + val graph: Graph[Int, Int] = Graph(vertices, edges) + val filteredGraph = graph.filter( + graph => { + val degrees: VertexRDD[Int] = graph.outDegrees + graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)} + }, + vpred = (vid: VertexID, deg:Int) => deg > 0 + ) + + val v = filteredGraph.vertices.collect().toSet + assert(v === Set((0,0))) + + // the map is necessary because of object-reuse in the edge iterator + val e = filteredGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet + assert(e.isEmpty) + } + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala new file mode 100644 index 0000000000..094fa722a0 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala @@ -0,0 +1,272 @@ +package org.apache.spark.graphx + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.graphx.Graph._ +import org.apache.spark.rdd._ + +class GraphSuite extends FunSuite with LocalSparkContext { + + def starGraph(sc: SparkContext, n: Int): Graph[String, Int] = { + Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID)), 3), "v") + } + + test("Graph.fromEdgeTuples") { + withSpark { sc => + val ring = (0L to 100L).zip((1L to 99L) :+ 0L) + val doubleRing = ring ++ ring + val graph = Graph.fromEdgeTuples(sc.parallelize(doubleRing), 1) + assert(graph.edges.count() === doubleRing.size) + assert(graph.edges.collect.forall(e => e.attr == 1)) + + // uniqueEdges option should uniquify edges and store duplicate count in edge attributes + val uniqueGraph = Graph.fromEdgeTuples(sc.parallelize(doubleRing), 1, Some(RandomVertexCut)) + assert(uniqueGraph.edges.count() === ring.size) + assert(uniqueGraph.edges.collect.forall(e => e.attr == 2)) + } + } + + test("Graph.fromEdges") { + withSpark { sc => + val ring = (0L to 100L).zip((1L to 99L) :+ 0L).map { case (a, b) => Edge(a, b, 1) } + val graph = Graph.fromEdges(sc.parallelize(ring), 1.0F) + assert(graph.edges.count() === ring.size) + } + } + + test("Graph.apply") { + withSpark { sc => + val rawEdges = (0L to 98L).zip((1L to 99L) :+ 0L) + val edges: RDD[Edge[Int]] = sc.parallelize(rawEdges).map { case (s, t) => Edge(s, t, 1) } + val vertices: RDD[(VertexID, Boolean)] = sc.parallelize((0L until 10L).map(id => (id, true))) + val graph = Graph(vertices, edges, false) + assert( graph.edges.count() === rawEdges.size ) + // 
Vertices not explicitly provided but referenced by edges should be created automatically
+      assert( graph.vertices.count() === 100)
+      graph.triplets.collect().foreach { et =>
+        assert((et.srcId < 10 && et.srcAttr) || (et.srcId >= 10 && !et.srcAttr))
+        assert((et.dstId < 10 && et.dstAttr) || (et.dstId >= 10 && !et.dstAttr))
+      }
+    }
+  }
+
+  test("triplets") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n)
+      assert(star.triplets.map(et => (et.srcId, et.dstId, et.srcAttr, et.dstAttr)).collect.toSet ===
+        (1 to n).map(x => (0: VertexID, x: VertexID, "v", "v")).toSet)
+    }
+  }
+
+  test("partitionBy") {
+    withSpark { sc =>
+      def mkGraph(edges: List[(Long, Long)]) = Graph.fromEdgeTuples(sc.parallelize(edges, 2), 0)
+      def nonemptyParts(graph: Graph[Int, Int]) = {
+        graph.edges.partitionsRDD.mapPartitions { iter =>
+          Iterator(iter.next()._2.iterator.toList)
+        }.filter(_.nonEmpty)
+      }
+      val identicalEdges = List((0L, 1L), (0L, 1L))
+      val canonicalEdges = List((0L, 1L), (1L, 0L))
+      val sameSrcEdges = List((0L, 1L), (0L, 2L))
+
+      // The two edges start out in different partitions
+      for (edges <- List(identicalEdges, canonicalEdges, sameSrcEdges)) {
+        assert(nonemptyParts(mkGraph(edges)).count === 2)
+      }
+      // partitionBy(RandomVertexCut) puts identical edges in the same partition
+      assert(nonemptyParts(mkGraph(identicalEdges).partitionBy(RandomVertexCut)).count === 1)
+      // partitionBy(EdgePartition1D) puts same-source edges in the same partition
+      assert(nonemptyParts(mkGraph(sameSrcEdges).partitionBy(EdgePartition1D)).count === 1)
+      // partitionBy(CanonicalRandomVertexCut) puts edges that are identical modulo direction into
+      // the same partition
+      assert(nonemptyParts(mkGraph(canonicalEdges).partitionBy(CanonicalRandomVertexCut)).count === 1)
+      // partitionBy(EdgePartition2D) puts identical edges in the same partition
+      assert(nonemptyParts(mkGraph(identicalEdges).partitionBy(EdgePartition2D)).count === 1)
+
+      // partitionBy(EdgePartition2D) ensures that vertices need only be replicated to 2 * sqrt(p)
+      // partitions
+      val n = 100
+      val p = 100
+      val verts = 1 to n
+      val graph = Graph.fromEdgeTuples(sc.parallelize(verts.flatMap(x =>
+        verts.filter(y => y % x == 0).map(y => (x: VertexID, y: VertexID))), p), 0)
+      assert(graph.edges.partitions.length === p)
+      val partitionedGraph = graph.partitionBy(EdgePartition2D)
+      assert(partitionedGraph.edges.partitions.length === p)
+      val bound = 2 * math.sqrt(p)
+      // Each vertex should be replicated to at most 2 * sqrt(p) partitions
+      val partitionSets = partitionedGraph.edges.partitionsRDD.mapPartitions { iter =>
+        val part = iter.next()._2
+        Iterator((part.srcIds ++ part.dstIds).toSet)
+      }.collect
+      assert(verts.forall(id => partitionSets.count(_.contains(id)) <= bound))
+      // This should not be true for the default hash partitioning
+      val partitionSetsUnpartitioned = graph.edges.partitionsRDD.mapPartitions { iter =>
+        val part = iter.next()._2
+        Iterator((part.srcIds ++ part.dstIds).toSet)
+      }.collect
+      assert(verts.exists(id => partitionSetsUnpartitioned.count(_.contains(id)) > bound))
+    }
+  }
+
+  test("mapVertices") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n)
+      // mapVertices preserving type
+      val mappedVAttrs = star.mapVertices((vid, attr) => attr + "2")
+      assert(mappedVAttrs.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, "v2")).toSet)
+      // mapVertices changing type
+      val mappedVAttrs2 = star.mapVertices((vid, attr) => attr.length)
+      assert(mappedVAttrs2.vertices.collect.toSet === (0 to n).map(x => (x: VertexID, 1)).toSet)
+    }
+  }
+
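+  // A minimal illustrative sketch, assuming only the starGraph/withSpark helpers above and the
+  // GraphOps degree accessors (inDegrees/outDegrees): the hub of a star has out-degree n and
+  // every spoke has in-degree 1.
+  test("star graph degrees (illustrative)") {
+    withSpark { sc =>
+      val n = 4
+      val star = starGraph(sc, n)
+      assert(star.outDegrees.collect().toSet === Set((0L, n)))
+      assert(star.inDegrees.collect().toSet === (1 to n).map(x => (x.toLong, 1)).toSet)
+    }
+  }
+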
+  test("mapEdges") {
+    withSpark { sc =>
+      val n = 3
+      val star = starGraph(sc, n)
+      val starWithEdgeAttrs = star.mapEdges(e => e.dstId)
+
+      val edges = starWithEdgeAttrs.edges.collect()
+      assert(edges.size === n)
+      assert(edges.toSet === (1 to n).map(x => Edge(0, x, x)).toSet)
+    }
+  }
+
+  test("mapTriplets") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n)
+      assert(star.mapTriplets(et => et.srcAttr + et.dstAttr).edges.collect.toSet ===
+        (1L to n).map(x => Edge(0, x, "vv")).toSet)
+    }
+  }
+
+  test("reverse") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n)
+      assert(star.reverse.outDegrees.collect.toSet === (1 to n).map(x => (x: VertexID, 1)).toSet)
+    }
+  }
+
+  test("subgraph") {
+    withSpark { sc =>
+      // Create a star graph of 10 vertices.
+      val n = 10
+      val star = starGraph(sc, n)
+      // Take only vertices whose vids are even
+      val subgraph = star.subgraph(vpred = (vid, attr) => vid % 2 == 0)
+
+      // We should have 6 vertices (0, 2, 4, 6, 8, 10).
+      assert(subgraph.vertices.collect().toSet === (0 to n by 2).map(x => (x, "v")).toSet)
+
+      // And 5 edges.
+      assert(subgraph.edges.map(_.copy()).collect().toSet === (2 to n by 2).map(x => Edge(0, x, 1)).toSet)
+    }
+  }
+
+  test("mask") {
+    withSpark { sc =>
+      val n = 5
+      val vertices = sc.parallelize((0 to n).map(x => (x:VertexID, x)))
+      val edges = sc.parallelize((1 to n).map(x => Edge(0, x, x)))
+      val graph: Graph[Int, Int] = Graph(vertices, edges)
+
+      val subgraph = graph.subgraph(
+        e => e.dstId != 4L,
+        (vid, vdata) => vid != 3L
+      ).mapVertices((vid, vdata) => -1).mapEdges(e => -1)
+
+      val projectedGraph = graph.mask(subgraph)
+
+      val v = projectedGraph.vertices.collect().toSet
+      assert(v === Set((0,0), (1,1), (2,2), (4,4), (5,5)))
+
+      // the map is necessary because of object-reuse in the edge iterator
+      val e = projectedGraph.edges.map(e => Edge(e.srcId, e.dstId, e.attr)).collect().toSet
+      assert(e === Set(Edge(0,1,1), Edge(0,2,2), Edge(0,5,5)))
+
+    }
+  }
+
+  test("groupEdges") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n)
+      val doubleStar = Graph.fromEdgeTuples(
+        sc.parallelize((1 to n).flatMap(x =>
+          List((0: VertexID, x: VertexID), (0: VertexID, x: VertexID))), 1), "v")
+      val star2 = doubleStar.groupEdges { (a, b) => a}
+      assert(star2.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int]) ===
+        star.edges.collect.toArray.sorted(Edge.lexicographicOrdering[Int]))
+      assert(star2.vertices.collect.toSet === star.vertices.collect.toSet)
+    }
+  }
+
+  test("mapReduceTriplets") {
+    withSpark { sc =>
+      val n = 5
+      val star = starGraph(sc, n).mapVertices { (_, _) => 0 }
+      val starDeg = star.joinVertices(star.degrees){ (vid, oldV, deg) => deg }
+      val neighborDegreeSums = starDeg.mapReduceTriplets(
+        edge => Iterator((edge.srcId, edge.dstAttr), (edge.dstId, edge.srcAttr)),
+        (a: Int, b: Int) => a + b)
+      assert(neighborDegreeSums.collect().toSet === (0 to n).map(x => (x, n)).toSet)
+
+      // activeSetOpt
+      val allPairs = for (x <- 1 to n; y <- 1 to n) yield (x: VertexID, y: VertexID)
+      val complete = Graph.fromEdgeTuples(sc.parallelize(allPairs, 3), 0)
+      val vids = complete.mapVertices((vid, attr) => vid).cache()
+      val active = vids.vertices.filter { case (vid, attr) => attr % 2 == 0 }
+      val numEvenNeighbors = vids.mapReduceTriplets(et => {
+        // Map function should only run on edges with destination in the active set
+        if (et.dstId % 2 != 0) {
+          throw new Exception("map ran on edge with dst vid %d, which is odd".format(et.dstId))
+        }
+        Iterator((et.srcId, 1))
+      }, (a: Int, b: Int) => a + b, Some((active,
EdgeDirection.In))).collect.toSet + assert(numEvenNeighbors === (1 to n).map(x => (x: VertexID, n / 2)).toSet) + + // outerJoinVertices followed by mapReduceTriplets(activeSetOpt) + val ringEdges = sc.parallelize((0 until n).map(x => (x: VertexID, (x+1) % n: VertexID)), 3) + val ring = Graph.fromEdgeTuples(ringEdges, 0) .mapVertices((vid, attr) => vid).cache() + val changed = ring.vertices.filter { case (vid, attr) => attr % 2 == 1 }.mapValues(-_) + val changedGraph = ring.outerJoinVertices(changed) { (vid, old, newOpt) => newOpt.getOrElse(old) } + val numOddNeighbors = changedGraph.mapReduceTriplets(et => { + // Map function should only run on edges with source in the active set + if (et.srcId % 2 != 1) { + throw new Exception("map ran on edge with src vid %d, which is even".format(et.dstId)) + } + Iterator((et.dstId, 1)) + }, (a: Int, b: Int) => a + b, Some(changed, EdgeDirection.Out)).collect.toSet + assert(numOddNeighbors === (2 to n by 2).map(x => (x: VertexID, 1)).toSet) + + } + } + + test("outerJoinVertices") { + withSpark { sc => + val n = 5 + val reverseStar = starGraph(sc, n).reverse + // outerJoinVertices changing type + val reverseStarDegrees = + reverseStar.outerJoinVertices(reverseStar.outDegrees) { (vid, a, bOpt) => bOpt.getOrElse(0) } + val neighborDegreeSums = reverseStarDegrees.mapReduceTriplets( + et => Iterator((et.srcId, et.dstAttr), (et.dstId, et.srcAttr)), + (a: Int, b: Int) => a + b).collect.toSet + assert(neighborDegreeSums === Set((0: VertexID, n)) ++ (1 to n).map(x => (x: VertexID, 0))) + // outerJoinVertices preserving type + val messages = reverseStar.vertices.mapValues { (vid, attr) => vid.toString } + val newReverseStar = + reverseStar.outerJoinVertices(messages) { (vid, a, bOpt) => a + bOpt.getOrElse("") } + assert(newReverseStar.vertices.map(_._2).collect.toSet === + (0 to n).map(x => "v%d".format(x)).toSet) + } + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala b/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala new file mode 100644 index 0000000000..6aec2ea8a9 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/LocalSparkContext.scala @@ -0,0 +1,28 @@ +package org.apache.spark.graphx + +import org.scalatest.Suite +import org.scalatest.BeforeAndAfterEach + +import org.apache.spark.SparkContext + + +/** + * Provides a method to run tests against a {@link SparkContext} variable that is correctly stopped + * after each test. +*/ +trait LocalSparkContext { + System.setProperty("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + System.setProperty("spark.kryo.registrator", "org.apache.spark.graphx.GraphKryoRegistrator") + + /** Runs `f` on a new SparkContext and ensures that it is stopped afterwards. 
*/ + def withSpark[T](f: SparkContext => T) = { + val sc = new SparkContext("local", "test") + try { + f(sc) + } finally { + sc.stop() + // To avoid Akka rebinding to the same port, since it doesn't unbind immediately on shutdown + System.clearProperty("spark.driver.port") + } + } +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala new file mode 100644 index 0000000000..429622357f --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/PregelSuite.scala @@ -0,0 +1,41 @@ +package org.apache.spark.graphx + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.rdd._ + +class PregelSuite extends FunSuite with LocalSparkContext { + + test("1 iteration") { + withSpark { sc => + val n = 5 + val star = + Graph.fromEdgeTuples(sc.parallelize((1 to n).map(x => (0: VertexID, x: VertexID)), 3), "v") + val result = Pregel(star, 0)( + (vid, attr, msg) => attr, + et => Iterator.empty, + (a: Int, b: Int) => throw new Exception("mergeMsg run unexpectedly")) + assert(result.vertices.collect.toSet === star.vertices.collect.toSet) + } + } + + test("chain propagation") { + withSpark { sc => + val n = 5 + val chain = Graph.fromEdgeTuples( + sc.parallelize((1 until n).map(x => (x: VertexID, x + 1: VertexID)), 3), + 0).cache() + assert(chain.vertices.collect.toSet === (1 to n).map(x => (x: VertexID, 0)).toSet) + val chainWithSeed = chain.mapVertices { (vid, attr) => if (vid == 1) 1 else 0 } + assert(chainWithSeed.vertices.collect.toSet === + Set((1: VertexID, 1)) ++ (2 to n).map(x => (x: VertexID, 0)).toSet) + val result = Pregel(chainWithSeed, 0)( + (vid, attr, msg) => math.max(msg, attr), + et => Iterator((et.dstId, et.srcAttr)), + (a: Int, b: Int) => math.max(a, b)) + assert(result.vertices.collect.toSet === + chain.vertices.mapValues { (vid, attr) => attr + 1 }.collect.toSet) + } + } +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala new file mode 100644 index 0000000000..3ba412c1f8 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/SerializerSuite.scala @@ -0,0 +1,183 @@ +package org.apache.spark.graphx + +import java.io.{EOFException, ByteArrayInputStream, ByteArrayOutputStream} + +import scala.util.Random + +import org.scalatest.FunSuite + +import org.apache.spark._ +import org.apache.spark.graphx.impl._ +import org.apache.spark.graphx.impl.MsgRDDFunctions._ +import org.apache.spark.serializer.SerializationStream + + +class SerializerSuite extends FunSuite with LocalSparkContext { + + test("IntVertexBroadcastMsgSerializer") { + val conf = new SparkConf(false) + val outMsg = new VertexBroadcastMsg[Int](3, 4, 5) + val bout = new ByteArrayOutputStream + val outStrm = new IntVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new IntVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: VertexBroadcastMsg[Int] = inStrm.readObject() + val inMsg2: VertexBroadcastMsg[Int] = inStrm.readObject() + assert(outMsg.vid === inMsg1.vid) + assert(outMsg.vid === inMsg2.vid) + assert(outMsg.data === inMsg1.data) + assert(outMsg.data === inMsg2.data) + + intercept[EOFException] { + inStrm.readObject() + } + } + + test("LongVertexBroadcastMsgSerializer") { + val conf = new 
SparkConf(false) + val outMsg = new VertexBroadcastMsg[Long](3, 4, 5) + val bout = new ByteArrayOutputStream + val outStrm = new LongVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new LongVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: VertexBroadcastMsg[Long] = inStrm.readObject() + val inMsg2: VertexBroadcastMsg[Long] = inStrm.readObject() + assert(outMsg.vid === inMsg1.vid) + assert(outMsg.vid === inMsg2.vid) + assert(outMsg.data === inMsg1.data) + assert(outMsg.data === inMsg2.data) + + intercept[EOFException] { + inStrm.readObject() + } + } + + test("DoubleVertexBroadcastMsgSerializer") { + val conf = new SparkConf(false) + val outMsg = new VertexBroadcastMsg[Double](3, 4, 5.0) + val bout = new ByteArrayOutputStream + val outStrm = new DoubleVertexBroadcastMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new DoubleVertexBroadcastMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: VertexBroadcastMsg[Double] = inStrm.readObject() + val inMsg2: VertexBroadcastMsg[Double] = inStrm.readObject() + assert(outMsg.vid === inMsg1.vid) + assert(outMsg.vid === inMsg2.vid) + assert(outMsg.data === inMsg1.data) + assert(outMsg.data === inMsg2.data) + + intercept[EOFException] { + inStrm.readObject() + } + } + + test("IntAggMsgSerializer") { + val conf = new SparkConf(false) + val outMsg = (4: VertexID, 5) + val bout = new ByteArrayOutputStream + val outStrm = new IntAggMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new IntAggMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: (VertexID, Int) = inStrm.readObject() + val inMsg2: (VertexID, Int) = inStrm.readObject() + assert(outMsg === inMsg1) + assert(outMsg === inMsg2) + + intercept[EOFException] { + inStrm.readObject() + } + } + + test("LongAggMsgSerializer") { + val conf = new SparkConf(false) + val outMsg = (4: VertexID, 1L << 32) + val bout = new ByteArrayOutputStream + val outStrm = new LongAggMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new LongAggMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: (VertexID, Long) = inStrm.readObject() + val inMsg2: (VertexID, Long) = inStrm.readObject() + assert(outMsg === inMsg1) + assert(outMsg === inMsg2) + + intercept[EOFException] { + inStrm.readObject() + } + } + + test("DoubleAggMsgSerializer") { + val conf = new SparkConf(false) + val outMsg = (4: VertexID, 5.0) + val bout = new ByteArrayOutputStream + val outStrm = new DoubleAggMsgSerializer(conf).newInstance().serializeStream(bout) + outStrm.writeObject(outMsg) + outStrm.writeObject(outMsg) + bout.flush() + val bin = new ByteArrayInputStream(bout.toByteArray) + val inStrm = new DoubleAggMsgSerializer(conf).newInstance().deserializeStream(bin) + val inMsg1: (VertexID, Double) = inStrm.readObject() + val inMsg2: (VertexID, Double) = inStrm.readObject() + assert(outMsg === inMsg1) + assert(outMsg === inMsg2) + + intercept[EOFException] { 
+ inStrm.readObject() + } + } + + test("TestShuffleVertexBroadcastMsg") { + withSpark { sc => + val bmsgs = sc.parallelize(0 until 100, 10).map { pid => + new VertexBroadcastMsg[Int](pid, pid, pid) + } + bmsgs.partitionBy(new HashPartitioner(3)).collect() + } + } + + test("variable long encoding") { + def testVarLongEncoding(v: Long, optimizePositive: Boolean) { + val bout = new ByteArrayOutputStream + val stream = new ShuffleSerializationStream(bout) { + def writeObject[T](t: T): SerializationStream = { + writeVarLong(t.asInstanceOf[Long], optimizePositive = optimizePositive) + this + } + } + stream.writeObject(v) + + val bin = new ByteArrayInputStream(bout.toByteArray) + val dstream = new ShuffleDeserializationStream(bin) { + def readObject[T](): T = { + readVarLong(optimizePositive).asInstanceOf[T] + } + } + val read = dstream.readObject[Long]() + assert(read === v) + } + + // Test all variable encoding code path (each branch uses 7 bits, i.e. 1L << 7 difference) + val d = Random.nextLong() % 128 + Seq[Long](0, 1L << 0 + d, 1L << 7 + d, 1L << 14 + d, 1L << 21 + d, 1L << 28 + d, 1L << 35 + d, + 1L << 42 + d, 1L << 49 + d, 1L << 56 + d, 1L << 63 + d).foreach { number => + testVarLongEncoding(number, optimizePositive = false) + testVarLongEncoding(number, optimizePositive = true) + testVarLongEncoding(-number, optimizePositive = false) + testVarLongEncoding(-number, optimizePositive = true) + } + } +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala new file mode 100644 index 0000000000..573b708e89 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/VertexRDDSuite.scala @@ -0,0 +1,85 @@ +package org.apache.spark.graphx + +import org.apache.spark.SparkContext +import org.apache.spark.graphx.Graph._ +import org.apache.spark.graphx.impl.EdgePartition +import org.apache.spark.rdd._ +import org.scalatest.FunSuite + +class VertexRDDSuite extends FunSuite with LocalSparkContext { + + def vertices(sc: SparkContext, n: Int) = { + VertexRDD(sc.parallelize((0 to n).map(x => (x.toLong, x)), 5)) + } + + test("filter") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val evens = verts.filter(q => ((q._2 % 2) == 0)) + assert(evens.count === (0 to n).filter(_ % 2 == 0).size) + } + } + + test("mapValues") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val negatives = verts.mapValues(x => -x).cache() // Allow joining b with a derived RDD of b + assert(negatives.count === n + 1) + } + } + + test("diff") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val flipEvens = verts.mapValues(x => if (x % 2 == 0) -x else x) + // diff should keep only the changed vertices + assert(verts.diff(flipEvens).map(_._2).collect().toSet === (2 to n by 2).map(-_).toSet) + // diff should keep the vertex values from `other` + assert(flipEvens.diff(verts).map(_._2).collect().toSet === (2 to n by 2).toSet) + } + } + + test("leftJoin") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val evens = verts.filter(q => ((q._2 % 2) == 0)) + // leftJoin with another VertexRDD + assert(verts.leftJoin(evens) { (id, a, bOpt) => a - bOpt.getOrElse(0) }.collect.toSet === + (0 to n by 2).map(x => (x.toLong, 0)).toSet ++ (1 to n by 2).map(x => (x.toLong, x)).toSet) + // leftJoin with an RDD + val evensRDD = evens.map(identity) + assert(verts.leftJoin(evensRDD) { (id, a, bOpt) => a - bOpt.getOrElse(0) }.collect.toSet === + (0 to n by 2).map(x => (x.toLong, 
0)).toSet ++ (1 to n by 2).map(x => (x.toLong, x)).toSet) + } + } + + test("innerJoin") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val evens = verts.filter(q => ((q._2 % 2) == 0)) + // innerJoin with another VertexRDD + assert(verts.innerJoin(evens) { (id, a, b) => a - b }.collect.toSet === + (0 to n by 2).map(x => (x.toLong, 0)).toSet) + // innerJoin with an RDD + val evensRDD = evens.map(identity) + assert(verts.innerJoin(evensRDD) { (id, a, b) => a - b }.collect.toSet === + (0 to n by 2).map(x => (x.toLong, 0)).toSet) } + } + + test("aggregateUsingIndex") { + withSpark { sc => + val n = 100 + val verts = vertices(sc, n) + val messageTargets = (0 to n) ++ (0 to n by 2) + val messages = sc.parallelize(messageTargets.map(x => (x.toLong, 1))) + assert(verts.aggregateUsingIndex[Int](messages, _ + _).collect.toSet === + (0 to n).map(x => (x.toLong, if (x % 2 == 0) 2 else 1)).toSet) + } + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/algorithms/ConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/ConnectedComponentsSuite.scala new file mode 100644 index 0000000000..5e2ecfcde9 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/ConnectedComponentsSuite.scala @@ -0,0 +1,83 @@ +package org.apache.spark.graphx.algorithms + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.GraphGenerators +import org.apache.spark.rdd._ + + +class ConnectedComponentsSuite extends FunSuite with LocalSparkContext { + + test("Grid Connected Components") { + withSpark { sc => + val gridGraph = GraphGenerators.gridGraph(sc, 10, 10).cache() + val ccGraph = ConnectedComponents.run(gridGraph).cache() + val maxCCid = ccGraph.vertices.map { case (vid, ccId) => ccId }.sum + assert(maxCCid === 0) + } + } // end of Grid connected components + + + test("Reverse Grid Connected Components") { + withSpark { sc => + val gridGraph = GraphGenerators.gridGraph(sc, 10, 10).reverse.cache() + val ccGraph = ConnectedComponents.run(gridGraph).cache() + val maxCCid = ccGraph.vertices.map { case (vid, ccId) => ccId }.sum + assert(maxCCid === 0) + } + } // end of Grid connected components + + + test("Chain Connected Components") { + withSpark { sc => + val chain1 = (0 until 9).map(x => (x, x+1) ) + val chain2 = (10 until 20).map(x => (x, x+1) ) + val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } + val twoChains = Graph.fromEdgeTuples(rawEdges, 1.0).cache() + val ccGraph = ConnectedComponents.run(twoChains).cache() + val vertices = ccGraph.vertices.collect() + for ( (id, cc) <- vertices ) { + if(id < 10) { assert(cc === 0) } + else { assert(cc === 10) } + } + val ccMap = vertices.toMap + for (id <- 0 until 20) { + if (id < 10) { + assert(ccMap(id) === 0) + } else { + assert(ccMap(id) === 10) + } + } + } + } // end of chain connected components + + test("Reverse Chain Connected Components") { + withSpark { sc => + val chain1 = (0 until 9).map(x => (x, x+1) ) + val chain2 = (10 until 20).map(x => (x, x+1) ) + val rawEdges = sc.parallelize(chain1 ++ chain2, 3).map { case (s,d) => (s.toLong, d.toLong) } + val twoChains = Graph.fromEdgeTuples(rawEdges, true).reverse.cache() + val ccGraph = ConnectedComponents.run(twoChains).cache() + val vertices = ccGraph.vertices.collect + for ( (id, cc) <- vertices ) { + if (id < 10) { + assert(cc === 0) + } else { + assert(cc === 10) + 
} + } + val ccMap = vertices.toMap + for ( id <- 0 until 20 ) { + if (id < 10) { + assert(ccMap(id) === 0) + } else { + assert(ccMap(id) === 10) + } + } + } + } // end of reverse chain connected components + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/algorithms/PageRankSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/PageRankSuite.scala new file mode 100644 index 0000000000..e365b1e230 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/PageRankSuite.scala @@ -0,0 +1,126 @@ +package org.apache.spark.graphx.algorithms + +import org.scalatest.FunSuite + +import org.apache.spark.graphx._ +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd._ + +import org.apache.spark.graphx.util.GraphGenerators + + +object GridPageRank { + def apply(nRows: Int, nCols: Int, nIter: Int, resetProb: Double) = { + val inNbrs = Array.fill(nRows * nCols)(collection.mutable.MutableList.empty[Int]) + val outDegree = Array.fill(nRows * nCols)(0) + // Convert row column address into vertex ids (row major order) + def sub2ind(r: Int, c: Int): Int = r * nCols + c + // Make the grid graph + for (r <- 0 until nRows; c <- 0 until nCols) { + val ind = sub2ind(r,c) + if (r+1 < nRows) { + outDegree(ind) += 1 + inNbrs(sub2ind(r+1,c)) += ind + } + if (c+1 < nCols) { + outDegree(ind) += 1 + inNbrs(sub2ind(r,c+1)) += ind + } + } + // compute the pagerank + var pr = Array.fill(nRows * nCols)(resetProb) + for (iter <- 0 until nIter) { + val oldPr = pr + pr = new Array[Double](nRows * nCols) + for (ind <- 0 until (nRows * nCols)) { + pr(ind) = resetProb + (1.0 - resetProb) * + inNbrs(ind).map( nbr => oldPr(nbr) / outDegree(nbr)).sum + } + } + (0L until (nRows * nCols)).zip(pr) + } + +} + + +class PageRankSuite extends FunSuite with LocalSparkContext { + + def compareRanks(a: VertexRDD[Double], b: VertexRDD[Double]): Double = { + a.leftJoin(b) { case (id, a, bOpt) => (a - bOpt.getOrElse(0.0)) * (a - bOpt.getOrElse(0.0)) } + .map { case (id, error) => error }.sum + } + + test("Star PageRank") { + withSpark { sc => + val nVertices = 100 + val starGraph = GraphGenerators.starGraph(sc, nVertices).cache() + val resetProb = 0.15 + val errorTol = 1.0e-5 + + val staticRanks1 = PageRank.run(starGraph, numIter = 1, resetProb).vertices.cache() + val staticRanks2 = PageRank.run(starGraph, numIter = 2, resetProb).vertices.cache() + + // Static PageRank should only take 2 iterations to converge + val notMatching = staticRanks1.innerZipJoin(staticRanks2) { (vid, pr1, pr2) => + if (pr1 != pr2) 1 else 0 + }.map { case (vid, test) => test }.sum + assert(notMatching === 0) + + val staticErrors = staticRanks2.map { case (vid, pr) => + val correct = (vid > 0 && pr == resetProb) || + (vid == 0 && math.abs(pr - (resetProb + (1.0 - resetProb) * (resetProb * (nVertices - 1)) )) < 1.0E-5) + if (!correct) 1 else 0 + } + assert(staticErrors.sum === 0) + + val dynamicRanks = PageRank.runUntillConvergence(starGraph, 0, resetProb).vertices.cache() + val standaloneRanks = PageRank.runStandalone(starGraph, 0, resetProb).cache() + assert(compareRanks(staticRanks2, dynamicRanks) < errorTol) + assert(compareRanks(staticRanks2, standaloneRanks) < errorTol) + } + } // end of test Star PageRank + + + + test("Grid PageRank") { + withSpark { sc => + val rows = 10 + val cols = 10 + val resetProb = 0.15 + val tol = 0.0001 + val numIter = 50 + val errorTol = 1.0e-5 + val gridGraph = GraphGenerators.gridGraph(sc, rows, cols).cache() + + val staticRanks = 
PageRank.run(gridGraph, numIter, resetProb).vertices.cache() + val dynamicRanks = PageRank.runUntillConvergence(gridGraph, tol, resetProb).vertices.cache() + val standaloneRanks = PageRank.runStandalone(gridGraph, tol, resetProb).cache() + val referenceRanks = VertexRDD(sc.parallelize(GridPageRank(rows, cols, numIter, resetProb))) + + assert(compareRanks(staticRanks, referenceRanks) < errorTol) + assert(compareRanks(dynamicRanks, referenceRanks) < errorTol) + assert(compareRanks(standaloneRanks, referenceRanks) < errorTol) + } + } // end of Grid PageRank + + + test("Chain PageRank") { + withSpark { sc => + val chain1 = (0 until 9).map(x => (x, x+1) ) + val rawEdges = sc.parallelize(chain1, 1).map { case (s,d) => (s.toLong, d.toLong) } + val chain = Graph.fromEdgeTuples(rawEdges, 1.0).cache() + val resetProb = 0.15 + val tol = 0.0001 + val numIter = 10 + val errorTol = 1.0e-5 + + val staticRanks = PageRank.run(chain, numIter, resetProb).vertices.cache() + val dynamicRanks = PageRank.runUntillConvergence(chain, tol, resetProb).vertices.cache() + val standaloneRanks = PageRank.runStandalone(chain, tol, resetProb).cache() + + assert(compareRanks(staticRanks, dynamicRanks) < errorTol) + assert(compareRanks(dynamicRanks, standaloneRanks) < errorTol) + } + } +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/algorithms/SVDPlusPlusSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/SVDPlusPlusSuite.scala new file mode 100644 index 0000000000..06604198d7 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/SVDPlusPlusSuite.scala @@ -0,0 +1,30 @@ +package org.apache.spark.graphx.algorithms + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.GraphGenerators +import org.apache.spark.rdd._ + + +class SVDPlusPlusSuite extends FunSuite with LocalSparkContext { + + test("Test SVD++ with mean square error on training set") { + withSpark { sc => + val svdppErr = 8.0 + val edges = sc.textFile("mllib/data/als/test.data").map { line => + val fields = line.split(",") + Edge(fields(0).toLong * 2, fields(1).toLong * 2 + 1, fields(2).toDouble) + } + val conf = new SVDPlusPlusConf(10, 2, 0.0, 5.0, 0.007, 0.007, 0.005, 0.015) // 2 iterations + var (graph, u) = SVDPlusPlus.run(edges, conf) + val err = graph.vertices.collect.map{ case (vid, vd) => + if (vid % 2 == 1) vd._4 else 0.0 + }.reduce(_ + _) / graph.triplets.collect.size + assert(err <= svdppErr) + } + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponentsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponentsSuite.scala new file mode 100644 index 0000000000..696b80944e --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/StronglyConnectedComponentsSuite.scala @@ -0,0 +1,57 @@ +package org.apache.spark.graphx.algorithms + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.GraphGenerators +import org.apache.spark.rdd._ + + +class StronglyConnectedComponentsSuite extends FunSuite with LocalSparkContext { + + test("Island Strongly Connected Components") { + withSpark { sc => + val vertices = sc.parallelize((1L to 5L).map(x => (x, -1))) + val edges = sc.parallelize(Seq.empty[Edge[Int]]) + val graph = Graph(vertices, edges) + val sccGraph 
= StronglyConnectedComponents.run(graph, 5) + for ((id, scc) <- sccGraph.vertices.collect) { + assert(id == scc) + } + } + } + + test("Cycle Strongly Connected Components") { + withSpark { sc => + val rawEdges = sc.parallelize((0L to 6L).map(x => (x, (x + 1) % 7))) + val graph = Graph.fromEdgeTuples(rawEdges, -1) + val sccGraph = StronglyConnectedComponents.run(graph, 20) + for ((id, scc) <- sccGraph.vertices.collect) { + assert(0L == scc) + } + } + } + + test("2 Cycle Strongly Connected Components") { + withSpark { sc => + val edges = + Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ + Array(3L -> 4L, 4L -> 5L, 5L -> 3L) ++ + Array(6L -> 0L, 5L -> 7L) + val rawEdges = sc.parallelize(edges) + val graph = Graph.fromEdgeTuples(rawEdges, -1) + val sccGraph = StronglyConnectedComponents.run(graph, 20) + for ((id, scc) <- sccGraph.vertices.collect) { + if (id < 3) + assert(0L == scc) + else if (id < 6) + assert(3L == scc) + else + assert(id == scc) + } + } + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/algorithms/TriangleCountSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/TriangleCountSuite.scala new file mode 100644 index 0000000000..0e59912754 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/algorithms/TriangleCountSuite.scala @@ -0,0 +1,73 @@ +package org.apache.spark.graphx.algorithms + +import org.scalatest.FunSuite + +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.GraphGenerators +import org.apache.spark.rdd._ + + +class TriangleCountSuite extends FunSuite with LocalSparkContext { + + test("Count a single triangle") { + withSpark { sc => + val rawEdges = sc.parallelize(Array( 0L->1L, 1L->2L, 2L->0L ), 2) + val graph = Graph.fromEdgeTuples(rawEdges, true).cache() + val triangleCount = TriangleCount.run(graph) + val verts = triangleCount.vertices + verts.collect.foreach { case (vid, count) => assert(count === 1) } + } + } + + test("Count two triangles") { + withSpark { sc => + val triangles = Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ + Array(0L -> -1L, -1L -> -2L, -2L -> 0L) + val rawEdges = sc.parallelize(triangles, 2) + val graph = Graph.fromEdgeTuples(rawEdges, true).cache() + val triangleCount = TriangleCount.run(graph) + val verts = triangleCount.vertices + verts.collect().foreach { case (vid, count) => + if (vid == 0) { + assert(count === 2) + } else { + assert(count === 1) + } + } + } + } + + test("Count two triangles with bi-directed edges") { + withSpark { sc => + val triangles = + Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ + Array(0L -> -1L, -1L -> -2L, -2L -> 0L) + val revTriangles = triangles.map { case (a,b) => (b,a) } + val rawEdges = sc.parallelize(triangles ++ revTriangles, 2) + val graph = Graph.fromEdgeTuples(rawEdges, true).cache() + val triangleCount = TriangleCount.run(graph) + val verts = triangleCount.vertices + verts.collect().foreach { case (vid, count) => + if (vid == 0) { + assert(count === 4) + } else { + assert(count === 2) + } + } + } + } + + test("Count a single triangle with duplicate edges") { + withSpark { sc => + val rawEdges = sc.parallelize(Array(0L -> 1L, 1L -> 2L, 2L -> 0L) ++ + Array(0L -> 1L, 1L -> 2L, 2L -> 0L), 2) + val graph = Graph.fromEdgeTuples(rawEdges, true, uniqueEdges = Some(RandomVertexCut)).cache() + val triangleCount = TriangleCount.run(graph) + val verts = triangleCount.vertices + verts.collect.foreach { case (vid, count) => assert(count === 1) } + } + } + +} diff --git 
a/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala new file mode 100644 index 0000000000..eb82436f09 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/impl/EdgePartitionSuite.scala @@ -0,0 +1,76 @@ +package org.apache.spark.graphx.impl + +import scala.reflect.ClassTag +import scala.util.Random + +import org.scalatest.FunSuite + +import org.apache.spark.graphx._ + +class EdgePartitionSuite extends FunSuite { + + test("reverse") { + val edges = List(Edge(0, 1, 0), Edge(1, 2, 0), Edge(2, 0, 0)) + val reversedEdges = List(Edge(0, 2, 0), Edge(1, 0, 0), Edge(2, 1, 0)) + val builder = new EdgePartitionBuilder[Int] + for (e <- edges) { + builder.add(e.srcId, e.dstId, e.attr) + } + val edgePartition = builder.toEdgePartition + assert(edgePartition.reverse.iterator.map(_.copy()).toList === reversedEdges) + assert(edgePartition.reverse.reverse.iterator.map(_.copy()).toList === edges) + } + + test("map") { + val edges = List(Edge(0, 1, 0), Edge(1, 2, 0), Edge(2, 0, 0)) + val builder = new EdgePartitionBuilder[Int] + for (e <- edges) { + builder.add(e.srcId, e.dstId, e.attr) + } + val edgePartition = builder.toEdgePartition + assert(edgePartition.map(e => e.srcId + e.dstId).iterator.map(_.copy()).toList === + edges.map(e => e.copy(attr = e.srcId + e.dstId))) + } + + test("groupEdges") { + val edges = List( + Edge(0, 1, 1), Edge(1, 2, 2), Edge(2, 0, 4), Edge(0, 1, 8), Edge(1, 2, 16), Edge(2, 0, 32)) + val groupedEdges = List(Edge(0, 1, 9), Edge(1, 2, 18), Edge(2, 0, 36)) + val builder = new EdgePartitionBuilder[Int] + for (e <- edges) { + builder.add(e.srcId, e.dstId, e.attr) + } + val edgePartition = builder.toEdgePartition + assert(edgePartition.groupEdges(_ + _).iterator.map(_.copy()).toList === groupedEdges) + } + + test("indexIterator") { + val edgesFrom0 = List(Edge(0, 1, 0)) + val edgesFrom1 = List(Edge(1, 0, 0), Edge(1, 2, 0)) + val sortedEdges = edgesFrom0 ++ edgesFrom1 + val builder = new EdgePartitionBuilder[Int] + for (e <- Random.shuffle(sortedEdges)) { + builder.add(e.srcId, e.dstId, e.attr) + } + + val edgePartition = builder.toEdgePartition + assert(edgePartition.iterator.map(_.copy()).toList === sortedEdges) + assert(edgePartition.indexIterator(_ == 0).map(_.copy()).toList === edgesFrom0) + assert(edgePartition.indexIterator(_ == 1).map(_.copy()).toList === edgesFrom1) + } + + test("innerJoin") { + def makeEdgePartition[A: ClassTag](xs: Iterable[(Int, Int, A)]): EdgePartition[A] = { + val builder = new EdgePartitionBuilder[A] + for ((src, dst, attr) <- xs) { builder.add(src: VertexID, dst: VertexID, attr) } + builder.toEdgePartition + } + val aList = List((0, 1, 0), (1, 0, 0), (1, 2, 0), (5, 4, 0), (5, 5, 0)) + val bList = List((0, 1, 0), (1, 0, 0), (1, 1, 0), (3, 4, 0), (5, 5, 0)) + val a = makeEdgePartition(aList) + val b = makeEdgePartition(bList) + + assert(a.innerJoin(b) { (src, dst, a, b) => a }.iterator.map(_.copy()).toList === + List(Edge(0, 1, 0), Edge(1, 0, 0), Edge(5, 5, 0))) + } +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala new file mode 100644 index 0000000000..d37d64e8c8 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/impl/VertexPartitionSuite.scala @@ -0,0 +1,113 @@ +package org.apache.spark.graphx.impl + +import org.apache.spark.graphx._ +import org.scalatest.FunSuite + +class VertexPartitionSuite extends 
FunSuite { + + test("isDefined, filter") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1))).filter { (vid, attr) => vid == 0 } + assert(vp.isDefined(0)) + assert(!vp.isDefined(1)) + assert(!vp.isDefined(2)) + assert(!vp.isDefined(-1)) + } + + test("isActive, numActives, replaceActives") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1))) + .filter { (vid, attr) => vid == 0 } + .replaceActives(Iterator(0, 2, 0)) + assert(vp.isActive(0)) + assert(!vp.isActive(1)) + assert(vp.isActive(2)) + assert(!vp.isActive(-1)) + assert(vp.numActives == Some(2)) + } + + test("map") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1))).map { (vid, attr) => 2 } + assert(vp(0) === 2) + } + + test("diff") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val vp2 = vp.filter { (vid, attr) => vid <= 1 } + val vp3a = vp.map { (vid, attr) => 2 } + val vp3b = VertexPartition(vp3a.iterator) + // diff with same index + val diff1 = vp2.diff(vp3a) + assert(diff1(0) === 2) + assert(diff1(1) === 2) + assert(diff1(2) === 2) + assert(!diff1.isDefined(2)) + // diff with different indexes + val diff2 = vp2.diff(vp3b) + assert(diff2(0) === 2) + assert(diff2(1) === 2) + assert(diff2(2) === 2) + assert(!diff2.isDefined(2)) + } + + test("leftJoin") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val vp2a = vp.filter { (vid, attr) => vid <= 1 }.map { (vid, attr) => 2 } + val vp2b = VertexPartition(vp2a.iterator) + // leftJoin with same index + val join1 = vp.leftJoin(vp2a) { (vid, a, bOpt) => bOpt.getOrElse(a) } + assert(join1.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) + // leftJoin with different indexes + val join2 = vp.leftJoin(vp2b) { (vid, a, bOpt) => bOpt.getOrElse(a) } + assert(join2.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) + // leftJoin an iterator + val join3 = vp.leftJoin(vp2a.iterator) { (vid, a, bOpt) => bOpt.getOrElse(a) } + assert(join3.iterator.toSet === Set((0L, 2), (1L, 2), (2L, 1))) + } + + test("innerJoin") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val vp2a = vp.filter { (vid, attr) => vid <= 1 }.map { (vid, attr) => 2 } + val vp2b = VertexPartition(vp2a.iterator) + // innerJoin with same index + val join1 = vp.innerJoin(vp2a) { (vid, a, b) => b } + assert(join1.iterator.toSet === Set((0L, 2), (1L, 2))) + // innerJoin with different indexes + val join2 = vp.innerJoin(vp2b) { (vid, a, b) => b } + assert(join2.iterator.toSet === Set((0L, 2), (1L, 2))) + // innerJoin an iterator + val join3 = vp.innerJoin(vp2a.iterator) { (vid, a, b) => b } + assert(join3.iterator.toSet === Set((0L, 2), (1L, 2))) + } + + test("createUsingIndex") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val elems = List((0L, 2), (2L, 2), (3L, 2)) + val vp2 = vp.createUsingIndex(elems.iterator) + assert(vp2.iterator.toSet === Set((0L, 2), (2L, 2))) + assert(vp.index === vp2.index) + } + + test("innerJoinKeepLeft") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val elems = List((0L, 2), (2L, 2), (3L, 2)) + val vp2 = vp.innerJoinKeepLeft(elems.iterator) + assert(vp2.iterator.toSet === Set((0L, 2), (2L, 2))) + assert(vp2(1) === 1) + } + + test("aggregateUsingIndex") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) + val messages = List((0L, "a"), (2L, "b"), (0L, "c"), (3L, "d")) + val vp2 = vp.aggregateUsingIndex[String](messages.iterator, _ + _) + assert(vp2.iterator.toSet === Set((0L, "ac"), (2L, "b"))) + } + + test("reindex") { + val vp = VertexPartition(Iterator((0L, 1), (1L, 1), (2L, 1))) 
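+    // filter() below keeps the original index and only masks entries out (so vp2(2) still
+    // returns 1), while reindex() rebuilds the index so the dropped vertex id is gone from it.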
+ val vp2 = vp.filter { (vid, attr) => vid <= 1 } + val vp3 = vp2.reindex() + assert(vp2.iterator.toSet === vp3.iterator.toSet) + assert(vp2(2) === 1) + assert(vp3.index.getPos(2) === -1) + } + +} diff --git a/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala new file mode 100644 index 0000000000..11db339750 --- /dev/null +++ b/graphx/src/test/scala/org/apache/spark/graphx/util/BytecodeUtilsSuite.scala @@ -0,0 +1,93 @@ +package org.apache.spark.graphx.util + +import org.scalatest.FunSuite + + +class BytecodeUtilsSuite extends FunSuite { + + import BytecodeUtilsSuite.TestClass + + test("closure invokes a method") { + val c1 = {e: TestClass => println(e.foo); println(e.bar); println(e.baz); } + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) + + val c2 = {e: TestClass => println(e.foo); println(e.bar); } + assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "foo")) + assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "bar")) + assert(!BytecodeUtils.invokedMethod(c2, classOf[TestClass], "baz")) + + val c3 = {e: TestClass => println(e.foo); } + assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "foo")) + assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "bar")) + assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "baz")) + } + + test("closure inside a closure invokes a method") { + val c1 = {e: TestClass => println(e.foo); println(e.bar); println(e.baz); } + val c2 = {e: TestClass => c1(e); println(e.foo); } + assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "foo")) + assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c2, classOf[TestClass], "baz")) + } + + test("closure inside a closure inside a closure invokes a method") { + val c1 = {e: TestClass => println(e.baz); } + val c2 = {e: TestClass => c1(e); println(e.foo); } + val c3 = {e: TestClass => c2(e) } + assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "foo")) + assert(!BytecodeUtils.invokedMethod(c3, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c3, classOf[TestClass], "baz")) + } + + test("closure calling a function that invokes a method") { + def zoo(e: TestClass) { + println(e.baz) + } + val c1 = {e: TestClass => zoo(e)} + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) + } + + test("closure calling a function that invokes a method which uses another closure") { + val c2 = {e: TestClass => println(e.baz)} + def zoo(e: TestClass) { + c2(e) + } + val c1 = {e: TestClass => zoo(e)} + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) + } + + test("nested closure") { + val c2 = {e: TestClass => println(e.baz)} + def zoo(e: TestClass, c: TestClass => Unit) { + c(e) + } + val c1 = {e: TestClass => zoo(e, c2)} + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) + assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) + assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) + } + + // The following 
doesn't work yet, because the byte code doesn't contain any information + // about what exactly "c" is. +// test("invoke interface") { +// val c1 = {e: TestClass => c(e)} +// assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "foo")) +// assert(!BytecodeUtils.invokedMethod(c1, classOf[TestClass], "bar")) +// assert(BytecodeUtils.invokedMethod(c1, classOf[TestClass], "baz")) +// } + + private val c = {e: TestClass => println(e.baz)} +} + + +object BytecodeUtilsSuite { + class TestClass(val foo: Int, val bar: Long) { + def baz: Boolean = false + } +} diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index c2cd6fb45a..c2b1c0c35c 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -48,20 +48,20 @@ object SparkBuild extends Build { lazy val core = Project("core", file("core"), settings = coreSettings) lazy val repl = Project("repl", file("repl"), settings = replSettings) - .dependsOn(core, graph, bagel, mllib) + .dependsOn(core, graphx, bagel, mllib) lazy val tools = Project("tools", file("tools"), settings = toolsSettings) dependsOn(core) dependsOn(streaming) lazy val bagel = Project("bagel", file("bagel"), settings = bagelSettings) dependsOn(core) - lazy val graph = Project("graph", file("graph"), settings = graphSettings) dependsOn(core) + lazy val graphx = Project("graphx", file("graphx"), settings = graphxSettings) dependsOn(core) lazy val streaming = Project("streaming", file("streaming"), settings = streamingSettings) dependsOn(core) lazy val mllib = Project("mllib", file("mllib"), settings = mllibSettings) dependsOn(core) lazy val assemblyProj = Project("assembly", file("assembly"), settings = assemblyProjSettings) - .dependsOn(core, graph, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) + .dependsOn(core, graphx, bagel, mllib, repl, streaming) dependsOn(maybeYarn: _*) lazy val assembleDeps = TaskKey[Unit]("assemble-deps", "Build assembly of dependencies and packages Spark projects") @@ -111,10 +111,10 @@ object SparkBuild extends Build { lazy val allExternalRefs = Seq[ProjectReference](externalTwitter, externalKafka, externalFlume, externalZeromq, externalMqtt) lazy val examples = Project("examples", file("examples"), settings = examplesSettings) - .dependsOn(core, mllib, graph, bagel, streaming, externalTwitter) dependsOn(allExternal: _*) + .dependsOn(core, mllib, graphx, bagel, streaming, externalTwitter) dependsOn(allExternal: _*) // Everything except assembly, tools and examples belong to packageProjects - lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graph) ++ maybeYarnRef + lazy val packageProjects = Seq[ProjectReference](core, repl, bagel, streaming, mllib, graphx) ++ maybeYarnRef lazy val allProjects = packageProjects ++ allExternalRefs ++ Seq[ProjectReference](examples, tools, assemblyProj) @@ -308,7 +308,7 @@ object SparkBuild extends Build { name := "spark-tools" ) ++ assemblySettings ++ extraAssemblySettings - def graphSettings = sharedSettings ++ Seq( + def graphxSettings = sharedSettings ++ Seq( name := "spark-graphx" ) -- cgit v1.2.3
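
The suites above lean heavily on the mapReduceTriplets aggregation primitive. As a minimal, self-contained sketch of that pattern outside a test harness, assuming the GraphX API exactly as introduced in this patch and a local SparkContext (the object name and application name below are illustrative, not part of the patch):

import org.apache.spark.SparkContext
import org.apache.spark.graphx._

object NeighborDegreeExample {
  def main(args: Array[String]) {
    val sc = new SparkContext("local", "neighbor-degree-example")
    // A 4-spoke star: edges 0 -> 1, 0 -> 2, 0 -> 3, 0 -> 4, with all vertex attributes set to 0.
    val star = Graph.fromEdgeTuples(sc.parallelize((1 to 4).map(x => (0L, x.toLong))), 0)
    // Attach each vertex's degree as its attribute (vertices missing from degrees default to 0).
    val withDeg = star.outerJoinVertices(star.degrees) { (vid, _, deg) => deg.getOrElse(0) }
    // For every edge, send each endpoint's degree to the other endpoint, then sum per vertex.
    val neighborDegreeSums: VertexRDD[Int] = withDeg.mapReduceTriplets[Int](
      et => Iterator((et.srcId, et.dstAttr), (et.dstId, et.srcAttr)),
      (a, b) => a + b)
    neighborDegreeSums.collect().foreach(println)
    sc.stop()
  }
}

Each vertex ends up with the sum of its neighbors' degrees: the hub receives four 1s, and every spoke receives the hub's degree of 4.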
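The final SparkBuild.scala hunk renames the build wiring from graph to graphx, and the pattern it follows generalizes. As a hedged sketch, any additional sub-project would be declared the same way inside the build definition (the module name `foo` and its settings are hypothetical, used only to show the shape):

// Declare the sub-project and make it depend on core, mirroring the graphx entries above.
lazy val foo = Project("foo", file("foo"), settings = fooSettings) dependsOn(core)

// Shared settings plus a published artifact name, in the same style as graphxSettings.
def fooSettings = sharedSettings ++ Seq(
  name := "spark-foo"
)

A real module would also be added to packageProjects (and to whatever aggregates and assemblies should include it), exactly as the graphx references in the hunk above show.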