about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAnkur Dave <ankurdave@gmail.com>2014-01-10 12:37:20 -0800
committerAnkur Dave <ankurdave@gmail.com>2014-01-10 12:37:20 -0800
commit9454fa1f6c714a720197a6db04a6c369e425e507 (patch)
tree12b2a8683fc76d97287e73c8a565ad403497d39f
parent37611e57f6ca1768529e4038198de56f85f90665 (diff)
downloadspark-9454fa1f6c714a720197a6db04a6c369e425e507.tar.gz
spark-9454fa1f6c714a720197a6db04a6c369e425e507.tar.bz2
spark-9454fa1f6c714a720197a6db04a6c369e425e507.zip
Remove duplicate method in GraphLoader and improve docs
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala  63
1 file changed, 13 insertions, 50 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
index 473cfb18cf..3c06a403ea 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala
@@ -8,51 +8,20 @@ import org.apache.spark.{Logging, SparkContext}
import org.apache.spark.graphx.impl.{EdgePartition, GraphImpl}
import org.apache.spark.util.collection.PrimitiveVector
-
+/**
+ * Provides utilities for loading [[Graph]]s from files.
+ */
object GraphLoader extends Logging {
/**
- * Load an edge list from file initializing the Graph
- *
- * @tparam ED the type of the edge data of the resulting Graph
- *
- * @param sc the SparkContext used to construct RDDs
- * @param path the path to the text file containing the edge list
- * @param edgeParser a function that takes an array of strings and
- * returns an ED object
- * @param minEdgePartitions the number of partitions for the
- * the Edge RDD
+ * Loads a graph from an edge list formatted file where each line contains two integers: a source
+ * id and a target id. Skips lines that begin with `#`.
*
- */
- def textFile[ED: ClassTag](
- sc: SparkContext,
- path: String,
- edgeParser: Array[String] => ED,
- minEdgePartitions: Int = 1):
- Graph[Int, ED] = {
- // Parse the edge data table
- val edges = sc.textFile(path, minEdgePartitions).mapPartitions( iter =>
- iter.filter(line => !line.isEmpty && line(0) != '#').map { line =>
- val lineArray = line.split("\\s+")
- if(lineArray.length < 2) {
- println("Invalid line: " + line)
- assert(false)
- }
- val source = lineArray(0).trim.toLong
- val target = lineArray(1).trim.toLong
- val tail = lineArray.drop(2)
- val edata = edgeParser(tail)
- Edge(source, target, edata)
- })
- val defaultVertexAttr = 1
- Graph.fromEdges(edges, defaultVertexAttr)
- }
-
- /**
- * Load a graph from an edge list formatted file with each line containing
- * two integers: a source Id and a target Id.
+ * If desired the edges can be automatically oriented in the positive
+ * direction (source Id < target Id) by setting `canonicalOrientation` to
+ * true.
*
- * @example A file in the following format:
+ * @example Loads a file in the following format:
* {{{
* # Comment Line
* # Source Id <\t> Target Id
@@ -62,25 +31,19 @@ object GraphLoader extends Logging {
* 1 8
* }}}
*
- * If desired the edges can be automatically oriented in the positive
- * direction (source Id < target Id) by setting `canonicalOrientation` to
- * true
- *
* @param sc
- * @param path the path to the file (e.g., /Home/data/file or hdfs://file)
+ * @param path the path to the file (e.g., /home/data/file or hdfs://file)
* @param canonicalOrientation whether to orient edges in the positive
- * direction.
+ * direction
* @param minEdgePartitions the number of partitions for the
- * the Edge RDD
+ * the edge RDD
* @tparam ED
- * @return
*/
def edgeListFile(
sc: SparkContext,
path: String,
canonicalOrientation: Boolean = false,
- minEdgePartitions: Int = 1):
- Graph[Int, Int] = {
+ minEdgePartitions: Int = 1): Graph[Int, Int] = {
val startTime = System.currentTimeMillis
// Parse the edge data table directly into edge partitions