diff options
Diffstat (limited to 'bagel')
-rw-r--r-- | bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala b/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala index f37ee01fd2..03843019c0 100644 --- a/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala +++ b/bagel/src/main/scala/spark/bagel/examples/WikipediaPageRank.scala @@ -8,6 +8,11 @@ import spark.bagel.Bagel._ import scala.xml.{XML,NodeSeq} +/** + * Run PageRank on XML Wikipedia dumps from http://wiki.freebase.com/wiki/WEX. Uses the "articles" + * files from there, which contain one line per wiki article in a tab-separated format + * (http://wiki.freebase.com/wiki/WEX/Documentation#articles). + */ object WikipediaPageRank { def main(args: Array[String]) { if (args.length < 5) { |