From edad598684236d6271ce7853a8312081d15a28a6 Mon Sep 17 00:00:00 2001 From: Justin Ma Date: Mon, 19 Jul 2010 15:03:49 -0700 Subject: Updated Spark to run with latest Mesos build and Scala-2.8.0.final. --- src/examples/LocalFileLR.scala | 2 +- src/scala/spark/NexusScheduler.scala | 6 +++--- third_party/nexus.jar | Bin 39840 -> 42647 bytes 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/examples/LocalFileLR.scala b/src/examples/LocalFileLR.scala index 988442755a..3d3bb60677 100644 --- a/src/examples/LocalFileLR.scala +++ b/src/examples/LocalFileLR.scala @@ -13,7 +13,7 @@ object LocalFileLR { } def main(args: Array[String]) { - val lines = scala.io.Source.fromPath(args(0)).getLines() + val lines = scala.io.Source.fromFile(args(0)).getLines() val points = lines.map(parsePoint _) val ITERATIONS = args(1).toInt diff --git a/src/scala/spark/NexusScheduler.scala b/src/scala/spark/NexusScheduler.scala index a5343039ef..752df2b15c 100644 --- a/src/scala/spark/NexusScheduler.scala +++ b/src/scala/spark/NexusScheduler.scala @@ -84,7 +84,7 @@ extends NScheduler with spark.Scheduler } } - override def registered(d: SchedulerDriver, frameworkId: Int) { + override def registered(d: SchedulerDriver, frameworkId: String) { println("Registered as framework ID " + frameworkId) registeredLock.synchronized { isRegistered = true @@ -100,7 +100,7 @@ extends NScheduler with spark.Scheduler } override def resourceOffer( - d: SchedulerDriver, oid: Long, offers: SlaveOfferVector) { + d: SchedulerDriver, oid: String, offers: SlaveOfferVector) { synchronized { val tasks = new TaskDescriptionVector if (activeOp != null) { @@ -223,7 +223,7 @@ extends ParallelOperation { val taskId = sched.newTaskId() tidToIndex(taskId) = i - printf("Starting task %d as TID %d on slave %d: %s (%s)\n", + printf("Starting task %d as TID %s on slave %s: %s (%s)\n", i, taskId, offer.getSlaveId, offer.getHost, if(checkPref) "preferred" else "non-preferred") tasks(i).markStarted(offer) diff --git a/third_party/nexus.jar b/third_party/nexus.jar index 300e2d10a5..735a09c7c5 100644 Binary files a/third_party/nexus.jar and b/third_party/nexus.jar differ -- cgit v1.2.3 From 0435de9e8710ffd2d24a65ef4371529e79d3bf3c Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 19 Jul 2010 18:00:30 -0700 Subject: Made it possible to set various Spark options and environment variables in general through a conf/spark-env.sh script. --- README | 23 ++++++++++++++++++++++- run | 39 ++++++++++++++++++++++++++------------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/README b/README index e0c7603632..6af34a2294 100644 --- a/README +++ b/README @@ -1,4 +1,6 @@ -Spark requires Scala 2.8. This version has been tested with 2.8.0RC3. +BUILDING + +Spark requires Scala 2.8. This version has been tested with 2.8.0.final. To build and run Spark, you will need to have Scala's bin in your $PATH, or you will need to set the SCALA_HOME environment variable to point @@ -13,3 +15,22 @@ example programs prints usage help if no params are given. Tip: If you are building Spark and examples repeatedly, export USE_FSC=1 to have the Makefile use the fsc compiler daemon instead of scalac. + +CONFIGURATION + +Spark can be configured through two files: conf/java-opts and conf/spark-env.sh. + +In java-opts, you can add flags to be passed to the JVM when running Spark. + +In spark-env.sh, you can set any environment variables you wish to be available +when running Spark programs, such as PATH, SCALA_HOME, etc. 
There are also +several Spark-specific variables you can set: +- SPARK_CLASSPATH: Extra entries to be added to the classpath, separated by ":". +- SPARK_MEM: Memory for Spark to use, in the format used by java's -Xmx option + (for example, 200m means 200 MB, 1g means 1 GB, etc). +- SPARK_LIBRARY_PATH: Extra entries to add to java.library.path for locating + shared libraries. +- SPARK_JAVA_OPTS: Extra options to pass to JVM. + +Note that spark-env.sh must be a shell script (it must be executable and start +with a #! header to specify the shell to use). diff --git a/run b/run index e6723ccd7c..36fbd9d23d 100755 --- a/run +++ b/run @@ -3,26 +3,39 @@ # Figure out where the Scala framework is installed FWDIR=`dirname $0` -# Set JAVA_OPTS to be able to load libnexus.so and set various other misc options -export JAVA_OPTS="-Djava.library.path=$FWDIR/third_party:$FWDIR/src/native -Xms100m -Xmx750m" +# Load environment variables from conf/spark-env.sh, if it exists +if [ -e $FWDIR/conf/spark-env.sh ] ; then + . $FWDIR/conf/spark-env.sh +fi + +if [ "$SPARK_MEM" == "" ] ; then + SPARK_MEM="200m" +fi + +# Set JAVA_OPTS to be able to load native libraries and to set heap size +JAVA_OPTS="$SPARK_JAVA_OPTS" +JAVA_OPTS+=" -Djava.library.path=$SPARK_LIBRARY_PATH:$FWDIR/third_party:$FWDIR/src/native" +JAVA_OPTS+=" -Xms$SPARK_MEM -Xmx$SPARK_MEM" +# Load extra JAVA_OPTS from conf/java-opts, if it exists if [ -e $FWDIR/conf/java-opts ] ; then JAVA_OPTS+=" `cat $FWDIR/conf/java-opts`" fi export JAVA_OPTS # Build up classpath -CLASSPATH=$FWDIR/build/classes -CLASSPATH+=:$FWDIR/third_party/nexus.jar -CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar -CLASSPATH+=:$FWDIR/third_party/colt.jar -CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar -CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar -CLASSPATH+=:third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar -CLASSPATH+=:third_party/scalacheck_2.8.0.RC3-1.7.jar +SPARK_CLASSPATH="$SPARK_CLASSPATH:$FWDIR/build/classes" +SPARK_CLASSPATH+=:$FWDIR/third_party/nexus.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/colt.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar +SPARK_CLASSPATH+=:third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar +SPARK_CLASSPATH+=:third_party/scalacheck_2.8.0.RC3-1.7.jar for jar in $FWDIR/third_party/hadoop-0.20.0/lib/*.jar; do - CLASSPATH+=:$jar + SPARK_CLASSPATH+=:$jar done -export CLASSPATH +export SPARK_CLASSPATH +export CLASSPATH=$SPARK_CLASSPATH # Needed for spark-shell if [ -n "$SCALA_HOME" ]; then SCALA=${SCALA_HOME}/bin/scala @@ -30,4 +43,4 @@ else SCALA=scala fi -exec $SCALA -cp $CLASSPATH $@ +exec $SCALA -cp $SPARK_CLASSPATH $@ -- cgit v1.2.3 From e240e38ee9c0447ad0c94b77f2cb2b3aa6be5814 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 25 Jul 2010 21:10:03 -0400 Subject: Updated a bunch of libraries, and increased the default memory in run so that unit tests can run successfully.
--- Makefile | 6 +- run | 8 +- third_party/google-collect-1.0-rc5/COPYING | 202 --------------------- .../google-collect-1.0-rc5.jar | Bin 577311 -> 0 bytes third_party/guava-r06/COPYING | 202 +++++++++++++++++++++ third_party/guava-r06/README | 28 +++ third_party/guava-r06/guava-r06.jar | Bin 0 -> 934385 bytes third_party/scalacheck_2.8.0-1.7.jar | Bin 0 -> 745883 bytes third_party/scalacheck_2.8.0.RC3-1.7.jar | Bin 745630 -> 0 bytes .../scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar | Bin 1784234 -> 0 bytes third_party/scalatest-1.2/LICENSE | 202 +++++++++++++++++++++ third_party/scalatest-1.2/NOTICE | 7 + third_party/scalatest-1.2/README.txt | 58 ++++++ third_party/scalatest-1.2/scalatest-1.2.jar | Bin 0 -> 1784096 bytes 14 files changed, 504 insertions(+), 209 deletions(-) delete mode 100644 third_party/google-collect-1.0-rc5/COPYING delete mode 100644 third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar create mode 100644 third_party/guava-r06/COPYING create mode 100644 third_party/guava-r06/README create mode 100644 third_party/guava-r06/guava-r06.jar create mode 100644 third_party/scalacheck_2.8.0-1.7.jar delete mode 100644 third_party/scalacheck_2.8.0.RC3-1.7.jar delete mode 100644 third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar create mode 100644 third_party/scalatest-1.2/LICENSE create mode 100644 third_party/scalatest-1.2/NOTICE create mode 100644 third_party/scalatest-1.2/README.txt create mode 100644 third_party/scalatest-1.2/scalatest-1.2.jar diff --git a/Makefile b/Makefile index b5cc065e60..8d5acd665e 100644 --- a/Makefile +++ b/Makefile @@ -5,11 +5,11 @@ SPACE = $(EMPTY) $(EMPTY) JARS = third_party/nexus.jar JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar JARS += third_party/colt.jar -JARS += third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar +JARS += third_party/guava-r06/guava-r06.jar JARS += third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar JARS += third_party/hadoop-0.20.0/lib/commons-logging-1.0.4.jar -JARS += third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar -JARS += third_party/scalacheck_2.8.0.RC3-1.7.jar +JARS += third_party/scalatest-1.2/scalatest-1.2.jar +JARS += third_party/scalacheck_2.8.0-1.7.jar CLASSPATH = $(subst $(SPACE),:,$(JARS)) SCALA_SOURCES = src/examples/*.scala src/scala/spark/*.scala src/scala/spark/repl/*.scala diff --git a/run b/run index 36fbd9d23d..8dca59129f 100755 --- a/run +++ b/run @@ -9,7 +9,7 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then fi if [ "$SPARK_MEM" == "" ] ; then - SPARK_MEM="200m" + SPARK_MEM="300m" fi # Set JAVA_OPTS to be able to load native libraries and to set heap size @@ -27,10 +27,10 @@ SPARK_CLASSPATH="$SPARK_CLASSPATH:$FWDIR/build/classes" SPARK_CLASSPATH+=:$FWDIR/third_party/nexus.jar SPARK_CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar SPARK_CLASSPATH+=:$FWDIR/third_party/colt.jar -SPARK_CLASSPATH+=:$FWDIR/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/guava-r06/guava-r06.jar SPARK_CLASSPATH+=:$FWDIR/third_party/hadoop-0.20.0/hadoop-0.20.0-core.jar -SPARK_CLASSPATH+=:third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar -SPARK_CLASSPATH+=:third_party/scalacheck_2.8.0.RC3-1.7.jar +SPARK_CLASSPATH+=:third_party/scalatest-1.2/scalatest-1.2.jar +SPARK_CLASSPATH+=:third_party/scalacheck_2.8.0-1.7.jar for jar in $FWDIR/third_party/hadoop-0.20.0/lib/*.jar; do SPARK_CLASSPATH+=:$jar done diff --git a/third_party/google-collect-1.0-rc5/COPYING b/third_party/google-collect-1.0-rc5/COPYING deleted file mode 
100644 index d645695673..0000000000 --- a/third_party/google-collect-1.0-rc5/COPYING +++ /dev/null @@ -1,202 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. 
Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar b/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar deleted file mode 100644 index 36aa65a069..0000000000 Binary files a/third_party/google-collect-1.0-rc5/google-collect-1.0-rc5.jar and /dev/null differ diff --git a/third_party/guava-r06/COPYING b/third_party/guava-r06/COPYING new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/third_party/guava-r06/COPYING @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/third_party/guava-r06/README b/third_party/guava-r06/README new file mode 100644 index 0000000000..a0e832dd54 --- /dev/null +++ b/third_party/guava-r06/README @@ -0,0 +1,28 @@ +Guava: Google Core Libraries for Java + +Requires JDK 5 or higher. + +Project page: + http://guava-libraries.googlecode.com + +Ask "how-to" and "why-didn't-it-work" questions at: + http://www.stackoverflow.com/questions/ask + (use the "guava" tag so we'll see it) + +Ask discussion questions at: + http://groups.google.com/group/guava-discuss + +Subscribe to project updates in your feed reader: + http://code.google.com/feeds/p/guava-libraries/updates/basic + +Warnings: + +All APIs marked @Beta at the class or method level are subject to +change. If your code is a library or framework that users outside +your control will include on their classpath, do not use @Beta +APIs (at least without repackaging them somehow). + +Serialized forms of ALL objects are subject to change. Do not +persist these and assume they can be read by a future version of +the library. + diff --git a/third_party/guava-r06/guava-r06.jar b/third_party/guava-r06/guava-r06.jar new file mode 100644 index 0000000000..8ff3a81748 Binary files /dev/null and b/third_party/guava-r06/guava-r06.jar differ diff --git a/third_party/scalacheck_2.8.0-1.7.jar b/third_party/scalacheck_2.8.0-1.7.jar new file mode 100644 index 0000000000..fb3c0e9e12 Binary files /dev/null and b/third_party/scalacheck_2.8.0-1.7.jar differ diff --git a/third_party/scalacheck_2.8.0.RC3-1.7.jar b/third_party/scalacheck_2.8.0.RC3-1.7.jar deleted file mode 100644 index ac9687fc00..0000000000 Binary files a/third_party/scalacheck_2.8.0.RC3-1.7.jar and /dev/null differ diff --git a/third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar b/third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar deleted file mode 100644 index f5c4af19b0..0000000000 Binary files a/third_party/scalatest-1.2-for-scala-2.8.0.RC3-SNAPSHOT.jar and /dev/null differ diff --git a/third_party/scalatest-1.2/LICENSE b/third_party/scalatest-1.2/LICENSE new file mode 100644 index 0000000000..d645695673 --- /dev/null +++ b/third_party/scalatest-1.2/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
diff --git a/third_party/scalatest-1.2/NOTICE b/third_party/scalatest-1.2/NOTICE new file mode 100644 index 0000000000..a405cbd58a --- /dev/null +++ b/third_party/scalatest-1.2/NOTICE @@ -0,0 +1,7 @@ +================================================================================ +== NOTICE file corresponding to section 4(d) of the Apache License, == +== Version 2.0, in this case for the ScalaTest distribution. == +================================================================================ + + - This product includes software developed by + Artima, Inc. (http://www.artima.com/). diff --git a/third_party/scalatest-1.2/README.txt b/third_party/scalatest-1.2/README.txt new file mode 100644 index 0000000000..d505b9c640 --- /dev/null +++ b/third_party/scalatest-1.2/README.txt @@ -0,0 +1,58 @@ +ScalaTest 1.0 + +ScalaTest is a free, open-source testing toolkit for Scala and +Java programmers. Because different developers take different approaches to creating +software, no single approach to testing is a good fit for everyone. In light of +this reality, ScalaTest is designed to facilitate different styles of testing. ScalaTest +provides several traits that you can mix together into whatever combination makes you feel the most productive. +For some examples of the various styles that ScalaTest supports, see: + +http://www.artima.com/scalatest + +GETTING STARTED + +To learn how to use ScalaTest, please +open in your browser the scaladoc documentation in the +/scalatest-1.0/doc directory. Look first at the documentation for trait +org.scalatest.Suite, which gives a decent intro. All the other types are +documented as well, so you can hop around to learn more. +org.scalatest.tools.Runner explains how to use the application. The +Ignore class is written in Java, and isn't currently shown in the Scaladoc. + +To try it out, you can use ScalaTest to run its own tests, i.e., the tests +used to test ScalaTest itself. This command will run the GUI: + +scala -classpath scalatest-1.0.jar org.scalatest.tools.Runner -p "scalatest-1.0-tests.jar" -g -s org.scalatest.SuiteSuite + +This command will run and just print results to the standard output: + +scala -classpath scalatest-1.0.jar org.scalatest.tools.Runner -p "scalatest-1.0-tests.jar" -o -s org.scalatest.SuiteSuite + +ScalaTest 1.0 was tested with Scala version 2.7.5.final, so it is not +guaranteed to work with earlier Scala versions. + +ABOUT SCALATEST + +ScalaTest was written by Bill Venners, George Berger, Josh Cough, and +other contributors starting in late 2007. ScalaTest, which is almost +exclusively written in Scala, follows and improves upon the Java code +and design of Artima SuiteRunner, a testing tool also written +primarily by Bill Venners, starting in 2001. 
Over the years a few +other people contributed to SuiteRunner as well, including: + +Mark Brouwer +Chua Chee Seng +Chris Daily +Matt Gerrans +John Mitchel +Frank Sommers + +Several people have helped with ScalaTest, including: + +Corey Haines +Colin Howe +Dianne Marsh +Joel Neely +Jon-Anders Teigen +Daniel Watson + diff --git a/third_party/scalatest-1.2/scalatest-1.2.jar b/third_party/scalatest-1.2/scalatest-1.2.jar new file mode 100644 index 0000000000..cb8db9bdf5 Binary files /dev/null and b/third_party/scalatest-1.2/scalatest-1.2.jar differ -- cgit v1.2.3 From 4239f76997df82242cbe69192d9525014b7da5f9 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 25 Jul 2010 23:46:44 -0400 Subject: Removed Matei's old start on broadcast code --- src/scala/ubiquifs/Header.scala | 21 ------ src/scala/ubiquifs/Master.scala | 49 ------------- src/scala/ubiquifs/Message.scala | 14 ---- src/scala/ubiquifs/Slave.scala | 141 -------------------------------------- src/scala/ubiquifs/UbiquiFS.scala | 11 --- src/scala/ubiquifs/Utils.scala | 12 ---- 6 files changed, 248 deletions(-) delete mode 100644 src/scala/ubiquifs/Header.scala delete mode 100644 src/scala/ubiquifs/Master.scala delete mode 100644 src/scala/ubiquifs/Message.scala delete mode 100644 src/scala/ubiquifs/Slave.scala delete mode 100644 src/scala/ubiquifs/UbiquiFS.scala delete mode 100644 src/scala/ubiquifs/Utils.scala diff --git a/src/scala/ubiquifs/Header.scala b/src/scala/ubiquifs/Header.scala deleted file mode 100644 index bdca83a2d5..0000000000 --- a/src/scala/ubiquifs/Header.scala +++ /dev/null @@ -1,21 +0,0 @@ -package ubiquifs - -import java.io.{DataInputStream, DataOutputStream} - -object RequestType { - val READ = 0 - val WRITE = 1 -} - -class Header(val requestType: Int, val path: String) { - def write(out: DataOutputStream) { - out.write(requestType) - out.writeUTF(path) - } -} - -object Header { - def read(in: DataInputStream): Header = { - new Header(in.read(), in.readUTF()) - } -} diff --git a/src/scala/ubiquifs/Master.scala b/src/scala/ubiquifs/Master.scala deleted file mode 100644 index 6854acd6a5..0000000000 --- a/src/scala/ubiquifs/Master.scala +++ /dev/null @@ -1,49 +0,0 @@ -package ubiquifs - -import scala.actors.Actor -import scala.actors.Actor._ -import scala.actors.remote.RemoteActor -import scala.actors.remote.RemoteActor._ -import scala.actors.remote.Node -import scala.collection.mutable.{ArrayBuffer, Map, Set} - -class Master(port: Int) extends Actor { - case class SlaveInfo(host: String, port: Int) - - val files = Set[String]() - val slaves = new ArrayBuffer[SlaveInfo]() - - def act() { - alive(port) - register('UbiquiFS, self) - println("Created UbiquiFS Master on port " + port) - - loop { - react { - case RegisterSlave(host, port) => - slaves += SlaveInfo(host, port) - sender ! RegisterSucceeded() - - case Create(path) => - if (files.contains(path)) { - sender ! CreateFailed("File already exists") - } else if (slaves.isEmpty) { - sender ! CreateFailed("No slaves registered") - } else { - files += path - sender ! 
CreateSucceeded(slaves(0).host, slaves(0).port) - } - - case m: Any => - println("Unknown message: " + m) - } - } - } -} - -object MasterMain { - def main(args: Array[String]) { - val port = args(0).toInt - new Master(port).start() - } -} diff --git a/src/scala/ubiquifs/Message.scala b/src/scala/ubiquifs/Message.scala deleted file mode 100644 index 153542f8de..0000000000 --- a/src/scala/ubiquifs/Message.scala +++ /dev/null @@ -1,14 +0,0 @@ -package ubiquifs - -sealed case class Message() - -case class RegisterSlave(host: String, port: Int) extends Message -case class RegisterSucceeded() extends Message - -case class Create(path: String) extends Message -case class CreateSucceeded(host: String, port: Int) extends Message -case class CreateFailed(message: String) extends Message - -case class Read(path: String) extends Message -case class ReadSucceeded(host: String, port: Int) extends Message -case class ReadFailed(message: String) extends Message diff --git a/src/scala/ubiquifs/Slave.scala b/src/scala/ubiquifs/Slave.scala deleted file mode 100644 index 328b73c828..0000000000 --- a/src/scala/ubiquifs/Slave.scala +++ /dev/null @@ -1,141 +0,0 @@ -package ubiquifs - -import java.io.{DataInputStream, DataOutputStream, IOException} -import java.net.{InetAddress, Socket, ServerSocket} -import java.util.concurrent.locks.ReentrantLock - -import scala.actors.Actor -import scala.actors.Actor._ -import scala.actors.remote.RemoteActor -import scala.actors.remote.RemoteActor._ -import scala.actors.remote.Node -import scala.collection.mutable.{ArrayBuffer, Map, Set} - -class Slave(myPort: Int, master: String) extends Thread("UbiquiFS slave") { - val CHUNK_SIZE = 1024 * 1024 - - val buffers = Map[String, Buffer]() - - override def run() { - // Create server socket - val socket = new ServerSocket(myPort) - - // Register with master - val (masterHost, masterPort) = Utils.parseHostPort(master) - val masterActor = select(Node(masterHost, masterPort), 'UbiquiFS) - val myHost = InetAddress.getLocalHost.getHostName - val reply = masterActor !? 
RegisterSlave(myHost, myPort) - println("Registered with master, reply = " + reply) - - while (true) { - val conn = socket.accept() - new ConnectionHandler(conn).start() - } - } - - class ConnectionHandler(conn: Socket) extends Thread("ConnectionHandler") { - try { - val in = new DataInputStream(conn.getInputStream) - val out = new DataOutputStream(conn.getOutputStream) - val header = Header.read(in) - header.requestType match { - case RequestType.READ => - performRead(header.path, out) - case RequestType.WRITE => - performWrite(header.path, in) - case other => - throw new IOException("Invalid header type " + other) - } - println("hi") - } catch { - case e: Exception => e.printStackTrace() - } finally { - conn.close() - } - } - - def performWrite(path: String, in: DataInputStream) { - var buffer = new Buffer() - synchronized { - if (buffers.contains(path)) - throw new IllegalArgumentException("Path " + path + " already exists") - buffers(path) = buffer - } - var chunk = new Array[Byte](CHUNK_SIZE) - var pos = 0 - while (true) { - var numRead = in.read(chunk, pos, chunk.size - pos) - if (numRead == -1) { - buffer.addChunk(chunk.subArray(0, pos), true) - return - } else { - pos += numRead - if (pos == chunk.size) { - buffer.addChunk(chunk, false) - chunk = new Array[Byte](CHUNK_SIZE) - pos = 0 - } - } - } - // TODO: launch a thread to write the data to disk, and when this finishes, - // remove the hard reference to buffer - } - - def performRead(path: String, out: DataOutputStream) { - var buffer: Buffer = null - synchronized { - if (!buffers.contains(path)) - throw new IllegalArgumentException("Path " + path + " doesn't exist") - buffer = buffers(path) - } - for (chunk <- buffer.iterator) { - out.write(chunk, 0, chunk.size) - } - } - - class Buffer { - val chunks = new ArrayBuffer[Array[Byte]] - var finished = false - val mutex = new ReentrantLock - val chunksAvailable = mutex.newCondition() - - def addChunk(chunk: Array[Byte], finish: Boolean) { - mutex.lock() - chunks += chunk - finished = finish - chunksAvailable.signalAll() - mutex.unlock() - } - - def iterator = new Iterator[Array[Byte]] { - var index = 0 - - def hasNext: Boolean = { - mutex.lock() - while (index >= chunks.size && !finished) - chunksAvailable.await() - val ret = (index < chunks.size) - mutex.unlock() - return ret - } - - def next: Array[Byte] = { - mutex.lock() - if (!hasNext) - throw new NoSuchElementException("End of file") - val ret = chunks(index) // hasNext ensures we advance past index - index += 1 - mutex.unlock() - return ret - } - } - } -} - -object SlaveMain { - def main(args: Array[String]) { - val port = args(0).toInt - val master = args(1) - new Slave(port, master).start() - } -} diff --git a/src/scala/ubiquifs/UbiquiFS.scala b/src/scala/ubiquifs/UbiquiFS.scala deleted file mode 100644 index 9ce0fd4f44..0000000000 --- a/src/scala/ubiquifs/UbiquiFS.scala +++ /dev/null @@ -1,11 +0,0 @@ -package ubiquifs - -import java.io.{InputStream, OutputStream} - -class UbiquiFS(master: String) { - private val (masterHost, masterPort) = Utils.parseHostPort(master) - - def create(path: String): OutputStream = null - - def open(path: String): InputStream = null -} diff --git a/src/scala/ubiquifs/Utils.scala b/src/scala/ubiquifs/Utils.scala deleted file mode 100644 index d6fd3f0181..0000000000 --- a/src/scala/ubiquifs/Utils.scala +++ /dev/null @@ -1,12 +0,0 @@ -package ubiquifs - -private[ubiquifs] object Utils { - private val HOST_PORT_RE = "([a-zA-Z0-9.-]+):([0-9]+)".r - - def parseHostPort(string: String): (String, Int) 
= { - string match { - case HOST_PORT_RE(host, port) => (host, port.toInt) - case _ => throw new IllegalArgumentException(string) - } - } -} -- cgit v1.2.3 From b56ed67553a946b2f4e94dab33382b8de9957a5b Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 25 Jul 2010 23:53:46 -0400 Subject: Updated code to work with Nexus->Mesos name change --- Makefile | 2 +- run | 2 +- src/scala/spark/Executor.scala | 10 +- src/scala/spark/HdfsFile.scala | 2 +- src/scala/spark/MesosScheduler.scala | 290 +++++++++++++++++++++++++++++++++++ src/scala/spark/NexusScheduler.scala | 290 ----------------------------------- src/scala/spark/ParallelArray.scala | 2 +- src/scala/spark/RDD.scala | 2 +- src/scala/spark/SparkContext.scala | 4 +- src/scala/spark/Task.scala | 2 +- third_party/mesos.jar | Bin 0 -> 43915 bytes third_party/nexus.jar | Bin 42647 -> 0 bytes 12 files changed, 303 insertions(+), 303 deletions(-) create mode 100644 src/scala/spark/MesosScheduler.scala delete mode 100644 src/scala/spark/NexusScheduler.scala create mode 100644 third_party/mesos.jar delete mode 100644 third_party/nexus.jar diff --git a/Makefile b/Makefile index 8d5acd665e..9c9ebb6a82 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ EMPTY = SPACE = $(EMPTY) $(EMPTY) # Build up classpath by concatenating some strings -JARS = third_party/nexus.jar +JARS = third_party/mesos.jar JARS += third_party/asm-3.2/lib/all/asm-all-3.2.jar JARS += third_party/colt.jar JARS += third_party/guava-r06/guava-r06.jar diff --git a/run b/run index 8dca59129f..7353311522 100755 --- a/run +++ b/run @@ -24,7 +24,7 @@ export JAVA_OPTS # Build up classpath SPARK_CLASSPATH="$SPARK_CLASSPATH:$FWDIR/build/classes" -SPARK_CLASSPATH+=:$FWDIR/third_party/nexus.jar +SPARK_CLASSPATH+=:$FWDIR/third_party/mesos.jar SPARK_CLASSPATH+=:$FWDIR/third_party/asm-3.2/lib/all/asm-all-3.2.jar SPARK_CLASSPATH+=:$FWDIR/third_party/colt.jar SPARK_CLASSPATH+=:$FWDIR/third_party/guava-r06/guava-r06.jar diff --git a/src/scala/spark/Executor.scala b/src/scala/spark/Executor.scala index 679a61f3c0..68f4cb8aae 100644 --- a/src/scala/spark/Executor.scala +++ b/src/scala/spark/Executor.scala @@ -2,14 +2,14 @@ package spark import java.util.concurrent.{Executors, ExecutorService} -import nexus.{ExecutorArgs, ExecutorDriver, NexusExecutorDriver} -import nexus.{TaskDescription, TaskState, TaskStatus} +import mesos.{ExecutorArgs, ExecutorDriver, MesosExecutorDriver} +import mesos.{TaskDescription, TaskState, TaskStatus} object Executor { def main(args: Array[String]) { - System.loadLibrary("nexus") + System.loadLibrary("mesos") - val exec = new nexus.Executor() { + val exec = new mesos.Executor() { var classLoader: ClassLoader = null var threadPool: ExecutorService = null @@ -66,6 +66,6 @@ object Executor { } } - new NexusExecutorDriver(exec).run() + new MesosExecutorDriver(exec).run() } } diff --git a/src/scala/spark/HdfsFile.scala b/src/scala/spark/HdfsFile.scala index 6aa0e22338..e6c693c67c 100644 --- a/src/scala/spark/HdfsFile.scala +++ b/src/scala/spark/HdfsFile.scala @@ -1,6 +1,6 @@ package spark -import nexus.SlaveOffer +import mesos.SlaveOffer import org.apache.hadoop.io.LongWritable import org.apache.hadoop.io.Text diff --git a/src/scala/spark/MesosScheduler.scala b/src/scala/spark/MesosScheduler.scala new file mode 100644 index 0000000000..081f720bbd --- /dev/null +++ b/src/scala/spark/MesosScheduler.scala @@ -0,0 +1,290 @@ +package spark + +import java.io.File + +import scala.collection.mutable.Map + +import mesos.{Scheduler => NScheduler} +import mesos._ + +// The main 
Scheduler implementation, which talks to Mesos. Clients are expected +// to first call start(), then submit tasks through the runTasks method. +// +// This implementation is currently a little quick and dirty. The following +// improvements need to be made to it: +// 1) Right now, the scheduler uses a linear scan through the tasks to find a +// local one for a given node. It would be faster to have a separate list of +// pending tasks for each node. +// 2) Presenting a single slave in ParallelOperation.slaveOffer makes it +// difficult to balance tasks across nodes. It would be better to pass +// all the offers to the ParallelOperation and have it load-balance. +private class MesosScheduler( + master: String, frameworkName: String, execArg: Array[Byte]) +extends NScheduler with spark.Scheduler +{ + // Lock used by runTasks to ensure only one thread can be in it + val runTasksMutex = new Object() + + // Lock used to wait for scheduler to be registered + var isRegistered = false + val registeredLock = new Object() + + // Current callback object (may be null) + var activeOp: ParallelOperation = null + + // Incrementing task ID + private var nextTaskId = 0 + + def newTaskId(): Int = { + val id = nextTaskId; + nextTaskId += 1; + return id + } + + // Driver for talking to Mesos + var driver: SchedulerDriver = null + + override def start() { + new Thread("Spark scheduler") { + setDaemon(true) + override def run { + val ns = MesosScheduler.this + ns.driver = new MesosSchedulerDriver(ns, master) + ns.driver.run() + } + }.start + } + + override def getFrameworkName(d: SchedulerDriver): String = frameworkName + + override def getExecutorInfo(d: SchedulerDriver): ExecutorInfo = + new ExecutorInfo(new File("spark-executor").getCanonicalPath(), execArg) + + override def runTasks[T: ClassManifest](tasks: Array[Task[T]]): Array[T] = { + runTasksMutex.synchronized { + waitForRegister() + val myOp = new SimpleParallelOperation(this, tasks) + + try { + this.synchronized { + this.activeOp = myOp + } + driver.reviveOffers(); + myOp.join(); + } finally { + this.synchronized { + this.activeOp = null + } + } + + if (myOp.errorHappened) + throw new SparkException(myOp.errorMessage, myOp.errorCode) + else + return myOp.results + } + } + + override def registered(d: SchedulerDriver, frameworkId: String) { + println("Registered as framework ID " + frameworkId) + registeredLock.synchronized { + isRegistered = true + registeredLock.notifyAll() + } + } + + override def waitForRegister() { + registeredLock.synchronized { + while (!isRegistered) + registeredLock.wait() + } + } + + override def resourceOffer( + d: SchedulerDriver, oid: String, offers: SlaveOfferVector) { + synchronized { + val tasks = new TaskDescriptionVector + if (activeOp != null) { + try { + for (i <- 0 until offers.size.toInt) { + activeOp.slaveOffer(offers.get(i)) match { + case Some(task) => tasks.add(task) + case None => {} + } + } + } catch { + case e: Exception => e.printStackTrace + } + } + val params = new StringMap + params.set("timeout", "1") + d.replyToOffer(oid, tasks, params) // TODO: use smaller timeout + } + } + + override def statusUpdate(d: SchedulerDriver, status: TaskStatus) { + synchronized { + try { + if (activeOp != null) { + activeOp.statusUpdate(status) + } + } catch { + case e: Exception => e.printStackTrace + } + } + } + + override def error(d: SchedulerDriver, code: Int, message: String) { + synchronized { + if (activeOp != null) { + try { + activeOp.error(code, message) + } catch { + case e: Exception => e.printStackTrace + 
} + } else { + val msg = "Mesos error: %s (error code: %d)".format(message, code) + System.err.println(msg) + System.exit(1) + } + } + } + + override def stop() { + if (driver != null) + driver.stop() + } +} + + +// Trait representing an object that manages a parallel operation by +// implementing various scheduler callbacks. +trait ParallelOperation { + def slaveOffer(s: SlaveOffer): Option[TaskDescription] + def statusUpdate(t: TaskStatus): Unit + def error(code: Int, message: String): Unit +} + + +class SimpleParallelOperation[T: ClassManifest]( + sched: MesosScheduler, tasks: Array[Task[T]]) +extends ParallelOperation +{ + // Maximum time to wait to run a task in a preferred location (in ms) + val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "1000").toLong + + val callingThread = currentThread + val numTasks = tasks.length + val results = new Array[T](numTasks) + val launched = new Array[Boolean](numTasks) + val finished = new Array[Boolean](numTasks) + val tidToIndex = Map[Int, Int]() + + var allFinished = false + val joinLock = new Object() + + var errorHappened = false + var errorCode = 0 + var errorMessage = "" + + var tasksLaunched = 0 + var tasksFinished = 0 + var lastPreferredLaunchTime = System.currentTimeMillis + + def setAllFinished() { + joinLock.synchronized { + allFinished = true + joinLock.notifyAll() + } + } + + def join() { + joinLock.synchronized { + while (!allFinished) + joinLock.wait() + } + } + + def slaveOffer(offer: SlaveOffer): Option[TaskDescription] = { + if (tasksLaunched < numTasks) { + var checkPrefVals: Array[Boolean] = Array(true) + val time = System.currentTimeMillis + if (time - lastPreferredLaunchTime > LOCALITY_WAIT) + checkPrefVals = Array(true, false) // Allow non-preferred tasks + // TODO: Make desiredCpus and desiredMem configurable + val desiredCpus = 1 + val desiredMem = 750L * 1024L * 1024L + if (offer.getParams.get("cpus").toInt < desiredCpus || + offer.getParams.get("mem").toLong < desiredMem) + return None + for (checkPref <- checkPrefVals; i <- 0 until numTasks) { + if (!launched(i) && (!checkPref || + tasks(i).preferredLocations.contains(offer.getHost) || + tasks(i).preferredLocations.isEmpty)) + { + val taskId = sched.newTaskId() + tidToIndex(taskId) = i + printf("Starting task %d as TID %s on slave %s: %s (%s)\n", + i, taskId, offer.getSlaveId, offer.getHost, + if(checkPref) "preferred" else "non-preferred") + tasks(i).markStarted(offer) + launched(i) = true + tasksLaunched += 1 + if (checkPref) + lastPreferredLaunchTime = time + val params = new StringMap + params.set("cpus", "" + desiredCpus) + params.set("mem", "" + desiredMem) + val serializedTask = Utils.serialize(tasks(i)) + return Some(new TaskDescription(taskId, offer.getSlaveId, + "task_" + taskId, params, serializedTask)) + } + } + } + return None + } + + def statusUpdate(status: TaskStatus) { + status.getState match { + case TaskState.TASK_FINISHED => + taskFinished(status) + case TaskState.TASK_LOST => + taskLost(status) + case TaskState.TASK_FAILED => + taskLost(status) + case TaskState.TASK_KILLED => + taskLost(status) + case _ => + } + } + + def taskFinished(status: TaskStatus) { + val tid = status.getTaskId + println("Finished TID " + tid) + // Deserialize task result + val result = Utils.deserialize[TaskResult[T]](status.getData) + results(tidToIndex(tid)) = result.value + // Update accumulators + Accumulators.add(callingThread, result.accumUpdates) + // Mark finished and stop if we've finished all the tasks + finished(tidToIndex(tid)) = true + tasksFinished 
+= 1 + if (tasksFinished == numTasks) + setAllFinished() + } + + def taskLost(status: TaskStatus) { + val tid = status.getTaskId + println("Lost TID " + tid) + launched(tidToIndex(tid)) = false + tasksLaunched -= 1 + } + + def error(code: Int, message: String) { + // Save the error message + errorHappened = true + errorCode = code + errorMessage = message + // Indicate to caller thread that we're done + setAllFinished() + } +} diff --git a/src/scala/spark/NexusScheduler.scala b/src/scala/spark/NexusScheduler.scala deleted file mode 100644 index 752df2b15c..0000000000 --- a/src/scala/spark/NexusScheduler.scala +++ /dev/null @@ -1,290 +0,0 @@ -package spark - -import java.io.File - -import scala.collection.mutable.Map - -import nexus.{Scheduler => NScheduler} -import nexus._ - -// The main Scheduler implementation, which talks to Nexus. Clients are expected -// to first call start(), then submit tasks through the runTasks method. -// -// This implementation is currently a little quick and dirty. The following -// improvements need to be made to it: -// 1) Right now, the scheduler uses a linear scan through the tasks to find a -// local one for a given node. It would be faster to have a separate list of -// pending tasks for each node. -// 2) Presenting a single slave in ParallelOperation.slaveOffer makes it -// difficult to balance tasks across nodes. It would be better to pass -// all the offers to the ParallelOperation and have it load-balance. -private class NexusScheduler( - master: String, frameworkName: String, execArg: Array[Byte]) -extends NScheduler with spark.Scheduler -{ - // Lock used by runTasks to ensure only one thread can be in it - val runTasksMutex = new Object() - - // Lock used to wait for scheduler to be registered - var isRegistered = false - val registeredLock = new Object() - - // Current callback object (may be null) - var activeOp: ParallelOperation = null - - // Incrementing task ID - private var nextTaskId = 0 - - def newTaskId(): Int = { - val id = nextTaskId; - nextTaskId += 1; - return id - } - - // Driver for talking to Nexus - var driver: SchedulerDriver = null - - override def start() { - new Thread("Spark scheduler") { - setDaemon(true) - override def run { - val ns = NexusScheduler.this - ns.driver = new NexusSchedulerDriver(ns, master) - ns.driver.run() - } - }.start - } - - override def getFrameworkName(d: SchedulerDriver): String = frameworkName - - override def getExecutorInfo(d: SchedulerDriver): ExecutorInfo = - new ExecutorInfo(new File("spark-executor").getCanonicalPath(), execArg) - - override def runTasks[T: ClassManifest](tasks: Array[Task[T]]): Array[T] = { - runTasksMutex.synchronized { - waitForRegister() - val myOp = new SimpleParallelOperation(this, tasks) - - try { - this.synchronized { - this.activeOp = myOp - } - driver.reviveOffers(); - myOp.join(); - } finally { - this.synchronized { - this.activeOp = null - } - } - - if (myOp.errorHappened) - throw new SparkException(myOp.errorMessage, myOp.errorCode) - else - return myOp.results - } - } - - override def registered(d: SchedulerDriver, frameworkId: String) { - println("Registered as framework ID " + frameworkId) - registeredLock.synchronized { - isRegistered = true - registeredLock.notifyAll() - } - } - - override def waitForRegister() { - registeredLock.synchronized { - while (!isRegistered) - registeredLock.wait() - } - } - - override def resourceOffer( - d: SchedulerDriver, oid: String, offers: SlaveOfferVector) { - synchronized { - val tasks = new TaskDescriptionVector - if 
(activeOp != null) { - try { - for (i <- 0 until offers.size.toInt) { - activeOp.slaveOffer(offers.get(i)) match { - case Some(task) => tasks.add(task) - case None => {} - } - } - } catch { - case e: Exception => e.printStackTrace - } - } - val params = new StringMap - params.set("timeout", "1") - d.replyToOffer(oid, tasks, params) // TODO: use smaller timeout - } - } - - override def statusUpdate(d: SchedulerDriver, status: TaskStatus) { - synchronized { - try { - if (activeOp != null) { - activeOp.statusUpdate(status) - } - } catch { - case e: Exception => e.printStackTrace - } - } - } - - override def error(d: SchedulerDriver, code: Int, message: String) { - synchronized { - if (activeOp != null) { - try { - activeOp.error(code, message) - } catch { - case e: Exception => e.printStackTrace - } - } else { - val msg = "Nexus error: %s (error code: %d)".format(message, code) - System.err.println(msg) - System.exit(1) - } - } - } - - override def stop() { - if (driver != null) - driver.stop() - } -} - - -// Trait representing an object that manages a parallel operation by -// implementing various scheduler callbacks. -trait ParallelOperation { - def slaveOffer(s: SlaveOffer): Option[TaskDescription] - def statusUpdate(t: TaskStatus): Unit - def error(code: Int, message: String): Unit -} - - -class SimpleParallelOperation[T: ClassManifest]( - sched: NexusScheduler, tasks: Array[Task[T]]) -extends ParallelOperation -{ - // Maximum time to wait to run a task in a preferred location (in ms) - val LOCALITY_WAIT = System.getProperty("spark.locality.wait", "1000").toLong - - val callingThread = currentThread - val numTasks = tasks.length - val results = new Array[T](numTasks) - val launched = new Array[Boolean](numTasks) - val finished = new Array[Boolean](numTasks) - val tidToIndex = Map[Int, Int]() - - var allFinished = false - val joinLock = new Object() - - var errorHappened = false - var errorCode = 0 - var errorMessage = "" - - var tasksLaunched = 0 - var tasksFinished = 0 - var lastPreferredLaunchTime = System.currentTimeMillis - - def setAllFinished() { - joinLock.synchronized { - allFinished = true - joinLock.notifyAll() - } - } - - def join() { - joinLock.synchronized { - while (!allFinished) - joinLock.wait() - } - } - - def slaveOffer(offer: SlaveOffer): Option[TaskDescription] = { - if (tasksLaunched < numTasks) { - var checkPrefVals: Array[Boolean] = Array(true) - val time = System.currentTimeMillis - if (time - lastPreferredLaunchTime > LOCALITY_WAIT) - checkPrefVals = Array(true, false) // Allow non-preferred tasks - // TODO: Make desiredCpus and desiredMem configurable - val desiredCpus = 1 - val desiredMem = 750L * 1024L * 1024L - if (offer.getParams.get("cpus").toInt < desiredCpus || - offer.getParams.get("mem").toLong < desiredMem) - return None - for (checkPref <- checkPrefVals; i <- 0 until numTasks) { - if (!launched(i) && (!checkPref || - tasks(i).preferredLocations.contains(offer.getHost) || - tasks(i).preferredLocations.isEmpty)) - { - val taskId = sched.newTaskId() - tidToIndex(taskId) = i - printf("Starting task %d as TID %s on slave %s: %s (%s)\n", - i, taskId, offer.getSlaveId, offer.getHost, - if(checkPref) "preferred" else "non-preferred") - tasks(i).markStarted(offer) - launched(i) = true - tasksLaunched += 1 - if (checkPref) - lastPreferredLaunchTime = time - val params = new StringMap - params.set("cpus", "" + desiredCpus) - params.set("mem", "" + desiredMem) - val serializedTask = Utils.serialize(tasks(i)) - return Some(new TaskDescription(taskId, 
offer.getSlaveId, - "task_" + taskId, params, serializedTask)) - } - } - } - return None - } - - def statusUpdate(status: TaskStatus) { - status.getState match { - case TaskState.TASK_FINISHED => - taskFinished(status) - case TaskState.TASK_LOST => - taskLost(status) - case TaskState.TASK_FAILED => - taskLost(status) - case TaskState.TASK_KILLED => - taskLost(status) - case _ => - } - } - - def taskFinished(status: TaskStatus) { - val tid = status.getTaskId - println("Finished TID " + tid) - // Deserialize task result - val result = Utils.deserialize[TaskResult[T]](status.getData) - results(tidToIndex(tid)) = result.value - // Update accumulators - Accumulators.add(callingThread, result.accumUpdates) - // Mark finished and stop if we've finished all the tasks - finished(tidToIndex(tid)) = true - tasksFinished += 1 - if (tasksFinished == numTasks) - setAllFinished() - } - - def taskLost(status: TaskStatus) { - val tid = status.getTaskId - println("Lost TID " + tid) - launched(tidToIndex(tid)) = false - tasksLaunched -= 1 - } - - def error(code: Int, message: String) { - // Save the error message - errorHappened = true - errorCode = code - errorMessage = message - // Indicate to caller thread that we're done - setAllFinished() - } -} diff --git a/src/scala/spark/ParallelArray.scala b/src/scala/spark/ParallelArray.scala index 39ca867cb9..c4df837eff 100644 --- a/src/scala/spark/ParallelArray.scala +++ b/src/scala/spark/ParallelArray.scala @@ -1,6 +1,6 @@ package spark -import nexus.SlaveOffer +import mesos.SlaveOffer import java.util.concurrent.atomic.AtomicLong diff --git a/src/scala/spark/RDD.scala b/src/scala/spark/RDD.scala index f9a16ed782..073215ebea 100644 --- a/src/scala/spark/RDD.scala +++ b/src/scala/spark/RDD.scala @@ -7,7 +7,7 @@ import java.util.HashSet import scala.collection.mutable.ArrayBuffer import scala.collection.mutable.Map -import nexus._ +import mesos._ import com.google.common.collect.MapMaker diff --git a/src/scala/spark/SparkContext.scala b/src/scala/spark/SparkContext.scala index 6b5a07cff1..bad4bf5721 100644 --- a/src/scala/spark/SparkContext.scala +++ b/src/scala/spark/SparkContext.scala @@ -28,8 +28,8 @@ class SparkContext(master: String, frameworkName: String) { private var scheduler: Scheduler = master match { case "local" => new LocalScheduler(1) case LOCAL_REGEX(threads) => new LocalScheduler(threads.toInt) - case _ => { System.loadLibrary("nexus"); - new NexusScheduler(master, frameworkName, createExecArg()) } + case _ => { System.loadLibrary("mesos"); + new MesosScheduler(master, frameworkName, createExecArg()) } } private val local = scheduler.isInstanceOf[LocalScheduler] diff --git a/src/scala/spark/Task.scala b/src/scala/spark/Task.scala index efb864472d..6e94009f6e 100644 --- a/src/scala/spark/Task.scala +++ b/src/scala/spark/Task.scala @@ -1,6 +1,6 @@ package spark -import nexus._ +import mesos._ @serializable trait Task[T] { diff --git a/third_party/mesos.jar b/third_party/mesos.jar new file mode 100644 index 0000000000..1751d85756 Binary files /dev/null and b/third_party/mesos.jar differ diff --git a/third_party/nexus.jar b/third_party/nexus.jar deleted file mode 100644 index 735a09c7c5..0000000000 Binary files a/third_party/nexus.jar and /dev/null differ -- cgit v1.2.3 From 0e6e577fdfebbc619276608fc10c18d273c1399f Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Sun, 25 Jul 2010 23:54:56 -0400 Subject: Add Mesos native library to .gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 
2d12458a44..98dfe5a27b 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ build work .DS_Store +third_party/libmesos.so +third_party/libmesos.dylib -- cgit v1.2.3 From f415b071af7f407cdf8c1796ca64394f3ec25e8e Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Fri, 6 Aug 2010 12:07:18 -0700 Subject: Change shell framework's name to "Spark shell" --- src/scala/spark/repl/SparkInterpreterLoop.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scala/spark/repl/SparkInterpreterLoop.scala b/src/scala/spark/repl/SparkInterpreterLoop.scala index 26361fdc25..5bad0a37da 100644 --- a/src/scala/spark/repl/SparkInterpreterLoop.scala +++ b/src/scala/spark/repl/SparkInterpreterLoop.scala @@ -267,7 +267,7 @@ extends InterpreterControl { if (prop != null) prop else "local" } } - new SparkContext(master, "Spark REPL") + new SparkContext(master, "Spark shell") } /** The main read-eval-print loop for the interpreter. It calls -- cgit v1.2.3 From 4488b3bc8a4483eeeb703d96842ea3e9695300c3 Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 9 Aug 2010 16:46:14 -0700 Subject: Fixed a bug where we would incorrectly decide we've finished a parallel operation if Mesos tells us a task is finished twice --- src/scala/spark/MesosScheduler.scala | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/src/scala/spark/MesosScheduler.scala b/src/scala/spark/MesosScheduler.scala index 081f720bbd..84b9d9af68 100644 --- a/src/scala/spark/MesosScheduler.scala +++ b/src/scala/spark/MesosScheduler.scala @@ -260,23 +260,31 @@ extends ParallelOperation def taskFinished(status: TaskStatus) { val tid = status.getTaskId println("Finished TID " + tid) - // Deserialize task result - val result = Utils.deserialize[TaskResult[T]](status.getData) - results(tidToIndex(tid)) = result.value - // Update accumulators - Accumulators.add(callingThread, result.accumUpdates) - // Mark finished and stop if we've finished all the tasks - finished(tidToIndex(tid)) = true - tasksFinished += 1 - if (tasksFinished == numTasks) - setAllFinished() + if (!finished(tidToIndex(tid))) { + // Deserialize task result + val result = Utils.deserialize[TaskResult[T]](status.getData) + results(tidToIndex(tid)) = result.value + // Update accumulators + Accumulators.add(callingThread, result.accumUpdates) + // Mark finished and stop if we've finished all the tasks + finished(tidToIndex(tid)) = true + tasksFinished += 1 + if (tasksFinished == numTasks) + setAllFinished() + } else { + printf("Task %s had already finished, so ignoring it\n", tidToIndex(tid)) + } } def taskLost(status: TaskStatus) { val tid = status.getTaskId println("Lost TID " + tid) - launched(tidToIndex(tid)) = false - tasksLaunched -= 1 + if (!finished(tid)) { + launched(tidToIndex(tid)) = false + tasksLaunched -= 1 + } else { + printf("Task %s had already finished, so ignoring it\n", tidToIndex(tid)) + } } def error(code: Int, message: String) { -- cgit v1.2.3 From a9481c3514b33389de7a7db5e5c0ac8291c11f1c Mon Sep 17 00:00:00 2001 From: root Date: Fri, 13 Aug 2010 07:39:36 +0000 Subject: Update to work with latest Mesos API changes --- run | 7 ++++++- src/scala/spark/MesosScheduler.scala | 18 +++++++++--------- third_party/mesos.jar | Bin 43915 -> 36494 bytes 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/run b/run index 7353311522..7e044eb021 100755 --- a/run +++ b/run @@ -8,7 +8,12 @@ if [ -e $FWDIR/conf/spark-env.sh ] ; then . 
$FWDIR/conf/spark-env.sh
 fi
 
-if [ "$SPARK_MEM" == "" ] ; then
+if [ "x$MESOS_HOME" != "x" ] ; then
+  SPARK_CLASSPATH="$MESOS_HOME/lib/java/mesos.jar:$SPARK_CLASSPATH"
+  SPARK_LIBRARY_PATH="$MESOS_HOME/lib/java:$SPARK_LIBRARY_PATH"
+fi
+
+if [ "x$SPARK_MEM" == "x" ] ; then
   SPARK_MEM="300m"
 fi
 
diff --git a/src/scala/spark/MesosScheduler.scala b/src/scala/spark/MesosScheduler.scala
index 84b9d9af68..74c1d8dfb5 100644
--- a/src/scala/spark/MesosScheduler.scala
+++ b/src/scala/spark/MesosScheduler.scala
@@ -100,9 +100,9 @@ extends NScheduler with spark.Scheduler
   }
 
   override def resourceOffer(
-      d: SchedulerDriver, oid: String, offers: SlaveOfferVector) {
+      d: SchedulerDriver, oid: String, offers: java.util.List[SlaveOffer]) {
     synchronized {
-      val tasks = new TaskDescriptionVector
+      val tasks = new java.util.ArrayList[TaskDescription]
       if (activeOp != null) {
         try {
           for (i <- 0 until offers.size.toInt) {
@@ -115,8 +115,8 @@ extends NScheduler with spark.Scheduler
           case e: Exception => e.printStackTrace
         }
       }
-      val params = new StringMap
-      params.set("timeout", "1")
+      val params = new java.util.HashMap[String, String]
+      params.put("timeout", "1")
       d.replyToOffer(oid, tasks, params) // TODO: use smaller timeout
     }
   }
@@ -212,9 +212,9 @@ extends ParallelOperation
         checkPrefVals = Array(true, false) // Allow non-preferred tasks
       // TODO: Make desiredCpus and desiredMem configurable
       val desiredCpus = 1
-      val desiredMem = 750L * 1024L * 1024L
+      val desiredMem = 750
      if (offer.getParams.get("cpus").toInt < desiredCpus ||
-          offer.getParams.get("mem").toLong < desiredMem)
+          offer.getParams.get("mem").toInt < desiredMem)
        return None
      for (checkPref <- checkPrefVals; i <- 0 until numTasks) {
        if (!launched(i) && (!checkPref ||
@@ -231,9 +231,9 @@ extends ParallelOperation
           tasksLaunched += 1
           if (checkPref)
             lastPreferredLaunchTime = time
-          val params = new StringMap
-          params.set("cpus", "" + desiredCpus)
-          params.set("mem", "" + desiredMem)
+          val params = new java.util.HashMap[String, String]
+          params.put("cpus", "" + desiredCpus)
+          params.put("mem", "" + desiredMem)
           val serializedTask = Utils.serialize(tasks(i))
           return Some(new TaskDescription(taskId, offer.getSlaveId,
             "task_" + taskId, params, serializedTask))
diff --git a/third_party/mesos.jar b/third_party/mesos.jar
index 1751d85756..e9530161f2 100644
Binary files a/third_party/mesos.jar and b/third_party/mesos.jar differ
-- 
cgit v1.2.3

From 3d8d7fd557776019670fe954fd2e89d526dc9a9c Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Fri, 13 Aug 2010 11:29:19 -0700
Subject: Bug fix from Justin

---
 src/scala/spark/MesosScheduler.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scala/spark/MesosScheduler.scala b/src/scala/spark/MesosScheduler.scala
index 74c1d8dfb5..18261e68fd 100644
--- a/src/scala/spark/MesosScheduler.scala
+++ b/src/scala/spark/MesosScheduler.scala
@@ -279,7 +279,7 @@ extends ParallelOperation
   def taskLost(status: TaskStatus) {
     val tid = status.getTaskId
     println("Lost TID " + tid)
-    if (!finished(tid)) {
+    if (!finished(tidToIndex(tid))) {
       launched(tidToIndex(tid)) = false
       tasksLaunched -= 1
     } else {
-- 
cgit v1.2.3

From 0b195927b6de951634fce8d5ab6ae8f72d76046a Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Fri, 13 Aug 2010 18:54:32 -0700
Subject: Improved README and added blank templates for config files.
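As a usage illustration: once the blank template added below is filled in, a
site's conf/spark-env.sh might look like the minimal sketch that follows. The
variable names are the ones listed in the template and read by ./run; the
paths and values shown are hypothetical, not part of this commit.

    #!/usr/bin/env bash
    # Hypothetical site settings; ./run sources this file before building
    # JAVA_OPTS and the classpath.
    MESOS_HOME=/opt/mesos              # ./run prepends $MESOS_HOME/lib/java/mesos.jar
                                       # to the classpath and $MESOS_HOME/lib/java to
                                       # the library path
    SCALA_HOME=/opt/scala-2.8.0.final  # used to locate the scala launcher
    SPARK_MEM=1g                       # JVM heap, passed as -Xms/-Xmx (default 300m)
    SPARK_JAVA_OPTS="-verbose:gc"      # extra JVM options appended to JAVA_OPTS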
---
 .gitignore        |  2 ++
 README            |  6 +++++-
 conf/java-opts    |  0
 conf/spark-env.sh | 13 +++++++++++++
 4 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 conf/java-opts
 create mode 100755 conf/spark-env.sh

diff --git a/.gitignore b/.gitignore
index 98dfe5a27b..8156d6e8c1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ work
 .DS_Store
 third_party/libmesos.so
 third_party/libmesos.dylib
+conf/java-opts
+conf/spark-env.sh
diff --git a/README b/README
index 6af34a2294..f084f22a1f 100644
--- a/README
+++ b/README
@@ -5,7 +5,7 @@ Spark requires Scala 2.8. This version has been tested with 2.8.0.final.
 To build and run Spark, you will need to have Scala's bin in your
 $PATH, or you will need to set the SCALA_HOME environment variable
 to point to where you've installed Scala. Scala must be accessible through one
-of these methods on Nexus slave nodes as well as on the master.
+of these methods on Mesos slave nodes as well as on the master.
 
 To build Spark and the example programs, run make.
 
@@ -13,6 +13,10 @@ To run one of the examples, use ./run <class> <params>. For example,
 ./run SparkLR will run the Logistic Regression example. Each of the
 example programs prints usage help if no params are given.
 
+All of the Spark samples take a parameter that is the Mesos master
+to connect to. This can be a Mesos URL, or "local" to run locally with one
+thread, or "local[N]" to run locally with N threads.
+
 Tip: If you are building Spark and examples repeatedly, export USE_FSC=1
 to have the Makefile use the fsc compiler daemon instead of scalac.
 
diff --git a/conf/java-opts b/conf/java-opts
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/conf/spark-env.sh b/conf/spark-env.sh
new file mode 100755
index 0000000000..6852b23a34
--- /dev/null
+++ b/conf/spark-env.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+# Set Spark environment variables for your site in this file. Some useful
+# variables to set are:
+# - MESOS_HOME, to point to your Mesos installation
+# - SCALA_HOME, to point to your Scala installation
+# - SPARK_CLASSPATH, to add elements to Spark's classpath
+# - SPARK_JAVA_OPTS, to add JVM options
+# - SPARK_MEM, to change the amount of memory used per node (this should
+#   be in the same format as the JVM's -Xmx option, e.g. 300m or 1g).
+# - SPARK_LIBRARY_PATH, to add extra search paths for native libraries.
+
+
-- 
cgit v1.2.3

From 1600c31554d82c89823255aa5b5889a24c1cfdb1 Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Fri, 13 Aug 2010 19:03:46 -0700
Subject: Added latest mesos.jar

---
 third_party/mesos.jar | Bin 36494 -> 34562 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/third_party/mesos.jar b/third_party/mesos.jar
index e9530161f2..1852cf8fd0 100644
Binary files a/third_party/mesos.jar and b/third_party/mesos.jar differ
-- 
cgit v1.2.3

From 1cbffaae6f8a8511a4e239bcdbd1785cda1ac6cc Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Sun, 15 Aug 2010 18:33:27 -0700
Subject: Modified Scala interpreter to have it avoid computing string versions
 of all results when :silent is enabled, so that it is easier to work with
 large arrays in Spark. (The string version of an array of numbers might not
 fit in memory even though the array itself does.)
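To make the effect concrete, here is a hypothetical Spark shell session (the
spark-shell launcher name and the array size are illustrative assumptions,
not taken from this commit):

    $ ./spark-shell
    scala> :silent
    scala> val big = new Array[Int](100000000)

With result printing enabled, the last line would also make the interpreter
build a string rendering of the array for display; with this patch and
:silent on, the result-extraction code is never generated, so only the
100-million-element array itself is held in memory.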
--- src/scala/spark/repl/SparkInterpreter.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/scala/spark/repl/SparkInterpreter.scala b/src/scala/spark/repl/SparkInterpreter.scala index 6237c83625..2bfaaf7342 100644 --- a/src/scala/spark/repl/SparkInterpreter.scala +++ b/src/scala/spark/repl/SparkInterpreter.scala @@ -956,7 +956,9 @@ class SparkInterpreter(val settings: Settings, out: PrintWriter) { """.stripMargin code println preamble - handlers foreach { _.resultExtractionCode(this, code) } + if (printResults) { + handlers foreach { _.resultExtractionCode(this, code) } + } code println postamble } -- cgit v1.2.3 From 75b2ca10c3d5a4e97536c133ad0ef8e22b95ceba Mon Sep 17 00:00:00 2001 From: Matei Zaharia Date: Mon, 16 Aug 2010 23:16:35 -0700 Subject: Removed HOD from included Hadoop because it was making the project count as Python on GitHub :|. --- third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt | 248 ---- third_party/hadoop-0.20.0/contrib/hod/README | 104 -- third_party/hadoop-0.20.0/contrib/hod/bin/VERSION | 1 - .../hadoop-0.20.0/contrib/hod/bin/checknodes | 31 - third_party/hadoop-0.20.0/contrib/hod/bin/hod | 577 --------- .../hadoop-0.20.0/contrib/hod/bin/hodcleanup | 183 --- third_party/hadoop-0.20.0/contrib/hod/bin/hodring | 287 ----- .../hadoop-0.20.0/contrib/hod/bin/ringmaster | 349 ------ .../hadoop-0.20.0/contrib/hod/bin/verify-account | 11 - third_party/hadoop-0.20.0/contrib/hod/build.xml | 81 -- third_party/hadoop-0.20.0/contrib/hod/conf/hodrc | 46 - third_party/hadoop-0.20.0/contrib/hod/config.txt | 172 --- .../hadoop-0.20.0/contrib/hod/getting_started.txt | 233 ---- .../hod/hodlib/AllocationManagers/__init__.py | 16 - .../AllocationManagers/goldAllocationManager.py | 104 -- .../contrib/hod/hodlib/Common/__init__.py | 15 - .../hod/hodlib/Common/allocationManagerUtil.py | 27 - .../contrib/hod/hodlib/Common/desc.py | 298 ----- .../contrib/hod/hodlib/Common/descGenerator.py | 72 -- .../contrib/hod/hodlib/Common/hodsvc.py | 228 ---- .../contrib/hod/hodlib/Common/logger.py | 788 ------------ .../contrib/hod/hodlib/Common/miniHTMLParser.py | 45 - .../contrib/hod/hodlib/Common/nodepoolutil.py | 26 - .../contrib/hod/hodlib/Common/setup.py | 1058 ---------------- .../contrib/hod/hodlib/Common/socketServers.py | 621 ---------- .../hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py | 176 --- .../contrib/hod/hodlib/Common/threads.py | 389 ------ .../contrib/hod/hodlib/Common/types.py | 1266 -------------------- .../contrib/hod/hodlib/Common/util.py | 309 ----- .../contrib/hod/hodlib/Common/xmlrpc.py | 57 - .../contrib/hod/hodlib/GridServices/__init__.py | 18 - .../contrib/hod/hodlib/GridServices/hdfs.py | 310 ----- .../contrib/hod/hodlib/GridServices/mapred.py | 272 ----- .../contrib/hod/hodlib/GridServices/service.py | 266 ---- .../contrib/hod/hodlib/Hod/__init__.py | 15 - .../hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py | 747 ------------ .../hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py | 754 ------------ .../contrib/hod/hodlib/Hod/nodePool.py | 128 -- .../contrib/hod/hodlib/HodRing/__init__.py | 15 - .../contrib/hod/hodlib/HodRing/hodRing.py | 928 -------------- .../contrib/hod/hodlib/NodePools/__init__.py | 15 - .../contrib/hod/hodlib/NodePools/torque.py | 334 ------ .../contrib/hod/hodlib/RingMaster/__init__.py | 15 - .../hod/hodlib/RingMaster/idleJobTracker.py | 218 ---- .../contrib/hod/hodlib/RingMaster/ringMaster.py | 1019 ---------------- .../contrib/hod/hodlib/Schedulers/__init__.py | 15 - .../contrib/hod/hodlib/Schedulers/torque.py | 175 --- 
.../contrib/hod/hodlib/ServiceProxy/__init__.py | 15 - .../hod/hodlib/ServiceProxy/serviceProxy.py | 49 - .../contrib/hod/hodlib/ServiceRegistry/__init__.py | 15 - .../hod/hodlib/ServiceRegistry/serviceRegistry.py | 127 -- .../hadoop-0.20.0/contrib/hod/hodlib/__init__.py | 16 - third_party/hadoop-0.20.0/contrib/hod/ivy.xml | 22 - .../contrib/hod/ivy/libraries.properties | 5 - .../contrib/hod/support/checklimits.sh | 57 - .../contrib/hod/support/logcondense.py | 212 ---- .../hadoop-0.20.0/contrib/hod/testing/__init__.py | 15 - .../hadoop-0.20.0/contrib/hod/testing/helper.py | 33 - .../hadoop-0.20.0/contrib/hod/testing/lib.py | 113 -- .../hadoop-0.20.0/contrib/hod/testing/main.py | 83 -- .../contrib/hod/testing/testHadoop.py | 123 -- .../hadoop-0.20.0/contrib/hod/testing/testHod.py | 310 ----- .../contrib/hod/testing/testHodCleanup.py | 113 -- .../contrib/hod/testing/testHodRing.py | 117 -- .../contrib/hod/testing/testModule.py | 88 -- .../contrib/hod/testing/testRingmasterRPCs.py | 171 --- .../contrib/hod/testing/testThreads.py | 99 -- .../hadoop-0.20.0/contrib/hod/testing/testTypes.py | 180 --- .../hadoop-0.20.0/contrib/hod/testing/testUtil.py | 62 - .../contrib/hod/testing/testXmlrpc.py | 109 -- 70 files changed, 15196 deletions(-) delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/README delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/VERSION delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/checknodes delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/hod delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/hodring delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster delete mode 100755 third_party/hadoop-0.20.0/contrib/hod/bin/verify-account delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/build.xml delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/conf/hodrc delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/config.txt delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/getting_started.txt delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py delete mode 100644 
third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/xmlrpc.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/nodePool.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/HodRing/hodRing.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/NodePools/torque.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/idleJobTracker.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/RingMaster/ringMaster.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/Schedulers/torque.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceProxy/serviceProxy.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/ServiceRegistry/serviceRegistry.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/hodlib/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/ivy.xml delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/ivy/libraries.properties delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/support/checklimits.sh delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/support/logcondense.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/__init__.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/helper.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/lib.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/main.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testHadoop.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testHod.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testHodCleanup.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testHodRing.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testModule.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testRingmasterRPCs.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testThreads.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testTypes.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testUtil.py delete mode 100644 third_party/hadoop-0.20.0/contrib/hod/testing/testXmlrpc.py diff --git a/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt b/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt deleted file mode 100644 index 95cf0710fe..0000000000 --- 
a/third_party/hadoop-0.20.0/contrib/hod/CHANGES.txt +++ /dev/null @@ -1,248 +0,0 @@ -HOD Change Log - -Release 0.20.0 - (unreleased changes) - - INCOMPATIBLE CHANGES - - NEW FEATURES - - IMPROVEMENTS - - HADOOP-4705. Grant read permissions for files/directories - created by HOD. (Peeyush Bishnoi via yhemanth) - - HADOOP-4937. Include ringmaster RPC port in the notes - attribute. (Peeyush Bishnoi via yhemanth) - - OPTIMIZATIONS - - BUG FIXES - - HADOOP-4782. Revert umask changes in HADOOP-4705 so that - files are still securely created. (Peeyush Bishnoi via - yhemanth) - -Release 0.19.0 - 2008-11-18 - - INCOMPATIBLE CHANGES - - NEW FEATURES - - HADOOP-3695. Provide an ability to start multiple workers per node. - (Vinod Kumar Vavilapalli via yhemanth) - - IMPROVEMENTS - - OPTIMIZATIONS - - BUG FIXES - - HADOOP-3959. Pass options specified in resource_manager.options to - job submission. - (Craig Macdonald and Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3814. Remove generation of dfs.client.buffer.dir for the generated - hadoop-site.xml. (Vinod Kumar Vavilapalli via acmurthy) - -Release 0.18.2 - Unreleased - - BUG FIXES - - HADOOP-3786. Use HDFS instead of DFS in all docs and hyperlink to Torque. - (Vinod Kumar Vavilapalli via acmurthy) - -Release 0.18.1 - 2008-09-17 - - INCOMPATIBLE CHANGES - - HADOOP-4060. Modified HOD to rotate log files on the client side. - (Vinod Kumar Vavilapalli via yhemanth) - - IMPROVEMENTS - - HADOOP-4145. Add an accounting plugin (script) for HOD. - (Hemanth Yamijala via nigel) - - BUG FIXES - - HADOOP-4161. Fixed bug in HOD cleanup that had the potential to - hang clients. (Vinod Kumar Vavilapalli via nigel) - -Release 0.18.0 - 2008-08-19 - - INCOMPATIBLE CHANGES - - HADOOP-3483. Modified HOD to create a cluster directory if one does not - exist and to auto-deallocate a cluster while reallocating it, if it is - already dead. (Hemanth Yamijala via mukund) - - HADOOP-3184. Modified HOD to handle master failures on bad nodes by trying - to bring them up on another node in the ring. (Hemanth Yamijala via ddas) - - HADOOP-3610. Modified HOD to create cluster directory if one does not - exist when using the script option. (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3808. Modified HOD to include RPC port of the JobTracker - into the notes attribute of the resource manager. (yhemanth) - - NEW FEATURES - - IMPROVEMENTS - - HADOOP-3376: Provide a mechanism to detect and handle violations to - resource manager limits. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3151. Improves error messages when reporting failures due to - incorrect parameters passed to HOD. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3464. Implemented a mechanism to transfer HOD errors that occur on - compute nodes to the submit node running the HOD client, so users have good - feedback on why an allocation failed. (Vinod Kumar Vavilapalli via mukund) - - HADOOP-3505. Updated HOD documentation with changes made for Hadoop - 0.18. (Vinod Kumar Vavilapalli via yhemanth) - - BUG FIXES - - HADOOP-2961. Avoids unnecessary checks for some configuration parameters - related to service configuration. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3523. Fixes auto-deallocation of cluster if job id is not found in - Torque's job list (Hemanth Yamijala via ddas) - - HADOOP-3531. Fixes a bug related to handling JobTracker failures because of - timing issues on slow nodes. (Hemanth Yamijala via ddas) - - HADOOP-3564. 
HOD generates values for the parameter dfs.datanode.ipc.address - in the hadoop-site.xml created on datanodes. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-3076. Fixes a bug related to a spurious message about the - script.exitcode file when a cluster directory is specified as a relative - path. (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3668. Makes editorial changes to HOD documentation. - (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3703. Fixes logcondense.py to use the new format of hadoop dfs -lsr - command line output format. (Vinod Kumar Vavilapalli via yhemanth) - -Release 0.17.3 - Unreleased - - BUG FIXES - - HADOOP-3217. Decrease the rate at which the hod queries the resource - manager for job status. (Hemanth Yamijala via acmurthy) - -Release 0.17.0 - 2008-05-18 - - INCOMPATIBLE CHANGES - - HADOOP-3137. Modified build script to pick up version automatically - from Hadoop build. (yhemanth) - - IMPROVEMENTS - - HADOOP-2775. Adds unit test framework for HOD. - (Vinod Kumar Vavilapalli via ddas). - - HADOOP-2848. [HOD]hod -o list and deallocate works even after deleting - the cluster directory. (Hemanth Yamijala via ddas) - - HADOOP-2899. [HOD] Cleans up hdfs:///mapredsystem directory after - deallocation. (Hemanth Yamijala via ddas) - - HADOOP-2796. Enables distinguishing exit codes from user code vis-a-vis - HOD's exit code. (Hemanth Yamijala via ddas) - - HADOOP-2947. HOD redirects stdout and stderr of daemons to assist - getting stack traces. (Vinod Kumar Vavilapalli via yhemanth) - - BUG FIXES - - HADOOP-2924. Fixes an address problem to do with TaskTracker binding - to an address. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2970. Fixes a problem to do with Wrong class definition for - hodlib/Hod/hod.py for Python < 2.5.1. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2783. Fixes a problem to do with import in - hod/hodlib/Common/xmlrpc.py. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2936. Fixes HOD in a way that it generates hdfs://host:port on the - client side configs. (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2983. [HOD] Fixes the problem - local_fqdn() returns None when - gethostbyname_ex doesnt return any FQDNs. (Craig Macdonald via ddas) - - HADOOP-2982. Fixes a problem in the way HOD looks for free nodes. - (Hemanth Yamijala via ddas) - - HADOOP-2855. Fixes the way HOD handles relative paths for cluster - directory, script file and other options. - (Vinod Kumar Vavilapalli via yhemanth) - - HADOOP-3153. Fixes the way HOD handles allocation if the user has no - permissions to update the clusters state file. - (Vinod Kumar Vavilapalli via yhemanth) - -Release 0.16.4 - 2008-05-05 - - BUG FIXES - - HADOOP-3304. [HOD] Fixes the way the logcondense.py utility searches - for log files that need to be deleted. (yhemanth via mukund) - -Release 0.16.2 - 2008-04-02 - - BUG FIXES - - HADOOP-3103. [HOD] Hadoop.tmp.dir should not be set to cluster - directory. (Vinod Kumar Vavilapalli via ddas). - -Release 0.16.1 - 2008-03-13 - - INCOMPATIBLE CHANGES - - HADOOP-2861. Improve the user interface for the HOD commands. - Command line structure has changed. (Hemanth Yamijala via nigel) - - IMPROVEMENTS - - HADOOP-2730. HOD documentation update. - (Vinod Kumar Vavilapalli via ddas) - - HADOOP-2911. Make the information printed by the HOD allocate and - info commands less verbose and clearer. (Vinod Kumar via nigel) - - BUG FIXES - - HADOOP-2766. Enables setting of HADOOP_OPTS env variable for the hadoop - daemons through HOD. 
(Vinod Kumar Vavilapalli via ddas) - - HADOOP-2809. Fix HOD syslog config syslog-address so that it works. - (Hemanth Yamijala via nigel) - - HADOOP-2847. Ensure idle cluster cleanup works even if the JobTracker - becomes unresponsive to RPC calls. (Hemanth Yamijala via nigel) - - HADOOP-2925. Fix HOD to create the mapred system directory using a - naming convention that will avoid clashes in multi-user shared - cluster scenario. (Hemanth Yamijala via nigel) - -Release 0.16.0 - 2008-02-07 - - NEW FEATURES - - HADOOP-1301. Hadoop-On-Demand (HOD): resource management - provisioning for Hadoop. (Hemanth Yamijala via nigel) - - BUG FIXES - - HADOOP-2720. Jumbo bug fix patch to HOD. Final sync of Apache SVN with - internal Yahoo SVN. (Hemanth Yamijala via nigel) - - HADOOP-2740. Fix HOD to work with the configuration variables changed in - HADOOP-2404. (Hemanth Yamijala via omalley) - diff --git a/third_party/hadoop-0.20.0/contrib/hod/README b/third_party/hadoop-0.20.0/contrib/hod/README deleted file mode 100644 index aaa7d35c3e..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/README +++ /dev/null @@ -1,104 +0,0 @@ - Hadoop On Demand - ================ - -1. Introduction: -================ - -The Hadoop On Demand (HOD) project is a system for provisioning and -managing independent Hadoop MapReduce instances on a shared cluster -of nodes. HOD uses a resource manager for allocation. At present it -supports Torque (http://www.clusterresources.com/pages/products/torque-resource-manager.php) -out of the box. - -2. Feature List: -================ - -The following are the features provided by HOD: - -2.1 Simplified interface for managing MapReduce clusters: - -The MapReduce user interacts with the cluster through a simple -command line interface, the HOD client. HOD brings up a virtual -MapReduce cluster with the required number of nodes, which the -user can use for running Hadoop jobs. When done, HOD will -automatically clean up the resources and make the nodes available -again. - -2.2 Automatic installation of Hadoop: - -With HOD, Hadoop does not need to be even installed on the cluster. -The user can provide a Hadoop tarball that HOD will automatically -distribute to all the nodes in the cluster. - -2.3 Configuring Hadoop: - -Dynamic parameters of Hadoop configuration, such as the NameNode and -JobTracker addresses and ports, and file system temporary directories -are generated and distributed by HOD automatically to all nodes in -the cluster. - -In addition, HOD allows the user to configure Hadoop parameters -at both the server (for e.g. JobTracker) and client (for e.g. JobClient) -level, including 'final' parameters, that were introduced with -Hadoop 0.15. - -2.4 Auto-cleanup of unused clusters: - -HOD has an automatic timeout so that users cannot misuse resources they -aren't using. The timeout applies only when there is no MapReduce job -running. - -2.5 Log services: - -HOD can be used to collect all MapReduce logs to a central location -for archiving and inspection after the job is completed. - -3. HOD Components -================= - -This is a brief overview of the various components of HOD and how they -interact to provision Hadoop. - -HOD Client: The HOD client is a Unix command that users use to allocate -Hadoop MapReduce clusters. The command provides other options to list -allocated clusters and deallocate them. The HOD client generates the -hadoop-site.xml in a user specified directory. 
The user can point to -this configuration file while running Map/Reduce jobs on the allocated -cluster. - -RingMaster: The RingMaster is a HOD process that is started on one node -per every allocated cluster. It is submitted as a 'job' to the resource -manager by the HOD client. It controls which Hadoop daemons start on -which nodes. It provides this information to other HOD processes, -such as the HOD client, so users can also determine this information. -The RingMaster is responsible for hosting and distributing the -Hadoop tarball to all nodes in the cluster. It also automatically -cleans up unused clusters. - -HodRing: The HodRing is a HOD process that runs on every allocated node -in the cluster. These processes are run by the RingMaster through the -resource manager, using a facility of parallel execution. The HodRings -are responsible for launching Hadoop commands on the nodes to bring up -the Hadoop daemons. They get the command to launch from the RingMaster. - -Hodrc / HOD configuration file: An INI style configuration file where -the users configure various options for the HOD system, including -install locations of different software, resource manager parameters, -log and temp file directories, parameters for their MapReduce jobs, -etc. - -Submit Nodes: Nodes where the HOD Client is run, from where jobs are -submitted to the resource manager system for allocating and running -clusters. - -Compute Nodes: Nodes which get allocated by a resource manager, -and on which the Hadoop daemons are provisioned and started. - -4. Next Steps: -============== - -- Read getting_started.txt to get an idea of how to get started with -installing, configuring and running HOD. - -- Read config.txt to get more details on configuration options for HOD. - diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION b/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION deleted file mode 100755 index 5a03fb737b..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.20.0 diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes b/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes deleted file mode 100755 index 5f9f92f166..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/checknodes +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env bash - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -PBS_NODES_PATH=`which pbsnodes 2>/dev/null` -if [ -z $PBS_NODES_PATH ] -then - echo Could not find pbsnodes in path. Cannot check available number of nodes. >&2 - exit 1 -fi -if [ -z $1 ] -then - echo Usage: checknodes queue-name >&2 - exit 2 -fi -# the number of nodes marked 'free', and which do not contain a jobs attribute from the server or from the moms. 
-$PBS_NODES_PATH :$1 | awk 'BEGIN {c=0} /state = free/ {getline;getline;getline;getline; if ($0 !~ /jobs =/ && $0 !~ /jobs=[0-9].*/) c++ ; } END {print c}' diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hod b/third_party/hadoop-0.20.0/contrib/hod/bin/hod deleted file mode 100755 index e87b2764db..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hod +++ /dev/null @@ -1,577 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -original_dir=$PWD -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} --hod.original-dir $original_dir -else - exec python -u -OO $base_name ${1+"$@"} --hod.original-dir $work_dir -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, pwd, threading, sys - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.Hod.hod import hodRunner -from hodlib.Common.setup import * -from hodlib.Common.descGenerator import * -from hodlib.Common.util import local_fqdn, need_to_allocate, filter_warnings,\ - get_exception_error_string, hodInterrupt, \ - HOD_INTERRUPTED_MESG, HOD_INTERRUPTED_CODE,\ - TORQUE_USER_LIMITS_COMMENT_FIELD -from hodlib.Common.tcp import tcpError, tcpSocket -from hodlib.Hod.hod import hodHelp - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = None -if os.path.exists("./VERSION"): - vFile = open("./VERSION", 'r') - VERSION = vFile.readline() - vFile.close() - -# Always look for hodrc file here unless otherwise specified with -c: -DEFAULT_LOC = os.path.join(rootDirectory, 'conf') -DEFAULT_HOD_DIR = os.path.join(os.environ['HOME'], ".hod") - -if not os.path.isdir(DEFAULT_HOD_DIR): - os.mkdir(DEFAULT_HOD_DIR, 0777) - -DEFAULT_CONFIG = os.path.join(DEFAULT_HOD_DIR, 'hodrc') -if not os.path.exists(DEFAULT_CONFIG): - if os.environ.has_key('HOD_CONF_DIR') and os.environ['HOD_CONF_DIR'] is not None: - DEFAULT_CONFIG = os.path.join(os.environ['HOD_CONF_DIR'], 'hodrc') - -# Definition tuple is of the form: -# (name, type, description, help?, default value, required?, validate?, -# short option) -# -defList = { 'hod' : ( - ('original-dir', 'directory', 'hod original start directory', - False, None, True, True, 'r'), - - ('clusterdir', 'directory', - 'Directory where cluster state information and hadoop-site.xml' + - ' 
will be stored.', - True, None, False, False, 'd'), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True, 'y'), - - ('java-home', 'directory', 'Java home directory.', - True, None, True, True, 'j'), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - True, 3, True, True, 'b'), - - ('stream', 'bool', 'Output to stderr.', - False, True, False, True), - - ('nodecount', 'pos_int', - 'Number of nodes to allocate at startup. ', - True, None, False, True, 'n'), - - ('script', 'file', 'Hadoop script to execute.', - True, None, False, False, 's'), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True, 'u'), - - ('allocate-wait-time', 'pos_int', - 'Time to wait for cluster allocation.', - False, 300, True, True, 'e'), - - ('operation', 'string', - 'Initiate a hod operation. (help, allocate, deallocate ...)', - False, None, False, True, 'o'), - - ('cluster-factor', 'pos_float', - 'The number of grid slots per machines', False, 1.9, False, True, - 'x'), - - ('cluster', 'string', 'Name of cluster being used.', - False, None, True, True, 'w'), - - ('proxy-xrs-address', 'address', - 'Address to Allocation Manager XML RPC proxy.', - False, None, False, True, 'p'), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('client-params', 'keyval', 'Hadoop client xml key/value list', - True, None, False, True, 'C'), - - ('hadoop-ui-log-dir', 'directory', 'Directory to store Web UI Logs of Hadoop', - True, None, False, True), - - ('temp-dir', 'directory', 'HOD temporary directories.', - False, None, True, False), - - ('update-worker-info', 'bool', 'Specifies whether to update Worker Info after allocation', - False, False, False, True), - - ('job-feasibility-attr', 'string', 'Specifies whether to check job feasibility - resource manager and/or scheduler limits, also gives the attribute value', - False, None, False, True), - - ('title', 'string', 'Title for the current HOD allocation.', - True, "HOD", False, True, 'N'), - - ('walltime', 'pos_int', 'Walltime in seconds for the current HOD allocation', - True, None, False, True, 'l'), - - ('script-wait-time', 'pos_int', 'Specifies the time to wait before running the script. Used with the hod.script option.', - True, 10, False, True, 'W'), - - ('log-rollover-count', 'pos_int', 'Specifies the number of rolled-over log files of HOD client. 
A zero value disables rollover.', - True, 5, False, True, 'L'), - - ('job-status-query-interval', 'pos_int', 'Specifies the time between checking for job status', - False, 30, False, True), - - ('job-command-failure-interval', 'pos_int', 'Specifies the time between checking for failed job status or submission commands', - False, 10, False, True), - - ('job-status-query-failure-retries', 'pos_int', 'Specifies the number of times job status failure queries are retried', - False, 3, False, True), - - ('job-submission-failure-retries', 'pos_int', 'Specifies the number of times job submission failure queries are retried', - False, 3, False, True)), - - 'resource_manager' : ( - ('id', 'string', 'Batch scheduler ID: torque|condor.', - False, None, True, True), - - ('pbs-user', 'user_account', 'User ID jobs are submitted under.', - False, None, False, True), - - ('pbs-account', 'string', 'User Account jobs are submitted under.', - True, None, False, False, 'A'), - - ('queue', 'string', 'Queue of the batch scheduler to query.', - True, 'batch', False, True, 'Q'), - - ('batch-home', 'directory', 'Scheduler installation directory.', - False, None, True, True), - - ('options', 'keyval', 'Options to pass to the scheduler.', - False, None, False, True), - - ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.', - False, None, False, True)), - - 'ringmaster' : ( - ('work-dirs', 'list', 'hod work directories', - False, None, True, False), - - ('temp-dir', 'directory', 'Ringmaster temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 4, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, False), - - ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.', - True, None, False, False, 't'), - - ('max-connect','pos_int','max connections allowed for a single tarball server', - False, 30, False, True), - - ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness', - False, 120, False, True), - - ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster', - False, 3600, False, True), - - ('max-master-failures', 'pos_int', - 'Defines how many times a master can fail before' \ - ' failing cluster allocation', False, 5, True, True), - - ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring', - False, 1, False, True)), - - 'gridservice-mapred' : ( - ('external', 'bool', "Connect to an already running MapRed?", - False, False, True, True), - - ('host', 'hostname', 'Mapred hostname.', - False, 'localhost', False, False), - - ('info_port', 'pos_int', 'Mapred info port.', - False, None, False, False), - - ('tracker_port', 'pos_int', 'Mapred job tracker port.', - False, None, False, False), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - 
- ('server-params', 'keyval', 'Hadoop xml key/value list', - True, None, False, True, 'M'), - - ('envs', 'keyval', 'environment to run this package in', - False, None, False, True), - - ('final-server-params', 'keyval', 'Hadoop final xml key/val list', - False, None, False, True, 'F'), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'gridservice-hdfs' : ( - ('external', 'bool', "Connect to an already running HDFS?", - False, False, True, True), - - ('host', 'hostname', 'HDFS hostname.', - False, 'localhost', False, False), - - ('fs_port', 'pos_int', 'HDFS port.', - False, None, False, False), - - ('info_port', 'pos_int', 'HDFS info port.', - False, None, False, False), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, True, 'H'), - - ('final-server-params', 'keyval', 'Hadoop final xml key/value list', - False, None, False, True, 'S'), - - ('envs', 'keyval', 'Environment in which to run this package.', - False, None, False, True), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'hodring' : ( - ('temp-dir', 'list', 'hodring temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, False), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, False), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, pwd.getpwuid(os.getuid())[0], False, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'string', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','Initial Retry time for tarball download', - False, 1, False, True), - - ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','Initial retry time for getting commands', - False, 2, False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - -defOrder = [ 'hod', 'ringmaster', 'hodring', 'resource_manager', - 
'gridservice-mapred', 'gridservice-hdfs' ] - -def printErrors(msgs): - for msg in msgs: - print msg - -def op_requires_pkgs(config): - if config['hod'].has_key('operation'): - return config['hod']['operation'].startswith('allocate') - else: - return config['hod'].has_key('script') - -if __name__ == '__main__': - try: - confDef = definition() - confDef.add_defs(defList, defOrder) - hodhelp = hodHelp() - usage = hodhelp.help() - - hodOptions = options(confDef, usage, - VERSION, withConfig=True, defaultConfig=DEFAULT_CONFIG, - name=myName ) - # hodConfig is a dict like object, hodConfig[section][name] - try: - hodConfig = config(hodOptions['config'], configDef=confDef, - originalDir=hodOptions['hod']['original-dir'], - options=hodOptions) - except IOError, e: - print >>sys.stderr,"error: %s not found. Specify the path to the HOD configuration file, or define the environment variable %s under which a file named hodrc can be found." % (hodOptions['config'], 'HOD_CONF_DIR') - sys.exit(1) - - # Conditional validation - statusMsgs = [] - - if hodConfig.normalizeValue('gridservice-hdfs', 'external'): - # For external HDFS - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'fs_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'info_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-hdfs', - 'host')) - else: - hodConfig['gridservice-hdfs']['fs_port'] = 0 # Dummy - hodConfig['gridservice-hdfs']['info_port'] = 0 # Not used at all - - if hodConfig.normalizeValue('gridservice-mapred', 'external'): - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'tracker_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'info_port')) - statusMsgs.extend(hodConfig.validateValue('gridservice-mapred', - 'host')) - else: - hodConfig['gridservice-mapred']['tracker_port'] = 0 # Dummy - hodConfig['gridservice-mapred']['info_port'] = 0 # Not used at all - - if len(statusMsgs) != 0: - for msg in statusMsgs: - print >>sys.stderr, msg - sys.exit(1) - # End of conditional validation - - status = True - statusMsgs = [] - - (status,statusMsgs) = hodConfig.verify() - if not status: - print >>sys.stderr,"error: bin/hod failed to start." - for msg in statusMsgs: - print >>sys.stderr,"%s" % (msg) - sys.exit(1) - - ## TODO : should move the dependency verification to hodConfig.verify - if hodConfig['hod'].has_key('operation') and \ - hodConfig['hod'].has_key('script'): - print "Script operation is mutually exclusive with other HOD operations" - hodOptions.print_help(sys.stderr) - sys.exit(1) - - if 'operation' not in hodConfig['hod'] and 'script' not in hodConfig['hod']: - print "HOD requires at least a script or operation be specified." - hodOptions.print_help(sys.stderr) - sys.exit(1) - - if hodConfig['gridservice-hdfs']['external']: - hdfsAddress = "%s:%s" % (hodConfig['gridservice-hdfs']['host'], - hodConfig['gridservice-hdfs']['fs_port']) - - hdfsSocket = tcpSocket(hdfsAddress) - - try: - hdfsSocket.open() - hdfsSocket.close() - except tcpError: - printErrors(hodConfig.var_error('hod', 'gridservice-hdfs', - "Failed to open a connection to external hdfs address: %s." 
% - hdfsAddress)) - sys.exit(1) - else: - hodConfig['gridservice-hdfs']['host'] = 'localhost' - - if hodConfig['gridservice-mapred']['external']: - mapredAddress = "%s:%s" % (hodConfig['gridservice-mapred']['host'], - hodConfig['gridservice-mapred']['tracker_port']) - - mapredSocket = tcpSocket(mapredAddress) - - try: - mapredSocket.open() - mapredSocket.close() - except tcpError: - printErrors(hodConfig.var_error('hod', 'gridservice-mapred', - "Failed to open a connection to external mapred address: %s." % - mapredAddress)) - sys.exit(1) - else: - hodConfig['gridservice-mapred']['host'] = 'localhost' - - if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \ - not hodConfig['gridservice-hdfs'].has_key('pkgs') and \ - op_requires_pkgs(hodConfig): - printErrors(hodConfig.var_error('gridservice-hdfs', 'pkgs', - "gridservice-hdfs.pkgs must be defined if ringmaster.hadoop-tar-ball " - + "is not defined.")) - sys.exit(1) - - if not hodConfig['ringmaster'].has_key('hadoop-tar-ball') and \ - not hodConfig['gridservice-mapred'].has_key('pkgs') and \ - op_requires_pkgs(hodConfig): - printErrors(hodConfig.var_error('gridservice-mapred', 'pkgs', - "gridservice-mapred.pkgs must be defined if ringmaster.hadoop-tar-ball " - + "is not defined.")) - sys.exit(1) - - if hodConfig['hodring'].has_key('log-destination-uri'): - if hodConfig['hodring']['log-destination-uri'].startswith('file://'): - pass - elif hodConfig['hodring']['log-destination-uri'].startswith('hdfs://'): - hostPort = hodConfig['hodring']['log-destination-uri'][7:].split("/") - hostPort = hostPort[0] - socket = tcpSocket(hostPort) - try: - socket.open() - socket.close() - except: - printErrors(hodConfig.var_error('hodring', 'log-destination-uri', - "Unable to contact host/port specified in log destination uri: %s" % - hodConfig['hodring']['log-destination-uri'])) - sys.exit(1) - else: - printErrors(hodConfig.var_error('hodring', 'log-destination-uri', - "The log destiniation uri must be of type local:// or hdfs://.")) - sys.exit(1) - - if hodConfig['ringmaster']['workers_per_ring'] < 1: - printErrors(hodConfig.var_error('ringmaster', 'workers_per_ring', - "ringmaster.workers_per_ring must be a positive integer " + - "greater than or equal to 1")) - sys.exit(1) - - ## TODO : end of should move the dependency verification to hodConfig.verif - - hodConfig['hod']['base-dir'] = rootDirectory - hodConfig['hod']['user_state'] = DEFAULT_HOD_DIR - - dGen = DescGenerator(hodConfig) - hodConfig = dGen.initializeDesc() - - os.environ['JAVA_HOME'] = hodConfig['hod']['java-home'] - - if hodConfig['hod']['debug'] == 4: - print "" - print "Using Python: %s" % sys.version - print "" - - hod = hodRunner(hodConfig) - - # Initiate signal handling - hodInterrupt.set_log(hod.get_logger()) - hodInterrupt.init_signals() - # Interrupts set up. Now on we handle signals only when we wish to. 
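The external-HDFS and external-MapRed checks above boil down to a TCP reachability probe against a host:port string. A minimal standalone sketch of the same check, using only the standard socket module (hodlib.Common.tcp's tcpSocket wrapper is internal to HOD and not reproduced here; the timeout value is an assumption):

    import socket

    def probe(address, timeout=5.0):
        # address is a "host:port" string, e.g. gridservice-hdfs.host
        # joined with fs_port; True if a TCP connection can be opened.
        host, port = address.rsplit(":", 1)
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.settimeout(timeout)
        try:
            s.connect((host, int(port)))
            return True
        except socket.error:
            return False
        finally:
            s.close()

    # probe("localhost:8020") -> True if an external NameNode answers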
- except KeyboardInterrupt: - print HOD_INTERRUPTED_MESG - sys.exit(HOD_INTERRUPTED_CODE) - - opCode = 0 - try: - if hodConfig['hod'].has_key('script'): - opCode = hod.script() - else: - opCode = hod.operation() - except Exception, e: - print "Uncaught Exception : %s" % e - finally: - sys.exit(opCode) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup b/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup deleted file mode 100755 index 51613eae0a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hodcleanup +++ /dev/null @@ -1,183 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -original_dir=$PWD -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -u -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -u -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -u -OO $base_name ${1+"$@"} -else - exec python -u -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, pwd, threading, sys, random, time, pprint, shutil, time, re -from pprint import pformat -from optparse import OptionParser - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.Common.threads import simpleCommand -from hodlib.Common.util import local_fqdn, tar, filter_warnings,\ - get_exception_string, get_exception_error_string -from hodlib.Common.logger import hodLog -from hodlib.Common.logger import getLogger -from hodlib.HodRing.hodRing import createMRSystemDirectoryManager - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") -reHdfsURI = re.compile("(hdfs://.*?:\d+)(.*)") - -VERSION = None -if os.path.exists("./VERSION"): - vFile = open("./VERSION", 'r') - VERSION = vFile.readline() - vFile.close() - -def __archive_logs(conf, log): - # need log-destination-uri, __hadoopLogDirs, temp-dir - status = True - logUri = conf['log-destination-uri'] - hadoopLogDirs = conf['hadoop-log-dirs'] - if logUri: - try: - if hadoopLogDirs: - date = time.localtime() - for logDir in hadoopLogDirs: - (head, tail) = os.path.split(logDir) - (head, logType) = os.path.split(head) - tarBallFile = "%s-%s-%04d%02d%02d%02d%02d%02d-%s.tar.gz" % ( - logType, local_fqdn(), date[0], date[1], date[2], date[3], - date[4], date[5], random.randint(0,1000)) - - if logUri.startswith('file://'): - tarBallFile = os.path.join(logUri[7:], - tarBallFile) - else: - tarBallFile = os.path.join(conf['temp-dir'], tarBallFile) - - log.debug('archiving log files to: %s' % tarBallFile) - status = 
tar(tarBallFile, logDir, ['*',]) - log.info('archive %s status: %s' % (tarBallFile, status)) - if status and \ - logUri.startswith('hdfs://'): - __copy_archive_to_dfs(conf, tarBallFile) - log.info("copying archive to dfs finished") - dict = {} - except: - log.error(get_exception_string()) - status = False - return status - - -def __copy_archive_to_dfs(conf, archiveFile): - # need log-destination-uri, hadoopCommandstring and/or pkgs - hdfsURIMatch = reHdfsURI.match(conf['log-destination-uri']) - - (head, tail) = os.path.split(archiveFile) - destFile = os.path.join(hdfsURIMatch.group(2), conf['user-id'], 'hod-logs', conf['service-id'], tail) - - log.info("copying archive %s to DFS %s ..." % (archiveFile, destFile)) - - hadoopCmd = conf['hadoop-command-string'] - if conf['pkgs']: - hadoopCmd = os.path.join(conf['pkgs'], 'bin', 'hadoop') - - copyCommand = "%s dfs -fs %s -copyFromLocal %s %s" % (hadoopCmd, - hdfsURIMatch.group(1), archiveFile, destFile) - - log.debug(copyCommand) - - copyThread = simpleCommand('hadoop', copyCommand) - copyThread.start() - copyThread.wait() - copyThread.join() - log.debug(pprint.pformat(copyThread.output())) - - os.unlink(archiveFile) - -def unpack(): - parser = OptionParser() - option_list=["--log-destination-uri", "--hadoop-log-dirs", \ - "--temp-dir", "--hadoop-command-string", "--pkgs", "--user-id", \ - "--service-id", "--hodring-debug", "--hodring-log-dir", \ - "--hodring-syslog-address", "--hodring-cleanup-list", \ - "--jt-pid", "--mr-sys-dir", "--fs-name", "--hadoop-path"] - regexp = re.compile("^--") - for opt in option_list: - parser.add_option(opt,dest=regexp.sub("",opt),action="store") - option_list.append("--hodring-stream") - parser.add_option("--hodring-stream",dest="hodring-stream",metavar="bool",\ - action="store_true") - (options, args) = parser.parse_args() - _options= {} - _options['hodring'] = {} - for opt in dir(options): - if "--"+opt in option_list: - _options[opt] = getattr(options,opt) - if _options.has_key('hadoop-log-dirs') and _options['hadoop-log-dirs']: - _options['hadoop-log-dirs'] = _options['hadoop-log-dirs'].split(",") - if _options.has_key('hodring-syslog-address') and _options['hodring-syslog-address']: - _options['hodring']['syslog-address'] = \ - _options['hodring-syslog-address'].split(':') - _options['hodring']['debug'] = int(_options['hodring-debug']) - _options['hodring']['log-dir'] = _options['hodring-log-dir'] - _options['hodring']['stream'] = _options['hodring-stream'] - _options['hodring']['userid'] = _options['user-id'] - os.putenv('PBS_JOBID', _options['service-id'] ) - return _options - -if __name__ == '__main__': - log = None - try: - conf = unpack() - # Use the same log as hodring - log = getLogger(conf['hodring'],'hodring') - log.debug("Logger initialised successfully") - mrSysDirManager = createMRSystemDirectoryManager(conf, log) - if mrSysDirManager is not None: - mrSysDirManager.removeMRSystemDirectory() - - status = __archive_logs(conf,log) - log.info("Archive status : %s" % status) - list = conf['hodring-cleanup-list'].split(',') - log.info("now removing %s" % list) - for dir in list: - if os.path.exists(dir): - log.debug('removing %s' % (dir)) - shutil.rmtree(dir, True) - log.debug("done") - log.info("Cleanup successfully completed") - except Exception, e: - if log: - log.info("Stack trace:\n%s\n%s" %(get_exception_error_string(),get_exception_string())) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/hodring b/third_party/hadoop-0.20.0/contrib/hod/bin/hodring deleted file mode 100755 index 
1bb891c540..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/hodring +++ /dev/null @@ -1,287 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -OO $base_name ${1+"$@"} -else - exec python -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re - - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.HodRing.hodRing import HodRing -from hodlib.Common.setup import * -from hodlib.Common.util import filter_warnings, get_exception_string, \ - get_exception_error_string, getMapredSystemDirectory, \ - to_http_url, local_fqdn -from hodlib.Common.logger import getLogger, ensureLogDir -from hodlib.Common.xmlrpc import hodXRClient - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = '$HeadURL$' - -reMatch = reVersion.match(VERSION) -if reMatch: - VERSION = reMatch.group(1) - VERSION = re.sub("_", ".", VERSION) -else: - VERSION = 'DEV' - -# Definition tuple is of the form: -# (name, type, description, default value, required?, validate?) 
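The definition tuples that follow drive HOD's option parsing: a fixed head of (name, type, description) and a tail carrying the help flag, default value, required flag, validate flag and an optional short option letter. A rough sketch of folding such a table into a schema of defaults and required flags (the real consumer is hodlib.Common.setup's definition/options classes, which are not part of this hunk; the names below are illustrative only):

    def to_schema(def_list):
        schema = {}
        for section, entries in def_list.items():
            for entry in entries:
                # Leading fields are fixed; index 4 is the default
                # value and index 5 the required flag, per the layout
                # documented in bin/hod.
                name, vtype, desc = entry[0], entry[1], entry[2]
                schema.setdefault(section, {})[name] = {
                    'type': vtype,
                    'description': desc,
                    'default': entry[4],
                    'required': entry[5],
                }
        return schema

    # to_schema(defList)['hodring']['java-home']['required'] -> True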
-# -defList = { 'hodring' : ( - ('temp-dir', 'directory', 'hod work directories', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, True), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'string', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Service registry XMLRPC address.', - False, None, True, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download', - False, 1, False, True), - - ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands', - False, 2, False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - -if __name__ == '__main__': - - confDef = definition() - confDef.add_defs(defList) - hodRingOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION) - ensureLogDir(hodRingOptions['hodring']['log-dir']) - service = None - try: - (status, statusMsgs) = hodRingOptions.verify() - if not status: - raise Exception("%s" % statusMsgs) - hodRingOptions['hodring']['base-dir'] = rootDirectory - service = HodRing(hodRingOptions) - service.start() - service.wait() - - if service.log: - log = service.log - else: - log = getLogger(hodRingOptions['hodring'],'hodring') - - list = [] - - runningHadoops = service.getRunningValues() - - mrSysDirManager = None - for cmd in runningHadoops: - if cmd.name == 'jobtracker': - mrSysDirManager = cmd.getMRSystemDirectoryManager() - log.debug("addding %s to cleanup list..." 
% cmd) - cmd.addCleanup(list) - - list.append(service.getTempDir()) - log.debug(list) - - # archive_logs now - cmdString = os.path.join(rootDirectory, "bin", "hodcleanup") # same python - - if (len(runningHadoops) == 0): - log.info("len(runningHadoops) == 0, No running cluster?") - log.info("Skipping __copy_archive_to_dfs") - hadoopString = "" - else: hadoopString=runningHadoops[0].path - - #construct the arguments - if hodRingOptions['hodring'].has_key('log-destination-uri'): - cmdString = cmdString + " --log-destination-uri " \ - + hodRingOptions['hodring']['log-destination-uri'] - - hadoopLogDirs = service.getHadoopLogDirs() - if hadoopLogDirs: - cmdString = cmdString \ - + " --hadoop-log-dirs " \ - + ",".join(hadoopLogDirs) - - cmdString = cmdString \ - + " --temp-dir " \ - + service._cfg['temp-dir'] \ - + " --hadoop-command-string " \ - + hadoopString \ - + " --user-id " \ - + service._cfg['userid'] \ - + " --service-id " \ - + service._cfg['service-id'] \ - + " --hodring-debug " \ - + str(hodRingOptions['hodring']['debug']) \ - + " --hodring-log-dir " \ - + hodRingOptions['hodring']['log-dir'] \ - + " --hodring-cleanup-list " \ - + ",".join(list) - - if hodRingOptions['hodring'].has_key('syslog-address'): - syslogAddr = hodRingOptions['hodring']['syslog-address'][0] + \ - ':' + str(hodRingOptions['hodring']['syslog-address'][1]) - cmdString = cmdString + " --hodring-syslog-address " + syslogAddr - if service._cfg.has_key('pkgs'): - cmdString = cmdString + " --pkgs " + service._cfg['pkgs'] - - if mrSysDirManager is not None: - cmdString = "%s %s" % (cmdString, mrSysDirManager.toCleanupArgs()) - - log.info("cleanup commandstring : ") - log.info(cmdString) - - # clean up - cmd = ['/bin/sh', '-c', cmdString] - - mswindows = (sys.platform == "win32") - originalcwd = os.getcwd() - - if not mswindows: - try: - pid = os.fork() - if pid > 0: - # exit first parent - log.info("child(pid: %s) is now doing cleanup" % pid) - sys.exit(0) - except OSError, e: - log.error("fork failed: %d (%s)" % (e.errno, e.strerror)) - sys.exit(1) - - # decouple from parent environment - os.chdir("/") - os.setsid() - os.umask(0) - - MAXFD = 128 # more than enough file descriptors to close. Just in case. - for i in xrange(0, MAXFD): - try: - os.close(i) - except OSError: - pass - - try: - os.execvp(cmd[0], cmd) - finally: - log.critical("exec failed") - os._exit(1) - - except Exception, e: - if service: - if service.log: - log = service.log - else: - log = getLogger(hodRingOptions['hodring'], 'hodring') - log.error("Error in bin/hodring %s. 
\nStack trace:\n%s" %(get_exception_error_string(),get_exception_string())) - - log.info("now trying informing to ringmaster") - log.info(hodRingOptions['hodring']['ringmaster-xrs-addr']) - log.info(hodRingOptions.normalizeValue('hodring', 'ringmaster-xrs-addr')) - log.info(to_http_url(hodRingOptions.normalizeValue( \ - 'hodring', 'ringmaster-xrs-addr'))) - # Report errors to the Ringmaster if possible - try: - ringXRAddress = to_http_url(hodRingOptions.normalizeValue( \ - 'hodring', 'ringmaster-xrs-addr')) - log.debug("Creating ringmaster XML-RPC client.") - ringClient = hodXRClient(ringXRAddress) - if ringClient is not None: - addr = local_fqdn() + "_" + str(os.getpid()) - ringClient.setHodRingErrors(addr, str(e)) - log.info("Reported errors to ringmaster at %s" % ringXRAddress) - except Exception, e: - log.error("Failed to report errors to ringmaster at %s" % ringXRAddress) - log.error("Reason : %s" % get_exception_string()) - # End of reporting errors to the client diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster b/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster deleted file mode 100755 index fc194f6d27..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/ringmaster +++ /dev/null @@ -1,349 +0,0 @@ -#!/bin/sh - -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -""":" -work_dir=$(dirname $0) -base_name=$(basename $0) -cd $work_dir - -if [ $HOD_PYTHON_HOME ]; then - exec $HOD_PYTHON_HOME -OO $base_name ${1+"$@"} -elif [ -e /usr/bin/python ]; then - exec /usr/bin/python -OO $base_name ${1+"$@"} -elif [ -e /usr/local/bin/python ]; then - exec /usr/local/bin/python -OO $base_name ${1+"$@"} -else - exec python -OO $base_name ${1+"$@"} -fi -":""" - -"""The executable to be used by the user""" -import sys, os, re, getpass - -myName = os.path.basename(sys.argv[0]) -myName = re.sub(".*/", "", myName) -binDirectory = os.path.realpath(sys.argv[0]) -rootDirectory = re.sub("/bin/.*", "", binDirectory) -libDirectory = rootDirectory - -sys.path.append(libDirectory) - -from hodlib.RingMaster.ringMaster import main -from hodlib.Common.setup import * -from hodlib.Common.descGenerator import * -from hodlib.Common.util import local_fqdn, filter_warnings, to_http_url, \ - get_exception_string, get_exception_error_string -from hodlib.Common.logger import getLogger, ensureLogDir -from hodlib.Common.xmlrpc import hodXRClient -import logging - -filter_warnings() - -reVersion = re.compile(".*(\d+_\d+).*") - -VERSION = '$HeadURL$' - -reMatch = reVersion.match(VERSION) -if reMatch: - VERSION = reMatch.group(1) - VERSION = re.sub("_", ".", VERSION) -else: - VERSION = 'DEV' - -# Definition tuple is of the form: -# (name, type, description, default value, required?, validate?) 
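Several options in these tables are of type 'keyval' (server-params, env-vars, options): a comma-separated list of key=value pairs. A minimal sketch of parsing that shape (HOD's own parser lives in hodlib and, as config.txt notes below, special characters such as comma and equals inside values are not handled; the parameter names in the comment are only examples):

    def parse_keyval(text):
        pairs = {}
        for item in text.split(','):
            if not item.strip():
                continue
            key, _, value = item.partition('=')
            pairs[key.strip()] = value.strip()
        return pairs

    # parse_keyval('mapred.reduce.tasks=3,io.sort.mb=100')
    # -> {'mapred.reduce.tasks': '3', 'io.sort.mb': '100'}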
-# -defList = { 'ringmaster' : ( - ('work-dirs', 'list', 'hod work directories', - False, None, True, False), - - ('temp-dir', 'directory', 'Ringmaster temporary directory.', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('hadoop-tar-ball', 'uri', 'hadoop program tar ball.', - False, None, False, False), - - ('max-connect','pos_int','max connections allowed for a single tarball server', - False, 30, False, True), - - ('jt-poll-interval', 'pos_int', 'How often to poll the Job tracker for idleness', - False, 120, False, True), - - ('idleness-limit', 'pos_int', 'Limit after which to deallocate the cluster', - False, 3600, False, True), - - ('max-master-failures', 'pos_int', - 'Defines how many times a master can fail before' \ - ' failing cluster allocation', False, 5, True, True), - - ('workers_per_ring', 'pos_int', 'Defines number of workers per service per hodring', - False, 1, False, True)), - - 'resource_manager' : ( - ('id', 'string', 'Batch scheduler ID: torque|condor.', - False, None, True, True), - - ('pbs-user', 'user_account', 'User ID jobs are submitted under.', - False, None, False, True), - - ('pbs-server', 'hostname', 'Hostname of PBS server.', - False, None, False, True), - - ('pbs-account', 'string', 'User Account jobs are submitted under.', - False, None, False, False), - - ('queue', 'string', 'Queue of the batch scheduler to query.', - False, None, False, False), - - ('batch-home', 'directory', 'Scheduler installation directory.', - False, None, True, True), - - ('options', 'keyval', 'Options to pass to the scheduler.', - False, None, False, True), - - ('env-vars', 'keyval', 'Environment variables to pass to the submitted jobs.', - False, None, False, True)), - - 'gridservice-mapred' : ( - ('external', 'bool', "Connect to an already running MapRed?", - False, False, True, True), - - ('host', 'hostname', 'Mapred hostname.', - False, 'localhost', False, True), - - ('info_port', 'pos_int', 'Mapred info port.', - False, None, True, True), - - ('tracker_port', 'pos_int', 'Mapred job tracker port.', - False, None, True, True), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, False), - - ('final-server-params', 'keyval', 'Hadoop final xml params', - False, None, False, False), - - ('envs', 'keyval', 'environment to run this package in', - False, None, False, False), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'gridservice-hdfs' : ( - ('external', 'bool', "Connect to an already running HDFS?", - False, False, True, True), - - ('host', 'hostname', 'HDFS hostname.', - False, 'localhost', True, True), - - ('fs_port', 
'pos_int', 'HDFS port range.', - False, None, True, True), - - ('info_port', 'pos_int', 'HDFS info port.', - False, None, True, True), - - ('cmdline-params', 'keyval', 'Hadoop cmdline key/value list.', - False, None, False, False), - - ('server-params', 'keyval', 'Hadoop xml key/value list', - False, None, False, False), - - ('final-server-params', 'keyval', 'Hadoop final xml params', - False, None, False, False), - - ('envs', 'keyval', 'Environment in which to run this package.', - False, None, False, False), - - ('pkgs', 'directory', "directory where the package is installed", - False, None, False, False)), - - - 'hodring' : ( - ('temp-dir', 'directory', 'hod work directories', - False, None, True, False), - - ('log-dir', 'directory', 'hod logging directory.', - False, os.path.join(rootDirectory, 'logs'), False, False), - - ('log-destination-uri', 'string', - 'URI to store logs to, local://some_path or ' - + 'hdfs://host:port/some_path', - False, None, False, True), - - ('pkgs', 'directory', 'Path to Hadoop to use in case of uploading to HDFS', - False, None, False, True), - - ('syslog-address', 'address', 'Syslog address.', - False, None, False, True), - - ('java-home', 'directory', 'Java home directory.', - False, None, True, False), - - ('debug', 'pos_int', 'Debugging level, 0-4.', - False, 3, True, True), - - ('register', 'bool', 'Register with service registry?', - False, True, True, True), - - ('stream', 'bool', 'Output to stderr.', - False, False, False, True), - - ('userid', 'user_account', - 'User ID the hod shell is running under.', - False, None, True, False), - - ('xrs-port-range', 'range', 'XML-RPC port range n-m.', - False, None, True, True), - - ('http-port-range', 'range', 'HTTP port range n-m.', - False, None, True, True), - - ('command', 'string', 'Command for hodring to run.', - False, None, False, True), - - ('service-id', 'string', 'Service ID.', - False, None, False, True), - - ('download-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('svcrgy-addr', 'address', 'Download HTTP address.', - False, None, False, True), - - ('ringmaster-xrs-addr', 'address', 'Ringmaster XML-RPC address.', - False, None, False, True), - - ('tarball-retry-initial-time', 'pos_float','initial retry time for tarball download', - False, 1, False, True), - - ('tarball-retry-interval', 'pos_float','interval to spread retries for tarball download', - False, 3, False, True), - - ('cmd-retry-initial-time', 'pos_float','initial retry time for getting commands', - False, 2, False, True), - - ('cmd-retry-interval', 'pos_float','interval to spread retries for getting commands', - False, 2, False, True), - - ('mapred-system-dir-root', 'string', 'Root under which mapreduce system directory names are generated by HOD.', - False, '/mapredsystem', False, False)) - } - - -defOrder = [ 'ringmaster', 'hodring', 'resource_manager', - 'gridservice-mapred', 'gridservice-hdfs' ] - -if __name__ == '__main__': - confDef = definition() - confDef.add_defs(defList, defOrder) - ringMasterOptions = options(confDef, "./%s [OPTIONS]" % myName, VERSION) - log = logging.getLogger() - - try: - - # Set up logging before anything else. 
- ensureLogDir(ringMasterOptions.normalizeValue('ringmaster', 'log-dir')) - log = getLogger(ringMasterOptions['ringmaster'],'ringmaster') - # End of setting up logging - - # Verify and process options - statusMsgs = [] - # Conditional validation - if not ringMasterOptions['ringmaster'].has_key('hadoop-tar-ball') or \ - not ringMasterOptions['ringmaster']['hadoop-tar-ball']: - # If tarball is not used - if not ringMasterOptions.normalizeValue('gridservice-hdfs', 'external'): - # And if hdfs is not external, validate gridservice-hdfs.pkgs - statusMsgs.extend(ringMasterOptions.validateValue( - 'gridservice-hdfs', 'pkgs')) - statusMsgs.extend(ringMasterOptions.validateValue( - 'gridservice-mapred', 'pkgs')) - - if len(statusMsgs) != 0: - # format status messages into a single string - errStr = '' - for msg in statusMsgs: - errStr = "%s%s\n" % (errStr, msg) - raise Exception("%s" % errStr) - # End of conditional validation - - (status, statusMsgs) = ringMasterOptions.verify() - if not status: - # format status messages into a single string - errStr = '' - for msg in statusMsgs: - errStr = "%s%s\n" % (errStr, msg) - raise Exception("%s" % errStr) - - ringMasterOptions.replace_escape_seqs() - ringMasterOptions['ringmaster']['base-dir'] = rootDirectory - # End of option processing - - ret = main(ringMasterOptions,log) - sys.exit(ret) - except Exception, e: - log.error("bin/ringmaster failed to start.%s. Stack trace follows:\n%s" % (get_exception_error_string(),get_exception_string())) - - # Report errors to the client if possible - try: - serviceAddr = to_http_url(ringMasterOptions.normalizeValue( \ - 'ringmaster', 'svcrgy-addr')) - serviceClient = hodXRClient(serviceAddr) - if serviceClient is not None: - serviceClient.setRMError([local_fqdn(), str(e), \ - get_exception_string()]) - log.info("Reported errors to service registry at %s" % serviceAddr) - except Exception, e: - log.error("Failed to report errors to service registry.") - log.error("Reason : %s" % get_exception_string()) - # End of reporting errors to the client - - # Ringmaster failing to start is a ringmaster error. Exit with the appropriate exit code. - sys.exit(6) diff --git a/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account b/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account deleted file mode 100755 index 65aa79ab44..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/bin/verify-account +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# This script file is a stub for systems that might want to include -# checks for the account name that is passed to HOD. It will be -# launched by HOD with the account name as an argument. The script -# should return a zero exit code if the account is valid, and a -# non zero exit code otherwise. Any output that the script generates -# would be reported to the user by HOD, in case of a non-zero exit -# code. -# -# By default, the script does nothing and returns a zero exit code. 
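A site that wants an actual check in verify-account could replace the stub body with something along these lines (a hypothetical Python sketch in the style of the other HOD bin scripts; treating the account name as a local user name is an assumption, not HOD behaviour):

    #!/usr/bin/env python
    # Hypothetical verify-account: exit 0 only if the account name
    # passed as argv[1] maps to a known local user; any output here
    # is relayed to the user by HOD on a non-zero exit.
    import pwd, sys

    if len(sys.argv) != 2:
        print "usage: verify-account <account>"
        sys.exit(2)
    try:
        pwd.getpwnam(sys.argv[1])
    except KeyError:
        print "unknown account: %s" % sys.argv[1]
        sys.exit(1)
    sys.exit(0)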
-exit 0 diff --git a/third_party/hadoop-0.20.0/contrib/hod/build.xml b/third_party/hadoop-0.20.0/contrib/hod/build.xml deleted file mode 100644 index e16b36dacf..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/build.xml +++ /dev/null @@ -1,81 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc b/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc deleted file mode 100644 index bc2866d4c8..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/conf/hodrc +++ /dev/null @@ -1,46 +0,0 @@ -[hod] -stream = True -java-home = ${JAVA_HOME} -cluster = ${CLUSTER_NAME} -cluster-factor = 1.8 -xrs-port-range = 32768-65536 -debug = 3 -allocate-wait-time = 3600 -temp-dir = /tmp/hod - -[ringmaster] -register = True -stream = False -temp-dir = /tmp/hod -http-port-range = 8000-9000 -work-dirs = /tmp/hod/1,/tmp/hod/2 -xrs-port-range = 32768-65536 -debug = 3 - -[hodring] -stream = False -temp-dir = /tmp/hod -register = True -java-home = ${JAVA_HOME} -http-port-range = 8000-9000 -xrs-port-range = 32768-65536 -debug = 3 - -[resource_manager] -queue = ${RM_QUEUE} -batch-home = ${RM_HOME} -id = torque -#env-vars = HOD_PYTHON_HOME=/foo/bar/python-2.5.1/bin/python - -[gridservice-mapred] -external = False -pkgs = ${HADOOP_HOME} -tracker_port = 8030 -info_port = 50080 - -[gridservice-hdfs] -external = False -pkgs = ${HADOOP_HOME} -fs_port = 8020 -info_port = 50070 - diff --git a/third_party/hadoop-0.20.0/contrib/hod/config.txt b/third_party/hadoop-0.20.0/contrib/hod/config.txt deleted file mode 100644 index ca894a702a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/config.txt +++ /dev/null @@ -1,172 +0,0 @@ - HOD Configuration - ================= - -1. Introduction: -================ - -Configuration options for HOD are organized as sections and options -within them. They can be specified in two ways: a configuration file -in the INI format, and as command line options to the HOD shell, -specified in the format --section.option[=value]. If the same option is -specified in both places, the value specified on the command line -overrides the value in the configuration file. - -To get a simple description of all configuration options, you can type - hod --verbose-help - -This document explains some of the most important or commonly used -configuration options in some more detail. - -2. Sections: -============ - -The following are the various sections in the HOD configuration: - - * hod: Options for the HOD client - * resource_manager: Options for specifying which resource - manager to use, and other parameters for - using that resource manager - * ringmaster: Options for the RingMaster process, - * hodring: Options for the HodRing processes - * gridservice-mapred: Options for the MapReduce daemons - * gridservice-hdfs: Options for the HDFS daemons. - -The following are some of the important options in the HOD -configuration: - -3. Important / Commonly Used Configuration Options: -=================================================== - -3.1. Common configuration options: ----------------------------------- - -Certain configuration options are defined in most of the sections of -the HOD configuration. Options defined in a section, are used by the -process for which that section applies. These options have the same -meaning, but can have different values in each section. - -* temp-dir: Temporary directory for usage by the HOD processes. 
Make - sure that the users who will run hod have rights to create - directories under the directory specified here. - -* debug: A numeric value from 1-4. 4 produces the most log information, - and 1 the least. - -* log-dir: Directory where log files are stored. By default, this is - /logs/. The restrictions and notes for the - temp-dir variable apply here too. - -* xrs-port-range: A range of ports, among which an available port shall - be picked for use to run an XML-RPC server. - -* http-port-range: A range of ports, among which an available port shall - be picked for use to run an HTTP server. - -* java-home: Location of Java to be used by Hadoop. - -3.2 hod options: ----------------- - -* cluster: A descriptive name given to the cluster. For Torque, this is - specified as a 'Node property' for every node in the cluster. - HOD uses this value to compute the number of available nodes. - -* client-params: A comma-separated list of hadoop config parameters - specified as key-value pairs. These will be used to - generate a hadoop-site.xml on the submit node that - should be used for running MapReduce jobs. - -3.3 resource_manager options: ------------------------------ - -* queue: Name of the queue configured in the resource manager to which - jobs are to be submitted. - -* batch-home: Install directory to which 'bin' is appended and under - which the executables of the resource manager can be - found. - -* env-vars: This is a comma separated list of key-value pairs, - expressed as key=value, which would be passed to the jobs - launched on the compute nodes. - For example, if the python installation is - in a non-standard location, one can set the environment - variable 'HOD_PYTHON_HOME' to the path to the python - executable. The HOD processes launched on the compute nodes - can then use this variable. - -3.4 ringmaster options: ------------------------ - -* work-dirs: These are a list of comma separated paths that will serve - as the root for directories that HOD generates and passes - to Hadoop for use to store DFS / MapReduce data. For e.g. - this is where DFS data blocks will be stored. Typically, - as many paths are specified as there are disks available - to ensure all disks are being utilized. The restrictions - and notes for the temp-dir variable apply here too. - -3.5 gridservice-hdfs options: ------------------------------ - -* external: If false, this indicates that a HDFS cluster must be - bought up by the HOD system, on the nodes which it - allocates via the allocate command. Note that in that case, - when the cluster is de-allocated, it will bring down the - HDFS cluster, and all the data will be lost. - If true, it will try and connect to an externally configured - HDFS system. - Typically, because input for jobs are placed into HDFS - before jobs are run, and also the output from jobs in HDFS - is required to be persistent, an internal HDFS cluster is - of little value in a production system. However, it allows - for quick testing. - -* host: Hostname of the externally configured NameNode, if any - -* fs_port: Port to which NameNode RPC server is bound. - -* info_port: Port to which the NameNode web UI server is bound. - -* pkgs: Installation directory, under which bin/hadoop executable is - located. This can be used to use a pre-installed version of - Hadoop on the cluster. - -* server-params: A comma-separated list of hadoop config parameters - specified key-value pairs. These will be used to - generate a hadoop-site.xml that will be used by the - NameNode and DataNodes. 
- -* final-server-params: Same as above, except they will be marked final. - - -3.6 gridservice-mapred options: -------------------------------- - -* external: If false, this indicates that a MapReduce cluster must be - bought up by the HOD system on the nodes which it allocates - via the allocate command. - If true, if will try and connect to an externally - configured MapReduce system. - -* host: Hostname of the externally configured JobTracker, if any - -* tracker_port: Port to which the JobTracker RPC server is bound - -* info_port: Port to which the JobTracker web UI server is bound. - -* pkgs: Installation directory, under which bin/hadoop executable is - located - -* server-params: A comma-separated list of hadoop config parameters - specified key-value pairs. These will be used to - generate a hadoop-site.xml that will be used by the - JobTracker and TaskTrackers - -* final-server-params: Same as above, except they will be marked final. - -4. Known Issues: -================ - -HOD does not currently handle special characters such as space, comma -and equals in configuration values. diff --git a/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt b/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt deleted file mode 100644 index ae2b0738f9..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/getting_started.txt +++ /dev/null @@ -1,233 +0,0 @@ - Getting Started With Hadoop On Demand (HOD) - =========================================== - -1. Pre-requisites: -================== - -Hardware: -HOD requires a minimum of 3 nodes configured through a resource manager. - -Software: -The following components are assumed to be installed before using HOD: -* Torque: - (http://www.clusterresources.com/pages/products/torque-resource-manager.php) - Currently HOD supports Torque out of the box. We assume that you are - familiar with configuring Torque. You can get information about this - from the following link: - http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki -* Python (http://www.python.org/) - We require version 2.5.1 of Python. - -The following components can be optionally installed for getting better -functionality from HOD: -* Twisted Python: This can be used for improving the scalability of HOD - (http://twistedmatrix.com/trac/) -* Hadoop: HOD can automatically distribute Hadoop to all nodes in the - cluster. However, it can also use a pre-installed version of Hadoop, - if it is available on all nodes in the cluster. - (http://hadoop.apache.org/core) - HOD currently supports Hadoop 0.15 and above. - -NOTE: HOD configuration requires the location of installs of these -components to be the same on all nodes in the cluster. It will also -make the configuration simpler to have the same location on the submit -nodes. - -2. Resource Manager Configuration Pre-requisites: -================================================= - -For using HOD with Torque: -* Install Torque components: pbs_server on a head node, pbs_moms on all - compute nodes, and PBS client tools on all compute nodes and submit - nodes. -* Create a queue for submitting jobs on the pbs_server. -* Specify a name for all nodes in the cluster, by setting a 'node - property' to all the nodes. - This can be done by using the 'qmgr' command. For example: - qmgr -c "set node node properties=cluster-name" -* Ensure that jobs can be submitted to the nodes. This can be done by - using the 'qsub' command. 
For example: - echo "sleep 30" | qsub -l nodes=3 -* More information about setting up Torque can be found by referring - to the documentation under: -http://www.clusterresources.com/pages/products/torque-resource-manager.php - -3. Setting up HOD: -================== - -* HOD is available under the 'contrib' section of Hadoop under the root - directory 'hod'. -* Distribute the files under this directory to all the nodes in the - cluster. Note that the location where the files are copied should be - the same on all the nodes. -* On the node from where you want to run hod, edit the file hodrc - which can be found in the /conf directory. This file - contains the minimal set of values required for running hod. -* Specify values suitable to your environment for the following - variables defined in the configuration file. Note that some of these - variables are defined at more than one place in the file. - - * ${JAVA_HOME}: Location of Java for Hadoop. Hadoop supports Sun JDK - 1.5.x - * ${CLUSTER_NAME}: Name of the cluster which is specified in the - 'node property' as mentioned in resource manager configuration. - * ${HADOOP_HOME}: Location of Hadoop installation on the compute and - submit nodes. - * ${RM_QUEUE}: Queue configured for submiting jobs in the resource - manager configuration. - * ${RM_HOME}: Location of the resource manager installation on the - compute and submit nodes. - -* The following environment variables *may* need to be set depending on - your environment. These variables must be defined where you run the - HOD client, and also be specified in the HOD configuration file as the - value of the key resource_manager.env-vars. Multiple variables can be - specified as a comma separated list of key=value pairs. - - * HOD_PYTHON_HOME: If you install python to a non-default location - of the compute nodes, or submit nodes, then, this variable must be - defined to point to the python executable in the non-standard - location. - - -NOTE: - -You can also review other configuration options in the file and -modify them to suit your needs. Refer to the file config.txt for -information about the HOD configuration. - - -4. Running HOD: -=============== - -4.1 Overview: -------------- - -A typical session of HOD will involve atleast three steps: allocate, -run hadoop jobs, deallocate. - -4.1.1 Operation allocate ------------------------- - -The allocate operation is used to allocate a set of nodes and install and -provision Hadoop on them. It has the following syntax: - - hod -c config_file -t hadoop_tarball_location -o "allocate \ - cluster_dir number_of_nodes" - -The hadoop_tarball_location must be a location on a shared file system -accesible from all nodes in the cluster. Note, the cluster_dir must exist -before running the command. If the command completes successfully then -cluster_dir/hadoop-site.xml will be generated and will contain information -about the allocated cluster's JobTracker and NameNode. - -For example, the following command uses a hodrc file in ~/hod-config/hodrc and -allocates Hadoop (provided by the tarball ~/share/hadoop.tar.gz) on 10 nodes, -storing the generated Hadoop configuration in a directory named -~/hadoop-cluster: - - $ hod -c ~/hod-config/hodrc -t ~/share/hadoop.tar.gz -o "allocate \ - ~/hadoop-cluster 10" - -HOD also supports an environment variable called HOD_CONF_DIR. If this is -defined, HOD will look for a default hodrc file at $HOD_CONF_DIR/hodrc. 
-Defining this allows the above command to also be run as follows: - - $ export HOD_CONF_DIR=~/hod-config - $ hod -t ~/share/hadoop.tar.gz -o "allocate ~/hadoop-cluster 10" - -4.1.2 Running Hadoop jobs using the allocated cluster ------------------------------------------------------ - -Now, one can run Hadoop jobs using the allocated cluster in the usual manner: - - hadoop --config cluster_dir hadoop_command hadoop_command_args - -Continuing our example, the following command will run a wordcount example on -the allocated cluster: - - $ hadoop --config ~/hadoop-cluster jar \ - /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output - -4.1.3 Operation deallocate --------------------------- - -The deallocate operation is used to release an allocated cluster. When -finished with a cluster, deallocate must be run so that the nodes become free -for others to use. The deallocate operation has the following syntax: - - hod -o "deallocate cluster_dir" - -Continuing our example, the following command will deallocate the cluster: - - $ hod -o "deallocate ~/hadoop-cluster" - -4.2 Command Line Options ------------------------- - -This section covers the major command line options available via the hod -command: - ---help -Prints out the help message to see the basic options. - ---verbose-help -All configuration options provided in the hodrc file can be passed on the -command line, using the syntax --section_name.option_name[=value]. When -provided this way, the value provided on command line overrides the option -provided in hodrc. The verbose-help command lists all the available options in -the hodrc file. This is also a nice way to see the meaning of the -configuration options. - --c config_file -Provides the configuration file to use. Can be used with all other options of -HOD. Alternatively, the HOD_CONF_DIR environment variable can be defined to -specify a directory that contains a file named hodrc, alleviating the need to -specify the configuration file in each HOD command. - --b 1|2|3|4 -Enables the given debug level. Can be used with all other options of HOD. 4 is -most verbose. - --o "help" -Lists the operations available in the operation mode. - --o "allocate cluster_dir number_of_nodes" -Allocates a cluster on the given number of cluster nodes, and store the -allocation information in cluster_dir for use with subsequent hadoop commands. -Note that the cluster_dir must exist before running the command. - --o "list" -Lists the clusters allocated by this user. Information provided includes the -Torque job id corresponding to the cluster, the cluster directory where the -allocation information is stored, and whether the Map/Reduce daemon is still -active or not. - --o "info cluster_dir" -Lists information about the cluster whose allocation information is stored in -the specified cluster directory. - --o "deallocate cluster_dir" -Deallocates the cluster whose allocation information is stored in the -specified cluster directory. - --t hadoop_tarball -Provisions Hadoop from the given tar.gz file. This option is only applicable -to the allocate operation. For better distribution performance it is -recommended that the Hadoop tarball contain only the libraries and binaries, -and not the source or documentation. - --Mkey1=value1 -Mkey2=value2 -Provides configuration parameters for the provisioned Map/Reduce daemons -(JobTracker and TaskTrackers). 
-
--Hkey1=value1 -Hkey2=value2
-Provides configuration parameters for the provisioned HDFS daemons (NameNode
-and DataNodes). A hadoop-site.xml is generated with these values on the
-cluster nodes.
-
--Ckey1=value1 -Ckey2=value2
-Provides configuration parameters for the client from which jobs are
-submitted. A hadoop-site.xml is generated with these values on the submit
-node.
diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py
deleted file mode 100644
index 56759d7963..0000000000
--- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-
diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py
deleted file mode 100644
index 2794c50354..0000000000
--- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/AllocationManagers/goldAllocationManager.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
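-# A note on the request flow implemented below: the class builds an XML
-# request body (the tag layout is defined by the SSSRMAP protocol and is
-# not reproduced here), authenticates it for the GOLD accounting server,
-# and POSTs it over HTTP. The authentication step, as a rough sketch
-# rather than the exact wire format:
-#
-#   digest       = sha.new(body).digest()    # SHA-1 hash of the body
-#   digestStr    = base64.b64encode(digest)
-#   signatureStr = base64.b64encode(hmac.new(secret, digest, sha).digest())
-#
-# where 'secret' is the shared key read from the file named by
-# cfg['auth-file'].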
-"""Gold Allocation Manager Implementation""" -# -*- python -*- - -import sys, httplib -import sha, base64, hmac -import xml.dom.minidom - -from hodlib.Common.util import * - -class goldAllocationManager: - def __init__(self, cfg, log): - self.__GOLD_SECRET_KEY_FILE = cfg['auth-file'] - (self.__goldHost, self.__goldPort) = (cfg['allocation-manager-address'][0], - cfg['allocation-manager-address'][1]) - self.cfg = cfg - self.log = log - - def getQuote(self, user, project, ignoreErrors=True): - # Get Secret Key from File - secret = '' - try: - secretFile = open(self.__GOLD_SECRET_KEY_FILE) - secret = secretFile.readline() - except Exception, e: - self.log.error("Unable to open file %s" % self.__GOLD_SECRET_KEY_FILE) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - secretFile.close() - secret = secret.rstrip() - - # construct the SSRMAP request body - body = 'Job%s%s10' % (project, user) - - # compute digest - message = sha.new() - message.update(body) - digest = message.digest() - digestStr = base64.b64encode(digest) - - # compute signature - message = hmac.new(secret, digest, sha) - signatureStr = base64.b64encode(message.digest()) - - # construct the SSSRMAP Message - sssrmapRequest = '\ -%s%s%s' % (body, digestStr, signatureStr) - self.log.info('sssrmapRequest: %s' % sssrmapRequest) - - try: - # post message to GOLD server - webservice = httplib.HTTP(self.__goldHost, self.__goldPort) - webservice.putrequest("POST", "/SSSRMAP3 HTTP/1.1") - webservice.putheader("Content-Type", "text/xml; charset=\"utf-8\"") - webservice.putheader("Transfer-Encoding", "chunked") - webservice.endheaders() - webservice.send("%X" % len(sssrmapRequest) + "\r\n" + sssrmapRequest + '0\r\n') - - # handle the response - statusCode, statusmessage, header = webservice.getreply() - responseStr = webservice.getfile().read() - self.log.debug("httpStatusCode: %d" % statusCode) - self.log.info('responseStr: %s' % responseStr) - - # parse XML response - if (statusCode == 200): - responseArr = responseStr.split("\n") - responseBody = responseArr[2] - try: - doc = xml.dom.minidom.parseString(responseBody) - responseVal = doc.getElementsByTagName("Value")[0].firstChild.nodeValue - self.log.info("responseVal: %s" % responseVal) - if (responseVal == 'Success'): - return True - else: - return False - except Exception, e: - self.log.error("Unable to parse GOLD responseBody XML \"(%s)\" to get responseVal" % (responseBody)) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - else: - self.log.error("Invalid HTTP statusCode %d" % statusCode) - except Exception, e: - self.log.error("Unable to POST message to GOLD server (%s, %d)" % - (self.__goldHost, self.__goldPort)) - self.log.debug(get_exception_string()) - return (ignoreErrors or False) - - return True - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py
deleted file mode 100644
index 515e875070..0000000000
--- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/allocationManagerUtil.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
-"""defines Allocation Manager Utilities"""
-
-# -*- python -*-
-from hodlib.AllocationManagers.goldAllocationManager import goldAllocationManager
-
-class allocationManagerUtil:
-  def getAllocationManager(name, cfg, log):
-    """returns a concrete instance of the specified AllocationManager"""
-    if name == 'gold':
-      return goldAllocationManager(cfg, log)
-
-  getAllocationManager = staticmethod(getAllocationManager)
diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py
deleted file mode 100644
index 013e3bde02..0000000000
--- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/desc.py
+++ /dev/null
@@ -1,298 +0,0 @@
-#Licensed to the Apache Software Foundation (ASF) under one
-#or more contributor license agreements. See the NOTICE file
-#distributed with this work for additional information
-#regarding copyright ownership. The ASF licenses this file
-#to you under the Apache License, Version 2.0 (the
-#"License"); you may not use this file except in compliance
-#with the License. You may obtain a copy of the License at
-
-#     http://www.apache.org/licenses/LICENSE-2.0
-
-#Unless required by applicable law or agreed to in writing, software
-#distributed under the License is distributed on an "AS IS" BASIS,
-#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#See the License for the specific language governing permissions and
-#limitations under the License.
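-# A note on the Schema convention used below: each descriptor class exposes
-# a static getSchema() returning a dict of Schema objects, where a Schema
-# names one option and records whether its value parses as a plain string,
-# a delimited list, or a key=value map. A sketch of the pattern (the option
-# names are taken from the descriptors below):
-#
-#   schema = {}
-#   s = Schema('id')                      # plain string
-#   schema[s.getName()] = s
-#   s = Schema('pkgs', Schema.LIST, ':')  # ':'-separated list
-#   schema[s.getName()] = s
-#   s = Schema('attrs', Schema.MAP)       # key=value map
-#   schema[s.getName()] = s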
-"""manage component descriptors""" -# -*- python -*- - -import random - -from sets import Set -from pprint import pformat -from hodlib.Common.util import local_fqdn -from hodlib.Common.tcp import tcpSocket, tcpError - -class Schema: - """the primary class for describing - schema's """ - STRING, LIST, MAP = range(3) - - def __init__(self, name, type = STRING, delim=','): - self.name = name - self.type = type - self.delim = delim - - def getName(self): - return self.name - - def getType(self): - return self.type - - def getDelim(self): - return self.delim - -class _Merger: - """A class to merge lists and add key/value - pairs to a dictionary""" - def mergeList(x, y, uniq=True): - l = [] - l.extend(x) - l.extend(y) - if not uniq: - return l - - s = Set(l) - l = list(s) - return l - - mergeList = staticmethod(mergeList) - - def mergeMap(to, add): - - for k in add: - to.setdefault(k, add[k]) - - return to - - mergeMap = staticmethod(mergeMap) - -class NodePoolDesc: - """a schema for describing - Nodepools""" - def __init__(self, dict): - self.dict = dict.copy() - - self.dict.setdefault('attrs', {}) - - self._checkRequired() - - if 'options' in dict: self.dict['attrs'] = dict['options'] - - def _checkRequired(self): - - if not 'id' in self.dict: - raise ValueError, "nodepool needs 'id'" - - if self.getPkgDir() == None: - raise ValueError, "nodepool %s needs 'pkgs'" % (self.getName()) - - def getName(self): - return self.dict['id'] - - def getPkgDir(self): - return self.dict['batch-home'] - - def getAttrs(self): - return self.dict['attrs'] - - def getSchema(): - schema = {} - - s = Schema('id') - schema[s.getName()] = s - - s = Schema('batch-home', Schema.LIST, ':') - schema[s.getName()] = s - - s = Schema('attrs', Schema.MAP) - schema[s.getName()] = s - - return schema - - getSchema = staticmethod(getSchema) - -class ServiceDesc: - """A schema for describing services""" - def __init__(self, dict): - self.dict = dict.copy() - - self.dict.setdefault('external', False) - self.dict.setdefault('attrs', {}) - self.dict.setdefault('envs', {}) - self.dict.setdefault('host',None) - self.dict.setdefault('port',None) - self.dict.setdefault('tar', None) - self.dict.setdefault('pkgs', '') - self.dict.setdefault('final-attrs', {}) - self._checkRequired() - if self.dict.has_key('hadoop-tar-ball'): - self.dict['tar'] = self.dict['hadoop-tar-ball'] - - def _checkRequired(self): - - if not 'id' in self.dict: - raise ValueError, "service description needs 'id'" - -# if len(self.getPkgDirs()) <= 0: -# raise ValueError, "service description %s needs 'pkgs'" % (self.getName()) - - def getName(self): - return self.dict['id'] - - def isExternal(self): - """True if the service is outside hod. - e.g. 
connect to existing HDFS""" - - return self.dict['external'] - - def getPkgDirs(self): - return self.dict['pkgs'] - - def getTar(self): - return self.dict['tar'] - - def getAttrs(self): - return self.dict['attrs'] - - def getfinalAttrs(self): - return self.dict['final-attrs'] - - def getEnvs(self): - return self.dict['envs'] - - def getSchema(): - schema = {} - - s = Schema('id') - schema[s.getName()] = s - - s = Schema('external') - schema[s.getName()] = s - - s = Schema('pkgs', Schema.LIST, ':') - schema[s.getName()] = s - - s = Schema('tar', Schema.LIST, ":") - schema[s.getName()] = s - - s = Schema('attrs', Schema.MAP) - schema[s.getName()] = s - - s = Schema('final-attrs', Schema.MAP) - schema[s.getName()] = s - - s = Schema('envs', Schema.MAP) - schema[s.getName()] = s - - return schema - - getSchema = staticmethod(getSchema) - -class CommandDesc: - - def __init__(self, dict): - """a class for how a command is described""" - self.dict = dict - - def __repr__(self): - return pformat(self.dict) - - def _getName(self): - """return the name of the command to be run""" - return self.dict['name'] - - def _getProgram(self): - """return where the program is """ - return self.dict['program'] - - def _getArgv(self): - """return the arguments for the command to be run""" - return self.dict['argv'] - - def _getEnvs(self): - """return the environment in which the command is to be run""" - return self.dict['envs'] - - def _getPkgDirs(self): - """return the packages for this command""" - return self.dict['pkgdirs'] - - def _getWorkDirs(self): - """return the working directories for this command""" - return self.dict['workdirs'] - - def _getAttrs(self): - """return the list of attributes for this command""" - return self.dict['attrs'] - - def _getfinalAttrs(self): - """return the final xml params list for this command""" - return self.dict['final-attrs'] - - def _getForeground(self): - """return if the command is to be run in foreground or not""" - return self.dict['fg'] - - def _getStdin(self): - return self.dict['stdin'] - - def toString(cmdDesc): - """return a string representation of this command""" - row = [] - row.append('name=%s' % (cmdDesc._getName())) - row.append('program=%s' % (cmdDesc._getProgram())) - row.append('pkgdirs=%s' % CommandDesc._csv(cmdDesc._getPkgDirs(), ':')) - - if 'argv' in cmdDesc.dict: - row.append('argv=%s' % CommandDesc._csv(cmdDesc._getArgv())) - - if 'envs' in cmdDesc.dict: - envs = cmdDesc._getEnvs() - list = [] - for k in envs: - v = envs[k] - list.append('%s=%s' % (k, v)) - row.append('envs=%s' % CommandDesc._csv(list)) - - if 'workdirs' in cmdDesc.dict: - row.append('workdirs=%s' % CommandDesc._csv(cmdDesc._getWorkDirs(), ':')) - - if 'attrs' in cmdDesc.dict: - attrs = cmdDesc._getAttrs() - list = [] - for k in attrs: - v = attrs[k] - list.append('%s=%s' % (k, v)) - row.append('attrs=%s' % CommandDesc._csv(list)) - - if 'final-attrs' in cmdDesc.dict: - fattrs = cmdDesc._getAttrs() - list = [] - for k in fattrs: - v = fattrs[k] - list.append('%s=%s' % (k, v)) - row.append('final-attrs=%s' % CommandDesc._cvs(list)) - - if 'fg' in cmdDesc.dict: - row.append('fg=%s' % (cmdDesc._getForeground())) - - if 'stdin' in cmdDesc.dict: - row.append('stdin=%s' % (cmdDesc._getStdin())) - - return CommandDesc._csv(row) - - toString = staticmethod(toString) - - def _csv(row, delim=','): - """return a string in csv format""" - import cStringIO - import csv - - queue = cStringIO.StringIO() - writer = csv.writer(queue, delimiter=delim, escapechar='\\', quoting=csv.QUOTE_NONE, - 
doublequote=False, lineterminator='\n') - writer.writerow(row) - return queue.getvalue().rstrip('\n') - - _csv = staticmethod(_csv) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py deleted file mode 100644 index 03852cca64..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/descGenerator.py +++ /dev/null @@ -1,72 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""manage hod configuration""" -# -*- python -*- - -import sys, csv, os -from optparse import Option, OptionParser -from xml.dom import minidom -from sets import Set -from select import select, poll, POLLIN - -from hodlib.Common.desc import * - -class DescGenerator: - """Contains the conversion to descriptors and other method calls - to config""" - def __init__(self, hodConfig): - """parse all the descriptors""" - - self.hodConfig = hodConfig - - def initializeDesc(self): - self.hodConfig['nodepooldesc'] = self.createNodePoolDesc() - self.hodConfig['servicedesc'] = self.createServiceDescDict() - - return self.hodConfig - - def getServices(self): - """get all the services from the config""" - - sdd = {} - for keys in self.hodConfig: - if keys.startswith('gridservice-'): - str = keys.split('-') - dict = self.hodConfig[keys] - if 'server-params' in dict: dict['attrs'] = dict['server-params'] - if 'final-server-params' in dict: dict['final-attrs'] = dict['final-server-params'] - dict['id'] = str[1] - desc = ServiceDesc(dict) - sdd[desc.getName()] = desc - - return sdd - - def createNodePoolDesc(self): - """ create a node pool descriptor and store - it in hodconfig""" - - desc = NodePoolDesc(self.hodConfig['resource_manager']) - return desc - - def createServiceDescDict(self): - """create a service descriptor for - all the services and store it in the - hodconfig""" - - sdd = self.getServices() - return sdd - - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py deleted file mode 100644 index e042fe13b7..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/hodsvc.py +++ /dev/null @@ -1,228 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# -#------------------------------------------------------------------------------ -import os, time, shutil, xmlrpclib, socket, pprint - -from signal import * - -from hodlib.Common.logger import hodLog, hodDummyLogger -from hodlib.Common.socketServers import hodXMLRPCServer -from hodlib.Common.util import local_fqdn -from hodlib.Common.xmlrpc import hodXRClient - -class hodBaseService: - """hodBaseService class - This class provides service registration, logging, - and configuration access methods. It also provides an XML-RPC server. - This class should be extended to create hod services. Methods beginning - with _xr_method will automatically be added to instances of this class. - """ - def __init__(self, name, config, xrtype='threaded'): - """ Initialization requires a name string and a config object of type - hodlib.Common.setup.options or hodlib.Common.setup.config.""" - - self.name = name - self.hostname = local_fqdn() - self._cfg = config - self._xrc = None - self.logs = {} - self._baseLogger = None - self._serviceID = os.getenv('PBS_JOBID') - - self.__logDir = None - self.__svcrgy = None - self.__stop = False - self.__xrtype = xrtype - - self._init_logging() - - if name != 'serviceRegistry': self._init_signals() - self._init_xrc_server() - - def __set_logging_level(self, level): - self.logs['main'].info("Setting log level to %s." 
% level) - for loggerName in self.loggers.keys(): - self.logs['main'].set_logger_level(loggerName, level) - - def __get_logging_level(self): - if self._cfg.has_key('stream'): - return self.loggers['main'].get_level('stream', 'main') - elif self._cfg.has_key('log-dir'): - return self.loggers['main'].get_level('file', 'main') - else: - return 0 - - def _xr_method_stop(self, *args): - """XML-RPC method, calls stop() on ourselves.""" - - return self.stop() - - def _xr_method_status(self, *args): - """XML-RPC method, calls status() on ourselves.""" - - return self.status() - - def _init_logging(self): - if self._cfg.has_key('debug'): - if self._cfg['debug'] > 0: - self._baseLogger = hodLog(self.name) - self.logs['main'] = self._baseLogger.add_logger('main') - - if self._cfg.has_key('stream'): - if self._cfg['stream']: - self._baseLogger.add_stream(level=self._cfg['debug'], - addToLoggerNames=('main',)) - - if self._cfg.has_key('log-dir'): - if self._serviceID: - self.__logDir = os.path.join(self._cfg['log-dir'], "%s.%s" % ( - self._cfg['userid'], self._serviceID)) - else: - self.__logDir = os.path.join(self._cfg['log-dir'], - self._cfg['userid']) - if not os.path.exists(self.__logDir): - os.mkdir(self.__logDir) - - self._baseLogger.add_file(logDirectory=self.__logDir, - level=self._cfg['debug'], addToLoggerNames=('main',)) - - if self._cfg.has_key('syslog-address'): - self._baseLogger.add_syslog(self._cfg['syslog-address'], - level=self._cfg['debug'], addToLoggerNames=('main',)) - - if not self.logs.has_key('main'): - self.logs['main'] = hodDummyLogger() - else: - self.logs['main'] = hodDummyLogger() - else: - self.logs['main'] = hodDummyLogger() - - def _init_signals(self): - def sigStop(sigNum, handler): - self.sig_wrapper(sigNum, self.stop) - - def toggleLevel(): - currentLevel = self.__get_logging_level() - if currentLevel == 4: - self.__set_logging_level(1) - else: - self.__set_logging_level(currentLevel + 1) - - def sigStop(sigNum, handler): - self._sig_wrapper(sigNum, self.stop) - - def sigDebug(sigNum, handler): - self.sig_wrapper(sigNum, toggleLevel) - - signal(SIGTERM, sigStop) - signal(SIGQUIT, sigStop) - signal(SIGINT, sigStop) - signal(SIGUSR2, sigDebug) - - def _sig_wrapper(self, sigNum, handler, *args): - self.logs['main'].info("Caught signal %s." % sigNum) - - if args: - handler(args) - else: - handler() - - def _init_xrc_server(self): - host = None - ports = None - if self._cfg.has_key('xrs-address'): - (host, port) = (self._cfg['xrs-address'][0], self._cfg['xrs-address'][1]) - ports = (port,) - elif self._cfg.has_key('xrs-port-range'): - host = '' - ports = self._cfg['xrs-port-range'] - - if host != None: - if self.__xrtype == 'threaded': - self._xrc = hodXMLRPCServer(host, ports) - elif self.__xrtype == 'twisted': - try: - from socketServers import twistedXMLRPCServer - self._xrc = twistedXMLRPCServer(host, ports, self.logs['main']) - except ImportError: - self.logs['main'].error("Twisted XML-RPC server not available, " - + "falling back on threaded server.") - self._xrc = hodXMLRPCServer(host, ports) - for attr in dir(self): - if attr.startswith('_xr_method_'): - self._xrc.register_function(getattr(self, attr), - attr[11:]) - - self._xrc.register_introspection_functions() - - def _register_service(self, port=None, installSignalHandlers=1): - if self.__svcrgy: - self.logs['main'].info( - "Registering service with service registery %s... 
" % self.__svcrgy) - svcrgy = hodXRClient(self.__svcrgy, None, None, 0, 0, installSignalHandlers) - - if self._xrc and self._http: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', { - 'xrs' : "http://%s:%s" % ( - self._xrc.server_address[0], - self._xrc.server_address[1]),'http' : - "http://%s:%s" % (self._http.server_address[0], - self._http.server_address[1])}) - elif self._xrc: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', { - 'xrs' : "http://%s:%s" % ( - self._xrc.server_address[0], - self._xrc.server_address[1]),}) - elif self._http: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, self.name, 'hod', {'http' : - "http://%s:%s" % (self._http.server_address[0], - self._http.server_address[1]),}) - else: - svcrgy.registerService(self._cfg['userid'], self._serviceID, - self.hostname, name, 'hod', {} ) - - def start(self): - """ Start XML-RPC server and register service.""" - - self.logs['main'].info("Starting HOD service: %s ..." % self.name) - - if self._xrc: self._xrc.serve_forever() - if self._cfg.has_key('register') and self._cfg['register']: - self._register_service() - - def stop(self): - """ Stop XML-RPC server, unregister service and set stop flag. """ - - self.logs['main'].info("Stopping service...") - if self._xrc: self._xrc.stop() - self.__stop = True - - return True - - def status(self): - """Returns true, should be overriden.""" - - return True - - def wait(self): - """Wait until stop method is called.""" - - while not self.__stop: - time.sleep(.1) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py deleted file mode 100644 index 3101ab2cde..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/logger.py +++ /dev/null @@ -1,788 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""hodLogger provides a customized interface to Python's core logging package. 
-""" - -import sys, os, re, logging, logging.handlers, inspect, pprint, types -from tcp import get_address_tuple - -fileFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -streamFormatString = '%(levelname)s - %(message)s' - -debugStreamFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -syslogFormatString = '(%(process)d) %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s - %(message)s' - -smtpFormatString = '[%(asctime)s] %(levelname)s/%(levelno)s \ -%(module)s:%(lineno)s\n\n%(message)s' - -fileFormater = logging.Formatter(fileFormatString) -streamFormater = logging.Formatter(streamFormatString) -debugStreamFormater = logging.Formatter(debugStreamFormatString) -syslogFormater = logging.Formatter(syslogFormatString) -smtpFormater = logging.Formatter(smtpFormatString) - -defaultFileLevel = 3 -defaultStreamLevel = 4 -defaultSyslogLevel = 3 -defaultSmtpLevel = 0 - -hodLogLevelMap = { 0 : logging.CRITICAL, - 1 : logging.ERROR, - 2 : logging.WARNING, - 3 : logging.INFO, - 4 : logging.DEBUG } - -hodStreamFormatMap = { 0 : streamFormater, - 1 : streamFormater, - 2 : streamFormater, - 3 : streamFormater, - 4 : debugStreamFormater } - -rehodLogLevelMap = {} -for key in hodLogLevelMap.keys(): - rehodLogLevelMap[hodLogLevelMap[key]] = key - - -reModule = re.compile("^(.*)\..*$") - -hodLogs = {} - -class hodRotatingFileHandler(logging.handlers.RotatingFileHandler): - """ This class needs to be used in place of RotatingFileHandler when - the 2.4.0 Python interpreter is used.""" - - def emit(self, record): - """ - Emit a record. - - If a formatter is specified, it is used to format the record. - The record is then written to the stream with a trailing newline - [N.B. this may be removed depending on feedback]. If exception - information is present, it is formatted using - traceback.print_exception and appended to the stream. - - ***** - - THIS IS A HACK, when instances of hodLogger get passed to the child of - a child thread for some reason self.stream gets closed. This version - of emit re-opens self.stream if it is closed. After testing it appears - that self.stream is only closed once after the second thread is - initialized so there is not performance penalty to this hack. This - problem only exists in python 2.4. - - ***** - """ - try: - if self.shouldRollover(record): - self.doRollover() - try: - msg = self.format(record) - fs = "%s\n" - if not hasattr(types, "UnicodeType"): #if no unicode support... - self.stream.write(fs % msg) - else: - try: - self.stream.write(fs % msg) - except UnicodeError: - self.stream.write(fs % msg.encode("UTF-8")) - except ValueError: - self.stream = open(self.baseFilename, self.mode) - self.stream.write(fs % msg) - - self.flush() - except: - self.handleError(record) - except: - self.handleError(record) - - def shouldRollover(self, record): - """ - Determine if rollover should occur. - - Basically, see if the supplied record would cause the file to exceed - the size limit we have. - - ***** - - THIS IS A HACK, when instances of hodLogger get passed to the child of - a child thread for some reason self.stream gets closed. This version - of emit re-opens self.stream if it is closed. After testing it appears - that self.stream is only closed once after the second thread is - initialized so there is not performance penalty to this hack. This - problem only exists in python 2.4. - - ***** - """ - if self.maxBytes > 0: # are we rolling over? 
- msg = "%s\n" % self.format(record) - - try: - #due to non-posix-compliant Windows feature - self.stream.seek(0, 2) - except ValueError: - self.stream = open(self.baseFilename, self.mode) - self.stream.seek(0, 2) - - if self.stream.tell() + len(msg) >= self.maxBytes: - return 1 - return 0 - -class hodCustomLoggingLogger(logging.Logger): - """ Slight extension of the logging.Logger class used by the hodLog class. - """ - def findCaller(self): - """ findCaller() is supposed to return the callers file name and line - number of the caller. This was broken when the logging package was - wrapped by hodLog. We should return much more relevant info now. - """ - - callerModule = '' - callerLine = 0 - - currentModule = os.path.basename(__file__) - currentModule = reModule.sub("\g<1>", currentModule) - - frames = inspect.stack() - for i in range(len(frames)): - frameModule = os.path.basename(frames[i][1]) - frameModule = reModule.sub("\g<1>", frameModule) - if frameModule == currentModule: - previousFrameModule = os.path.basename(frames[i+1][1]) - previousFrameModule = reModule.sub("\g<1>", - previousFrameModule) - callerFile = frames[i+1][1] - callerLine = frames[i+1][2] - continue - - returnValues = (callerFile, callerLine) - if sys.version.startswith('2.4.4') or sys.version.startswith('2.5'): - returnValues = (callerFile, callerLine, None) - - return returnValues - -class hodLog: - """ Cluster management logging class. - - logging levels: 0 - log only critical messages - 1 - log critical and error messages - 2 - log critical, error, and warning messages - 3 - log critical, error, warning, and info messages - 4 - log critical, error, warning, info, and debug - messages""" - - def __init__(self, appName): - """Constructs a hodLogger object. - - appName - name of logging application, log filenames will be - prepended with this name""" - - self.__appName = appName - - # initialize a dictionary to hold loggerNames - self.__loggerNames = {} - - # initialize a dictionary to track log handlers and handler classes - self.__logObjs = { 'file' : {}, 'smtp' : {}, - 'syslog' : {}, 'strm' : {} } - - # use a custom logging.Logger class - logging.setLoggerClass(hodCustomLoggingLogger) - - # get the root app logger - self.__logger = logging.getLogger(appName) - self.__logger.setLevel(logging.DEBUG) - - hodLogs[self.__appName] = self - - def __attr__(self, attrname): - """loggerNames - list of defined logger names""" - - if attrname == "loggerNames": return self.__loggerNames.keys() - else: raise AttributeError, attrname - - def __repr__(self): - """Returns a string representation of a hodLog object of the form: - - LOG_NAME - file: FILENAME (level LEVEL) - smtp: SMTP_SERVER from FROM_ADDRESS (level LEVEL) - strm: STRM_OBJECT (level LEVEL) - ... 
""" - - hodLogString = "hodLog: %s\n\n" % self.__appName - for loggerName in self.__loggerNames.keys(): - hodLogString = "%s logger: %s\n" % (hodLogString, loggerName) - handlerClasses = self.__logObjs.keys() - handlerClasses.sort() - for handlerClass in handlerClasses: - try: - loggerLevelName = logging.getLevelName( - self.__logObjs[handlerClass][loggerName]['level']) - hodLogString = "%s %s: %s (level %s)\n" % ( - hodLogString, handlerClass, - self.__logObjs[handlerClass][loggerName]['data'], - loggerLevelName) - except: - hodLogString = "%s %s: none\n" % ( - hodLogString, handlerClass) - hodLogString = "%s\n" % hodLogString - - return hodLogString - - # 'private' method which adds handlers to self.__logObjs - def __add_to_handlers(self, handlerClass, loggerName, handler, data, - level): - self.__logObjs[handlerClass][loggerName] = {} - self.__logObjs[handlerClass][loggerName]['handler'] = handler - self.__logObjs[handlerClass][loggerName]['data'] = data - self.__logObjs[handlerClass][loggerName]['level'] = level - - # 'private' method which determines whether a hod log level is valid and - # returns a valid logging.Logger level - def __get_logging_level(self, level, defaultLevel): - loggingLevel = '' - try: - loggingLevel = hodLogLevelMap[int(level)] - except: - loggingLevel = hodLogLevelMap[defaultLevel] - - return loggingLevel - - # make a logging.logger name rootLogger.childLogger in our case the - # appName.componentName - def __get_logging_logger_name(self, loggerName): - return "%s.%s" % (self.__appName, loggerName) - - def add_logger(self, loggerName): - """Adds a logger of name loggerName. - - loggerName - name of component of a given application doing the - logging - - Returns a hodLogger object for the just added logger.""" - - try: - self.__loggerNames[loggerName] - except: - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logging.getLogger(loggingLoggerName) - - self.__loggerNames[loggerName] = 1 - - return hodLogger(self.__appName, loggingLoggerName) - - def add_file(self, logDirectory, maxBytes=0, backupCount=0, - level=defaultFileLevel, addToLoggerNames=None): - """Adds a file handler to all defined loggers or a specified set of - loggers. Each log file will be located in logDirectory and have a - name of the form appName-loggerName.log. 
- - logDirectory - logging directory - maxBytes - maximum log size to write in bytes before rotate - backupCount - number of rotated logs to keep - level - cluster management log level - addToLoggerNames - list of logger names to which stream handling - will be added""" - - def add_file_handler(loggerName): - if not self.__logObjs['file'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultFileLevel) - - logFile = os.path.join(logDirectory, "%s-%s.log" % ( - self.__appName, loggerName)) - - logFilePresent = False - if(os.path.exists(logFile)): - logFilePresent = True - - if sys.version.startswith('2.4'): - fileHandler = hodRotatingFileHandler(logFile, - maxBytes=maxBytes, backupCount=backupCount) - else: - fileHandler = logging.handlers.RotatingFileHandler(logFile, - maxBytes=maxBytes, backupCount=backupCount) - if logFilePresent and backupCount: - fileHandler.doRollover() - - fileHandler.setLevel(loggingLevel) - fileHandler.setFormatter(fileFormater) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(fileHandler) - - fileData = "%s" % logFile - self.__add_to_handlers('file', loggerName, fileHandler, - fileData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_file_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_file_handler(loggerName) - - def add_stream(self, stream=sys.stderr, level=defaultStreamLevel, - addToLoggerNames=None): - """Adds a stream handler to all defined loggers or a specified set of - loggers. - - stream - a stream such as sys.stderr or sys.stdout - level - cluster management log level - addToLoggerNames - tupple of logger names to which stream handling - will be added""" - - def add_stream_handler(loggerName): - if not self.__logObjs['strm'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultStreamLevel) - - streamHandler = logging.StreamHandler(stream) - - streamHandler.setLevel(loggingLevel) - - streamHandler.setFormatter(hodStreamFormatMap[int(level)]) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(streamHandler) - - streamData = "%s" % stream - self.__add_to_handlers('strm', loggerName, streamHandler, - streamData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_stream_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_stream_handler(loggerName) - - def add_syslog(self, address, level=defaultSyslogLevel, - addToLoggerNames=None): - def add_syslog_handler(loggerName): - if not self.__logObjs['syslog'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultSyslogLevel) - - address[1] = int(address[1]) - syslogHandler = logging.handlers.SysLogHandler(tuple(address), - 9) - - syslogHandler.setLevel(loggingLevel) - - syslogHandler.setFormatter(syslogFormater) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(syslogHandler) - - syslogData = "%s:%s" % (address[0], address[1]) - self.__add_to_handlers('syslog', loggerName, syslogHandler, - syslogData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_syslog_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_syslog_handler(loggerName) - - - def add_smtp(self, mailHost, fromAddress, toAddresses, - 
level=defaultSmtpLevel, addToLoggerNames=None): - """Adds an SMTP handler to all defined loggers or a specified set of - loggers. - - mailHost - SMTP server to used when sending mail - fromAddress - email address to use as the from address when - sending mail - toAdresses - comma seperated list of email address to which - mail will be sent - level - cluster management log level - addToLoggerNames - tupple of logger names to which smtp handling - will be added""" - - def add_email_handler(loggerName): - if not self.__logObjs['smtp'].has_key(loggerName): - loggingLevel = self.__get_logging_level(level, - defaultSmtpLevel) - - subject = loggerName - if loggingLevel == 50: - subject = "%s - a critical error has occured." % subject - elif loggingLevel == 40: - subject = "%s - an error has occured." % subject - elif loggingLevel == 30: - subject = "%s - warning message." % subject - elif loggingLevel == 20: - subject = "%s - information message." % subject - elif loggingLevel == 10: - subject = "%s - debugging message." % subject - - mailHostTuple = get_address_tuple(mailHost) - emailHandler = logging.handlers.SMTPHandler(mailHostTuple, - fromAddress, toAddresses, subject) - - emailHandler.setFormatter(smtpFormater) - emailHandler.setLevel(loggingLevel) - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - aLogger = logging.getLogger(loggingLoggerName) - aLogger.addHandler(emailHandler) - - emailData = "%s from %s" % (mailHost, fromAddress) - self.__add_to_handlers('smtp', loggerName, emailHandler, - emailData, loggingLevel) - - if addToLoggerNames: - for loggerName in addToLoggerNames: - add_email_handler(loggerName) - else: - for loggerName in self.__loggerNames: - add_email_handler(loggerName) - - def status(self): - statusStruct = {} - for loggerName in self.__loggerNames.keys(): - statusStruct[loggerName] = [] - for handlerClass in self.__logObjs.keys(): - loggerDict = {} - try: - level = self.__logObjs[handlerClass][loggerName]['level'] - level = rehodLogLevelMap[level] - - loggerDict['handler'] = handlerClass - loggerDict['level'] = level - loggerDict['data'] = \ - self.__logObjs[handlerClass][loggerName]['data'] - except: - pass - else: - statusStruct[loggerName].append(loggerDict) - - return statusStruct - - def lock_handlers(self): - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].acquire() - - def release_handlers(self): - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].release() - - def get_level(self, handler, loggerName): - return rehodLogLevelMap[self.__logObjs[handler][loggerName]['level']] - - def set_level(self, handler, loggerName, level): - """Sets the logging level of a particular logger and logger handler. 
- - handler - handler (smtp, file, or stream) - loggerName - logger to set level on - level - level to set logger - """ - - level = self.__get_logging_level(level, defaultFileLevel) - self.__logObjs[handler][loggerName]['handler'].setLevel(level) - self.__logObjs[handler][loggerName]['level'] = level - - if handler == 'stream': - self.__logObjs[handler][loggerName]['handler'].setFormatter( - hodStreamFormatMap[int(level)]) - - def set_logger_level(self, loggerName, level): - status = 0 - for handlerClass in self.__logObjs.keys(): - if self.__logObjs[handlerClass].has_key(loggerName): - self.set_level(handlerClass, loggerName, level) - else: - status = 1 - - return status - - def rollover(self, loggerName): - status = 0 - if self.__logObjs['file'].has_key(loggerName): - if self.__logObjs['file'][loggerName]['handler'].shouldRollover(): - self.__logObjs['file'][loggerName]['handler'].doRollover() - else: - status = 1 - - return status - - def set_max_bytes(self, maxBytes): - status = 0 - if self.__logObjs.has_key('file'): - for loggerName in self.__logObjs['file'].keys(): - self.__logObjs['file'][loggerName]['handler'].maxBytes = 0 - else: - status = 1 - - return status - - def get_logger(self, loggerName): - """ Returns a hodLogger object for a logger by name. """ - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - return hodLogger(self.__appName, loggingLoggerName) - - def critical(self, loggerName, msg): - """Logs a critical message and flushes log buffers. This method really - should only be called upon a catastrophic failure. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.critical(msg) - self.flush() - - def error(self, loggerName, msg): - """Logs an error message and flushes log buffers. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.error(msg) - self.flush() - - def warn(self, loggerName, msg): - """Logs a warning message. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.warn(msg) - - def info(self, loggerName, msg): - """Logs an information message. - - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.info(msg) - - def debug(self, loggerName, msg): - """Logs a debugging message. 
- - loggerName - logger to use - msg - message to be logged""" - - loggingLoggerName = self.__get_logging_logger_name(loggerName) - logger = logging.getLogger(loggingLoggerName) - logger.debug(msg) - - def flush(self): - """Flush all log handlers.""" - - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].flush() - - def shutdown(self): - """Shutdown all logging, flushing all buffers.""" - - for handlerClass in self.__logObjs.keys(): - for loggerName in self.__logObjs[handlerClass].keys(): - self.__logObjs[handlerClass][loggerName]['handler'].flush() - # Causes famous 'ValueError: I/O operation on closed file' - # self.__logObjs[handlerClass][loggerName]['handler'].close() - -class hodLogger: - """ Encapsulates a particular logger from a hodLog object. """ - def __init__(self, appName, loggingLoggerName): - """Constructs a hodLogger object (a particular logger in a hodLog - object). - - loggingLoggerName - name of a logger in hodLog object""" - - self.__appName = appName - self.__loggerName = loggingLoggerName - self.__logger = logging.getLogger(self.__loggerName) - - def __repr__(self): - """Returns a string representation of a hodComponentLog object.""" - - return "%s hodLog" % self.__loggerName - - def __call__(self): - pass - - def set_logger_level(self, loggerName, level): - - return hodLogs[self.__appName].set_logger_level(loggerName, level) - - def set_max_bytes(self, maxBytes): - - return hodLogs[self.__appName].set_max_bytes(maxBytes) - - def rollover(self): - return hodLogs[self.__appName].rollover(self.__loggerName) - - def get_level(self, handler, loggerName): - - return hodLogs[self.__appName].get_level(handler, loggerName) - - def critical(self, msg): - """Logs a critical message and calls sys.exit(1). - - msg - message to be logged""" - - self.__logger.critical(msg) - - def error(self, msg): - """Logs an error message. - - msg - message to be logged""" - - self.__logger.error(msg) - - def warn(self, msg): - """Logs a warning message. - - msg - message to be logged""" - - self.__logger.warn(msg) - - def info(self, msg): - """Logs an information message. - - msg - message to be logged""" - - self.__logger.info(msg) - - def debug(self, msg): - """Logs a debugging message. - - msg - message to be logged""" - - self.__logger.debug(msg) - -class hodDummyLogger: - """ Dummy hodLogger class. Other hod classes requiring a hodLogger default - to this hodLogger if no logger is passed.""" - - def __init__(self): - """pass""" - - pass - - def __repr__(self): - return "dummy hodLogger" - - def __call__(self): - """pass""" - - pass - - def set_logger_level(self, loggerName, level): - - return 0 - - def set_max_bytes(self, loggerName, maxBytes): - - return 0 - - def get_level(self, handler, loggerName): - - return 4 - - def rollover(self): - - return 0 - - def critical(self, msg): - """pass""" - - pass - - def error(self, msg): - """pass""" - - pass - - def warn(self, msg): - """pass""" - - pass - - def info(self, msg): - """pass""" - - pass - - def debug(self, msg): - """pass""" - - pass - -def ensureLogDir(logDir): - """Verify that the passed in log directory exists, and if it doesn't - create it.""" - if not os.path.exists(logDir): - try: - old_mask = os.umask(0) - os.makedirs(logDir, 01777) - os.umask(old_mask) - except Exception, e: - print >>sys.stderr, "Could not create log directories %s. Exception: %s. 
Stack Trace: %s" % (logDir, get_exception_error_string(), get_exception_string()) - raise e - -def getLogger(cfg, logName): - if cfg['debug'] > 0: - user = cfg['userid'] - baseLogger = hodLog(logName) - log = baseLogger.add_logger('main') - - if cfg.has_key('log-dir'): - serviceId = os.getenv('PBS_JOBID') - if serviceId: - logDir = os.path.join(cfg['log-dir'], "%s.%s" % (user, serviceId)) - else: - logDir = os.path.join(cfg['log-dir'], user) - if not os.path.exists(logDir): - os.mkdir(logDir) - - baseLogger.add_file(logDirectory=logDir, level=cfg['debug'], - addToLoggerNames=('main',)) - - try: - if cfg.has_key('stream') and cfg['stream']: - baseLogger.add_stream(level=cfg['debug'], addToLoggerNames=('main',)) - - if cfg.has_key('syslog-address'): - baseLogger.add_syslog(cfg['syslog-address'], - level=cfg['debug'], addToLoggerNames=('main',)) - except Exception,e: - # Caught an exception while initialising logger - log.critical("%s Logger failed to initialise. Reason : %s" % (logName, e)) - pass - return log diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py deleted file mode 100644 index 34a0fd0124..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/miniHTMLParser.py +++ /dev/null @@ -1,45 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import urllib, urlparse, re - -from HTMLParser import HTMLParser - -class miniHTMLParser( HTMLParser ): - - viewedQueue = [] - instQueue = [] - - def setBaseUrl(self, url): - self.baseUrl = url - - def getNextLink( self ): - if self.instQueue == []: - return None - else: - return self.instQueue.pop(0) - - def handle_starttag( self, tag, attrs ): - if tag == 'a': - newstr = urlparse.urljoin(self.baseUrl, str(attrs[0][1])) - if re.search('mailto', newstr) != None: - return - - if (newstr in self.viewedQueue) == False: - self.instQueue.append( newstr ) - self.viewedQueue.append( newstr ) - - - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py deleted file mode 100644 index d733780ec1..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/nodepoolutil.py +++ /dev/null @@ -1,26 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -from hodlib.NodePools.torque import TorquePool - -class NodePoolUtil: - def getNodePool(nodePoolDesc, cfg, log): - """returns a concrete instance of NodePool as configured by 'cfg'""" - npd = nodePoolDesc - name = npd.getName() - if name == 'torque': - return TorquePool(npd, cfg, log) - - getNodePool = staticmethod(getNodePool) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py deleted file mode 100644 index 791b095c9b..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/setup.py +++ /dev/null @@ -1,1058 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# $Id:setup.py 5158 2007-04-09 00:14:35Z zim $ -# -#------------------------------------------------------------------------------ - -"""'setup' provides for reading and verifing configuration files based on - Python's SafeConfigParser class.""" - -import sys, os, re, pprint - -from ConfigParser import SafeConfigParser -from optparse import OptionParser, IndentedHelpFormatter, OptionGroup -from util import get_perms, replace_escapes -from types import typeValidator, typeValidatorInstance, is_valid_type, \ - typeToString -from hodlib.Hod.hod import hodHelp - -reEmailAddress = re.compile("^.*@.*$") -reEmailDelimit = re.compile("@") -reComma = re.compile("\s*,\s*") -reDot = re.compile("\.") -reCommentHack = re.compile("^.*?\s+#|;.*", flags=re.S) -reCommentNewline = re.compile("\n|\r$") -reKeyVal = r"(? 1: - statusMsgs.append( "%s: %s problems found." % ( - errorPrefix, errorCount)) - self.var_error_suggest(statusMsgs) - status = False - elif errorCount > 0: - statusMsgs.append( "%s: %s problem found." 
% ( - errorPrefix, errorCount)) - self.var_error_suggest(statusMsgs) - status = False - - self.__validated = True - - if self.__originalDir: - os.chdir(oldDir) - - return status,statusMsgs - - def normalizeValue(self, section, option) : - return typeValidatorInstance.normalize( - self._configDef[section][option]['type'], - self[section][option]) - - def validateValue(self, section, option): - # Validates a section.option and exits on error - valueInfo = typeValidatorInstance.verify( - self._configDef[section][option]['type'], - self[section][option]) - if valueInfo['isValid'] == 1: - return [] - else: - if valueInfo['errorData']: - return self.var_error(section, option, valueInfo['errorData']) - else: - return self.var_error(section, option) - -class config(SafeConfigParser, baseConfig): - def __init__(self, configFile, configDef=None, originalDir=None, - options=None, checkPerms=False): - """Constructs config object. - - configFile - configuration file to read - configDef - definition object - options - options object - checkPerms - check file permission on config file, 0660 - - sample configuration file: - - [snis] - modules_dir = modules/ ; location of infoModules - md5_defs_dir = etc/md5_defs ; location of infoTree md5 defs - info_store = var/info ; location of nodeInfo store - cam_daemon = localhost:8200 ; cam daemon address""" - - - SafeConfigParser.__init__(self) - baseConfig.__init__(self, configDef, originalDir) - - if(os.path.exists(configFile)): - self.configFile = configFile - else: - raise IOError - - self._options = options - - ## UNUSED CODE : checkPerms is never True - ## zim: this code is used if one instantiates config() with checkPerms set to - ## True. - if checkPerms: self.__check_perms() - - self.read(configFile) - - self._configDef = configDef - if not self._configDef: - self._mySections = self.sections() - - self.__initialize_config_dict() - - def __initialize_config_dict(self): - """ build a dictionary of config vars keyed by section name defined in - configDef, if options defined override config""" - - for section in self._mySections: - items = self.items(section) - self._dict[section] = {} - - # First fill self._dict with whatever is given in hodrc. - # Going by this, options given at the command line either override - # options in hodrc, or get appended to the list, like for - # hod.client-params. 
Note that after this dict has _only_ hodrc - # params - for keyValuePair in items: - # stupid commenting bug in ConfigParser class, lines without an - # option value pair or section required that ; or # are at the - # beginning of the line, :( - newValue = reCommentHack.sub("", keyValuePair[1]) - newValue = reCommentNewline.sub("", newValue) - self._dict[section][keyValuePair[0]] = newValue - # end of filling with options given in hodrc - # now start filling in command line options - if self._options: - for option in self._configDef[section].keys(): - if self._options[section].has_key(option): - # the user has given an option - compoundOpt = "%s.%s" %(section,option) - if ( compoundOpt == \ - 'gridservice-mapred.final-server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.final-server-params' \ - or compoundOpt == \ - 'gridservice-mapred.server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.server-params' \ - or compoundOpt == \ - 'hod.client-params' ): - - if ( compoundOpt == \ - 'gridservice-mapred.final-server-params' \ - or compoundOpt == \ - 'gridservice-hdfs.final-server-params' ): - overwrite = False - else: overwrite = True - - # Append to the current list of values in self._dict - if not self._dict[section].has_key(option): - self._dict[section][option] = "" - dictOpts = reKeyValList.split(self._dict[section][option]) - dictOptsKeyVals = {} - for opt in dictOpts: - if opt != '': - # when dict _has_ params from hodrc - if reKeyVal.search(opt): - (key, val) = reKeyVal.split(opt,1) - # we only consider the first '=' for splitting - # we do this to support passing params like - # mapred.child.java.opts=-Djava.library.path=some_dir - # Even in case of an invalid error like unescaped '=', - # we don't want to fail here itself. We leave such errors - # to be caught during validation which happens after this - dictOptsKeyVals[key] = val - else: - # this means an invalid option. Leaving it - #for config.verify to catch - dictOptsKeyVals[opt] = None - - cmdLineOpts = reKeyValList.split(self._options[section][option]) - - for opt in cmdLineOpts: - if reKeyVal.search(opt): - # Same as for hodrc options. only consider - # the first = - ( key, val ) = reKeyVal.split(opt,1) - else: - key = opt - val = None - # whatever is given at cmdline overrides - # what is given in hodrc only for non-final params - if dictOptsKeyVals.has_key(key): - if overwrite: - dictOptsKeyVals[key] = val - else: dictOptsKeyVals[key] = val - - self._dict[section][option] = "" - for key in dictOptsKeyVals: - if self._dict[section][option] == "": - if dictOptsKeyVals[key]: - self._dict[section][option] = key + "=" + \ - dictOptsKeyVals[key] - else: #invalid option. let config.verify catch - self._dict[section][option] = key - else: - if dictOptsKeyVals[key]: - self._dict[section][option] = \ - self._dict[section][option] + "," + key + \ - "=" + dictOptsKeyVals[key] - else: #invalid option. let config.verify catch - self._dict[section][option] = \ - self._dict[section][option] + "," + key - - else: - # for rest of the options, that don't need - # appending business. 
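-                          # (Illustrative) if hodrc sets debug=3 under [hod]
-                          # and the user passes --hod.debug=4, the dict ends
-                          # up with 4; with no command-line value the hodrc 3
-                          # survives, since the option still equals its
-                          # default.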
- # options = cmdline opts + defaults - # dict = hodrc opts only - # only non default opts can overwrite any opt - # currently in dict - if not self._dict[section].has_key(option): - # options not mentioned in hodrc - self._dict[section][option] = \ - self._options[section][option] - elif self._configDef[section][option]['default'] != \ - self._options[section][option]: - # option mentioned in hodrc but user has given a - # non-default option - self._dict[section][option] = \ - self._options[section][option] - - ## UNUSED METHOD - ## zim: is too :) - def __check_perms(self): - perms = None - if self._options: - try: - perms = get_perms(self.configFile) - except OSError, data: - self._options.print_help() - raise Exception("*** could not find config file: %s" % data) - sys.exit(1) - else: - perms = get_perms(self.configFile) - - if perms != requiredPerms: - error = "*** '%s' has invalid permission: %s should be %s\n" % \ - (self.configFile, perms, requiredPerms) - raise Exception( error) - sys.exit(1) - - def replace_escape_seqs(self): - """ replace any escaped characters """ - replace_escapes(self) - -class formatter(IndentedHelpFormatter): - def format_option_strings(self, option): - """Return a comma-separated list of option strings & metavariables.""" - if option.takes_value(): - metavar = option.metavar or option.dest.upper() - short_opts = [sopt - for sopt in option._short_opts] - long_opts = [self._long_opt_fmt % (lopt, metavar) - for lopt in option._long_opts] - else: - short_opts = option._short_opts - long_opts = option._long_opts - - if self.short_first: - opts = short_opts + long_opts - else: - opts = long_opts + short_opts - - return ", ".join(opts) - -class options(OptionParser, baseConfig): - - def __init__(self, optionDef, usage, version, originalDir=None, - withConfig=False, defaultConfig=None, defaultLocation=None, - name=None): - """Constructs and options object. - - optionDef - definition object - usage - usage statement - version - version string - withConfig - used in conjunction with a configuration file - defaultConfig - default configuration file - - """ - OptionParser.__init__(self, usage=usage) - baseConfig.__init__(self, optionDef, originalDir) - - self.formatter = formatter(4, max_help_position=100, width=180, - short_first=1) - - self.__name = name - self.__version = version - self.__withConfig = withConfig - self.__defaultConfig = defaultConfig - self.__defaultLoc = defaultLocation - self.args = [] - self.__optionList = [] - self.__compoundOpts = [] - self.__shortMap = {} - self.__alphaString = 'abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVXYZ1234567890' - self.__alpha = [] - self.__parsedOptions = {} - self.__reserved = [ 'h' ] - - self.__orig_grps = [] - self.__orig_grp_lists = {} - self.__orig_option_list = [] - - self.__display_grps = [] - self.__display_grp_lists = {} - self.__display_option_list = [] - - self.config = None - - if self.__withConfig: - self.__reserved.append('c') - self.__reserved.append('v') - - self.__gen_alpha() - - # build self.__optionList, so it contains all the options that are - # possible. the list elements are of the form section.option - for section in self._mySections: - if self.__withConfig and section == 'config': - raise Exception( - "withConfig set 'config' cannot be used as a section name") - for option in self._configDef[section].keys(): - if '.' 
in option: - raise Exception("Options cannot contain: '.'") - elif self.__withConfig and option == 'config': - raise Exception( - "With config set, option config is not allowed.") - elif self.__withConfig and option == 'verbose-help': - raise Exception( - "With config set, option verbose-help is not allowed.") - self.__optionList.append(self.__splice_compound(section, - option)) - self.__build_short_map() - self.__add_options() - self.__init_display_options() - - (self.__parsedOptions, self.args) = self.parse_args() - - # Now process the positional arguments only for the client side - if self.__name == 'hod': - - hodhelp = hodHelp() - - _operation = getattr(self.__parsedOptions,'hod.operation') - _script = getattr(self.__parsedOptions, 'hod.script') - nArgs = self.args.__len__() - if _operation: - # -o option is given - if nArgs != 0: - self.error('invalid syntax : command and operation(-o) cannot coexist') - elif nArgs == 0 and _script: - # for a script option, without subcommand: hod -s script ... - pass - elif nArgs == 0: - print "Usage: ",hodhelp.help() - sys.exit(0) - else: - # subcommand is given - cmdstr = self.args[0] # the subcommand itself - cmdlist = hodhelp.ops - if cmdstr not in cmdlist: - print "Usage: ", hodhelp.help() - sys.exit(2) - - numNodes = None - clusterDir = None - # Check which subcommand. cmdstr = subcommand itself now. - if cmdstr == "allocate": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - numNodes = getattr(self.__parsedOptions, 'hod.nodecount') - - if not clusterDir or not numNodes: - print hodhelp.usage(cmdstr) - sys.exit(3) - - cmdstr = cmdstr + ' ' + clusterDir + ' ' + numNodes - - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - elif cmdstr == "deallocate" or cmdstr == "info": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - - if not clusterDir: - print hodhelp.usage(cmdstr) - sys.exit(3) - - cmdstr = cmdstr + ' ' + clusterDir - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - elif cmdstr == "list": - setattr(self.__parsedOptions,'hod.operation', cmdstr) - pass - - elif cmdstr == "script": - clusterDir = getattr(self.__parsedOptions, 'hod.clusterdir') - numNodes = getattr(self.__parsedOptions, 'hod.nodecount') - originalDir = getattr(self.__parsedOptions, 'hod.original-dir') - - if originalDir and clusterDir: - self.remove_exit_code_file(originalDir, clusterDir) - - if not _script or not clusterDir or not numNodes: - print hodhelp.usage(cmdstr) - sys.exit(3) - pass - - elif cmdstr == "help": - if nArgs == 1: - self.print_help() - sys.exit(0) - elif nArgs != 2: - self.print_help() - sys.exit(3) - elif self.args[1] == 'options': - self.print_options() - sys.exit(0) - cmdstr = cmdstr + ' ' + self.args[1] - setattr(self.__parsedOptions,'hod.operation', cmdstr) - - # end of processing for arguments on the client side - - if self.__withConfig: - self.config = self.__parsedOptions.config - if not self.config: - self.error("configuration file must be specified") - if not os.path.isabs(self.config): - # A relative path. 
Append the original directory which would be the - # current directory at the time of launch - try: - origDir = getattr(self.__parsedOptions, 'hod.original-dir') - if origDir is not None: - self.config = os.path.join(origDir, self.config) - self.__parsedOptions.config = self.config - except AttributeError, e: - self.error("hod.original-dir is not defined.\ - Cannot get current directory") - if not os.path.exists(self.config): - if self.__defaultLoc and not re.search("/", self.config): - self.__parsedOptions.config = os.path.join( - self.__defaultLoc, self.config) - self.__build_dict() - - def norm_cluster_dir(self, orig_dir, directory): - directory = os.path.expanduser(directory) - if not os.path.isabs(directory): - directory = os.path.join(orig_dir, directory) - directory = os.path.abspath(directory) - - return directory - - def remove_exit_code_file(self, orig_dir, dir): - try: - dir = self.norm_cluster_dir(orig_dir, dir) - if os.path.exists(dir): - exit_code_file = os.path.join(dir, "script.exitcode") - if os.path.exists(exit_code_file): - os.remove(exit_code_file) - except: - print >>sys.stderr, "Could not remove the script.exitcode file." - - def __init_display_options(self): - self.__orig_option_list = self.option_list[:] - optionListTitleMap = {} - for option in self.option_list: - optionListTitleMap[option._long_opts[0]] = option - - self.__orig_grps = self.option_groups[:] - for group in self.option_groups: - self.__orig_grp_lists[group.title] = group.option_list[:] - - groupTitleMap = {} - optionTitleMap = {} - for group in self.option_groups: - groupTitleMap[group.title] = group - optionTitleMap[group.title] = {} - for option in group.option_list: - (sectionName, optionName) = \ - self.__split_compound(option._long_opts[0]) - optionTitleMap[group.title][optionName] = option - - for section in self._mySections: - for option in self._configDef[section]: - if self._configDef[section][option]['help']: - if groupTitleMap.has_key(section): - if not self.__display_grp_lists.has_key(section): - self.__display_grp_lists[section] = [] - self.__display_grp_lists[section].append( - optionTitleMap[section][option]) - - try: - self.__display_option_list.append( - optionListTitleMap["--" + self.__splice_compound( - section, option)]) - except KeyError: - pass - try: - self.__display_option_list.append(optionListTitleMap['--config']) - except KeyError: - pass - - self.__display_option_list.append(optionListTitleMap['--help']) - self.__display_option_list.append(optionListTitleMap['--verbose-help']) - self.__display_option_list.append(optionListTitleMap['--version']) - - self.__display_grps = self.option_groups[:] - for section in self._mySections: - if self.__display_grp_lists.has_key(section): - self.__orig_grp_lists[section] = \ - groupTitleMap[section].option_list - else: - try: - self.__display_grps.remove(groupTitleMap[section]) - except KeyError: - pass - - def __gen_alpha(self): - assignedOptions = [] - for section in self._configDef: - for option in self._configDef[section]: - if self._configDef[section][option]['short']: - assignedOptions.append( - self._configDef[section][option]['short']) - - for symbol in self.__alphaString: - if not symbol in assignedOptions: - self.__alpha.append(symbol) - - def __splice_compound(self, section, option): - return "%s.%s" % (section, option) - - def __split_compound(self, compound): - return compound.split('.') - - def __build_short_map(self): - """ build a short_map of parametername : short_option. 
This is done - only for those parameters that don't have short options already - defined in configDef. - If possible, the first letter in the option that is not already - used/reserved as a short option is allotted. Otherwise the first - letter in __alpha that isn't still used is allotted. - e.g. { 'hodring.java-home': 'T', 'resource_manager.batch-home': 'B' } - """ - - optionsKey = {} - for compound in self.__optionList: - (section, option) = self.__split_compound(compound) - if not optionsKey.has_key(section): - optionsKey[section] = [] - optionsKey[section].append(option) - - for section in self._configDef.sections(): - options = optionsKey[section] - options.sort() - for option in options: - if not self._configDef[section][option]['short']: - compound = self.__splice_compound(section, option) - shortOptions = self.__shortMap.values() - for i in range(0, len(option)): - letter = option[i] - letter = letter.lower() - if letter in self.__alpha: - if not letter in shortOptions and \ - not letter in self.__reserved: - self.__shortMap[compound] = letter - break - if not self.__shortMap.has_key(compound): - for i in range(0, len(self.__alpha)): - letter = self.__alpha[i] - if not letter in shortOptions and \ - not letter in self.__reserved: - self.__shortMap[compound] = letter - - def __add_option(self, config, compoundOpt, section, option, group=None): - addMethod = self.add_option - if group: addMethod=group.add_option - - self.__compoundOpts.append(compoundOpt) - - if compoundOpt == 'gridservice-mapred.final-server-params' or \ - compoundOpt == 'gridservice-hdfs.final-server-params' or \ - compoundOpt == 'gridservice-mapred.server-params' or \ - compoundOpt == 'gridservice-hdfs.server-params' or \ - compoundOpt == 'hod.client-params': - _action = 'append' - elif config[section][option]['type'] == 'bool': - _action = 'store_true' - else: - _action = 'store' - - if self.__shortMap.has_key(compoundOpt): - addMethod("-" + self.__shortMap[compoundOpt], - "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - else: - if config[section][option]['short']: - addMethod("-" + config[section][option]['short'], - "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - else: - addMethod('', "--" + compoundOpt, dest=compoundOpt, - action= _action, - metavar=config[section][option]['type'], - default=config[section][option]['default'], - help=config[section][option]['desc']) - - def __add_options(self): - if self.__withConfig: - self.add_option("-c", "--config", dest='config', - action='store', default=self.__defaultConfig, - metavar='config_file', - help="Full path to configuration file.") - - self.add_option("", "--verbose-help", - action='help', default=None, - metavar='flag', - help="Display verbose help information.") - - self.add_option("-v", "--version", - action='version', default=None, - metavar='flag', - help="Display version information.") - - self.version = self.__version - - if len(self._mySections) > 1: - for section in self._mySections: - group = OptionGroup(self, section) - for option in self._configDef[section]: - compoundOpt = self.__splice_compound(section, option) - self.__add_option(self._configDef, compoundOpt, section, - option, group) - self.add_option_group(group) - else: - for section in self._mySections: - for option 
in self._configDef[section]: - compoundOpt = self.__splice_compound(section, option) - self.__add_option(self._configDef, compoundOpt, section, - option) - - def __build_dict(self): - if self.__withConfig: - self._dict['config'] = str(getattr(self.__parsedOptions, 'config')) - for compoundOption in dir(self.__parsedOptions): - if compoundOption in self.__compoundOpts: - (section, option) = self.__split_compound(compoundOption) - if not self._dict.has_key(section): - self._dict[section] = {} - - if getattr(self.__parsedOptions, compoundOption): - _attr = getattr(self.__parsedOptions, compoundOption) - # when we have multi-valued parameters passed separately - # from command line, python optparser pushes them into a - # list. So converting all such lists to strings - if type(_attr) == type([]): - import string - _attr = string.join(_attr,',') - self._dict[section][option] = _attr - - for section in self._configDef: - for option in self._configDef[section]: - if self._configDef[section][option]['type'] == 'bool': - compoundOption = self.__splice_compound(section, option) - if not self._dict.has_key(section): - self._dict[section] = {} - - if option not in self._dict[section]: - self._dict[section][option] = False - - def __set_display_groups(self): - if not '--verbose-help' in sys.argv: - self.option_groups = self.__display_grps - self.option_list = self.__display_option_list - for group in self.option_groups: - group.option_list = self.__display_grp_lists[group.title] - - def __unset_display_groups(self): - if not '--verbose-help' in sys.argv: - self.option_groups = self.__orig_grps - self.option_list = self.__orig_option_list - for group in self.option_groups: - group.option_list = self.__orig_grp_lists[group.title] - - def print_help(self, file=None): - self.__set_display_groups() - OptionParser.print_help(self, file) - self.__unset_display_groups() - - def print_options(self): - _usage = self.usage - self.set_usage('') - self.print_help() - self.set_usage(_usage) - - def verify(self): - return baseConfig.verify(self) - - def replace_escape_seqs(self): - replace_escapes(self) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py deleted file mode 100644 index 72dbd69569..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/socketServers.py +++ /dev/null @@ -1,621 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# Various socket server and helper classes. 
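# The servers in this file share one binding strategy: server_bind() tries
# random ports from a configured (low, high) range until bind() succeeds,
# giving up after abs(low - high) attempts. A condensed standalone sketch of
# that idea, using only the standard library; bind_in_range is an
# illustrative name, not this module's API:

import socket
from random import Random

def bind_in_range(host, low, high):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    rand = Random()   # baseSocketServer seeds this with os.getpid()
    tries = 0
    while True:
        port = rand.choice(range(low, high))
        try:
            sock.bind((host, port))
            return sock, port
        except socket.error:
            tries = tries + 1
            if tries > abs(low - high):
                raise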
-# -# -import os, sys, socket, threading, pprint, re, xmlrpclib, time - -from select import select -from SocketServer import ThreadingMixIn, ForkingMixIn -from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -from SimpleXMLRPCServer import SimpleXMLRPCRequestHandler, SimpleXMLRPCServer -from SimpleHTTPServer import SimpleHTTPRequestHandler -from random import Random -from urlparse import urlparse - -Fault = xmlrpclib.Fault - -from hodlib.Common.util import local_fqdn -from hodlib.Common.logger import hodDummyLogger - -class hodHTTPHandler(BaseHTTPRequestHandler): - port = -1 - - def __init__(self, request, client_address, server, registerService): - self.registerService = registerService - BaseHTTPRequestHandler.__init__(self, request, client_address, server) - - def log_message(self, *args): - """Forget logging for now.""" - - pass - - def do_GET(self): - self.fullUrl = "http://%s:%s%s" % (self.server.server_address[0], - self.server.server_address[1], - self.path) - - parsedUrl = urlparse(self.fullUrl) - self.writeHeaders() - self.writeData(parsedUrl) - - def w(self, string): - self.wfile.write("%s\n" % string) - - def writeHeaders(self): - self.send_response(200, 'OK') - self.send_header('Content-type', 'text/html') - self.end_headers() - - def sendWrongPage(self, userJob): - self.w('') - if userJob == False: - self.w('invalid URL specified') - elif re.match("^\d+$", userJob): - self.w('invalid URL specified, job %s does not exist' % userJob) - elif re.match("^\w+$", userJob): - self.w('invalid URL specified, user %s does not exist' % userJob) - self.w('') - - def getServiceHosts(self, serviceInfo): - hostInfo = { 'long' : {}, 'short' : {} } - for user in serviceInfo: - for job in serviceInfo[user]: - for host in serviceInfo[user][job]: - for serviceItem in serviceInfo[user][job][host]: - serviceName = serviceItem.keys() - serviceName = serviceName[0] - if isinstance(serviceItem[serviceName], str): - hostInfo['short'][self.getJobKey(user, job, host)] = True - hostInfo['long'][self.getJobKey(user, job, host)] = True - - return hostInfo - - def getJobInfo(self, job, serviceInfo): - jobInfo = {} - - for user in serviceInfo.keys(): - for someJob in serviceInfo[user].keys(): - if job == someJob: - jobInfo[user] = { job : serviceInfo[user][job] } - - return jobInfo - - def getJobKey(self, user, job, host): - return "%s-%s-%s" % (user, job, host) - - def writeData(self, parsedUrl): - options = parsedUrl[4] - serviceInfo = self.server.service.getServiceInfo() - users = serviceInfo.keys() - users.sort() - - self.w("") - self.w("") - self.w("") - self.writeCSS() - self.w("") - self.w('HOD Service Registry Information') - if serviceInfo == {}: - self.w('
No HOD clusters configured.')
-    else:
-      if parsedUrl[2] == '/':
-        self.w('   ')
-        count = 0
-        for user in users:
-          self.writeUserData(user, options, serviceInfo, count)
-          count = count + 1
-      elif parsedUrl[2][1:] in serviceInfo:
-        self.w('
')
-        self.writeUserData(parsedUrl[2][1:], options, serviceInfo, 0)
-      elif re.match("^\d+$", parsedUrl[2][1:]):
-        jobInfo = self.getJobInfo(parsedUrl[2][1:], serviceInfo)
-        if jobInfo.keys():
-          self.w('   ')
-          for user in jobInfo.keys():
-            self.writeUserData(user, options, jobInfo, 0)
-        else:
-          self.sendWrongPage(parsedUrl[2][1:])
-          self.w('   ')
-          count = 0
-          for user in users:
-            self.writeUserData(user, options, serviceInfo, count)
-            count = count + 1
-      elif re.match("^\w+$", parsedUrl[2][1:]):
-        self.sendWrongPage(parsedUrl[2][1:])
-        self.w('   ')
-        count = 0
-        for user in users:
-          self.writeUserData(user, options, serviceInfo, count)
-          count = count + 1
-      else:
-        self.sendWrongPage(False)
-        self.w('   ')
-        count = 0
-        for user in users:
-          self.writeUserData(user, options, serviceInfo, count)
-          count = count + 1
-
-    self.w('
') - self.w("") - self.w("") - self.w("") - - def writeCSS(self): - self.w('") - - def writeUserData(self, user, options, serviceInfo, count): - hostInfo = self.getServiceHosts(serviceInfo) - hostKey = 'short' - if options == 'display=long': - hostKey = 'long' - - if count == 0: - self.w('') - self.w('') - self.w('Active Users') - self.w('') - self.w('') - self.w('') - self.w('%s' % user) - self.w('') - jobIDs = serviceInfo[user].keys() - jobIDs.sort() - for jobID in jobIDs: - self.w('') - if count == 0: - self.w('') - self.w('') - self.w('') - self.w('') - self.w('' % jobID) - self.w('') - self.w('') - self.w('
') - self.w('PBS Job Identifiers') - self.w('
%s') - hosts = serviceInfo[user][jobID].keys() - hosts.sort() - for host in hosts: - if hostInfo[hostKey].has_key(self.getJobKey(user, jobID, host)): - self.w('') - if count == 0: - self.w('') - self.w('') - self.w('') - self.w('') - self.w('' % host) - self.w('') - self.w('') - self.w('
') - self.w('Hosts Running Services') - self.w('
%s') - self.w('') - self.w('') - self.w('') - self.w('') - for serviceItem in serviceInfo[user][jobID][host]: - serviceName = serviceItem.keys() - serviceName = serviceName[0] - if isinstance(serviceItem[serviceName], dict) and \ - options == 'display=long': - self.w('') - self.w('' % serviceName) - self.w('') - self.w('') - elif isinstance(serviceItem[serviceName], str): - self.w('') - self.w('' % serviceName) - self.w('') - self.w('') - self.w('
') - self.w('Service Information') - self.w('
%s') - self.w('') - for key in serviceItem[serviceName]: - self.w('') - self.w('' % key) - self.w('' % serviceItem[serviceName][key]) - self.w('') - self.w('
%s%s
') - self.w('
%s') - (host, port) = serviceItem[serviceName].split(':') - hostnameInfo = socket.gethostbyname_ex(host) - if serviceName.startswith('mapred'): - self.w('Hadoop Job Tracker' % (hostnameInfo[0], port)) - elif serviceName.startswith('hdfs'): - self.w('HDFS Name Node ' % (hostnameInfo[0], port)) - else: - self.w('%s' % serviceItem[serviceName]) - self.w('
') - self.w('
') - count = count + 1 - self.w('
') - count = count + 1 - self.w('') - self.w('') -# self.w("
")
-#    self.w(pprint.pformat(serviceInfo))
-#    self.w("
") - -class baseSocketServer: - def __init__(self, host, ports): - self.host = host - self.ports = ports - self.__stopForever = threading.Event() - self.__stopForever.clear() - self.__run = threading.Event() - self.__run.set() - self.server_address = () - self.mThread = None - - def server_bind(self): - """server_bind() method binds to a random range of ports.""" - - self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - - if len(self.ports) > 1: - randomPort = Random(os.getpid()) - portSequence = range(self.ports[0], self.ports[1]) - - maxTryCount = abs(self.ports[0] - self.ports[1]) - tryCount = 0 - while True: - somePort = randomPort.choice(portSequence) - self.server_address = (self.host, somePort) - try: - self.socket.bind(self.server_address) - except socket.gaierror, errData: - raise socket.gaierror, errData - except: - tryCount = tryCount + 1 - if tryCount > maxTryCount: - bindError = "bind failure for port range %s:%d" % ( - self.ports) - - raise socket.error, bindError - else: - break - else: - self.server_address = (self.host, int(self.ports[0])) - self.socket.bind(self.server_address) - - if self.host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - - def _serve_forever(self): - """Replacement for serve_forever loop. - - All baseSocketServers run within a master thread; that thread - imitates serve_forever, but checks an event (self.__stopForever) - before processing new connections. - """ - - while not self.__stopForever.isSet(): - (rlist, wlist, xlist) = select([self.socket], [], [], - 1) - - if (len(rlist) > 0 and self.socket == rlist[0]): - self.handle_request() - - while not self.__run.isSet(): - if self.__stopForever.isSet(): - break - time.sleep(1) - - self.server_close() - - return True - - def serve_forever(self): - """Handle requests until stopForever event flag indicates stop.""" - - self.mThread = threading.Thread(name="baseSocketServer", - target=self._serve_forever) - self.mThread.start() - - return self.mThread - - def pause(self): - """Temporarily stop servicing requests.""" - - self.__run.clear() - - def cont(self): - """Resume servicing requests.""" - - self.__run.set() - - def stop(self): - """Set the stopForever flag to tell serve_forever() to exit.""" - - self.__stopForever.set() - if self.mThread: self.mThread.join() - return True - - def is_alive(self): - if self.mThread != None: - return self.mThread.isAlive() - else: - return False - -class threadedHTTPServer(baseSocketServer, ThreadingMixIn, HTTPServer): - def __init__(self, host, ports): - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, SimpleHTTPRequestHandler) - -class forkingHTTPServer(baseSocketServer, ForkingMixIn, HTTPServer): - def __init__(self, host, ports): - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, SimpleHTTPRequestHandler) - -class hodHTTPServer(baseSocketServer, ThreadingMixIn, HTTPServer): - service = None - def __init__(self, host, ports, serviceobj = None): - self.service = serviceobj - baseSocketServer.__init__(self, host, ports) - HTTPServer.__init__(self, self.server_address, hodHTTPHandler) - - def finish_request(self, request, client_address): - self.RequestHandlerClass(request, client_address, self, self.service) - -class hodXMLRPCServer(baseSocketServer, ThreadingMixIn, SimpleXMLRPCServer): - def __init__(self, host, ports, - requestHandler=SimpleXMLRPCRequestHandler, - logRequests=False, allow_none=False, encoding=None): - 
baseSocketServer.__init__(self, host, ports) - SimpleXMLRPCServer.__init__(self, self.server_address, requestHandler, - logRequests) - - self.register_function(self.stop, 'stop') - -try: - from twisted.web import server, xmlrpc - from twisted.internet import reactor, defer - from twisted.internet.threads import deferToThread - from twisted.python import log - - class twistedXMLRPC(xmlrpc.XMLRPC): - def __init__(self, logger): - xmlrpc.XMLRPC.__init__(self) - - self.__XRMethods = {} - self.__numRequests = 0 - self.__logger = logger - self.__pause = False - - def render(self, request): - request.content.seek(0, 0) - args, functionPath = xmlrpclib.loads(request.content.read()) - try: - function = self._getFunction(functionPath) - except Fault, f: - self._cbRender(f, request) - else: - request.setHeader("content-type", "text/xml") - defer.maybeDeferred(function, *args).addErrback( - self._ebRender).addCallback(self._cbRender, request) - - return server.NOT_DONE_YET - - def _cbRender(self, result, request): - if isinstance(result, xmlrpc.Handler): - result = result.result - if not isinstance(result, Fault): - result = (result,) - try: - s = xmlrpclib.dumps(result, methodresponse=1) - except: - f = Fault(self.FAILURE, "can't serialize output") - s = xmlrpclib.dumps(f, methodresponse=1) - request.setHeader("content-length", str(len(s))) - request.write(s) - request.finish() - - def _ebRender(self, failure): - if isinstance(failure.value, Fault): - return failure.value - log.err(failure) - return Fault(self.FAILURE, "error") - - def _getFunction(self, methodName): - while self.__pause: - time.sleep(1) - - self.__numRequests = self.__numRequests + 1 - function = None - try: - def defer_function(*args): - return deferToThread(self.__XRMethods[methodName], - *args) - function = defer_function - self.__logger.info( - "[%s] processing defered XML-RPC call to: %s ..." % - (self.__numRequests, methodName)) - except KeyError: - self.__logger.warn( - "[%s] fault %s on XML-RPC call to %s, method not found." % ( - self.__numRequests, self.NOT_FOUND, methodName)) - raise xmlrpc.NoSuchFunction(self.NOT_FOUND, - "method %s not found" % methodName) - - return function - - def register_function(self, functionRef, methodName): - self.__XRMethods[methodName] = functionRef - - def list_methods(self): - return self.__XRMethods.keys() - - def num_requests(self): - return self.__numRequests - - def pause(self): - self.__pause = True - - def cont(self): - self.__pause = False - - class twistedXMLRPCServer: - def __init__(self, host, ports, logger=None, threadPoolSize=100): - self.__host = host - self.__ports = ports - - if logger == None: - logger = hodDummyLogger() - - self.__logger = logger - - self.server_address = ['', ''] - reactor.suggestThreadPoolSize(threadPoolSize) - - self.__stopForever = threading.Event() - self.__stopForever.clear() - self.__mThread = None - - self.__xmlrpc = twistedXMLRPC(self.__logger) - - def _serve_forever(self): - if len(self.__ports) > 1: - randomPort = Random(os.getpid()) - portSequence = range(self.__ports[0], self.__ports[1]) - - maxTryCount = abs(self.__ports[0] - self.__ports[1]) - tryCount = 0 - while True: - somePort = randomPort.choice(portSequence) - self.server_address = (self.__host, int(somePort)) - if self.__host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - try: - reactor.listenTCP(int(somePort), server.Site( - self.__xmlrpc), interface=self.__host) - reactor.run(installSignalHandlers=0) - except: - self.__logger.debug("Failed to bind to: %s:%s." 
% ( - self.__host, somePort)) - tryCount = tryCount + 1 - if tryCount > maxTryCount: - self.__logger.warn("Failed to bind to: %s:%s" % ( - self.__host, self.__ports)) - sys.exit(1) - else: - break - else: - try: - self.server_address = (self.__host, int(self.__ports[0])) - if self.__host == '': - self.server_address = (local_fqdn(), self.server_address[1]) - reactor.listenTCP(int(self.__ports[0]), server.Site(self.__xmlrpc), - interface=self.__host) - reactor.run(installSignalHandlers=0) - except: - self.__logger.warn("Failed to bind to: %s:%s."% ( - self.__host, self.__ports[0])) - sys.exit(1) - - def serve_forever(self): - """Handle requests until stopForever event flag indicates stop.""" - - self.__mThread = threading.Thread(name="XRServer", - target=self._serve_forever) - self.__mThread.start() - - if not self.__mThread.isAlive(): - raise Exception("Twisted XMLRPC server thread dead.") - - def register_function(self, functionRef, methodName): - self.__xmlrpc.register_function(functionRef, methodName) - - def register_introspection_functions(self): - pass - - def register_instance(self, instance): - for method in dir(instance): - if not method.startswith('_'): - self.register_function(getattr(instance, method), method) - - def pause(self): - self.__xmlrpc.pause() - - def cont(self): - self.__xmlrpc.cont() - - def stop(self): - def stop_thread(): - time.sleep(2) - reactor.stop() - - self.__stopForever.set() - - stopThread = threading.Thread(name='XRStop', target=stop_thread) - stopThread.start() - - return True - - def is_alive(self): - status = False - if reactor.running == 1: - status = True - - return status - - def status(self): - """Return status information on running XMLRPC Server.""" - stat = { 'XR server address' : self.server_address, - 'XR methods' : self.system_listMethods(), - 'XR server alive' : self.is_alive(), - 'XR requests processed' : self.__xmlrpc.num_requests(), - 'XR server stop flag' : self.__stopForever.isSet()} - return(stat) - - def system_listMethods(self): - return self.__xmlrpc.list_methods() - - def get_server_address(self): - waitCount = 0 - while self.server_address == '': - if waitCount == 9: - break - time.sleep(1) - waitCount = waitCount + 1 - - return self.server_address -except ImportError: - pass diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py deleted file mode 100644 index a118a67f9c..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/tcp.py +++ /dev/null @@ -1,176 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:tcp.py 6172 2007-05-22 20:26:54Z zim $ -# -#------------------------------------------------------------------------------ - -""" TCP related classes. 
""" - -import socket, re, string -reAddress = re.compile(":") -reMayBeIp = re.compile("^\d+\.\d+\.\d+\.\d+$") -reValidPort = re.compile("^\d+$") - -class Error(Exception): - def __init__(self, msg=''): - self.message = msg - Exception.__init__(self, msg) - - def __repr__(self): - return self.message - -class tcpError(Error): - def __init__(self, message): - Error.__init__(self, message) - -class tcpSocket: - def __init__(self, address, timeout=30, autoflush=0): - """Constructs a tcpSocket object. - - address - standard tcp address (HOST:PORT) - timeout - socket timeout""" - - self.address = address - self.__autoFlush = autoflush - self.__remoteSock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.__remoteSock.settimeout(timeout) - self.host = None - self.port = None - splitAddress = address - if isinstance(address, (tuple, list)): - self.host = address[0] - self.port = int(address[1]) - else: - splitAddress = get_address_tuple(address) - if not splitAddress[0]: - self.host = 'localhost' - else: - self.host = splitAddress[0] - - self.port = int(splitAddress[1]) - - self.__fileObjectOut = '' - self.__fileObjectIn = '' - - def __repr__(self): - return self.address - - def __iter__(self): - return self - - def next(self): - sockLine = self.read() - if not sockLine: - raise StopIteration - - return sockLine - - def open(self): - """Attempts to open a socket to the specified address.""" - - socketAddress = (self.host, self.port) - - try: - self.__remoteSock.connect(socketAddress) - if self.__autoFlush: - self.__fileObjectOut = self.__remoteSock.makefile('wb', 0) - else: - self.__fileObjectOut = self.__remoteSock.makefile('wb') - - self.__fileObjectIn = self.__remoteSock.makefile('rb', 0) - except: - raise tcpError, "connection failure: %s" % self.address - - def flush(self): - """Flushes write buffer.""" - self.__fileObjectOut.flush() - - def close(self): - """Attempts to close and open socket connection""" - - try: - self.__remoteSock.close() - self.__fileObjectOut.close() - self.__fileObjectIn.close() - except socket.error, exceptionObject: - exceptionMessage = "close failure %s %s" % (self.address, - exceptionObject.__str__()) - raise tcpError, exceptionMessage - - def verify(self): - """Verifies that a given IP address/host and port are valid. This - method will not attempt to open a socket to the specified address. - """ - - isValidAddress = False - if reMayBeIp.match(self.host): - if check_ip_address(self.host): - if reValidPort.match(str(self.port)): - isValidAddress = True - else: - if reValidPort.match(str(self.port)): - isValidAddress = True - - return(isValidAddress) - - def read(self): - """Reads a line off of the active socket.""" - - return self.__fileObjectIn.readline() - - def write(self, string): - """Writes a string to the active socket.""" - - print >> self.__fileObjectOut, string - -def check_net_address(address): - valid = True - pieces = string.split(address, '.') - if len(pieces) != 4: - valid = False - else: - for piece in pieces: - if int(piece) < 0 or int(piece) > 255: - valid = False - - return valid - -def check_ip_address(address): - valid = True - pieces = string.split(address, '.') - if len(pieces) != 4: - valid = False - else: - if int(pieces[0]) < 1 or int(pieces[0]) > 254: - valid = False - for i in range(1,4): - if int(pieces[i]) < 0 or int(pieces[i]) > 255: - valid = False - - return valid - -def get_address_tuple(address): - """ Returns an address tuple for TCP address. 
- - address - TCP address of the form host:port - - returns address tuple (host, port) - """ - - addressList = reAddress.split(address) - addressTuple = (addressList[0], int(addressList[1])) - - return addressTuple diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py deleted file mode 100644 index 0d19042074..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/threads.py +++ /dev/null @@ -1,389 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -import threading, time, os, sys, pprint - -from popen2 import Popen4, Popen3, MAXFD -from signal import SIGTERM, SIGKILL - -class baseThread(threading.Thread): - """Base CAM threading class. The run method should be overridden.""" - - def __init__(self, name): - threading.Thread.__init__(self, name=name) - self.stopFlag = threading.Event() - self.stopFlag.clear() - self.running = threading.Event() - self.running.set() - self.isFinished = threading.Event() - self.isFinished.clear() - - def join(self, timeout=None): - self.stopFlag.set() - threading.Thread.join(self, timeout) - - def pause(self): - """Pause thread.""" - - self.running.clear() - - def cont(self): - """Resume thread operation.""" - - self.running.set() - -class simpleCommand(baseThread): - """Command execution object. Command output and exit status are captured. - - Public class attributes: - - cmdString - command to be executed - outputBuffer - command output, stdout + stderr - status - exit status, as returned by wait - - stdin - standard input for command - stdout - standard output of command when buffer == False - stderr - standard error of command when mode == 3 and buffer == False - - """ - - def __init__(self, name, cmdString, env=os.environ, mode=4, buffer=True, - wait=True, chdir=None): - """Class initialization. - - name - thread name to use when running the command - cmdString - command string to execute - inputString - string to print to command's stdin - env - shell environment dictionary - mode - 3 for popen3 and 4 for popen4 - buffer - out put to be retrieved with output() method - wait - return immediately after start() is called and output - command results as they come to stdout""" - - baseThread.__init__(self, name=name) - - self.cmdString = cmdString - self.__mode = mode - self.__buffer = buffer - self.__wait = wait - self.__chdir = chdir - self.__outputBuffer = [] - self.__status = None - self.__pid = None - self.__isFinished = threading.Event() - self.__isFinished.clear() - - self.stdin = None - self.stdout = None - self.stderr = None - - self.__env = env - - def run(self): - """ Overridden run method. Most of the work happens here. 
start() - should be called in place of this method.""" - - oldDir = None - if self.__chdir: - if os.path.exists(self.__chdir): - oldDir = os.getcwd() - os.chdir(self.__chdir) - else: - raise Exception( - "simpleCommand: invalid chdir specified: %s" % - self.__chdir) - - cmd = None - if self.__mode == 3: - cmd = _Popen3Env(self.cmdString, env=self.__env) - else: - cmd = _Popen4Env(self.cmdString, env=self.__env) - self.__pid = cmd.pid - - self.stdin = cmd.tochild - - if self.__mode == 3: - self.stderr = cmd.childerr - - while cmd.fromchild == None: - time.sleep(1) - - if self.__buffer == True: - output = cmd.fromchild.readline() - while output != '': - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - self.__outputBuffer.append(output) - output = cmd.fromchild.readline() - - elif self.__wait == False: - output = cmd.fromchild.readline() - while output != '': - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - print output, - if self.stopFlag.isSet(): - break - output = cmd.fromchild.readline() - else: - self.stdout = cmd.fromchild - - self.__status = cmd.poll() - while self.__status == -1: - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - - self.__status = cmd.poll() - time.sleep(1) - - if oldDir: - os.chdir(oldDir) - - self.__isFinished.set() - - sys.exit(0) - - def getPid(self): - """return pid of the launches process""" - return self.__pid - - def output(self): - return self.__outputBuffer[:] - - def wait(self): - """Wait blocking until command execution completes.""" - - self.__isFinished.wait() - - return os.WEXITSTATUS(self.__status) - - def is_running(self): - """Returns boolean, are we running?""" - - status = True - if self.__isFinished.isSet(): - status = False - - return status - - def exit_code(self): - """ Returns process exit code.""" - - if self.__status != None: - return os.WEXITSTATUS(self.__status) - else: - return None - - def exit_status_string(self): - """Return a string representation of the command's exit status.""" - - statusString = None - if self.__status: - exitStatus = os.WEXITSTATUS(self.__status) - exitSignal = os.WIFSIGNALED(self.__status) - coreDump = os.WCOREDUMP(self.__status) - - statusString = "exit code: %s | signal: %s | core %s" % \ - (exitStatus, exitSignal, coreDump) - - return(statusString) - - def stop(self): - """Stop the running command and join it's execution thread.""" - - self.join() - - def kill(self): - count = 0 - while self.is_running(): - try: - if count > 20: - os.kill(self.__pid, SIGKILL) - break - else: - os.kill(self.__pid, SIGTERM) - except: - break - - time.sleep(.1) - count = count + 1 - - self.stop() - -class _Popen3Env(Popen3): - def __init__(self, cmd, capturestderr=False, bufsize=-1, env=os.environ): - self._env = env - Popen3.__init__(self, cmd, capturestderr, bufsize) - - def _run_child(self, cmd): - if isinstance(cmd, basestring): - cmd = ['/bin/sh', '-c', cmd] - for i in xrange(3, MAXFD): - try: - os.close(i) - except OSError: - pass - - try: - os.execvpe(cmd[0], cmd, self._env) - finally: - os._exit(1) - -class _Popen4Env(_Popen3Env, Popen4): - childerr = None - - def __init__(self, cmd, bufsize=-1, env=os.environ): - self._env = env - Popen4.__init__(self, cmd, bufsize) - -class loop(baseThread): - """ A simple extension of the threading.Thread class which continuously - executes a block of code until join(). 
- """ - - def __init__(self, name, functionRef, functionArgs=None, sleep=1, wait=0, - offset=False): - """Initialize a loop object. - - name - thread name - functionRef - a function reference - functionArgs - function arguments in the form of a tuple, - sleep - time to wait between function execs - wait - time to wait before executing the first time - offset - set true to sleep as an offset of the start of the - last func exec instead of the end of the last func - exec - """ - - self.__functionRef = functionRef - self.__functionArgs = functionArgs - self.__sleep = sleep - self.__wait = wait - self.__offset = offset - - baseThread.__init__(self, name=name) - - def run(self): - """Do not call this directly. Call self.start().""" - - startTime = None - while not self.stopFlag.isSet(): - sleep = self.__sleep - if self.__wait > 0: - startWaitCount = 0 - while not self.stopFlag.isSet(): - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - time.sleep(0.5) - startWaitCount = startWaitCount + .5 - if startWaitCount >= self.__wait: - self.__wait = 0 - break - startTime = time.time() - - if not self.stopFlag.isSet(): - if self.running.isSet(): - if self.__functionArgs: - self.__functionRef(self.__functionArgs) - else: - self.__functionRef() - endTime = time.time() - - while not self.running.isSet(): - time.sleep(1) - - while not self.stopFlag.isSet(): - while not self.running.isSet(): - if self.stopFlag.isSet(): - break - time.sleep(1) - - currentTime = time.time() - if self.__offset: - elapsed = time.time() - startTime - else: - elapsed = time.time() - endTime - - if elapsed >= self.__sleep: - break - - time.sleep(0.5) - - self.isFinished.set() - - def set_sleep(self, sleep, wait=None, offset=None): - """Modify loop frequency paramaters. - - sleep - time to wait between function execs - wait - time to wait before executing the first time - offset - set true to sleep as an offset of the start of the - last func exec instead of the end of the last func - exec - """ - - self.__sleep = sleep - if wait != None: - self.__wait = wait - if offset != None: - self.__offset = offset - - def get_sleep(self): - """Get loop frequency paramaters. - Returns a dictionary with sleep, wait, offset. - """ - - return { - 'sleep' : self.__sleep, - 'wait' : self.__wait, - 'offset' : self.__offset, - } - -class func(baseThread): - """ A simple extension of the threading.Thread class which executes - a function in a separate thread. - """ - - def __init__(self, name, functionRef, functionArgs=None): - """Initialize a func object. - - name - thread name - functionRef - a function reference - functionArgs - function arguments in the form of a tuple, - """ - - self.__functionRef = functionRef - self.__functionArgs = functionArgs - - baseThread.__init__(self, name=name) - - def run(self): - """Do not call this directly. Call self.start().""" - - if not self.stopFlag.isSet(): - if self.running.isSet(): - if self.__functionArgs: - self.__functionRef(self.__functionArgs) - else: - self.__functionRef() - sys.exit(0) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py deleted file mode 100644 index 9612ce4313..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/types.py +++ /dev/null @@ -1,1266 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. 
See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# $Id:types.py 6172 2007-05-22 20:26:54Z zim $ -# -#------------------------------------------------------------------------------ - -""" Higher level data types and type related classes. - - Supported Types (Verification and Display): - - address - validates ip:port and host:port tcp addresses - ip_address - validates and IP address - net_address - validates an IP like address, ie netmask - hostname - validates a hostname with DNS - eaddress - validates a single email address or a comma - seperated list of email addresses - http_version - validates a value is a http version (1.0/1.1) - tcp_port - validates a value to be a valid tcp port (2-65535) - bool - validates value is (0, 1, true, false) / converts - true -> 1 and false -> 0 - directory - validates a values is a directory / resolves path to - absolute path - file - validates a value is a file / resolves path to absolute - path - float - validates a value is a float, converts string to float - pos_float - validates a value is a float and >= 0, converts string - to float - pos_num - same as pos_float - neg_float - validates a value is a float and < 0, converts string to - float - int - validates a value is an integer, converts string to - integer - pos_int - validates a value is an integer and >= 0, converts - string to integer - neg_int - validates a values is an integer and < 0, converts - striing to integer - freq - frequency, positive integer - size - validates a size in bytes, kb, mb, kb, and tb - (int > 0 post fixed with K, M, G, or T) also converts - value to integer bytes - range - numeric range, x-y normalized to a tuple, if a single - number is supplie a single element tuple is returned - timestamp - utc timestamp of the form YYYYMMDDHHMMSS - user_account - UNIX user account name - user_group - UNIX group name - string - arbitrarily long string - list - comma seperated list of strings of arbitrary length, - keyval - comma seperated list of key=value pairs, key does not - need to be unique. 
- uri - a uri """ - -import sys, os, socket, pwd, grp, stat, re, re, string, pprint, urlparse - -from tcp import tcpSocket, check_net_address, check_ip_address -from util import check_timestamp - -types = { 'directory' : { 'db' : 'string', - 'units' : None }, - - 'address' : { 'db' : 'string', - 'units' : None }, - - 'ip_address' : { 'db' : 'string', - 'units' : None }, - - 'net_address' : { 'db' : 'string', - 'units' : None }, - - 'bool' : { 'db' : 'bool', - 'units' : None }, - - 'int' : { 'db' : 'integer', - 'units' : None }, - - 'float' : { 'db' : 'float', - 'units' : None }, - - 'pos_int' : { 'db' : 'integer', - 'units' : None }, - - 'neg_int' : { 'db' : 'integer', - 'units' : None }, - - 'pos_num' : { 'db' : 'float', - 'units' : None }, - - 'pos_float' : { 'db' : 'float', - 'units' : None }, - - 'neg_float' : { 'db' : 'float', - 'units' : None }, - - 'string' : { 'db' : 'string', - 'units' : None }, - - 'list' : { 'db' : 'string', - 'units' : None }, - - 'file' : { 'db' : 'string', - 'units' : None }, - - 'size' : { 'db' : 'integer', - 'units' : 'bytes' }, - - 'freq' : { 'db' : 'integer', - 'units' : 'hz' }, - - 'eaddress' : { 'db' : 'string', - 'units' : None }, - - 'tcp_port' : { 'db' : 'integer', - 'units' : None }, - - 'http_version' : { 'db' : 'float', - 'units' : None }, - - 'range' : { 'db' : 'string', - 'units' : None }, - - 'hostname' : { 'db' : 'string', - 'units' : None }, - - 'user_account' : { 'db' : 'string', - 'units' : None }, - - 'user_group' : { 'db' : 'string', - 'units' : None }, - - 'timestamp' : { 'db' : 'timestamp', - 'units' : None }, - - 'keyval' : { 'db' : 'string', - 'units' : None }, - - 'uri' : { 'db' : 'string', - 'units' : None }, - - '' : { 'db' : 'string', - 'units' : None }} - -dbTypes = { 'string' : { 'type' : 'varchar', - 'store' : 'type_strings_0', - 'table' : True }, - - 'integer' : { 'type' : 'bigint', - 'store' : 'integers', - 'table' : False }, - - 'float' : { 'type' : 'real', - 'store' : 'floats', - 'table' : False }, - - 'bool' : { 'type' : 'boolean', - 'store' : 'bools', - 'table' : False }, - - 'timestamp' : { 'type' : 'timestamp(0)', - 'store' : 'timestamps', - 'table' : False }} - -reSizeFormat = re.compile("^(\d+)(k|m|g|t|p|kb|mb|gb|tb|pb)$", flags=2) -reDash = re.compile("\s*-\s*") - -sizeFactors = { 'b' : 1, - 'bytes' : 1, - 'k' : 1024, - 'kb' : 1024, - 'm' : 1048576, - 'mb' : 1048576, - 'g' : 1073741824, - 'gb' : 1073741824, - 't' : 1099511627776, - 'tb' : 1099511627776, - 'p' : 1125899906842624, - 'pb' : 1125899906842624 } - -freqFactors = { 'hz' : 1, - 'khz' : 1000, - 'mhz' : 1000000, - 'ghz' : 1000000000, - 'thz' : 1000000000000, - 'phz' : 1000000000000000 } - -sizeMap = [ { 'factor' : sizeFactors['b'], - 'long' : 'byte', - 'short' : 'byte' }, - - { 'factor' : sizeFactors['k'], - 'long' : 'Kilobyte', - 'short' : 'KB' }, - - { 'factor' : sizeFactors['m'], - 'long' : 'Megabyte', - 'short' : 'MB' }, - - { 'factor' : sizeFactors['g'], - 'long' : 'Gigabyte', - 'short' : 'GB' }, - - { 'factor' : sizeFactors['t'], - 'long' : 'Terabyte', - 'short' : 'TB' }, - - { 'factor' : sizeFactors['p'], - 'long' : 'Petabyte', - 'short' : 'PB' } ] - -freqMap = [ { 'factor' : freqFactors['hz'], - 'long' : 'Hertz', - 'short' : 'Hz' }, - - { 'factor' : freqFactors['khz'], - 'long' : 'Kilohertz', - 'short' : 'KHz' }, - - { 'factor' : freqFactors['mhz'], - 'long' : 'Megahertz', - 'short' : 'MHz' }, - - { 'factor' : freqFactors['ghz'], - 'long' : 'Gigahertz', - 'short' : 'GHz' }, - - { 'factor' : freqFactors['thz'], - 'long' : 'Terahertz', - 'short' : 'THz' 
}, - - { 'factor' : freqFactors['phz'], - 'long' : 'Petahertz', - 'short' : 'PHz' } ] - -reListString = r"(?, - 'name' : 'SA_COMMON.old_xml_dir', - 'value': 'var/data/old' }, - - { 'func' : , - 'name' : 'SA_COMMON.log_level', - 'value': '4' } ] - - validatedInfo = [ { # name supplied to add() - 'name' : 'SA_COMMON.tmp_xml_dir', - - # is valid or not - 'isValid' : 1 - - # normalized value - 'normalized' : /var/data/tmp, - - # error string ? - 'errorData' : 0 }, - - { 'name' : 'SA_COMMON.new_xml_dir', - 'isValid' : 1 - 'normalized' : /var/data/new, - 'errorData' : 0 } ]""" - - if attrname == "validateList": - return self.validateList # list of items to be validated - elif attrname == "validatedInfo": - return self.validatedInfo # list of validation results - else: raise AttributeError, attrname - - def __build_verify_functions(self): - functions = {} - for function in dir(self): - functions[function] = 1 - - for type in types.keys(): - # kinda bad, need to find out how to know the name of the class - # I'm in. But it works. - functionName = "_typeValidator__verify_%s" % type - if functions.has_key(functionName): - self.verifyFunctions[type] = getattr(self, functionName) - else: - if type == '': - self.verifyFunctions[type] = self.__verify_nothing - else: - error = "Verify function %s for type %s does not exist." \ - % (functionName, type) - raise Exception(error) - sys.exit(1) - - def __get_value_info(self): - valueInfo = { 'isValid' : 0, 'normalized' : 0, 'errorData' : 0 } - - return valueInfo - - def __set_value_info(self, valueInfo, **valueData): - try: - valueInfo['normalized'] = valueData['normalized'] - valueInfo['isValid'] = 1 - except KeyError: - valueInfo['isValid'] = 0 - try: - valueInfo['errorData'] = valueData['errorData'] - except: - pass - - # start of 'private' verification methods, each one should correspond to a - # type string (see self.verify_config()) - def __verify_directory(self, type, value): - valueInfo = self.__get_value_info() - - if os.path.isdir(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_directory(self, value): - return self.__normalizedPath(value) - - def __verify_address(self, type, value): - valueInfo = self.__get_value_info() - - try: - socket = tcpSocket(value) - if socket.verify(): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - except: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_address(self, value): - return value.split(':') - - def __verify_ip_address(self, type, value): - valueInfo = self.__get_value_info() - - if check_ip_address(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_net_address(self, type, value): - valueInfo = self.__get_value_info() - - if check_net_address(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_bool(self, type, value): - valueInfo = self.__get_value_info() - - value = str(value) - if re.match("^false|0|f|no$", value, 2): - self.__set_value_info(valueInfo, normalized=False) - elif re.match("^true|1|t|yes$", value, 2): - self.__set_value_info(valueInfo, normalized=True) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_bool(self, value): - value = 
str(value)
-        norm = ""
-        if re.match("^false|0|f|no$", value, 2):
-            norm = False
-        elif re.match("^true|1|t|yes$", value, 2):
-            norm = True
-        else:
-            raise Exception("invalid bool specified: %s" % value)
-
-        return norm
-
-    def __verify_int(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            self.__set_value_info(valueInfo, normalized=self.normalize(type,
-                                                                       value))
-        except:
-            self.__set_value_info(valueInfo)
-
-        return valueInfo
-
-    def __norm_int(self, value):
-        return int(value)
-
-    def __verify_float(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            self.__set_value_info(valueInfo, normalized=self.normalize(type,
-                                                                       value))
-        except:
-            self.__set_value_info(valueInfo)
-
-        return valueInfo
-
-    def __norm_float(self, value):
-        return float(value)
-
-    def __verify_pos_int(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            value = self.normalize(type, value)
-        except:
-            self.__set_value_info(valueInfo)
-        else:
-            self.__set_value_info(valueInfo, normalized=value)
-
-        return valueInfo
-
-    def __norm_pos_int(self, value):
-        value = int(value)
-        if value < 0:
-            raise Exception("value is not positive: %s" % value)
-
-        return value
-
-    def __verify_neg_int(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            value = self.normalize(type, value)
-        except:
-            self.__set_value_info(valueInfo)
-        else:
-            self.__set_value_info(valueInfo, normalized=value)
-
-        return valueInfo
-
-    def __norm_neg_int(self, value):
-        value = int(value)
-        if value > 0:
-            raise Exception("value is not negative: %s" % value)
-
-        return value
-
-    def __verify_freq(self, type, value):
-        return self.__verify_pos_int(type, value)
-
-    def __norm_freq(self, value):
-        return self.__norm_pos_int(value)
-
-    def __verify_pos_float(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            value = self.normalize(type, value)
-        except:
-            self.__set_value_info(valueInfo)
-        else:
-            self.__set_value_info(valueInfo, normalized=value)
-
-        return valueInfo
-
-    def __norm_pos_float(self, value):
-        value = float(value)
-        if value < 0:
-            raise Exception("value is not positive: %s" % value)
-
-        return value
-
-    def __verify_pos_num(self, type, value):
-        return self.__verify_pos_float(type, value)
-
-    def __norm_pos_num(self, value):
-        return self.__norm_pos_float(value)
-
-    def __verify_neg_float(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        try:
-            value = self.normalize(type, value)
-        except:
-            self.__set_value_info(valueInfo)
-        else:
-            self.__set_value_info(valueInfo, normalized=value)
-
-        return valueInfo
-
-    def __norm_neg_float(self, value):
-        value = float(value)
-        if value >= 0:
-            raise Exception("value is not negative: %s" % value)
-
-        return value
-
-    def __verify_string(self, type, value):
-        valueInfo = self.__get_value_info()
-        self.__set_value_info(valueInfo, normalized=self.normalize(type,
-                                                                   value))
-
-        return valueInfo
-
-    def __norm_string(self, value):
-        return str(value)
-
-    def __verify_keyval(self, type, value):
-        valueInfo = self.__get_value_info()
-
-        if reKeyVal.search(value):
-            try:
-                self.__set_value_info(valueInfo, normalized=self.normalize(type,
-                                                                           value))
-            except:
-                self.__set_value_info(valueInfo, errorData = \
-                    "invalid list of key-value pairs : [ %s ]" % value)
-        else:
-            msg = "No key value pairs found?"
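The keyval and list validators split their input only on unescaped delimiters; util.py's replace_escapes (removed later in this patch) unescapes \, and \= on the way back out. The exact reListString/reKeyValString definitions were garbled in this copy of the patch (the text breaks off at r"(?), so the negative-lookbehind patterns in the sketch below are an assumption consistent with that escaping convention, not the verbatim source; norm_keyval is likewise a hypothetical stand-in for the class methods:

    # Sketch only: the "(?<!\\)" patterns (split on a delimiter that is not
    # preceded by a backslash) are assumed, since the original definitions
    # were lost to garbling above.
    import re

    reList   = re.compile(r"(?<!\\),")   # assumed: comma not preceded by '\'
    reKeyVal = re.compile(r"(?<!\\)=")   # assumed: '=' not preceded by '\'

    def norm_keyval(value):
        """Turn 'a=1,b=2' into {'a': '1', 'b': '2'}, leaving escapes intact."""
        pairs = reList.split(value) if reList.search(value) else [value]
        result = {}
        for item in pairs:
            key, val = reKeyVal.split(item, 1)
            result[key] = val
        return result

    print(norm_keyval(r"SA_COMMON.log_level=4,path=a\,b"))
    # -> {'SA_COMMON.log_level': '4', 'path': 'a\\,b'}

The SA_COMMON.* keys mirror the examples in the validateList docstring above.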
- self.__set_value_info(valueInfo, errorData=msg) - - return valueInfo - - def __norm_keyval(self, value): - list = self.__norm_list(value) - keyValue = {} - for item in list: - (key, value) = reKeyVal.split(item) - #if not keyValue.has_key(key): - # keyValue[key] = [] - #keyValue[key].append(value) - keyValue[key] = value - return keyValue - - def __verify_list(self, type, value): - valueInfo = self.__get_value_info() - - self.__set_value_info(valueInfo, normalized=self.normalize(type,value)) - - return valueInfo - - def __norm_list(self, value): - norm = [] - if reList.search(value): - norm = reList.split(value) - else: - norm = [value,] - - return norm - - def __verify_file(self, type, value): - valueInfo = self.__get_value_info() - - if os.path.isfile(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_file(self, value): - return self.__normalizedPath(value) - - def __verify_size(self, type, value): - valueInfo = self.__get_value_info() - - value = str(value) - if reSizeFormat.match(value): - numberPart = int(reSizeFormat.sub("\g<1>", value)) - factorPart = reSizeFormat.sub("\g<2>", value) - try: - normalized = normalize_size(numberPart, factorPart) - self.__set_value_info(valueInfo, - normalized=normalized) - except: - self.__set_value_info(valueInfo) - else: - try: - value = int(value) - except: - self.__set_value_info(valueInfo) - else: - if value >= 0: - self.__set_value_info(valueInfo, normalized=value) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_size(self, file): - norm = None - if reSizeFormat.match(value): - numberPart = int(reSizeFormat.sub("\g<1>", value)) - factorPart = reSizeFormat.sub("\g<2>", value) - norm = normalize_size(numberPart, factorPart) - else: - norm = int(value) - - return norm - - - def __verify_eaddress(self, type, value): - valueInfo = self.__get_value_info() - - emailList = reComma.split(value) - - for emailAddress in emailList: - if reEmailAddress.match(emailAddress): - emailParts = reEmailDelimit.split(emailAddress) - try: - socket.gethostbyname(emailParts[1]) - self.__set_value_info(valueInfo, normalized=self.normalize( - type, value)) - except: - errorString = "%s is invalid (domain lookup failed)" % \ - emailAddress - self.__set_value_info(valueInfo, errorData=errorString) - else: - errorString = "%s is invalid" % emailAddress - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __verify_tcp_port(self, type, value): - valueInfo = self.__get_value_info() - - try: - value = self.__norm_tcp_port(value) - except: - self.__set_value_info(valueInfo) - else: - if value in range(2, 65536): - self.__set_value_info(valueInfo, normalized=value) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __norm_tcp_port(self, value): - return int(value) - - def __verify_http_version(self, type, value): - valueInfo = self.__get_value_info() - - if value in ('1.0', '1.1'): - self.__set_value_info(valueInfo, normalized=float(value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_range(self, type, value): - valueInfo = self.__get_value_info() - - range = reDash.split(value) - - try: - if len(range) > 1: - start = int(range[0]) - end = int(range[1]) - else: - start = int(range[0]) - end = None - except: - self.__set_value_info(valueInfo) - else: - if end: - if end - start != 0: - self.__set_value_info(valueInfo, normalized=(start, end)) - else: 
- self.__set_value_info(valueInfo) - else: - self.__set_value_info(valueInfo, normalized=(start,)) - - return valueInfo - - def __norm_range(self, value): - range = reDash.split(value) - if len(range) > 1: - start = int(range[0]) - end = int(range[1]) - else: - start = int(range[0]) - end = None - - return (start, end) - - def __verify_uri(self, type, value): - valueInfo = self.__get_value_info() - - _norm = None - try: - uriComponents = urlparse.urlparse(value) - if uriComponents[0] == '' or uriComponents[0] == 'file': - # if scheme is '' or 'file' - if not os.path.isfile(uriComponents[2]) and \ - not os.path.isdir(uriComponents[2]): - raise Exception("Invalid local URI") - else: - self.__set_value_info(valueInfo, normalized=self.normalize( - type,value)) - else: - # other schemes - # currently not checking anything. TODO - self.__set_value_info(valueInfo, normalized=self.normalize( - type,value)) - except: - errorString = "%s is an invalid uri" % value - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __norm_uri(self, value): - uriComponents = list(urlparse.urlparse(value)) - if uriComponents[0] == '': - # if scheme is ''' - return self.__normalizedPath(uriComponents[2]) - elif uriComponents[0] == 'file': - # if scheme is 'file' - normalizedPath = self.__normalizedPath(uriComponents[2]) - return urlparse.urlunsplit(uriComponents[0:1] + [normalizedPath] + uriComponents[3:]) - - # Not dealing with any other case right now - return value - - def __verify_timestamp(self, type, value): - valueInfo = self.__get_value_info() - - if check_timestamp(value): - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - else: - self.__set_value_info(valueInfo) - - return valueInfo - - def __verify_hostname(self, type, value): - valueInfo = self.__get_value_info() - - try: - socket.gethostbyname(value) - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - except: - errorString = "%s is invalid (domain lookup failed)" % value - self.__set_value_info(valueInfo, errorData=errorString) - - return valueInfo - - def __verify_user_account(self, type, value): - valueInfo = self.__get_value_info() - - try: - pwd.getpwnam(value) - except: - errorString = "'%s' user account does not exist" % value - self.__set_value_info(valueInfo, errorData=errorString) - else: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - def __verify_user_group(self, type, value): - valueInfo = self.__get_value_info() - - try: - grp.getgrnam(value) - except: - errorString = "'%s' group does not exist" % value - self.__set_value_info(valueInfo, errorData=errorString) - else: - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - def __verify_nothing(self, type, value): - valueInfo = self.__get_value_info() - - self.__set_value_info(valueInfo, normalized=self.normalize(type, - value)) - - return valueInfo - - #-------------------------------------------------------------------------- - - def normalize(self, type, value): - try: - normFunc = getattr(self, "_typeValidator__norm_%s" % type) - return normFunc(value) - except AttributeError, A: - # this exception should occur only when we don't have corresponding normalize function - return value - - def verify(self, type, value, allowNone=False): - """Verifies a value based on its type. 
-
-        type      - supported configValidator type
-        value     - data to be validated
-        allowNone - don't freak out if None or '' is supplied
-
-        returns a valueInfo dictionary:
-
-         valueInfo = { 'isValid' : 1, 'normalized' : 5, 'errorData' : 0 }
-
-        where:
-
-         isValid    - true or false (0/1)
-         normalized - the normalized value
-         errorData  - if invalid an error string
-
-        supported types:
-
-         see top level"""
-
-        result = None
-        if allowNone:
-            if value == '' or value == None:
-                result = self.__verify_nothing(None, None)
-                result['normalized'] = None
-            else:
-                result = self.verifyFunctions[type](type, value)
-        else:
-            result = self.verifyFunctions[type](type, value)
-
-        return result
-
-    def is_valid_type(self, type):
-        """Returns true if type is valid."""
-
-        return types.has_key(type)
-
-    def type_info(self, type):
-        """Returns type info dictionary."""
-
-        dbInfo = dbTypes[types[type]['db']]
-        typeInfo = types[type].copy()
-        typeInfo['db'] = dbInfo
-
-        return typeInfo
-
-    def add(self, name, type, value):
-        """Adds a value and type by name to the configValidate object to be
-           verified using validate().
-
-           name  - name used to key values and access the results of the
-                   validation
-           type  - configValidator type
-           value - data to be verified"""
-
-        self.validateList.append({ 'name' : name,
-                                   'type' : type,
-                                   'value': value })
-
-    def validate(self, allowNone=False):
-        """Validates configValidate object populating validatedInfo with
-           valueInfo dictionaries for each value added to the object."""
-
-        for valItem in self.validateList:
-            valueInfo = self.verify(valItem['type'], valItem['value'],
-                                    allowNone)
-            if valueInfo:
-                valueInfo['name'] = valItem['name']
-                self.validatedInfo.append(valueInfo)
-            else:
-                raise Exception("\nMissing a return value: valueInfo\n%s" % \
-                    self.verifyFunctions[valItem['type']](valItem['type'],
-                                                          valItem['value']))
-
-    def __normalizedPath(self, value):
-        oldWd = os.getcwd()
-        if self.__originalDir:
-            os.chdir(self.__originalDir)
-        normPath = os.path.realpath(value)
-        os.chdir(oldWd)
-        return normPath
-
-
-class display:
-    def __init__(self):
-        self.displayFunctions = {}
-        self.__build_display_functions()
-
-    def __build_display_functions(self):
-        functions = {}
-        for function in dir(self):
-            functions[function] = 1
-
-        for type in types.keys():
-            # kinda bad, need to find out how to know the name of the class
-            # I'm in. But it works.
-            functionName = "_display__display_%s" % type
-            if functions.has_key(functionName):
-                self.displayFunctions[type] = getattr(self, functionName)
-            else:
-                if type == '':
-                    self.displayFunctions[type] = self.__display_default
-                else:
-                    error = "Display function %s for type %s does not exist."
\ - % (functionName, type) - raise Exception(error) - sys.exit(1) - - def __display_default(self, value, style): - return value - - def __display_generic_number(self, value): - displayNumber = '' - splitNum = string.split(str(value), sep='.') - numList = list(str(splitNum[0])) - numList.reverse() - length = len(numList) - counter = 0 - for char in numList: - counter = counter + 1 - if counter % 3 or counter == length: - displayNumber = "%s%s" % (char, displayNumber) - else: - displayNumber = ",%s%s" % (char, displayNumber) - - if len(splitNum) > 1: - displayNumber = "%s.%s" % (displayNumber, splitNum[1]) - - return displayNumber - - def __display_generic_mappable(self, map, value, style, plural=True): - displayValue = '' - length = len(str(value)) - if length > 3: - for factorSet in map: - displayValue = float(value) / factorSet['factor'] - if len(str(int(displayValue))) <= 3 or \ - factorSet['factor'] == map[-1]['factor']: - displayValue = "%10.2f" % displayValue - if displayValue[-1] == '0': - if displayValue > 1 and style != 'short' and plural: - displayValue = "%s %ss" % (displayValue[:-1], - factorSet[style]) - else: - displayValue = "%s %s" % (displayValue[:-1], - factorSet[style]) - else: - if displayValue > 1 and style != 'short' and plural: - displayValue = "%s %ss" % (displayValue, - factorSet[style]) - else: - displayValue = "%s %s" % (displayValue, - factorSet[style]) - break - - return displayValue - - def __display_directory(self, value, style): - return self.__display_default(value, style) - - def __display_address(self, value, style): - return self.__display_default(value, style) - - def __display_ip_address(self, value, style): - return self.__display_default(value, style) - - def __display_net_address(self, value, style): - return self.__display_default(value, style) - - def __display_bool(self, value, style): - displayValue = value - - if not isinstance(displayValue, bool): - if re.match("^false|0|f|no$", value, 2): - displayValue=False - elif re.match("^true|1|t|yes$", value, 2): - displayValue=True - - return displayValue - - def __display_int(self, value, style): - return self.__display_generic_number(value) - - def __display_float(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_int(self, value, style): - return self.__display_generic_number(value) - - def __display_neg_int(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_num(self, value, style): - return self.__display_generic_number(value) - - def __display_pos_float(self, value, style): - return self.__display_generic_number(value) - - def __display_neg_float(self, value, style): - return self.__display_generic_number(value) - - def __display_string(self, value, style): - return self.__display_default(value, style) - - def __display_list(self, value, style): - value = value.rstrip() - return value.rstrip(',') - - def __display_keyval(self, value, style): - value = value.rstrip() - return value.rstrip(',') - - def __display_file(self, value, style): - return self.__display_default(value, style) - - def __display_size(self, value, style): - return self.__display_generic_mappable(sizeMap, value, style) - - def __display_freq(self, value, style): - return self.__display_generic_mappable(freqMap, value, style, False) - - def __display_eaddress(self, value, style): - return self.__display_default(value, style) - - def __display_tcp_port(self, value, style): - return self.__display_default(value, style) - - def __display_http_version(self, 
value, style): - return self.__display_default(value, style) - - def __display_range(self, value, style): - return self.__display_default(value, style) - - def __display_hostname(self, value, style): - return self.__display_default(value, style) - - def __display_user_account(self, value, style): - return self.__display_default(value, style) - - def __display_user_group(self, value, style): - return self.__display_default(value, style) - - def __display_timestamp(self, value, style): - return self.__display_default(value, style) - - def display(self, type, value, style='short'): - displayValue = value - if value != None: - displayValue = self.displayFunctions[type](value, style) - - return displayValue - -typeValidatorInstance = typeValidator() - -def is_valid_type(type): - """Returns true if type is valid.""" - - return typeValidatorInstance.is_valid_type(type) - -def type_info(type): - """Returns type info dictionary.""" - - return typeValidatorInstance.type_info(type) - -def verify(type, value, allowNone=False): - """Returns a normalized valueInfo dictionary.""" - - return typeValidatorInstance.verify(type, value, allowNone) - -def __normalize(map, val, factor): - normFactor = string.lower(factor) - normVal = float(val) - return int(normVal * map[normFactor]) - -def normalize_size(size, factor): - """ Normalize a size to bytes. - - size - number of B, KB, MB, GB, TB, or PB - factor - size factor (case insensitive): - b | bytes - bytes - k | kb - kilobytes - m | mb - megabytes - g | gb - gigabytes - t | tb - terabytes - p | pb - petabytes - """ - - return __normalize(sizeFactors, size, factor) - -def normalize_freq(freq, factor): - """ Normalize a frequency to hertz. - - freq - number of Hz, Khz, Mhz, Ghz, Thz, or Phz - factor - size factor (case insensitive): - Hz - Hertz - Mhz - Megahertz - Ghz - Gigahertz - Thz - Terahertz - Phz - Petahertz - """ - - return __normalize(freqFactors, freq, factor) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py deleted file mode 100644 index 3d5cb6fade..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/util.py +++ /dev/null @@ -1,309 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import errno, sys, os, traceback, stat, socket, re, warnings, signal - -from hodlib.Common.tcp import tcpSocket, tcpError -from hodlib.Common.threads import simpleCommand - -setUGV = { 'S_ISUID' : 2, 'S_ISGID' : 1, 'S_ISVTX' : 0 } -reEscapeSeq = r"\\(.)?" -reEscapeSeq = re.compile(reEscapeSeq) - -HOD_INTERRUPTED_CODE = 127 -HOD_INTERRUPTED_MESG = "Hod interrupted. Cleaning up and exiting" -TORQUE_USER_LIMITS_COMMENT_FIELD = "User-limits exceeded. 
" + \ - "Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)" -TORQUE_USER_LIMITS_EXCEEDED_MSG = "Requested number of nodes exceeded " + \ - "maximum user limits. " - -class AlarmException(Exception): - def __init__(self, msg=''): - self.message = msg - Exception.__init__(self, msg) - - def __repr__(self): - return self.message - -def isProcessRunning(pid): - '''Check if a process is running, by sending it a 0 signal, and checking for errors''' - # This method is documented in some email threads on the python mailing list. - # For e.g.: http://mail.python.org/pipermail/python-list/2002-May/144522.html - try: - os.kill(pid, 0) - return True - except OSError, err: - return err.errno == errno.EPERM - -def untar(file, targetDir): - status = False - command = 'tar -C %s -zxf %s' % (targetDir, file) - commandObj = simpleCommand('untar', command) - commandObj.start() - commandObj.wait() - commandObj.join() - if commandObj.exit_code() == 0: - status = True - - return status - -def tar(tarFile, tarDirectory, tarList): - currentDir = os.getcwd() - os.chdir(tarDirectory) - status = False - command = 'tar -czf %s ' % (tarFile) - - for file in tarList: - command = "%s%s " % (command, file) - - commandObj = simpleCommand('tar', command) - commandObj.start() - commandObj.wait() - commandObj.join() - if commandObj.exit_code() == 0: - status = True - else: - status = commandObj.exit_status_string() - - os.chdir(currentDir) - - return status - -def to_http_url(list): - """convert [hostname, port] to a http url""" - str = '' - str = "http://%s:%s" % (list[0], list[1]) - - return str - -def get_exception_string(): - (type, value, tb) = sys.exc_info() - exceptList = traceback.format_exception(type, value, tb) - exceptString = '' - for line in exceptList: - exceptString = "%s%s" % (exceptString, line) - - return exceptString - -def get_exception_error_string(): - (type, value, tb) = sys.exc_info() - if value: - exceptString = "%s %s" % (type, value) - else: - exceptString = type - - return exceptString - -def check_timestamp(timeStamp): - """ Checks the validity of a timeStamp. 
- - timeStamp - (YYYY-MM-DD HH:MM:SS in UTC) - - returns True or False - """ - isValid = True - - try: - timeStruct = time.strptime(timeStamp, "%Y-%m-%d %H:%M:%S") - except: - isValid = False - - return isValid - -def sig_wrapper(sigNum, handler, *args): - if args: - handler(args) - else: - handler() - -def get_perms(filename): - mode = stat.S_IMODE(os.stat(filename)[stat.ST_MODE]) - permsString = '' - permSet = 0 - place = 2 - for who in "USR", "GRP", "OTH": - for what in "R", "W", "X": - if mode & getattr(stat,"S_I"+what+who): - permSet = permSet + 2**place - place = place - 1 - - permsString = "%s%s" % (permsString, permSet) - permSet = 0 - place = 2 - - permSet = 0 - for permFlag in setUGV.keys(): - if mode & getattr(stat, permFlag): - permSet = permSet + 2**setUGV[permFlag] - - permsString = "%s%s" % (permSet, permsString) - - return permsString - -def local_fqdn(): - """Return a system's true FQDN rather than any aliases, which are - occasionally returned by socket.gethostname.""" - - fqdn = None - me = os.uname()[1] - nameInfo=socket.gethostbyname_ex(me) - nameInfo[1].append(nameInfo[0]) - for name in nameInfo[1]: - if name.count(".") and name.startswith(me): - fqdn = name - if fqdn == None: - fqdn = me - return(fqdn) - -def need_to_allocate(allocated, config, command): - status = True - - if allocated.isSet(): - status = False - elif re.search("\s*dfs.*$", command) and \ - config['gridservice-hdfs']['external']: - status = False - elif config['gridservice-mapred']['external']: - status = False - - return status - -def filter_warnings(): - warnings.filterwarnings('ignore', - message=".*?'with' will become a reserved keyword.*") - -def args_to_string(list): - """return a string argument space seperated""" - arg = '' - for item in list: - arg = "%s%s " % (arg, item) - return arg[:-1] - -def replace_escapes(object): - """ replace any escaped character. e.g \, with , \= with = and so on """ - # here object is either a config object or a options object - for section in object._mySections: - for option in object._configDef[section].keys(): - if object[section].has_key(option): - if object._configDef[section][option]['type'] == 'keyval': - keyValDict = object[section][option] - object[section][option] = {} - for (key,value) in keyValDict.iteritems(): - match = reEscapeSeq.search(value) - if match: - value = reEscapeSeq.sub(r"\1", value) - object[section][option][key] = value - -def hadoopVersion(hadoopDir, java_home, log): - # Determine the version of hadoop being used by executing the - # hadoop version command. 
Code earlier in idleTracker.py - hadoopVersion = { 'major' : None, 'minor' : None } - hadoopPath = os.path.join(hadoopDir, 'bin', 'hadoop') - cmd = "%s version" % hadoopPath - log.debug('Executing command %s to find hadoop version' % cmd) - env = os.environ - env['JAVA_HOME'] = java_home - hadoopVerCmd = simpleCommand('HadoopVersion', cmd, env) - hadoopVerCmd.start() - hadoopVerCmd.wait() - hadoopVerCmd.join() - if hadoopVerCmd.exit_code() == 0: - verLine = hadoopVerCmd.output()[0] - log.debug('Version from hadoop command: %s' % verLine) - hadoopVerRegExp = re.compile("Hadoop ([0-9]+)\.([0-9]+).*") - verMatch = hadoopVerRegExp.match(verLine) - if verMatch != None: - hadoopVersion['major'] = verMatch.group(1) - hadoopVersion['minor'] = verMatch.group(2) - return hadoopVersion - - -def get_cluster_status(hdfsAddress, mapredAddress): - """Determine the status of the cluster based on socket availability - of HDFS and Map/Reduce.""" - status = 0 - - mapredSocket = tcpSocket(mapredAddress) - try: - mapredSocket.open() - mapredSocket.close() - except tcpError: - status = 14 - - hdfsSocket = tcpSocket(hdfsAddress) - try: - hdfsSocket.open() - hdfsSocket.close() - except tcpError: - if status > 0: - status = 10 - else: - status = 13 - - return status - -def parseEquals(list): - # takes in a list of keyval pairs e.g ['a=b','c=d'] and returns a - # dict e.g {'a'='b','c'='d'}. Used in GridService/{mapred.py/hdfs.py} and - # HodRing/hodring.py. No need for specially treating escaped =. as in \=, - # since all keys are generated by hod and don't contain such anomalies - dict = {} - for elems in list: - splits = elems.split('=') - dict[splits[0]] = splits[1] - return dict - -def getMapredSystemDirectory(mrSysDirRoot, userid, jobid): - return os.path.join(mrSysDirRoot, userid, 'mapredsystem', jobid) - -class HodInterrupt: - def __init__(self): - self.HodInterruptFlag = False - self.log = None - - def set_log(self, log): - self.log = log - - def init_signals(self): - - def sigStop(sigNum, handler): - sig_wrapper(sigNum, self.setFlag) - - signal.signal(signal.SIGTERM, sigStop) # 15 : software termination signal - signal.signal(signal.SIGQUIT, sigStop) # 3 : Quit program - signal.signal(signal.SIGINT, sigStop) # 2 ^C : Interrupt program - - def sig_wrapper(sigNum, handler, *args): - self.log.critical("Caught signal %s." % sigNum ) - - if args: - handler(args) - else: - handler() - - def setFlag(self, val = True): - self.HodInterruptFlag = val - - def isSet(self): - return self.HodInterruptFlag - -class HodInterruptException(Exception): - def __init__(self, value = ""): - self.value = value - - def __str__(self): - return repr(self.value) - -hodInterrupt = HodInterrupt() diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/xmlrpc.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/xmlrpc.py deleted file mode 100644 index bb7ef8b60c..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Common/xmlrpc.py +++ /dev/null @@ -1,57 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -import xmlrpclib, time, random, signal -from hodlib.Common.util import hodInterrupt, HodInterruptException - -class hodXRClient(xmlrpclib.ServerProxy): - def __init__(self, uri, transport=None, encoding=None, verbose=0, - allow_none=0, installSignalHandlers=1, retryRequests=True, timeOut=15): - xmlrpclib.ServerProxy.__init__(self, uri, transport, encoding, verbose, - allow_none) - self.__retryRequests = retryRequests - self.__timeOut = timeOut - if (installSignalHandlers!=0): - self.__set_alarm() - - def __set_alarm(self): - def alarm_handler(sigNum, sigHandler): - raise Exception("XML-RPC socket timeout.") - - signal.signal(signal.SIGALRM, alarm_handler) - - def __request(self, methodname, params): - response = None - retryWaitTime = 5 + random.randint(0, 5) - for i in range(0, 30): - signal.alarm(self.__timeOut) - try: - response = self._ServerProxy__request(methodname, params) - signal.alarm(0) - break - except Exception: - if self.__retryRequests: - if hodInterrupt.isSet(): - raise HodInterruptException() - time.sleep(retryWaitTime) - else: - raise Exception("hodXRClientTimeout") - - return response - - def __getattr__(self, name): - # magic method dispatcher - return xmlrpclib._Method(self.__request, name) - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py deleted file mode 100644 index 52138f2f8a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from mapred import MapReduce, MapReduceExternal -from hdfs import Hdfs, HdfsExternal diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py deleted file mode 100644 index 11efd116c3..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/hdfs.py +++ /dev/null @@ -1,310 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. 
You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""define Hdfs as subclass of Service""" - -# -*- python -*- - -import os - -from service import * -from hodlib.Hod.nodePool import * -from hodlib.Common.desc import CommandDesc -from hodlib.Common.util import get_exception_string, parseEquals - -class HdfsExternal(MasterSlave): - """dummy proxy to external HDFS instance""" - - def __init__(self, serviceDesc, workDirs, version): - MasterSlave.__init__(self, serviceDesc, workDirs,None) - self.launchedMaster = True - self.masterInitialized = True - self.version = version - - def getMasterRequest(self): - return None - - def getMasterCommands(self, serviceDict): - return [] - - def getAdminCommands(self, serviceDict): - return [] - - def getWorkerCommands(self, serviceDict): - return [] - - def getMasterAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - addr = attrs['fs.default.name'] - return [addr] - - def setMasterParams(self, dict): - self.serviceDesc.dict['final-attrs']['fs.default.name'] = "%s:%s" % \ - (dict['host'], dict['fs_port']) - - if self.version < 16: - self.serviceDesc.dict['final-attrs']['dfs.info.port'] = \ - str(self.serviceDesc.dict['info_port']) - else: - # After Hadoop-2185 - self.serviceDesc.dict['final-attrs']['dfs.http.address'] = "%s:%s" % \ - (dict['host'], dict['info_port']) - - def getInfoAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - if self.version < 16: - addr = attrs['fs.default.name'] - k,v = addr.split( ":") - infoaddr = k + ':' + attrs['dfs.info.port'] - else: - # After Hadoop-2185 - infoaddr = attrs['dfs.http.address'] - return [infoaddr] - -class Hdfs(MasterSlave): - - def __init__(self, serviceDesc, nodePool, required_node, version, \ - format=True, upgrade=False, - workers_per_ring = 1): - MasterSlave.__init__(self, serviceDesc, nodePool, required_node) - self.masterNode = None - self.masterAddr = None - self.runAdminCommands = True - self.infoAddr = None - self._isLost = False - self.format = format - self.upgrade = upgrade - self.workers = [] - self.version = version - self.workers_per_ring = workers_per_ring - - def getMasterRequest(self): - req = NodeRequest(1, [], False) - return req - - def getMasterCommands(self, serviceDict): - - masterCommands = [] - if self.format: - masterCommands.append(self._getNameNodeCommand(True)) - - if self.upgrade: - masterCommands.append(self._getNameNodeCommand(False, True)) - else: - masterCommands.append(self._getNameNodeCommand(False)) - - return masterCommands - - def getAdminCommands(self, serviceDict): - - adminCommands = [] - if self.upgrade and self.runAdminCommands: - adminCommands.append(self._getNameNodeAdminCommand('-safemode wait')) - adminCommands.append(self._getNameNodeAdminCommand('-finalizeUpgrade', - True, True)) - - self.runAdminCommands = False - return adminCommands - - def getWorkerCommands(self, serviceDict): - workerCmds = [] - for id in range(1, self.workers_per_ring + 1): - workerCmds.append(self._getDataNodeCommand(str(id))) - - return workerCmds - - def setMasterNodes(self, list): - node = list[0] - self.masterNode = node - - def getMasterAddrs(self): - return [self.masterAddr] - - def getInfoAddrs(self): - return 
[self.infoAddr] - - def getWorkers(self): - return self.workers - - def setMasterParams(self, list): - dict = self._parseEquals(list) - self.masterAddr = dict['fs.default.name'] - k,v = self.masterAddr.split( ":") - self.masterNode = k - if self.version < 16: - self.infoAddr = self.masterNode + ':' + dict['dfs.info.port'] - else: - # After Hadoop-2185 - self.infoAddr = dict['dfs.http.address'] - - def _parseEquals(self, list): - return parseEquals(list) - - def _setWorkDirs(self, workDirs, envs, attrs, parentDirs, subDir): - namedir = None - hadooptmpdir = None - datadir = [] - - for p in parentDirs: - workDirs.append(p) - workDirs.append(os.path.join(p, subDir)) - dir = os.path.join(p, subDir, 'dfs-data') - datadir.append(dir) - if not hadooptmpdir: - # Not used currently, generating hadooptmpdir just in case - hadooptmpdir = os.path.join(p, subDir, 'hadoop-tmp') - - if not namedir: - namedir = os.path.join(p, subDir, 'dfs-name') - - workDirs.append(namedir) - workDirs.extend(datadir) - - # FIXME!! use csv - attrs['dfs.name.dir'] = namedir - attrs['hadoop.tmp.dir'] = hadooptmpdir - attrs['dfs.data.dir'] = ','.join(datadir) - envs['HADOOP_ROOT_LOGGER'] = "INFO,DRFA" - - - def _getNameNodeCommand(self, format=False, upgrade=False): - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - - if 'fs.default.name' not in attrs: - attrs['fs.default.name'] = 'fillinhostport' - - if self.version < 16: - if 'dfs.info.port' not in attrs: - attrs['dfs.info.port'] = 'fillinport' - else: - # Addressing Hadoop-2185, added the following. Earlier versions don't - # care about this - if 'dfs.http.address' not in attrs: - attrs['dfs.http.address'] = 'fillinhostport' - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-nn') - - dict = { 'name' : 'namenode' } - dict['program'] = os.path.join('bin', 'hadoop') - argv = ['namenode'] - if format: - argv.append('-format') - elif upgrade: - argv.append('-upgrade') - dict['argv'] = argv - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - if format: - dict['fg'] = 'true' - dict['stdin'] = 'Y' - cmd = CommandDesc(dict) - return cmd - - def _getNameNodeAdminCommand(self, adminCommand, wait=True, ignoreFailures=False): - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - nn = self.masterAddr - - if nn == None: - raise ValueError, "Can't get namenode address" - - attrs['fs.default.name'] = nn - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-nn') - - dict = { 'name' : 'dfsadmin' } - dict['program'] = os.path.join('bin', 'hadoop') - argv = ['dfsadmin'] - argv.append(adminCommand) - dict['argv'] = argv - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - if wait: - dict['fg'] = 'true' - dict['stdin'] = 'Y' - if ignoreFailures: - dict['ignorefailures'] = 'Y' - cmd = CommandDesc(dict) - return cmd - - def _getDataNodeCommand(self, id): - - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - nn = self.masterAddr - - if nn == None: - raise ValueError, "Can't get namenode address" - - attrs['fs.default.name'] = nn - - if self.version < 16: - if 'dfs.datanode.port' not in attrs: - attrs['dfs.datanode.port'] = 
'fillinport' - if 'dfs.datanode.info.port' not in attrs: - attrs['dfs.datanode.info.port'] = 'fillinport' - else: - # Adding the following. Hadoop-2185 - if 'dfs.datanode.address' not in attrs: - attrs['dfs.datanode.address'] = 'fillinhostport' - if 'dfs.datanode.http.address' not in attrs: - attrs['dfs.datanode.http.address'] = 'fillinhostport' - - if self.version >= 18: - # After HADOOP-3283 - # TODO: check for major as well as minor versions - attrs['dfs.datanode.ipc.address'] = 'fillinhostport' - - # unique workdirs in case of multiple datanodes per hodring - pd = [] - for dir in parentDirs: - dir = dir + "-" + id - pd.append(dir) - parentDirs = pd - # end of unique workdirs - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'hdfs-dn') - - dict = { 'name' : 'datanode' } - dict['program'] = os.path.join('bin', 'hadoop') - dict['argv'] = ['datanode'] - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - - cmd = CommandDesc(dict) - return cmd - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py deleted file mode 100644 index 086f052fda..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/mapred.py +++ /dev/null @@ -1,272 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
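Both the HDFS classes above and the MapReduce classes below gate their web-UI configuration keys on self.version < 16: before Hadoop 0.16 the UI was configured as a bare port (dfs.info.port, mapred.job.tracker.info.port), while after HADOOP-2185 it became a host:port address (dfs.http.address, mapred.job.tracker.http.address). A table-driven sketch of the same selection; HTTP_KEYS and pick_http_key are hypothetical names, not part of hodlib:

    # Illustrative only: equivalent to the scattered "if self.version < 16"
    # branches in Hdfs/HdfsExternal and MapReduce/MapReduceExternal.
    HTTP_KEYS = {
        'hdfs':   {'old': 'dfs.info.port',
                   'new': 'dfs.http.address'},
        'mapred': {'old': 'mapred.job.tracker.info.port',
                   'new': 'mapred.job.tracker.http.address'},
    }

    def pick_http_key(service, version):
        # Pre-0.16: bare port key; post HADOOP-2185: host:port address key.
        era = 'old' if version < 16 else 'new'
        return HTTP_KEYS[service][era]

    assert pick_http_key('hdfs', 15) == 'dfs.info.port'
    assert pick_http_key('mapred', 16) == 'mapred.job.tracker.http.address'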
-"""define MapReduce as subclass of Service""" - -# -*- python -*- - -import os, copy, time - -from service import * -from hodlib.Hod.nodePool import * -from hodlib.Common.desc import CommandDesc -from hodlib.Common.util import get_exception_string, parseEquals - -class MapReduceExternal(MasterSlave): - """dummy proxy to external MapReduce instance""" - - def __init__(self, serviceDesc, workDirs, version): - MasterSlave.__init__(self, serviceDesc, workDirs,None) - self.launchedMaster = True - self.masterInitialized = True - self.version = version - - def getMasterRequest(self): - return None - - def getMasterCommands(self, serviceDict): - return [] - - def getAdminCommands(self, serviceDict): - return [] - - def getWorkerCommands(self, serviceDict): - return [] - - def getMasterAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - addr = attrs['mapred.job.tracker'] - return [addr] - - def needsMore(self): - return 0 - - def needsLess(self): - return 0 - - def setMasterParams(self, dict): - self.serviceDesc['final-attrs']['mapred.job.tracker'] = "%s:%s" % (dict['host'], - dict['tracker_port']) - - if self.version < 16: - self.serviceDesc.dict['final-attrs']['mapred.job.tracker.info.port'] = \ - str(self.serviceDesc.dict['info_port']) - else: - # After Hadoop-2185 - self.serviceDesc['final-attrs']['mapred.job.tracker.http.address'] = \ - "%s:%s" %(dict['host'], dict['info_port']) - - def getInfoAddrs(self): - attrs = self.serviceDesc.getfinalAttrs() - if self.version < 16: - addr = attrs['mapred.job.tracker'] - k,v = addr.split( ":") - infoaddr = k + ':' + attrs['mapred.job.tracker.info.port'] - else: - # After Hadoop-2185 - # Note: earlier,we never respected mapred.job.tracker.http.address - infoaddr = attrs['mapred.job.tracker.http.address'] - return [infoaddr] - -class MapReduce(MasterSlave): - - def __init__(self, serviceDesc, workDirs,required_node, version, - workers_per_ring = 1): - MasterSlave.__init__(self, serviceDesc, workDirs,required_node) - - self.masterNode = None - self.masterAddr = None - self.infoAddr = None - self.workers = [] - self.required_node = required_node - self.version = version - self.workers_per_ring = workers_per_ring - - def isLaunchable(self, serviceDict): - hdfs = serviceDict['hdfs'] - if (hdfs.isMasterInitialized()): - return True - return False - - def getMasterRequest(self): - req = NodeRequest(1, [], False) - return req - - def getMasterCommands(self, serviceDict): - - hdfs = serviceDict['hdfs'] - - cmdDesc = self._getJobTrackerCommand(hdfs) - return [cmdDesc] - - def getAdminCommands(self, serviceDict): - return [] - - def getWorkerCommands(self, serviceDict): - - hdfs = serviceDict['hdfs'] - - workerCmds = [] - for id in range(1, self.workers_per_ring + 1): - workerCmds.append(self._getTaskTrackerCommand(str(id), hdfs)) - - return workerCmds - - def setMasterNodes(self, list): - node = list[0] - self.masterNode = node - - def getMasterAddrs(self): - return [self.masterAddr] - - def getInfoAddrs(self): - return [self.infoAddr] - - def getWorkers(self): - return self.workers - - def requiredNode(self): - return self.required_host - - def setMasterParams(self, list): - dict = self._parseEquals(list) - self.masterAddr = dict['mapred.job.tracker'] - k,v = self.masterAddr.split(":") - self.masterNode = k - if self.version < 16: - self.infoAddr = self.masterNode + ':' + dict['mapred.job.tracker.info.port'] - else: - # After Hadoop-2185 - self.infoAddr = dict['mapred.job.tracker.http.address'] - - def _parseEquals(self, list): - return parseEquals(list) - 
- def _setWorkDirs(self, workDirs, envs, attrs, parentDirs, subDir): - local = [] - system = None - temp = None - hadooptmpdir = None - dfsclient = [] - - for p in parentDirs: - workDirs.append(p) - workDirs.append(os.path.join(p, subDir)) - dir = os.path.join(p, subDir, 'mapred-local') - local.append(dir) - if not system: - system = os.path.join(p, subDir, 'mapred-system') - if not temp: - temp = os.path.join(p, subDir, 'mapred-temp') - if not hadooptmpdir: - # Not used currently, generating hadooptmpdir just in case - hadooptmpdir = os.path.join(p, subDir, 'hadoop-tmp') - dfsclientdir = os.path.join(p, subDir, 'dfs-client') - dfsclient.append(dfsclientdir) - workDirs.append(dfsclientdir) - # FIXME!! use csv - attrs['mapred.local.dir'] = ','.join(local) - attrs['mapred.system.dir'] = 'fillindir' - attrs['mapred.temp.dir'] = temp - attrs['hadoop.tmp.dir'] = hadooptmpdir - - - envs['HADOOP_ROOT_LOGGER'] = "INFO,DRFA" - - - def _getJobTrackerCommand(self, hdfs): - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - - if 'mapred.job.tracker' not in attrs: - attrs['mapred.job.tracker'] = 'fillinhostport' - - if self.version < 16: - if 'mapred.job.tracker.info.port' not in attrs: - attrs['mapred.job.tracker.info.port'] = 'fillinport' - else: - # Addressing Hadoop-2185, - if 'mapred.job.tracker.http.address' not in attrs: - attrs['mapred.job.tracker.http.address'] = 'fillinhostport' - - attrs['fs.default.name'] = hdfs.getMasterAddrs()[0] - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'mapred-jt') - - dict = { 'name' : 'jobtracker' } - dict['version'] = self.version - dict['program'] = os.path.join('bin', 'hadoop') - dict['argv'] = ['jobtracker'] - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - cmd = CommandDesc(dict) - return cmd - - def _getTaskTrackerCommand(self, id, hdfs): - - sd = self.serviceDesc - - parentDirs = self.workDirs - workDirs = [] - attrs = sd.getfinalAttrs().copy() - envs = sd.getEnvs().copy() - jt = self.masterAddr - - if jt == None: - raise ValueError, "Can't get job tracker address" - - attrs['mapred.job.tracker'] = jt - attrs['fs.default.name'] = hdfs.getMasterAddrs()[0] - - if self.version < 16: - if 'tasktracker.http.port' not in attrs: - attrs['tasktracker.http.port'] = 'fillinport' - # earlier to 16, tasktrackers always took ephemeral port 0 for - # tasktracker.report.bindAddress - else: - # Adding the following. 
Hadoop-2185 - if 'mapred.task.tracker.report.address' not in attrs: - attrs['mapred.task.tracker.report.address'] = 'fillinhostport' - if 'mapred.task.tracker.http.address' not in attrs: - attrs['mapred.task.tracker.http.address'] = 'fillinhostport' - - # unique parentDirs in case of multiple tasktrackers per hodring - pd = [] - for dir in parentDirs: - dir = dir + "-" + id - pd.append(dir) - parentDirs = pd - # end of unique workdirs - - self._setWorkDirs(workDirs, envs, attrs, parentDirs, 'mapred-tt') - - dict = { 'name' : 'tasktracker' } - dict['program'] = os.path.join('bin', 'hadoop') - dict['argv'] = ['tasktracker'] - dict['envs'] = envs - dict['pkgdirs'] = sd.getPkgDirs() - dict['workdirs'] = workDirs - dict['final-attrs'] = attrs - dict['attrs'] = sd.getAttrs() - cmd = CommandDesc(dict) - return cmd - diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py deleted file mode 100644 index f0c7f5cbbf..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/GridServices/service.py +++ /dev/null @@ -1,266 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""defines Service as abstract interface""" - -# -*- python -*- -import random, socket - -class Service: - """ the service base class that all the - other services inherit from. """ - def __init__(self, serviceDesc, workDirs): - self.serviceDesc = serviceDesc - self.workDirs = workDirs - - def getName(self): - return self.serviceDesc.getName() - - def getInfoAddrs(self): - """Return a list of addresses that provide - information about the servie""" - return [] - - def isLost(self): - """True if the service is down""" - raise NotImplementedError - - def addNodes(self, nodeList): - """add nodeSet""" - raise NotImplementedError - - def removeNodes(self, nodeList): - """remove a nodeset""" - raise NotImplementedError - - def getWorkers(self): - raise NotImplementedError - - def needsMore(self): - """return number of nodes the service wants to add""" - raise NotImplementedError - - def needsLess(self): - """return number of nodes the service wants to remove""" - raise NotImplementedError - -class MasterSlave(Service): - """ the base class for a master slave - service architecture. """ - def __init__(self, serviceDesc, workDirs,requiredNode): - Service.__init__(self, serviceDesc, workDirs) - self.launchedMaster = False - self.masterInitialized = False - self.masterAddress = 'none' - self.requiredNode = requiredNode - self.failedMsg = None - self.masterFailureCount = 0 - - def getRequiredNode(self): - return self.requiredNode - - def getMasterRequest(self): - """ the number of master you need - to run for this service. 
""" - raise NotImplementedError - - def isLaunchable(self, serviceDict): - """ if your service does not depend on - other services. is set to true by default. """ - return True - - def getMasterCommands(self, serviceDict): - """ a list of master commands you - want to run for this service. """ - raise NotImplementedError - - def getAdminCommands(self, serviceDict): - """ a list of admin commands you - want to run for this service. """ - raise NotImplementedError - - def getWorkerCommands(self, serviceDict): - """ a list of worker commands you want to - run for this service. """ - raise NotImplementedError - - def setMasterNodes(self, list): - """ set the status of master nodes - after they start running on a node cluster. """ - raise NotImplementedError - - def addNodes(self, list): - """ add nodes to a service. Not implemented - currently. """ - raise NotImplementedError - - def getMasterAddrs(self): - """ return the addresses of master. the - hostname:port to which worker nodes should - connect. """ - raise NotImplementedError - - def setMasterParams(self, list): - """ set the various master params - depending on what each hodring set - the master params to. """ - raise NotImplementedError - - def setlaunchedMaster(self): - """ set the status of master launched - to true. """ - self.launchedMaster = True - - def isMasterLaunched(self): - """ return if a master has been launched - for the service or not. """ - return self.launchedMaster - - def isMasterInitialized(self): - """ return if a master if launched - has been initialized or not. """ - return self.masterInitialized - - def setMasterInitialized(self): - """ set the master initialized to - true. """ - self.masterInitialized = True - # Reset failure related variables, as master is initialized successfully. - self.masterFailureCount = 0 - self.failedMsg = None - - def getMasterAddress(self): - """ it needs to change to reflect - more that one masters. Currently it - keeps a knowledge of where the master - was launched and to keep track if it was actually - up or not. """ - return self.masterAddress - - def setMasterAddress(self, addr): - self.masterAddress = addr - - def isExternal(self): - return self.serviceDesc.isExternal() - - def setMasterFailed(self, err): - """Sets variables related to Master failure""" - self.masterFailureCount += 1 - self.failedMsg = err - # When command is sent to HodRings, this would have been set to True. - # Reset it to reflect the correct status. - self.launchedMaster = False - - def getMasterFailed(self): - return self.failedMsg - - def getMasterFailureCount(self): - return self.masterFailureCount - -class NodeRequest: - """ A class to define - a node request. """ - def __init__(self, n, required = [], preferred = [], isPreemptee = True): - self.numNodes = n - self.preferred = preferred - self.isPreemptee = isPreemptee - self.required = required - - def setNumNodes(self, n): - self.numNodes = n - - def setPreferredList(self, list): - self.preferred = list - - def setIsPreemptee(self, flag): - self.isPreemptee = flag - - -class ServiceUtil: - """ this class should be moved out of - service.py to a util file""" - localPortUsed = {} - - def getUniqRandomPort(h=None, low=50000, high=60000, retry=900, log=None): - """This allocates a randome free port between low and high""" - # We use a default value of 900 retries, which takes an agreeable - # time limit of ~ 6.2 seconds to check 900 ports, in the worse case - # of no available port in those 900. 
- - while retry > 0: - n = random.randint(low, high) - if n in ServiceUtil.localPortUsed: - continue - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if not h: - h = socket.gethostname() - avail = False - if log: log.debug("Trying to see if port %s is available"% n) - try: - s.bind((h, n)) - if log: log.debug("Yes, port %s is available" % n) - avail = True - except socket.error,e: - if log: log.debug("Could not bind to the port %s. Reason %s" % (n,e)) - retry -= 1 - pass - # The earlier code that used to be here had syntax errors. The code path - # couldn't be followd anytime, so the error remained uncaught. - # This time I stumbled upon the error - s.close() - - if avail: - ServiceUtil.localPortUsed[n] = True - return n - raise ValueError, "Can't find unique local port between %d and %d" % (low, high) - - getUniqRandomPort = staticmethod(getUniqRandomPort) - - def getUniqPort(h=None, low=40000, high=60000, retry=900, log=None): - """get unique port on a host that can be used by service - This and its consumer code should disappear when master - nodes get allocatet by nodepool""" - - # We use a default value of 900 retries, which takes an agreeable - # time limit of ~ 6.2 seconds to check 900 ports, in the worse case - # of no available port in those 900. - - n = low - while retry > 0: - n = n + 1 - if n in ServiceUtil.localPortUsed: - continue - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - if not h: - h = socket.gethostname() - avail = False - if log: log.debug("Trying to see if port %s is available"% n) - try: - s.bind((h, n)) - if log: log.debug("Yes, port %s is available" % n) - avail = True - except socket.error,e: - if log: log.debug("Could not bind to the port %s. Reason %s" % (n,e)) - retry -= 1 - pass - s.close() - - if avail: - ServiceUtil.localPortUsed[n] = True - return n - - raise ValueError, "Can't find unique local port between %d and %d" % (low, high) - - getUniqPort = staticmethod(getUniqPort) diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py deleted file mode 100644 index 12c2f1e1da..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py deleted file mode 100644 index 616d775803..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hadoop.py +++ /dev/null @@ -1,747 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. 
The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -"""define WorkLoad as abstract interface for user job""" -# -*- python -*- - -import os, time, sys, shutil, exceptions, re, threading, signal, urllib, pprint, math - -from HTMLParser import HTMLParser - -import xml.dom.minidom -import xml.dom.pulldom -from xml.dom import getDOMImplementation - -from hodlib.Common.util import * -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.miniHTMLParser import miniHTMLParser -from hodlib.Common.nodepoolutil import NodePoolUtil -from hodlib.Common.tcp import tcpError, tcpSocket - -reCommandDelimeterString = r"(?> sitefile, topElement.toxml() - sitefile.close() - -class hadoopCluster: - def __init__(self, cfg, log): - self.__cfg = cfg - self.__log = log - self.__changedClusterParams = [] - - self.__hostname = local_fqdn() - self.__svcrgyClient = None - self.__nodePool = NodePoolUtil.getNodePool(self.__cfg['nodepooldesc'], - self.__cfg, self.__log) - self.__hadoopCfg = hadoopConfig() - self.jobId = None - self.mapredInfo = None - self.hdfsInfo = None - self.ringmasterXRS = None - - def __get_svcrgy_client(self): - svcrgyUrl = to_http_url(self.__cfg['hod']['xrs-address']) - return hodXRClient(svcrgyUrl) - - def __get_service_status(self): - serviceData = self.__get_service_data() - - status = True - hdfs = False - mapred = False - - for host in serviceData.keys(): - for item in serviceData[host]: - service = item.keys() - if service[0] == 'hdfs.grid' and \ - self.__cfg['gridservice-hdfs']['external'] == False: - hdfs = True - elif service[0] == 'mapred.grid': - mapred = True - - if not mapred: - status = "mapred" - - if not hdfs and self.__cfg['gridservice-hdfs']['external'] == False: - if status != True: - status = "mapred and hdfs" - else: - status = "hdfs" - - return status - - def __get_service_data(self): - registry = to_http_url(self.__cfg['hod']['xrs-address']) - serviceData = self.__svcrgyClient.getServiceInfo( - self.__cfg['hod']['userid'], self.__setup.np.getNodePoolId()) - - return serviceData - - def __check_job_status(self): - failureCount = 0 - status = False - state = 'Q' - userLimitsFirstFlag = True - - while (state=='Q') or (state==False): - if hodInterrupt.isSet(): - raise HodInterruptException() - - jobInfo = self.__nodePool.getJobInfo() - state = jobInfo['job_state'] - self.__log.debug('job state %s' % state) - if state == False: - failureCount += 1 - if (failureCount >= self.__cfg['hod']['job-status-query-failure-retries']): - self.__log.debug('Number of retries reached max limit while querying job status') - break - time.sleep(self.__cfg['hod']['job-command-failure-interval']) - elif state!='Q': - break - else: - self.__log.debug('querying for job status after job-status-query-interval') - time.sleep(self.__cfg['hod']['job-status-query-interval']) - - if self.__cfg['hod'].has_key('job-feasibility-attr') and \ - self.__cfg['hod']['job-feasibility-attr']: - (status, msg) = self.__isJobFeasible() - if status == "Never": - 
self.__log.critical(TORQUE_USER_LIMITS_EXCEEDED_MSG + msg + \ - "This cluster cannot be allocated now.") - return -1 - elif status == False: - if userLimitsFirstFlag: - self.__log.critical(TORQUE_USER_LIMITS_EXCEEDED_MSG + msg + \ - "This cluster allocation will succeed only after other " + \ - "clusters are deallocated.") - userLimitsFirstFlag = False - - if state and state != 'C': - status = True - - return status - - def __isJobFeasible(self): - return self.__nodePool.isJobFeasible() - - def __get_ringmaster_client(self): - ringmasterXRS = None - - ringList = self.__svcrgyClient.getServiceInfo( - self.__cfg['ringmaster']['userid'], self.__nodePool.getServiceId(), - 'ringmaster', 'hod') - - if ringList and len(ringList): - if isinstance(ringList, list): - ringmasterXRS = ringList[0]['xrs'] - else: - count = 0 - waitTime = self.__cfg['hod']['allocate-wait-time'] - - while count < waitTime: - if hodInterrupt.isSet(): - raise HodInterruptException() - - ringList = self.__svcrgyClient.getServiceInfo( - self.__cfg['ringmaster']['userid'], self.__nodePool.getServiceId(), - 'ringmaster', - 'hod') - - if ringList and len(ringList): - if isinstance(ringList, list): - ringmasterXRS = ringList[0]['xrs'] - - if ringmasterXRS is not None: - break - else: - time.sleep(1) - count = count + 1 - # check to see if the job exited by any chance in that time: - if (count % self.__cfg['hod']['job-status-query-interval'] == 0): - if not self.__check_job_status(): - break - return ringmasterXRS - - def __init_hadoop_service(self, serviceName, xmlrpcClient): - status = True - serviceAddress = None - serviceInfo = None - - for i in range(0, 250): - try: - if hodInterrupt.isSet(): - raise HodInterruptException() - - serviceAddress = xmlrpcClient.getServiceAddr(serviceName) - if serviceAddress: - if serviceAddress == 'not found': - time.sleep(1) - # check to see if the job exited by any chance in that time: - if ((i+1) % self.__cfg['hod']['job-status-query-interval'] == 0): - if not self.__check_job_status(): - break - else: - serviceInfo = xmlrpcClient.getURLs(serviceName) - break - except HodInterruptException,h : - raise h - except: - self.__log.critical("'%s': ringmaster xmlrpc error." % serviceName) - self.__log.debug(get_exception_string()) - status = False - break - - if serviceAddress == 'not found' or not serviceAddress: - self.__log.critical("Failed to retrieve '%s' service address." % - serviceName) - status = False - elif serviceAddress.startswith("Error: "): - errs = serviceAddress[len("Error: "):] - self.__log.critical("Cluster could not be allocated because of the following errors.\n%s" % \ - errs) - status = False - else: - try: - self.__svcrgyClient.registerService(self.__cfg['hodring']['userid'], - self.jobId, self.__hostname, - serviceName, 'grid', serviceInfo) - - except HodInterruptException, h: - raise h - except: - self.__log.critical("'%s': registry xmlrpc error." 
% serviceName) - self.__log.debug(get_exception_string()) - status = False - - return status, serviceAddress, serviceInfo - - def __collect_jobtracker_ui(self, dir): - - link = self.mapredInfo + "/jobtracker.jsp" - parser = miniHTMLParser() - parser.setBaseUrl(self.mapredInfo) - node_cache = {} - - self.__log.debug("collect_jobtracker_ui seeded with " + link) - - def alarm_handler(number, stack): - raise AlarmException("timeout") - - signal.signal(signal.SIGALRM, alarm_handler) - - input = None - while link: - self.__log.debug("link: %s" % link) - # taskstats.jsp,taskdetails.jsp not included since too many to collect - if re.search( - "jobfailures\.jsp|jobtracker\.jsp|jobdetails\.jsp|jobtasks\.jsp", - link): - - for i in range(1,5): - if hodInterrupt.isSet(): - raise HodInterruptException() - try: - input = urllib.urlopen(link) - break - except: - self.__log.debug(get_exception_string()) - time.sleep(1) - - if input: - out = None - - self.__log.debug("collecting " + link + "...") - filename = re.sub(self.mapredInfo, "", link) - filename = dir + "/" + filename - filename = re.sub("http://","", filename) - filename = re.sub("[\?\&=:]","_",filename) - filename = filename + ".html" - - try: - tempdir, tail = os.path.split(filename) - if not os.path.exists(tempdir): - os.makedirs(tempdir) - except: - self.__log.debug(get_exception_string()) - - out = open(filename, 'w') - - bufSz = 8192 - - signal.alarm(10) - - try: - self.__log.debug("Starting to grab: %s" % link) - buf = input.read(bufSz) - - while len(buf) > 0: - # Feed the file into the HTML parser - parser.feed(buf) - - # Re-write the hrefs in the file - p = re.compile("\?(.+?)=(.+?)") - buf = p.sub(r"_\1_\2",buf) - p= re.compile("&(.+?)=(.+?)") - buf = p.sub(r"_\1_\2",buf) - p = re.compile("http://(.+?):(\d+)?") - buf = p.sub(r"\1_\2/",buf) - buf = re.sub("href=\"/","href=\"",buf) - p = re.compile("href=\"(.+?)\"") - buf = p.sub(r"href=\1.html",buf) - - out.write(buf) - buf = input.read(bufSz) - - signal.alarm(0) - input.close() - if out: - out.close() - - self.__log.debug("Finished grabbing: %s" % link) - except AlarmException: - if hodInterrupt.isSet(): - raise HodInterruptException() - if out: out.close() - if input: input.close() - - self.__log.debug("Failed to retrieve: %s" % link) - else: - self.__log.debug("Failed to retrieve: %s" % link) - - # Get the next link in level traversal order - link = parser.getNextLink() - - parser.close() - - def check_cluster(self, clusterInfo): - status = 0 - - if 'mapred' in clusterInfo: - mapredAddress = clusterInfo['mapred'][7:] - hdfsAddress = clusterInfo['hdfs'][7:] - status = get_cluster_status(hdfsAddress, mapredAddress) - if status == 0: - status = 12 - else: - status = 15 - - return status - - def is_cluster_deallocated(self, jobId): - """Returns True if the JobId that represents this cluster - is in the Completed or exiting state.""" - jobInfo = self.__nodePool.getJobInfo(jobId) - state = None - if jobInfo is not None and jobInfo.has_key('job_state'): - state = jobInfo['job_state'] - return ((state == 'C') or (state == 'E')) - - def cleanup(self): - if self.__nodePool: self.__nodePool.finalize() - - def get_job_id(self): - return self.jobId - - def delete_job(self, jobId): - '''Delete a job given it's ID''' - ret = 0 - if self.__nodePool: - ret = self.__nodePool.deleteJob(jobId) - else: - raise Exception("Invalid state: Node pool is not initialized to delete the given job.") - return ret - - def is_valid_account(self): - """Verify if the account being used to submit the job is a valid 
account. - This code looks for a file /bin/verify-account. - If the file is present, it executes the file, passing as argument - the account name. It returns the exit code and output from the - script on non-zero exit code.""" - - accountValidationScript = os.path.abspath('./verify-account') - if not os.path.exists(accountValidationScript): - return (0, None) - - account = self.__nodePool.getAccountString() - exitCode = 0 - errMsg = None - try: - accountValidationCmd = simpleCommand('Account Validation Command',\ - '%s %s' % (accountValidationScript, - account)) - accountValidationCmd.start() - accountValidationCmd.wait() - accountValidationCmd.join() - exitCode = accountValidationCmd.exit_code() - self.__log.debug('account validation script is run %d' \ - % exitCode) - errMsg = None - if exitCode is not 0: - errMsg = accountValidationCmd.output() - except Exception, e: - exitCode = 0 - self.__log.warn('Error executing account script: %s ' \ - 'Accounting is disabled.' \ - % get_exception_error_string()) - self.__log.debug(get_exception_string()) - return (exitCode, errMsg) - - def allocate(self, clusterDir, min, max=None): - status = 0 - failureCount = 0 - self.__svcrgyClient = self.__get_svcrgy_client() - - self.__log.debug("allocate %s %s %s" % (clusterDir, min, max)) - - if min < 3: - self.__log.critical("Minimum nodes must be greater than 2.") - status = 2 - else: - nodeSet = self.__nodePool.newNodeSet(min) - walltime = None - if self.__cfg['hod'].has_key('walltime'): - walltime = self.__cfg['hod']['walltime'] - self.jobId, exitCode = self.__nodePool.submitNodeSet(nodeSet, walltime) - # if the job submission returned an error other than no resources - # retry a couple of times - while (self.jobId is False) and (exitCode != 188): - if hodInterrupt.isSet(): - raise HodInterruptException() - - failureCount += 1 - if (failureCount >= self.__cfg['hod']['job-status-query-failure-retries']): - self.__log.debug("failed submitting job more than the retries. exiting") - break - else: - # wait a bit before retrying - time.sleep(self.__cfg['hod']['job-command-failure-interval']) - if hodInterrupt.isSet(): - raise HodInterruptException() - self.jobId, exitCode = self.__nodePool.submitNodeSet(nodeSet, walltime) - - if self.jobId: - jobStatus = None - try: - jobStatus = self.__check_job_status() - except HodInterruptException, h: - self.__log.info(HOD_INTERRUPTED_MESG) - self.delete_job(self.jobId) - self.__log.info("Cluster %s removed from queue." 
% self.jobId) - raise h - else: - if jobStatus == -1: - self.delete_job(self.jobId); - status = 4 - return status - - if jobStatus: - self.__log.info("Cluster Id %s" \ - % self.jobId) - try: - self.ringmasterXRS = self.__get_ringmaster_client() - - self.__log.debug("Ringmaster at : %s" % self.ringmasterXRS ) - ringClient = None - if self.ringmasterXRS: - ringClient = hodXRClient(self.ringmasterXRS) - - hdfsStatus, hdfsAddr, self.hdfsInfo = \ - self.__init_hadoop_service('hdfs', ringClient) - - if hdfsStatus: - self.__log.info("HDFS UI at http://%s" % self.hdfsInfo) - - mapredStatus, mapredAddr, self.mapredInfo = \ - self.__init_hadoop_service('mapred', ringClient) - - if mapredStatus: - self.__log.info("Mapred UI at http://%s" % self.mapredInfo) - - if self.__cfg['hod'].has_key('update-worker-info') \ - and self.__cfg['hod']['update-worker-info']: - workerInfoMap = {} - workerInfoMap['HDFS UI'] = 'http://%s' % self.hdfsInfo - workerInfoMap['Mapred UI'] = 'http://%s' % self.mapredInfo - # Ringmaster URL sample format : http://hostname:port/ - workerInfoMap['RM RPC Port'] = '%s' % self.ringmasterXRS.split(":")[2].strip("/") - if mapredAddr.find(':') != -1: - workerInfoMap['Mapred RPC Port'] = mapredAddr.split(':')[1] - ret = self.__nodePool.updateWorkerInfo(workerInfoMap, self.jobId) - if ret != 0: - self.__log.warn('Could not update HDFS and Mapred information.' \ - 'User Portal may not show relevant information.' \ - 'Error code=%s' % ret) - - self.__cfg.replace_escape_seqs() - - # Go generate the client side hadoop-site.xml now - # adding final-params as well, just so that conf on - # client-side and server-side are (almost) the same - clientParams = None - serverParams = {} - finalServerParams = {} - - # client-params - if self.__cfg['hod'].has_key('client-params'): - clientParams = self.__cfg['hod']['client-params'] - - # server-params - if self.__cfg['gridservice-mapred'].has_key('server-params'): - serverParams.update(\ - self.__cfg['gridservice-mapred']['server-params']) - if self.__cfg['gridservice-hdfs'].has_key('server-params'): - # note that if there are params in both mapred and hdfs - # sections, the ones in hdfs overwirte the ones in mapred - serverParams.update(\ - self.__cfg['gridservice-hdfs']['server-params']) - - # final-server-params - if self.__cfg['gridservice-mapred'].has_key(\ - 'final-server-params'): - finalServerParams.update(\ - self.__cfg['gridservice-mapred']['final-server-params']) - if self.__cfg['gridservice-hdfs'].has_key( - 'final-server-params'): - finalServerParams.update(\ - self.__cfg['gridservice-hdfs']['final-server-params']) - - clusterFactor = self.__cfg['hod']['cluster-factor'] - tempDir = self.__cfg['hod']['temp-dir'] - if not os.path.exists(tempDir): - os.makedirs(tempDir) - tempDir = os.path.join( tempDir, self.__cfg['hod']['userid']\ - + "." + self.jobId ) - mrSysDir = getMapredSystemDirectory(self.__cfg['hodring']['mapred-system-dir-root'],\ - self.__cfg['hod']['userid'], self.jobId) - self.__hadoopCfg.gen_site_conf(clusterDir, tempDir, min,\ - hdfsAddr, mrSysDir, mapredAddr, clientParams,\ - serverParams, finalServerParams,\ - clusterFactor) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - # end of hadoop-site.xml generation - else: - status = 8 - else: - status = 7 - else: - status = 6 - if status != 0: - self.__log.debug("Cleaning up cluster id %s, as cluster could not be allocated." 
% self.jobId) - if ringClient is None: - self.delete_job(self.jobId) - else: - self.__log.debug("Calling rm.stop()") - ringClient.stopRM() - self.__log.debug("Returning from rm.stop()") - except HodInterruptException, h: - self.__log.info(HOD_INTERRUPTED_MESG) - if self.ringmasterXRS: - if ringClient is None: - ringClient = hodXRClient(self.ringmasterXRS) - self.__log.debug("Calling rm.stop()") - ringClient.stopRM() - self.__log.debug("Returning from rm.stop()") - self.__log.info("Cluster Shutdown by informing ringmaster.") - else: - self.delete_job(self.jobId) - self.__log.info("Cluster %s removed from queue directly." % self.jobId) - raise h - else: - self.__log.critical("No cluster found, ringmaster failed to run.") - status = 5 - - elif self.jobId == False: - if exitCode == 188: - self.__log.critical("Request execeeded maximum resource allocation.") - else: - self.__log.critical("Job submission failed with exit code %s" % exitCode) - status = 4 - else: - self.__log.critical("Scheduler failure, allocation failed.\n\n") - status = 4 - - if status == 5 or status == 6: - ringMasterErrors = self.__svcrgyClient.getRMError() - if ringMasterErrors: - self.__log.critical("Cluster could not be allocated because" \ - " of the following errors on the "\ - "ringmaster host %s.\n%s" % \ - (ringMasterErrors[0], ringMasterErrors[1])) - self.__log.debug("Stack trace on ringmaster: %s" % ringMasterErrors[2]) - return status - - def __isRingMasterAlive(self, rmAddr): - ret = True - rmSocket = tcpSocket(rmAddr) - try: - rmSocket.open() - rmSocket.close() - except tcpError: - ret = False - - return ret - - def deallocate(self, clusterDir, clusterInfo): - status = 0 - - nodeSet = self.__nodePool.newNodeSet(clusterInfo['min'], - id=clusterInfo['jobid']) - self.mapredInfo = clusterInfo['mapred'] - self.hdfsInfo = clusterInfo['hdfs'] - - try: - if self.__cfg['hod'].has_key('hadoop-ui-log-dir'): - clusterStatus = self.check_cluster(clusterInfo) - if clusterStatus != 14 and clusterStatus != 10: - # If JT is still alive - self.__collect_jobtracker_ui(self.__cfg['hod']['hadoop-ui-log-dir']) - else: - self.__log.debug('hadoop-ui-log-dir not specified. Skipping Hadoop UI log collection.') - except HodInterruptException, h: - # got an interrupt. just pass and proceed to qdel - pass - except: - self.__log.info("Exception in collecting Job tracker logs. Ignoring.") - - rmAddr = None - if clusterInfo.has_key('ring'): - # format is http://host:port/ We need host:port - rmAddr = clusterInfo['ring'][7:] - if rmAddr.endswith('/'): - rmAddr = rmAddr[:-1] - - if (rmAddr is None) or (not self.__isRingMasterAlive(rmAddr)): - # Cluster is already dead, don't try to contact ringmaster. - self.__nodePool.finalize() - status = 10 # As cluster is dead, we just set the status to 'cluster dead'. - else: - xrsAddr = clusterInfo['ring'] - rmClient = hodXRClient(xrsAddr) - self.__log.debug('calling rm.stop') - rmClient.stopRM() - self.__log.debug('completed rm.stop') - - # cleanup hod temp dirs - tempDir = os.path.join( self.__cfg['hod']['temp-dir'], \ - self.__cfg['hod']['userid'] + "." 
+ clusterInfo['jobid'] ) - if os.path.exists(tempDir): - shutil.rmtree(tempDir) - - return status - -class hadoopScript: - def __init__(self, conf, execDir): - self.__environ = os.environ.copy() - self.__environ['HADOOP_CONF_DIR'] = conf - self.__execDir = execDir - - def run(self, script): - scriptThread = simpleCommand(script, script, self.__environ, 4, False, - False, self.__execDir) - scriptThread.start() - scriptThread.wait() - scriptThread.join() - - return scriptThread.exit_code() diff --git a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py b/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py deleted file mode 100644 index b2587bb77a..0000000000 --- a/third_party/hadoop-0.20.0/contrib/hod/hodlib/Hod/hod.py +++ /dev/null @@ -1,754 +0,0 @@ -#Licensed to the Apache Software Foundation (ASF) under one -#or more contributor license agreements. See the NOTICE file -#distributed with this work for additional information -#regarding copyright ownership. The ASF licenses this file -#to you under the Apache License, Version 2.0 (the -#"License"); you may not use this file except in compliance -#with the License. You may obtain a copy of the License at - -# http://www.apache.org/licenses/LICENSE-2.0 - -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# -*- python -*- - -import sys, os, getpass, pprint, re, cPickle, random, shutil, time, errno - -import hodlib.Common.logger - -from hodlib.ServiceRegistry.serviceRegistry import svcrgy -from hodlib.Common.xmlrpc import hodXRClient -from hodlib.Common.util import to_http_url, get_exception_string -from hodlib.Common.util import get_exception_error_string -from hodlib.Common.util import hodInterrupt, HodInterruptException -from hodlib.Common.util import HOD_INTERRUPTED_CODE - -from hodlib.Common.nodepoolutil import NodePoolUtil -from hodlib.Hod.hadoop import hadoopCluster, hadoopScript - -CLUSTER_DATA_FILE = 'clusters' -INVALID_STATE_FILE_MSGS = \ - [ - - "Requested operation cannot be performed. Cannot read %s: " + \ - "Permission denied.", - - "Requested operation cannot be performed. " + \ - "Cannot write to %s: Permission denied.", - - "Requested operation cannot be performed. " + \ - "Cannot read/write to %s: Permission denied.", - - "Cannot update %s: Permission denied. " + \ - "Cluster is deallocated, but info and list " + \ - "operations might show incorrect information.", - - ] - -class hodState: - def __init__(self, store): - self.__store = store - self.__stateFile = None - self.__init_store() - self.__STORE_EXT = ".state" - - def __init_store(self): - if not os.path.exists(self.__store): - os.mkdir(self.__store) - - def __set_state_file(self, id=None): - if id: - self.__stateFile = os.path.join(self.__store, "%s%s" % (id, - self.__STORE_EXT)) - else: - for item in os.listdir(self.__store): - if item.endswith(self.__STORE_EXT): - self.__stateFile = os.path.join(self.__store, item) - - def get_state_file(self): - return self.__stateFile - - def checkStateFile(self, id=None, modes=(os.R_OK,)): - # is state file exists/readable/writable/both? 
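For orientation, the per-cluster bookkeeping in hodState above boils down to one pickled dictionary per state id, written to an <id>.state file under the store directory. A minimal round trip of that mechanism, in the same Python 2 style (the path and dictionary contents here are hypothetical, not HOD defaults):

    import os, cPickle

    store = '/tmp/hod-state-demo'          # hypothetical store directory
    if not os.path.exists(store):
      os.mkdir(store)
    stateFile = os.path.join(store, 'clusters.state')

    info = {'/home/user/cluster1': '12345.torque-server'}   # illustrative
    f = open(stateFile, 'w')
    cPickle.dump(info, f)                  # same cPickle.dump as hodState.write
    f.close()

    f = open(stateFile, 'r')
    assert cPickle.load(f) == info         # the same dictionary comes back
    f.close()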
- self.__set_state_file(id) - - # return true if file doesn't exist, because HOD CAN create - # state file and so WILL have permissions to read and/or write - try: - os.stat(self.__stateFile) - except OSError, err: - if err.errno == errno.ENOENT: # error 2 (no such file) - return True - - # file exists - ret = True - for mode in modes: - ret = ret and os.access(self.__stateFile, mode) - return ret - - def read(self, id=None): - info = {} - - self.__set_state_file(id) - - if self.__stateFile: - if os.path.isfile(self.__stateFile): - stateFile = open(self.__stateFile, 'r') - try: - info = cPickle.load(stateFile) - except EOFError: - pass - - stateFile.close() - - return info - - def write(self, id, info): - self.__set_state_file(id) - if not os.path.exists(self.__stateFile): - self.clear(id) - - stateFile = open(self.__stateFile, 'w') - cPickle.dump(info, stateFile) - stateFile.close() - - def clear(self, id=None): - self.__set_state_file(id) - if self.__stateFile and os.path.exists(self.__stateFile): - os.remove(self.__stateFile) - else: - for item in os.listdir(self.__store): - if item.endswith(self.__STORE_EXT): - os.remove(item) - -class hodRunner: - - def __init__(self, cfg, log=None, cluster=None): - self.__hodhelp = hodHelp() - self.__ops = self.__hodhelp.ops - self.__cfg = cfg - self.__npd = self.__cfg['nodepooldesc'] - self.__opCode = 0 - self.__user = getpass.getuser() - self.__registry = None - self.__baseLogger = None - # Allowing to pass in log object to help testing - a stub can be passed in - if log is None: - self.__setup_logger() - else: - self.__log = log - - self.__userState = hodState(self.__cfg['hod']['user_state']) - - self.__clusterState = None - self.__clusterStateInfo = { 'env' : None, 'hdfs' : None, 'mapred' : None } - - # Allowing to pass in log object to help testing - a stib can be passed in - if cluster is None: - self.__cluster = hadoopCluster(self.__cfg, self.__log) - else: - self.__cluster = cluster - - def __setup_logger(self): - self.__baseLogger = hodlib.Common.logger.hodLog('hod') - self.__log = self.__baseLogger.add_logger(self.__user ) - - if self.__cfg['hod']['stream']: - self.__baseLogger.add_stream(level=self.__cfg['hod']['debug'], - addToLoggerNames=(self.__user ,)) - - if self.__cfg['hod'].has_key('syslog-address'): - self.__baseLogger.add_syslog(self.__cfg['hod']['syslog-address'], - level=self.__cfg['hod']['debug'], - addToLoggerNames=(self.__user ,)) - - def get_logger(self): - return self.__log - - def __setup_cluster_logger(self, directory): - self.__baseLogger.add_file(logDirectory=directory, level=4, - backupCount=self.__cfg['hod']['log-rollover-count'], - addToLoggerNames=(self.__user ,)) - - def __setup_cluster_state(self, directory): - self.__clusterState = hodState(directory) - - def __norm_cluster_dir(self, directory): - directory = os.path.expanduser(directory) - if not os.path.isabs(directory): - directory = os.path.join(self.__cfg['hod']['original-dir'], directory) - directory = os.path.abspath(directory) - - return directory - - def __setup_service_registry(self): - cfg = self.__cfg['hod'].copy() - cfg['debug'] = 0 - self.__registry = svcrgy(cfg, self.__log) - self.__registry.start() - self.__log.debug(self.__registry.getXMLRPCAddr()) - self.__cfg['hod']['xrs-address'] = self.__registry.getXMLRPCAddr() - self.__cfg['ringmaster']['svcrgy-addr'] = self.__cfg['hod']['xrs-address'] - - def __set_cluster_state_info(self, env, hdfs, mapred, ring, jobid, min, max): - self.__clusterStateInfo['env'] = env - self.__clusterStateInfo['hdfs'] = 
"http://%s" % hdfs - self.__clusterStateInfo['mapred'] = "http://%s" % mapred - self.__clusterStateInfo['ring'] = ring - self.__clusterStateInfo['jobid'] = jobid - self.__clusterStateInfo['min'] = min - self.__clusterStateInfo['max'] = max - - def __set_user_state_info(self, info): - userState = self.__userState.read(CLUSTER_DATA_FILE) - for key in info.keys(): - userState[key] = info[key] - - self.__userState.write(CLUSTER_DATA_FILE, userState) - - def __remove_cluster(self, clusterDir): - clusterInfo = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterInfo: - del(clusterInfo[clusterDir]) - self.__userState.write(CLUSTER_DATA_FILE, clusterInfo) - - def __cleanup(self): - if self.__registry: self.__registry.stop() - - def __check_operation(self, operation): - opList = operation.split() - - if not opList[0] in self.__ops: - self.__log.critical("Invalid hod operation specified: %s" % operation) - self._op_help(None) - self.__opCode = 2 - - return opList - - def __adjustMasterFailureCountConfig(self, nodeCount): - # This method adjusts the ringmaster.max-master-failures variable - # to a value that is bounded by the a function of the number of - # nodes. - - maxFailures = self.__cfg['ringmaster']['max-master-failures'] - # Count number of masters required - depends on which services - # are external - masters = 0 - if not self.__cfg['gridservice-hdfs']['external']: - masters += 1 - if not self.__cfg['gridservice-mapred']['external']: - masters += 1 - - # So, if there are n nodes and m masters, we look atleast for - # all masters to come up. Therefore, atleast m nodes should be - # good, which means a maximum of n-m master nodes can fail. - maxFailedNodes = nodeCount - masters - - # The configured max number of failures is now bounded by this - # number. - self.__cfg['ringmaster']['max-master-failures'] = \ - min(maxFailures, maxFailedNodes) - - def _op_allocate(self, args): - operation = "allocate" - argLength = len(args) - min = 0 - max = 0 - errorFlag = False - errorMsgs = [] - - if argLength == 3: - nodes = args[2] - clusterDir = self.__norm_cluster_dir(args[1]) - - if not os.path.exists(clusterDir): - try: - os.makedirs(clusterDir) - except OSError, err: - errorFlag = True - errorMsgs.append("Could not create cluster directory. %s" \ - % (str(err))) - elif not os.path.isdir(clusterDir): - errorFlag = True - errorMsgs.append( \ - "Invalid cluster directory (--hod.clusterdir or -d) : " + \ - clusterDir + " : Not a directory") - - if int(nodes) < 3 : - errorFlag = True - errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " + \ - "Must be >= 3. Given nodes: %s" % nodes) - if errorFlag: - for msg in errorMsgs: - self.__log.critical(msg) - self.__opCode = 3 - return - - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, \ - (os.R_OK, os.W_OK)): - self.__log.critical(INVALID_STATE_FILE_MSGS[2] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterList.keys(): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - # Check if the job is not running. Only then can we safely - # allocate another cluster. Otherwise the user would need - # to deallocate and free up resources himself. - if clusterInfo.has_key('jobid') and \ - self.__cluster.is_cluster_deallocated(clusterInfo['jobid']): - self.__log.warn("Found a dead cluster at cluster directory '%s'. Deallocating it to allocate a new one." 
% (clusterDir)) - self.__remove_cluster(clusterDir) - self.__clusterState.clear() - else: - self.__log.critical("Found a previously allocated cluster at cluster directory '%s'. HOD cannot determine if this cluster can be automatically deallocated. Deallocate the cluster if it is unused." % (clusterDir)) - self.__opCode = 12 - return - - self.__setup_cluster_logger(clusterDir) - - (status, message) = self.__cluster.is_valid_account() - if status is not 0: - if message: - for line in message: - self.__log.critical("verify-account output: %s" % line) - self.__log.critical("Cluster cannot be allocated because account verification failed. " \ - + "verify-account returned exit code: %s." % status) - self.__opCode = 4 - return - else: - self.__log.debug("verify-account returned zero exit code.") - if message: - self.__log.debug("verify-account output: %s" % message) - - if re.match('\d+-\d+', nodes): - (min, max) = nodes.split("-") - min = int(min) - max = int(max) - else: - try: - nodes = int(nodes) - min = nodes - max = nodes - except ValueError: - print self.__hodhelp.help(operation) - self.__log.critical( - "%s operation requires a pos_int value for n(nodecount)." % - operation) - self.__opCode = 3 - else: - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - self.__opCode = self.__cluster.check_cluster(clusterInfo) - if self.__opCode == 0 or self.__opCode == 15: - self.__setup_service_registry() - if hodInterrupt.isSet(): - self.__cleanup() - raise HodInterruptException() - self.__log.debug("Service Registry started.") - - self.__adjustMasterFailureCountConfig(nodes) - - try: - allocateStatus = self.__cluster.allocate(clusterDir, min, max) - except HodInterruptException, h: - self.__cleanup() - raise h - # Allocation has gone through. - # Don't care about interrupts any more - - try: - if allocateStatus == 0: - self.__set_cluster_state_info(os.environ, - self.__cluster.hdfsInfo, - self.__cluster.mapredInfo, - self.__cluster.ringmasterXRS, - self.__cluster.jobId, - min, max) - self.__setup_cluster_state(clusterDir) - self.__clusterState.write(self.__cluster.jobId, - self.__clusterStateInfo) - # Do we need to check for interrupts here ?? - - self.__set_user_state_info( - { clusterDir : self.__cluster.jobId, } ) - self.__opCode = allocateStatus - except Exception, e: - # Some unknown problem. - self.__cleanup() - self.__cluster.deallocate(clusterDir, self.__clusterStateInfo) - self.__opCode = 1 - raise Exception(e) - elif self.__opCode == 12: - self.__log.critical("Cluster %s already allocated." % clusterDir) - elif self.__opCode == 10: - self.__log.critical("dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 13: - self.__log.warn("hdfs dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - elif self.__opCode == 14: - self.__log.warn("mapred dead\t%s\t%s" % (clusterInfo['jobid'], - clusterDir)) - - if self.__opCode > 0 and self.__opCode != 15: - self.__log.critical("Cannot allocate cluster %s" % clusterDir) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires two arguments. 
" % operation - + "A cluster directory and a nodecount.") - self.__opCode = 3 - - def _is_cluster_allocated(self, clusterDir): - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo != {}: - return True - return False - - def _op_deallocate(self, args): - operation = "deallocate" - argLength = len(args) - if argLength == 2: - clusterDir = self.__norm_cluster_dir(args[1]) - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - self.__handle_invalid_cluster_directory(clusterDir, cleanUp=True) - else: - self.__opCode = \ - self.__cluster.deallocate(clusterDir, clusterInfo) - # irrespective of whether deallocate failed or not\ - # remove the cluster state. - self.__clusterState.clear() - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[3] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - self.__remove_cluster(clusterDir) - else: - self.__handle_invalid_cluster_directory(clusterDir, cleanUp=True) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires one argument. " % operation - + "A cluster path.") - self.__opCode = 3 - - def _op_list(self, args): - operation = 'list' - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - for path in clusterList.keys(): - if not os.path.isdir(path): - self.__log.info("cluster state unknown\t%s\t%s" % (clusterList[path], path)) - continue - self.__setup_cluster_state(path) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - # something wrong with the cluster directory. - self.__log.info("cluster state unknown\t%s\t%s" % (clusterList[path], path)) - continue - clusterStatus = self.__cluster.check_cluster(clusterInfo) - if clusterStatus == 12: - self.__log.info("alive\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 10: - self.__log.info("dead\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 13: - self.__log.info("hdfs dead\t%s\t%s" % (clusterList[path], path)) - elif clusterStatus == 14: - self.__log.info("mapred dead\t%s\t%s" % (clusterList[path], path)) - - def _op_info(self, args): - operation = 'info' - argLength = len(args) - if argLength == 2: - clusterDir = self.__norm_cluster_dir(args[1]) - if os.path.isdir(clusterDir): - self.__setup_cluster_state(clusterDir) - clusterInfo = self.__clusterState.read() - if clusterInfo == {}: - # something wrong with the cluster directory. - self.__handle_invalid_cluster_directory(clusterDir) - else: - clusterStatus = self.__cluster.check_cluster(clusterInfo) - if clusterStatus == 12: - self.__print_cluster_info(clusterInfo) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - elif clusterStatus == 10: - self.__log.critical("%s cluster is dead" % clusterDir) - elif clusterStatus == 13: - self.__log.warn("%s cluster hdfs is dead" % clusterDir) - elif clusterStatus == 14: - self.__log.warn("%s cluster mapred is dead" % clusterDir) - - if clusterStatus != 12: - if clusterStatus == 15: - self.__log.critical("Cluster %s not allocated." % clusterDir) - else: - self.__print_cluster_info(clusterInfo) - self.__log.info("hadoop-site.xml at %s" % clusterDir) - - self.__opCode = clusterStatus - else: - self.__handle_invalid_cluster_directory(clusterDir) - else: - print self.__hodhelp.help(operation) - self.__log.critical("%s operation requires one argument. 
" % operation - + "A cluster path.") - self.__opCode = 3 - - def __handle_invalid_cluster_directory(self, clusterDir, cleanUp=False): - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - - clusterList = self.__userState.read(CLUSTER_DATA_FILE) - if clusterDir in clusterList.keys(): - # previously allocated cluster. - self.__log.critical("Cannot find information for cluster with id '%s' in previously allocated cluster directory '%s'." % (clusterList[clusterDir], clusterDir)) - if cleanUp: - self.__cluster.delete_job(clusterList[clusterDir]) - self.__log.critical("Freeing resources allocated to the cluster.") - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.W_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[1] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return - self.__remove_cluster(clusterDir) - self.__opCode = 3 - else: - if not os.path.exists(clusterDir): - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). " + \ - clusterDir + " : No such directory") - elif not os.path.isdir(clusterDir): - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). " + \ - clusterDir + " : Not a directory") - else: - self.__log.critical( \ - "Invalid hod.clusterdir(--hod.clusterdir or -d). " + \ - clusterDir + " : Not tied to any allocated cluster.") - self.__opCode = 15 - - def __print_cluster_info(self, clusterInfo): - keys = clusterInfo.keys() - - _dict = { - 'jobid' : 'Cluster Id', 'min' : 'Nodecount', - 'hdfs' : 'HDFS UI at' , 'mapred' : 'Mapred UI at' - } - - for key in _dict.keys(): - if clusterInfo.has_key(key): - self.__log.info("%s %s" % (_dict[key], clusterInfo[key])) - - if clusterInfo.has_key('ring'): - self.__log.debug("%s\t%s" % ('Ringmaster at ', clusterInfo['ring'])) - - if self.__cfg['hod']['debug'] == 4: - for var in clusterInfo['env'].keys(): - self.__log.debug("%s = %s" % (var, clusterInfo['env'][var])) - - def _op_help(self, arg): - if arg == None or arg.__len__() != 2: - print "hod commands:\n" - for op in self.__ops: - print self.__hodhelp.help(op) - else: - if arg[1] not in self.__ops: - print self.__hodhelp.help('help') - self.__log.critical("Help requested for invalid operation : %s"%arg[1]) - self.__opCode = 3 - else: print self.__hodhelp.help(arg[1]) - - def operation(self): - operation = self.__cfg['hod']['operation'] - try: - opList = self.__check_operation(operation) - if self.__opCode == 0: - if not self.__userState.checkStateFile(CLUSTER_DATA_FILE, (os.R_OK,)): - self.__log.critical(INVALID_STATE_FILE_MSGS[0] % \ - self.__userState.get_state_file()) - self.__opCode = 1 - return self.__opCode - getattr(self, "_op_%s" % opList[0])(opList) - except HodInterruptException, h: - self.__log.critical("op: %s failed because of a process interrupt." 
\ - % operation) - self.__opCode = HOD_INTERRUPTED_CODE - except: - self.__log.critical("op: %s failed: %s" % (operation, - get_exception_error_string())) - self.__log.debug(get_exception_string()) - - self.__cleanup() - - self.__log.debug("return code: %s" % self.__opCode) - - return self.__opCode - - def script(self): - errorFlag = False - errorMsgs = [] - scriptRet = 0 # return from the script, if run - - script = self.__cfg['hod']['script'] - nodes = self.__cfg['hod']['nodecount'] - clusterDir = self.__cfg['hod']['clusterdir'] - - if not os.path.exists(script): - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : No such file") - elif not os.path.isfile(script): - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : Not a file.") - else: - isExecutable = os.access(script, os.X_OK) - if not isExecutable: - errorFlag = True - errorMsgs.append("Invalid script file (--hod.script or -s) : " + \ - script + " : Not an executable.") - - if not os.path.exists(clusterDir): - try: - os.makedirs(clusterDir) - except OSError, err: - errorFlag = True - errorMsgs.append("Could not create cluster directory. %s" % (str(err))) - elif not os.path.isdir(clusterDir): - errorFlag = True - errorMsgs.append( \ - "Invalid cluster directory (--hod.clusterdir or -d) : " + \ - clusterDir + " : Not a directory") - - if int(self.__cfg['hod']['nodecount']) < 3 : - errorFlag = True - errorMsgs.append("Invalid nodecount (--hod.nodecount or -n) : " + \ - "Must be >= 3. Given nodes: %s" % nodes) - - if errorFlag: - for msg in errorMsgs: - self.__log.critical(msg) - self.handle_script_exit_code(scriptRet, clusterDir) - sys.exit(3) - - try: - self._op_allocate(('allocate', clusterDir, str(nodes))) - if self.__opCode == 0: - if self.__cfg['hod'].has_key('script-wait-time'): - time.sleep(self.__cfg['hod']['script-wait-time']) - self.__log.debug('Slept for %d time. Now going to run the script' % self.__cfg['hod']['script-wait-time']) - if hodInterrupt.isSet(): - self.__log.debug('Hod interrupted - not executing script') - else: - scriptRunner = hadoopScript(clusterDir, - self.__cfg['hod']['original-dir']) - self.__opCode = scriptRunner.run(script) - scriptRet = self.__opCode - self.__log.info("Exit code from running the script: %d" % self.__opCode) - else: - self.__log.critical("Error %d in allocating the cluster. Cannot run the script." % self.__opCode) - - if hodInterrupt.isSet(): - # Got interrupt while executing script. Unsetting it for deallocating - hodInterrupt.setFlag(False) - if self._is_cluster_allocated(clusterDir): - self._op_deallocate(('deallocate', clusterDir)) - except HodInterruptException, h: - self.__log.critical("Script failed because of a process interrupt.") - self.__opCode = HOD_INTERRUPTED_CODE - except: - self.__log.critical("script: %s failed: %s" % (script, - get_exception_error_string())) - self.__log.debug(get_exception_string()) - - self.__cleanup() - - self.handle_script_exit_code(scriptRet, clusterDir) - - return self.__opCode - - def handle_script_exit_code(self, scriptRet, clusterDir): - # We want to give importance to a failed script's exit code, and write out exit code to a file separately - # so users can easily get it if required. This way they can differentiate between the script's exit code - # and hod's exit code. 
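Given the comment above, a caller that wants the user script's own exit status can read it back from the script.exitcode file in the cluster directory after hod returns. A small sketch of such a consumer, assuming the cluster directory is passed as the first argument (this helper is hypothetical, not shipped with HOD):

    import os, sys

    clusterDir = sys.argv[1]                 # the -d directory passed to hod
    exitCodeFile = os.path.join(clusterDir, 'script.exitcode')

    if os.path.exists(exitCodeFile):
      f = open(exitCodeFile)
      code = int(f.read().strip())           # exit code of the user script
      f.close()
      print "script failed with exit code %d" % code
      sys.exit(code)
    print "script succeeded (no script.exitcode file was written)"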
-
-    if os.path.exists(clusterDir):
-      exit_code_file_name = os.path.join(clusterDir, 'script.exitcode')
-      if scriptRet != 0:
-        exit_code_file = open(exit_code_file_name, 'w')
-        print >>exit_code_file, scriptRet
-        exit_code_file.close()
-        self.__opCode = scriptRet
-      else:
-        # Ensure the script exit code file is not there:
-        if os.path.exists(exit_code_file_name):
-          os.remove(exit_code_file_name)
-
-class hodHelp:
-  def __init__(self):
-    self.ops = ['allocate', 'deallocate', 'info', 'list', 'script', 'help']
-
-    self.usage_strings = \
-      {
-        'allocate'   : 'hod allocate -d <clusterdir> -n <nodecount> [OPTIONS]',
-        'deallocate' : 'hod deallocate -d <clusterdir> [OPTIONS]',
-        'list'       : 'hod list [OPTIONS]',
-        'info'       : 'hod info -d <clusterdir> [OPTIONS]',
-        'script'     :
          'hod script -d <clusterdir> -n <nodecount> -s