author	Harvey Feng <harvey@databricks.com>	2013-10-12 20:57:38 -0700
committer	Harvey Feng <harvey@databricks.com>	2013-10-12 21:02:08 -0700
commit	6c32aab87d4ea26086536fb988744d0d30792160 (patch)
tree	25f52465fa677b4d14d8519b71e50e9b091e7384
parent	5a99e67894e3a7716a8a1eaa19ce17e7a8abf5b6 (diff)
Remove the new HadoopRDD constructor from the SparkContext API, plus some minor style changes.
-rw-r--r--	core/src/main/scala/org/apache/spark/SparkContext.scala	28
-rw-r--r--	core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala	2
2 files changed, 3 insertions, 27 deletions
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d9c6264855..7488cdd1ba 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -346,30 +346,6 @@ class SparkContext(
new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
}
- /**
- * Get an RDD for a Hadoop file with an arbitrary InputFormat. Accept a Hadoop Configuration
- * that has already been broadcast and use it to construct JobConfs local to each process. These
- * JobConfs will be initialized using an optional, user-specified closure.
- */
- def hadoopRDD[K, V](
- path: String,
- confBroadcast: Broadcast[SerializableWritable[Configuration]],
- initLocalJobConfOpt: Option[JobConf => Unit],
- inputFormatClass: Class[_ <: InputFormat[K, V]],
- keyClass: Class[K],
- valueClass: Class[V],
- minSplits: Int
- ): RDD[(K, V)] = {
- new HadoopRDD(
- this,
- confBroadcast,
- initLocalJobConfOpt,
- inputFormatClass,
- keyClass,
- valueClass,
- minSplits)
- }
-
/** Get an RDD for a Hadoop file with an arbitrary InputFormat */
def hadoopFile[K, V](
path: String,
@@ -380,11 +356,11 @@ class SparkContext(
): RDD[(K, V)] = {
// A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
- val setInputPathsFunc = Some((jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path))
+ val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path)
new HadoopRDD(
this,
confBroadcast,
- setInputPathsFunc,
+ Some(setInputPathsFunc),
inputFormatClass,
keyClass,
valueClass,
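
For reference, here is a minimal sketch of the pattern the surviving hadoopFile path now uses: broadcast the Configuration once, then hand HadoopRDD an Option-wrapped closure that initializes each locally constructed JobConf. The textHadoopRDD helper and its parameters are hypothetical, and constructing HadoopRDD directly assumes its broadcast-conf constructor is accessible from the calling scope:

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.{FileInputFormat, JobConf, TextInputFormat}
    import org.apache.spark.{SerializableWritable, SparkContext}
    import org.apache.spark.rdd.{HadoopRDD, RDD}

    // Hypothetical helper mirroring the post-change hadoopFile wiring.
    def textHadoopRDD(sc: SparkContext, path: String): RDD[(LongWritable, Text)] = {
      // Broadcast the ~10 KB Configuration once instead of shipping it per task.
      val confBroadcast = sc.broadcast(new SerializableWritable(sc.hadoopConfiguration))
      // Closure applied to each JobConf that HadoopRDD builds locally.
      val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path)
      new HadoopRDD(
        sc,
        confBroadcast,
        Some(setInputPathsFunc),
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text],
        sc.defaultMinSplits)
    }
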
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 4ecdd65e9b..2d394abfd9 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -55,7 +55,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
* @param broadCastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
* variable references an instance of JobConf, then that JobConf will be used for the Hadoop job.
* Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
- * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobCOnf that HadoopRDD
+ * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD
* creates.
* @param inputFormatClass Storage format of the data to be read.
* @param keyClass Class of the key associated with the inputFormatClass.
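
A minimal sketch of a value matching the initLocalJobConfFuncOpt signature documented above; the property name and value are illustrative only:

    import org.apache.hadoop.mapred.JobConf

    // Hypothetical init closure: runs against each JobConf that HadoopRDD
    // creates on a slave from the broadcast Configuration, letting per-process
    // settings be applied without broadcasting a full JobConf.
    val initLocalJobConf: Option[JobConf => Unit] = Some { jobConf =>
      jobConf.set("mapred.max.split.size", "134217728") // illustrative value: 128 MB
    }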