author	Harvey Feng <harvey@databricks.com>	2013-10-12 20:57:38 -0700
committer	Harvey Feng <harvey@databricks.com>	2013-10-12 21:02:08 -0700
commit	6c32aab87d4ea26086536fb988744d0d30792160 (patch)
tree	25f52465fa677b4d14d8519b71e50e9b091e7384
parent	5a99e67894e3a7716a8a1eaa19ce17e7a8abf5b6 (diff)
Remove the new HadoopRDD constructor from the SparkContext API, plus some minor style changes.
-rw-r--r--	core/src/main/scala/org/apache/spark/SparkContext.scala	28
-rw-r--r--	core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala	2
2 files changed, 3 insertions, 27 deletions
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d9c6264855..7488cdd1ba 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -346,30 +346,6 @@ class SparkContext(
new HadoopRDD(this, conf, inputFormatClass, keyClass, valueClass, minSplits)
}
- /**
- * Get an RDD for a Hadoop file with an arbitrary InputFormat. Accept a Hadoop Configuration
- * that has already been broadcast and use it to construct JobConfs local to each process. These
- * JobConfs will be initialized using an optional, user-specified closure.
- */
- def hadoopRDD[K, V](
- path: String,
- confBroadcast: Broadcast[SerializableWritable[Configuration]],
- initLocalJobConfOpt: Option[JobConf => Unit],
- inputFormatClass: Class[_ <: InputFormat[K, V]],
- keyClass: Class[K],
- valueClass: Class[V],
- minSplits: Int
- ): RDD[(K, V)] = {
- new HadoopRDD(
- this,
- confBroadcast,
- initLocalJobConfOpt,
- inputFormatClass,
- keyClass,
- valueClass,
- minSplits)
- }
-
/** Get an RDD for a Hadoop file with an arbitrary InputFormat */
def hadoopFile[K, V](
path: String,
@@ -380,11 +356,11 @@ class SparkContext(
): RDD[(K, V)] = {
// A Hadoop configuration can be about 10 KB, which is pretty big, so broadcast it.
val confBroadcast = broadcast(new SerializableWritable(hadoopConfiguration))
- val setInputPathsFunc = Some((jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path))
+ val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path)
new HadoopRDD(
this,
confBroadcast,
- setInputPathsFunc,
+ Some(setInputPathsFunc),
inputFormatClass,
keyClass,
valueClass,
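
For reference, here is a minimal sketch of the pattern the surviving hadoopFile path now uses: broadcast the Configuration once, then hand HadoopRDD an Option-wrapped closure that initializes each locally constructed JobConf. The textHadoopRDD helper and its parameters are hypothetical, and constructing HadoopRDD directly assumes its broadcast-conf constructor is accessible from the calling scope:

    import org.apache.hadoop.io.{LongWritable, Text}
    import org.apache.hadoop.mapred.{FileInputFormat, JobConf, TextInputFormat}
    import org.apache.spark.{SerializableWritable, SparkContext}
    import org.apache.spark.rdd.{HadoopRDD, RDD}

    // Hypothetical helper mirroring the post-change hadoopFile wiring.
    def textHadoopRDD(sc: SparkContext, path: String): RDD[(LongWritable, Text)] = {
      // Broadcast the ~10 KB Configuration once instead of shipping it per task.
      val confBroadcast = sc.broadcast(new SerializableWritable(sc.hadoopConfiguration))
      // Closure applied to each JobConf that HadoopRDD builds locally.
      val setInputPathsFunc = (jobConf: JobConf) => FileInputFormat.setInputPaths(jobConf, path)
      new HadoopRDD(
        sc,
        confBroadcast,
        Some(setInputPathsFunc),
        classOf[TextInputFormat],
        classOf[LongWritable],
        classOf[Text],
        sc.defaultMinSplits)
    }
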
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 4ecdd65e9b..2d394abfd9 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -55,7 +55,7 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, @transient s: InputSp
* @param broadCastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
* variable references an instance of JobConf, then that JobConf will be used for the Hadoop job.
* Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
- * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobCOnf that HadoopRDD
+ * @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD
* creates.
* @param inputFormatClass Storage format of the data to be read.
* @param keyClass Class of the key associated with the inputFormatClass.
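
A minimal sketch of a value matching the initLocalJobConfFuncOpt signature documented above; the property name and value are illustrative only:

    import org.apache.hadoop.mapred.JobConf

    // Hypothetical init closure: runs against each JobConf that HadoopRDD
    // creates on a slave from the broadcast Configuration, letting per-process
    // settings be applied without broadcasting a full JobConf.
    val initLocalJobConf: Option[JobConf => Unit] = Some { jobConf =>
      jobConf.set("mapred.max.split.size", "134217728") // illustrative value: 128 MB
    }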