author    Jacek Laskowski <jacek.laskowski@deepsense.io>  2015-11-06 20:05:18 +0000
committer Sean Owen <sowen@cloudera.com>                  2015-11-06 20:05:18 +0000
commit    62bb290773c9f9fa53cbe6d4eedc6e153761a763 (patch)
tree      ce2ee639fcbf90ccbe64ba9862180c987f51671b
parent    8211aab0793cf64202b99be4f31bb8a9ae77050d (diff)
Typo fixes + code readability improvements
Author: Jacek Laskowski <jacek.laskowski@deepsense.io>

Closes #9501 from jaceklaskowski/typos-with-style.
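The two readability idioms applied below, shown standalone (a sketch of the style, not code from the patch; configureIfNeeded is a hypothetical helper):

// 1. A named argument replaces a bare literal plus an explanatory comment:
//      f(None /* initLocalJobConfFuncOpt */)   // before
//      f(initLocalJobConfFuncOpt = None)       // after
//
// 2. Pattern matching replaces paired isInstanceOf/asInstanceOf checks:
import org.apache.hadoop.conf.{Configurable, Configuration}

def configureIfNeeded(obj: AnyRef, conf: Configuration): Unit = obj match {
  case c: Configurable => c.setConf(conf)  // configure only when supported
  case _ =>                                // leave everything else untouched
}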
-rw-r--r-- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala            | 14
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala   | 12
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala | 10
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala        |  2
4 files changed, 21 insertions, 17 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index d841f05ec5..0453614f6a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -88,8 +88,8 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, s: InputSplit)
*
* @param sc The SparkContext to associate the RDD with.
* @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
- * variabe references an instance of JobConf, then that JobConf will be used for the Hadoop job.
- * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
+ * variable references an instance of JobConf, then that JobConf will be used for the Hadoop job.
+ * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
* @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD
* creates.
* @param inputFormatClass Storage format of the data to be read.
@@ -123,7 +123,7 @@ class HadoopRDD[K, V](
sc,
sc.broadcast(new SerializableConfiguration(conf))
.asInstanceOf[Broadcast[SerializableConfiguration]],
- None /* initLocalJobConfFuncOpt */,
+ initLocalJobConfFuncOpt = None,
inputFormatClass,
keyClass,
valueClass,
@@ -184,8 +184,9 @@ class HadoopRDD[K, V](
protected def getInputFormat(conf: JobConf): InputFormat[K, V] = {
val newInputFormat = ReflectionUtils.newInstance(inputFormatClass.asInstanceOf[Class[_]], conf)
.asInstanceOf[InputFormat[K, V]]
- if (newInputFormat.isInstanceOf[Configurable]) {
- newInputFormat.asInstanceOf[Configurable].setConf(conf)
+ newInputFormat match {
+ case c: Configurable => c.setConf(conf)
+ case _ =>
}
newInputFormat
}
@@ -195,9 +196,6 @@ class HadoopRDD[K, V](
// add the credentials here as this can be called before SparkContext initialized
SparkHadoopUtil.get.addCredentials(jobConf)
val inputFormat = getInputFormat(jobConf)
- if (inputFormat.isInstanceOf[Configurable]) {
- inputFormat.asInstanceOf[Configurable].setConf(jobConf)
- }
val inputSplits = inputFormat.getSplits(jobConf, minPartitions)
val array = new Array[Partition](inputSplits.size)
for (i <- 0 until inputSplits.size) {
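For context on the constructor documented above, a minimal sketch of how a HadoopRDD is normally obtained in user code, through SparkContext rather than this constructor (the path and key/value types are assumed examples, not part of this patch):

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat

val records = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///path/to/input")
val lines = records.map { case (_, text) => text.toString }  // Text objects are reused; copy to String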
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index a1f0fd05f6..4a9518fff4 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -541,8 +541,7 @@ class DAGScheduler(
}
/**
- * Submit an action job to the scheduler and get a JobWaiter object back. The JobWaiter object
- * can be used to block until the the job finishes executing or can be used to cancel the job.
+ * Submit an action job to the scheduler.
*
* @param rdd target RDD to run tasks on
* @param func a function to run on each partition of the RDD
@@ -551,6 +550,11 @@ class DAGScheduler(
* @param callSite where in the user program this job was called
* @param resultHandler callback to pass each result to
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
+ *
+ * @return a JobWaiter object that can be used to block until the job finishes executing
+ * or can be used to cancel the job.
+ *
+ * @throws IllegalArgumentException when partitions ids are illegal
*/
def submitJob[T, U](
rdd: RDD[T],
@@ -584,7 +588,7 @@ class DAGScheduler(
/**
* Run an action job on the given RDD and pass all the results to the resultHandler function as
- * they arrive. Throws an exception if the job fials, or returns normally if successful.
+ * they arrive.
*
* @param rdd target RDD to run tasks on
* @param func a function to run on each partition of the RDD
@@ -593,6 +597,8 @@ class DAGScheduler(
* @param callSite where in the user program this job was called
* @param resultHandler callback to pass each result to
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
+ *
+ * @throws Exception when the job fails
*/
def runJob[T, U](
rdd: RDD[T],
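For context, a sketch of how the returned JobWaiter is consumed; this mirrors what runJob in this version does with submitJob's result (names taken from the surrounding scheduler code; a sketch, not part of this patch):

val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
waiter.awaitResult() match {
  case JobSucceeded     => // every partition finished; results went to resultHandler
  case JobFailed(cause) => throw cause  // surface the failure to the caller
}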
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index f478f9982a..ea97ef0e74 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -27,11 +27,11 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter
/**
-* A ShuffleMapTask divides the elements of an RDD into multiple buckets (based on a partitioner
-* specified in the ShuffleDependency).
-*
-* See [[org.apache.spark.scheduler.Task]] for more information.
-*
+ * A ShuffleMapTask divides the elements of an RDD into multiple buckets (based on a partitioner
+ * specified in the ShuffleDependency).
+ *
+ * See [[org.apache.spark.scheduler.Task]] for more information.
+ *
* @param stageId id of the stage this task belongs to
* @param taskBinary broadcast version of the RDD and the ShuffleDependency. Once deserialized,
* the type should be (RDD[_], ShuffleDependency[_, _, _]).
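For context, the buckets mentioned above come from the partitioner: each record's key maps to one of numPartitions output partitions. A minimal sketch using HashPartitioner (an assumed example, not part of this patch):

import org.apache.spark.HashPartitioner

val partitioner = new HashPartitioner(4)           // 4 shuffle buckets
val bucket = partitioner.getPartition("some key")  // bucket index in [0, 4)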
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
index be8526ba9b..517c8991ae 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
@@ -29,7 +29,7 @@ private[spark] class TaskSet(
val stageAttemptId: Int,
val priority: Int,
val properties: Properties) {
- val id: String = stageId + "." + stageAttemptId
+ val id: String = stageId + "." + stageAttemptId
override def toString: String = "TaskSet " + id
}
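For context, an assumed example of the id format built above (TaskSet is private[spark] and the tasks parameter is not shown in this hunk, so this is a sketch rather than user-facing code):

val taskSet = new TaskSet(tasks = Array.empty, stageId = 3, stageAttemptId = 0, priority = 0, properties = null)
taskSet.id        // "3.0"
taskSet.toString  // "TaskSet 3.0"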