author    Jacek Laskowski <jacek.laskowski@deepsense.io>  2015-11-06 20:05:18 +0000
committer Sean Owen <sowen@cloudera.com>                  2015-11-06 20:05:18 +0000
commit    62bb290773c9f9fa53cbe6d4eedc6e153761a763 (patch)
tree      ce2ee639fcbf90ccbe64ba9862180c987f51671b
parent    8211aab0793cf64202b99be4f31bb8a9ae77050d (diff)
Typo fixes + code readability improvements
Author: Jacek Laskowski <jacek.laskowski@deepsense.io>

Closes #9501 from jaceklaskowski/typos-with-style.
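The two readability idioms applied below, shown standalone (a sketch of the style, not code from the patch; configureIfNeeded is a hypothetical helper):

// 1. A named argument replaces a bare literal plus an explanatory comment:
//      f(None /* initLocalJobConfFuncOpt */)   // before
//      f(initLocalJobConfFuncOpt = None)       // after
//
// 2. Pattern matching replaces paired isInstanceOf/asInstanceOf checks:
import org.apache.hadoop.conf.{Configurable, Configuration}

def configureIfNeeded(obj: AnyRef, conf: Configuration): Unit = obj match {
  case c: Configurable => c.setConf(conf)  // configure only when supported
  case _ =>                                // leave everything else untouched
}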
-rw-r--r-- core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala            | 14
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala   | 12
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala | 10
-rw-r--r-- core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala        |  2
4 files changed, 21 insertions, 17 deletions
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index d841f05ec5..0453614f6a 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -88,8 +88,8 @@ private[spark] class HadoopPartition(rddId: Int, idx: Int, s: InputSplit)
*
* @param sc The SparkContext to associate the RDD with.
* @param broadcastedConf A general Hadoop Configuration, or a subclass of it. If the enclosed
- * variabe references an instance of JobConf, then that JobConf will be used for the Hadoop job.
- * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
+ * variable references an instance of JobConf, then that JobConf will be used for the Hadoop job.
+ * Otherwise, a new JobConf will be created on each slave using the enclosed Configuration.
* @param initLocalJobConfFuncOpt Optional closure used to initialize any JobConf that HadoopRDD
* creates.
* @param inputFormatClass Storage format of the data to be read.
@@ -123,7 +123,7 @@ class HadoopRDD[K, V](
sc,
sc.broadcast(new SerializableConfiguration(conf))
.asInstanceOf[Broadcast[SerializableConfiguration]],
- None /* initLocalJobConfFuncOpt */,
+ initLocalJobConfFuncOpt = None,
inputFormatClass,
keyClass,
valueClass,
@@ -184,8 +184,9 @@ class HadoopRDD[K, V](
protected def getInputFormat(conf: JobConf): InputFormat[K, V] = {
val newInputFormat = ReflectionUtils.newInstance(inputFormatClass.asInstanceOf[Class[_]], conf)
.asInstanceOf[InputFormat[K, V]]
- if (newInputFormat.isInstanceOf[Configurable]) {
- newInputFormat.asInstanceOf[Configurable].setConf(conf)
+ newInputFormat match {
+ case c: Configurable => c.setConf(conf)
+ case _ =>
}
newInputFormat
}
@@ -195,9 +196,6 @@ class HadoopRDD[K, V](
// add the credentials here as this can be called before SparkContext initialized
SparkHadoopUtil.get.addCredentials(jobConf)
val inputFormat = getInputFormat(jobConf)
- if (inputFormat.isInstanceOf[Configurable]) {
- inputFormat.asInstanceOf[Configurable].setConf(jobConf)
- }
val inputSplits = inputFormat.getSplits(jobConf, minPartitions)
val array = new Array[Partition](inputSplits.size)
for (i <- 0 until inputSplits.size) {
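For context on the constructor documented above, a minimal sketch of how a HadoopRDD is normally obtained in user code, through SparkContext rather than this constructor (the path and key/value types are assumed examples, not part of this patch):

import org.apache.hadoop.io.{LongWritable, Text}
import org.apache.hadoop.mapred.TextInputFormat

val records = sc.hadoopFile[LongWritable, Text, TextInputFormat]("hdfs:///path/to/input")
val lines = records.map { case (_, text) => text.toString }  // Text objects are reused; copy to String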
diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
index a1f0fd05f6..4a9518fff4 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -541,8 +541,7 @@ class DAGScheduler(
}
/**
- * Submit an action job to the scheduler and get a JobWaiter object back. The JobWaiter object
- * can be used to block until the the job finishes executing or can be used to cancel the job.
+ * Submit an action job to the scheduler.
*
* @param rdd target RDD to run tasks on
* @param func a function to run on each partition of the RDD
@@ -551,6 +550,11 @@ class DAGScheduler(
* @param callSite where in the user program this job was called
* @param resultHandler callback to pass each result to
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
+ *
+ * @return a JobWaiter object that can be used to block until the job finishes executing
+ * or can be used to cancel the job.
+ *
+ * @throws IllegalArgumentException when partitions ids are illegal
*/
def submitJob[T, U](
rdd: RDD[T],
@@ -584,7 +588,7 @@ class DAGScheduler(
/**
* Run an action job on the given RDD and pass all the results to the resultHandler function as
- * they arrive. Throws an exception if the job fials, or returns normally if successful.
+ * they arrive.
*
* @param rdd target RDD to run tasks on
* @param func a function to run on each partition of the RDD
@@ -593,6 +597,8 @@ class DAGScheduler(
* @param callSite where in the user program this job was called
* @param resultHandler callback to pass each result to
* @param properties scheduler properties to attach to this job, e.g. fair scheduler pool name
+ *
+ * @throws Exception when the job fails
*/
def runJob[T, U](
rdd: RDD[T],
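For context, a sketch of how the returned JobWaiter is consumed; this mirrors what runJob in this version does with submitJob's result (names taken from the surrounding scheduler code; a sketch, not part of this patch):

val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
waiter.awaitResult() match {
  case JobSucceeded     => // every partition finished; results went to resultHandler
  case JobFailed(cause) => throw cause  // surface the failure to the caller
}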
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index f478f9982a..ea97ef0e74 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -27,11 +27,11 @@ import org.apache.spark.rdd.RDD
import org.apache.spark.shuffle.ShuffleWriter
/**
-* A ShuffleMapTask divides the elements of an RDD into multiple buckets (based on a partitioner
-* specified in the ShuffleDependency).
-*
-* See [[org.apache.spark.scheduler.Task]] for more information.
-*
+ * A ShuffleMapTask divides the elements of an RDD into multiple buckets (based on a partitioner
+ * specified in the ShuffleDependency).
+ *
+ * See [[org.apache.spark.scheduler.Task]] for more information.
+ *
* @param stageId id of the stage this task belongs to
* @param taskBinary broadcast version of the RDD and the ShuffleDependency. Once deserialized,
* the type should be (RDD[_], ShuffleDependency[_, _, _]).
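For context, the buckets mentioned above come from the partitioner: each record's key maps to one of numPartitions output partitions. A minimal sketch using HashPartitioner (an assumed example, not part of this patch):

import org.apache.spark.HashPartitioner

val partitioner = new HashPartitioner(4)           // 4 shuffle buckets
val bucket = partitioner.getPartition("some key")  // bucket index in [0, 4)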
diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
index be8526ba9b..517c8991ae 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSet.scala
@@ -29,7 +29,7 @@ private[spark] class TaskSet(
val stageAttemptId: Int,
val priority: Int,
val properties: Properties) {
- val id: String = stageId + "." + stageAttemptId
+ val id: String = stageId + "." + stageAttemptId
override def toString: String = "TaskSet " + id
}
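For context, an assumed example of the id format built above (TaskSet is private[spark] and the tasks parameter is not shown in this hunk, so this is a sketch rather than user-facing code):

val taskSet = new TaskSet(tasks = Array.empty, stageId = 3, stageAttemptId = 0, priority = 0, properties = null)
taskSet.id        // "3.0"
taskSet.toString  // "TaskSet 3.0"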