author    Dongjoon Hyun <dongjoon@apache.org>  2016-03-14 09:07:39 +0000
committer Sean Owen <sowen@cloudera.com>  2016-03-14 09:07:39 +0000
commit    acdf21970334cea9d6cfc287e4ccb8e72de9dee1 (patch)
tree      df8bcf3d80dc92ad74c5c27bd3618397205bcc86
parent    e58fa19d17db5dd8a00551e20b46921f98b958f7 (diff)
[MINOR][DOCS] Fix more typos in comments/strings.
## What changes were proposed in this pull request?

This PR fixes 135 typos over 107 files:
* 121 typos in comments
* 11 typos in test case names
* 3 typos in log messages

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #11689 from dongjoon-hyun/fix_more_typos.
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala | 4
-rw-r--r--  core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala | 6
-rw-r--r--  core/src/main/scala/org/apache/spark/executor/Executor.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/storage/BlockManager.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Utils.scala | 4
-rw-r--r--  core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala | 4
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala | 6
-rw-r--r--  core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala | 6
-rw-r--r--  core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala | 4
-rw-r--r--  core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala | 2
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala | 2
-rw-r--r--  external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala | 2
-rw-r--r--  external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala | 6
-rw-r--r--  external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala | 2
-rw-r--r--  external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala | 2
-rw-r--r--  external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala | 2
-rw-r--r--  external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 4
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/package.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala | 2
-rw-r--r--  graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala | 2
-rw-r--r--  project/MimaExcludes.scala | 6
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala | 4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala | 4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala | 2
-rw-r--r--  sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala | 4
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala | 6
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala | 2
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala | 2
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala | 6
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala | 6
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala | 2
107 files changed, 135 insertions, 135 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
index 13e18a56c8..0d3a5237d9 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
@@ -66,7 +66,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double])
*/
def unpersist(blocking: Boolean): JavaDoubleRDD = fromRDD(srdd.unpersist(blocking))
- // first() has to be overriden here in order for its return type to be Double instead of Object.
+ // first() has to be overridden here in order for its return type to be Double instead of Object.
override def first(): JDouble = srdd.first()
// Transformations (return a new RDD)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index f1aebbcd39..d362c40b7a 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -351,7 +351,7 @@ class JavaSparkContext(val sc: SparkContext)
}
/**
- * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
+ * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
* etc).
*
@@ -383,7 +383,7 @@ class JavaSparkContext(val sc: SparkContext)
}
/**
- * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
+ * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
*
* @param conf JobConf for setting up the dataset. Note: This will be put into a Broadcast.
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
index 550e1ba6d3..8091aa8062 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
@@ -74,7 +74,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
} else {
None
}
- // Note: use getSizeAsKb (not bytes) to maintain compatiblity if no units are provided
+ // Note: use getSizeAsKb (not bytes) to maintain compatibility if no units are provided
blockSize = conf.getSizeAsKb("spark.broadcast.blockSize", "4m").toInt * 1024
}
setConf(SparkEnv.get.conf)
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
index e2fda29044..000f7e8e1e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
@@ -87,7 +87,7 @@ private[history] class ApplicationCache(
/**
* The cache of applications.
*
- * Tagged as `protected` so as to allow subclasses in tests to accesss it directly
+ * Tagged as `protected` so as to allow subclasses in tests to access it directly
*/
protected val appCache: LoadingCache[CacheKey, CacheEntry] = {
CacheBuilder.newBuilder()
@@ -447,7 +447,7 @@ private[history] class CacheMetrics(prefix: String) extends Source {
private[history] trait ApplicationCacheOperations {
/**
- * Get the application UI and the probe neededed to see if it has been updated.
+ * Get the application UI and the probe needed to see if it has been updated.
* @param appId application ID
* @param attemptId attempt ID
* @return If found, the Spark UI and any history information to be used in the cache
@@ -590,7 +590,7 @@ private[history] object ApplicationCacheCheckFilterRelay extends Logging {
// name of the attempt ID entry in the filter configuration. Optional.
val ATTEMPT_ID = "attemptId"
- // namer of the filter to register
+ // name of the filter to register
val FILTER_NAME = "org.apache.spark.deploy.history.ApplicationCacheCheckFilter"
/** the application cache to relay requests to */
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 07e3c12bc9..48372d70d5 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -124,7 +124,7 @@ private[spark] class Executor(
private val HEARTBEAT_MAX_FAILURES = conf.getInt("spark.executor.heartbeat.maxFailures", 60)
/**
- * Count the failure times of heartbeat. It should only be acessed in the heartbeat thread. Each
+ * Count the failure times of heartbeat. It should only be accessed in the heartbeat thread. Each
* successful heartbeat will reset it to 0.
*/
private var heartbeatFailures = 0
diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
index c9606600ed..0f579cfe42 100644
--- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
@@ -141,7 +141,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
* And on the input of 1 and 50 we would have a histogram of 1, 0, 1
*
* Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched
- * from an O(log n) inseration to O(1) per element. (where n = # buckets) if you set evenBuckets
+ * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets
* to true.
* buckets must be sorted and not contain any duplicates.
* buckets array must be at least two elements
diff --git a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
index def0aac720..dfcdd113df 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
@@ -157,7 +157,7 @@ object InputFormatInfo {
b) Decrement the currently allocated containers on that host.
c) Compute rack info for each host and update rack -> count map based on (b).
d) Allocate nodes based on (c)
- e) On the allocation result, ensure that we dont allocate "too many" jobs on a single node
+ e) On the allocation result, ensure that we don't allocate "too many" jobs on a single node
(even if data locality on that is very high) : this is to prevent fragility of job if a
single (or small set of) hosts go down.
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
index 1ce83485f0..6e9337bb90 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
@@ -45,7 +45,7 @@ class SplitInfo(
hashCode
}
- // This is practically useless since most of the Split impl's dont seem to implement equals :-(
+ // This is practically useless since most of the Split impl's don't seem to implement equals :-(
// So unless there is identity equality between underlyingSplits, it will always fail even if it
// is pointing to same block.
override def equals(other: Any): Boolean = other match {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index bcf65e9d7e..996c1f5d9e 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -1057,7 +1057,7 @@ private[spark] class BlockManager(
failures += 1
replicationFailed = true
peersFailedToReplicateTo += peer
- if (failures > maxReplicationFailures) { // too many failures in replcating to peers
+ if (failures > maxReplicationFailures) { // too many failures in replicating to peers
done = true
}
}
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
index 478a928acd..b19c30e2ff 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -70,7 +70,7 @@ final class ShuffleBlockFetcherIterator(
private[this] var numBlocksToFetch = 0
/**
- * The number of blocks proccessed by the caller. The iterator is exhausted when
+ * The number of blocks processed by the caller. The iterator is exhausted when
* [[numBlocksProcessed]] == [[numBlocksToFetch]].
*/
private[this] var numBlocksProcessed = 0
diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
index 5a8c291431..094953f2f5 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
@@ -102,7 +102,7 @@ private[spark] object UIWorkloadGenerator {
try {
setProperties(desc)
job()
- println("Job funished: " + desc)
+ println("Job finished: " + desc)
} catch {
case e: Exception =>
println("Job Failed: " + desc)
diff --git a/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala b/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
index 73d126ff62..c9b7493fcd 100644
--- a/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
+++ b/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
@@ -18,7 +18,7 @@
package org.apache.spark.util
/**
- * A class loader which makes some protected methods in ClassLoader accesible.
+ * A class loader which makes some protected methods in ClassLoader accessible.
*/
private[spark] class ParentClassLoader(parent: ClassLoader) extends ClassLoader(parent) {
diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index 83ded92609..a06db9a4fc 100644
--- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -253,7 +253,7 @@ object SizeEstimator extends Logging {
} else {
// Estimate the size of a large array by sampling elements without replacement.
// To exclude the shared objects that the array elements may link, sample twice
- // and use the min one to caculate array size.
+ // and use the min one to calculate array size.
val rand = new Random(42)
val drawn = new OpenHashSet[Int](2 * ARRAY_SAMPLE_SIZE)
val s1 = sampleArray(array, state, rand, drawn, length)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index b5a98ce569..37c6c9bf90 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1530,7 +1530,7 @@ private[spark] object Utils extends Logging {
rawMod + (if (rawMod < 0) mod else 0)
}
- // Handles idiosyncracies with hash (add more as required)
+ // Handles idiosyncrasies with hash (add more as required)
// This method should be kept in sync with
// org.apache.spark.network.util.JavaUtils#nonNegativeHash().
def nonNegativeHash(obj: AnyRef): Int = {
@@ -1600,7 +1600,7 @@ private[spark] object Utils extends Logging {
* @param f function to be executed. If prepare is not None, the running time of each call to f
* must be an order of magnitude longer than one millisecond for accurate timing.
* @param prepare function to be executed before each call to f. Its running time doesn't count.
- * @return the total time across all iterations (not couting preparation time)
+ * @return the total time across all iterations (not counting preparation time)
*/
def timeIt(numIters: Int)(f: => Unit, prepare: Option[() => Unit] = None): Long = {
if (prepare.isEmpty) {
diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
index 1314217023..3c61528ab5 100644
--- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
@@ -54,7 +54,7 @@ object RandomSampler {
/**
* Default maximum gap-sampling fraction.
* For sampling fractions <= this value, the gap sampling optimization will be applied.
- * Above this value, it is assumed that "tradtional" Bernoulli sampling is faster. The
+ * Above this value, it is assumed that "traditional" Bernoulli sampling is faster. The
* optimal value for this will depend on the RNG. More expensive RNGs will tend to make
* the optimal value higher. The most reliable way to determine this value for a new RNG
* is to experiment. When tuning for a new RNG, I would expect a value of 0.5 to be close
@@ -319,7 +319,7 @@ class GapSamplingReplacementIterator[T: ClassTag](
/**
* Skip elements with replication factor zero (i.e. elements that won't be sampled).
* Samples 'k' from geometric distribution P(k) = (1-q)(q)^k, where q = e^(-f), that is
- * q is the probabililty of Poisson(0; f)
+ * q is the probability of Poisson(0; f)
*/
private def advance(): Unit = {
val u = math.max(rng.nextDouble(), epsilon)
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 91fef772d1..bb2adff57e 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
import org.apache.spark.util.{ResetSystemProperties, Utils}
// Note: this suite mixes in ResetSystemProperties because SparkSubmit.main() sets a bunch
-// of properties that neeed to be cleared after tests.
+// of properties that needed to be cleared after tests.
class SparkSubmitSuite
extends SparkFunSuite
with Matchers
diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
index b7ff5c9e8c..d2e24912b5 100644
--- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
@@ -501,7 +501,7 @@ class StandaloneDynamicAllocationSuite
master.self.askWithRetry[MasterStateResponse](RequestMasterState)
}
- /** Get the applictions that are active from Master */
+ /** Get the applications that are active from Master */
private def getApplications(): Seq[ApplicationInfo] = {
getMasterState.activeApps
}
diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
index 379c038c55..7017296bd1 100644
--- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
@@ -159,7 +159,7 @@ class AppClientSuite extends SparkFunSuite with LocalSparkContext with BeforeAnd
master.self.askWithRetry[MasterStateResponse](RequestMasterState)
}
- /** Get the applictions that are active from Master */
+ /** Get the applications that are active from Master */
private def getApplications(): Seq[ApplicationInfo] = {
getMasterState.activeApps
}
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
index e24188781f..c874b95b09 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
@@ -219,7 +219,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
val cacheEntry = cache.lookupCacheEntry(app1, None)
assert(1 === cacheEntry.probeTime)
assert(cacheEntry.completed)
- // assert about queries made of the opereations
+ // assert about queries made of the operations
assert(1 === operations.getAppUICount, "getAppUICount")
assert(1 === operations.attachCount, "attachCount")
@@ -338,7 +338,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
}
/**
- * Look up the cache entry and assert that it maches in the expected value.
+ * Look up the cache entry and assert that it matches in the expected value.
* This assertion works if the two CacheEntries are different -it looks at the fields.
* UI are compared on object equality; the timestamp and completed flags directly.
* @param appId application ID
@@ -384,7 +384,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
val operations = new StubCacheOperations()
val clock = new ManualClock(0)
val size = 5
- // only two entries are retained, so we expect evictions to occurr on lookups
+ // only two entries are retained, so we expect evictions to occur on lookups
implicit val cache: ApplicationCache = new TestApplicationCache(operations,
retainedApplications = size, clock = clock)
diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
index e5a448298a..056e5463a0 100644
--- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
@@ -98,14 +98,14 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext
rdd.coalesce(4).count()
}
- // for count and coelesce, the same bytes should be read.
+ // for count and coalesce, the same bytes should be read.
assert(bytesRead != 0)
assert(bytesRead2 == bytesRead)
}
/**
* This checks the situation where we have interleaved reads from
- * different sources. Currently, we only accumulate fron the first
+ * different sources. Currently, we only accumulate from the first
* read method we find in the task. This test uses cartesian to create
* the interleaved reads.
*
@@ -183,7 +183,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext
assert(records == numRecords)
}
- test("input metrics on recordsd read with cache") {
+ test("input metrics on records read with cache") {
// prime the cache manager
val rdd = sc.textFile(tmpFilePath, 4).cache()
rdd.collect()
diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
index 76451788d2..864adddad3 100644
--- a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
@@ -164,8 +164,8 @@ class DoubleRDDSuite extends SparkFunSuite with SharedSparkContext {
val expectedHistogramResults = Array(4, 2, 1, 2, 3)
assert(histogramResults === expectedHistogramResults)
}
- // Make sure this works with a NaN end bucket and an inifity
- test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfity") {
+ // Make sure this works with a NaN end bucket and an infinity
+ test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfinity") {
// Make sure that it works with two unequally spaced buckets and elements in each
val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0,
200.0, 200.1, 1.0/0.0, -1.0/0.0, Double.NaN))
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 7d51538d92..b0d69de6e2 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -182,7 +182,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext {
assert(sums(2) === 1)
}
- test("reduceByKey with many output partitons") {
+ test("reduceByKey with many output partitions") {
val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1)))
val sums = pairs.reduceByKey(_ + _, 10).collect()
assert(sums.toSet === Set((1, 7), (2, 1)))
diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
index d8849d5948..d1c7143abf 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
@@ -663,7 +663,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
completeShuffleMapStageSuccessfully(0, 1, numShufflePartitions = parts)
completeNextResultStageWithSuccess(1, 1)
- // Confirm job finished succesfully
+ // Confirm job finished successfully
sc.listenerBus.waitUntilEmpty(1000)
assert(ended === true)
assert(results === (0 until parts).map { idx => idx -> 42 }.toMap)
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
index 2c99dd5afb..d35ca411f4 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
@@ -396,7 +396,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
val rescheduleDelay = 300L
val conf = new SparkConf().
set("spark.scheduler.executorTaskBlacklistTime", rescheduleDelay.toString).
- // dont wait to jump locality levels in this test
+ // don't wait to jump locality levels in this test
set("spark.locality.wait", "0")
sc = new SparkContext("local", "test", conf)
diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
index 683aaa3aab..bdee889cdc 100644
--- a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
@@ -191,7 +191,7 @@ class SerializationDebuggerSuite extends SparkFunSuite with BeforeAndAfterEach {
}
val originalException = new NotSerializableException("someClass")
- // verify thaht original exception is returned on failure
+ // verify that original exception is returned on failure
assert(SerializationDebugger.improveException(o, originalException).eq(originalException))
}
}
diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
index 853503bbc2..83eba3690e 100644
--- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
@@ -53,7 +53,7 @@ class XORShiftRandomSuite extends SparkFunSuite with Matchers {
* Perform the chi square test on the 5 rows of randomly generated numbers evenly divided into
* 10 bins. chiSquareTest returns true iff the null hypothesis (that the classifications
* represented by the counts in the columns of the input 2-way table are independent of the
- * rows) can be rejected with 100 * (1 - alpha) percent confidence, where alpha is prespeficied
+ * rows) can be rejected with 100 * (1 - alpha) percent confidence, where alpha is prespecified
* as 0.05
*/
val chiTest = new ChiSquareTest
diff --git a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
index 7796f362bb..d498af9c39 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
@@ -38,7 +38,7 @@ object SkewedGroupByTest {
val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
val ranGen = new Random
- // map output sizes lineraly increase from the 1st to the last
+ // map output sizes linearly increase from the 1st to the last
numKVPairs = (1.0 * (p + 1) / numMappers * numKVPairs).toInt
var arr1 = new Array[(Int, Array[Byte])](numKVPairs)
diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
index aa530a7121..09d3fe91e4 100644
--- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
+++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
@@ -101,7 +101,7 @@ private[sink] trait Logging {
private def initializeLogging() {
Logging.initialized = true
- // Force a call into slf4j to initialize it. Avoids this happening from mutliple threads
+ // Force a call into slf4j to initialize it. Avoids this happening from multiple threads
// and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html
log
}
diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
index b9d4e762ca..3555fa68b6 100644
--- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
+++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
@@ -77,7 +77,7 @@ private[flume] class FlumeBatchFetcher(receiver: FlumePollingReceiver) extends R
/**
* Gets a batch of events from the specified client. This method does not handle any exceptions
- * which will be propogated to the caller.
+ * which will be propagated to the caller.
* @param client Client to get events from
* @return [[Some]] which contains the event batch if Flume sent any events back, else [[None]]
*/
@@ -96,8 +96,8 @@ private[flume] class FlumeBatchFetcher(receiver: FlumePollingReceiver) extends R
}
/**
- * Store the events in the buffer to Spark. This method will not propogate any exceptions,
- * but will propogate any other errors.
+ * Store the events in the buffer to Spark. This method will not propagate any exceptions,
+ * but will propagate any other errors.
* @param buffer The buffer to store
* @return true if the data was stored without any exception being thrown, else false
*/
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 8a66621a31..726b5d8ec3 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -167,7 +167,7 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
): Either[Err, Map[TopicAndPartition, LeaderOffset]] = {
getLeaderOffsets(topicAndPartitions, before, 1).right.map { r =>
r.map { kv =>
- // mapValues isnt serializable, see SI-7005
+ // mapValues isn't serializable, see SI-7005
kv._1 -> kv._2.head
}
}
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
index b5b76cb92d..23b74da642 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
@@ -132,7 +132,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging {
* Retry the given amount of times with a random backoff time (millis) less than the
* given maxBackOffMillis
*
- * @param expression expression to evalute
+ * @param expression expression to evaluate
* @param numRetriesLeft number of retries left
* @param maxBackOffMillis: max millis between retries
*
diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
index 2555332d22..905c33834d 100644
--- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
+++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
@@ -122,7 +122,7 @@ abstract class KinesisBackedBlockRDDTests(aggregateTestData: Boolean)
testIsBlockValid = true)
}
- testIfEnabled("Test whether RDD is valid after removing blocks from block anager") {
+ testIfEnabled("Test whether RDD is valid after removing blocks from block manager") {
testRDD(numPartitions = 2, numPartitionsInBM = 2, numPartitionsInKinesis = 2,
testBlockRemove = true)
}
diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
index fd15b6ccdc..deac9090e2 100644
--- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
+++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
@@ -194,7 +194,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft
verify(checkpointerMock, times(1)).checkpoint()
}
- test("retry failed after exhausing all retries") {
+ test("retry failed after exhausting all retries") {
val expectedErrorMessage = "final try error message"
when(checkpointerMock.checkpoint())
.thenThrow(new ThrottlingException("error message"))
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index fe884d0022..5485e30f5a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -297,7 +297,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
/**
* Restricts the graph to only the vertices and edges satisfying the predicates. The resulting
- * subgraph satisifies
+ * subgraph satisfies
*
* {{{
* V' = {v : for all v in V where vpred(v)}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index d537b6141c..fcb1b5999f 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -236,11 +236,11 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
* @param preprocess a function to compute new vertex and edge data before filtering
* @param epred edge pred to filter on after preprocess, see more details under
* [[org.apache.spark.graphx.Graph#subgraph]]
- * @param vpred vertex pred to filter on after prerocess, see more details under
+ * @param vpred vertex pred to filter on after preprocess, see more details under
* [[org.apache.spark.graphx.Graph#subgraph]]
* @tparam VD2 vertex type the vpred operates on
* @tparam ED2 edge type the epred operates on
- * @return a subgraph of the orginal graph, with its data unchanged
+ * @return a subgraph of the original graph, with its data unchanged
*
* @example This function can be used to filter the graph based on some property, without
* changing the vertex and edge values in your program. For example, we could remove the vertices
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
index 6dab465fb9..a4e293d74a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
@@ -49,7 +49,7 @@ object ShippableVertexPartition {
/**
* Construct a `ShippableVertexPartition` from the given vertices with the specified routing
* table, filling in missing vertices mentioned in the routing table using `defaultVal`,
- * and merging duplicate vertex atrribute with mergeFunc.
+ * and merging duplicate vertex attribute with mergeFunc.
*/
def apply[VD: ClassTag](
iter: Iterator[(VertexId, VD)], routingTable: RoutingTablePartition, defaultVal: VD,
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 46faad2e68..00ba358a9b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -54,7 +54,7 @@ import org.apache.spark.graphx._
* }}}
*
* `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
- * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
+ * neighbors which link to `i` and `outDeg[j]` is the out degree of vertex `j`.
*
* Note that this is not the "normalized" PageRank and as a consequence pages that have no
* inlinks will have a PageRank of alpha.
@@ -209,7 +209,7 @@ object PageRank extends Logging {
}
// Set the weight on the edges based on the degree
.mapTriplets( e => 1.0 / e.srcAttr )
- // Set the vertex attributes to (initalPR, delta = 0)
+ // Set the vertex attributes to (initialPR, delta = 0)
.mapVertices { (id, attr) =>
if (id == src) (resetProb, Double.NegativeInfinity) else (0.0, 0.0)
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
index 6aab28ff05..dde25b9659 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
@@ -30,7 +30,7 @@ package object graphx {
*/
type VertexId = Long
- /** Integer identifer of a graph partition. Must be less than 2^30. */
+ /** Integer identifier of a graph partition. Must be less than 2^30. */
// TODO: Consider using Char.
type PartitionID = Int
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
index a6d0cb6409..d76e84ed8c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
@@ -92,7 +92,7 @@ private[graphx] object BytecodeUtils {
/**
* Given the class name, return whether we should look into the class or not. This is used to
- * skip examing a large quantity of Java or Scala classes that we know for sure wouldn't access
+ * skip examining a large quantity of Java or Scala classes that we know for sure wouldn't access
* the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of ".").
*/
private def skipClass(className: String): Boolean = {
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
index f497e001df..cb981797d3 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
@@ -229,7 +229,7 @@ class GraphSuite extends SparkFunSuite with LocalSparkContext {
test("subgraph") {
withSpark { sc =>
- // Create a star graph of 10 veritces.
+ // Create a star graph of 10 vertices.
val n = 10
val star = starGraph(sc, n)
// Take only vertices whose vids are even
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
index f21b623e93..2cd94fa8f5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
@@ -523,7 +523,7 @@ private[ml] object FeedForwardTopology {
/**
* Creates a multi-layer perceptron
* @param layerSizes sizes of layers including input and output size
- * @param softmax wether to use SoftMax or Sigmoid function for an output layer.
+ * @param softmax whether to use SoftMax or Sigmoid function for an output layer.
* Softmax is default
* @return multilayer perceptron topology
*/
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
index 521d209a8f..27554acdf3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
@@ -481,7 +481,7 @@ object NominalAttribute extends AttributeFactory {
* A binary attribute.
* @param name optional name
* @param index optional index
- * @param values optionla values. If set, its size must be 2.
+ * @param values optional values. If set, its size must be 2.
*/
@DeveloperApi
class BinaryAttribute private[ml] (
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index f8de4e2220..c8ec0c1685 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -83,7 +83,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
/**
* Returns the mean average precision (MAP) of all the queries.
* If a query has an empty ground truth set, the average precision will be zero and a log
- * warining is generated.
+ * warning is generated.
*/
lazy val meanAveragePrecision: Double = {
predictionAndLabels.map { case (pred, lab) =>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
index 07eb750b06..790d6b101e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -58,7 +58,7 @@ class AssociationRules private[fpm] (
/**
* Computes the association rules with confidence above [[minConfidence]].
* @param freqItemsets frequent itemset model obtained from [[FPGrowth]]
- * @return a [[Set[Rule[Item]]] containing the assocation rules.
+ * @return a [[Set[Rule[Item]]] containing the association rules.
*
*/
@Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index 240781bcd3..58fd010e49 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -158,7 +158,7 @@ object LinearDataGenerator {
/**
* Generate an RDD containing sample data for Linear Regression models - including Ridge, Lasso,
- * and uregularized variants.
+ * and unregularized variants.
*
* @param sc SparkContext to be used for generating the RDD.
* @param nexamples Number of examples that will be contained in the RDD.
diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
index 71f4926290..114a238462 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
@@ -88,7 +88,7 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
val df = sqlContext.read.format("libsvm").load(path)
val tempDir2 = Utils.createTempDir()
val writepath = tempDir2.toURI.toString
- // TODO: Remove requirement to coalesce by supporting mutiple reads.
+ // TODO: Remove requirement to coalesce by supporting multiple reads.
df.coalesce(1).write.format("libsvm").mode(SaveMode.Overwrite).save(writepath)
val df2 = sqlContext.read.format("libsvm").load(writepath)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index cea0adc55c..28fada7053 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -496,7 +496,7 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
* features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
* weights = coef(glmnet(features,label, family="multinomial", alpha = 0, lambda = 0))
*
- * The model weights of mutinomial logstic regression in R have `K` set of linear predictors
+ * The model weights of multinomial logistic regression in R have `K` set of linear predictors
* for `K` classes classification problem; however, only `K-1` set is required if the first
* outcome is chosen as a "pivot", and the other `K-1` outcomes are separately regressed against
* the pivot outcome. This can be done by subtracting the first weights from those `K-1` set
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
index 142b90e764..46fcebe132 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
@@ -144,7 +144,7 @@ class HypothesisTestSuite extends SparkFunSuite with MLlibTestSparkContext {
assert(chi.size === numCols)
assert(chi(1000) != null) // SPARK-3087
- // Detect continous features or labels
+ // Detect continuous features or labels
val random = new Random(11L)
val continuousLabel =
Seq.fill(100000)(LabeledPoint(random.nextDouble(), Vectors.dense(random.nextInt(2))))
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
index dca8ea815a..5518bdf527 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
@@ -1075,7 +1075,7 @@ object DecisionTreeSuite extends SparkFunSuite {
assert(a.isLeaf === b.isLeaf)
assert(a.split === b.split)
(a.stats, b.stats) match {
- // TODO: Check other fields besides the infomation gain.
+ // TODO: Check other fields besides the information gain.
case (Some(aStats), Some(bStats)) => assert(aStats.gain === bStats.gain)
case (None, None) =>
case _ => throw new AssertionError(
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index c4c8d8870f..faa52bf18c 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -334,7 +334,7 @@ object MimaExcludes {
excludePackage("org.apache.spark.sql.columnar"),
// The shuffle package is considered private.
excludePackage("org.apache.spark.shuffle"),
- // The collections utlities are considered pricate.
+ // The collections utilities are considered private.
excludePackage("org.apache.spark.util.collection")
) ++
MimaBuild.excludeSparkClass("streaming.flume.FlumeTestUtils") ++
@@ -639,7 +639,7 @@ object MimaExcludes {
Seq(
MimaBuild.excludeSparkPackage("deploy"),
MimaBuild.excludeSparkPackage("ml"),
- // SPARK-7910 Adding a method to get the partioner to JavaRDD,
+ // SPARK-7910 Adding a method to get the partitioner to JavaRDD,
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner"),
// SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"),
@@ -657,7 +657,7 @@ object MimaExcludes {
ProblemFilters.exclude[MissingClassProblem](
"org.apache.spark.scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint")
) ++ Seq(
- // SPARK-4655 - Making Stage an Abstract class broke binary compatility even though
+ // SPARK-4655 - Making Stage an Abstract class broke binary compatibility even though
// the stage class is defined as private[spark]
ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.scheduler.Stage")
) ++ Seq(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index f108264861..1219d4d453 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -305,7 +305,7 @@ trait Row extends Serializable {
* @throws ClassCastException when data type does not match.
*/
def getStruct(i: Int): Row = {
- // Product and Row both are recoginized as StructType in a Row
+ // Product and Row both are recognized as StructType in a Row
val t = get(i)
if (t.isInstanceOf[Product]) {
Row.fromTuple(t.asInstanceOf[Product])
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index c12b5c20ea..bf07f4557a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -29,7 +29,7 @@ import org.apache.spark.util.Utils
*/
object ScalaReflection extends ScalaReflection {
val universe: scala.reflect.runtime.universe.type = scala.reflect.runtime.universe
- // Since we are creating a runtime mirror usign the class loader of current thread,
+ // Since we are creating a runtime mirror using the class loader of current thread,
// we need to use def at here. So, every time we call mirror, it is using the
// class loader of the current thread.
// SPARK-13640: Synchronize this because universe.runtimeMirror is not thread-safe in Scala 2.10.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
index ad56c98649..9c38dd2ee4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -155,7 +155,7 @@ object DecimalPrecision extends Rule[LogicalPlan] {
*
* Note that technically this is an "optimization" and should go into the optimizer. However,
* by the time the optimizer runs, these comparison expressions would be pretty hard to pattern
- * match because there are multuple (at least 2) levels of casts involved.
+ * match because there are multiple (at least 2) levels of casts involved.
*
* There are a lot more possible rules we can implement, but we don't do them
* because we are not sure how common they are.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
index 38c1641f73..2e30d83a60 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
@@ -96,7 +96,7 @@ import org.apache.spark.sql.types.IntegerType
* This rule duplicates the input data by two or more times (# distinct groups + an optional
* non-distinct group). This will put quite a bit of memory pressure of the used aggregate and
* exchange operators. Keeping the number of distinct groups as low a possible should be priority,
- * we could improve this in the current rule by applying more advanced expression cannocalization
+ * we could improve this in the current rule by applying more advanced expression canonicalization
* techniques.
*/
object DistinctAggregationRewriter extends Rule[LogicalPlan] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 57bdb164e1..0f85f44ffa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -307,7 +307,7 @@ object HiveTypeCoercion {
case p @ Equality(left @ TimestampType(), right @ StringType()) =>
p.makeCopy(Array(left, Cast(right, TimestampType)))
- // We should cast all relative timestamp/date/string comparison into string comparisions
+ // We should cast all relative timestamp/date/string comparison into string comparisons
// This behaves as a user would expect because timestamp strings sort lexicographically.
// i.e. TimeStamp(2013-01-01 00:00 ...) < "2014" = true
case p @ BinaryComparison(left @ StringType(), right @ DateType()) =>
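A tiny self-contained illustration of the lexicographic-ordering claim in the comment above, in plain Scala with no Spark involved:

    object LexicographicSketch {
      def main(args: Array[String]): Unit = {
        // A full timestamp string compares against a bare year string the way
        // a user would expect, because both sort lexicographically.
        println("2013-01-01 00:00:00" < "2014") // true
        println("2015-06-30 12:00:00" < "2014") // false
      }
    }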
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
index 3831535574..8bdf9b29c9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
@@ -53,7 +53,7 @@ object AttributeSet {
* cosmetically (e.g., the names have different capitalizations).
*
* Note that we do not override equality for Attribute references as it is really weird when
- * `AttributeReference("a"...) == AttrributeReference("b", ...)`. This tactic leads to broken tests,
+ * `AttributeReference("a"...) == AttributeReference("b", ...)`. This tactic leads to broken tests,
* and also makes doing transformations hard (we always try keep older trees instead of new ones
* when the transformation was a no-op).
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
index acea049adc..644a5b28a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
@@ -36,7 +36,7 @@ object ExpressionSet {
* Internally this set uses the canonical representation, but keeps also track of the original
* expressions to ease debugging. Since different expressions can share the same canonical
* representation, this means that operations that extract expressions from this set are only
- * guranteed to see at least one such expression. For example:
+ * guaranteed to see at least one such expression. For example:
*
* {{{
* val set = AttributeSet(a + 1, 1 + a)
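A hedged, self-contained sketch of the guarantee described above, using a toy expression type rather than Spark's internal classes; the canonicalization rule here is a deliberate simplification.

    sealed trait Expr
    case class Ref(name: String) extends Expr
    case class Lit(value: Int) extends Expr
    case class Add(left: Expr, right: Expr) extends Expr

    object CanonicalSetSketch {
      // Order the operands of commutative addition so that a + 1 and 1 + a
      // share one canonical form (a stand-in for the real canonicalization).
      def canonical(e: Expr): Expr = e match {
        case Add(l, r) =>
          val (cl, cr) = (canonical(l), canonical(r))
          if (cl.toString <= cr.toString) Add(cl, cr) else Add(cr, cl)
        case other => other
      }

      def main(args: Array[String]): Unit = {
        val originals = Seq(Add(Ref("a"), Lit(1)), Add(Lit(1), Ref("a")))
        // Keep one original expression per canonical form; callers are only
        // guaranteed to see at least one of the equivalent originals.
        val set = originals.groupBy(canonical).values.map(_.head).toSeq
        println(set.size) // 1
        println(set.head) // either Add(Ref(a),Lit(1)) or Add(Lit(1),Ref(a))
      }
    }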
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
index 22184f1ddf..500ff447a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
@@ -973,7 +973,7 @@ case class ScalaUDF(
// scalastyle:on line.size.limit
- // Generate codes used to convert the arguments to Scala type for user-defined funtions
+ // Generate code used to convert the arguments to Scala types for user-defined functions
private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): String = {
val converterClassName = classOf[Any => Any].getName
val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index c4265a7539..3dbe634898 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -126,7 +126,7 @@ class CodegenContext {
* For expressions that appear more than once, generate additional code to prevent
* recomputing the value.
*
- * For example, consider two exprsesion generated from this SQL statement:
+ * For example, consider two expressions generated from this SQL statement:
* SELECT (col1 + col2), (col1 + col2) / col3.
*
* equivalentExpressions will match the tree containing `col1 + col2` and it will only
@@ -140,7 +140,7 @@ class CodegenContext {
// Foreach expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
- // The collection of sub-exression result resetting methods that need to be called on each row.
+ // The collection of sub-expression result resetting methods that need to be called on each row.
val subexprFunctions = mutable.ArrayBuffer.empty[String]
def declareAddedFunctions(): String = {
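A simplified sketch of what subexpression elimination buys for the example query above; this is plain Scala standing in for the generated code, not the codegen output itself.

    object SubexprEliminationSketch {
      // Naive evaluation of SELECT (col1 + col2), (col1 + col2) / col3 would
      // compute col1 + col2 twice; hoisting it into one local mirrors what the
      // generated code does for a shared subexpression.
      def project(col1: Int, col2: Int, col3: Int): (Int, Int) = {
        val shared = col1 + col2
        (shared, shared / col3)
      }

      def main(args: Array[String]): Unit =
        println(project(1, 2, 3)) // (3,1)
    }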
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 5ceb36513f..103ab365e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -214,7 +214,7 @@ case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[E
/** Factory methods for CaseWhen. */
object CaseWhen {
- // The maxium number of switches supported with codegen.
+ // The maximum number of switches supported with codegen.
val MAX_NUM_CASES_FOR_CODEGEN = 20
def apply(branches: Seq[(Expression, Expression)], elseValue: Expression): CaseWhen = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index b95c5dd892..7eba617fcd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -364,7 +364,7 @@ object MapObjects {
* used as input for the `lambdaFunction`. It also carries the element type info.
* @param lambdaFunction A function that take the `loopVar` as input, and used as lambda function
* to handle collection elements.
- * @param inputData An expression that when evaluted returns a collection object.
+ * @param inputData An expression that when evaluated returns a collection object.
*/
case class MapObjects private(
loopVar: LambdaVariable,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
index e4417e0955..da90ddbd63 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
@@ -66,7 +66,7 @@ object NumberConverter {
* negative digit is found, ignore the suffix starting there.
*
* @param radix must be between MIN_RADIX and MAX_RADIX
- * @param fromPos is the first element that should be conisdered
+ * @param fromPos is the first element that should be considered
* @return the result should be treated as an unsigned 64-bit integer.
*/
private def encode(radix: Int, fromPos: Int): Long = {
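An assumed, standalone sketch of the encoding rule the comment describes (digit array, radix, start position, stop at the first negative digit); it is not Spark's private NumberConverter.encode, which works on an internal digit buffer.

    object EncodeSketch {
      // Fold digits from fromPos into an accumulator, stopping at the first
      // negative digit; callers treat the result as an unsigned 64-bit value.
      def encode(digits: Array[Int], radix: Int, fromPos: Int): Long = {
        var value = 0L
        var i = fromPos
        while (i < digits.length && digits(i) >= 0) {
          value = value * radix + digits(i)
          i += 1
        }
        value
      }

      def main(args: Array[String]): Unit =
        println(encode(Array(1, 0, 1, -1, 9), radix = 2, fromPos = 0)) // 5
    }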
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index de9a56dc9c..4e7bbc38d6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -276,7 +276,7 @@ class AnalysisErrorSuite extends AnalysisTest {
test("SPARK-6452 regression test") {
// CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s)
- // Since we manually construct the logical plan at here and Sum only accetp
+ // Since we manually construct the logical plan here and Sum only accepts
// LongType, DoubleType, and DecimalType. We use LongType as the type of a.
val plan =
Aggregate(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index aa1d2b0861..8b568b6dd6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -250,7 +250,7 @@ class AnalysisSuite extends AnalysisTest {
assertAnalysisSuccess(plan)
}
- test("SPARK-8654: different types in inlist but can be converted to a commmon type") {
+ test("SPARK-8654: different types in inlist but can be converted to a common type") {
val plan = Project(Alias(In(Literal(null), Seq(Literal(1), Literal(1.2345))), "a")() :: Nil,
LocalRelation()
)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
index c30434a006..6f289dcc47 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
@@ -205,7 +205,7 @@ class HiveTypeCoercionSuite extends PlanTest {
Project(Seq(Alias(transformed, "a")()), testRelation))
}
- test("cast NullType for expresions that implement ExpectsInputTypes") {
+ test("cast NullType for expressions that implement ExpectsInputTypes") {
import HiveTypeCoercionSuite._
ruleTest(HiveTypeCoercion.ImplicitTypeCasts,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index ce42e5784c..0b350c6a98 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -70,7 +70,7 @@ class ExpressionSetSuite extends SparkFunSuite {
// Not commutative
setTest(2, aUpper - bUpper, bUpper - aUpper)
- // Reversable
+ // Reversible
setTest(1, aUpper > bUpper, bUpper < aUpper)
setTest(1, aUpper >= bUpper, bUpper <= aUpper)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
index 0dbfb01e88..f5374229ca 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
@@ -131,7 +131,7 @@ class HyperLogLogPlusPlusSuite extends SparkFunSuite {
i += 1
}
- // Merge the lower and upper halfs.
+ // Merge the lower and upper halves.
hll.merge(buffer1a, buffer1b)
// Create the other buffer in reverse
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index f7ba61d2b8..1751720a7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -78,7 +78,7 @@ class TypedColumn[-T, U](
*
* {{{
* df("columnName") // On a specific DataFrame.
- * col("columnName") // A generic column no yet associcated with a DataFrame.
+ * col("columnName") // A generic column no yet associated with a DataFrame.
* col("columnName.field") // Extracting a struct field
* col("`a.column.with.dots`") // Escape `.` in column names.
* $"columnName" // Scala short hand for a named column.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
index 472ae716f1..a8700de135 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
@@ -223,7 +223,7 @@ class GroupedDataset[K, V] private[sql](
* Internal helper function for building typed aggregations that return tuples. For simplicity
* and code reuse, we do this without the help of the type system and then use helper functions
* that cast appropriately for the user facing interface.
- * TODO: does not handle aggrecations that return nonflat results,
+ * TODO: does not handle aggregations that return nonflat results,
*/
protected def aggUntyped(columns: TypedColumn[_, _]*): Dataset[_] = {
val encoders = columns.map(_.encoder)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index 3be4cce045..a92c99e06f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -52,7 +52,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
protected def sparkContext = sqlContext.sparkContext
// sqlContext will be null when we are being deserialized on the slaves. In this instance
- // the value of subexpressionEliminationEnabled will be set by the desserializer after the
+ // the value of subexpressionEliminationEnabled will be set by the deserializer after the
// constructor has run.
val subexpressionEliminationEnabled: Boolean = if (sqlContext != null) {
sqlContext.conf.subexpressionEliminationEnabled
@@ -65,7 +65,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
*/
private val prepareCalled = new AtomicBoolean(false)
- /** Overridden make copy also propogates sqlContext to copied plan. */
+ /** Overridden make copy also propagates sqlContext to copied plan. */
override def makeCopy(newArgs: Array[AnyRef]): SparkPlan = {
SQLContext.setActive(sqlContext)
super.makeCopy(newArgs)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index 3ec01185c4..f9d606e37e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -40,7 +40,7 @@ import org.apache.spark.unsafe.types.UTF8String
* so we do not have helper methods for them.
*
*
- * WARNNING: This only works with HeapByteBuffer
+ * WARNING: This only works with HeapByteBuffer
*/
private[columnar] object ByteBufferHelper {
def getInt(buffer: ByteBuffer): Int = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index c3f8d7f75a..18a460fc85 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -88,7 +88,7 @@ private[sql] object PartitioningUtils {
}.unzip
// We create pairs of (path -> path's partition value) here
- // If the corresponding partition value is None, the pair will be skiped
+ // If the corresponding partition value is None, the pair will be skipped
val pathsWithPartitionValues = paths.zip(partitionValues).flatMap(x => x._2.map(x._1 -> _))
if (pathsWithPartitionValues.isEmpty) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index e295722cac..64a820c6d7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -70,7 +70,7 @@ object JdbcUtils extends Logging {
// Somewhat hacky, but there isn't a good way to identify whether a table exists for all
// SQL database systems using JDBC meta data calls, considering "table" could also include
- // the database name. Query used to find table exists can be overriden by the dialects.
+ // the database name. Query used to find table exists can be overridden by the dialects.
Try {
val statement = conn.prepareStatement(dialect.getTableExistsQuery(table))
try {
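A hedged sketch of the probe-and-Try pattern described above, using plain JDBC rather than Spark's JdbcUtils; the exists query is whatever the dialect supplies, for example something like "SELECT * FROM t WHERE 1=0".

    import java.sql.Connection
    import scala.util.Try

    object TableExistsSketch {
      // Run a dialect-specific probe query and treat any failure as "not found".
      def tableExists(conn: Connection, existsQuery: String): Boolean =
        Try {
          val statement = conn.prepareStatement(existsQuery)
          try statement.executeQuery() finally statement.close()
        }.isSuccess
    }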
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
index c65a7bcff8..79e4491026 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
/**
- * A physical plan that evalutes a [[PythonUDF]], one partition of tuples at a time.
+ * A physical plan that evaluates a [[PythonUDF]], one partition of tuples at a time.
*
* Python evaluation works by sending the necessary (projected) input data via a socket to an
* external Python process, and combine the result from the Python process with the original row.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
index 1bd71b6b02..e3b2d2f67e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
@@ -38,7 +38,7 @@ trait Sink {
* Accepts a new batch of data as well as a [[Offset]] that denotes how far in the input
* data computation has progressed to. When computation restarts after a failure, it is important
* that a [[Sink]] returns the same [[Offset]] as the most recent batch of data that
- * has been persisted durrably. Note that this does not necessarily have to be the
+ * has been persisted durably. Note that this does not necessarily have to be the
* [[Offset]] for the most recent batch of data that was given to the sink. For example,
* it is valid to buffer data before persisting, as long as the [[Offset]] is stored
* transactionally as data is eventually persisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
index 096477ce0e..d7ff44afad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
@@ -100,7 +100,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext)
/**
* A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit
- * tests and does not provide durablility.
+ * tests and does not provide durability.
*/
class MemorySink(schema: StructType) extends Sink with Logging {
/** An order list of batches that have been written to this [[Sink]]. */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 384102e5ea..59429d254e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -441,7 +441,7 @@ object SQLConf {
// NOTE:
//
// 1. Instead of SQLConf, this option *must be set in Hadoop Configuration*.
- // 2. This option can be overriden by "spark.sql.parquet.output.committer.class".
+ // 2. This option can be overridden by "spark.sql.parquet.output.committer.class".
val OUTPUT_COMMITTER_CLASS =
stringConf("spark.sql.sources.outputCommitterClass", isPublic = false)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index e865dbe6b5..a7a826bc7a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -80,7 +80,7 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
// Verify that the splits span the entire dataset
assert(splits.flatMap(_.collect()).toSet == data.collect().toSet)
- // Verify that the splits don't overalap
+ // Verify that the splits don't overlap
assert(splits(0).intersect(splits(1)).collect().isEmpty)
// Verify that the results are deterministic across multiple runs
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 9f32c8bf95..d7fa23651b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -46,7 +46,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
1, 1, 1)
}
- test("SPARK-12404: Datatype Helper Serializablity") {
+ test("SPARK-12404: Datatype Helper Serializability") {
val ds = sparkContext.parallelize((
new Timestamp(0),
new Date(0),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 182f287dd0..98d0008489 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -986,7 +986,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
test("SET commands with illegal or inappropriate argument") {
sqlContext.conf.clear()
- // Set negative mapred.reduce.tasks for automatically determing
+ // Set negative mapred.reduce.tasks for automatically determining
// the number of reducers is not supported
intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-1"))
intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-01"))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
index 7a5b639115..81078dc6a0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
@@ -80,7 +80,7 @@ trait StreamTest extends QueryTest with Timeouts {
trait StreamMustBeRunning
/**
- * Adds the given data to the stream. Subsuquent check answers will block until this data has
+ * Adds the given data to the stream. Subsequent check answers will block until this data has
* been processed.
*/
object AddData {
@@ -109,7 +109,7 @@ trait StreamTest extends QueryTest with Timeouts {
/**
* Checks to make sure that the current data stored in the sink matches the `expectedAnswer`.
- * This operation automatically blocks untill all added data has been processed.
+ * This operation automatically blocks until all added data has been processed.
*/
object CheckAnswer {
def apply[A : Encoder](data: A*): CheckAnswerRows = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index 7af3f94aef..3a7cb25b4f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -80,7 +80,7 @@ class InferSchemaSuite extends SparkFunSuite {
assert(CSVInferSchema.inferField(BooleanType, "\\N", "\\N") == BooleanType)
}
- test("Merging Nulltypes should yeild Nulltype.") {
+ test("Merging Nulltypes should yield Nulltype.") {
val mergedNullTypes = CSVInferSchema.mergeRowTypes(Array(NullType), Array(NullType))
assert(mergedNullTypes.deep == Array(NullType).deep)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 1ef517324d..f66deea065 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -359,7 +359,7 @@ class JDBCSuite extends SparkFunSuite
.collect().length === 3)
}
- test("Partioning on column that might have null values.") {
+ test("Partitioning on column that might have null values.") {
assert(
sqlContext.read.jdbc(urlWithUserAndPass, "TEST.EMP", "theid", 0, 4, 3, new Properties)
.collect().length === 4)
@@ -372,7 +372,7 @@ class JDBCSuite extends SparkFunSuite
.collect().length === 4)
}
- test("SELECT * on partitioned table with a nullable partioncolumn") {
+ test("SELECT * on partitioned table with a nullable partition column") {
assert(sql("SELECT * FROM nullparts").collect().size == 4)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
index 26c1ff5204..99f1661ad0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
@@ -339,7 +339,7 @@ class TableScanSuite extends DataSourceTest with SharedSQLContext {
test("exceptions") {
// Make sure we do throw correct exception when users use a relation provider that
- // only implements the RelationProvier or the SchemaRelationProvider.
+ // only implements the RelationProvider or the SchemaRelationProvider.
val schemaNotAllowed = intercept[Exception] {
sql(
"""
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 54fffb971d..694bd97515 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -68,7 +68,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
* with one of these strings is found, fail the test immediately.
* The default value is `Seq("Error:")`
*
- * @param queriesAndExpectedAnswers one or more tupes of query + answer
+ * @param queriesAndExpectedAnswers one or more tuples of query + answer
*/
def runCliWithin(
timeout: FiniteDuration,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 56acb87c80..739fbaf444 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -245,7 +245,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
DropTable(tableName, ifExists.nonEmpty)
- // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan"
+ // Support "ANALYZE TABLE tableName COMPUTE STATISTICS noscan"
case Token("TOK_ANALYZE",
Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: isNoscan) =>
// Reference:
@@ -535,7 +535,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
case Token("TOK_STORAGEHANDLER", _) =>
throw new AnalysisException(
"CREATE TABLE AS SELECT cannot be used for a non-native table")
- case _ => // Unsupport features
+ case _ => // Unsupported features
}
CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting.isDefined)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index 059ad8b1f7..8240f2f222 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -89,7 +89,7 @@ private[orc] object OrcFileOperator extends Logging {
}
def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = {
- // TODO: Check if the paths comming in are already qualified and simplify.
+ // TODO: Check if the paths coming in are already qualified and simplify.
val origPath = new Path(pathStr)
val fs = origPath.getFileSystem(conf)
val path = origPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 1053246fc2..5e452d107d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -143,7 +143,7 @@ abstract class HiveComparisonTest
0D
}
- s"""SQLBuiler statistics:
+ s"""SQLBuilder statistics:
|- Total query number: $numTotalQueries
|- Number of convertible queries: $numConvertibleQueries
|- Percentage of convertible queries: $percentage%
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 1002487447..d905f0cd68 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -602,7 +602,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|select * where key = 4
""".stripMargin)
- // test get_json_object again Hive, because the HiveCompatabilitySuite cannot handle result
+ // test get_json_object against Hive, because the HiveCompatibilitySuite cannot handle result
// with newline in it.
createQueryTest("get_json_object #1",
"SELECT get_json_object(src_json.json, '$') FROM src_json")
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 11a4c7dfd0..16c575bcc1 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -205,7 +205,7 @@ class CheckpointWriter(
// also use the latest checkpoint time as the file name, so that we can recovery from the
// latest checkpoint file.
//
- // Note: there is only one thread writting the checkpoint files, so we don't need to worry
+ // Note: there is only one thread writing the checkpoint files, so we don't need to worry
// about thread-safety.
val checkpointFile = Checkpoint.checkpointFile(checkpointDir, latestCheckpointTime)
val backupFile = Checkpoint.checkpointBackupFile(checkpointDir, latestCheckpointTime)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 25e61578a1..e7f3a213d4 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -299,7 +299,7 @@ class StreamingContext private[streaming] (
/**
* Create a input stream from TCP source hostname:port. Data is received using
- * a TCP socket and the receive bytes it interepreted as object using the given
+ * a TCP socket and the received bytes are interpreted as objects using the given
* converter.
* @param hostname Hostname to connect to for receiving data
* @param port Port to connect to for receiving data
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index aad9a12c15..2a80cf4466 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -155,7 +155,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are
* merged using the supplied reduce function. org.apache.spark.Partitioner is used to control
- * thepartitioning of each RDD.
+ * the partitioning of each RDD.
*/
def reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner): JavaPairDStream[K, V] = {
dstream.reduceByKey(func, partitioner)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 860b802725..05f4da6fac 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -530,7 +530,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
* Return the current state of the context. The context can be in three possible states -
* <ul>
* <li>
- * StreamingContextState.INTIALIZED - The context has been created, but not been started yet.
+ * StreamingContextState.INITIALIZED - The context has been created, but not been started yet.
* Input DStreams, transformations and output operations can be created on the context.
* </li>
* <li>
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index 1dcdb64e28..d6ff96e1fc 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -446,7 +446,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
* remember the partitioner despite the key being changed.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new
* DStream
- * @param rememberPartitioner Whether to remember the paritioner object in the generated RDDs.
+ * @param rememberPartitioner Whether to remember the partitioner object in the generated RDDs.
* @tparam S State type
*/
def updateStateByKey[S: ClassTag](
@@ -490,7 +490,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
* remember the partitioner despite the key being changed.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new
* DStream
- * @param rememberPartitioner Whether to remember the paritioner object in the generated RDDs.
+ * @param rememberPartitioner Whether to remember the partitioner object in the generated RDDs.
* @param initialRDD initial state value of each key.
* @tparam S State type
*/
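A usage sketch of the simplest `updateStateByKey` overload, without the partitioner arguments documented above; the host, port, and checkpoint path are placeholders.

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object RunningCountSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[2]").setAppName("running-count")
        val ssc = new StreamingContext(conf, Seconds(1))
        ssc.checkpoint("/tmp/running-count-checkpoint") // state requires checkpointing

        val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
        val counts = words.map(word => (word, 1)).updateStateByKey[Int] {
          (newValues: Seq[Int], state: Option[Int]) =>
            Some(newValues.sum + state.getOrElse(0))
        }
        counts.print()

        ssc.start()
        ssc.awaitTermination()
      }
    }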
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
index 080bc873fa..47eb9b806f 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
@@ -55,7 +55,7 @@ class TransformedDStream[U: ClassTag] (
/**
* Wrap a body of code such that the call site and operation scope
* information are passed to the RDDs created in this body properly.
- * This has been overriden to make sure that `displayInnerRDDOps` is always `true`, that is,
+ * This has been overridden to make sure that `displayInnerRDDOps` is always `true`, that is,
* the inner scopes and callsites of RDDs generated in `DStream.transform` are always
* displayed in the UI.
*/
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
index 430f35a400..d6fcc582b9 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
@@ -257,7 +257,7 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext)
}
batchUIData.foreach { _batchUIData =>
// We use an Iterable rather than explicitly converting to a seq so that updates
- // will propegate
+ // will propagate
val outputOpIdToSparkJobIds: Iterable[OutputOpIdAndSparkJobId] =
Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime).asScala)
.getOrElse(Seq.empty)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
index 2be1d6df86..3a21cfae5a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
@@ -177,7 +177,7 @@ private[streaming] class OpenHashMapBasedStateMap[K, S](
new OpenHashMapBasedStateMap[K, S](this, deltaChainThreshold = deltaChainThreshold)
}
- /** Whether the delta chain lenght is long enough that it should be compacted */
+ /** Whether the delta chain length is long enough that it should be compacted */
def shouldCompact: Boolean = {
deltaChainLength >= deltaChainThreshold
}
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
index 403400904b..3b662ec183 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
@@ -518,7 +518,7 @@ class MapWithStateSuite extends SparkFunSuite
val mapWithStateStream = dstream.map { _ -> 1 }.mapWithState(
StateSpec.function(runningCount))
- // Set internval make sure there is one RDD checkpointing
+ // Set the interval to make sure there is one RDD checkpointing
mapWithStateStream.checkpoint(checkpointDuration)
mapWithStateStream.stateSnapshots()
}
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
index 45424f9bac..95c1609d8e 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
@@ -202,13 +202,13 @@ class ReceivedBlockHandlerSuite
blockManager = createBlockManager(12000, sparkConf)
// there is not enough space to store this block in MEMORY,
- // But BlockManager will be able to sereliaze this block to WAL
+ // But BlockManager will be able to serialize this block to WAL
// and hence count returns correct value.
testRecordcount(false, StorageLevel.MEMORY_ONLY,
IteratorBlock((List.fill(70)(new Array[Byte](100))).iterator), blockManager, Some(70))
// there is not enough space to store this block in MEMORY,
- // But BlockManager will be able to sereliaze this block to DISK
+ // But BlockManager will be able to serialize this block to DISK
// and hence count returns correct value.
testRecordcount(true, StorageLevel.MEMORY_AND_DISK,
IteratorBlock((List.fill(70)(new Array[Byte](100))).iterator), blockManager, Some(70))
@@ -272,7 +272,7 @@ class ReceivedBlockHandlerSuite
}
/**
- * Test storing of data using different types of Handler, StorageLevle and ReceivedBlocks
+ * Test storing of data using different types of Handler, StorageLevel and ReceivedBlocks
* and verify the correct record count
*/
private def testRecordcount(isBlockManagedBasedBlockHandler: Boolean,
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
index a4871b460e..6763ac64da 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
@@ -97,7 +97,7 @@ class ReceiverInputDStreamSuite extends TestSuiteBase with BeforeAndAfterAll {
assert(blockRDD.walRecordHandles.toSeq === blockInfos.map { _.walRecordHandleOption.get })
}
- testWithWAL("createBlockRDD creates BlockRDD when some block info dont have WAL info") {
+ testWithWAL("createBlockRDD creates BlockRDD when some block info don't have WAL info") {
receiverStream =>
val blockInfos1 = Seq.fill(2) { createBlockInfo(withWALInfo = true) }
val blockInfos2 = Seq.fill(3) { createBlockInfo(withWALInfo = false) }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
index 7a76cafc9a..484f3733e8 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
@@ -182,7 +182,7 @@ class StateMapSuite extends SparkFunSuite {
*
* - These operations are done on a test map in "sets". After each set, the map is "copied"
* to create a new map, and the next set of operations are done on the new one. This tests
- * whether the map data persistes correctly across copies.
+ * whether the map data persists correctly across copies.
*
* - Within each set, there are a number of operations to test whether the map correctly
* updates and removes data without affecting the parent state map.
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index 197b3d1439..2159edce2b 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -147,7 +147,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with Timeo
}
}
- test("start with non-seriazable DStream checkpoints") {
+ test("start with non-serializable DStream checkpoints") {
val checkpointDir = Utils.createTempDir()
ssc = new StreamingContext(conf, batchDuration)
ssc.checkpoint(checkpointDir.getAbsolutePath)
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
index 82cd63bcaf..8269963edf 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
@@ -56,7 +56,7 @@ private[streaming] class DummyInputDStream(ssc: StreamingContext) extends InputD
/**
* This is a input stream just for the testsuites. This is equivalent to a checkpointable,
* replayable, reliable message queue like Kafka. It requires a sequence as input, and
- * returns the i_th element at the i_th batch unde manual clock.
+ * returns the i_th element at the i_th batch under manual clock.
*/
class TestInputStream[T: ClassTag](_ssc: StreamingContext, input: Seq[Seq[T]], numPartitions: Int)
extends InputDStream[T](_ssc) {
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
index 6e95bb9710..498471b23b 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
@@ -115,7 +115,7 @@ private[yarn] class AMDelegationTokenRenewer(
}
}
// Schedule update of credentials. This handles the case of updating the tokens right now
- // as well, since the renenwal interval will be 0, and the thread will get scheduled
+ // as well, since the renewal interval will be 0, and the thread will get scheduled
// immediately.
scheduleRenewal(driverTokenRenewerRunnable)
}
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 9f91d182eb..9cdbd6da62 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -186,9 +186,9 @@ private[yarn] class ExecutorRunnable(
else {
// If no java_opts specified, default to using -XX:+CMSIncrementalMode
// It might be possible that other modes/config is being done in
- // spark.executor.extraJavaOptions, so we dont want to mess with it.
- // In our expts, using (default) throughput collector has severe perf ramnifications in
- // multi-tennent machines
+ // spark.executor.extraJavaOptions, so we don't want to mess with it.
+ // In our expts, using (default) throughput collector has severe perf ramifications in
+ // multi-tenant machines
// The options are based on
// http://www.oracle.com/technetwork/java/gc-tuning-5-138395.html#0.0.0.%20When%20to%20Use
// %20the%20Concurrent%20Low%20Pause%20Collector|outline
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index ed56d4bd44..2915e664be 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -65,7 +65,7 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
override def isYarnMode(): Boolean = { true }
// Return an appropriate (subclass) of Configuration. Creating a config initializes some Hadoop
- // subsystems. Always create a new config, dont reuse yarnConf.
+ // subsystems. Always create a new config, don't reuse yarnConf.
override def newConfiguration(conf: SparkConf): Configuration =
new YarnConfiguration(super.newConfiguration(conf))
@@ -217,7 +217,7 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
// the hive configuration class is a subclass of Hadoop Configuration, so can be cast down
// to a Configuration and used without reflection
val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")
- // using the (Configuration, Class) constructor allows the current configuratin to be included
+ // using the (Configuration, Class) constructor allows the current configuration to be included
// in the hive config.
val ctor = hiveConfClass.getDeclaredConstructor(classOf[Configuration],
classOf[Object].getClass)
@@ -502,7 +502,7 @@ object YarnSparkHadoopUtil {
/**
* Getting the initial target number of executors depends on whether dynamic allocation is
* enabled.
- * If not using dynamic allocation it gets the number of executors reqeusted by the user.
+ * If not using dynamic allocation it gets the number of executors requested by the user.
*/
def getInitialTargetExecutorNumber(
conf: SparkConf,
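A simplified sketch of the decision described above, reading only a few well-known configuration keys; the real method consults more sources than this (defaults and legacy settings), so treat it as an illustration only.

    import org.apache.spark.SparkConf

    object InitialExecutorsSketch {
      // With dynamic allocation, start from the configured initial (or minimum)
      // executor count; otherwise use the user-requested fixed number.
      def initialExecutors(conf: SparkConf, fallback: Int = 2): Int =
        if (conf.getBoolean("spark.dynamicAllocation.enabled", defaultValue = false)) {
          conf.getInt("spark.dynamicAllocation.initialExecutors",
            conf.getInt("spark.dynamicAllocation.minExecutors", 0))
        } else {
          conf.getInt("spark.executor.instances", fallback)
        }
    }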
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
index 1538ff75be..05c1e1613d 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
@@ -78,7 +78,7 @@ private object YarnExternalShuffleDriver extends Logging with Matchers {
s"""
|Invalid command line: ${args.mkString(" ")}
|
- |Usage: ExternalShuffleDriver [result file] [registed exec file]
+ |Usage: ExternalShuffleDriver [result file] [registered exec file]
""".stripMargin)
// scalastyle:on println
System.exit(1)