author    Dongjoon Hyun <dongjoon@apache.org>  2016-03-14 09:07:39 +0000
committer Sean Owen <sowen@cloudera.com>  2016-03-14 09:07:39 +0000
commit    acdf21970334cea9d6cfc287e4ccb8e72de9dee1 (patch)
tree      df8bcf3d80dc92ad74c5c27bd3618397205bcc86
parent    e58fa19d17db5dd8a00551e20b46921f98b958f7 (diff)
[MINOR][DOCS] Fix more typos in comments/strings.
## What changes were proposed in this pull request?

This PR fixes 135 typos over 107 files:
* 121 typos in comments
* 11 typos in test case names
* 3 typos in log messages

## How was this patch tested?

Manual.

Author: Dongjoon Hyun <dongjoon@apache.org>

Closes #11689 from dongjoon-hyun/fix_more_typos.
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala | 4
-rw-r--r--  core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala | 6
-rw-r--r--  core/src/main/scala/org/apache/spark/executor/Executor.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/storage/BlockManager.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/SizeEstimator.scala | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/Utils.scala | 4
-rw-r--r--  core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala | 4
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala | 6
-rw-r--r--  core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala | 6
-rw-r--r--  core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala | 4
-rw-r--r--  core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala | 2
-rw-r--r--  core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala | 2
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala | 2
-rw-r--r--  external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala | 2
-rw-r--r--  external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala | 6
-rw-r--r--  external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala | 2
-rw-r--r--  external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala | 2
-rw-r--r--  external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala | 2
-rw-r--r--  external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/Graph.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala | 4
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala | 4
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/package.scala | 2
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala | 2
-rw-r--r--  graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala | 2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala | 2
-rw-r--r--  mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala | 2
-rw-r--r--  project/MimaExcludes.scala | 6
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala | 2
-rw-r--r--  sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Column.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala | 4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala | 4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala | 2
-rw-r--r--  sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala | 2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala | 2
-rw-r--r--  sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala | 4
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala | 6
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala | 2
-rw-r--r--  streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala | 2
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala | 2
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala | 6
-rw-r--r--  yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala | 6
-rw-r--r--  yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala | 2
107 files changed, 135 insertions, 135 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
index 13e18a56c8..0d3a5237d9 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala
@@ -66,7 +66,7 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double])
*/
def unpersist(blocking: Boolean): JavaDoubleRDD = fromRDD(srdd.unpersist(blocking))
- // first() has to be overriden here in order for its return type to be Double instead of Object.
+ // first() has to be overridden here in order for its return type to be Double instead of Object.
override def first(): JDouble = srdd.first()
// Transformations (return a new RDD)
diff --git a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
index f1aebbcd39..d362c40b7a 100644
--- a/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/api/java/JavaSparkContext.scala
@@ -351,7 +351,7 @@ class JavaSparkContext(val sc: SparkContext)
}
/**
- * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
+ * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
* etc).
*
@@ -383,7 +383,7 @@ class JavaSparkContext(val sc: SparkContext)
}
/**
- * Get an RDD for a Hadoop-readable dataset from a Hadooop JobConf giving its InputFormat and any
+ * Get an RDD for a Hadoop-readable dataset from a Hadoop JobConf giving its InputFormat and any
* other necessary info (e.g. file name for a filesystem-based dataset, table name for HyperTable,
*
* @param conf JobConf for setting up the dataset. Note: This will be put into a Broadcast.
diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
index 550e1ba6d3..8091aa8062 100644
--- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
+++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala
@@ -74,7 +74,7 @@ private[spark] class TorrentBroadcast[T: ClassTag](obj: T, id: Long)
} else {
None
}
- // Note: use getSizeAsKb (not bytes) to maintain compatiblity if no units are provided
+ // Note: use getSizeAsKb (not bytes) to maintain compatibility if no units are provided
blockSize = conf.getSizeAsKb("spark.broadcast.blockSize", "4m").toInt * 1024
}
setConf(SparkEnv.get.conf)
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
index e2fda29044..000f7e8e1e 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationCache.scala
@@ -87,7 +87,7 @@ private[history] class ApplicationCache(
/**
* The cache of applications.
*
- * Tagged as `protected` so as to allow subclasses in tests to accesss it directly
+ * Tagged as `protected` so as to allow subclasses in tests to access it directly
*/
protected val appCache: LoadingCache[CacheKey, CacheEntry] = {
CacheBuilder.newBuilder()
@@ -447,7 +447,7 @@ private[history] class CacheMetrics(prefix: String) extends Source {
private[history] trait ApplicationCacheOperations {
/**
- * Get the application UI and the probe neededed to see if it has been updated.
+ * Get the application UI and the probe needed to see if it has been updated.
* @param appId application ID
* @param attemptId attempt ID
* @return If found, the Spark UI and any history information to be used in the cache
@@ -590,7 +590,7 @@ private[history] object ApplicationCacheCheckFilterRelay extends Logging {
// name of the attempt ID entry in the filter configuration. Optional.
val ATTEMPT_ID = "attemptId"
- // namer of the filter to register
+ // name of the filter to register
val FILTER_NAME = "org.apache.spark.deploy.history.ApplicationCacheCheckFilter"
/** the application cache to relay requests to */
diff --git a/core/src/main/scala/org/apache/spark/executor/Executor.scala b/core/src/main/scala/org/apache/spark/executor/Executor.scala
index 07e3c12bc9..48372d70d5 100644
--- a/core/src/main/scala/org/apache/spark/executor/Executor.scala
+++ b/core/src/main/scala/org/apache/spark/executor/Executor.scala
@@ -124,7 +124,7 @@ private[spark] class Executor(
private val HEARTBEAT_MAX_FAILURES = conf.getInt("spark.executor.heartbeat.maxFailures", 60)
/**
- * Count the failure times of heartbeat. It should only be acessed in the heartbeat thread. Each
+ * Count the failure times of heartbeat. It should only be accessed in the heartbeat thread. Each
* successful heartbeat will reset it to 0.
*/
private var heartbeatFailures = 0
diff --git a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
index c9606600ed..0f579cfe42 100644
--- a/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/DoubleRDDFunctions.scala
@@ -141,7 +141,7 @@ class DoubleRDDFunctions(self: RDD[Double]) extends Logging with Serializable {
* And on the input of 1 and 50 we would have a histogram of 1, 0, 1
*
* Note: if your histogram is evenly spaced (e.g. [0, 10, 20, 30]) this can be switched
- * from an O(log n) inseration to O(1) per element. (where n = # buckets) if you set evenBuckets
+ * from an O(log n) insertion to O(1) per element. (where n = # buckets) if you set evenBuckets
* to true.
* buckets must be sorted and not contain any duplicates.
* buckets array must be at least two elements
diff --git a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
index def0aac720..dfcdd113df 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala
@@ -157,7 +157,7 @@ object InputFormatInfo {
b) Decrement the currently allocated containers on that host.
c) Compute rack info for each host and update rack -> count map based on (b).
d) Allocate nodes based on (c)
- e) On the allocation result, ensure that we dont allocate "too many" jobs on a single node
+ e) On the allocation result, ensure that we don't allocate "too many" jobs on a single node
(even if data locality on that is very high) : this is to prevent fragility of job if a
single (or small set of) hosts go down.
diff --git a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
index 1ce83485f0..6e9337bb90 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/SplitInfo.scala
@@ -45,7 +45,7 @@ class SplitInfo(
hashCode
}
- // This is practically useless since most of the Split impl's dont seem to implement equals :-(
+ // This is practically useless since most of the Split impl's don't seem to implement equals :-(
// So unless there is identity equality between underlyingSplits, it will always fail even if it
// is pointing to same block.
override def equals(other: Any): Boolean = other match {
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index bcf65e9d7e..996c1f5d9e 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -1057,7 +1057,7 @@ private[spark] class BlockManager(
failures += 1
replicationFailed = true
peersFailedToReplicateTo += peer
- if (failures > maxReplicationFailures) { // too many failures in replcating to peers
+ if (failures > maxReplicationFailures) { // too many failures in replicating to peers
done = true
}
}
diff --git a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
index 478a928acd..b19c30e2ff 100644
--- a/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -70,7 +70,7 @@ final class ShuffleBlockFetcherIterator(
private[this] var numBlocksToFetch = 0
/**
- * The number of blocks proccessed by the caller. The iterator is exhausted when
+ * The number of blocks processed by the caller. The iterator is exhausted when
* [[numBlocksProcessed]] == [[numBlocksToFetch]].
*/
private[this] var numBlocksProcessed = 0
diff --git a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
index 5a8c291431..094953f2f5 100644
--- a/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
+++ b/core/src/main/scala/org/apache/spark/ui/UIWorkloadGenerator.scala
@@ -102,7 +102,7 @@ private[spark] object UIWorkloadGenerator {
try {
setProperties(desc)
job()
- println("Job funished: " + desc)
+ println("Job finished: " + desc)
} catch {
case e: Exception =>
println("Job Failed: " + desc)
diff --git a/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala b/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
index 73d126ff62..c9b7493fcd 100644
--- a/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
+++ b/core/src/main/scala/org/apache/spark/util/ParentClassLoader.scala
@@ -18,7 +18,7 @@
package org.apache.spark.util
/**
- * A class loader which makes some protected methods in ClassLoader accesible.
+ * A class loader which makes some protected methods in ClassLoader accessible.
*/
private[spark] class ParentClassLoader(parent: ClassLoader) extends ClassLoader(parent) {
diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
index 83ded92609..a06db9a4fc 100644
--- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
+++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
@@ -253,7 +253,7 @@ object SizeEstimator extends Logging {
} else {
// Estimate the size of a large array by sampling elements without replacement.
// To exclude the shared objects that the array elements may link, sample twice
- // and use the min one to caculate array size.
+ // and use the min one to calculate array size.
val rand = new Random(42)
val drawn = new OpenHashSet[Int](2 * ARRAY_SAMPLE_SIZE)
val s1 = sampleArray(array, state, rand, drawn, length)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index b5a98ce569..37c6c9bf90 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -1530,7 +1530,7 @@ private[spark] object Utils extends Logging {
rawMod + (if (rawMod < 0) mod else 0)
}
- // Handles idiosyncracies with hash (add more as required)
+ // Handles idiosyncrasies with hash (add more as required)
// This method should be kept in sync with
// org.apache.spark.network.util.JavaUtils#nonNegativeHash().
def nonNegativeHash(obj: AnyRef): Int = {
@@ -1600,7 +1600,7 @@ private[spark] object Utils extends Logging {
* @param f function to be executed. If prepare is not None, the running time of each call to f
* must be an order of magnitude longer than one millisecond for accurate timing.
* @param prepare function to be executed before each call to f. Its running time doesn't count.
- * @return the total time across all iterations (not couting preparation time)
+ * @return the total time across all iterations (not counting preparation time)
*/
def timeIt(numIters: Int)(f: => Unit, prepare: Option[() => Unit] = None): Long = {
if (prepare.isEmpty) {
diff --git a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
index 1314217023..3c61528ab5 100644
--- a/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/RandomSampler.scala
@@ -54,7 +54,7 @@ object RandomSampler {
/**
* Default maximum gap-sampling fraction.
* For sampling fractions <= this value, the gap sampling optimization will be applied.
- * Above this value, it is assumed that "tradtional" Bernoulli sampling is faster. The
+ * Above this value, it is assumed that "traditional" Bernoulli sampling is faster. The
* optimal value for this will depend on the RNG. More expensive RNGs will tend to make
* the optimal value higher. The most reliable way to determine this value for a new RNG
* is to experiment. When tuning for a new RNG, I would expect a value of 0.5 to be close
@@ -319,7 +319,7 @@ class GapSamplingReplacementIterator[T: ClassTag](
/**
* Skip elements with replication factor zero (i.e. elements that won't be sampled).
* Samples 'k' from geometric distribution P(k) = (1-q)(q)^k, where q = e^(-f), that is
- * q is the probabililty of Poisson(0; f)
+ * q is the probability of Poisson(0; f)
*/
private def advance(): Unit = {
val u = math.max(rng.nextDouble(), epsilon)
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 91fef772d1..bb2adff57e 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -34,7 +34,7 @@ import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate
import org.apache.spark.util.{ResetSystemProperties, Utils}
// Note: this suite mixes in ResetSystemProperties because SparkSubmit.main() sets a bunch
-// of properties that neeed to be cleared after tests.
+// of properties that needed to be cleared after tests.
class SparkSubmitSuite
extends SparkFunSuite
with Matchers
diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
index b7ff5c9e8c..d2e24912b5 100644
--- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala
@@ -501,7 +501,7 @@ class StandaloneDynamicAllocationSuite
master.self.askWithRetry[MasterStateResponse](RequestMasterState)
}
- /** Get the applictions that are active from Master */
+ /** Get the applications that are active from Master */
private def getApplications(): Seq[ApplicationInfo] = {
getMasterState.activeApps
}
diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
index 379c038c55..7017296bd1 100644
--- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala
@@ -159,7 +159,7 @@ class AppClientSuite extends SparkFunSuite with LocalSparkContext with BeforeAnd
master.self.askWithRetry[MasterStateResponse](RequestMasterState)
}
- /** Get the applictions that are active from Master */
+ /** Get the applications that are active from Master */
private def getApplications(): Seq[ApplicationInfo] = {
getMasterState.activeApps
}
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
index e24188781f..c874b95b09 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/ApplicationCacheSuite.scala
@@ -219,7 +219,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
val cacheEntry = cache.lookupCacheEntry(app1, None)
assert(1 === cacheEntry.probeTime)
assert(cacheEntry.completed)
- // assert about queries made of the opereations
+ // assert about queries made of the operations
assert(1 === operations.getAppUICount, "getAppUICount")
assert(1 === operations.attachCount, "attachCount")
@@ -338,7 +338,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
}
/**
- * Look up the cache entry and assert that it maches in the expected value.
+ * Look up the cache entry and assert that it matches in the expected value.
* This assertion works if the two CacheEntries are different -it looks at the fields.
* UI are compared on object equality; the timestamp and completed flags directly.
* @param appId application ID
@@ -384,7 +384,7 @@ class ApplicationCacheSuite extends SparkFunSuite with Logging with MockitoSugar
val operations = new StubCacheOperations()
val clock = new ManualClock(0)
val size = 5
- // only two entries are retained, so we expect evictions to occurr on lookups
+ // only two entries are retained, so we expect evictions to occur on lookups
implicit val cache: ApplicationCache = new TestApplicationCache(operations,
retainedApplications = size, clock = clock)
diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
index e5a448298a..056e5463a0 100644
--- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala
@@ -98,14 +98,14 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext
rdd.coalesce(4).count()
}
- // for count and coelesce, the same bytes should be read.
+ // for count and coalesce, the same bytes should be read.
assert(bytesRead != 0)
assert(bytesRead2 == bytesRead)
}
/**
* This checks the situation where we have interleaved reads from
- * different sources. Currently, we only accumulate fron the first
+ * different sources. Currently, we only accumulate from the first
* read method we find in the task. This test uses cartesian to create
* the interleaved reads.
*
@@ -183,7 +183,7 @@ class InputOutputMetricsSuite extends SparkFunSuite with SharedSparkContext
assert(records == numRecords)
}
- test("input metrics on recordsd read with cache") {
+ test("input metrics on records read with cache") {
// prime the cache manager
val rdd = sc.textFile(tmpFilePath, 4).cache()
rdd.collect()
diff --git a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
index 76451788d2..864adddad3 100644
--- a/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/DoubleRDDSuite.scala
@@ -164,8 +164,8 @@ class DoubleRDDSuite extends SparkFunSuite with SharedSparkContext {
val expectedHistogramResults = Array(4, 2, 1, 2, 3)
assert(histogramResults === expectedHistogramResults)
}
- // Make sure this works with a NaN end bucket and an inifity
- test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfity") {
+ // Make sure this works with a NaN end bucket and an infinity
+ test("WorksMixedRangeWithUnevenBucketsAndNaNAndNaNRangeAndInfinity") {
// Make sure that it works with two unequally spaced buckets and elements in each
val rdd = sc.parallelize(Seq(-0.01, 0.0, 1, 2, 3, 5, 6, 11.01, 12.0, 199.0,
200.0, 200.1, 1.0/0.0, -1.0/0.0, Double.NaN))
diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 7d51538d92..b0d69de6e2 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -182,7 +182,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with SharedSparkContext {
assert(sums(2) === 1)
}
- test("reduceByKey with many output partitons") {
+ test("reduceByKey with many output partitions") {
val pairs = sc.parallelize(Array((1, 1), (1, 2), (1, 3), (1, 1), (2, 1)))
val sums = pairs.reduceByKey(_ + _, 10).collect()
assert(sums.toSet === Set((1, 7), (2, 1)))
diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
index d8849d5948..d1c7143abf 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala
@@ -663,7 +663,7 @@ class DAGSchedulerSuite extends SparkFunSuite with LocalSparkContext with Timeou
completeShuffleMapStageSuccessfully(0, 1, numShufflePartitions = parts)
completeNextResultStageWithSuccess(1, 1)
- // Confirm job finished succesfully
+ // Confirm job finished successfully
sc.listenerBus.waitUntilEmpty(1000)
assert(ended === true)
assert(results === (0 until parts).map { idx => idx -> 42 }.toMap)
diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
index 2c99dd5afb..d35ca411f4 100644
--- a/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSetManagerSuite.scala
@@ -396,7 +396,7 @@ class TaskSetManagerSuite extends SparkFunSuite with LocalSparkContext with Logg
val rescheduleDelay = 300L
val conf = new SparkConf().
set("spark.scheduler.executorTaskBlacklistTime", rescheduleDelay.toString).
- // dont wait to jump locality levels in this test
+ // don't wait to jump locality levels in this test
set("spark.locality.wait", "0")
sc = new SparkContext("local", "test", conf)
diff --git a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
index 683aaa3aab..bdee889cdc 100644
--- a/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/SerializationDebuggerSuite.scala
@@ -191,7 +191,7 @@ class SerializationDebuggerSuite extends SparkFunSuite with BeforeAndAfterEach {
}
val originalException = new NotSerializableException("someClass")
- // verify thaht original exception is returned on failure
+ // verify that original exception is returned on failure
assert(SerializationDebugger.improveException(o, originalException).eq(originalException))
}
}
diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
index 853503bbc2..83eba3690e 100644
--- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala
@@ -53,7 +53,7 @@ class XORShiftRandomSuite extends SparkFunSuite with Matchers {
* Perform the chi square test on the 5 rows of randomly generated numbers evenly divided into
* 10 bins. chiSquareTest returns true iff the null hypothesis (that the classifications
* represented by the counts in the columns of the input 2-way table are independent of the
- * rows) can be rejected with 100 * (1 - alpha) percent confidence, where alpha is prespeficied
+ * rows) can be rejected with 100 * (1 - alpha) percent confidence, where alpha is prespecified
* as 0.05
*/
val chiTest = new ChiSquareTest
diff --git a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
index 7796f362bb..d498af9c39 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SkewedGroupByTest.scala
@@ -38,7 +38,7 @@ object SkewedGroupByTest {
val pairs1 = sc.parallelize(0 until numMappers, numMappers).flatMap { p =>
val ranGen = new Random
- // map output sizes lineraly increase from the 1st to the last
+ // map output sizes linearly increase from the 1st to the last
numKVPairs = (1.0 * (p + 1) / numMappers * numKVPairs).toInt
var arr1 = new Array[(Int, Array[Byte])](numKVPairs)
diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
index aa530a7121..09d3fe91e4 100644
--- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
+++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/Logging.scala
@@ -101,7 +101,7 @@ private[sink] trait Logging {
private def initializeLogging() {
Logging.initialized = true
- // Force a call into slf4j to initialize it. Avoids this happening from mutliple threads
+ // Force a call into slf4j to initialize it. Avoids this happening from multiple threads
// and triggering this: http://mailman.qos.ch/pipermail/slf4j-dev/2010-April/002956.html
log
}
diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
index b9d4e762ca..3555fa68b6 100644
--- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
+++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeBatchFetcher.scala
@@ -77,7 +77,7 @@ private[flume] class FlumeBatchFetcher(receiver: FlumePollingReceiver) extends R
/**
* Gets a batch of events from the specified client. This method does not handle any exceptions
- * which will be propogated to the caller.
+ * which will be propagated to the caller.
* @param client Client to get events from
* @return [[Some]] which contains the event batch if Flume sent any events back, else [[None]]
*/
@@ -96,8 +96,8 @@ private[flume] class FlumeBatchFetcher(receiver: FlumePollingReceiver) extends R
}
/**
- * Store the events in the buffer to Spark. This method will not propogate any exceptions,
- * but will propogate any other errors.
+ * Store the events in the buffer to Spark. This method will not propagate any exceptions,
+ * but will propagate any other errors.
* @param buffer The buffer to store
* @return true if the data was stored without any exception being thrown, else false
*/
diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 8a66621a31..726b5d8ec3 100644
--- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -167,7 +167,7 @@ class KafkaCluster(val kafkaParams: Map[String, String]) extends Serializable {
): Either[Err, Map[TopicAndPartition, LeaderOffset]] = {
getLeaderOffsets(topicAndPartitions, before, 1).right.map { r =>
r.map { kv =>
- // mapValues isnt serializable, see SI-7005
+ // mapValues isn't serializable, see SI-7005
kv._1 -> kv._2.head
}
}
diff --git a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
index b5b76cb92d..23b74da642 100644
--- a/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
+++ b/external/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisRecordProcessor.scala
@@ -132,7 +132,7 @@ private[kinesis] object KinesisRecordProcessor extends Logging {
* Retry the given amount of times with a random backoff time (millis) less than the
* given maxBackOffMillis
*
- * @param expression expression to evalute
+ * @param expression expression to evaluate
* @param numRetriesLeft number of retries left
* @param maxBackOffMillis: max millis between retries
*
diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
index 2555332d22..905c33834d 100644
--- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
+++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDDSuite.scala
@@ -122,7 +122,7 @@ abstract class KinesisBackedBlockRDDTests(aggregateTestData: Boolean)
testIsBlockValid = true)
}
- testIfEnabled("Test whether RDD is valid after removing blocks from block anager") {
+ testIfEnabled("Test whether RDD is valid after removing blocks from block manager") {
testRDD(numPartitions = 2, numPartitionsInBM = 2, numPartitionsInKinesis = 2,
testBlockRemove = true)
}
diff --git a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
index fd15b6ccdc..deac9090e2 100644
--- a/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
+++ b/external/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala
@@ -194,7 +194,7 @@ class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAft
verify(checkpointerMock, times(1)).checkpoint()
}
- test("retry failed after exhausing all retries") {
+ test("retry failed after exhausting all retries") {
val expectedErrorMessage = "final try error message"
when(checkpointerMock.checkpoint())
.thenThrow(new ThrottlingException("error message"))
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index fe884d0022..5485e30f5a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -297,7 +297,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
/**
* Restricts the graph to only the vertices and edges satisfying the predicates. The resulting
- * subgraph satisifies
+ * subgraph satisfies
*
* {{{
* V' = {v : for all v in V where vpred(v)}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
index d537b6141c..fcb1b5999f 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -236,11 +236,11 @@ class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Seriali
* @param preprocess a function to compute new vertex and edge data before filtering
* @param epred edge pred to filter on after preprocess, see more details under
* [[org.apache.spark.graphx.Graph#subgraph]]
- * @param vpred vertex pred to filter on after prerocess, see more details under
+ * @param vpred vertex pred to filter on after preprocess, see more details under
* [[org.apache.spark.graphx.Graph#subgraph]]
* @tparam VD2 vertex type the vpred operates on
* @tparam ED2 edge type the epred operates on
- * @return a subgraph of the orginal graph, with its data unchanged
+ * @return a subgraph of the original graph, with its data unchanged
*
* @example This function can be used to filter the graph based on some property, without
* changing the vertex and edge values in your program. For example, we could remove the vertices
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
index 6dab465fb9..a4e293d74a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala
@@ -49,7 +49,7 @@ object ShippableVertexPartition {
/**
* Construct a `ShippableVertexPartition` from the given vertices with the specified routing
* table, filling in missing vertices mentioned in the routing table using `defaultVal`,
- * and merging duplicate vertex atrribute with mergeFunc.
+ * and merging duplicate vertex attribute with mergeFunc.
*/
def apply[VD: ClassTag](
iter: Iterator[(VertexId, VD)], routingTable: RoutingTablePartition, defaultVal: VD,
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
index 46faad2e68..00ba358a9b 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala
@@ -54,7 +54,7 @@ import org.apache.spark.graphx._
* }}}
*
* `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
- * neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
+ * neighbors which link to `i` and `outDeg[j]` is the out degree of vertex `j`.
*
* Note that this is not the "normalized" PageRank and as a consequence pages that have no
* inlinks will have a PageRank of alpha.
@@ -209,7 +209,7 @@ object PageRank extends Logging {
}
// Set the weight on the edges based on the degree
.mapTriplets( e => 1.0 / e.srcAttr )
- // Set the vertex attributes to (initalPR, delta = 0)
+ // Set the vertex attributes to (initialPR, delta = 0)
.mapVertices { (id, attr) =>
if (id == src) (resetProb, Double.NegativeInfinity) else (0.0, 0.0)
}
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/package.scala b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
index 6aab28ff05..dde25b9659 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/package.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/package.scala
@@ -30,7 +30,7 @@ package object graphx {
*/
type VertexId = Long
- /** Integer identifer of a graph partition. Must be less than 2^30. */
+ /** Integer identifier of a graph partition. Must be less than 2^30. */
// TODO: Consider using Char.
type PartitionID = Int
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
index a6d0cb6409..d76e84ed8c 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/util/BytecodeUtils.scala
@@ -92,7 +92,7 @@ private[graphx] object BytecodeUtils {
/**
* Given the class name, return whether we should look into the class or not. This is used to
- * skip examing a large quantity of Java or Scala classes that we know for sure wouldn't access
+ * skip examining a large quantity of Java or Scala classes that we know for sure wouldn't access
* the closures. Note that the class name is expected in ASM style (i.e. use "/" instead of ".").
*/
private def skipClass(className: String): Boolean = {
diff --git a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
index f497e001df..cb981797d3 100644
--- a/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
+++ b/graphx/src/test/scala/org/apache/spark/graphx/GraphSuite.scala
@@ -229,7 +229,7 @@ class GraphSuite extends SparkFunSuite with LocalSparkContext {
test("subgraph") {
withSpark { sc =>
- // Create a star graph of 10 veritces.
+ // Create a star graph of 10 vertices.
val n = 10
val star = starGraph(sc, n)
// Take only vertices whose vids are even
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
index f21b623e93..2cd94fa8f5 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala
@@ -523,7 +523,7 @@ private[ml] object FeedForwardTopology {
/**
* Creates a multi-layer perceptron
* @param layerSizes sizes of layers including input and output size
- * @param softmax wether to use SoftMax or Sigmoid function for an output layer.
+ * @param softmax whether to use SoftMax or Sigmoid function for an output layer.
* Softmax is default
* @return multilayer perceptron topology
*/
diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
index 521d209a8f..27554acdf3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala
@@ -481,7 +481,7 @@ object NominalAttribute extends AttributeFactory {
* A binary attribute.
* @param name optional name
* @param index optional index
- * @param values optionla values. If set, its size must be 2.
+ * @param values optional values. If set, its size must be 2.
*/
@DeveloperApi
class BinaryAttribute private[ml] (
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
index f8de4e2220..c8ec0c1685 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala
@@ -83,7 +83,7 @@ class RankingMetrics[T: ClassTag](predictionAndLabels: RDD[(Array[T], Array[T])]
/**
* Returns the mean average precision (MAP) of all the queries.
* If a query has an empty ground truth set, the average precision will be zero and a log
- * warining is generated.
+ * warning is generated.
*/
lazy val meanAveragePrecision: Double = {
predictionAndLabels.map { case (pred, lab) =>
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
index 07eb750b06..790d6b101e 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -58,7 +58,7 @@ class AssociationRules private[fpm] (
/**
* Computes the association rules with confidence above [[minConfidence]].
* @param freqItemsets frequent itemset model obtained from [[FPGrowth]]
- * @return a [[Set[Rule[Item]]] containing the assocation rules.
+ * @return a [[Set[Rule[Item]]] containing the association rules.
*
*/
@Since("1.5.0")
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index 240781bcd3..58fd010e49 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -158,7 +158,7 @@ object LinearDataGenerator {
/**
* Generate an RDD containing sample data for Linear Regression models - including Ridge, Lasso,
- * and uregularized variants.
+ * and unregularized variants.
*
* @param sc SparkContext to be used for generating the RDD.
* @param nexamples Number of examples that will be contained in the RDD.
diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
index 71f4926290..114a238462 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
@@ -88,7 +88,7 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
val df = sqlContext.read.format("libsvm").load(path)
val tempDir2 = Utils.createTempDir()
val writepath = tempDir2.toURI.toString
- // TODO: Remove requirement to coalesce by supporting mutiple reads.
+ // TODO: Remove requirement to coalesce by supporting multiple reads.
df.coalesce(1).write.format("libsvm").mode(SaveMode.Overwrite).save(writepath)
val df2 = sqlContext.read.format("libsvm").load(writepath)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
index cea0adc55c..28fada7053 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/LogisticRegressionSuite.scala
@@ -496,7 +496,7 @@ class LogisticRegressionSuite extends SparkFunSuite with MLlibTestSparkContext w
* features = as.matrix(data.frame(data$V2, data$V3, data$V4, data$V5))
* weights = coef(glmnet(features,label, family="multinomial", alpha = 0, lambda = 0))
*
- * The model weights of mutinomial logstic regression in R have `K` set of linear predictors
+ * The model weights of multinomial logistic regression in R have `K` set of linear predictors
* for `K` classes classification problem; however, only `K-1` set is required if the first
* outcome is chosen as a "pivot", and the other `K-1` outcomes are separately regressed against
* the pivot outcome. This can be done by subtracting the first weights from those `K-1` set
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
index 142b90e764..46fcebe132 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/stat/HypothesisTestSuite.scala
@@ -144,7 +144,7 @@ class HypothesisTestSuite extends SparkFunSuite with MLlibTestSparkContext {
assert(chi.size === numCols)
assert(chi(1000) != null) // SPARK-3087
- // Detect continous features or labels
+ // Detect continuous features or labels
val random = new Random(11L)
val continuousLabel =
Seq.fill(100000)(LabeledPoint(random.nextDouble(), Vectors.dense(random.nextInt(2))))
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
index dca8ea815a..5518bdf527 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/tree/DecisionTreeSuite.scala
@@ -1075,7 +1075,7 @@ object DecisionTreeSuite extends SparkFunSuite {
assert(a.isLeaf === b.isLeaf)
assert(a.split === b.split)
(a.stats, b.stats) match {
- // TODO: Check other fields besides the infomation gain.
+ // TODO: Check other fields besides the information gain.
case (Some(aStats), Some(bStats)) => assert(aStats.gain === bStats.gain)
case (None, None) =>
case _ => throw new AssertionError(
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index c4c8d8870f..faa52bf18c 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -334,7 +334,7 @@ object MimaExcludes {
excludePackage("org.apache.spark.sql.columnar"),
// The shuffle package is considered private.
excludePackage("org.apache.spark.shuffle"),
- // The collections utlities are considered pricate.
+ // The collections utilities are considered private.
excludePackage("org.apache.spark.util.collection")
) ++
MimaBuild.excludeSparkClass("streaming.flume.FlumeTestUtils") ++
@@ -639,7 +639,7 @@ object MimaExcludes {
Seq(
MimaBuild.excludeSparkPackage("deploy"),
MimaBuild.excludeSparkPackage("ml"),
- // SPARK-7910 Adding a method to get the partioner to JavaRDD,
+ // SPARK-7910 Adding a method to get the partitioner to JavaRDD,
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.api.java.JavaRDDLike.partitioner"),
// SPARK-5922 Adding a generalized diff(other: RDD[(VertexId, VD)]) to VertexRDD
ProblemFilters.exclude[MissingMethodProblem]("org.apache.spark.graphx.VertexRDD.diff"),
@@ -657,7 +657,7 @@ object MimaExcludes {
ProblemFilters.exclude[MissingClassProblem](
"org.apache.spark.scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint")
) ++ Seq(
- // SPARK-4655 - Making Stage an Abstract class broke binary compatility even though
+ // SPARK-4655 - Making Stage an Abstract class broke binary compatibility even though
// the stage class is defined as private[spark]
ProblemFilters.exclude[AbstractClassProblem]("org.apache.spark.scheduler.Stage")
) ++ Seq(
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index f108264861..1219d4d453 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -305,7 +305,7 @@ trait Row extends Serializable {
* @throws ClassCastException when data type does not match.
*/
def getStruct(i: Int): Row = {
- // Product and Row both are recoginized as StructType in a Row
+ // Product and Row both are recognized as StructType in a Row
val t = get(i)
if (t.isInstanceOf[Product]) {
Row.fromTuple(t.asInstanceOf[Product])
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index c12b5c20ea..bf07f4557a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -29,7 +29,7 @@ import org.apache.spark.util.Utils
*/
object ScalaReflection extends ScalaReflection {
val universe: scala.reflect.runtime.universe.type = scala.reflect.runtime.universe
- // Since we are creating a runtime mirror usign the class loader of current thread,
+ // Since we are creating a runtime mirror using the class loader of current thread,
// we need to use def at here. So, every time we call mirror, it is using the
// class loader of the current thread.
// SPARK-13640: Synchronize this because universe.runtimeMirror is not thread-safe in Scala 2.10.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
index ad56c98649..9c38dd2ee4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DecimalPrecision.scala
@@ -155,7 +155,7 @@ object DecimalPrecision extends Rule[LogicalPlan] {
*
* Note that technically this is an "optimization" and should go into the optimizer. However,
* by the time the optimizer runs, these comparison expressions would be pretty hard to pattern
- * match because there are multuple (at least 2) levels of casts involved.
+ * match because there are multiple (at least 2) levels of casts involved.
*
* There are a lot more possible rules we can implement, but we don't do them
* because we are not sure how common they are.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
index 38c1641f73..2e30d83a60 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DistinctAggregationRewriter.scala
@@ -96,7 +96,7 @@ import org.apache.spark.sql.types.IntegerType
* This rule duplicates the input data by two or more times (# distinct groups + an optional
* non-distinct group). This will put quite a bit of memory pressure of the used aggregate and
* exchange operators. Keeping the number of distinct groups as low a possible should be priority,
- * we could improve this in the current rule by applying more advanced expression cannocalization
+ * we could improve this in the current rule by applying more advanced expression canonicalization
* techniques.
*/
object DistinctAggregationRewriter extends Rule[LogicalPlan] {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
index 57bdb164e1..0f85f44ffa 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -307,7 +307,7 @@ object HiveTypeCoercion {
case p @ Equality(left @ TimestampType(), right @ StringType()) =>
p.makeCopy(Array(left, Cast(right, TimestampType)))
- // We should cast all relative timestamp/date/string comparison into string comparisions
+ // We should cast all relative timestamp/date/string comparison into string comparisons
// This behaves as a user would expect because timestamp strings sort lexicographically.
// i.e. TimeStamp(2013-01-01 00:00 ...) < "2014" = true
case p @ BinaryComparison(left @ StringType(), right @ DateType()) =>
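A tiny self-contained illustration of the lexicographic-ordering claim in the comment above, in plain Scala with no Spark involved:

    object LexicographicSketch {
      def main(args: Array[String]): Unit = {
        // A full timestamp string compares against a bare year string the way
        // a user would expect, because both sort lexicographically.
        println("2013-01-01 00:00:00" < "2014") // true
        println("2015-06-30 12:00:00" < "2014") // false
      }
    }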
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
index 3831535574..8bdf9b29c9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/AttributeSet.scala
@@ -53,7 +53,7 @@ object AttributeSet {
* cosmetically (e.g., the names have different capitalizations).
*
* Note that we do not override equality for Attribute references as it is really weird when
- * `AttributeReference("a"...) == AttrributeReference("b", ...)`. This tactic leads to broken tests,
+ * `AttributeReference("a"...) == AttributeReference("b", ...)`. This tactic leads to broken tests,
* and also makes doing transformations hard (we always try keep older trees instead of new ones
* when the transformation was a no-op).
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
index acea049adc..644a5b28a2 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSet.scala
@@ -36,7 +36,7 @@ object ExpressionSet {
* Internally this set uses the canonical representation, but keeps also track of the original
* expressions to ease debugging. Since different expressions can share the same canonical
* representation, this means that operations that extract expressions from this set are only
- * guranteed to see at least one such expression. For example:
+ * guaranteed to see at least one such expression. For example:
*
* {{{
* val set = AttributeSet(a + 1, 1 + a)
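A hedged, self-contained sketch of the guarantee described above, using a toy expression type rather than Spark's internal classes; the canonicalization rule here is a deliberate simplification.

    sealed trait Expr
    case class Ref(name: String) extends Expr
    case class Lit(value: Int) extends Expr
    case class Add(left: Expr, right: Expr) extends Expr

    object CanonicalSetSketch {
      // Order the operands of commutative addition so that a + 1 and 1 + a
      // share one canonical form (a stand-in for the real canonicalization).
      def canonical(e: Expr): Expr = e match {
        case Add(l, r) =>
          val (cl, cr) = (canonical(l), canonical(r))
          if (cl.toString <= cr.toString) Add(cl, cr) else Add(cr, cl)
        case other => other
      }

      def main(args: Array[String]): Unit = {
        val originals = Seq(Add(Ref("a"), Lit(1)), Add(Lit(1), Ref("a")))
        // Keep one original expression per canonical form; callers are only
        // guaranteed to see at least one of the equivalent originals.
        val set = originals.groupBy(canonical).values.map(_.head).toSeq
        println(set.size) // 1
        println(set.head) // either Add(Ref(a),Lit(1)) or Add(Lit(1),Ref(a))
      }
    }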
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
index 22184f1ddf..500ff447a9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala
@@ -973,7 +973,7 @@ case class ScalaUDF(
// scalastyle:on line.size.limit
- // Generate codes used to convert the arguments to Scala type for user-defined funtions
+ // Generate code used to convert the arguments to Scala types for user-defined functions
private[this] def genCodeForConverter(ctx: CodegenContext, index: Int): String = {
val converterClassName = classOf[Any => Any].getName
val typeConvertersClassName = CatalystTypeConverters.getClass.getName + ".MODULE$"
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index c4265a7539..3dbe634898 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -126,7 +126,7 @@ class CodegenContext {
* For expressions that appear more than once, generate additional code to prevent
* recomputing the value.
*
- * For example, consider two exprsesion generated from this SQL statement:
+ * For example, consider two expressions generated from this SQL statement:
* SELECT (col1 + col2), (col1 + col2) / col3.
*
* equivalentExpressions will match the tree containing `col1 + col2` and it will only
@@ -140,7 +140,7 @@ class CodegenContext {
// Foreach expression that is participating in subexpression elimination, the state to use.
val subExprEliminationExprs = mutable.HashMap.empty[Expression, SubExprEliminationState]
- // The collection of sub-exression result resetting methods that need to be called on each row.
+ // The collection of sub-expression result resetting methods that need to be called on each row.
val subexprFunctions = mutable.ArrayBuffer.empty[String]
def declareAddedFunctions(): String = {
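A simplified sketch of what subexpression elimination buys for the example query above; this is plain Scala standing in for the generated code, not the codegen output itself.

    object SubexprEliminationSketch {
      // Naive evaluation of SELECT (col1 + col2), (col1 + col2) / col3 would
      // compute col1 + col2 twice; hoisting it into one local mirrors what the
      // generated code does for a shared subexpression.
      def project(col1: Int, col2: Int, col3: Int): (Int, Int) = {
        val shared = col1 + col2
        (shared, shared / col3)
      }

      def main(args: Array[String]): Unit =
        println(project(1, 2, 3)) // (3,1)
    }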
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index 5ceb36513f..103ab365e3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -214,7 +214,7 @@ case class CaseWhen(branches: Seq[(Expression, Expression)], elseValue: Option[E
/** Factory methods for CaseWhen. */
object CaseWhen {
- // The maxium number of switches supported with codegen.
+ // The maximum number of switches supported with codegen.
val MAX_NUM_CASES_FOR_CODEGEN = 20
def apply(branches: Seq[(Expression, Expression)], elseValue: Expression): CaseWhen = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index b95c5dd892..7eba617fcd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -364,7 +364,7 @@ object MapObjects {
* used as input for the `lambdaFunction`. It also carries the element type info.
* @param lambdaFunction A function that take the `loopVar` as input, and used as lambda function
* to handle collection elements.
- * @param inputData An expression that when evaluted returns a collection object.
+ * @param inputData An expression that when evaluated returns a collection object.
*/
case class MapObjects private(
loopVar: LambdaVariable,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
index e4417e0955..da90ddbd63 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/NumberConverter.scala
@@ -66,7 +66,7 @@ object NumberConverter {
* negative digit is found, ignore the suffix starting there.
*
* @param radix must be between MIN_RADIX and MAX_RADIX
- * @param fromPos is the first element that should be conisdered
+ * @param fromPos is the first element that should be considered
* @return the result should be treated as an unsigned 64-bit integer.
*/
private def encode(radix: Int, fromPos: Int): Long = {
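An assumed, standalone sketch of the encoding rule the comment describes (digit array, radix, start position, stop at the first negative digit); it is not Spark's private NumberConverter.encode, which works on an internal digit buffer.

    object EncodeSketch {
      // Fold digits from fromPos into an accumulator, stopping at the first
      // negative digit; callers treat the result as an unsigned 64-bit value.
      def encode(digits: Array[Int], radix: Int, fromPos: Int): Long = {
        var value = 0L
        var i = fromPos
        while (i < digits.length && digits(i) >= 0) {
          value = value * radix + digits(i)
          i += 1
        }
        value
      }

      def main(args: Array[String]): Unit =
        println(encode(Array(1, 0, 1, -1, 9), radix = 2, fromPos = 0)) // 5
    }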
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
index de9a56dc9c..4e7bbc38d6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala
@@ -276,7 +276,7 @@ class AnalysisErrorSuite extends AnalysisTest {
test("SPARK-6452 regression test") {
// CheckAnalysis should throw AnalysisException when Aggregate contains missing attribute(s)
- // Since we manually construct the logical plan at here and Sum only accetp
+ // Since we manually construct the logical plan here and Sum only accepts
// LongType, DoubleType, and DecimalType. We use LongType as the type of a.
val plan =
Aggregate(
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index aa1d2b0861..8b568b6dd6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -250,7 +250,7 @@ class AnalysisSuite extends AnalysisTest {
assertAnalysisSuccess(plan)
}
- test("SPARK-8654: different types in inlist but can be converted to a commmon type") {
+ test("SPARK-8654: different types in inlist but can be converted to a common type") {
val plan = Project(Alias(In(Literal(null), Seq(Literal(1), Literal(1.2345))), "a")() :: Nil,
LocalRelation()
)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
index c30434a006..6f289dcc47 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala
@@ -205,7 +205,7 @@ class HiveTypeCoercionSuite extends PlanTest {
Project(Seq(Alias(transformed, "a")()), testRelation))
}
- test("cast NullType for expresions that implement ExpectsInputTypes") {
+ test("cast NullType for expressions that implement ExpectsInputTypes") {
import HiveTypeCoercionSuite._
ruleTest(HiveTypeCoercion.ImplicitTypeCasts,
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
index ce42e5784c..0b350c6a98 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala
@@ -70,7 +70,7 @@ class ExpressionSetSuite extends SparkFunSuite {
// Not commutative
setTest(2, aUpper - bUpper, bUpper - aUpper)
- // Reversable
+ // Reversible
setTest(1, aUpper > bUpper, bUpper < aUpper)
setTest(1, aUpper >= bUpper, bUpper <= aUpper)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
index 0dbfb01e88..f5374229ca 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/aggregate/HyperLogLogPlusPlusSuite.scala
@@ -131,7 +131,7 @@ class HyperLogLogPlusPlusSuite extends SparkFunSuite {
i += 1
}
- // Merge the lower and upper halfs.
+ // Merge the lower and upper halves.
hll.merge(buffer1a, buffer1b)
// Create the other buffer in reverse
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index f7ba61d2b8..1751720a7d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -78,7 +78,7 @@ class TypedColumn[-T, U](
*
* {{{
* df("columnName") // On a specific DataFrame.
- * col("columnName") // A generic column no yet associcated with a DataFrame.
+ * col("columnName") // A generic column no yet associated with a DataFrame.
* col("columnName.field") // Extracting a struct field
* col("`a.column.with.dots`") // Escape `.` in column names.
* $"columnName" // Scala short hand for a named column.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
index 472ae716f1..a8700de135 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/GroupedDataset.scala
@@ -223,7 +223,7 @@ class GroupedDataset[K, V] private[sql](
* Internal helper function for building typed aggregations that return tuples. For simplicity
* and code reuse, we do this without the help of the type system and then use helper functions
* that cast appropriately for the user facing interface.
- * TODO: does not handle aggrecations that return nonflat results,
+ * TODO: does not handle aggregations that return nonflat results,
*/
protected def aggUntyped(columns: TypedColumn[_, _]*): Dataset[_] = {
val encoders = columns.map(_.encoder)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
index 3be4cce045..a92c99e06f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala
@@ -52,7 +52,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
protected def sparkContext = sqlContext.sparkContext
// sqlContext will be null when we are being deserialized on the slaves. In this instance
- // the value of subexpressionEliminationEnabled will be set by the desserializer after the
+ // the value of subexpressionEliminationEnabled will be set by the deserializer after the
// constructor has run.
val subexpressionEliminationEnabled: Boolean = if (sqlContext != null) {
sqlContext.conf.subexpressionEliminationEnabled
@@ -65,7 +65,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ
*/
private val prepareCalled = new AtomicBoolean(false)
- /** Overridden make copy also propogates sqlContext to copied plan. */
+ /** Overridden make copy also propagates sqlContext to copied plan. */
override def makeCopy(newArgs: Array[AnyRef]): SparkPlan = {
SQLContext.setActive(sqlContext)
super.makeCopy(newArgs)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
index 3ec01185c4..f9d606e37e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala
@@ -40,7 +40,7 @@ import org.apache.spark.unsafe.types.UTF8String
* so we do not have helper methods for them.
*
*
- * WARNNING: This only works with HeapByteBuffer
+ * WARNING: This only works with HeapByteBuffer
*/
private[columnar] object ByteBufferHelper {
def getInt(buffer: ByteBuffer): Int = {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index c3f8d7f75a..18a460fc85 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -88,7 +88,7 @@ private[sql] object PartitioningUtils {
}.unzip
// We create pairs of (path -> path's partition value) here
- // If the corresponding partition value is None, the pair will be skiped
+ // If the corresponding partition value is None, the pair will be skipped
val pathsWithPartitionValues = paths.zip(partitionValues).flatMap(x => x._2.map(x._1 -> _))
if (pathsWithPartitionValues.isEmpty) {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
index e295722cac..64a820c6d7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala
@@ -70,7 +70,7 @@ object JdbcUtils extends Logging {
// Somewhat hacky, but there isn't a good way to identify whether a table exists for all
// SQL database systems using JDBC meta data calls, considering "table" could also include
- // the database name. Query used to find table exists can be overriden by the dialects.
+ // the database name. Query used to find table exists can be overridden by the dialects.
Try {
val statement = conn.prepareStatement(dialect.getTableExistsQuery(table))
try {
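A hedged sketch of the probe-and-Try pattern described above, using plain JDBC rather than Spark's JdbcUtils; the exists query is whatever the dialect supplies, for example something like "SELECT * FROM t WHERE 1=0".

    import java.sql.Connection
    import scala.util.Try

    object TableExistsSketch {
      // Run a dialect-specific probe query and treat any failure as "not found".
      def tableExists(conn: Connection, existsQuery: String): Boolean =
        Try {
          val statement = conn.prepareStatement(existsQuery)
          try statement.executeQuery() finally statement.close()
        }.isSuccess
    }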
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
index c65a7bcff8..79e4491026 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python/BatchPythonEvaluation.scala
@@ -31,7 +31,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
/**
- * A physical plan that evalutes a [[PythonUDF]], one partition of tuples at a time.
+ * A physical plan that evaluates a [[PythonUDF]], one partition of tuples at a time.
*
* Python evaluation works by sending the necessary (projected) input data via a socket to an
* external Python process, and combine the result from the Python process with the original row.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
index 1bd71b6b02..e3b2d2f67e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Sink.scala
@@ -38,7 +38,7 @@ trait Sink {
* Accepts a new batch of data as well as a [[Offset]] that denotes how far in the input
* data computation has progressed to. When computation restarts after a failure, it is important
* that a [[Sink]] returns the same [[Offset]] as the most recent batch of data that
- * has been persisted durrably. Note that this does not necessarily have to be the
+ * has been persisted durably. Note that this does not necessarily have to be the
* [[Offset]] for the most recent batch of data that was given to the sink. For example,
* it is valid to buffer data before persisting, as long as the [[Offset]] is stored
* transactionally as data is eventually persisted.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
index 096477ce0e..d7ff44afad 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/memory.scala
@@ -100,7 +100,7 @@ case class MemoryStream[A : Encoder](id: Int, sqlContext: SQLContext)
/**
* A sink that stores the results in memory. This [[Sink]] is primarily intended for use in unit
- * tests and does not provide durablility.
+ * tests and does not provide durability.
*/
class MemorySink(schema: StructType) extends Sink with Logging {
/** An order list of batches that have been written to this [[Sink]]. */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 384102e5ea..59429d254e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -441,7 +441,7 @@ object SQLConf {
// NOTE:
//
// 1. Instead of SQLConf, this option *must be set in Hadoop Configuration*.
- // 2. This option can be overriden by "spark.sql.parquet.output.committer.class".
+ // 2. This option can be overridden by "spark.sql.parquet.output.committer.class".
val OUTPUT_COMMITTER_CLASS =
stringConf("spark.sql.sources.outputCommitterClass", isPublic = false)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index e865dbe6b5..a7a826bc7a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -80,7 +80,7 @@ class DataFrameStatSuite extends QueryTest with SharedSQLContext {
// Verify that the splits span the entire dataset
assert(splits.flatMap(_.collect()).toSet == data.collect().toSet)
- // Verify that the splits don't overalap
+ // Verify that the splits don't overlap
assert(splits(0).intersect(splits(1)).collect().isEmpty)
// Verify that the results are deterministic across multiple runs
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 9f32c8bf95..d7fa23651b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -46,7 +46,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
1, 1, 1)
}
- test("SPARK-12404: Datatype Helper Serializablity") {
+ test("SPARK-12404: Datatype Helper Serializability") {
val ds = sparkContext.parallelize((
new Timestamp(0),
new Date(0),
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 182f287dd0..98d0008489 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -986,7 +986,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
test("SET commands with illegal or inappropriate argument") {
sqlContext.conf.clear()
- // Set negative mapred.reduce.tasks for automatically determing
+ // Set negative mapred.reduce.tasks for automatically determining
// the number of reducers is not supported
intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-1"))
intercept[IllegalArgumentException](sql(s"SET mapred.reduce.tasks=-01"))
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
index 7a5b639115..81078dc6a0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/StreamTest.scala
@@ -80,7 +80,7 @@ trait StreamTest extends QueryTest with Timeouts {
trait StreamMustBeRunning
/**
- * Adds the given data to the stream. Subsuquent check answers will block until this data has
+ * Adds the given data to the stream. Subsequent check answers will block until this data has
* been processed.
*/
object AddData {
@@ -109,7 +109,7 @@ trait StreamTest extends QueryTest with Timeouts {
/**
* Checks to make sure that the current data stored in the sink matches the `expectedAnswer`.
- * This operation automatically blocks untill all added data has been processed.
+ * This operation automatically blocks until all added data has been processed.
*/
object CheckAnswer {
def apply[A : Encoder](data: A*): CheckAnswerRows = {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
index 7af3f94aef..3a7cb25b4f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVInferSchemaSuite.scala
@@ -80,7 +80,7 @@ class InferSchemaSuite extends SparkFunSuite {
assert(CSVInferSchema.inferField(BooleanType, "\\N", "\\N") == BooleanType)
}
- test("Merging Nulltypes should yeild Nulltype.") {
+ test("Merging Nulltypes should yield Nulltype.") {
val mergedNullTypes = CSVInferSchema.mergeRowTypes(Array(NullType), Array(NullType))
assert(mergedNullTypes.deep == Array(NullType).deep)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 1ef517324d..f66deea065 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -359,7 +359,7 @@ class JDBCSuite extends SparkFunSuite
.collect().length === 3)
}
- test("Partioning on column that might have null values.") {
+ test("Partitioning on column that might have null values.") {
assert(
sqlContext.read.jdbc(urlWithUserAndPass, "TEST.EMP", "theid", 0, 4, 3, new Properties)
.collect().length === 4)
@@ -372,7 +372,7 @@ class JDBCSuite extends SparkFunSuite
.collect().length === 4)
}
- test("SELECT * on partitioned table with a nullable partioncolumn") {
+ test("SELECT * on partitioned table with a nullable partition column") {
assert(sql("SELECT * FROM nullparts").collect().size == 4)
}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
index 26c1ff5204..99f1661ad0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/TableScanSuite.scala
@@ -339,7 +339,7 @@ class TableScanSuite extends DataSourceTest with SharedSQLContext {
test("exceptions") {
// Make sure we do throw correct exception when users use a relation provider that
- // only implements the RelationProvier or the SchemaRelationProvider.
+ // only implements the RelationProvider or the SchemaRelationProvider.
val schemaNotAllowed = intercept[Exception] {
sql(
"""
diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
index 54fffb971d..694bd97515 100644
--- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
+++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala
@@ -68,7 +68,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging {
* with one of these strings is found, fail the test immediately.
* The default value is `Seq("Error:")`
*
- * @param queriesAndExpectedAnswers one or more tupes of query + answer
+ * @param queriesAndExpectedAnswers one or more tuples of query + answer
*/
def runCliWithin(
timeout: FiniteDuration,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
index 56acb87c80..739fbaf444 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala
@@ -245,7 +245,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
val tableName = tableNameParts.map { case Token(p, Nil) => p }.mkString(".")
DropTable(tableName, ifExists.nonEmpty)
- // Support "ANALYZE TABLE tableNmae COMPUTE STATISTICS noscan"
+ // Support "ANALYZE TABLE tableName COMPUTE STATISTICS noscan"
case Token("TOK_ANALYZE",
Token("TOK_TAB", Token("TOK_TABNAME", tableNameParts) :: partitionSpec) :: isNoscan) =>
// Reference:
@@ -535,7 +535,7 @@ private[hive] class HiveQl(conf: ParserConf) extends SparkQl(conf) with Logging
case Token("TOK_STORAGEHANDLER", _) =>
throw new AnalysisException(
"CREATE TABLE AS SELECT cannot be used for a non-native table")
- case _ => // Unsupport features
+ case _ => // Unsupported features
}
CreateTableAsSelect(tableDesc, nodeToPlan(query), allowExisting.isDefined)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index 059ad8b1f7..8240f2f222 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -89,7 +89,7 @@ private[orc] object OrcFileOperator extends Logging {
}
def listOrcFiles(pathStr: String, conf: Configuration): Seq[Path] = {
- // TODO: Check if the paths comming in are already qualified and simplify.
+ // TODO: Check if the paths coming in are already qualified and simplify.
val origPath = new Path(pathStr)
val fs = origPath.getFileSystem(conf)
val path = origPath.makeQualified(fs.getUri, fs.getWorkingDirectory)
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
index 1053246fc2..5e452d107d 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveComparisonTest.scala
@@ -143,7 +143,7 @@ abstract class HiveComparisonTest
0D
}
- s"""SQLBuiler statistics:
+ s"""SQLBuilder statistics:
|- Total query number: $numTotalQueries
|- Number of convertible queries: $numConvertibleQueries
|- Percentage of convertible queries: $percentage%
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
index 1002487447..d905f0cd68 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala
@@ -602,7 +602,7 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter {
|select * where key = 4
""".stripMargin)
- // test get_json_object again Hive, because the HiveCompatabilitySuite cannot handle result
+ // test get_json_object against Hive, because the HiveCompatibilitySuite cannot handle result
// with newline in it.
createQueryTest("get_json_object #1",
"SELECT get_json_object(src_json.json, '$') FROM src_json")
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 11a4c7dfd0..16c575bcc1 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -205,7 +205,7 @@ class CheckpointWriter(
// also use the latest checkpoint time as the file name, so that we can recovery from the
// latest checkpoint file.
//
- // Note: there is only one thread writting the checkpoint files, so we don't need to worry
+ // Note: there is only one thread writing the checkpoint files, so we don't need to worry
// about thread-safety.
val checkpointFile = Checkpoint.checkpointFile(checkpointDir, latestCheckpointTime)
val backupFile = Checkpoint.checkpointBackupFile(checkpointDir, latestCheckpointTime)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 25e61578a1..e7f3a213d4 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -299,7 +299,7 @@ class StreamingContext private[streaming] (
/**
* Create a input stream from TCP source hostname:port. Data is received using
- * a TCP socket and the receive bytes it interepreted as object using the given
+ * a TCP socket and the received bytes are interpreted as objects using the given
* converter.
* @param hostname Hostname to connect to for receiving data
* @param port Port to connect to for receiving data
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
index aad9a12c15..2a80cf4466 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala
@@ -155,7 +155,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
/**
* Return a new DStream by applying `reduceByKey` to each RDD. The values for each key are
* merged using the supplied reduce function. org.apache.spark.Partitioner is used to control
- * thepartitioning of each RDD.
+ * the partitioning of each RDD.
*/
def reduceByKey(func: JFunction2[V, V, V], partitioner: Partitioner): JavaPairDStream[K, V] = {
dstream.reduceByKey(func, partitioner)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
index 860b802725..05f4da6fac 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala
@@ -530,7 +530,7 @@ class JavaStreamingContext(val ssc: StreamingContext) extends Closeable {
* Return the current state of the context. The context can be in three possible states -
* <ul>
* <li>
- * StreamingContextState.INTIALIZED - The context has been created, but not been started yet.
+ * StreamingContextState.INITIALIZED - The context has been created, but not been started yet.
* Input DStreams, transformations and output operations can be created on the context.
* </li>
* <li>
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index 1dcdb64e28..d6ff96e1fc 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -446,7 +446,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
* remember the partitioner despite the key being changed.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new
* DStream
- * @param rememberPartitioner Whether to remember the paritioner object in the generated RDDs.
+ * @param rememberPartitioner Whether to remember the partitioner object in the generated RDDs.
* @tparam S State type
*/
def updateStateByKey[S: ClassTag](
@@ -490,7 +490,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
* remember the partitioner despite the key being changed.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new
* DStream
- * @param rememberPartitioner Whether to remember the paritioner object in the generated RDDs.
+ * @param rememberPartitioner Whether to remember the partitioner object in the generated RDDs.
* @param initialRDD initial state value of each key.
* @tparam S State type
*/
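A usage sketch of the simplest `updateStateByKey` overload, without the partitioner arguments documented above; the host, port, and checkpoint path are placeholders.

    import org.apache.spark.SparkConf
    import org.apache.spark.streaming.{Seconds, StreamingContext}

    object RunningCountSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf().setMaster("local[2]").setAppName("running-count")
        val ssc = new StreamingContext(conf, Seconds(1))
        ssc.checkpoint("/tmp/running-count-checkpoint") // state requires checkpointing

        val words = ssc.socketTextStream("localhost", 9999).flatMap(_.split(" "))
        val counts = words.map(word => (word, 1)).updateStateByKey[Int] {
          (newValues: Seq[Int], state: Option[Int]) =>
            Some(newValues.sum + state.getOrElse(0))
        }
        counts.print()

        ssc.start()
        ssc.awaitTermination()
      }
    }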
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
index 080bc873fa..47eb9b806f 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
@@ -55,7 +55,7 @@ class TransformedDStream[U: ClassTag] (
/**
* Wrap a body of code such that the call site and operation scope
* information are passed to the RDDs created in this body properly.
- * This has been overriden to make sure that `displayInnerRDDOps` is always `true`, that is,
+ * This has been overridden to make sure that `displayInnerRDDOps` is always `true`, that is,
* the inner scopes and callsites of RDDs generated in `DStream.transform` are always
* displayed in the UI.
*/
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
index 430f35a400..d6fcc582b9 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala
@@ -257,7 +257,7 @@ private[streaming] class StreamingJobProgressListener(ssc: StreamingContext)
}
batchUIData.foreach { _batchUIData =>
// We use an Iterable rather than explicitly converting to a seq so that updates
- // will propegate
+ // will propagate
val outputOpIdToSparkJobIds: Iterable[OutputOpIdAndSparkJobId] =
Option(batchTimeToOutputOpIdSparkJobIdPair.get(batchTime).asScala)
.getOrElse(Seq.empty)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
index 2be1d6df86..3a21cfae5a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/StateMap.scala
@@ -177,7 +177,7 @@ private[streaming] class OpenHashMapBasedStateMap[K, S](
new OpenHashMapBasedStateMap[K, S](this, deltaChainThreshold = deltaChainThreshold)
}
- /** Whether the delta chain lenght is long enough that it should be compacted */
+ /** Whether the delta chain length is long enough that it should be compacted */
def shouldCompact: Boolean = {
deltaChainLength >= deltaChainThreshold
}
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
index 403400904b..3b662ec183 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
@@ -518,7 +518,7 @@ class MapWithStateSuite extends SparkFunSuite
val mapWithStateStream = dstream.map { _ -> 1 }.mapWithState(
StateSpec.function(runningCount))
- // Set internval make sure there is one RDD checkpointing
+ // Set the interval to make sure there is one RDD checkpointing
mapWithStateStream.checkpoint(checkpointDuration)
mapWithStateStream.stateSnapshots()
}
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
index 45424f9bac..95c1609d8e 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceivedBlockHandlerSuite.scala
@@ -202,13 +202,13 @@ class ReceivedBlockHandlerSuite
blockManager = createBlockManager(12000, sparkConf)
// there is not enough space to store this block in MEMORY,
- // But BlockManager will be able to sereliaze this block to WAL
+ // But BlockManager will be able to serialize this block to WAL
// and hence count returns correct value.
testRecordcount(false, StorageLevel.MEMORY_ONLY,
IteratorBlock((List.fill(70)(new Array[Byte](100))).iterator), blockManager, Some(70))
// there is not enough space to store this block in MEMORY,
- // But BlockManager will be able to sereliaze this block to DISK
+ // But BlockManager will be able to serialize this block to DISK
// and hence count returns correct value.
testRecordcount(true, StorageLevel.MEMORY_AND_DISK,
IteratorBlock((List.fill(70)(new Array[Byte](100))).iterator), blockManager, Some(70))
@@ -272,7 +272,7 @@ class ReceivedBlockHandlerSuite
}
/**
- * Test storing of data using different types of Handler, StorageLevle and ReceivedBlocks
+ * Test storing of data using different types of Handler, StorageLevel and ReceivedBlocks
* and verify the correct record count
*/
private def testRecordcount(isBlockManagedBasedBlockHandler: Boolean,
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
index a4871b460e..6763ac64da 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/ReceiverInputDStreamSuite.scala
@@ -97,7 +97,7 @@ class ReceiverInputDStreamSuite extends TestSuiteBase with BeforeAndAfterAll {
assert(blockRDD.walRecordHandles.toSeq === blockInfos.map { _.walRecordHandleOption.get })
}
- testWithWAL("createBlockRDD creates BlockRDD when some block info dont have WAL info") {
+ testWithWAL("createBlockRDD creates BlockRDD when some block info don't have WAL info") {
receiverStream =>
val blockInfos1 = Seq.fill(2) { createBlockInfo(withWALInfo = true) }
val blockInfos2 = Seq.fill(3) { createBlockInfo(withWALInfo = false) }
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
index 7a76cafc9a..484f3733e8 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StateMapSuite.scala
@@ -182,7 +182,7 @@ class StateMapSuite extends SparkFunSuite {
*
* - These operations are done on a test map in "sets". After each set, the map is "copied"
* to create a new map, and the next set of operations are done on the new one. This tests
- * whether the map data persistes correctly across copies.
+ * whether the map data persists correctly across copies.
*
* - Within each set, there are a number of operations to test whether the map correctly
* updates and removes data without affecting the parent state map.
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
index 197b3d1439..2159edce2b 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -147,7 +147,7 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with Timeo
}
}
- test("start with non-seriazable DStream checkpoints") {
+ test("start with non-serializable DStream checkpoints") {
val checkpointDir = Utils.createTempDir()
ssc = new StreamingContext(conf, batchDuration)
ssc.checkpoint(checkpointDir.getAbsolutePath)
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
index 82cd63bcaf..8269963edf 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/TestSuiteBase.scala
@@ -56,7 +56,7 @@ private[streaming] class DummyInputDStream(ssc: StreamingContext) extends InputD
/**
* This is a input stream just for the testsuites. This is equivalent to a checkpointable,
* replayable, reliable message queue like Kafka. It requires a sequence as input, and
- * returns the i_th element at the i_th batch unde manual clock.
+ * returns the i_th element at the i_th batch under manual clock.
*/
class TestInputStream[T: ClassTag](_ssc: StreamingContext, input: Seq[Seq[T]], numPartitions: Int)
extends InputDStream[T](_ssc) {
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
index 6e95bb9710..498471b23b 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala
@@ -115,7 +115,7 @@ private[yarn] class AMDelegationTokenRenewer(
}
}
// Schedule update of credentials. This handles the case of updating the tokens right now
- // as well, since the renenwal interval will be 0, and the thread will get scheduled
+ // as well, since the renewal interval will be 0, and the thread will get scheduled
// immediately.
scheduleRenewal(driverTokenRenewerRunnable)
}
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 9f91d182eb..9cdbd6da62 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -186,9 +186,9 @@ private[yarn] class ExecutorRunnable(
else {
// If no java_opts specified, default to using -XX:+CMSIncrementalMode
// It might be possible that other modes/config is being done in
- // spark.executor.extraJavaOptions, so we dont want to mess with it.
- // In our expts, using (default) throughput collector has severe perf ramnifications in
- // multi-tennent machines
+ // spark.executor.extraJavaOptions, so we don't want to mess with it.
+ // In our expts, using (default) throughput collector has severe perf ramifications in
+ // multi-tenant machines
// The options are based on
// http://www.oracle.com/technetwork/java/gc-tuning-5-138395.html#0.0.0.%20When%20to%20Use
// %20the%20Concurrent%20Low%20Pause%20Collector|outline
diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index ed56d4bd44..2915e664be 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -65,7 +65,7 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
override def isYarnMode(): Boolean = { true }
// Return an appropriate (subclass) of Configuration. Creating a config initializes some Hadoop
- // subsystems. Always create a new config, dont reuse yarnConf.
+ // subsystems. Always create a new config, don't reuse yarnConf.
override def newConfiguration(conf: SparkConf): Configuration =
new YarnConfiguration(super.newConfiguration(conf))
@@ -217,7 +217,7 @@ class YarnSparkHadoopUtil extends SparkHadoopUtil {
// the hive configuration class is a subclass of Hadoop Configuration, so can be cast down
// to a Configuration and used without reflection
val hiveConfClass = mirror.classLoader.loadClass("org.apache.hadoop.hive.conf.HiveConf")
- // using the (Configuration, Class) constructor allows the current configuratin to be included
+ // using the (Configuration, Class) constructor allows the current configuration to be included
// in the hive config.
val ctor = hiveConfClass.getDeclaredConstructor(classOf[Configuration],
classOf[Object].getClass)
@@ -502,7 +502,7 @@ object YarnSparkHadoopUtil {
/**
* Getting the initial target number of executors depends on whether dynamic allocation is
* enabled.
- * If not using dynamic allocation it gets the number of executors reqeusted by the user.
+ * If not using dynamic allocation it gets the number of executors requested by the user.
*/
def getInitialTargetExecutorNumber(
conf: SparkConf,
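A simplified sketch of the decision described above, reading only a few well-known configuration keys; the real method consults more sources than this (defaults and legacy settings), so treat it as an illustration only.

    import org.apache.spark.SparkConf

    object InitialExecutorsSketch {
      // With dynamic allocation, start from the configured initial (or minimum)
      // executor count; otherwise use the user-requested fixed number.
      def initialExecutors(conf: SparkConf, fallback: Int = 2): Int =
        if (conf.getBoolean("spark.dynamicAllocation.enabled", defaultValue = false)) {
          conf.getInt("spark.dynamicAllocation.initialExecutors",
            conf.getInt("spark.dynamicAllocation.minExecutors", 0))
        } else {
          conf.getInt("spark.executor.instances", fallback)
        }
    }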
diff --git a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
index 1538ff75be..05c1e1613d 100644
--- a/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
+++ b/yarn/src/test/scala/org/apache/spark/deploy/yarn/YarnShuffleIntegrationSuite.scala
@@ -78,7 +78,7 @@ private object YarnExternalShuffleDriver extends Logging with Matchers {
s"""
|Invalid command line: ${args.mkString(" ")}
|
- |Usage: ExternalShuffleDriver [result file] [registed exec file]
+ |Usage: ExternalShuffleDriver [result file] [registered exec file]
""".stripMargin)
// scalastyle:on println
System.exit(1)