-rw-r--r--  core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/deploy/master/Master.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala  4
-rw-r--r--  core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala  2
-rw-r--r--  core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala  2
-rw-r--r--  docs/streaming-custom-receivers.md  2
-rw-r--r--  docs/streaming-programming-guide.md  4
-rw-r--r--  examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java  2
-rw-r--r--  examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala  4
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/param/params.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala  2
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala  2
-rw-r--r--  python/pyspark/mllib/regression.py  6
-rw-r--r--  python/pyspark/sql/functions.py  2
-rw-r--r--  python/pyspark/sql/readwriter.py  2
-rw-r--r--  python/pyspark/streaming/dstream.py  2
-rw-r--r--  python/pyspark/streaming/kafka.py  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala  4
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala  2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala  6
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala  6
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/functions.scala  2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala  2
-rw-r--r--  sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala  2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala  2
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala  4
-rw-r--r--  sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala  4
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala  2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala  10
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala  2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala  2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala  2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala  2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala  2
61 files changed, 78 insertions(+), 78 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
index 73495a8d7d..c3764ac671 100644
--- a/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
+++ b/core/src/main/scala/org/apache/spark/HeartbeatReceiver.scala
@@ -166,7 +166,7 @@ private[spark] class HeartbeatReceiver(sc: SparkContext, clock: Clock)
}
/**
- * Send ExecutorRemoved to the event loop to remove a executor. Only for test.
+ * Send ExecutorRemoved to the event loop to remove an executor. Only for test.
*
* @return if HeartbeatReceiver is stopped, return None. Otherwise, return a Some(Future) that
* indicate if this operation is successful.
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index faed4f4dc9..f8aac3008c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -929,7 +929,7 @@ private[deploy] class Master(
exec.state = ExecutorState.KILLED
}
- /** Generate a new app ID given a app's submission date */
+ /** Generate a new app ID given an app's submission date */
private def newApplicationId(submitDate: Date): String = {
val appId = "app-%s-%04d".format(createDateFormat.format(submitDate), nextAppNumber)
nextAppNumber += 1
diff --git a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
index f8167074c6..f1915857ea 100644
--- a/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
+++ b/core/src/main/scala/org/apache/spark/memory/ExecutionMemoryPool.scala
@@ -24,7 +24,7 @@ import scala.collection.mutable
import org.apache.spark.internal.Logging
/**
- * Implements policies and bookkeeping for sharing a adjustable-sized pool of memory between tasks.
+ * Implements policies and bookkeeping for sharing an adjustable-sized pool of memory between tasks.
*
* Tries to ensure that each task gets a reasonable share of memory, instead of some task ramping up
* to a large amount first and then causing others to spill to disk repeatedly.
diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
index a714237601..104e0cb371 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala
@@ -1054,7 +1054,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
val warningMessage =
s"$outputCommitterClass may be an output committer that writes data directly to " +
"the final location. Because speculation is enabled, this output committer may " +
- "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+ "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
"committer that does not have this behavior (e.g. FileOutputCommitter)."
logWarning(warningMessage)
}
@@ -1142,7 +1142,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)])
val warningMessage =
s"$outputCommitterClass may be an output committer that writes data directly to " +
"the final location. Because speculation is enabled, this output committer may " +
- "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+ "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
"committer that does not have this behavior (e.g. FileOutputCommitter)."
logWarning(warningMessage)
}
diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
index fffbd5cd44..ae4a600351 100644
--- a/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
+++ b/core/src/main/scala/org/apache/spark/rpc/netty/Inbox.scala
@@ -52,7 +52,7 @@ private[netty] case class RemoteProcessConnectionError(cause: Throwable, remoteA
extends InboxMessage
/**
- * A inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
+ * An inbox that stores messages for an [[RpcEndpoint]] and posts messages to it thread-safely.
*/
private[netty] class Inbox(
val endpointRef: NettyRpcEndpointRef,
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
index 7e1197d742..642bf81ac0 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ExecutorLossReason.scala
@@ -20,7 +20,7 @@ package org.apache.spark.scheduler
import org.apache.spark.executor.ExecutorExitCode
/**
- * Represents an explanation for a executor or whole slave failing or exiting.
+ * Represents an explanation for an executor or whole slave failing or exiting.
*/
private[spark]
class ExecutorLossReason(val message: String) extends Serializable {
diff --git a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
index 745ef12691..9dc274c9fe 100644
--- a/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/SerializerManager.scala
@@ -134,7 +134,7 @@ private[spark] class SerializerManager(defaultSerializer: Serializer, conf: Spar
}
/**
- * Deserializes a InputStream into an iterator of values and disposes of it when the end of
+ * Deserializes an InputStream into an iterator of values and disposes of it when the end of
* the iterator is reached.
*/
def dataDeserializeStream[T: ClassTag](
diff --git a/docs/streaming-custom-receivers.md b/docs/streaming-custom-receivers.md
index a4e17fd24e..479140f519 100644
--- a/docs/streaming-custom-receivers.md
+++ b/docs/streaming-custom-receivers.md
@@ -36,7 +36,7 @@ Any exception in the receiving threads should be caught and handled properly to
failures of the receiver. `restart(<exception>)` will restart the receiver by
asynchronously calling `onStop()` and then calling `onStart()` after a delay.
`stop(<exception>)` will call `onStop()` and terminate the receiver. Also, `reportError(<error>)`
-reports a error message to the driver (visible in the logs and UI) without stopping / restarting
+reports an error message to the driver (visible in the logs and UI) without stopping / restarting
the receiver.
The following is a custom receiver that receives a stream of text over a socket. It treats
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 4d0a1122dc..d7eafff38f 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -612,7 +612,7 @@ as well as to run the receiver(s).
- When running a Spark Streaming program locally, do not use "local" or "local[1]" as the master URL.
Either of these means that only one thread will be used for running tasks locally. If you are using
- a input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
+ an input DStream based on a receiver (e.g. sockets, Kafka, Flume, etc.), then the single thread will
be used to run the receiver, leaving no thread for processing the received data. Hence, when
running locally, always use "local[*n*]" as the master URL, where *n* > number of receivers to run
(see [Spark Properties](configuration.html#spark-properties) for information on how to set
@@ -1788,7 +1788,7 @@ This example appends the word counts of network data into a file.
This behavior is made simple by using `JavaStreamingContext.getOrCreate`. This is used as follows.
{% highlight java %}
-// Create a factory object that can create a and setup a new JavaStreamingContext
+// Create a factory object that can create and setup a new JavaStreamingContext
JavaStreamingContextFactory contextFactory = new JavaStreamingContextFactory() {
@Override public JavaStreamingContext create() {
JavaStreamingContext jssc = new JavaStreamingContext(...); // new context
diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
index 1cba565b38..e20b94d5b0 100644
--- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
+++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaCustomReceiver.java
@@ -70,7 +70,7 @@ public class JavaCustomReceiver extends Receiver<String> {
SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));
- // Create a input stream with the custom receiver on target ip:port and count the
+ // Create an input stream with the custom receiver on target ip:port and count the
// words in input stream of \n delimited text (eg. generated by 'nc')
JavaReceiverInputDStream<String> lines = ssc.receiverStream(
new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
index 1d144db986..f70975eeb5 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/CustomReceiver.scala
@@ -50,7 +50,7 @@ object CustomReceiver {
val sparkConf = new SparkConf().setAppName("CustomReceiver")
val ssc = new StreamingContext(sparkConf, Seconds(1))
- // Create a input stream with the custom receiver on target ip:port and count the
+ // Create an input stream with the custom receiver on target ip:port and count the
// words in input stream of \n delimited text (eg. generated by 'nc')
val lines = ssc.receiverStream(new CustomReceiver(args(0), args(1).toInt))
val words = lines.flatMap(_.split(" "))
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 0ab4459bdb..61e355ab9f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -686,7 +686,7 @@ object LogisticRegressionModel extends MLReadable[LogisticRegressionModel] {
/**
* MultiClassSummarizer computes the number of distinct labels and corresponding counts,
* and validates the data to see if the labels used for k class multi-label classification
- * are in the range of {0, 1, ..., k - 1} in a online fashion.
+ * are in the range of {0, 1, ..., k - 1} in an online fashion.
*
* Two MultilabelSummarizer can be merged together to have a statistical summary of the
* corresponding joint dataset.
@@ -923,7 +923,7 @@ class BinaryLogisticRegressionSummary private[classification] (
/**
* LogisticAggregator computes the gradient and loss for binary logistic loss function, as used
- * in binary classification for instances in sparse or dense vector in a online fashion.
+ * in binary classification for instances in sparse or dense vector in an online fashion.
*
* Note that multinomial logistic loss is not supported yet!
*
diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
index 82f2de7ccd..ecec61a72f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala
@@ -790,7 +790,7 @@ trait Params extends Identifiable with Serializable {
* :: DeveloperApi ::
* Java-friendly wrapper for [[Params]].
* Java developers who need to extend [[Params]] should use this class instead.
- * If you need to extend a abstract class which already extends [[Params]], then that abstract
+ * If you need to extend an abstract class which already extends [[Params]], then that abstract
* class should be Java-friendly as well.
*/
@DeveloperApi
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index e63eb71080..00ef6ccc74 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -396,7 +396,7 @@ object AFTSurvivalRegressionModel extends MLReadable[AFTSurvivalRegressionModel]
/**
* AFTAggregator computes the gradient and loss for a AFT loss function,
- * as used in AFT survival regression for samples in sparse or dense vector in a online fashion.
+ * as used in AFT survival regression for samples in sparse or dense vector in an online fashion.
*
* The loss function and likelihood function under the AFT model based on:
* Lawless, J. F., Statistical Models and Methods for Lifetime Data,
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 37552194c5..4d66b0eb37 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -731,7 +731,7 @@ class LinearRegressionSummary private[regression] (
/**
* LeastSquaresAggregator computes the gradient and loss for a Least-squared loss function,
- * as used in linear regression for samples in sparse or dense vector in a online fashion.
+ * as used in linear regression for samples in sparse or dense vector in an online fashion.
*
* Two LeastSquaresAggregator can be merged together to have a summary of loss and gradient of
* the corresponding joint dataset.
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
index b836d2a234..7d12f447f7 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ParamGridBuilder.scala
@@ -74,7 +74,7 @@ class ParamGridBuilder @Since("1.2.0") {
}
/**
- * Adds a int param with multiple values.
+ * Adds an int param with multiple values.
*/
@Since("1.2.0")
def addGrid(param: IntParam, values: Array[Int]): this.type = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
index 1d2d777c00..b0fa287473 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPTree.scala
@@ -126,7 +126,7 @@ private[fpm] object FPTree {
def isRoot: Boolean = parent == null
}
- /** Summary of a item in an FP-Tree. */
+ /** Summary of an item in an FP-Tree. */
private class Summary[T] extends Serializable {
var count: Long = 0L
val nodes: ListBuffer[Node[T]] = ListBuffer.empty
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
index 98404be260..d4de0fd7d5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala
@@ -24,7 +24,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors}
* :: DeveloperApi ::
* MultivariateOnlineSummarizer implements [[MultivariateStatisticalSummary]] to compute the mean,
* variance, minimum, maximum, counts, and nonzero counts for instances in sparse or dense vector
- * format in a online fashion.
+ * format in an online fashion.
*
* Two MultivariateOnlineSummarizer can be merged together to have a statistical summary of
* the corresponding joint dataset.
diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py
index 43d9072a24..705022934e 100644
--- a/python/pyspark/mllib/regression.py
+++ b/python/pyspark/mllib/regression.py
@@ -648,7 +648,7 @@ class IsotonicRegressionModel(Saveable, Loader):
@since("1.4.0")
def save(self, sc, path):
- """Save a IsotonicRegressionModel."""
+ """Save an IsotonicRegressionModel."""
java_boundaries = _py2java(sc, self.boundaries.tolist())
java_predictions = _py2java(sc, self.predictions.tolist())
java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel(
@@ -658,7 +658,7 @@ class IsotonicRegressionModel(Saveable, Loader):
@classmethod
@since("1.4.0")
def load(cls, sc, path):
- """Load a IsotonicRegressionModel."""
+ """Load an IsotonicRegressionModel."""
java_model = sc._jvm.org.apache.spark.mllib.regression.IsotonicRegressionModel.load(
sc._jsc.sc(), path)
py_boundaries = _java2py(sc, java_model.boundaryVector()).toArray()
@@ -694,7 +694,7 @@ class IsotonicRegression(object):
@since("1.4.0")
def train(cls, data, isotonic=True):
"""
- Train a isotonic regression model on the given data.
+ Train an isotonic regression model on the given data.
:param data:
RDD of (label, feature, weight) tuples.
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 64b8bc442d..15cefc8cf1 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1177,7 +1177,7 @@ def sha2(col, numBits):
@since(2.0)
def hash(*cols):
- """Calculates the hash code of given columns, and returns the result as a int column.
+ """Calculates the hash code of given columns, and returns the result as an int column.
>>> spark.createDataFrame([('ABC',)], ['a']).select(hash('a').alias('hash')).collect()
[Row(hash=-757602832)]
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 73d2b81b6b..13d21d7143 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -847,7 +847,7 @@ class DataFrameWriter(object):
@since(1.4)
def jdbc(self, url, table, mode=None, properties=None):
- """Saves the content of the :class:`DataFrame` to a external database table via JDBC.
+ """Saves the content of the :class:`DataFrame` to an external database table via JDBC.
.. note:: Don't create too many partitions in parallel on a large cluster; \
otherwise Spark might crash your external database systems.
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index 67a0819601..fb6c66f2df 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -623,7 +623,7 @@ class TransformedDStream(DStream):
self._jdstream_val = None
# Using type() to avoid folding the functions and compacting the DStreams which is not
- # not strictly a object of TransformedDStream.
+ # not strictly an object of TransformedDStream.
# Changed here is to avoid bug in KafkaTransformedDStream when calling offsetRanges().
if (type(prev) is TransformedDStream and
not prev.is_cached and not prev.is_checkpointed):
diff --git a/python/pyspark/streaming/kafka.py b/python/pyspark/streaming/kafka.py
index 015ca77dbf..2c1a667fc8 100644
--- a/python/pyspark/streaming/kafka.py
+++ b/python/pyspark/streaming/kafka.py
@@ -228,7 +228,7 @@ class OffsetRange(object):
def __init__(self, topic, partition, fromOffset, untilOffset):
"""
- Create a OffsetRange to represent range of offsets
+ Create an OffsetRange to represent range of offsets
:param topic: Kafka topic name.
:param partition: Kafka partition id.
:param fromOffset: Inclusive starting offset.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index cd242d78a4..c5f221d783 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -215,7 +215,7 @@ trait CheckAnalysis extends PredicateHelper {
if (!RowOrdering.isOrderable(expr.dataType)) {
failAnalysis(
s"expression ${expr.sql} cannot be used as a grouping expression " +
- s"because its data type ${expr.dataType.simpleString} is not a orderable " +
+ s"because its data type ${expr.dataType.simpleString} is not an orderable " +
s"data type.")
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
index 79c3528a52..d4350598f4 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCheckResult.scala
@@ -37,7 +37,7 @@ object TypeCheckResult {
/**
* Represents the failing result of `Expression.checkInputDataTypes`,
- * with a error message to show the reason of failure.
+ * with an error message to show the reason of failure.
*/
case class TypeCheckFailure(message: String) extends TypeCheckResult {
def isSuccess: Boolean = false
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 91bdcc3b09..387e555254 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -178,8 +178,8 @@ object TypeCoercion {
q transformExpressions {
case a: AttributeReference =>
inputMap.get(a.exprId) match {
- // This can happen when a Attribute reference is born in a non-leaf node, for example
- // due to a call to an external script like in the Transform operator.
+ // This can happen when an Attribute reference is born in a non-leaf node, for
+ // example due to a call to an external script like in the Transform operator.
// TODO: Perhaps those should actually be aliases?
case None => a
// Leave the same if the dataTypes match.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
index 7da1fe93c6..c66d08d2cf 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/functionResources.scala
@@ -26,7 +26,7 @@ object JarResource extends FunctionResourceType("jar")
object FileResource extends FunctionResourceType("file")
-// We do not allow users to specify a archive because it is YARN specific.
+// We do not allow users to specify an archive because it is YARN specific.
// When loading resources, we will throw an exception and ask users to
// use --archive with spark submit.
object ArchiveResource extends FunctionResourceType("archive")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
index b8ab0364dd..946b3d446a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/First.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
/**
* Returns the first value of `child` for a group of rows. If the first value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[First]] is used on an already
* sorted column, if we do partial aggregation and final aggregation (when mergeExpression
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
index b05d74b49b..53b4b761ae 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Last.scala
@@ -23,7 +23,7 @@ import org.apache.spark.sql.types._
/**
* Returns the last value of `child` for a group of rows. If the last value of `child`
- * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on a already
+ * is `null`, it returns `null` (respecting nulls). Even if [[Last]] is used on an already
* sorted column, if we do partial aggregation and final aggregation (when mergeExpression
* is used) its result will not be deterministic (unless the input table is sorted and has
* a single partition, and we use a single reducer to do the aggregation.).
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
index 9ead571c53..16c03c500a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/PivotFirst.scala
@@ -51,7 +51,7 @@ object PivotFirst {
}
/**
- * PivotFirst is a aggregate function used in the second phase of a two phase pivot to do the
+ * PivotFirst is an aggregate function used in the second phase of a two phase pivot to do the
* required rearrangement of values into pivoted form.
*
* For example on an input of
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 7e3683e482..95ed68fbb0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -182,7 +182,7 @@ case class Literal protected (value: Any, dataType: DataType)
override protected def jsonFields: List[JField] = {
// Turns all kinds of literal values to string in json field, as the type info is hard to
- // retain in json format, e.g. {"a": 123} can be a int, or double, or decimal, etc.
+ // retain in json format, e.g. {"a": 123} can be an int, or double, or decimal, etc.
val jsonValue = (value, dataType) match {
case (null, _) => JNull
case (i: Int, DateType) => JString(DateTimeUtils.toJavaDate(i).toString)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
index 93a8278528..e036982e70 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/rows.scala
@@ -214,7 +214,7 @@ class GenericRowWithSchema(values: Array[Any], override val schema: StructType)
}
/**
- * A internal row implementation that uses an array of objects as the underlying storage.
+ * An internal row implementation that uses an array of objects as the underlying storage.
* Note that, while the array is not copied, and thus could technically be mutated after creation,
* this is not allowed.
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
index 5001f9a41e..08cb6c0134 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala
@@ -129,7 +129,7 @@ object PredicateSubquery {
/**
* A [[ListQuery]] expression defines the query which we want to search in an IN subquery
- * expression. It should and can only be used in conjunction with a IN expression.
+ * expression. It should and can only be used in conjunction with an IN expression.
*
* For example (SQL):
* {{{
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5e998d6188..48d70099b6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -926,7 +926,7 @@ object SimplifyConditionals extends Rule[LogicalPlan] with PredicateHelper {
case e @ CaseWhen(branches, _) if branches.headOption.map(_._1) == Some(TrueLiteral) =>
// If the first branch is a true literal, remove the entire CaseWhen and use the value
// from that. Note that CaseWhen.branches should never be empty, and as a result the
- // headOption (rather than head) added above is just a extra (and unnecessary) safeguard.
+ // headOption (rather than head) added above is just an extra (and unnecessary) safeguard.
branches.head._2
}
}
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
index d042e191a9..d687a85c18 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala
@@ -59,7 +59,7 @@ abstract class AbstractSqlParser extends ParserInterface with Logging {
}
}
- /** Get the builder (visitor) which converts a ParseTree into a AST. */
+ /** Get the builder (visitor) which converts a ParseTree into an AST. */
protected def astBuilder: AstBuilder
protected def parse[T](command: String)(toResult: SqlBaseParser => T): T = {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 8b7e21b679..898784dab1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -530,7 +530,7 @@ private[sql] object Expand {
/**
* Apply the all of the GroupExpressions to every input row, hence we will get
- * multiple output rows for a input row.
+ * multiple output rows for an input row.
*
* @param bitmasks The bitmask set represents the grouping sets
* @param groupByAliases The aliased original group by expressions
@@ -572,7 +572,7 @@ private[sql] object Expand {
/**
* Apply a number of projections to every input row, hence we will get multiple output rows for
- * a input row.
+ * an input row.
*
* @param projections to apply
* @param output of all projections.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
index 42bdab42b7..b46f7a6d5a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/package.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.catalyst
/**
- * A a collection of common abstractions for query plans as well as
+ * A collection of common abstractions for query plans as well as
* a base logical plan representation.
*/
package object plans
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
index d449088498..51d78dd123 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/physical/partitioning.scala
@@ -67,7 +67,7 @@ case class ClusteredDistribution(clustering: Seq[Expression]) extends Distributi
case class OrderedDistribution(ordering: Seq[SortOrder]) extends Distribution {
require(
ordering != Nil,
- "The ordering expressions of a OrderedDistribution should not be Nil. " +
+ "The ordering expressions of an OrderedDistribution should not be Nil. " +
"An AllTuples should be used to represent a distribution that only has " +
"a single partition.")
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 31604bad0f..52e021070e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -366,7 +366,7 @@ object Decimal {
val ROUND_CEILING = BigDecimal.RoundingMode.CEILING
val ROUND_FLOOR = BigDecimal.RoundingMode.FLOOR
- /** Maximum number of decimal digits a Int can represent */
+ /** Maximum number of decimal digits an Int can represent */
val MAX_INT_DIGITS = 9
/** Maximum number of decimal digits a Long can represent */
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
index 6b7e3714e0..6500875f95 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DecimalType.scala
@@ -152,7 +152,7 @@ object DecimalType extends AbstractDataType {
}
/**
- * Returns if dt is a DecimalType that fits inside a int
+ * Returns if dt is a DecimalType that fits inside an int
*/
def is32BitDecimalType(dt: DataType): Boolean = {
dt match {
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index 2e85e36767..c7b887ecd4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -529,7 +529,7 @@ final class DataFrameWriter private[sql](df: DataFrame) {
}
/**
- * Saves the content of the [[DataFrame]] to a external database table via JDBC. In the case the
+ * Saves the content of the [[DataFrame]] to an external database table via JDBC. In the case the
* table already exists in the external database, behavior of this function depends on the
* save mode, specified by the `mode` function (default to throwing an exception).
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
index 1be41ffc07..4c046f7bdc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExpandExec.scala
@@ -27,7 +27,7 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
/**
* Apply all of the GroupExpressions to every input row, hence we will get
- * multiple output rows for a input row.
+ * multiple output rows for an input row.
* @param projections The group of expressions, all of the group expressions should
* output the same schema specified bye the parameter `output`
* @param output The output Schema
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
index e81cd28ea3..5f0c264416 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ShuffledRowRDD.scala
@@ -95,7 +95,7 @@ class CoalescedPartitioner(val parent: Partitioner, val partitionStartIndices: A
* interfaces / internals.
*
* This RDD takes a [[ShuffleDependency]] (`dependency`),
- * and a optional array of partition start indices as input arguments
+ * and an optional array of partition start indices as input arguments
* (`specifiedPartitionStartIndices`).
*
* The `dependency` has the parent RDD of this RDD, which represents the dataset before shuffle
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
index 908e22de73..2aec931894 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala
@@ -445,7 +445,7 @@ case class CollapseCodegenStages(conf: SQLConf) extends Rule[SparkPlan] {
}
/**
- * Inserts a InputAdapter on top of those that do not support codegen.
+ * Inserts an InputAdapter on top of those that do not support codegen.
*/
private def insertInputAdapter(plan: SparkPlan): SparkPlan = plan match {
case j @ SortMergeJoinExec(_, _, _, _, left, right) if j.supportCodegen =>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
index 243aa15deb..4b8adf5230 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregationIterator.scala
@@ -41,7 +41,7 @@ import org.apache.spark.unsafe.KVIterator
* - Step 0: Do hash-based aggregation.
* - Step 1: Sort all entries of the hash map based on values of grouping expressions and
* spill them to disk.
- * - Step 2: Create a external sorter based on the spilled sorted map entries and reset the map.
+ * - Step 2: Create an external sorter based on the spilled sorted map entries and reset the map.
* - Step 3: Get a sorted [[KVIterator]] from the external sorter.
* - Step 4: Repeat step 0 until no more input.
* - Step 5: Initialize sort-based aggregation on the sorted iterator.
@@ -434,12 +434,12 @@ class TungstenAggregationIterator(
///////////////////////////////////////////////////////////////////////////
/**
- * Generate a output row when there is no input and there is no grouping expression.
+ * Generate an output row when there is no input and there is no grouping expression.
*/
def outputForEmptyGroupingKeyWithoutInput(): UnsafeRow = {
if (groupingExpressions.isEmpty) {
sortBasedAggregationBuffer.copyFrom(initialAggregationBuffer)
- // We create a output row and copy it. So, we can free the map.
+ // We create an output row and copy it. So, we can free the map.
val resultCopy =
generateOutput(UnsafeRow.createFromByteArray(0, 0), sortBasedAggregationBuffer).copy()
hashMap.free()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
index 89bde6ad73..185c79f899 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicPhysicalOperators.scala
@@ -142,9 +142,9 @@ case class FilterExec(condition: Expression, child: SparkPlan)
// To generate the predicates we will follow this algorithm.
// For each predicate that is not IsNotNull, we will generate them one by one loading attributes
- // as necessary. For each of both attributes, if there is a IsNotNull predicate we will generate
- // that check *before* the predicate. After all of these predicates, we will generate the
- // remaining IsNotNull checks that were not part of other predicates.
+ // as necessary. For each of both attributes, if there is an IsNotNull predicate we will
+ // generate that check *before* the predicate. After all of these predicates, we will generate
+ // the remaining IsNotNull checks that were not part of other predicates.
// This has the property of not doing redundant IsNotNull checks and taking better advantage of
// short-circuiting, not loading attributes until they are needed.
// This is very perf sensitive.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
index 3b064a5bc4..61dcbebd64 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
@@ -176,7 +176,7 @@ private[sql] abstract class BaseWriterContainer(
val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
ctor.newInstance(new Path(outputPath), context)
} else {
- // The specified output committer is just a OutputCommitter.
+ // The specified output committer is just an OutputCommitter.
// So, we will use the no-argument constructor.
val ctor = clazz.getDeclaredConstructor()
ctor.newInstance()
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
index 0f5d6445b1..2cc012840d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Offset.scala
@@ -18,7 +18,7 @@
package org.apache.spark.sql.execution.streaming
/**
- * A offset is a monotonically increasing metric used to track progress in the computation of a
+ * An offset is a monotonically increasing metric used to track progress in the computation of a
* stream. An [[Offset]] must be comparable, and the result of `compareTo` must be consistent
* with `equals` and `hashcode`.
*/
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0b490fe71c..d89e98645b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1996,7 +1996,7 @@ object functions {
/**
* Computes the numeric value of the first character of the string column, and returns the
- * result as a int column.
+ * result as an int column.
*
* @group string_funcs
* @since 1.5.0
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
index f3f36efda5..ceb6862275 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala
@@ -356,7 +356,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog {
val tableIdent = sparkSession.sessionState.sqlParser.parseTableIdentifier(tableName)
sessionCatalog.refreshTable(tableIdent)
- // If this table is cached as a InMemoryRelation, drop the original
+ // If this table is cached as an InMemoryRelation, drop the original
// cached version and make the new version cached lazily.
val logicalPlan = sparkSession.sessionState.catalog.lookupRelation(tableIdent)
// Use lookupCachedData directly since RefreshTable also takes databaseName.
diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
index cef5912c62..de70fdc14e 100644
--- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
+++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2.scala
@@ -157,7 +157,7 @@ object HiveThriftServer2 extends Logging {
/**
- * A inner sparkListener called in sc.stop to clean up the HiveThriftServer2
+ * An inner sparkListener called in sc.stop to clean up the HiveThriftServer2
*/
private[thriftserver] class HiveThriftServer2Listener(
val server: HiveServer2,
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index d033b05d48..88f4a2d2b2 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -391,7 +391,7 @@ private[spark] object HiveUtils extends Logging {
// Remote means that the metastore server is running in its own process.
// When the mode is remote, configurations like "javax.jdo.option.ConnectionURL" will not be
// used (because they are used by remote metastore server that talks to the database).
- // Because execution Hive should always connects to a embedded derby metastore.
+ // Because execution Hive should always connects to an embedded derby metastore.
// We have to remove the value of hive.metastore.uris. So, the execution Hive client connects
// to the actual embedded derby metastore instead of the remote metastore.
// You can search HiveConf.ConfVars.METASTOREURIS in the code of HiveConf (in Hive's repo).
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
index 9e8ff9317c..b3896484da 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala
@@ -211,7 +211,7 @@ case class InsertIntoHiveTable(
val warningMessage =
s"$outputCommitterClass may be an output committer that writes data directly to " +
"the final location. Because speculation is enabled, this output committer may " +
- "cause data loss (see the case in SPARK-10063). If possible, please use a output " +
+ "cause data loss (see the case in SPARK-10063). If possible, please use an output " +
"committer that does not have this behavior (e.g. FileOutputCommitter)."
logWarning(warningMessage)
}
@@ -250,7 +250,7 @@ case class InsertIntoHiveTable(
orderedPartitionSpec.put(entry.getName, partitionSpec.getOrElse(entry.getName, ""))
}
- // inheritTableSpecs is set to true. It should be set to false for a IMPORT query
+ // inheritTableSpecs is set to true. It should be set to false for an IMPORT query
// which is currently considered as a Hive native command.
val inheritTableSpecs = true
// TODO: Correctly set isSkewedStoreAsSubdir.
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
index 13d2bed606..0589c8ece3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.StructType
private[orc] object OrcFileOperator extends Logging {
/**
- * Retrieves a ORC file reader from a given path. The path can point to either a directory or a
+ * Retrieves an ORC file reader from a given path. The path can point to either a directory or a
* single ORC file. If it points to an directory, it picks any non-empty ORC file within that
* directory.
*
@@ -42,7 +42,7 @@ private[orc] object OrcFileOperator extends Logging {
* ORC file if the file contains zero rows. This is OK for Hive since the schema of the
* table is managed by metastore. But this becomes a problem when reading ORC files
* directly from HDFS via Spark SQL, because we have to discover the schema from raw ORC
- * files. So this method always tries to find a ORC file whose schema is non-empty, and
+ * files. So this method always tries to find an ORC file whose schema is non-empty, and
* create the result reader from that file. If no such file is found, it returns `None`.
* @todo Needs to consider all files when schema evolution is taken into account.
*/
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
index 6ececb1062..0b11026863 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala
@@ -154,7 +154,7 @@ object Checkpoint extends Logging {
Utils.tryWithSafeFinally {
// ObjectInputStream uses the last defined user-defined class loader in the stack
- // to find classes, which maybe the wrong class loader. Hence, a inherited version
+ // to find classes, which maybe the wrong class loader. Hence, an inherited version
// of ObjectInputStream is used to explicitly use the current thread's default class
// loader to find and load classes. This is a well know Java issue and has popped up
// in other places (e.g., http://jira.codehaus.org/browse/GROOVY-1627)
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
index 928739a416..b524af9578 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala
@@ -322,7 +322,7 @@ class StreamingContext private[streaming] (
}
/**
- * Create a input stream from network source hostname:port, where data is received
+ * Create an input stream from network source hostname:port, where data is received
* as serialized blocks (serialized using the Spark's serializer) that can be directly
* pushed into the block manager without deserializing them. This is the most efficient
* way to receive data.
@@ -341,7 +341,7 @@ class StreamingContext private[streaming] (
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them using the given key-value types and input format.
* Files must be written to the monitored directory by "moving" them from another
* location within the same file system. File names starting with . are ignored.
@@ -359,7 +359,7 @@ class StreamingContext private[streaming] (
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them using the given key-value types and input format.
* Files must be written to the monitored directory by "moving" them from another
* location within the same file system.
@@ -379,7 +379,7 @@ class StreamingContext private[streaming] (
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them using the given key-value types and input format.
* Files must be written to the monitored directory by "moving" them from another
* location within the same file system. File names starting with . are ignored.
@@ -403,7 +403,7 @@ class StreamingContext private[streaming] (
}
/**
- * Create a input stream that monitors a Hadoop-compatible filesystem
+ * Create an input stream that monitors a Hadoop-compatible filesystem
* for new files and reads them as text files (using key as LongWritable, value
* as Text and input format as TextInputFormat). Files must be written to the
* monitored directory by "moving" them from another location within the same
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
index 42fc84c19b..faf6db82d5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala
@@ -79,7 +79,7 @@ private[streaming] abstract class ReceiverSupervisor(
optionalBlockId: Option[StreamBlockId]
): Unit
- /** Store a iterator of received data as a data block into Spark's memory. */
+ /** Store an iterator of received data as a data block into Spark's memory. */
def pushIterator(
iterator: Iterator[_],
optionalMetadata: Option[Any],
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
index 4fb0f8caac..5ba09a54af 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala
@@ -129,7 +129,7 @@ private[streaming] class ReceiverSupervisorImpl(
pushAndReportBlock(ArrayBufferBlock(arrayBuffer), metadataOption, blockIdOption)
}
- /** Store a iterator of received data as a data block into Spark's memory. */
+ /** Store an iterator of received data as a data block into Spark's memory. */
def pushIterator(
iterator: Iterator[_],
metadataOption: Option[Any],
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
index f7b6584893..fb5587edec 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ExecutorAllocationManager.scala
@@ -38,7 +38,7 @@ import org.apache.spark.util.{Clock, Utils}
* - Periodically take the average batch completion times and compare with the batch interval
* - If (avg. proc. time / batch interval) >= scaling up ratio, then request more executors.
* The number of executors requested is based on the ratio = (avg. proc. time / batch interval).
- * - If (avg. proc. time / batch interval) <= scaling down ratio, then try to kill a executor that
+ * - If (avg. proc. time / batch interval) <= scaling down ratio, then try to kill an executor that
* is not running a receiver.
*
* This features should ideally be used in conjunction with backpressure, as backpressure ensures
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
index 4f124a1356..8e1a090618 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -67,7 +67,7 @@ private[streaming] class InputInfoTracker(ssc: StreamingContext) extends Logging
if (inputInfos.contains(inputInfo.inputStreamId)) {
throw new IllegalStateException(s"Input stream ${inputInfo.inputStreamId} for batch" +
- s"$batchTime is already added into InputInfoTracker, this is a illegal state")
+ s"$batchTime is already added into InputInfoTracker, this is an illegal state")
}
inputInfos += ((inputInfo.inputStreamId, inputInfo))
}
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
index 1ef26d2f86..60122b4813 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala
@@ -86,7 +86,7 @@ private[ui] class BatchPage(parent: StreamingTab) extends WebUIPage("batch") {
/**
* Generate a row for a Spark Job. Because duplicated output op infos needs to be collapsed into
- * one cell, we use "rowspan" for the first row of a output op.
+ * one cell, we use "rowspan" for the first row of an output op.
*/
private def generateNormalJobRow(
outputOpData: OutputOperationUIData,