 core/src/main/java/org/apache/spark/memory/MemoryConsumer.java                               |  2
 core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java           |  5
 core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala                          |  5
 core/src/main/scala/org/apache/spark/scheduler/Task.scala                                    |  2
 core/src/main/scala/org/apache/spark/serializer/Serializer.scala                             |  2
 core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala                   |  3
 core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala                 |  4
 core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala                   |  4
 core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala                                |  2
 examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala                  |  2
 mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala                              |  2
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala        |  2
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala   |  6
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala       |  2
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala |  2
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala            |  2
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala            |  6
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala              |  5
 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala    |  2
 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala |  3
 sql/core/src/main/scala/org/apache/spark/sql/Column.scala                                    | 18
 sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala                             |  3
 sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala                              |  2
 sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala               |  2
 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala                |  2
 sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala                        | 23
 sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala                    | 20
 sql/core/src/main/scala/org/apache/spark/sql/functions.scala                                 |  2
 sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala              |  2
 streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala         |  2
 30 files changed, 68 insertions(+), 71 deletions(-)
diff --git a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
index fc1f3a8023..48cf4b9455 100644
--- a/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
+++ b/core/src/main/java/org/apache/spark/memory/MemoryConsumer.java
@@ -60,8 +60,6 @@ public abstract class MemoryConsumer {
/**
* Force spill during building.
- *
- * For testing.
*/
public void spill() throws IOException {
spill(Long.MAX_VALUE, this);
diff --git a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
index 4a15559e55..323a5d3c52 100644
--- a/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
+++ b/core/src/main/java/org/apache/spark/shuffle/sort/BypassMergeSortShuffleWriter.java
@@ -52,8 +52,7 @@ import org.apache.spark.util.Utils;
* This class implements sort-based shuffle's hash-style shuffle fallback path. This write path
* writes incoming records to separate files, one file per reduce partition, then concatenates these
* per-partition files to form a single output file, regions of which are served to reducers.
- * Records are not buffered in memory. This is essentially identical to
- * {@link org.apache.spark.shuffle.hash.HashShuffleWriter}, except that it writes output in a format
+ * Records are not buffered in memory. It writes output in a format
* that can be served / consumed via {@link org.apache.spark.shuffle.IndexShuffleBlockResolver}.
* <p>
* This write path is inefficient for shuffles with large numbers of reduce partitions because it
@@ -61,7 +60,7 @@ import org.apache.spark.util.Utils;
* {@link SortShuffleManager} only selects this write path when
* <ul>
* <li>no Ordering is specified,</li>
- * <li>no Aggregator is specific, and</li>
+ * <li>no Aggregator is specified, and</li>
* <li>the number of partitions is less than
* <code>spark.shuffle.sort.bypassMergeThreshold</code>.</li>
* </ul>
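
For reference, the selection check described by the list above boils down to roughly the following (a condensed sketch; the actual logic lives in SortShuffleWriter.shouldBypassMergeSort and may differ in detail):

    import org.apache.spark.{ShuffleDependency, SparkConf}

    def shouldBypassMergeSort(conf: SparkConf, dep: ShuffleDependency[_, _, _]): Boolean = {
      if (dep.mapSideCombine) {
        false  // map-side aggregation requires the sorting path
      } else {
        // the default threshold is 200 partitions
        val threshold = conf.getInt("spark.shuffle.sort.bypassMergeThreshold", 200)
        dep.partitioner.numPartitions <= threshold
      }
    }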
diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
index e4b9f8111e..9112d93a86 100644
--- a/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
+++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationClient.scala
@@ -71,13 +71,12 @@ private[spark] trait ExecutorAllocationClient {
/**
* Request that the cluster manager kill every executor on the specified host.
- * Results in a call to killExecutors for each executor on the host, with the replace
- * and force arguments set to true.
+ *
* @return whether the request is acknowledged by the cluster manager.
*/
def killExecutorsOnHost(host: String): Boolean
- /**
+ /**
* Request that the cluster manager kill the specified executor.
* @return whether the request is acknowledged by the cluster manager.
*/
diff --git a/core/src/main/scala/org/apache/spark/scheduler/Task.scala b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
index 46ef23f316..7fd2918960 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/Task.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/Task.scala
@@ -149,7 +149,7 @@ private[spark] abstract class Task[T](
def preferredLocations: Seq[TaskLocation] = Nil
- // Map output tracker epoch. Will be set by TaskScheduler.
+ // Map output tracker epoch. Will be set by TaskSetManager.
var epoch: Long = -1
// Task context, to be initialized in run().
diff --git a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
index 008b038789..01bbda0b5e 100644
--- a/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
+++ b/core/src/main/scala/org/apache/spark/serializer/Serializer.scala
@@ -77,7 +77,7 @@ abstract class Serializer {
* position = 0
* serOut.write(obj1)
* serOut.flush()
- * position = # of bytes writen to stream so far
+ * position = # of bytes written to stream so far
* obj1Bytes = output[0:position-1]
* serOut.write(obj2)
* serOut.flush()
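
The position-tracking pattern in that comment can be exercised concretely; a minimal sketch using JavaSerializer (any Serializer with the property described behaves the same way):

    import java.io.ByteArrayOutputStream
    import org.apache.spark.SparkConf
    import org.apache.spark.serializer.JavaSerializer

    val ser = new JavaSerializer(new SparkConf()).newInstance()
    val buffer = new ByteArrayOutputStream()
    val serOut = ser.serializeStream(buffer)

    serOut.writeObject("obj1")
    serOut.flush()
    val position = buffer.size()                         // # of bytes written to stream so far
    val obj1Bytes = buffer.toByteArray.slice(0, position)
    serOut.writeObject("obj2")
    serOut.close()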
diff --git a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
index 8b2e26cdd9..ba3e0e395e 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/BlockStoreShuffleReader.scala
@@ -95,8 +95,7 @@ private[spark] class BlockStoreShuffleReader[K, C](
// Sort the output if there is a sort ordering defined.
dep.keyOrdering match {
case Some(keyOrd: Ordering[K]) =>
- // Create an ExternalSorter to sort the data. Note that if spark.shuffle.spill is disabled,
- // the ExternalSorter won't spill to disk.
+ // Create an ExternalSorter to sort the data.
val sorter =
new ExternalSorter[K, C, C](context, ordering = Some(keyOrd), serializer = dep.serializer)
sorter.insertAll(aggregatedIter)
diff --git a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
index 91858f0912..1554048517 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/IndexShuffleBlockResolver.scala
@@ -61,7 +61,7 @@ private[spark] class IndexShuffleBlockResolver(
/**
* Remove data file and index file that contain the output data from one map.
- * */
+ */
def removeDataByMap(shuffleId: Int, mapId: Int): Unit = {
var file = getDataFile(shuffleId, mapId)
if (file.exists()) {
@@ -132,7 +132,7 @@ private[spark] class IndexShuffleBlockResolver(
* replace them with new ones.
*
* Note: the `lengths` will be updated to match the existing index file if use the existing ones.
- * */
+ */
def writeIndexFileAndCommit(
shuffleId: Int,
mapId: Int,
diff --git a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index 5e977a16fe..bfb4dc698e 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -82,13 +82,13 @@ private[spark] class SortShuffleManager(conf: SparkConf) extends ShuffleManager
override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)
/**
- * Register a shuffle with the manager and obtain a handle for it to pass to tasks.
+ * Obtains a [[ShuffleHandle]] to pass to tasks.
*/
override def registerShuffle[K, V, C](
shuffleId: Int,
numMaps: Int,
dependency: ShuffleDependency[K, V, C]): ShuffleHandle = {
- if (SortShuffleWriter.shouldBypassMergeSort(SparkEnv.get.conf, dependency)) {
+ if (SortShuffleWriter.shouldBypassMergeSort(conf, dependency)) {
// If there are fewer than spark.shuffle.sort.bypassMergeThreshold partitions and we don't
// need map-side aggregation, then write numPartitions files directly and just concatenate
// them at the end. This avoids doing serialization and deserialization twice to merge
diff --git a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
index 00e0cf257c..7479de5514 100644
--- a/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
+++ b/core/src/main/scala/org/apache/spark/util/AccumulatorV2.scala
@@ -279,7 +279,7 @@ private[spark] object AccumulatorContext {
/**
- * An [[AccumulatorV2 accumulator]] for computing sum, count, and averages for 64-bit integers.
+ * An [[AccumulatorV2 accumulator]] for computing sum, count, and average of 64-bit integers.
*
* @since 2.0.0
*/
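
A minimal usage sketch of that accumulator (assumes an active SparkContext named `sc`):

    val acc = sc.longAccumulator("total")
    sc.parallelize(1L to 4L).foreach(n => acc.add(n))
    // acc.sum == 10, acc.count == 4, acc.avg == 2.5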
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
index e07c9a4717..0658bddf16 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DataFrameExample.scala
@@ -30,7 +30,7 @@ import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.util.Utils
/**
- * An example of how to use [[org.apache.spark.sql.DataFrame]] for ML. Run with
+ * An example of how to use [[DataFrame]] for ML. Run with
* {{{
* ./bin/run-example ml.DataFrameExample [options]
* }}}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
index a7243ccbf2..d3c84b77d2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/stat/Correlation.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types.{StructField, StructType}
/**
* API for correlation functions in MLlib, compatible with Dataframes and Datasets.
*
- * The functions in this package generalize the functions in [[org.apache.spark.sql.Dataset.stat]]
+ * The functions in this package generalize the functions in [[org.apache.spark.sql.Dataset#stat]]
* to spark.ml's Vector types.
*/
@Since("2.2.0")
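
A short usage sketch of the API referred to above (assumes an active SparkSession named `spark`; the column values are illustrative):

    import org.apache.spark.ml.linalg.{Matrix, Vectors}
    import org.apache.spark.ml.stat.Correlation
    import org.apache.spark.sql.Row
    import spark.implicits._

    val df = Seq(Vectors.dense(1.0, 2.0), Vectors.dense(2.0, 4.1), Vectors.dense(3.0, 5.9))
      .map(Tuple1.apply).toDF("features")
    val Row(pearson: Matrix) = Correlation.corr(df, "features").head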
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
index 70438eb591..920033a9a8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveHints.scala
@@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
/**
* Collection of rules related to hints. The only hint currently available is broadcast join hint.
*
- * Note that this is separatedly into two rules because in the future we might introduce new hint
+ * Note that this is separated into two rules because in the future we might introduce new hint
* rules that have different ordering requirements from broadcast.
*/
object ResolveHints {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index 93fc565a53..ec003cdc17 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -229,9 +229,9 @@ case class ExpressionEncoder[T](
// serializer expressions are used to encode an object to a row, while the object is usually an
// intermediate value produced inside an operator, not from the output of the child operator. This
// is quite different from normal expressions, and `AttributeReference` doesn't work here
- // (intermediate value is not an attribute). We assume that all serializer expressions use a same
- // `BoundReference` to refer to the object, and throw exception if they don't.
- assert(serializer.forall(_.references.isEmpty), "serializer cannot reference to any attributes.")
+ // (intermediate value is not an attribute). We assume that all serializer expressions use the
+ // same `BoundReference` to refer to the object, and throw exception if they don't.
+ assert(serializer.forall(_.references.isEmpty), "serializer cannot reference any attributes.")
assert(serializer.flatMap { ser =>
val boundRefs = ser.collect { case b: BoundReference => b }
assert(boundRefs.nonEmpty,
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
index b93a5d0b7a..1db26d9c41 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala
@@ -491,7 +491,7 @@ abstract class BinaryExpression extends Expression {
* A [[BinaryExpression]] that is an operator, with two properties:
*
* 1. The string representation is "x symbol y", rather than "funcName(x, y)".
- * 2. Two inputs are expected to the be same type. If the two inputs have different types,
+ * 2. Two inputs are expected to be of the same type. If the two inputs have different types,
* the analyzer will find the tightest common type and do the proper type casting.
*/
abstract class BinaryOperator extends BinaryExpression with ExpectsInputTypes {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 07d294b108..b2a3888ff7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -695,7 +695,7 @@ case class DenseRank(children: Seq[Expression]) extends RankLike {
*
* This documentation has been based upon similar documentation for the Hive and Presto projects.
*
- * @param children to base the rank on; a change in the value of one the children will trigger a
+ * @param children to base the rank on; a change in the value of one of the children will trigger a
* change in rank. This is an internal parameter and will be assigned by the
* Analyser.
*/
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
index 174d546e22..257dbfac8c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala
@@ -65,7 +65,7 @@ object EliminateSerialization extends Rule[LogicalPlan] {
/**
* Combines two adjacent [[TypedFilter]]s, which operate on same type object in condition, into one,
- * mering the filter functions into one conjunctive function.
+ * merging the filter functions into one conjunctive function.
*/
object CombineTypedFilters extends Rule[LogicalPlan] {
def apply(plan: LogicalPlan): LogicalPlan = plan transform {
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index cd238e05d4..162051a8c0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -492,7 +492,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
}
/**
- * Add an [[Aggregate]] to a logical plan.
+ * Add an [[Aggregate]] or [[GroupingSets]] to a logical plan.
*/
private def withAggregation(
ctx: AggregationContext,
@@ -519,7 +519,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
}
/**
- * Add a Hint to a logical plan.
+ * Add a [[Hint]] to a logical plan.
*/
private def withHints(
ctx: HintContext,
@@ -545,7 +545,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with Logging {
}
/**
- * Create a single relation referenced in a FROM claused. This method is used when a part of the
+ * Create a single relation referenced in a FROM clause. This method is used when a part of the
* join condition is nested, for example:
* {{{
* select * from t1 join (t2 cross join t3) on col1 = col2
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
index 9fd95a4b36..2d8ec2053a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala
@@ -230,14 +230,15 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT
def producedAttributes: AttributeSet = AttributeSet.empty
/**
- * Attributes that are referenced by expressions but not provided by this nodes children.
+ * Attributes that are referenced by expressions but not provided by this node's children.
* Subclasses should override this method if they produce attributes internally as it is used by
* assertions designed to prevent the construction of invalid plans.
*/
def missingInput: AttributeSet = references -- inputSet -- producedAttributes
/**
- * Runs [[transform]] with `rule` on all expressions present in this query operator.
+ * Runs [[transformExpressionsDown]] with `rule` on all expressions present
+ * in this query operator.
* Users should not expect a specific directionality. If a specific directionality is needed,
* transformExpressionsDown or transformExpressionsUp should be used.
*
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index e22b429aec..f71a976bd7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -32,7 +32,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
private var _analyzed: Boolean = false
/**
- * Marks this plan as already analyzed. This should only be called by CheckAnalysis.
+ * Marks this plan as already analyzed. This should only be called by [[CheckAnalysis]].
*/
private[catalyst] def setAnalyzed(): Unit = { _analyzed = true }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index c2e62e7397..d1c6b50536 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -26,7 +26,8 @@ import org.apache.spark.sql.types._
import org.apache.spark.unsafe.types.CalendarInterval
/**
- * Test basic expression parsing. If a type of expression is supported it should be tested here.
+ * Test basic expression parsing.
+ * If the type of an expression is supported it should be tested here.
*
* Please note that some of the expressions test don't have to be sound expressions, only their
* structure needs to be valid. Unsound expressions should be caught by the Analyzer or
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index ae0703513c..43de2de7e7 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -84,8 +84,8 @@ class TypedColumn[-T, U](
}
/**
- * Gives the TypedColumn a name (alias).
- * If the current TypedColumn has metadata associated with it, this metadata will be propagated
+ * Gives the [[TypedColumn]] a name (alias).
+ * If the current `TypedColumn` has metadata associated with it, this metadata will be propagated
* to the new column.
*
* @group expr_ops
@@ -99,16 +99,14 @@ class TypedColumn[-T, U](
/**
* A column that will be computed based on the data in a `DataFrame`.
*
- * A new column is constructed based on the input columns present in a dataframe:
+ * A new column can be constructed based on the input columns present in a DataFrame:
*
* {{{
- * df("columnName") // On a specific DataFrame.
+ * df("columnName") // On a specific `df` DataFrame.
* col("columnName") // A generic column no yet associated with a DataFrame.
* col("columnName.field") // Extracting a struct field
* col("`a.column.with.dots`") // Escape `.` in column names.
* $"columnName" // Scala short hand for a named column.
- * expr("a + 1") // A column that is constructed from a parsed SQL Expression.
- * lit("abc") // A column that produces a literal (constant) value.
* }}}
*
* [[Column]] objects can be composed to form complex expressions:
@@ -118,7 +116,7 @@ class TypedColumn[-T, U](
* $"a" === $"b"
* }}}
*
- * @note The internal Catalyst expression can be accessed via "expr", but this method is for
+ * @note The internal Catalyst expression can be accessed via [[expr]], but this method is for
* debugging purposes only and can change in any future Spark releases.
*
* @groupname java_expr_ops Java-specific expression operators
@@ -1100,7 +1098,7 @@ class Column(val expr: Expression) extends Logging {
def asc_nulls_last: Column = withExpr { SortOrder(expr, Ascending, NullsLast, Set.empty) }
/**
- * Prints the expression to the console for debugging purpose.
+ * Prints the expression to the console for debugging purposes.
*
* @group df_ops
* @since 1.3.0
@@ -1154,8 +1152,8 @@ class Column(val expr: Expression) extends Logging {
* {{{
* val w = Window.partitionBy("name").orderBy("id")
* df.select(
- * sum("price").over(w.rangeBetween(Long.MinValue, 2)),
- * avg("price").over(w.rowsBetween(0, 4))
+ * sum("price").over(w.rangeBetween(Window.unboundedPreceding, 2)),
+ * avg("price").over(w.rowsBetween(Window.currentRow, 4))
* )
* }}}
*
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
index 18bccee98f..582d4a3670 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
@@ -24,7 +24,8 @@ import org.apache.spark.annotation.InterfaceStability
*
* To use this, import implicit conversions in SQL:
* {{{
- * import sqlContext.implicits._
+ * val spark: SparkSession = ...
+ * import spark.implicits._
* }}}
*
* @since 1.6.0
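
What the conversion enables once those implicits are in scope (sketch; assumes an active SparkSession named `spark`):

    import spark.implicits._
    val ds = Seq(1, 2, 3).toDS()           // Dataset[Int] via DatasetHolder.toDS()
    val df = Seq(("a", 1)).toDF("k", "v")  // DataFrame via DatasetHolder.toDF(colNames)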
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
index a97297892b..b60499253c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala
@@ -60,7 +60,7 @@ import org.apache.spark.util.Utils
* The builder can also be used to create a new session:
*
* {{{
- * SparkSession.builder()
+ * SparkSession.builder
* .master("local")
* .appName("Word Count")
* .config("spark.some.config.option", "some-value")
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
index e5a6a5f60b..470c736da9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/databases.scala
@@ -24,7 +24,7 @@ import org.apache.spark.sql.types.StringType
/**
* A command for users to list the databases/schemas.
- * If a databasePattern is supplied then the databases that only matches the
+ * If a databasePattern is supplied then only the databases that match the
* pattern would be listed.
* The syntax of using this command in SQL is:
* {{{
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 75ffe90f2b..311942f6db 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -25,7 +25,7 @@ import org.apache.spark.sql.types.StructType
* monotonically increasing notion of progress that can be represented as an [[Offset]]. Spark
* will regularly query each [[Source]] to see if any more data is available.
*/
-trait Source {
+trait Source {
/** Returns the schema of the data from this source */
def schema: StructType
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
index f3cf3052ea..00053485e6 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/Window.scala
@@ -113,7 +113,7 @@ object Window {
* Creates a [[WindowSpec]] with the frame boundaries defined,
* from `start` (inclusive) to `end` (inclusive).
*
- * Both `start` and `end` are relative positions from the current row. For example, "0" means
+ * Both `start` and `end` are positions relative to the current row. For example, "0" means
* "current row", while "-1" means the row before the current row, and "5" means the fifth row
* after the current row.
*
@@ -131,9 +131,9 @@ object Window {
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -150,7 +150,7 @@ object Window {
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rowsBetween.
@@ -162,7 +162,7 @@ object Window {
* Creates a [[WindowSpec]] with the frame boundaries defined,
* from `start` (inclusive) to `end` (inclusive).
*
- * Both `start` and `end` are relative from the current row. For example, "0" means "current row",
+ * Both `start` and `end` are relative to the current row. For example, "0" means "current row",
* while "-1" means one off before the current row, and "5" means the five off after the
* current row.
*
@@ -183,9 +183,9 @@ object Window {
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ *     Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -202,7 +202,7 @@ object Window {
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 2.1.0
*/
// Note: when updating the doc for this method, also update WindowSpec.rangeBetween.
@@ -221,7 +221,8 @@ object Window {
*
* {{{
* // PARTITION BY country ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
- * Window.partitionBy("country").orderBy("date").rowsBetween(Long.MinValue, 0)
+ * Window.partitionBy("country").orderBy("date")
+ * .rowsBetween(Window.unboundedPreceding, Window.currentRow)
*
* // PARTITION BY country ORDER BY date ROWS BETWEEN 3 PRECEDING AND 3 FOLLOWING
* Window.partitionBy("country").orderBy("date").rowsBetween(-3, 3)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
index de7d7a1772..6279d48c94 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/expressions/WindowSpec.scala
@@ -86,7 +86,7 @@ class WindowSpec private[sql](
* after the current row.
*
* We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
- * and `[Window.currentRow` to specify special boundary values, rather than using integral
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A row based boundary is based on the position of the row within the partition.
@@ -99,9 +99,9 @@ class WindowSpec private[sql](
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rowsBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rowsBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -118,7 +118,7 @@ class WindowSpec private[sql](
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rowsBetween.
@@ -134,7 +134,7 @@ class WindowSpec private[sql](
* current row.
*
* We recommend users use `Window.unboundedPreceding`, `Window.unboundedFollowing`,
- * and `[Window.currentRow` to specify special boundary values, rather than using integral
+ * and `Window.currentRow` to specify special boundary values, rather than using integral
* values directly.
*
* A range based boundary is based on the actual value of the ORDER BY
@@ -150,9 +150,9 @@ class WindowSpec private[sql](
* import org.apache.spark.sql.expressions.Window
* val df = Seq((1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b"))
* .toDF("id", "category")
- * df.withColumn("sum",
- * sum('id) over Window.partitionBy('category).orderBy('id).rangeBetween(0,1))
- * .show()
+ * val byCategoryOrderedById =
+ * Window.partitionBy('category).orderBy('id).rangeBetween(Window.currentRow, 1)
+ * df.withColumn("sum", sum('id) over byCategoryOrderedById).show()
*
* +---+--------+---+
* | id|category|sum|
@@ -169,7 +169,7 @@ class WindowSpec private[sql](
* @param start boundary start, inclusive. The frame is unbounded if this is
* the minimum long value (`Window.unboundedPreceding`).
* @param end boundary end, inclusive. The frame is unbounded if this is the
- * maximum long value (`Window.unboundedFollowing`).
+ * maximum long value (`Window.unboundedFollowing`).
* @since 1.4.0
*/
// Note: when updating the doc for this method, also update Window.rangeBetween.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 0f9203065e..f07e043683 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -2968,7 +2968,7 @@ object functions {
*
* @param e a string column containing JSON data.
* @param schema the schema to use when parsing the json string
- * @param options options to control how the json is parsed. accepts the same options and the
+ * @param options options to control how the json is parsed. Accepts the same options as the
* json data source.
*
* @group collection_funcs
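
A usage sketch for the `options` parameter (assumes an active SparkSession named `spark`; `allowComments` is one of the JSON data source options, shown here purely as an example):

    import org.apache.spark.sql.functions.from_json
    import org.apache.spark.sql.types.{IntegerType, StructType}
    import spark.implicits._

    val schema = new StructType().add("a", IntegerType)
    val df = Seq("""{"a": 1 /* inline comment */}""").toDF("json")
    df.select(from_json($"json", schema, Map("allowComments" -> "true"))).show()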
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
index 8048c2ba2c..2f3dfa05e9 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveSessionStateBuilder.scala
@@ -28,7 +28,7 @@ import org.apache.spark.sql.hive.client.HiveClient
import org.apache.spark.sql.internal.{BaseSessionStateBuilder, SessionResourceLoader, SessionState}
/**
- * Builder that produces a Hive aware [[SessionState]].
+ * Builder that produces a Hive-aware `SessionState`.
*/
@Experimental
@InterfaceStability.Unstable
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
index 8e1a090618..639ac6de4f 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -66,7 +66,7 @@ private[streaming] class InputInfoTracker(ssc: StreamingContext) extends Logging
new mutable.HashMap[Int, StreamInputInfo]())
if (inputInfos.contains(inputInfo.inputStreamId)) {
- throw new IllegalStateException(s"Input stream ${inputInfo.inputStreamId} for batch" +
+ throw new IllegalStateException(s"Input stream ${inputInfo.inputStreamId} for batch " +
s"$batchTime is already added into InputInfoTracker, this is an illegal state")
}
inputInfos += ((inputInfo.inputStreamId, inputInfo))
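
Why the added trailing space matters when a message is concatenated across lines (illustration only, not Spark code):

    val id = 0; val batchTime = "1000 ms"
    s"Input stream $id for batch" + s"$batchTime is already added"   // "...for batch1000 ms is already added"
    s"Input stream $id for batch " + s"$batchTime is already added"  // "...for batch 1000 ms is already added"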