author    Jacek Laskowski <jacek@japila.pl>    2016-04-26 11:51:12 +0100
committer Sean Owen <sowen@cloudera.com>    2016-04-26 11:51:12 +0100
commit    b208229ba1768ee72eae7b6b0da0ac9ccff3be62 (patch)
tree      91f9b31a53a04b246695f54b64a2142f34ad7dd7
parent    de6e633420aba1fe5d806a2725a95e610699ae7d (diff)
[MINOR][DOCS] Minor typo fixes
## What changes were proposed in this pull request?

Minor typo fixes (too minor to deserve a separate JIRA).

## How was this patch tested?

Local build.

Author: Jacek Laskowski <jacek@japila.pl>

Closes #12469 from jaceklaskowski/minor-typo-fixes.
-rw-r--r--  graphx/src/main/scala/org/apache/spark/graphx/Graph.scala                                 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala                                2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala                                4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala       2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala    3
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala             4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala    2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala  2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala        4
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala    4
10 files changed, 14 insertions, 15 deletions
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 5485e30f5a..922ec7955f 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -365,7 +365,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
*
* @note By expressing computation at the edge level we achieve
* maximum parallelism. This is one of the core functions in the
- * Graph API in that enables neighborhood level computation. For
+ * Graph API that enables neighborhood level computation. For
* example this function can be used to count neighbors satisfying a
* predicate or implement PageRank.
*
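Note (not part of this commit): the comment above describes neighborhood-level computation expressed at the edge level. A minimal sketch of that idea using GraphX's `aggregateMessages`, assuming a pre-existing `graph` value, might look like this:

```scala
import org.apache.spark.graphx.{Graph, VertexRDD}

// Count each vertex's in-neighbors by sending a 1 along every edge to its
// destination vertex and summing the messages per vertex.
def countInNeighbors(graph: Graph[String, Int]): VertexRDD[Int] =
  graph.aggregateMessages[Int](
    sendMsg = ctx => ctx.sendToDst(1),
    mergeMsg = _ + _
  )
```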
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
index 1219d4d453..726291b96c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala
@@ -338,7 +338,7 @@ trait Row extends Serializable {
* Returns the index of a given field name.
*
* @throws UnsupportedOperationException when schema is not defined.
- * @throws IllegalArgumentException when fieldName do not exist.
+ * @throws IllegalArgumentException when a field `name` does not exist.
*/
def fieldIndex(name: String): Int = {
throw new UnsupportedOperationException("fieldIndex on a Row without schema is undefined.")
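Note (illustration only, assuming an existing SparkSession `spark`): `fieldIndex` is defined only for rows that carry a schema, such as those returned from a DataFrame; the column name in the failing call is hypothetical.

```scala
// Rows obtained from a DataFrame carry a schema, so fieldIndex works.
val row = spark.range(1).selectExpr("id", "id * 2 as doubled").first()

row.fieldIndex("doubled")    // 1
// row.fieldIndex("missing") // IllegalArgumentException: the field does not exist
// Row(1, 2).fieldIndex("id") // UnsupportedOperationException: Row has no schema
```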
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index b3064fd531..4c2bf12ac9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -458,8 +458,8 @@ class Dataset[T] protected[sql](
* Returns true if this [[Dataset]] contains one or more sources that continuously
* return data as it arrives. A [[Dataset]] that reads data from a streaming source
* must be executed as a [[ContinuousQuery]] using the `startStream()` method in
- * [[DataFrameWriter]]. Methods that return a single answer, (e.g., `count()` or
- * `collect()`) will throw an [[AnalysisException]] when there is a streaming
+ * [[DataFrameWriter]]. Methods that return a single answer, e.g. `count()` or
+ * `collect()`, will throw an [[AnalysisException]] when there is a streaming
* source present.
*
* @group basic
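Note (rough sketch, not from this commit): the `isStreaming` distinction described above, assuming a SparkSession `spark` and a hypothetical JSON directory. The `readStream` entry point used here is the later-stabilized name; the snapshot this commit targets exposed the equivalent functionality through the `startStream()` path mentioned in the doc.

```scala
val static = spark.read.json("/data/events")                        // isStreaming == false
val streaming = spark.readStream.schema(static.schema).json("/data/events")

streaming.isStreaming     // true
// streaming.count()      // AnalysisException: a streaming source is present,
//                        // so single-answer actions are rejected
```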
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
index ef626ef5fc..8e72e06b1f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSource.scala
@@ -143,7 +143,7 @@ case class DataSource(
caseInsensitiveOptions,
fileCatalog.allFiles())
}.getOrElse {
- throw new AnalysisException("Unable to infer schema. It must be specified manually.")
+ throw new AnalysisException("Unable to infer schema. It must be specified manually.")
}
}
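Note (illustration only, assuming a SparkSession `spark` and a hypothetical path): when a source cannot infer a schema, for example JSON over an empty directory, supplying one explicitly avoids the AnalysisException thrown above.

```scala
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

// Explicit schema, so no inference is attempted.
val schema = StructType(Seq(
  StructField("id", LongType),
  StructField("name", StringType)))

val df = spark.read.schema(schema).json("/data/empty-dir")
```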
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala
index ddba3ccb1b..99bf20c746 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HDFSMetadataLog.scala
@@ -46,8 +46,7 @@ import org.apache.spark.sql.SparkSession
* files in a directory always shows the latest files.
*/
class HDFSMetadataLog[T: ClassTag](sparkSession: SparkSession, path: String)
- extends MetadataLog[T]
- with Logging {
+ extends MetadataLog[T] with Logging {
import HDFSMetadataLog._
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
index 6457f928ed..1d2f7a8753 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala
@@ -34,8 +34,8 @@ trait Source {
def getOffset: Option[Offset]
/**
- * Returns the data that is is between the offsets (`start`, `end`]. When `start` is `None` then
- * the batch should begin with the first available record. This method must always return the
+ * Returns the data that is between the offsets (`start`, `end`]. When `start` is `None` then
+ * the batch should begin with the first available record. This method must always return the
* same data for a particular `start` and `end` pair.
*/
def getBatch(start: Option[Offset], end: Offset): DataFrame
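Note (toy illustration, not Spark code): the half-open `(start, end]` contract described above, with offsets modeled as positions into an append-only log. `start` is exclusive, `end` is inclusive, and `None` means "begin with the first available record".

```scala
case class ToyOffset(pos: Int)

def getToyBatch(log: Vector[String], start: Option[ToyOffset], end: ToyOffset): Vector[String] = {
  val from = start.map(_.pos).getOrElse(-1)   // None => include from the first record
  log.zipWithIndex.collect { case (rec, i) if i > from && i <= end.pos => rec }
}

// getToyBatch(Vector("a", "b", "c"), None, ToyOffset(1))               == Vector("a", "b")
// getToyBatch(Vector("a", "b", "c"), Some(ToyOffset(1)), ToyOffset(2)) == Vector("c")
```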
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
index ea3c73d984..fc18e5f065 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala
@@ -75,7 +75,7 @@ class StreamExecution(
/** The current batchId or -1 if execution has not yet been initialized. */
private var currentBatchId: Long = -1
- /** All stream sources present the query plan. */
+ /** All stream sources present in the query plan. */
private val sources =
logicalPlan.collect { case s: StreamingExecutionRelation => s.source }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
index 3341580fc4..4d65d2f4f5 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamingRelation.scala
@@ -33,7 +33,7 @@ object StreamingRelation {
* [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. This is only used for creating
* a streaming [[org.apache.spark.sql.DataFrame]] from [[org.apache.spark.sql.DataFrameReader]].
* It should be used to create [[Source]] and converted to [[StreamingExecutionRelation]] when
- * passing to [StreamExecution]] to run a query.
+ * passing to [[StreamExecution]] to run a query.
*/
case class StreamingRelation(dataSource: DataSource, sourceName: String, output: Seq[Attribute])
extends LeafNode {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
index 8ff82a3c74..6b1ecd08c1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala
@@ -204,7 +204,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest with SharedSQLContext {
createFileStreamSourceAndGetSchema(
format = Some("parquet"), path = Some(new File(src, "1").getCanonicalPath), schema = None)
}
- assert("Unable to infer schema. It must be specified manually.;" === e.getMessage)
+ assert("Unable to infer schema. It must be specified manually.;" === e.getMessage)
}
}
@@ -235,7 +235,7 @@ class FileStreamSourceSuite extends FileStreamSourceTest with SharedSQLContext {
createFileStreamSourceAndGetSchema(
format = Some("json"), path = Some(src.getCanonicalPath), schema = None)
}
- assert("Unable to infer schema. It must be specified manually.;" === e.getMessage)
+ assert("Unable to infer schema. It must be specified manually.;" === e.getMessage)
}
}
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
index d6ff96e1fc..b6394e36b5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala
@@ -418,7 +418,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
/**
* Return a new "state" DStream where the state for each key is updated by applying
* the given function on the previous state of the key and the new values of the key.
- * org.apache.spark.Partitioner is used to control the partitioning of each RDD.
+ * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD.
* @param updateFunc State update function. If `this` function returns None, then
* corresponding state key-value pair will be eliminated.
* @param partitioner Partitioner for controlling the partitioning of each RDD in the new
@@ -439,7 +439,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)])
/**
* Return a new "state" DStream where the state for each key is updated by applying
* the given function on the previous state of the key and the new values of each key.
- * org.apache.spark.Partitioner is used to control the partitioning of each RDD.
+ * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD.
* @param updateFunc State update function. Note, that this function may generate a different
* tuple with a different key than the input key. Therefore keys may be removed
* or added in this way. It is up to the developer to decide whether to
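Note (sketch only, assuming an existing `DStream[(String, Int)]` named `counts`): a typical `updateStateByKey` call with an explicit Partitioner, keeping a running total per key. Returning `None` from the update function drops that key's state, as the doc above describes; treating a zero total as "drop" is just an example policy.

```scala
import org.apache.spark.HashPartitioner
import org.apache.spark.streaming.dstream.DStream

val updateTotal: (Seq[Int], Option[Int]) => Option[Int] =
  (newValues, state) => {
    val total = state.getOrElse(0) + newValues.sum
    if (total == 0) None else Some(total)   // None eliminates the key's state
  }

val totals: DStream[(String, Int)] =
  counts.updateStateByKey(updateTotal, new HashPartitioner(4))
```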