authorNiranjan Padmanabhan <niranjan.padmanabhan@gmail.com>2017-01-04 15:07:29 +0000
committerSean Owen <sowen@cloudera.com>2017-01-04 15:07:29 +0000
commita1e40b1f5d651305bbd0ba05779263a44f607498 (patch)
treef70fcf889a0c6f366bc44f5d012ec7f3e91ffbcc /sql/core
parent7a82505817d479007adff6424473063d2003fcc1 (diff)
downloadspark-a1e40b1f5d651305bbd0ba05779263a44f607498.tar.gz
spark-a1e40b1f5d651305bbd0ba05779263a44f607498.tar.bz2
spark-a1e40b1f5d651305bbd0ba05779263a44f607498.zip
[MINOR][DOCS] Remove consecutive duplicated words/typo in Spark Repo
## What changes were proposed in this pull request?

There are many locations in the Spark repo where the same word occurs consecutively. Sometimes they are appropriately placed, but many times they are not. This PR removes the inappropriately duplicated words.

## How was this patch tested?

N/A, since only docs or comments were updated.

Author: Niranjan Padmanabhan <niranjan.padmanabhan@gmail.com>

Closes #16455 from neurons/np.structure_streaming_doc.
Diffstat (limited to 'sql/core')
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala                                 | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala              | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala             | 4
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala                  | 2
-rw-r--r--  sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala           | 2
6 files changed, 7 insertions, 7 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index c1cedd8541..2a06f3c47c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -361,7 +361,7 @@ class Dataset[T] private[sql](
* method used to map columns depend on the type of `U`:
* - When `U` is a class, fields for the class will be mapped to columns of the same name
* (case sensitivity is determined by `spark.sql.caseSensitive`).
- * - When `U` is a tuple, the columns will be be mapped by ordinal (i.e. the first column will
+ * - When `U` is a tuple, the columns will be mapped by ordinal (i.e. the first column will
* be assigned to `_1`).
* - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
* `DataFrame` will be used.
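For context on the doc comment fixed above: `as[U]` changes how a Dataset's columns are bound to the target type. A minimal sketch of the three cases it documents, assuming a local SparkSession (the data and names here are illustrative, not part of the patch):

    import org.apache.spark.sql.SparkSession

    case class Person(name: String, age: Long)

    val spark = SparkSession.builder().master("local[*]").appName("as-example").getOrCreate()
    import spark.implicits._

    val df = Seq(("Alice", 30L), ("Bob", 25L)).toDF("name", "age")

    val byName    = df.as[Person]                   // U is a class: columns matched to fields by name
    val byOrdinal = df.as[(String, Long)]           // U is a tuple: columns matched by ordinal (_1, _2)
    val firstOnly = df.select("name").as[String]    // U is a primitive: only the first column is used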
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
index bc290702dc..bad59961ac 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/PartitioningUtils.scala
@@ -41,7 +41,7 @@ object PartitionPath {
}
/**
- * Holds a directory in a partitioned collection of files as well as as the partition values
+ * Holds a directory in a partitioned collection of files as well as the partition values
* in the form of a Row. Before scanning, the files at `path` need to be enumerated.
*/
case class PartitionPath(values: InternalRow, path: Path)
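As background for the fixed comment: each partition directory contributes one `PartitionPath`, pairing the values parsed from the directory name with the path whose files get enumerated. A rough sketch using Spark-internal APIs, with a hypothetical table layout:

    import org.apache.hadoop.fs.Path
    import org.apache.spark.sql.catalyst.InternalRow

    // For a layout like /table/year=2017/month=1/part-00000.parquet,
    // partition discovery would produce roughly:
    val partition = PartitionPath(
      InternalRow(2017, 1),                  // partition values, in the form of a Row
      new Path("/table/year=2017/month=1"))  // files here are enumerated before scanning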
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
index 0ce47b152c..0b39965b31 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamWriter.scala
@@ -285,7 +285,7 @@ final class DataStreamWriter[T] private[sql](ds: Dataset[T]) {
/**
* Starts the execution of the streaming query, which will continually send results to the given
- * `ForeachWriter` as as new data arrives. The `ForeachWriter` can be used to send the data
+ * `ForeachWriter` as new data arrives. The `ForeachWriter` can be used to send the data
* generated by the `DataFrame`/`Dataset` to an external system.
*
* Scala example:
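The "Scala example" referenced in that doc comment continues beyond this hunk; as a stand-in, a minimal sketch of the `ForeachWriter` pattern it describes (the sink logic here is illustrative only):

    import org.apache.spark.sql.ForeachWriter

    val writer = new ForeachWriter[String] {
      def open(partitionId: Long, version: Long): Boolean = true  // true = process this partition
      def process(value: String): Unit = println(value)           // send each record to the external system
      def close(errorOrNull: Throwable): Unit = ()                // release any per-partition resources
    }

    // streamingDs is assumed to be a streaming Dataset[String]:
    // streamingDs.writeStream.foreach(writer).start()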
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
index 4296ec543e..22d5c47a6f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameTimeWindowingSuite.scala
@@ -257,7 +257,7 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B
}
}
- test("time window in SQL with with two expressions") {
+ test("time window in SQL with two expressions") {
withTempTable { table =>
checkAnswer(
spark.sql(
@@ -272,7 +272,7 @@ class DataFrameTimeWindowingSuite extends QueryTest with SharedSQLContext with B
}
}
- test("time window in SQL with with three expressions") {
+ test("time window in SQL with three expressions") {
withTempTable { table =>
checkAnswer(
spark.sql(
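The two- and three-expression forms named in these tests correspond to `window(timeColumn, windowDuration)` and `window(timeColumn, windowDuration, slideDuration)`. A hedged sketch of what such queries look like, assuming a temp view `events` with a timestamp column `ts`:

    // Two expressions: 10-second tumbling windows.
    spark.sql(
      """SELECT window(ts, '10 seconds') AS w, count(*) AS n
        |FROM events
        |GROUP BY window(ts, '10 seconds')""".stripMargin)

    // Three expressions: 10-second windows sliding every 5 seconds.
    spark.sql(
      """SELECT window(ts, '10 seconds', '5 seconds') AS w, count(*) AS n
        |FROM events
        |GROUP BY window(ts, '10 seconds', '5 seconds')""".stripMargin)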
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
index 375da224aa..0bfc92fdb6 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/PlannerSuite.scala
@@ -363,7 +363,7 @@ class PlannerSuite extends SharedSQLContext {
// This is a regression test for SPARK-9703
test("EnsureRequirements should not repartition if only ordering requirement is unsatisfied") {
// Consider an operator that imposes both output distribution and ordering requirements on its
- // children, such as sort sort merge join. If the distribution requirements are satisfied but
+ // children, such as sort merge join. If the distribution requirements are satisfied but
// the output ordering requirements are unsatisfied, then the planner should only add sorts and
// should not need to add additional shuffles / exchanges.
val outputOrdering = Seq(SortOrder(Literal(1), Ascending))
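To make the comment's scenario concrete: a sort merge join requires each child to be hash-partitioned on the join key (distribution) and sorted by it (ordering); when only the sort is missing, EnsureRequirements should add a Sort, not an Exchange. A small sketch that typically plans as a sort merge join (the configuration and data are illustrative):

    import spark.implicits._

    // Disable broadcast joins so the equi-join falls back to SortMergeJoin.
    spark.conf.set("spark.sql.autoBroadcastJoinThreshold", -1)

    val left  = Seq((1, "a"), (2, "b")).toDF("id", "l")
    val right = Seq((1, "x"), (2, "y")).toDF("id", "r")

    left.join(right, "id").explain()  // plan shows SortMergeJoin with Exchange + Sort per side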
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
index 22f59f63d6..f67444fbc4 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSinkSuite.scala
@@ -144,7 +144,7 @@ class FileStreamSinkSuite extends StreamTest {
}
// This tests whether FileStreamSink works with aggregations. Specifically, it tests
- // whether the the correct streaming QueryExecution (i.e. IncrementalExecution) is used to
+ // whether the correct streaming QueryExecution (i.e. IncrementalExecution) is used to
// to execute the trigger for writing data to file sink. See SPARK-18440 for more details.
test("writing with aggregation") {