author     Zheng RuiFeng <ruifengz@foxmail.com>    2016-06-06 09:35:47 +0100
committer  Sean Owen <sowen@cloudera.com>          2016-06-06 09:35:47 +0100
commit     fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27 (patch)
tree       a653b3542d0671c8cb8b3ff7fa3755525c0606a4 /python
parent     32f2f95dbdfb21491e46d4b608fd4e8ac7ab8973 (diff)
download   spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.tar.gz
           spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.tar.bz2
           spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.zip
[MINOR] Fix Typos 'an -> a'
## What changes were proposed in this pull request?

`an -> a`

Use cmds like `find . -name '*.R' | xargs -i sh -c "grep -in ' an [^aeiou]' {} && echo {}"` to generate candidates, and review them one by one.

## How was this patch tested?

manual tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13515 from zhengruifeng/an_a.
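The one-liner above targets R sources; a rough Python sketch of the same candidate-finding heuristic (illustrative only, not part of the commit — the `python` root directory, glob pattern, regex, and output format are assumptions) could look like this:

```python
import re
from pathlib import Path

# Flag "an" followed by a word starting with a non-vowel, mirroring the
# grep pattern ' an [^aeiou]'. Hits are only candidates: phrases such as
# "an SQL" or "an hour" are correct despite matching, so review each one.
AN_CANDIDATE = re.compile(r"\ban [^aeiouAEIOU\s]")

def find_candidates(root="python"):
    for path in sorted(Path(root).rglob("*.py")):
        text = path.read_text(encoding="utf-8", errors="ignore")
        for lineno, line in enumerate(text.splitlines(), start=1):
            if AN_CANDIDATE.search(line):
                print(f"{path}:{lineno}: {line.strip()}")

if __name__ == "__main__":
    find_candidates()
```

As with the shell version, the output is only a list of candidates; each hit was reviewed by hand before the edits in this diff.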
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/ml/classification.py    | 4
-rw-r--r--  python/pyspark/ml/pipeline.py          | 2
-rw-r--r--  python/pyspark/mllib/classification.py | 2
-rw-r--r--  python/pyspark/mllib/common.py         | 2
-rw-r--r--  python/pyspark/rdd.py                  | 4
-rw-r--r--  python/pyspark/sql/session.py          | 2
-rw-r--r--  python/pyspark/sql/streaming.py        | 2
-rw-r--r--  python/pyspark/sql/types.py            | 2
-rw-r--r--  python/pyspark/streaming/dstream.py    | 4
9 files changed, 12 insertions, 12 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 7710fdf2e2..77badebeb4 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -351,7 +351,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
def roc(self):
"""
Returns the receiver operating characteristic (ROC) curve,
- which is an Dataframe having two fields (FPR, TPR) with
+ which is a Dataframe having two fields (FPR, TPR) with
(0.0, 0.0) prepended and (1.0, 1.0) appended to it.
.. seealso:: `Wikipedia reference \
@@ -380,7 +380,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
@since("2.0.0")
def pr(self):
"""
- Returns the precision-recall curve, which is an Dataframe
+ Returns the precision-recall curve, which is a Dataframe
containing two fields recall, precision with (0.0, 1.0) prepended
to it.
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index eb6d3310d6..0777527134 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -42,7 +42,7 @@ class Pipeline(Estimator, MLReadable, MLWritable):
stage. If a stage is a :py:class:`Transformer`, its
:py:meth:`Transformer.transform` method will be called to produce
the dataset for the next stage. The fitted model from a
- :py:class:`Pipeline` is an :py:class:`PipelineModel`, which
+ :py:class:`Pipeline` is a :py:class:`PipelineModel`, which
consists of fitted models and transformers, corresponding to the
pipeline stages. If there are no stages, the pipeline acts as an
identity transformer.
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index f186217031..3734f87405 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -52,7 +52,7 @@ class LinearClassificationModel(LinearModel):
Sets the threshold that separates positive predictions from
negative predictions. An example with prediction score greater
- than or equal to this threshold is identified as an positive,
+ than or equal to this threshold is identified as a positive,
and negative otherwise. It is used for binary classification
only.
"""
diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py
index 6bc2b1e646..31afdf576b 100644
--- a/python/pyspark/mllib/common.py
+++ b/python/pyspark/mllib/common.py
@@ -60,7 +60,7 @@ _picklable_classes = [
# this will call the MLlib version of pythonToJava()
def _to_java_object_rdd(rdd):
- """ Return an JavaRDD of Object by unpickling
+ """ Return a JavaRDD of Object by unpickling
It will convert each Python object into Java object by Pyrolite, whenever the
RDD is serialized in batch or not.
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 411e377a56..6afe769662 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1040,7 +1040,7 @@ class RDD(object):
If the elements in RDD do not vary (max == min) always returns
a single bucket.
- It will return an tuple of buckets and histogram.
+ It will return a tuple of buckets and histogram.
>>> rdd = sc.parallelize(range(51))
>>> rdd.histogram(2)
@@ -2211,7 +2211,7 @@ class RDD(object):
return values.collect()
def _to_java_object_rdd(self):
- """ Return an JavaRDD of Object by unpickling
+ """ Return a JavaRDD of Object by unpickling
It will convert each Python object into Java object by Pyrolite, whenever the
RDD is serialized in batch or not.
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 8f7dcb54a7..7c9f532f94 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -360,7 +360,7 @@ class SparkSession(object):
def _createFromLocal(self, data, schema):
"""
- Create an RDD for DataFrame from an list or pandas.DataFrame, returns
+ Create an RDD for DataFrame from a list or pandas.DataFrame, returns
the RDD and schema.
"""
# make sure data could consumed multiple times
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index cd75622ced..580aba651f 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -72,7 +72,7 @@ class ContinuousQuery(object):
@since(2.0)
def processAllAvailable(self):
- """Blocks until all available data in the source has been processed an committed to the
+ """Blocks until all available data in the source has been processed and committed to the
sink. This method is intended for testing. Note that in the case of continually arriving
data, this method may block forever. Additionally, this method is only guaranteed to block
until data that has been synchronously appended data to a stream source prior to invocation.
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 7d8d0230b4..bb2b95404a 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1046,7 +1046,7 @@ def _need_converter(dataType):
def _create_converter(dataType):
- """Create an converter to drop the names of fields in obj """
+ """Create a converter to drop the names of fields in obj """
if not _need_converter(dataType):
return lambda x: x
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index fb6c66f2df..59977dcb43 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -608,8 +608,8 @@ class DStream(object):
class TransformedDStream(DStream):
"""
- TransformedDStream is an DStream generated by an Python function
- transforming each RDD of an DStream to another RDDs.
+ TransformedDStream is a DStream generated by an Python function
+ transforming each RDD of a DStream to another RDDs.
Multiple continuous transformations of DStream can be combined into
one transformation.