author     Zheng RuiFeng <ruifengz@foxmail.com>    2016-06-06 09:35:47 +0100
committer  Sean Owen <sowen@cloudera.com>          2016-06-06 09:35:47 +0100
commit     fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27 (patch)
tree       a653b3542d0671c8cb8b3ff7fa3755525c0606a4 /python
parent     32f2f95dbdfb21491e46d4b608fd4e8ac7ab8973 (diff)
download   spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.tar.gz
           spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.tar.bz2
           spark-fd8af397132fa1415a4c19d7f5cb5a41aa6ddb27.zip
[MINOR] Fix Typos 'an -> a'
## What changes were proposed in this pull request?

`an -> a`

Use cmds like `find . -name '*.R' | xargs -i sh -c "grep -in ' an [^aeiou]' {} && echo {}"` to generate candidates, and review them one by one.

## How was this patch tested?

manual tests

Author: Zheng RuiFeng <ruifengz@foxmail.com>

Closes #13515 from zhengruifeng/an_a.
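The one-liner above targets R sources; a rough Python sketch of the same candidate-finding heuristic (illustrative only, not part of the commit — the `python` root directory, glob pattern, regex, and output format are assumptions) could look like this:

```python
import re
from pathlib import Path

# Flag "an" followed by a word starting with a non-vowel, mirroring the
# grep pattern ' an [^aeiou]'. Hits are only candidates: phrases such as
# "an SQL" or "an hour" are correct despite matching, so review each one.
AN_CANDIDATE = re.compile(r"\ban [^aeiouAEIOU\s]")

def find_candidates(root="python"):
    for path in sorted(Path(root).rglob("*.py")):
        text = path.read_text(encoding="utf-8", errors="ignore")
        for lineno, line in enumerate(text.splitlines(), start=1):
            if AN_CANDIDATE.search(line):
                print(f"{path}:{lineno}: {line.strip()}")

if __name__ == "__main__":
    find_candidates()
```

As with the shell version, the output is only a list of candidates; each hit was reviewed by hand before the edits in this diff.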
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/ml/classification.py    | 4
-rw-r--r--  python/pyspark/ml/pipeline.py          | 2
-rw-r--r--  python/pyspark/mllib/classification.py | 2
-rw-r--r--  python/pyspark/mllib/common.py         | 2
-rw-r--r--  python/pyspark/rdd.py                  | 4
-rw-r--r--  python/pyspark/sql/session.py          | 2
-rw-r--r--  python/pyspark/sql/streaming.py        | 2
-rw-r--r--  python/pyspark/sql/types.py            | 2
-rw-r--r--  python/pyspark/streaming/dstream.py    | 4
9 files changed, 12 insertions, 12 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 7710fdf2e2..77badebeb4 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -351,7 +351,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
def roc(self):
"""
Returns the receiver operating characteristic (ROC) curve,
- which is an Dataframe having two fields (FPR, TPR) with
+ which is a Dataframe having two fields (FPR, TPR) with
(0.0, 0.0) prepended and (1.0, 1.0) appended to it.
.. seealso:: `Wikipedia reference \
@@ -380,7 +380,7 @@ class BinaryLogisticRegressionSummary(LogisticRegressionSummary):
@since("2.0.0")
def pr(self):
"""
- Returns the precision-recall curve, which is an Dataframe
+ Returns the precision-recall curve, which is a Dataframe
containing two fields recall, precision with (0.0, 1.0) prepended
to it.
diff --git a/python/pyspark/ml/pipeline.py b/python/pyspark/ml/pipeline.py
index eb6d3310d6..0777527134 100644
--- a/python/pyspark/ml/pipeline.py
+++ b/python/pyspark/ml/pipeline.py
@@ -42,7 +42,7 @@ class Pipeline(Estimator, MLReadable, MLWritable):
stage. If a stage is a :py:class:`Transformer`, its
:py:meth:`Transformer.transform` method will be called to produce
the dataset for the next stage. The fitted model from a
- :py:class:`Pipeline` is an :py:class:`PipelineModel`, which
+ :py:class:`Pipeline` is a :py:class:`PipelineModel`, which
consists of fitted models and transformers, corresponding to the
pipeline stages. If there are no stages, the pipeline acts as an
identity transformer.
diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py
index f186217031..3734f87405 100644
--- a/python/pyspark/mllib/classification.py
+++ b/python/pyspark/mllib/classification.py
@@ -52,7 +52,7 @@ class LinearClassificationModel(LinearModel):
Sets the threshold that separates positive predictions from
negative predictions. An example with prediction score greater
- than or equal to this threshold is identified as an positive,
+ than or equal to this threshold is identified as a positive,
and negative otherwise. It is used for binary classification
only.
"""
diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py
index 6bc2b1e646..31afdf576b 100644
--- a/python/pyspark/mllib/common.py
+++ b/python/pyspark/mllib/common.py
@@ -60,7 +60,7 @@ _picklable_classes = [
# this will call the MLlib version of pythonToJava()
def _to_java_object_rdd(rdd):
- """ Return an JavaRDD of Object by unpickling
+ """ Return a JavaRDD of Object by unpickling
It will convert each Python object into Java object by Pyrolite, whenever the
RDD is serialized in batch or not.
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 411e377a56..6afe769662 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1040,7 +1040,7 @@ class RDD(object):
If the elements in RDD do not vary (max == min) always returns
a single bucket.
- It will return an tuple of buckets and histogram.
+ It will return a tuple of buckets and histogram.
>>> rdd = sc.parallelize(range(51))
>>> rdd.histogram(2)
@@ -2211,7 +2211,7 @@ class RDD(object):
return values.collect()
def _to_java_object_rdd(self):
- """ Return an JavaRDD of Object by unpickling
+ """ Return a JavaRDD of Object by unpickling
It will convert each Python object into Java object by Pyrolite, whenever the
RDD is serialized in batch or not.
diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index 8f7dcb54a7..7c9f532f94 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -360,7 +360,7 @@ class SparkSession(object):
def _createFromLocal(self, data, schema):
"""
- Create an RDD for DataFrame from an list or pandas.DataFrame, returns
+ Create an RDD for DataFrame from a list or pandas.DataFrame, returns
the RDD and schema.
"""
# make sure data could consumed multiple times
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index cd75622ced..580aba651f 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -72,7 +72,7 @@ class ContinuousQuery(object):
@since(2.0)
def processAllAvailable(self):
- """Blocks until all available data in the source has been processed an committed to the
+ """Blocks until all available data in the source has been processed and committed to the
sink. This method is intended for testing. Note that in the case of continually arriving
data, this method may block forever. Additionally, this method is only guaranteed to block
until data that has been synchronously appended data to a stream source prior to invocation.
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 7d8d0230b4..bb2b95404a 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -1046,7 +1046,7 @@ def _need_converter(dataType):
def _create_converter(dataType):
- """Create an converter to drop the names of fields in obj """
+ """Create a converter to drop the names of fields in obj """
if not _need_converter(dataType):
return lambda x: x
diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py
index fb6c66f2df..59977dcb43 100644
--- a/python/pyspark/streaming/dstream.py
+++ b/python/pyspark/streaming/dstream.py
@@ -608,8 +608,8 @@ class DStream(object):
class TransformedDStream(DStream):
"""
- TransformedDStream is an DStream generated by an Python function
- transforming each RDD of an DStream to another RDDs.
+ TransformedDStream is a DStream generated by an Python function
+ transforming each RDD of a DStream to another RDDs.
Multiple continuous transformations of DStream can be combined into
one transformation.