about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml
diff options
context:
space:
mode:
author: Niranjan Padmanabhan <niranjan.padmanabhan@gmail.com> 2017-01-04 15:07:29 +0000
committer: Sean Owen <sowen@cloudera.com> 2017-01-04 15:07:29 +0000
commit: a1e40b1f5d651305bbd0ba05779263a44f607498 (patch)
tree: f70fcf889a0c6f366bc44f5d012ec7f3e91ffbcc /python/pyspark/ml
parent: 7a82505817d479007adff6424473063d2003fcc1 (diff)
download: spark-a1e40b1f5d651305bbd0ba05779263a44f607498.tar.gz
spark-a1e40b1f5d651305bbd0ba05779263a44f607498.tar.bz2
spark-a1e40b1f5d651305bbd0ba05779263a44f607498.zip
[MINOR][DOCS] Remove consecutive duplicated words/typo in Spark Repo
## What changes were proposed in this pull request?

There are many locations in the Spark repo where the same word occurs consecutively. Sometimes they are appropriately placed, but many times they are not. This PR removes the inappropriately duplicated words.

## How was this patch tested?

N/A since only docs or comments were updated.

Author: Niranjan Padmanabhan <niranjan.padmanabhan@gmail.com>

Closes #16455 from neurons/np.structure_streaming_doc.
Diffstat (limited to 'python/pyspark/ml')
-rw-r--r-- python/pyspark/ml/clustering.py 2
-rw-r--r-- python/pyspark/ml/linalg/__init__.py 4
2 files changed, 3 insertions, 3 deletions
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 35d0aefa04..54510e0bea 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -699,7 +699,7 @@ class LDAModel(JavaModel):
@since("2.0.0")
def logPerplexity(self, dataset):
"""
- Calculate an upper bound bound on perplexity. (Lower is better.)
+ Calculate an upper bound on perplexity. (Lower is better.)
See Equation (16) in the Online LDA paper (Hoffman et al., 2010).
WARNING: If this model is an instance of :py:class:`DistributedLDAModel` (produced when
diff --git a/python/pyspark/ml/linalg/__init__.py b/python/pyspark/ml/linalg/__init__.py
index 1705c156ce..b765343251 100644
--- a/python/pyspark/ml/linalg/__init__.py
+++ b/python/pyspark/ml/linalg/__init__.py
@@ -481,7 +481,7 @@ class SparseVector(Vector):
>>> SparseVector(4, {1:1.0, 6:2.0})
Traceback (most recent call last):
...
- AssertionError: Index 6 is out of the the size of vector with size=4
+ AssertionError: Index 6 is out of the size of vector with size=4
>>> SparseVector(4, {-1:1.0})
Traceback (most recent call last):
...
@@ -521,7 +521,7 @@ class SparseVector(Vector):
if self.indices.size > 0:
assert np.max(self.indices) < self.size, \
- "Index %d is out of the the size of vector with size=%d" \
+ "Index %d is out of the size of vector with size=%d" \
% (np.max(self.indices), self.size)
assert np.min(self.indices) >= 0, \
"Contains negative index %d" % (np.min(self.indices))