diff options
Diffstat (limited to 'python/examples/kmeans.py')
-rwxr-xr-x | python/examples/kmeans.py | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/python/examples/kmeans.py b/python/examples/kmeans.py
index ba31af92fc..d8387b0b18 100755
--- a/python/examples/kmeans.py
+++ b/python/examples/kmeans.py
@@ -16,8 +16,13 @@
 #
 
 """
-This example requires numpy (http://www.numpy.org/)
+The K-means algorithm written from scratch against PySpark. In practice,
+one may prefer to use the KMeans algorithm in MLlib, as shown in
+python/examples/mllib/kmeans.py.
+
+This example requires NumPy (http://www.numpy.org/).
 """
+
 import sys
 
 import numpy as np
@@ -49,9 +54,7 @@ if __name__ == "__main__":
     K = int(sys.argv[3])
     convergeDist = float(sys.argv[4])
 
-    # TODO: change this after we port takeSample()
-    #kPoints = data.takeSample(False, K, 34)
-    kPoints = data.take(K)
+    kPoints = data.takeSample(False, K, 1)
     tempDist = 1.0
 
     while tempDist > convergeDist: