about | summary | refs | log | tree | commit | diff
path: root/examples/src/main/python/kmeans.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/kmeans.py')
-rwxr-xr-x  examples/src/main/python/kmeans.py  11
1 files changed, 6 insertions, 5 deletions
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py
index 86ef6f32c8..1939150646 100755
--- a/examples/src/main/python/kmeans.py
+++ b/examples/src/main/python/kmeans.py
@@ -22,6 +22,7 @@ examples/src/main/python/mllib/kmeans.py.
This example requires NumPy (http://www.numpy.org/).
"""
+from __future__ import print_function
import sys
@@ -47,12 +48,12 @@ def closestPoint(p, centers):
if __name__ == "__main__":
if len(sys.argv) != 4:
- print >> sys.stderr, "Usage: kmeans <file> <k> <convergeDist>"
+ print("Usage: kmeans <file> <k> <convergeDist>", file=sys.stderr)
exit(-1)
- print >> sys.stderr, """WARN: This is a naive implementation of KMeans Clustering and is given
+ print("""WARN: This is a naive implementation of KMeans Clustering and is given
as an example! Please refer to examples/src/main/python/mllib/kmeans.py for an example on
- how to use MLlib's KMeans implementation."""
+ how to use MLlib's KMeans implementation.""", file=sys.stderr)
sc = SparkContext(appName="PythonKMeans")
lines = sc.textFile(sys.argv[1])
@@ -69,13 +70,13 @@ if __name__ == "__main__":
pointStats = closest.reduceByKey(
lambda (x1, y1), (x2, y2): (x1 + x2, y1 + y2))
newPoints = pointStats.map(
- lambda (x, (y, z)): (x, y / z)).collect()
+ lambda xy: (xy[0], xy[1][0] / xy[1][1])).collect()
tempDist = sum(np.sum((kPoints[x] - y) ** 2) for (x, y) in newPoints)
for (x, y) in newPoints:
kPoints[x] = y
- print "Final centers: " + str(kPoints)
+ print("Final centers: " + str(kPoints))
sc.stop()