aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Josh Rosen <joshrosen@eecs.berkeley.edu> 2013-01-09 21:21:23 -0800
committer Josh Rosen <joshrosen@eecs.berkeley.edu> 2013-01-09 21:21:23 -0800
commit d55f2b98822faa7d71f5fce2bfa980f8265e0610 (patch)
tree 60600cbc73ed0b676e394fb9bc442b0da0461aa2 /python
parent 1a64432ba50904c3933d8a9539a619fc94b3b30b (diff)
download spark-d55f2b98822faa7d71f5fce2bfa980f8265e0610.tar.gz
spark-d55f2b98822faa7d71f5fce2bfa980f8265e0610.tar.bz2
spark-d55f2b98822faa7d71f5fce2bfa980f8265e0610.zip
Use take() instead of takeSample() in PySpark kmeans example.
This is a temporary change until we port takeSample().
Diffstat (limited to 'python')
-rw-r--r-- python/examples/kmeans.py 4
1 files changed, 3 insertions, 1 deletions
diff --git a/python/examples/kmeans.py b/python/examples/kmeans.py
index ad2be21178..72cf9f88c6 100644
--- a/python/examples/kmeans.py
+++ b/python/examples/kmeans.py
@@ -33,7 +33,9 @@ if __name__ == "__main__":
K = int(sys.argv[3])
convergeDist = float(sys.argv[4])
- kPoints = data.takeSample(False, K, 34)
+ # TODO: change this after we port takeSample()
+ #kPoints = data.takeSample(False, K, 34)
+ kPoints = data.take(K)
tempDist = 1.0
while tempDist > convergeDist: