diff options
Diffstat (limited to 'docs/mllib-clustering.md')
-rw-r--r-- | docs/mllib-clustering.md | 11 |
1 file changed, 6 insertions, 5 deletions
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index 65ed75b82e..50a8671560 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -48,14 +48,15 @@ optimal *k* is usually one where there is an "elbow" in the WSSSE graph.
 
 {% highlight scala %}
 import org.apache.spark.mllib.clustering.KMeans
+import org.apache.spark.mllib.linalg.Vectors
 
 // Load and parse the data
-val data = sc.textFile("kmeans_data.txt")
-val parsedData = data.map( _.split(' ').map(_.toDouble))
+val data = sc.textFile("data/kmeans_data.txt")
+val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))
 
 // Cluster the data into two classes using KMeans
-val numIterations = 20
 val numClusters = 2
+val numIterations = 20
 val clusters = KMeans.train(parsedData, numClusters, numIterations)
 
 // Evaluate clustering by computing Within Set Sum of Squared Errors
@@ -85,12 +86,12 @@ from numpy import array
 from math import sqrt
 
 # Load and parse the data
-data = sc.textFile("kmeans_data.txt")
+data = sc.textFile("data/kmeans_data.txt")
 parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')]))
 
 # Build the model (cluster the data)
 clusters = KMeans.train(parsedData, 2, maxIterations=10,
-        runs=30, initialization_mode="random")
+        runs=10, initialization_mode="random")
 
 # Evaluate clustering by computing Within Set Sum of Squared Errors
 def error(point):