From 635888cbed0e3f4127252fb84db449f0cc9ed659 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Sun, 13 Jul 2014 19:27:43 -0700 Subject: SPARK-2363. Clean MLlib's sample data files (Just made a PR for this, mengxr was the reporter of:) MLlib has sample data under serveral folders: 1) data/mllib 2) data/ 3) mllib/data/* Per previous discussion with Matei Zaharia, we want to put them under `data/mllib` and clean outdated files. Author: Sean Owen Closes #1394 from srowen/SPARK-2363 and squashes the following commits: 54313dd [Sean Owen] Move ML example data from /mllib/data/ and /data/ into /data/mllib/ --- docs/mllib-clustering.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'docs/mllib-clustering.md') diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 429cdf8d40..c76ac010d3 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -51,7 +51,7 @@ import org.apache.spark.mllib.clustering.KMeans import org.apache.spark.mllib.linalg.Vectors // Load and parse the data -val data = sc.textFile("data/kmeans_data.txt") +val data = sc.textFile("data/mllib/kmeans_data.txt") val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) // Cluster the data into two classes using KMeans @@ -86,7 +86,7 @@ from numpy import array from math import sqrt # Load and parse the data -data = sc.textFile("data/kmeans_data.txt") +data = sc.textFile("data/mllib/kmeans_data.txt") parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')])) # Build the model (cluster the data) -- cgit v1.2.3