diff options
-rw-r--r-- | data/mllib/als/test.data (renamed from mllib/data/als/test.data) | 0 | ||||
-rw-r--r-- | data/mllib/kmeans_data.txt (renamed from data/kmeans_data.txt) | 0 | ||||
-rwxr-xr-x | data/mllib/lr-data/random.data (renamed from mllib/data/lr-data/random.data) | 0 | ||||
-rw-r--r-- | data/mllib/lr_data.txt (renamed from data/lr_data.txt) | 0 | ||||
-rw-r--r-- | data/mllib/pagerank_data.txt (renamed from data/pagerank_data.txt) | 0 | ||||
-rw-r--r-- | data/mllib/ridge-data/lpsa.data (renamed from mllib/data/ridge-data/lpsa.data) | 0 | ||||
-rw-r--r-- | data/mllib/sample_libsvm_data.txt (renamed from mllib/data/sample_libsvm_data.txt) | 0 | ||||
-rw-r--r-- | data/mllib/sample_naive_bayes_data.txt (renamed from mllib/data/sample_naive_bayes_data.txt) | 0 | ||||
-rw-r--r-- | data/mllib/sample_svm_data.txt (renamed from mllib/data/sample_svm_data.txt) | 0 | ||||
-rw-r--r-- | data/mllib/sample_tree_data.csv (renamed from mllib/data/sample_tree_data.csv) | 0 | ||||
-rw-r--r-- | docs/bagel-programming-guide.md | 2 | ||||
-rw-r--r-- | docs/mllib-basics.md | 6 | ||||
-rw-r--r-- | docs/mllib-clustering.md | 4 | ||||
-rw-r--r-- | docs/mllib-collaborative-filtering.md | 4 | ||||
-rw-r--r-- | docs/mllib-decision-tree.md | 4 | ||||
-rw-r--r-- | docs/mllib-linear-methods.md | 8 | ||||
-rw-r--r-- | docs/mllib-naive-bayes.md | 2 | ||||
-rw-r--r-- | docs/mllib-optimization.md | 2 |
18 files changed, 16 insertions, 16 deletions
diff --git a/mllib/data/als/test.data b/data/mllib/als/test.data index e476cc23e0..e476cc23e0 100644 --- a/mllib/data/als/test.data +++ b/data/mllib/als/test.data diff --git a/data/kmeans_data.txt b/data/mllib/kmeans_data.txt index 338664f78d..338664f78d 100644 --- a/data/kmeans_data.txt +++ b/data/mllib/kmeans_data.txt diff --git a/mllib/data/lr-data/random.data b/data/mllib/lr-data/random.data index 29bcb8acba..29bcb8acba 100755 --- a/mllib/data/lr-data/random.data +++ b/data/mllib/lr-data/random.data diff --git a/data/lr_data.txt b/data/mllib/lr_data.txt index d4df0634e0..d4df0634e0 100644 --- a/data/lr_data.txt +++ b/data/mllib/lr_data.txt diff --git a/data/pagerank_data.txt b/data/mllib/pagerank_data.txt index 95755ab8f5..95755ab8f5 100644 --- a/data/pagerank_data.txt +++ b/data/mllib/pagerank_data.txt diff --git a/mllib/data/ridge-data/lpsa.data b/data/mllib/ridge-data/lpsa.data index fdd16e36b4..fdd16e36b4 100644 --- a/mllib/data/ridge-data/lpsa.data +++ b/data/mllib/ridge-data/lpsa.data diff --git a/mllib/data/sample_libsvm_data.txt b/data/mllib/sample_libsvm_data.txt index 861c70cde7..861c70cde7 100644 --- a/mllib/data/sample_libsvm_data.txt +++ b/data/mllib/sample_libsvm_data.txt diff --git a/mllib/data/sample_naive_bayes_data.txt b/data/mllib/sample_naive_bayes_data.txt index 981da382d6..981da382d6 100644 --- a/mllib/data/sample_naive_bayes_data.txt +++ b/data/mllib/sample_naive_bayes_data.txt diff --git a/mllib/data/sample_svm_data.txt b/data/mllib/sample_svm_data.txt index 7ab30bd93c..7ab30bd93c 100644 --- a/mllib/data/sample_svm_data.txt +++ b/data/mllib/sample_svm_data.txt diff --git a/mllib/data/sample_tree_data.csv b/data/mllib/sample_tree_data.csv index bc97e2941a..bc97e2941a 100644 --- a/mllib/data/sample_tree_data.csv +++ b/data/mllib/sample_tree_data.csv diff --git a/docs/bagel-programming-guide.md b/docs/bagel-programming-guide.md index b280df0c8e..7e55131754 100644 --- a/docs/bagel-programming-guide.md +++ b/docs/bagel-programming-guide.md @@ -46,7 +46,7 @@ import org.apache.spark.bagel.Bagel._ Next, we load a sample graph from a text file as a distributed dataset and package it into `PRVertex` objects. We also cache the distributed dataset because Bagel will use it multiple times and we'd like to avoid recomputing it. {% highlight scala %} -val input = sc.textFile("data/pagerank_data.txt") +val input = sc.textFile("data/mllib/pagerank_data.txt") val numVerts = input.count() diff --git a/docs/mllib-basics.md b/docs/mllib-basics.md index 5796e16e8f..f9585251fa 100644 --- a/docs/mllib-basics.md +++ b/docs/mllib-basics.md @@ -193,7 +193,7 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD -val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "mllib/data/sample_libsvm_data.txt") +val examples: RDD[LabeledPoint] = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") {% endhighlight %} </div> @@ -207,7 +207,7 @@ import org.apache.spark.mllib.util.MLUtils; import org.apache.spark.api.java.JavaRDD; JavaRDD<LabeledPoint> examples = - MLUtils.loadLibSVMFile(jsc.sc(), "mllib/data/sample_libsvm_data.txt").toJavaRDD(); + MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt").toJavaRDD(); {% endhighlight %} </div> @@ -218,7 +218,7 @@ examples stored in LIBSVM format. {% highlight python %} from pyspark.mllib.util import MLUtils -examples = MLUtils.loadLibSVMFile(sc, "mllib/data/sample_libsvm_data.txt") +examples = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") {% endhighlight %} </div> </div> diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 429cdf8d40..c76ac010d3 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -51,7 +51,7 @@ import org.apache.spark.mllib.clustering.KMeans import org.apache.spark.mllib.linalg.Vectors // Load and parse the data -val data = sc.textFile("data/kmeans_data.txt") +val data = sc.textFile("data/mllib/kmeans_data.txt") val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))) // Cluster the data into two classes using KMeans @@ -86,7 +86,7 @@ from numpy import array from math import sqrt # Load and parse the data -data = sc.textFile("data/kmeans_data.txt") +data = sc.textFile("data/mllib/kmeans_data.txt") parsedData = data.map(lambda line: array([float(x) for x in line.split(' ')])) # Build the model (cluster the data) diff --git a/docs/mllib-collaborative-filtering.md b/docs/mllib-collaborative-filtering.md index d51002f015..5cd7173872 100644 --- a/docs/mllib-collaborative-filtering.md +++ b/docs/mllib-collaborative-filtering.md @@ -58,7 +58,7 @@ import org.apache.spark.mllib.recommendation.ALS import org.apache.spark.mllib.recommendation.Rating // Load and parse the data -val data = sc.textFile("mllib/data/als/test.data") +val data = sc.textFile("data/mllib/als/test.data") val ratings = data.map(_.split(',') match { case Array(user, item, rate) => Rating(user.toInt, item.toInt, rate.toDouble) }) @@ -112,7 +112,7 @@ from pyspark.mllib.recommendation import ALS from numpy import array # Load and parse the data -data = sc.textFile("mllib/data/als/test.data") +data = sc.textFile("data/mllib/als/test.data") ratings = data.map(lambda line: array([float(x) for x in line.split(',')])) # Build the recommendation model using Alternating Least Squares diff --git a/docs/mllib-decision-tree.md b/docs/mllib-decision-tree.md index 3002a66a4f..9cd768599e 100644 --- a/docs/mllib-decision-tree.md +++ b/docs/mllib-decision-tree.md @@ -122,7 +122,7 @@ import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.impurity.Gini // Load and parse the data file -val data = sc.textFile("mllib/data/sample_tree_data.csv") +val data = sc.textFile("data/mllib/sample_tree_data.csv") val parsedData = data.map { line => val parts = line.split(',').map(_.toDouble) LabeledPoint(parts(0), Vectors.dense(parts.tail)) @@ -161,7 +161,7 @@ import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.impurity.Variance // Load and parse the data file -val data = sc.textFile("mllib/data/sample_tree_data.csv") +val data = sc.textFile("data/mllib/sample_tree_data.csv") val parsedData = data.map { line => val parts = line.split(',').map(_.toDouble) LabeledPoint(parts(0), Vectors.dense(parts.tail)) diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md index 4dfbebbcd0..b4d22e0df5 100644 --- a/docs/mllib-linear-methods.md +++ b/docs/mllib-linear-methods.md @@ -187,7 +187,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLUtils // Load training data in LIBSVM format. -val data = MLUtils.loadLibSVMFile(sc, "mllib/data/sample_libsvm_data.txt") +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") // Split data into training (60%) and test (40%). val splits = data.randomSplit(Array(0.6, 0.4), seed = 11L) @@ -259,7 +259,7 @@ def parsePoint(line): values = [float(x) for x in line.split(' ')] return LabeledPoint(values[0], values[1:]) -data = sc.textFile("mllib/data/sample_svm_data.txt") +data = sc.textFile("data/mllib/sample_svm_data.txt") parsedData = data.map(parsePoint) # Build the model @@ -309,7 +309,7 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.linalg.Vectors // Load and parse the data -val data = sc.textFile("mllib/data/ridge-data/lpsa.data") +val data = sc.textFile("data/mllib/ridge-data/lpsa.data") val parsedData = data.map { line => val parts = line.split(',') LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) @@ -356,7 +356,7 @@ def parsePoint(line): values = [float(x) for x in line.replace(',', ' ').split(' ')] return LabeledPoint(values[0], values[1:]) -data = sc.textFile("mllib/data/ridge-data/lpsa.data") +data = sc.textFile("data/mllib/ridge-data/lpsa.data") parsedData = data.map(parsePoint) # Build the model diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md index 1d1d7dcf6f..b1650c83c9 100644 --- a/docs/mllib-naive-bayes.md +++ b/docs/mllib-naive-bayes.md @@ -40,7 +40,7 @@ import org.apache.spark.mllib.classification.NaiveBayes import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint -val data = sc.textFile("mllib/data/sample_naive_bayes_data.txt") +val data = sc.textFile("data/mllib/sample_naive_bayes_data.txt") val parsedData = data.map { line => val parts = line.split(',') LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble))) diff --git a/docs/mllib-optimization.md b/docs/mllib-optimization.md index ae9ede58e8..651958c781 100644 --- a/docs/mllib-optimization.md +++ b/docs/mllib-optimization.md @@ -214,7 +214,7 @@ import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.MLUtils import org.apache.spark.mllib.classification.LogisticRegressionModel -val data = MLUtils.loadLibSVMFile(sc, "mllib/data/sample_libsvm_data.txt") +val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt") val numFeatures = data.take(1)(0).features.size // Split data into training (60%) and test (40%). |