Diffstat (limited to 'docs')
-rw-r--r--  docs/mllib-clustering.md      | 3 ++-
-rw-r--r--  docs/mllib-linear-methods.md  | 9 +++++----
-rw-r--r--  docs/mllib-optimization.md    | 1 +
3 files changed, 8 insertions(+), 5 deletions(-)
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index dfd9cd5728..d10bd63746 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -52,7 +52,7 @@ import org.apache.spark.mllib.linalg.Vectors
// Load and parse the data
val data = sc.textFile("data/mllib/kmeans_data.txt")
-val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))
+val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
// Cluster the data into two classes using KMeans
val numClusters = 2
@@ -100,6 +100,7 @@ public class KMeansExample {
}
}
);
+ parsedData.cache();
// Cluster the data into two classes using KMeans
int numClusters = 2;
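Note: the clustering hunks cache parsedData before it reaches KMeans. KMeans is iterative, so without cache() each iteration re-reads and re-parses the input file. A minimal sketch of the cached Scala pipeline, using the path and k = 2 from the example above (the iteration count of 20 is an illustrative assumption, not taken from the doc):

{% highlight scala %}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.mllib.linalg.Vectors

// Parse once, cache, and reuse across all KMeans iterations.
val data = sc.textFile("data/mllib/kmeans_data.txt")
val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()

// k = 2 as in the example above; 20 iterations is an illustrative choice.
val clusters = KMeans.train(parsedData, 2, 20)
{% endhighlight %}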
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index 9137f9dc1b..d31bec3e1b 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -396,7 +396,7 @@ val data = sc.textFile("data/mllib/ridge-data/lpsa.data")
val parsedData = data.map { line =>
val parts = line.split(',')
LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
-}
+}.cache()
// Building the model
val numIterations = 100
@@ -455,6 +455,7 @@ public class LinearRegression {
}
}
);
+ parsedData.cache();
// Building the model
int numIterations = 100;
@@ -470,7 +471,7 @@ public class LinearRegression {
}
}
);
- JavaRDD<Object> MSE = new JavaDoubleRDD(valuesAndPreds.map(
+ double MSE = new JavaDoubleRDD(valuesAndPreds.map(
new Function<Tuple2<Double, Double>, Object>() {
public Object call(Tuple2<Double, Double> pair) {
return Math.pow(pair._1() - pair._2(), 2.0);
@@ -553,8 +554,8 @@ but in practice you will likely want to use unlabeled vectors for test data.
{% highlight scala %}
-val trainingData = ssc.textFileStream('/training/data/dir').map(LabeledPoint.parse)
-val testData = ssc.textFileStream('/testing/data/dir').map(LabeledPoint.parse)
+val trainingData = ssc.textFileStream("/training/data/dir").map(LabeledPoint.parse).cache()
+val testData = ssc.textFileStream("/testing/data/dir").map(LabeledPoint.parse)
{% endhighlight %}
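Note: the linear-methods hunks make three fixes. The training RDD is cached before LinearRegressionWithSGD (which, like KMeans, passes over the data once per iteration); the Java MSE variable becomes a plain double, since JavaDoubleRDD's mean() returns a scalar rather than an RDD; and the streaming example's string literals gain double quotes, because single quotes denote Char in Scala and '/training/data/dir' would not compile. A sketch of the Scala train-and-score flow these examples describe, with numIterations = 100 as in the doc:

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.{LabeledPoint, LinearRegressionWithSGD}

// Parse and cache the training data; SGD revisits it on every iteration.
val parsedData = sc.textFile("data/mllib/ridge-data/lpsa.data").map { line =>
  val parts = line.split(',')
  LabeledPoint(parts(0).toDouble, Vectors.dense(parts(1).split(' ').map(_.toDouble)))
}.cache()

val model = LinearRegressionWithSGD.train(parsedData, 100)

// mean() yields a single Double, mirroring the Java fix
// from JavaRDD<Object> to double.
val valuesAndPreds = parsedData.map(p => (p.label, model.predict(p.features)))
val MSE = valuesAndPreds.map { case (v, p) => math.pow(v - p, 2) }.mean()
{% endhighlight %}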
diff --git a/docs/mllib-optimization.md b/docs/mllib-optimization.md
index 26ce5f3c50..45141c235b 100644
--- a/docs/mllib-optimization.md
+++ b/docs/mllib-optimization.md
@@ -217,6 +217,7 @@ import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
import org.apache.spark.mllib.classification.LogisticRegressionModel
+import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
val numFeatures = data.take(1)(0).features.size
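Note: the optimization hunk only adds the import that the L-BFGS example relies on; without it, LBFGS, LogisticGradient, and SquaredL2Updater do not resolve. A rough sketch of how the example wires them together after the lines shown above (the numCorrections, convergenceTol, maxNumIterations, and regParam values here are illustrative assumptions, not prescribed by the doc):

{% highlight scala %}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.optimization.{LBFGS, LogisticGradient, SquaredL2Updater}
import org.apache.spark.mllib.util.MLUtils

val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")
val numFeatures = data.take(1)(0).features.size

// Append a bias term and cache: L-BFGS evaluates the gradient repeatedly.
val training = data.map(p => (p.label, MLUtils.appendBias(p.features))).cache()

// LogisticGradient + SquaredL2Updater gives L2-regularized logistic regression.
val initialWeights = Vectors.dense(new Array[Double](numFeatures + 1))
val (weightsWithIntercept, lossHistory) = LBFGS.runLBFGS(
  training, new LogisticGradient(), new SquaredL2Updater(),
  10,     // numCorrections
  1e-4,   // convergenceTol
  20,     // maxNumIterations
  0.1,    // regParam
  initialWeights)
{% endhighlight %}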