aboutsummaryrefslogtreecommitdiff
path: root/docs
diff options
context:
space:
mode:
Diffstat (limited to 'docs')
-rw-r--r-- docs/mllib-clustering.md 6
-rw-r--r-- docs/mllib-linear-methods.md 24
-rw-r--r-- docs/mllib-naive-bayes.md 2
3 files changed, 14 insertions, 18 deletions
diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md
index dac22f736e..1b088969dd 100644
--- a/docs/mllib-clustering.md
+++ b/docs/mllib-clustering.md
@@ -249,11 +249,11 @@ public class GaussianMixtureExample {
GaussianMixtureModel gmm = new GaussianMixture().setK(2).run(parsedData.rdd());
// Save and load GaussianMixtureModel
- gmm.save(sc, "myGMMModel")
- GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc, "myGMMModel")
+ gmm.save(sc.sc(), "myGMMModel");
+ GaussianMixtureModel sameModel = GaussianMixtureModel.load(sc.sc(), "myGMMModel");
// Output the parameters of the mixture model
for(int j=0; j<gmm.k(); j++) {
- System.out.println("weight=%f\nmu=%s\nsigma=\n%s\n",
+ System.out.printf("weight=%f\nmu=%s\nsigma=\n%s\n",
gmm.weights()[j], gmm.gaussians()[j].mu(), gmm.gaussians()[j].sigma());
}
}
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index 8029edca16..3dc8cc902f 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -163,11 +163,8 @@ object, and make predictions with the resulting model to compute the training
error.
{% highlight scala %}
-import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// Load training data in LIBSVM format.
@@ -231,15 +228,13 @@ calling `.rdd()` on your `JavaRDD` object. A self-contained application example
that is equivalent to the provided example in Scala is given below:
{% highlight java %}
-import java.util.Random;
-
import scala.Tuple2;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.classification.*;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
-import org.apache.spark.mllib.linalg.Vector;
+
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.SparkConf;
@@ -282,8 +277,8 @@ public class SVMClassifier {
System.out.println("Area under ROC = " + auROC);
// Save and load model
- model.save(sc.sc(), "myModelPath");
- SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath");
+ model.save(sc, "myModelPath");
+ SVMModel sameModel = SVMModel.load(sc, "myModelPath");
}
}
{% endhighlight %}
@@ -315,15 +310,12 @@ a dependency.
</div>
<div data-lang="python" markdown="1">
-The following example shows how to load a sample dataset, build Logistic Regression model,
+The following example shows how to load a sample dataset, build an SVM model,
and make predictions with the resulting model to compute the training error.
-Note that the Python API does not yet support model save/load but will in the future.
-
{% highlight python %}
-from pyspark.mllib.classification import LogisticRegressionWithSGD
+from pyspark.mllib.classification import SVMWithSGD, SVMModel
from pyspark.mllib.regression import LabeledPoint
-from numpy import array
# Load and parse the data
def parsePoint(line):
@@ -334,12 +326,16 @@ data = sc.textFile("data/mllib/sample_svm_data.txt")
parsedData = data.map(parsePoint)
# Build the model
-model = LogisticRegressionWithSGD.train(parsedData)
+model = SVMWithSGD.train(parsedData, iterations=100)
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
+
+# Save and load model
+model.save(sc, "myModelPath")
+sameModel = SVMModel.load(sc, "myModelPath")
{% endhighlight %}
</div>
</div>
diff --git a/docs/mllib-naive-bayes.md b/docs/mllib-naive-bayes.md
index acdcc37148..bf6d124fd5 100644
--- a/docs/mllib-naive-bayes.md
+++ b/docs/mllib-naive-bayes.md
@@ -53,7 +53,7 @@ val splits = parsedData.randomSplit(Array(0.6, 0.4), seed = 11L)
val training = splits(0)
val test = splits(1)
-val model = NaiveBayes.train(training, lambda = 1.0, model = "multinomial")
+val model = NaiveBayes.train(training, lambda = 1.0, modelType = "multinomial")
val predictionAndLabel = test.map(p => (model.predict(p.features), p.label))
val accuracy = 1.0 * predictionAndLabel.filter(x => x._1 == x._2).count() / test.count()