aboutsummaryrefslogtreecommitdiff
path: root/docs/mllib-linear-methods.md
diff options
context:
space:
mode:
author: Yuhao Yang <hhbyyh@gmail.com> 2015-06-02 23:15:38 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-06-02 23:15:38 -0700
commit: 43adbd56114ba80039a23909b0a30d393eaacc62 (patch)
tree: 9f0330cd25c9484914a527f53bdb92b7d1698976 /docs/mllib-linear-methods.md
parent: ccaa823290cbe859cd224ac0f7071dfd0218b669 (diff)
download: spark-43adbd56114ba80039a23909b0a30d393eaacc62.tar.gz
spark-43adbd56114ba80039a23909b0a30d393eaacc62.tar.bz2
spark-43adbd56114ba80039a23909b0a30d393eaacc62.zip
[SPARK-8043] [MLLIB] [DOC] update NaiveBayes and SVM examples in doc
jira: https://issues.apache.org/jira/browse/SPARK-8043

I found some issues during testing the save/load examples in markdown Documents, as a part of 1.4 QA plan

Author: Yuhao Yang <hhbyyh@gmail.com>

Closes #6584 from hhbyyh/naiveDocExample and squashes the following commits:

a01a206 [Yuhao Yang] fix for Gaussian mixture
2fb8b96 [Yuhao Yang] update NaiveBayes and SVM examples in doc
Diffstat (limited to 'docs/mllib-linear-methods.md')
-rw-r--r-- docs/mllib-linear-methods.md 24
1 files changed, 10 insertions, 14 deletions
diff --git a/docs/mllib-linear-methods.md b/docs/mllib-linear-methods.md
index 8029edca16..3dc8cc902f 100644
--- a/docs/mllib-linear-methods.md
+++ b/docs/mllib-linear-methods.md
@@ -163,11 +163,8 @@ object, and make predictions with the resulting model to compute the training
error.
{% highlight scala %}
-import org.apache.spark.SparkContext
import org.apache.spark.mllib.classification.{SVMModel, SVMWithSGD}
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
-import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.util.MLUtils
// Load training data in LIBSVM format.
@@ -231,15 +228,13 @@ calling `.rdd()` on your `JavaRDD` object. A self-contained application example
that is equivalent to the provided example in Scala is given bellow:
{% highlight java %}
-import java.util.Random;
-
import scala.Tuple2;
import org.apache.spark.api.java.*;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.classification.*;
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics;
-import org.apache.spark.mllib.linalg.Vector;
+
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.SparkConf;
@@ -282,8 +277,8 @@ public class SVMClassifier {
System.out.println("Area under ROC = " + auROC);
// Save and load model
- model.save(sc.sc(), "myModelPath");
- SVMModel sameModel = SVMModel.load(sc.sc(), "myModelPath");
+ model.save(sc, "myModelPath");
+ SVMModel sameModel = SVMModel.load(sc, "myModelPath");
}
}
{% endhighlight %}
@@ -315,15 +310,12 @@ a dependency.
</div>
<div data-lang="python" markdown="1">
-The following example shows how to load a sample dataset, build Logistic Regression model,
+The following example shows how to load a sample dataset, build SVM model,
and make predictions with the resulting model to compute the training error.
-Note that the Python API does not yet support model save/load but will in the future.
-
{% highlight python %}
-from pyspark.mllib.classification import LogisticRegressionWithSGD
+from pyspark.mllib.classification import SVMWithSGD, SVMModel
from pyspark.mllib.regression import LabeledPoint
-from numpy import array
# Load and parse the data
def parsePoint(line):
@@ -334,12 +326,16 @@ data = sc.textFile("data/mllib/sample_svm_data.txt")
parsedData = data.map(parsePoint)
# Build the model
-model = LogisticRegressionWithSGD.train(parsedData)
+model = SVMWithSGD.train(parsedData, iterations=100)
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
+
+# Save and load model
+model.save(sc, "myModelPath")
+sameModel = SVMModel.load(sc, "myModelPath")
{% endhighlight %}
</div>
</div>