aboutsummaryrefslogtreecommitdiff
path: root/docs/ml-linear-methods.md
diff options
context:
space:
mode:
author: y-shimizu <y.shimizu0429@gmail.com> 2015-09-11 08:27:30 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-09-11 08:27:30 -0700
commitc268ca4ddde2f5213b2e3985dcaaac5900aea71c (patch)
tree5c16aace08404309354574729ff703d2a87ec822 /docs/ml-linear-methods.md
parent9bbe33f318c866c0b13088917542715062f0787f (diff)
downloadspark-c268ca4ddde2f5213b2e3985dcaaac5900aea71c.tar.gz
spark-c268ca4ddde2f5213b2e3985dcaaac5900aea71c.tar.bz2
spark-c268ca4ddde2f5213b2e3985dcaaac5900aea71c.zip
[SPARK-10518] [DOCS] Update code examples in spark.ml user guide to use LIBSVM data source instead of MLUtils
I fixed to use LIBSVM data source in the example code in spark.ml instead of MLUtils Author: y-shimizu <y.shimizu0429@gmail.com> Closes #8697 from y-shimizu/SPARK-10518.
Diffstat (limited to 'docs/ml-linear-methods.md')
-rw-r--r-- docs/ml-linear-methods.md | 22
1 file changed, 6 insertions, 16 deletions
diff --git a/docs/ml-linear-methods.md b/docs/ml-linear-methods.md
index cdd9d4999f..4e94e2f9c7 100644
--- a/docs/ml-linear-methods.md
+++ b/docs/ml-linear-methods.md
@@ -59,10 +59,9 @@ $\alpha$ and `regParam` corresponds to $\lambda$.
<div data-lang="scala" markdown="1">
{% highlight scala %}
import org.apache.spark.ml.classification.LogisticRegression
-import org.apache.spark.mllib.util.MLUtils
// Load training data
-val training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
val lr = new LogisticRegression()
.setMaxIter(10)
@@ -81,8 +80,6 @@ println(s"Weights: ${lrModel.weights} Intercept: ${lrModel.intercept}")
{% highlight java %}
import org.apache.spark.ml.classification.LogisticRegression;
import org.apache.spark.ml.classification.LogisticRegressionModel;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.DataFrame;
@@ -98,7 +95,7 @@ public class LogisticRegressionWithElasticNetExample {
String path = "data/mllib/sample_libsvm_data.txt";
// Load training data
- DataFrame training = sql.createDataFrame(MLUtils.loadLibSVMFile(sc, path).toJavaRDD(), LabeledPoint.class);
+ DataFrame training = sqlContext.read.format("libsvm").load(path);
LogisticRegression lr = new LogisticRegression()
.setMaxIter(10)
@@ -118,11 +115,9 @@ public class LogisticRegressionWithElasticNetExample {
<div data-lang="python" markdown="1">
{% highlight python %}
from pyspark.ml.classification import LogisticRegression
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.mllib.util import MLUtils
# Load training data
-training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)
@@ -251,10 +246,9 @@ regression model and extracting model summary statistics.
<div data-lang="scala" markdown="1">
{% highlight scala %}
import org.apache.spark.ml.regression.LinearRegression
-import org.apache.spark.mllib.util.MLUtils
// Load training data
-val training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
val lr = new LinearRegression()
.setMaxIter(10)
@@ -283,8 +277,6 @@ import org.apache.spark.ml.regression.LinearRegression;
import org.apache.spark.ml.regression.LinearRegressionModel;
import org.apache.spark.ml.regression.LinearRegressionTrainingSummary;
import org.apache.spark.mllib.linalg.Vectors;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.DataFrame;
@@ -300,7 +292,7 @@ public class LinearRegressionWithElasticNetExample {
String path = "data/mllib/sample_libsvm_data.txt";
// Load training data
- DataFrame training = sql.createDataFrame(MLUtils.loadLibSVMFile(sc, path).toJavaRDD(), LabeledPoint.class);
+ DataFrame training = sqlContext.read.format("libsvm").load(path);
LinearRegression lr = new LinearRegression()
.setMaxIter(10)
@@ -329,11 +321,9 @@ public class LinearRegressionWithElasticNetExample {
<!--- TODO: Add python model summaries once implemented -->
{% highlight python %}
from pyspark.ml.regression import LinearRegression
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.mllib.util import MLUtils
# Load training data
-training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8)