From c268ca4ddde2f5213b2e3985dcaaac5900aea71c Mon Sep 17 00:00:00 2001 From: y-shimizu Date: Fri, 11 Sep 2015 08:27:30 -0700 Subject: [SPARK-10518] [DOCS] Update code examples in spark.ml user guide to use LIBSVM data source instead of MLUtils I updated the example code in the spark.ml user guide to use the LIBSVM data source instead of MLUtils. Author: y-shimizu Closes #8697 from y-shimizu/SPARK-10518. --- docs/ml-linear-methods.md | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) (limited to 'docs/ml-linear-methods.md') diff --git a/docs/ml-linear-methods.md b/docs/ml-linear-methods.md index cdd9d4999f..4e94e2f9c7 100644 --- a/docs/ml-linear-methods.md +++ b/docs/ml-linear-methods.md @@ -59,10 +59,9 @@ $\alpha$ and `regParam` corresponds to $\lambda$.
{% highlight scala %} import org.apache.spark.ml.classification.LogisticRegression -import org.apache.spark.mllib.util.MLUtils // Load training data -val training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") val lr = new LogisticRegression() .setMaxIter(10) @@ -81,8 +80,6 @@ println(s"Weights: ${lrModel.weights} Intercept: ${lrModel.intercept}") {% highlight java %} import org.apache.spark.ml.classification.LogisticRegression; import org.apache.spark.ml.classification.LogisticRegressionModel; -import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.mllib.util.MLUtils; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; import org.apache.spark.sql.DataFrame; @@ -98,7 +95,7 @@ public class LogisticRegressionWithElasticNetExample { String path = "data/mllib/sample_libsvm_data.txt"; // Load training data - DataFrame training = sql.createDataFrame(MLUtils.loadLibSVMFile(sc, path).toJavaRDD(), LabeledPoint.class); + DataFrame training = sql.read().format("libsvm").load(path); LogisticRegression lr = new LogisticRegression() .setMaxIter(10) @@ -118,11 +115,9 @@ public class LogisticRegressionWithElasticNetExample {
{% highlight python %} from pyspark.ml.classification import LogisticRegression -from pyspark.mllib.regression import LabeledPoint -from pyspark.mllib.util import MLUtils # Load training data -training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") lr = LogisticRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8) @@ -251,10 +246,9 @@ regression model and extracting model summary statistics.
{% highlight scala %} import org.apache.spark.ml.regression.LinearRegression -import org.apache.spark.mllib.util.MLUtils // Load training data -val training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +val training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") val lr = new LinearRegression() .setMaxIter(10) @@ -283,8 +277,6 @@ import org.apache.spark.ml.regression.LinearRegression; import org.apache.spark.ml.regression.LinearRegressionModel; import org.apache.spark.ml.regression.LinearRegressionTrainingSummary; import org.apache.spark.mllib.linalg.Vectors; -import org.apache.spark.mllib.regression.LabeledPoint; -import org.apache.spark.mllib.util.MLUtils; import org.apache.spark.SparkConf; import org.apache.spark.SparkContext; import org.apache.spark.sql.DataFrame; @@ -300,7 +292,7 @@ public class LinearRegressionWithElasticNetExample { String path = "data/mllib/sample_libsvm_data.txt"; // Load training data - DataFrame training = sql.createDataFrame(MLUtils.loadLibSVMFile(sc, path).toJavaRDD(), LabeledPoint.class); + DataFrame training = sql.read().format("libsvm").load(path); LinearRegression lr = new LinearRegression() .setMaxIter(10) @@ -329,11 +321,9 @@ public class LinearRegressionWithElasticNetExample { {% highlight python %} from pyspark.ml.regression import LinearRegression -from pyspark.mllib.regression import LabeledPoint -from pyspark.mllib.util import MLUtils # Load training data -training = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() +training = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") lr = LinearRegression(maxIter=10, regParam=0.3, elasticNetParam=0.8) -- cgit v1.2.3