aboutsummaryrefslogtreecommitdiff
path: root/docs/ml-guide.md
diff options
context:
space:
mode:
Diffstat (limited to 'docs/ml-guide.md')
-rw-r--r-- docs/ml-guide.md 10
1 files changed, 2 insertions, 8 deletions
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index c293e71d28..be18a05361 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -867,10 +867,9 @@ The `ParamMap` which produces the best evaluation metric is selected as the best
import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.tuning.{ParamGridBuilder, TrainValidationSplit}
-import org.apache.spark.mllib.util.MLUtils
// Prepare training and test data.
-val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+val data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
val Array(training, test) = data.randomSplit(Array(0.9, 0.1), seed = 12345)
val lr = new LinearRegression()
@@ -911,14 +910,9 @@ import org.apache.spark.ml.evaluation.RegressionEvaluator;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.regression.LinearRegression;
import org.apache.spark.ml.tuning.*;
-import org.apache.spark.mllib.regression.LabeledPoint;
-import org.apache.spark.mllib.util.MLUtils;
-import org.apache.spark.rdd.RDD;
import org.apache.spark.sql.DataFrame;
-DataFrame data = sqlContext.createDataFrame(
- MLUtils.loadLibSVMFile(jsc.sc(), "data/mllib/sample_libsvm_data.txt"),
- LabeledPoint.class);
+DataFrame data = jsql.read().format("libsvm").load("data/mllib/sample_libsvm_data.txt");
// Prepare training and test data.
DataFrame[] splits = data.randomSplit(new double[] {0.9, 0.1}, 12345);