diff options
Diffstat (limited to 'examples/src/main/python/ml')
6 files changed, 12 insertions, 17 deletions
diff --git a/examples/src/main/python/ml/decision_tree_classification_example.py b/examples/src/main/python/ml/decision_tree_classification_example.py index 0af92050e3..8cda56dbb9 100644 --- a/examples/src/main/python/ml/decision_tree_classification_example.py +++ b/examples/src/main/python/ml/decision_tree_classification_example.py @@ -28,7 +28,6 @@ from pyspark.ml import Pipeline from pyspark.ml.classification import DecisionTreeClassifier from pyspark.ml.feature import StringIndexer, VectorIndexer from pyspark.ml.evaluation import MulticlassClassificationEvaluator -from pyspark.mllib.util import MLUtils # $example off$ if __name__ == "__main__": @@ -36,8 +35,8 @@ if __name__ == "__main__": sqlContext = SQLContext(sc) # $example on$ - # Load and parse the data file, converting it to a DataFrame. - data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + # Load the data stored in LIBSVM format as a DataFrame. + data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Index labels, adding metadata to the label column. # Fit on whole dataset to include all labels in index. diff --git a/examples/src/main/python/ml/decision_tree_regression_example.py b/examples/src/main/python/ml/decision_tree_regression_example.py index 3857aed538..439e398947 100644 --- a/examples/src/main/python/ml/decision_tree_regression_example.py +++ b/examples/src/main/python/ml/decision_tree_regression_example.py @@ -28,7 +28,6 @@ from pyspark.ml import Pipeline from pyspark.ml.regression import DecisionTreeRegressor from pyspark.ml.feature import VectorIndexer from pyspark.ml.evaluation import RegressionEvaluator -from pyspark.mllib.util import MLUtils # $example off$ if __name__ == "__main__": @@ -36,8 +35,8 @@ if __name__ == "__main__": sqlContext = SQLContext(sc) # $example on$ - # Load and parse the data file, converting it to a DataFrame.
- data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + # Load the data stored in LIBSVM format as a DataFrame. + data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Automatically identify categorical features, and index them. # We specify maxCategories so features with > 4 distinct values are treated as continuous. diff --git a/examples/src/main/python/ml/gradient_boosted_trees.py b/examples/src/main/python/ml/gradient_boosted_trees.py index 6446f0fe5e..c3bf8aa2eb 100644 --- a/examples/src/main/python/ml/gradient_boosted_trees.py +++ b/examples/src/main/python/ml/gradient_boosted_trees.py @@ -24,7 +24,6 @@ from pyspark.ml.classification import GBTClassifier from pyspark.ml.feature import StringIndexer from pyspark.ml.regression import GBTRegressor from pyspark.mllib.evaluation import BinaryClassificationMetrics, RegressionMetrics -from pyspark.mllib.util import MLUtils from pyspark.sql import Row, SQLContext """ @@ -70,8 +69,8 @@ if __name__ == "__main__": sc = SparkContext(appName="PythonGBTExample") sqlContext = SQLContext(sc) - # Load and parse the data file into a dataframe. - df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + # Load the data stored in LIBSVM format as a DataFrame.
+ df = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Map labels into an indexed column of labels in [0, numLabels) stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") diff --git a/examples/src/main/python/ml/logistic_regression.py b/examples/src/main/python/ml/logistic_regression.py index 55afe1b207..4cd027fdfb 100644 --- a/examples/src/main/python/ml/logistic_regression.py +++ b/examples/src/main/python/ml/logistic_regression.py @@ -23,7 +23,6 @@ from pyspark import SparkContext from pyspark.ml.classification import LogisticRegression from pyspark.mllib.evaluation import MulticlassMetrics from pyspark.ml.feature import StringIndexer -from pyspark.mllib.util import MLUtils from pyspark.sql import SQLContext """ @@ -41,8 +40,8 @@ if __name__ == "__main__": sc = SparkContext(appName="PythonLogisticRegressionExample") sqlContext = SQLContext(sc) - # Load and parse the data file into a dataframe. - df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + # Load the data stored in LIBSVM format as a DataFrame.
+ df = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Map labels into an indexed column of labels in [0, numLabels) stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel") diff --git a/examples/src/main/python/ml/multilayer_perceptron_classification.py b/examples/src/main/python/ml/multilayer_perceptron_classification.py index d8ef9f39e3..f84588f547 100644 --- a/examples/src/main/python/ml/multilayer_perceptron_classification.py +++ b/examples/src/main/python/ml/multilayer_perceptron_classification.py @@ -22,7 +22,6 @@ from pyspark.sql import SQLContext # $example on$ from pyspark.ml.classification import MultilayerPerceptronClassifier from pyspark.ml.evaluation import MulticlassClassificationEvaluator -from pyspark.mllib.util import MLUtils # $example off$ if __name__ == "__main__": @@ -32,8 +31,8 @@ if __name__ == "__main__": # $example on$ # Load training data - data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_multiclass_classification_data.txt")\ - .toDF() + data = sqlContext.read.format("libsvm")\ + .load("data/mllib/sample_multiclass_classification_data.txt") # Split the data into train and test splits = data.randomSplit([0.6, 0.4], 1234) train = splits[0] diff --git a/examples/src/main/python/ml/random_forest_example.py b/examples/src/main/python/ml/random_forest_example.py index c7730e1bfa..dc6a778670 100644 --- a/examples/src/main/python/ml/random_forest_example.py +++ b/examples/src/main/python/ml/random_forest_example.py @@ -74,8 +74,8 @@ if __name__ == "__main__": sc = SparkContext(appName="PythonRandomForestExample") sqlContext = SQLContext(sc) - # Load and parse the data file into a dataframe. - df = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF() + # Load the data stored in LIBSVM format as a DataFrame.
+ df = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt") # Map labels into an indexed column of labels in [0, numLabels) stringIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel")