diff options
Diffstat (limited to 'examples/src/main/python/ml/decision_tree_classification_example.py')
-rw-r--r-- | examples/src/main/python/ml/decision_tree_classification_example.py | 5 |
1 files changed, 2 insertions, 3 deletions
diff --git a/examples/src/main/python/ml/decision_tree_classification_example.py b/examples/src/main/python/ml/decision_tree_classification_example.py
index 0af92050e3..8cda56dbb9 100644
--- a/examples/src/main/python/ml/decision_tree_classification_example.py
+++ b/examples/src/main/python/ml/decision_tree_classification_example.py
@@ -28,7 +28,6 @@ from pyspark.ml import Pipeline
 from pyspark.ml.classification import DecisionTreeClassifier
 from pyspark.ml.feature import StringIndexer, VectorIndexer
 from pyspark.ml.evaluation import MulticlassClassificationEvaluator
-from pyspark.mllib.util import MLUtils
 # $example off$
 
 if __name__ == "__main__":
@@ -36,8 +35,8 @@ if __name__ == "__main__":
     sqlContext = SQLContext(sc)
 
     # $example on$
-    # Load and parse the data file, converting it to a DataFrame.
-    data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt").toDF()
+    # Load the data stored in LIBSVM format as a DataFrame.
+    data = sqlContext.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
 
     # Index labels, adding metadata to the label column.
     # Fit on whole dataset to include all labels in index.