about summary refs log tree commit diff
path: root/examples/src/main/python/mllib/binary_classification_metrics_example.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/mllib/binary_classification_metrics_example.py')
-rw-r--r-- examples/src/main/python/mllib/binary_classification_metrics_example.py 15
1 files changed, 10 insertions, 5 deletions
diff --git a/examples/src/main/python/mllib/binary_classification_metrics_example.py b/examples/src/main/python/mllib/binary_classification_metrics_example.py
index 8f0fc9d45d..daf000e38d 100644
--- a/examples/src/main/python/mllib/binary_classification_metrics_example.py
+++ b/examples/src/main/python/mllib/binary_classification_metrics_example.py
@@ -18,20 +18,25 @@
Binary Classification Metrics Example.
"""
from __future__ import print_function
-from pyspark import SparkContext
+from pyspark.sql import SparkSession
# $example on$
from pyspark.mllib.classification import LogisticRegressionWithLBFGS
from pyspark.mllib.evaluation import BinaryClassificationMetrics
-from pyspark.mllib.util import MLUtils
+from pyspark.mllib.regression import LabeledPoint
# $example off$
if __name__ == "__main__":
- sc = SparkContext(appName="BinaryClassificationMetricsExample")
+ spark = SparkSession\
+ .builder\
+ .appName("BinaryClassificationMetricsExample")\
+ .getOrCreate()
# $example on$
# Several of the methods available in scala are currently missing from pyspark
# Load training data in LIBSVM format
- data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_binary_classification_data.txt")
+ data = spark\
+ .read.format("libsvm").load("data/mllib/sample_binary_classification_data.txt")\
+ .rdd.map(lambda row: LabeledPoint(row[0], row[1]))
# Split data into training (60%) and test (40%)
training, test = data.randomSplit([0.6, 0.4], seed=11L)
@@ -53,4 +58,4 @@ if __name__ == "__main__":
print("Area under ROC = %s" % metrics.areaUnderROC)
# $example off$
- sc.stop()
+ spark.stop()