diff options
Diffstat (limited to 'examples/src/main/python/kmeans.py')
-rwxr-xr-x | examples/src/main/python/kmeans.py | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py index 3426e491dc..92e0a3ae2e 100755 --- a/examples/src/main/python/kmeans.py +++ b/examples/src/main/python/kmeans.py @@ -27,7 +27,7 @@ from __future__ import print_function import sys import numpy as np -from pyspark import SparkContext +from pyspark.sql import SparkSession def parseVector(line): @@ -55,8 +55,12 @@ if __name__ == "__main__": as an example! Please refer to examples/src/main/python/ml/kmeans_example.py for an example on how to use ML's KMeans implementation.""", file=sys.stderr) - sc = SparkContext(appName="PythonKMeans") - lines = sc.textFile(sys.argv[1]) + spark = SparkSession\ + .builder\ + .appName("PythonKMeans")\ + .getOrCreate() + + lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0]) data = lines.map(parseVector).cache() K = int(sys.argv[2]) convergeDist = float(sys.argv[3]) @@ -79,4 +83,4 @@ if __name__ == "__main__": print("Final centers: " + str(kPoints)) - sc.stop() + spark.stop() |