aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/python/kmeans.py
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/python/kmeans.py')
-rwxr-xr-xexamples/src/main/python/kmeans.py12
1 files changed, 8 insertions, 4 deletions
diff --git a/examples/src/main/python/kmeans.py b/examples/src/main/python/kmeans.py
index 3426e491dc..92e0a3ae2e 100755
--- a/examples/src/main/python/kmeans.py
+++ b/examples/src/main/python/kmeans.py
@@ -27,7 +27,7 @@ from __future__ import print_function
import sys
import numpy as np
-from pyspark import SparkContext
+from pyspark.sql import SparkSession
def parseVector(line):
@@ -55,8 +55,12 @@ if __name__ == "__main__":
as an example! Please refer to examples/src/main/python/ml/kmeans_example.py for an
example on how to use ML's KMeans implementation.""", file=sys.stderr)
- sc = SparkContext(appName="PythonKMeans")
- lines = sc.textFile(sys.argv[1])
+ spark = SparkSession\
+ .builder\
+ .appName("PythonKMeans")\
+ .getOrCreate()
+
+ lines = spark.read.text(sys.argv[1]).rdd.map(lambda r: r[0])
data = lines.map(parseVector).cache()
K = int(sys.argv[2])
convergeDist = float(sys.argv[3])
@@ -79,4 +83,4 @@ if __name__ == "__main__":
print("Final centers: " + str(kPoints))
- sc.stop()
+ spark.stop()