Diffstat (limited to 'examples/src/main/python/mllib/correlations.py')
-rwxr-xr-x  examples/src/main/python/mllib/correlations.py | 19
1 file changed, 10 insertions(+), 9 deletions(-)
diff --git a/examples/src/main/python/mllib/correlations.py b/examples/src/main/python/mllib/correlations.py
index 4218eca822..0e13546b88 100755
--- a/examples/src/main/python/mllib/correlations.py
+++ b/examples/src/main/python/mllib/correlations.py
@@ -18,6 +18,7 @@
 """
 Correlations using MLlib.
 """
+from __future__ import print_function
 
 import sys
 
@@ -29,7 +30,7 @@ from pyspark.mllib.util import MLUtils
 
 if __name__ == "__main__":
     if len(sys.argv) not in [1, 2]:
-        print >> sys.stderr, "Usage: correlations (<file>)"
+        print("Usage: correlations (<file>)", file=sys.stderr)
         exit(-1)
     sc = SparkContext(appName="PythonCorrelations")
     if len(sys.argv) == 2:
@@ -41,20 +42,20 @@ if __name__ == "__main__":
     points = MLUtils.loadLibSVMFile(sc, filepath)\
         .map(lambda lp: LabeledPoint(lp.label, lp.features.toArray()))
 
-    print
-    print 'Summary of data file: ' + filepath
-    print '%d data points' % points.count()
+    print()
+    print('Summary of data file: ' + filepath)
+    print('%d data points' % points.count())
 
     # Statistics (correlations)
-    print
-    print 'Correlation (%s) between label and each feature' % corrType
-    print 'Feature\tCorrelation'
+    print()
+    print('Correlation (%s) between label and each feature' % corrType)
+    print('Feature\tCorrelation')
     numFeatures = points.take(1)[0].features.size
     labelRDD = points.map(lambda lp: lp.label)
     for i in range(numFeatures):
         featureRDD = points.map(lambda lp: lp.features[i])
         corr = Statistics.corr(labelRDD, featureRDD, corrType)
-        print '%d\t%g' % (i, corr)
-    print
+        print('%d\t%g' % (i, corr))
+    print()
 
     sc.stop()
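
For reference, a minimal standalone sketch of the pattern the patched example uses: correlating two RDDs with pyspark.mllib.stat.Statistics.corr, written with the same print_function import the commit introduces. The app name and the sample values below are illustrative only and are not part of the commit.

# Sketch only: assumes a local Spark installation; data is made up for illustration.
from __future__ import print_function

from pyspark import SparkContext
from pyspark.mllib.stat import Statistics

sc = SparkContext(appName="CorrelationsSketch")
labels = sc.parallelize([1.0, 0.0, 1.0, 1.0, 0.0])
feature = sc.parallelize([2.3, 0.1, 1.9, 2.7, 0.4])

# Statistics.corr accepts "pearson" (the example's default corrType) or "spearman".
print('label vs feature: %g' % Statistics.corr(labels, feature, "pearson"))

sc.stop()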