diff options
Diffstat (limited to 'examples/src/main/python/ml/dataframe_example.py')
-rw-r--r-- | examples/src/main/python/ml/dataframe_example.py | 4 |
1 file changed, 3 insertions, 1 deletion
diff --git a/examples/src/main/python/ml/dataframe_example.py b/examples/src/main/python/ml/dataframe_example.py
index a7d8b9056d..c1818d72fe 100644
--- a/examples/src/main/python/ml/dataframe_example.py
+++ b/examples/src/main/python/ml/dataframe_example.py
@@ -28,6 +28,7 @@ import shutil
 
 from pyspark.sql import SparkSession
 from pyspark.mllib.stat import Statistics
+from pyspark.mllib.util import MLUtils
 
 if __name__ == "__main__":
     if len(sys.argv) > 2:
@@ -55,7 +56,8 @@ if __name__ == "__main__":
     labelSummary.show()
 
     # Convert features column to an RDD of vectors.
-    features = df.select("features").rdd.map(lambda r: r.features)
+    features = MLUtils.convertVectorColumnsFromML(df, "features") \
+        .select("features").rdd.map(lambda r: r.features)
     summary = Statistics.colStats(features)
     print("Selected features column with average values:\n" +
           str(summary.mean()))