From 99f3c82776fe5ea4f89a9965a288c7447585dc2c Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 10 Jun 2016 18:29:26 -0700 Subject: [SPARK-14615][ML][FOLLOWUP] Fix Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms ## What changes were proposed in this pull request? This PR fixes Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms. I first ran the shell command `grep -r "from pyspark.mllib" .` and then executed every file it listed. Some of the tests in `ml` produced error messages like the one below: ``` pyspark.sql.utils.IllegalArgumentException: u'requirement failed: Input type must be VectorUDT but got org.apache.spark.mllib.linalg.VectorUDT@f71b0bce.' ``` So I fixed them to use the new APIs, in the same way as the Python tests that were fixed in https://github.com/apache/spark/pull/12627 ## How was this patch tested? Manually tested all the examples listed by `grep -r "from pyspark.mllib" .`. Author: hyukjinkwon Closes #13393 from HyukjinKwon/SPARK-14615. 
--- .../src/main/python/ml/aft_survival_regression.py | 2 +- examples/src/main/python/ml/chisq_selector_example.py | 2 +- examples/src/main/python/ml/dct_example.py | 2 +- .../src/main/python/ml/elementwise_product_example.py | 2 +- .../python/ml/estimator_transformer_param_example.py | 2 +- examples/src/main/python/ml/pca_example.py | 2 +- .../main/python/ml/polynomial_expansion_example.py | 2 +- examples/src/main/python/ml/simple_params_example.py | 19 +++++++++---------- .../src/main/python/ml/vector_assembler_example.py | 2 +- examples/src/main/python/ml/vector_slicer_example.py | 2 +- 10 files changed, 18 insertions(+), 19 deletions(-) (limited to 'examples/src') diff --git a/examples/src/main/python/ml/aft_survival_regression.py b/examples/src/main/python/ml/aft_survival_regression.py index 9879679829..060f0171ff 100644 --- a/examples/src/main/python/ml/aft_survival_regression.py +++ b/examples/src/main/python/ml/aft_survival_regression.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.regression import AFTSurvivalRegression -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/chisq_selector_example.py b/examples/src/main/python/ml/chisq_selector_example.py index 8bafb942e0..5e19ef1624 100644 --- a/examples/src/main/python/ml/chisq_selector_example.py +++ b/examples/src/main/python/ml/chisq_selector_example.py @@ -20,7 +20,7 @@ from __future__ import print_function from pyspark.sql import SparkSession # $example on$ from pyspark.ml.feature import ChiSqSelector -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ if __name__ == "__main__": diff --git a/examples/src/main/python/ml/dct_example.py b/examples/src/main/python/ml/dct_example.py index e36fcdeaee..a4f25df784 100644 --- a/examples/src/main/python/ml/dct_example.py +++ 
b/examples/src/main/python/ml/dct_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import DCT -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py index 41727edcdb..598deae886 100644 --- a/examples/src/main/python/ml/elementwise_product_example.py +++ b/examples/src/main/python/ml/elementwise_product_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import ElementwiseProduct -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py index 0fcae0e3fc..3bd3fd30f8 100644 --- a/examples/src/main/python/ml/estimator_transformer_param_example.py +++ b/examples/src/main/python/ml/estimator_transformer_param_example.py @@ -20,7 +20,7 @@ Estimator Transformer Param Example. 
""" # $example on$ -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.ml.classification import LogisticRegression # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py index f1b3cdec7b..414629ff88 100644 --- a/examples/src/main/python/ml/pca_example.py +++ b/examples/src/main/python/ml/pca_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import PCA -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py index 08882bcb25..9475e33218 100644 --- a/examples/src/main/python/ml/polynomial_expansion_example.py +++ b/examples/src/main/python/ml/polynomial_expansion_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import PolynomialExpansion -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/simple_params_example.py b/examples/src/main/python/ml/simple_params_example.py index c57e59d01b..54fbc2c9d0 100644 --- a/examples/src/main/python/ml/simple_params_example.py +++ b/examples/src/main/python/ml/simple_params_example.py @@ -21,9 +21,8 @@ import pprint import sys from pyspark.ml.classification import LogisticRegression -from pyspark.mllib.linalg import DenseVector -from pyspark.mllib.regression import LabeledPoint -from pyspark.sql import SparkSession +from pyspark.ml.linalg import DenseVector +from pyspark.sql import Row, SparkSession """ A simple example demonstrating ways to specify parameters for Estimators and Transformers. 
@@ -42,10 +41,10 @@ if __name__ == "__main__": # A LabeledPoint is an Object with two fields named label and features # and Spark SQL identifies these fields and creates the schema appropriately. training = spark.createDataFrame([ - LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])), - LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])), - LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])), - LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))]) + Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])), + Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])), + Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])), + Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))]) # Create a LogisticRegression instance with maxIter = 10. # This instance is an Estimator. @@ -77,9 +76,9 @@ if __name__ == "__main__": # prepare test data. test = spark.createDataFrame([ - LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])), - LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])), - LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))]) + Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])), + Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])), + Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))]) # Make predictions on test data using the Transformer.transform() method. # LogisticRegressionModel.transform will only use the 'features' column. 
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py index b955ff00a8..bbfc316ff2 100644 --- a/examples/src/main/python/ml/vector_assembler_example.py +++ b/examples/src/main/python/ml/vector_assembler_example.py @@ -18,7 +18,7 @@ from __future__ import print_function # $example on$ -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.ml.feature import VectorAssembler # $example off$ from pyspark.sql import SparkSession diff --git a/examples/src/main/python/ml/vector_slicer_example.py b/examples/src/main/python/ml/vector_slicer_example.py index b833a894eb..d2f46b190f 100644 --- a/examples/src/main/python/ml/vector_slicer_example.py +++ b/examples/src/main/python/ml/vector_slicer_example.py @@ -19,7 +19,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.feature import VectorSlicer -from pyspark.mllib.linalg import Vectors +from pyspark.ml.linalg import Vectors from pyspark.sql.types import Row # $example off$ from pyspark.sql import SparkSession -- cgit v1.2.3