aboutsummaryrefslogtreecommitdiff
path: root/examples/src
diff options
context:
space:
mode:
author hyukjinkwon <gurwls223@gmail.com> 2016-06-10 18:29:26 -0700
committer Joseph K. Bradley <joseph@databricks.com> 2016-06-10 18:29:26 -0700
commit 99f3c82776fe5ea4f89a9965a288c7447585dc2c (patch)
tree 1448dbcb50d8cad1f7635aa6e58b584e920ebded /examples/src
parent bba5d7999f7b3ae9d816ea552ba9378fea1615a6 (diff)
download spark-99f3c82776fe5ea4f89a9965a288c7447585dc2c.tar.gz
spark-99f3c82776fe5ea4f89a9965a288c7447585dc2c.tar.bz2
spark-99f3c82776fe5ea4f89a9965a288c7447585dc2c.zip
[SPARK-14615][ML][FOLLOWUP] Fix Python examples to use the new ML Vector and Matrix APIs in the ML pipeline based algorithms
## What changes were proposed in this pull request? This PR fixes the Python examples to use the new ML Vector and Matrix APIs in the ML pipeline-based algorithms. I first ran the shell command `grep -r "from pyspark.mllib" .` and then executed every example it listed. Some of the examples under `ml` failed with the error message below: ``` pyspark.sql.utils.IllegalArgumentException: u'requirement failed: Input type must be VectorUDT but got org.apache.spark.mllib.linalg.VectorUDT@f71b0bce.' ``` So I fixed them to use the new APIs, in the same way that some Python tests were fixed in https://github.com/apache/spark/pull/12627 ## How was this patch tested? Manually tested all the examples listed by `grep -r "from pyspark.mllib" .`. Author: hyukjinkwon <gurwls223@gmail.com> Closes #13393 from HyukjinKwon/SPARK-14615.
Diffstat (limited to 'examples/src')
-rw-r--r-- examples/src/main/python/ml/aft_survival_regression.py 2
-rw-r--r-- examples/src/main/python/ml/chisq_selector_example.py 2
-rw-r--r-- examples/src/main/python/ml/dct_example.py 2
-rw-r--r-- examples/src/main/python/ml/elementwise_product_example.py 2
-rw-r--r-- examples/src/main/python/ml/estimator_transformer_param_example.py 2
-rw-r--r-- examples/src/main/python/ml/pca_example.py 2
-rw-r--r-- examples/src/main/python/ml/polynomial_expansion_example.py 2
-rw-r--r-- examples/src/main/python/ml/simple_params_example.py 19
-rw-r--r-- examples/src/main/python/ml/vector_assembler_example.py 2
-rw-r--r-- examples/src/main/python/ml/vector_slicer_example.py 2
10 files changed, 18 insertions, 19 deletions
diff --git a/examples/src/main/python/ml/aft_survival_regression.py b/examples/src/main/python/ml/aft_survival_regression.py
index 9879679829..060f0171ff 100644
--- a/examples/src/main/python/ml/aft_survival_regression.py
+++ b/examples/src/main/python/ml/aft_survival_regression.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.regression import AFTSurvivalRegression
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/chisq_selector_example.py b/examples/src/main/python/ml/chisq_selector_example.py
index 8bafb942e0..5e19ef1624 100644
--- a/examples/src/main/python/ml/chisq_selector_example.py
+++ b/examples/src/main/python/ml/chisq_selector_example.py
@@ -20,7 +20,7 @@ from __future__ import print_function
from pyspark.sql import SparkSession
# $example on$
from pyspark.ml.feature import ChiSqSelector
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
if __name__ == "__main__":
diff --git a/examples/src/main/python/ml/dct_example.py b/examples/src/main/python/ml/dct_example.py
index e36fcdeaee..a4f25df784 100644
--- a/examples/src/main/python/ml/dct_example.py
+++ b/examples/src/main/python/ml/dct_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.feature import DCT
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/elementwise_product_example.py b/examples/src/main/python/ml/elementwise_product_example.py
index 41727edcdb..598deae886 100644
--- a/examples/src/main/python/ml/elementwise_product_example.py
+++ b/examples/src/main/python/ml/elementwise_product_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.feature import ElementwiseProduct
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/estimator_transformer_param_example.py b/examples/src/main/python/ml/estimator_transformer_param_example.py
index 0fcae0e3fc..3bd3fd30f8 100644
--- a/examples/src/main/python/ml/estimator_transformer_param_example.py
+++ b/examples/src/main/python/ml/estimator_transformer_param_example.py
@@ -20,7 +20,7 @@ Estimator Transformer Param Example.
"""
# $example on$
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
from pyspark.ml.classification import LogisticRegression
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/pca_example.py b/examples/src/main/python/ml/pca_example.py
index f1b3cdec7b..414629ff88 100644
--- a/examples/src/main/python/ml/pca_example.py
+++ b/examples/src/main/python/ml/pca_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.feature import PCA
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/polynomial_expansion_example.py b/examples/src/main/python/ml/polynomial_expansion_example.py
index 08882bcb25..9475e33218 100644
--- a/examples/src/main/python/ml/polynomial_expansion_example.py
+++ b/examples/src/main/python/ml/polynomial_expansion_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.feature import PolynomialExpansion
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/simple_params_example.py b/examples/src/main/python/ml/simple_params_example.py
index c57e59d01b..54fbc2c9d0 100644
--- a/examples/src/main/python/ml/simple_params_example.py
+++ b/examples/src/main/python/ml/simple_params_example.py
@@ -21,9 +21,8 @@ import pprint
import sys
from pyspark.ml.classification import LogisticRegression
-from pyspark.mllib.linalg import DenseVector
-from pyspark.mllib.regression import LabeledPoint
-from pyspark.sql import SparkSession
+from pyspark.ml.linalg import DenseVector
+from pyspark.sql import Row, SparkSession
"""
A simple example demonstrating ways to specify parameters for Estimators and Transformers.
@@ -42,10 +41,10 @@ if __name__ == "__main__":
# A LabeledPoint is an Object with two fields named label and features
# and Spark SQL identifies these fields and creates the schema appropriately.
training = spark.createDataFrame([
- LabeledPoint(1.0, DenseVector([0.0, 1.1, 0.1])),
- LabeledPoint(0.0, DenseVector([2.0, 1.0, -1.0])),
- LabeledPoint(0.0, DenseVector([2.0, 1.3, 1.0])),
- LabeledPoint(1.0, DenseVector([0.0, 1.2, -0.5]))])
+ Row(label=1.0, features=DenseVector([0.0, 1.1, 0.1])),
+ Row(label=0.0, features=DenseVector([2.0, 1.0, -1.0])),
+ Row(label=0.0, features=DenseVector([2.0, 1.3, 1.0])),
+ Row(label=1.0, features=DenseVector([0.0, 1.2, -0.5]))])
# Create a LogisticRegression instance with maxIter = 10.
# This instance is an Estimator.
@@ -77,9 +76,9 @@ if __name__ == "__main__":
# prepare test data.
test = spark.createDataFrame([
- LabeledPoint(1.0, DenseVector([-1.0, 1.5, 1.3])),
- LabeledPoint(0.0, DenseVector([3.0, 2.0, -0.1])),
- LabeledPoint(0.0, DenseVector([0.0, 2.2, -1.5]))])
+ Row(label=1.0, features=DenseVector([-1.0, 1.5, 1.3])),
+ Row(label=0.0, features=DenseVector([3.0, 2.0, -0.1])),
+ Row(label=0.0, features=DenseVector([0.0, 2.2, -1.5]))])
# Make predictions on test data using the Transformer.transform() method.
# LogisticRegressionModel.transform will only use the 'features' column.
diff --git a/examples/src/main/python/ml/vector_assembler_example.py b/examples/src/main/python/ml/vector_assembler_example.py
index b955ff00a8..bbfc316ff2 100644
--- a/examples/src/main/python/ml/vector_assembler_example.py
+++ b/examples/src/main/python/ml/vector_assembler_example.py
@@ -18,7 +18,7 @@
from __future__ import print_function
# $example on$
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
# $example off$
from pyspark.sql import SparkSession
diff --git a/examples/src/main/python/ml/vector_slicer_example.py b/examples/src/main/python/ml/vector_slicer_example.py
index b833a894eb..d2f46b190f 100644
--- a/examples/src/main/python/ml/vector_slicer_example.py
+++ b/examples/src/main/python/ml/vector_slicer_example.py
@@ -19,7 +19,7 @@ from __future__ import print_function
# $example on$
from pyspark.ml.feature import VectorSlicer
-from pyspark.mllib.linalg import Vectors
+from pyspark.ml.linalg import Vectors
from pyspark.sql.types import Row
# $example off$
from pyspark.sql import SparkSession