path: root/python/pyspark/ml/classification.py
author     DB Tsai <dbt@netflix.com>  2016-05-17 12:51:07 -0700
committer  Xiangrui Meng <meng@databricks.com>  2016-05-17 12:51:07 -0700
commit     e2efe0529acd748f26dbaa41331d1733ed256237 (patch)
tree       fe1a5aeeadfbf220b5dbe1429e0235153db8117b /python/pyspark/ml/classification.py
parent     9f176dd3918129a72282a6b7a12e2899cbb6dac9 (diff)
[SPARK-14615][ML] Use the new ML Vector and Matrix in the ML pipeline based algorithms
## What changes were proposed in this pull request?

Once SPARK-14487 and SPARK-14549 are merged, we will migrate to using the new vector and matrix types in the new ML-pipeline-based APIs.

## How was this patch tested?

Unit tests.

Author: DB Tsai <dbt@netflix.com>
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Author: Xiangrui Meng <meng@databricks.com>

Closes #12627 from dbtsai/SPARK-14615-NewML.
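The substance of the patch below is a one-line import swap repeated across the doctests: the pipeline estimators in pyspark.ml now expect vectors from the new pyspark.ml.linalg package rather than pyspark.mllib.linalg. A minimal sketch of the new usage, assuming a Spark 2.0-style SparkSession; the appName, parameter values, and data here are illustrative and not part of the patch:

    from pyspark.sql import SparkSession
    from pyspark.ml.classification import LogisticRegression
    from pyspark.ml.linalg import Vectors  # new ml package, replacing pyspark.mllib.linalg

    # Illustrative session setup; the file's doctests use a preconfigured sqlContext instead.
    spark = SparkSession.builder.appName("ml-linalg-demo").getOrCreate()

    # Vectors built from pyspark.ml.linalg carry the ml vector type that the
    # pipeline estimators expect in their features column.
    df = spark.createDataFrame([
        (1.0, Vectors.dense(1.0)),
        (0.0, Vectors.sparse(1, [], [])),
    ], ["label", "features"])

    lr = LogisticRegression(maxIter=5, regParam=0.01)
    model = lr.fit(df)
    print(model.coefficients)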
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r--  python/pyspark/ml/classification.py  14
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index 5c11aa71b4..a1c3f72984 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -53,7 +53,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti
Currently, this class only supports binary classification.
>>> from pyspark.sql import Row
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> df = sc.parallelize([
... Row(label=1.0, weight=2.0, features=Vectors.dense(1.0)),
... Row(label=0.0, weight=2.0, features=Vectors.sparse(1, [], []))]).toDF()
@@ -496,7 +496,7 @@ class DecisionTreeClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
It supports both binary and multiclass labels, as well as both continuous and categorical
features.
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> from pyspark.ml.feature import StringIndexer
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
@@ -625,7 +625,7 @@ class RandomForestClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPred
>>> import numpy
>>> from numpy import allclose
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> from pyspark.ml.feature import StringIndexer
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
@@ -752,7 +752,7 @@ class GBTClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol
`SPARK-4240 <https://issues.apache.org/jira/browse/SPARK-4240>`_
>>> from numpy import allclose
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> from pyspark.ml.feature import StringIndexer
>>> df = sqlContext.createDataFrame([
... (1.0, Vectors.dense(1.0)),
@@ -884,7 +884,7 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
The input feature values must be nonnegative.
>>> from pyspark.sql import Row
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> df = sqlContext.createDataFrame([
... Row(label=0.0, features=Vectors.dense([0.0, 0.0])),
... Row(label=0.0, features=Vectors.dense([0.0, 1.0])),
@@ -1028,7 +1028,7 @@ class MultilayerPerceptronClassifier(JavaEstimator, HasFeaturesCol, HasLabelCol,
Number of inputs has to be equal to the size of feature vectors.
Number of outputs has to be equal to the total number of labels.
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> df = sqlContext.createDataFrame([
... (0.0, Vectors.dense([0.0, 0.0])),
... (1.0, Vectors.dense([0.0, 1.0])),
@@ -1193,7 +1193,7 @@ class OneVsRest(Estimator, OneVsRestParams, MLReadable, MLWritable):
is picked to label the example.
>>> from pyspark.sql import Row
- >>> from pyspark.mllib.linalg import Vectors
+ >>> from pyspark.ml.linalg import Vectors
>>> df = sc.parallelize([
... Row(label=0.0, features=Vectors.dense(1.0, 0.8)),
... Row(label=1.0, features=Vectors.sparse(2, [], [])),