about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/ml/classification.py
diff options
context:
space:
mode:
author: WeichenXu <WeichenXu123@outlook.com> 2016-10-12 19:52:57 -0700
committer: Yanbo Liang <ybliang8@gmail.com> 2016-10-12 19:52:57 -0700
commit: 0d4a695279c514c76aa0e9288c70ac7aaef91b03 (patch)
tree: 801eea030de1e38a4b35f3a287e96fe86aa97d76 /python/pyspark/ml/classification.py
parent: 6f20a92ca30f9c367009c4556939ea4de4284cb9 (diff)
download: spark-0d4a695279c514c76aa0e9288c70ac7aaef91b03.tar.gz
spark-0d4a695279c514c76aa0e9288c70ac7aaef91b03.tar.bz2
spark-0d4a695279c514c76aa0e9288c70ac7aaef91b03.zip
[SPARK-17745][ML][PYSPARK] update NB python api - add weight col parameter
## What changes were proposed in this pull request?

Update the Python API for NaiveBayes: add the `weightCol` parameter.

## How was this patch tested?

Doctests added.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #15406 from WeichenXu123/nb_python_update.
Diffstat (limited to 'python/pyspark/ml/classification.py')
-rw-r--r-- python/pyspark/ml/classification.py 26
1 file changed, 13 insertions, 13 deletions
diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py
index ea60fab029..3f763a10d4 100644
--- a/python/pyspark/ml/classification.py
+++ b/python/pyspark/ml/classification.py
@@ -981,7 +981,7 @@ class GBTClassificationModel(TreeEnsembleModel, JavaPredictionModel, JavaMLWrita
@inherit_doc
class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, HasProbabilityCol,
- HasRawPredictionCol, HasThresholds, JavaMLWritable, JavaMLReadable):
+ HasRawPredictionCol, HasThresholds, HasWeightCol, JavaMLWritable, JavaMLReadable):
"""
Naive Bayes Classifiers.
It supports both Multinomial and Bernoulli NB. `Multinomial NB
@@ -995,23 +995,23 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
>>> from pyspark.sql import Row
>>> from pyspark.ml.linalg import Vectors
>>> df = spark.createDataFrame([
- ... Row(label=0.0, features=Vectors.dense([0.0, 0.0])),
- ... Row(label=0.0, features=Vectors.dense([0.0, 1.0])),
- ... Row(label=1.0, features=Vectors.dense([1.0, 0.0]))])
- >>> nb = NaiveBayes(smoothing=1.0, modelType="multinomial")
+ ... Row(label=0.0, weight=0.1, features=Vectors.dense([0.0, 0.0])),
+ ... Row(label=0.0, weight=0.5, features=Vectors.dense([0.0, 1.0])),
+ ... Row(label=1.0, weight=1.0, features=Vectors.dense([1.0, 0.0]))])
+ >>> nb = NaiveBayes(smoothing=1.0, modelType="multinomial", weightCol="weight")
>>> model = nb.fit(df)
>>> model.pi
- DenseVector([-0.51..., -0.91...])
+ DenseVector([-0.81..., -0.58...])
>>> model.theta
- DenseMatrix(2, 2, [-1.09..., -0.40..., -0.40..., -1.09...], 1)
+ DenseMatrix(2, 2, [-0.91..., -0.51..., -0.40..., -1.09...], 1)
>>> test0 = sc.parallelize([Row(features=Vectors.dense([1.0, 0.0]))]).toDF()
>>> result = model.transform(test0).head()
>>> result.prediction
1.0
>>> result.probability
- DenseVector([0.42..., 0.57...])
+ DenseVector([0.32..., 0.67...])
>>> result.rawPrediction
- DenseVector([-1.60..., -1.32...])
+ DenseVector([-1.72..., -0.99...])
>>> test1 = sc.parallelize([Row(features=Vectors.sparse(2, [0], [1.0]))]).toDF()
>>> model.transform(test1).head().prediction
1.0
@@ -1045,11 +1045,11 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
@keyword_only
def __init__(self, featuresCol="features", labelCol="label", predictionCol="prediction",
probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0,
- modelType="multinomial", thresholds=None):
+ modelType="multinomial", thresholds=None, weightCol=None):
"""
__init__(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0, \
- modelType="multinomial", thresholds=None)
+ modelType="multinomial", thresholds=None, weightCol=None)
"""
super(NaiveBayes, self).__init__()
self._java_obj = self._new_java_obj(
@@ -1062,11 +1062,11 @@ class NaiveBayes(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredictionCol, H
@since("1.5.0")
def setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction",
probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0,
- modelType="multinomial", thresholds=None):
+ modelType="multinomial", thresholds=None, weightCol=None):
"""
setParams(self, featuresCol="features", labelCol="label", predictionCol="prediction", \
probabilityCol="probability", rawPredictionCol="rawPrediction", smoothing=1.0, \
- modelType="multinomial", thresholds=None)
+ modelType="multinomial", thresholds=None, weightCol=None)
Sets params for Naive Bayes.
"""
kwargs = self.setParams._input_kwargs