about | summary | refs | log | tree | commit | diff
path: root/python
diff options
context:
space:
mode:
author: Jeff Zhang <zjffdu@apache.org> 2016-06-09 09:54:38 -0700
committer: Nick Pentreath <nickp@za.ibm.com> 2016-06-09 09:54:38 -0700
commit: e594b492836988ef3d9487b511368c70169d1ecd (patch)
tree: 1d6cf3d0bca7588646b73b99db876acdc2afcbd8 /python
parent: 99386fe3989f758844de14b2c28eccfdf8163221 (diff)
download: spark-e594b492836988ef3d9487b511368c70169d1ecd.tar.gz
download: spark-e594b492836988ef3d9487b511368c70169d1ecd.tar.bz2
download: spark-e594b492836988ef3d9487b511368c70169d1ecd.zip
[SPARK-15788][PYSPARK][ML] PySpark IDFModel missing "idf" property
## What changes were proposed in this pull request?

Add an `idf` property to `IDFModel` in PySpark that returns the model's IDF vector.

## How was this patch tested?

Added a doctest for `model.idf` to the `IDF` class docstring.

Author: Jeff Zhang <zjffdu@apache.org>

Closes #13540 from zjffdu/SPARK-15788.
Diffstat (limited to 'python')
-rwxr-xr-x python/pyspark/ml/feature.py 10
1 files changed, 10 insertions, 0 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 1aff2e550f..ebe13006ad 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -585,6 +585,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab
... (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"])
>>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf")
>>> model = idf.fit(df)
+ >>> model.idf
+ DenseVector([0.0, 0.0])
>>> model.transform(df).head().idf
DenseVector([0.0, 0.0])
>>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs
@@ -658,6 +660,14 @@ class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable):
.. versionadded:: 1.4.0
"""
+ @property
+ @since("2.0.0")
+ def idf(self):
+ """
+ Returns the IDF vector.
+ """
+ return self._call_java("idf")
+
@inherit_doc
class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable):