diff options
author | Jeff Zhang <zjffdu@apache.org> | 2016-06-09 09:54:38 -0700 |
---|---|---|
committer | Nick Pentreath <nickp@za.ibm.com> | 2016-06-09 09:54:38 -0700 |
commit | e594b492836988ef3d9487b511368c70169d1ecd (patch) | |
tree | 1d6cf3d0bca7588646b73b99db876acdc2afcbd8 /python/pyspark | |
parent | 99386fe3989f758844de14b2c28eccfdf8163221 (diff) | |
download | spark-e594b492836988ef3d9487b511368c70169d1ecd.tar.gz spark-e594b492836988ef3d9487b511368c70169d1ecd.tar.bz2 spark-e594b492836988ef3d9487b511368c70169d1ecd.zip |
[SPARK-15788][PYSPARK][ML] PySpark IDFModel missing "idf" property
## What changes were proposed in this pull request?
Add an `idf` property to `IDFModel` in PySpark, exposing the fitted IDF vector.
## How was this patch tested?
Added a doctest for `model.idf` to the `IDF` class docstring.
Author: Jeff Zhang <zjffdu@apache.org>
Closes #13540 from zjffdu/SPARK-15788.
Diffstat (limited to 'python/pyspark')
-rwxr-xr-x | python/pyspark/ml/feature.py | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index 1aff2e550f..ebe13006ad 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -585,6 +585,8 @@ class IDF(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritab ... (DenseVector([0.0, 1.0]),), (DenseVector([3.0, 0.2]),)], ["tf"]) >>> idf = IDF(minDocFreq=3, inputCol="tf", outputCol="idf") >>> model = idf.fit(df) + >>> model.idf + DenseVector([0.0, 0.0]) >>> model.transform(df).head().idf DenseVector([0.0, 0.0]) >>> idf.setParams(outputCol="freqs").fit(df).transform(df).collect()[1].freqs @@ -658,6 +660,14 @@ class IDFModel(JavaModel, JavaMLReadable, JavaMLWritable): .. versionadded:: 1.4.0 """ + @property + @since("2.0.0") + def idf(self): + """ + Returns the IDF vector. + """ + return self._call_java("idf") + @inherit_doc class MaxAbsScaler(JavaEstimator, HasInputCol, HasOutputCol, JavaMLReadable, JavaMLWritable): |