aboutsummaryrefslogtreecommitdiff
path: root/examples/src/main/scala
diff options
context:
space:
mode:
Diffstat (limited to 'examples/src/main/scala')
-rw-r--r--examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala2
1 files changed, 2 insertions, 0 deletions
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
index 28115f9390..396f073e6b 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TfIdfExample.scala
@@ -43,6 +43,8 @@ object TfIdfExample {
val hashingTF = new HashingTF()
.setInputCol("words").setOutputCol("rawFeatures").setNumFeatures(20)
val featurizedData = hashingTF.transform(wordsData)
+ // alternatively, CountVectorizer can also be used to get term frequency vectors
+
val idf = new IDF().setInputCol("rawFeatures").setOutputCol("features")
val idfModel = idf.fit(featurizedData)
val rescaledData = idfModel.transform(featurizedData)