about | summary | refs | log | tree | commit | diff
path: root/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
diff options
context:
space:
mode:
Diffstat (limited to 'mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java')
-rw-r--r-- mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java | 17
1 files changed, 11 insertions, 6 deletions
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
index 23463ab5fe..da22180563 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaHashingTFSuite.java
@@ -63,17 +63,22 @@ public class JavaHashingTFSuite {
new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
});
- DataFrame sentenceDataFrame = jsql.createDataFrame(jrdd, schema);
- Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
- DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
+ DataFrame sentenceData = jsql.createDataFrame(jrdd, schema);
+ Tokenizer tokenizer = new Tokenizer()
+ .setInputCol("sentence")
+ .setOutputCol("words");
+ DataFrame wordsData = tokenizer.transform(sentenceData);
int numFeatures = 20;
HashingTF hashingTF = new HashingTF()
.setInputCol("words")
- .setOutputCol("features")
+ .setOutputCol("rawFeatures")
.setNumFeatures(numFeatures);
- DataFrame featurized = hashingTF.transform(wordsDataFrame);
- for (Row r : featurized.select("features", "words", "label").take(3)) {
+ DataFrame featurizedData = hashingTF.transform(wordsData);
+ IDF idf = new IDF().setInputCol("rawFeatures").setOutputCol("features");
+ IDFModel idfModel = idf.fit(featurizedData);
+ DataFrame rescaledData = idfModel.transform(featurizedData);
+ for (Row r : rescaledData.select("features", "label").take(3)) {
Vector features = r.getAs(0);
Assert.assertEquals(features.size(), numFeatures);
}