diff options
author | Feynman Liang <fliang@databricks.com> | 2015-08-27 16:10:37 -0700 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2015-08-27 16:10:37 -0700 |
commit | 5bfe9e1111d9862084586549a7dc79476f67bab9 (patch) | |
tree | 1fca589260438d9eb39fc57d724739f9e4b264ac /mllib/src/test/java | |
parent | c94ecdfc5b3c0fe6c38a170dc2af9259354dc9e3 (diff) | |
download | spark-5bfe9e1111d9862084586549a7dc79476f67bab9.tar.gz spark-5bfe9e1111d9862084586549a7dc79476f67bab9.tar.bz2 spark-5bfe9e1111d9862084586549a7dc79476f67bab9.zip |
[SPARK-9680] [MLLIB] [DOC] StopWordsRemover user guide and Java compatibility test
* Adds user guide for ml.feature.StopWordsRemover (code examples were run locally)
* Cleans up scaladocs for public methods
* Adds test for Java compatibility
* Follow-up Python user guide code example is tracked by SPARK-10249
Author: Feynman Liang <fliang@databricks.com>
Closes #8436 from feynmanliang/SPARK-10230.
Diffstat (limited to 'mllib/src/test/java')
-rw-r--r-- | mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java new file mode 100644 index 0000000000..76cdd0fae8 --- /dev/null +++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.ml.feature; + +import java.util.Arrays; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + + +public class JavaStopWordsRemoverSuite { + + private transient JavaSparkContext jsc; + private transient SQLContext jsql; + + @Before + public void setUp() { + jsc = new JavaSparkContext("local", "JavaStopWordsRemoverSuite"); + jsql = new SQLContext(jsc); + } + + @After + public void tearDown() { + jsc.stop(); + jsc = null; + } + + @Test + public void javaCompatibilityTest() { + StopWordsRemover remover = new StopWordsRemover() + .setInputCol("raw") + .setOutputCol("filtered"); + + JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList( + RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), + RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) + )); + StructType schema = new StructType(new StructField[] { + new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) + }); + DataFrame dataset = jsql.createDataFrame(rdd, schema); + + remover.transform(dataset).collect(); + } +} |