aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
authorFeynman Liang <fliang@databricks.com>2015-08-27 16:10:37 -0700
committerXiangrui Meng <meng@databricks.com>2015-08-27 16:10:37 -0700
commit5bfe9e1111d9862084586549a7dc79476f67bab9 (patch)
tree1fca589260438d9eb39fc57d724739f9e4b264ac /mllib
parentc94ecdfc5b3c0fe6c38a170dc2af9259354dc9e3 (diff)
downloadspark-5bfe9e1111d9862084586549a7dc79476f67bab9.tar.gz
spark-5bfe9e1111d9862084586549a7dc79476f67bab9.tar.bz2
spark-5bfe9e1111d9862084586549a7dc79476f67bab9.zip
[SPARK-9680] [MLLIB] [DOC] StopWordsRemovers user guide and Java compatibility test
* Adds user guide for ml.feature.StopWordsRemovers, ran code examples on my machine * Cleans up scaladocs for public methods * Adds test for Java compatibility * Follow up Python user guide code example is tracked by SPARK-10249 Author: Feynman Liang <fliang@databricks.com> Closes #8436 from feynmanliang/SPARK-10230.
Diffstat (limited to 'mllib')
-rw-r--r--mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java72
1 files changed, 72 insertions, 0 deletions
diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java
new file mode 100644
index 0000000000..76cdd0fae8
--- /dev/null
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaStopWordsRemoverSuite.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.ml.feature;
+
+import java.util.Arrays;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+
+public class JavaStopWordsRemoverSuite {
+
+ private transient JavaSparkContext jsc;
+ private transient SQLContext jsql;
+
+ @Before
+ public void setUp() {
+ jsc = new JavaSparkContext("local", "JavaStopWordsRemoverSuite");
+ jsql = new SQLContext(jsc);
+ }
+
+ @After
+ public void tearDown() {
+ jsc.stop();
+ jsc = null;
+ }
+
+ @Test
+ public void javaCompatibilityTest() {
+ StopWordsRemover remover = new StopWordsRemover()
+ .setInputCol("raw")
+ .setOutputCol("filtered");
+
+ JavaRDD<Row> rdd = jsc.parallelize(Arrays.asList(
+ RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")),
+ RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb"))
+ ));
+ StructType schema = new StructType(new StructField[] {
+ new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+ });
+ DataFrame dataset = jsql.createDataFrame(rdd, schema);
+
+ remover.transform(dataset).collect();
+ }
+}