From e6e483cc4de740c46398385b03ffe0e662edae39 Mon Sep 17 00:00:00 2001
From: Holden Karau <holden@pigscanfly.ca>
Date: Tue, 1 Sep 2015 10:48:57 -0700
Subject: [SPARK-9679] [ML] [PYSPARK] Add Python API for Stop Words Remover

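Add a Python API for the Stop Words Remover. On the Scala side, widen
`StopWords` from `private` to `private[spark]` and rename
`StopWords.EnglishStopWords` to `StopWords.English`.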

Author: Holden Karau <holden@pigscanfly.ca>

Closes #8118 from holdenk/SPARK-9679-python-StopWordsRemover.
---
 .../main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala   | 6 +++---
 .../scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'mllib')

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 5d77ea08db..7da430c7d1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -29,14 +29,14 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructTyp
 /**
  * stop words list
  */
-private object StopWords {
+private[spark] object StopWords {
 
   /**
    * Use the same default stopwords list as scikit-learn.
    * The original list can be found from "Glasgow Information Retrieval Group"
    * [[http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words]]
    */
-  val EnglishStopWords = Array( "a", "about", "above", "across", "after", "afterwards", "again",
+  val English = Array( "a", "about", "above", "across", "after", "afterwards", "again",
     "against", "all", "almost", "alone", "along", "already", "also", "although", "always",
     "am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
     "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
@@ -121,7 +121,7 @@ class StopWordsRemover(override val uid: String)
   /** @group getParam */
   def getCaseSensitive: Boolean = $(caseSensitive)
 
-  setDefault(stopWords -> StopWords.EnglishStopWords, caseSensitive -> false)
+  setDefault(stopWords -> StopWords.English, caseSensitive -> false)
 
   override def transform(dataset: DataFrame): DataFrame = {
     val outputSchema = transformSchema(dataset.schema)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
index f01306f89c..e0d433f566 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
@@ -65,7 +65,7 @@ class StopWordsRemoverSuite extends SparkFunSuite with MLlibTestSparkContext {
   }
 
   test("StopWordsRemover with additional words") {
-    val stopWords = StopWords.EnglishStopWords ++ Array("python", "scala")
+    val stopWords = StopWords.English ++ Array("python", "scala")
     val remover = new StopWordsRemover()
       .setInputCol("raw")
       .setOutputCol("filtered")
-- 
cgit v1.2.3