aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Holden Karau <holden@pigscanfly.ca> 2015-09-01 10:48:57 -0700
committer: Xiangrui Meng <meng@databricks.com> 2015-09-01 10:48:57 -0700
commit: e6e483cc4de740c46398385b03ffe0e662edae39 (patch)
tree: 652cf519f902aaaf8eecc564791690b395aea81b /mllib
parent: 391e6be0ae883f3ea0fab79463eb8b618af79afb (diff)
download: spark-e6e483cc4de740c46398385b03ffe0e662edae39.tar.gz
spark-e6e483cc4de740c46398385b03ffe0e662edae39.tar.bz2
spark-e6e483cc4de740c46398385b03ffe0e662edae39.zip
[SPARK-9679] [ML] [PYSPARK] Add Python API for Stop Words Remover
Add a Python API for the Stop Words Remover.

Author: Holden Karau <holden@pigscanfly.ca>

Closes #8118 from holdenk/SPARK-9679-python-StopWordsRemover.
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala 6
-rw-r--r-- mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala 2
2 files changed, 4 insertions, 4 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
index 5d77ea08db..7da430c7d1 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala
@@ -29,14 +29,14 @@ import org.apache.spark.sql.types.{ArrayType, StringType, StructField, StructTyp
/**
* stop words list
*/
-private object StopWords {
+private[spark] object StopWords {
/**
* Use the same default stopwords list as scikit-learn.
* The original list can be found from "Glasgow Information Retrieval Group"
* [[http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words]]
*/
- val EnglishStopWords = Array( "a", "about", "above", "across", "after", "afterwards", "again",
+ val English = Array( "a", "about", "above", "across", "after", "afterwards", "again",
"against", "all", "almost", "alone", "along", "already", "also", "although", "always",
"am", "among", "amongst", "amoungst", "amount", "an", "and", "another",
"any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are",
@@ -121,7 +121,7 @@ class StopWordsRemover(override val uid: String)
/** @group getParam */
def getCaseSensitive: Boolean = $(caseSensitive)
- setDefault(stopWords -> StopWords.EnglishStopWords, caseSensitive -> false)
+ setDefault(stopWords -> StopWords.English, caseSensitive -> false)
override def transform(dataset: DataFrame): DataFrame = {
val outputSchema = transformSchema(dataset.schema)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
index f01306f89c..e0d433f566 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StopWordsRemoverSuite.scala
@@ -65,7 +65,7 @@ class StopWordsRemoverSuite extends SparkFunSuite with MLlibTestSparkContext {
}
test("StopWordsRemover with additional words") {
- val stopWords = StopWords.EnglishStopWords ++ Array("python", "scala")
+ val stopWords = StopWords.English ++ Array("python", "scala")
val remover = new StopWordsRemover()
.setInputCol("raw")
.setOutputCol("filtered")