about summary refs log tree commit diff
path: root/python/pyspark/ml/tests.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/ml/tests.py')
-rwxr-xr-x [-rw-r--r--] python/pyspark/ml/tests.py | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 78ec96af8a..ad1631fb5b 100644..100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -417,6 +417,13 @@ class FeatureTests(PySparkTestCase):
self.assertEqual(stopWordRemover.getStopWords(), stopwords)
transformedDF = stopWordRemover.transform(dataset)
self.assertEqual(transformedDF.head().output, ["a"])
+ # with language selection
+ stopwords = StopWordsRemover.loadDefaultStopWords("turkish")
+ dataset = sqlContext.createDataFrame([Row(input=["acaba", "ama", "biri"])])
+ stopWordRemover.setStopWords(stopwords)
+ self.assertEqual(stopWordRemover.getStopWords(), stopwords)
+ transformedDF = stopWordRemover.transform(dataset)
+ self.assertEqual(transformedDF.head().output, [])
def test_count_vectorizer_with_binary(self):
sqlContext = SQLContext(self.sc)