diff options
Diffstat (limited to 'python/pyspark/ml/tests.py')
-rwxr-xr-x[-rw-r--r--] | python/pyspark/ml/tests.py | 7 |
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 78ec96af8a..ad1631fb5b 100644..100755
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -417,6 +417,13 @@ class FeatureTests(PySparkTestCase):
         self.assertEqual(stopWordRemover.getStopWords(), stopwords)
         transformedDF = stopWordRemover.transform(dataset)
         self.assertEqual(transformedDF.head().output, ["a"])
+        # with language selection
+        stopwords = StopWordsRemover.loadDefaultStopWords("turkish")
+        dataset = sqlContext.createDataFrame([Row(input=["acaba", "ama", "biri"])])
+        stopWordRemover.setStopWords(stopwords)
+        self.assertEqual(stopWordRemover.getStopWords(), stopwords)
+        transformedDF = stopWordRemover.transform(dataset)
+        self.assertEqual(transformedDF.head().output, [])
 
     def test_count_vectorizer_with_binary(self):
         sqlContext = SQLContext(self.sc)