From 620605a4a1123afaab2674e38251f1231dea17ce Mon Sep 17 00:00:00 2001
From: Feynman Liang
Date: Mon, 29 Jun 2015 18:40:30 -0700
Subject: [SPARK-8456] [ML] Ngram featurizer python

Python API for N-gram feature transformer

Author: Feynman Liang

Closes #6960 from feynmanliang/ngram-featurizer-python and squashes the following commits:

f9e37c9 [Feynman Liang] Remove debugging code
4dd81f4 [Feynman Liang] Fix typo and doctest
06c79ac [Feynman Liang] Style guide
26c1175 [Feynman Liang] Add python NGram API
---
 python/pyspark/ml/tests.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'python/pyspark/ml/tests.py')

diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 6adbf166f3..c151d21fd6 100644
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -252,6 +252,17 @@ class FeatureTests(PySparkTestCase):
         output = idf0m.transform(dataset)
         self.assertIsNotNone(output.head().idf)
 
+    def test_ngram(self):
+        sqlContext = SQLContext(self.sc)
+        dataset = sqlContext.createDataFrame([
+            ([["a", "b", "c", "d", "e"]])], ["input"])
+        ngram0 = NGram(n=4, inputCol="input", outputCol="output")
+        self.assertEqual(ngram0.getN(), 4)
+        self.assertEqual(ngram0.getInputCol(), "input")
+        self.assertEqual(ngram0.getOutputCol(), "output")
+        transformedDF = ngram0.transform(dataset)
+        self.assertEquals(transformedDF.head().output, ["a b c d", "b c d e"])
+
 
 if __name__ == "__main__":
     unittest.main()
-- 
cgit v1.2.3