aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/ml/tests.py
diff options
context:
space:
mode:
authorFeynman Liang <fliang@databricks.com>2015-06-29 18:40:30 -0700
committerJoseph K. Bradley <joseph@databricks.com>2015-06-29 18:40:30 -0700
commit620605a4a1123afaab2674e38251f1231dea17ce (patch)
tree2fec235613a66fb012193e8fae90902c6657b63d /python/pyspark/ml/tests.py
parent4c1808be4d3aaa37a5a878892e91ca73ea405ffa (diff)
downloadspark-620605a4a1123afaab2674e38251f1231dea17ce.tar.gz
spark-620605a4a1123afaab2674e38251f1231dea17ce.tar.bz2
spark-620605a4a1123afaab2674e38251f1231dea17ce.zip
[SPARK-8456] [ML] Ngram featurizer python
Python API for N-gram feature transformer.
Author: Feynman Liang <fliang@databricks.com>
Closes #6960 from feynmanliang/ngram-featurizer-python and squashes the following commits:
f9e37c9 [Feynman Liang] Remove debugging code
4dd81f4 [Feynman Liang] Fix typo and doctest
06c79ac [Feynman Liang] Style guide
26c1175 [Feynman Liang] Add python NGram API
Diffstat (limited to 'python/pyspark/ml/tests.py')
-rw-r--r--python/pyspark/ml/tests.py11
1 files changed, 11 insertions, 0 deletions
diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
index 6adbf166f3..c151d21fd6 100644
--- a/python/pyspark/ml/tests.py
+++ b/python/pyspark/ml/tests.py
@@ -252,6 +252,17 @@ class FeatureTests(PySparkTestCase):
output = idf0m.transform(dataset)
self.assertIsNotNone(output.head().idf)
+ def test_ngram(self):
+ sqlContext = SQLContext(self.sc)
+ dataset = sqlContext.createDataFrame([
+ ([["a", "b", "c", "d", "e"]])], ["input"])
+ ngram0 = NGram(n=4, inputCol="input", outputCol="output")
+ self.assertEqual(ngram0.getN(), 4)
+ self.assertEqual(ngram0.getInputCol(), "input")
+ self.assertEqual(ngram0.getOutputCol(), "output")
+ transformedDF = ngram0.transform(dataset)
+ self.assertEquals(transformedDF.head().output, ["a b c d", "b c d e"])
+
# Run the test suite via unittest when this module is executed as a script.
if __name__ == "__main__":
unittest.main()