From 2e0f3579f1fa7139c2e79bde656cbac049abbc33 Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Fri, 1 May 2015 23:43:24 -0700 Subject: [SPARK-7242] added python api for freqItems in DataFrames The python api for DataFrame's plus addressed your comments from previous PR. rxin Author: Burak Yavuz Closes #5859 from brkyvz/df-freq-py2 and squashes the following commits: f9aa9ce [Burak Yavuz] addressed comments v0.1 4b25056 [Burak Yavuz] added python api for freqItems --- python/pyspark/sql/tests.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'python/pyspark/sql/tests.py') diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 44c8b6a1aa..613efc0ac0 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -375,6 +375,13 @@ class SQLTests(ReusedPySparkTestCase): self.assertEqual(self.testData, df.select(df.key, df.value).collect()) self.assertEqual([Row(value='1')], df.where(df.key == 1).select(df.value).collect()) + def test_freqItems(self): + vals = [Row(a=1, b=-2.0) if i % 2 == 0 else Row(a=i, b=i * 1.0) for i in range(100)] + df = self.sc.parallelize(vals).toDF() + items = df.stat.freqItems(("a", "b"), 0.4).collect()[0] + self.assertTrue(1 in items[0]) + self.assertTrue(-2.0 in items[1]) + def test_aggregator(self): df = self.df g = df.groupBy() -- cgit v1.2.3