aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/tests.py
diff options
context:
space:
mode:
authorBurak Yavuz <brkyvz@gmail.com>2015-05-01 23:43:24 -0700
committerReynold Xin <rxin@databricks.com>2015-05-01 23:43:24 -0700
commit2e0f3579f1fa7139c2e79bde656cbac049abbc33 (patch)
tree1c408c400a6514978eac32dd49366940f03b6beb /python/pyspark/sql/tests.py
parentb79aeb95b45ab4ae811039d452cf028d7b844132 (diff)
downloadspark-2e0f3579f1fa7139c2e79bde656cbac049abbc33.tar.gz
spark-2e0f3579f1fa7139c2e79bde656cbac049abbc33.tar.bz2
spark-2e0f3579f1fa7139c2e79bde656cbac049abbc33.zip
[SPARK-7242] added python api for freqItems in DataFrames
Adds the Python API for DataFrame's freqItems and addresses the review comments from the previous PR. rxin. Author: Burak Yavuz <brkyvz@gmail.com>. Closes #5859 from brkyvz/df-freq-py2 and squashes the following commits: f9aa9ce [Burak Yavuz] addressed comments v0.1; 4b25056 [Burak Yavuz] added python api for freqItems
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r--python/pyspark/sql/tests.py7
1 files changed, 7 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 44c8b6a1aa..613efc0ac0 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -375,6 +375,13 @@ class SQLTests(ReusedPySparkTestCase):
self.assertEqual(self.testData, df.select(df.key, df.value).collect())
self.assertEqual([Row(value='1')], df.where(df.key == 1).select(df.value).collect())
+ def test_freqItems(self):
+ vals = [Row(a=1, b=-2.0) if i % 2 == 0 else Row(a=i, b=i * 1.0) for i in range(100)]
+ df = self.sc.parallelize(vals).toDF()
+ items = df.stat.freqItems(("a", "b"), 0.4).collect()[0]
+ self.assertTrue(1 in items[0])
+ self.assertTrue(-2.0 in items[1])
+
def test_aggregator(self):
df = self.df
g = df.groupBy()