aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/tests.py
diff options
context:
space:
mode:
authorJian Feng <jzhang.chs@gmail.com>2015-09-21 23:36:41 -0700
committerReynold Xin <rxin@databricks.com>2015-09-21 23:36:41 -0700
commit0180b849dbaf191826231eda7dfaaf146a19602b (patch)
treee04903f9aed018f9891760f0d5c7c00c8b6c7e35 /python/pyspark/sql/tests.py
parentbf20d6c9f9e478a5de24b45bbafd4dd89666c4cf (diff)
downloadspark-0180b849dbaf191826231eda7dfaaf146a19602b.tar.gz
spark-0180b849dbaf191826231eda7dfaaf146a19602b.tar.bz2
spark-0180b849dbaf191826231eda7dfaaf146a19602b.zip
[SPARK-10577] [PYSPARK] DataFrame hint for broadcast join
https://issues.apache.org/jira/browse/SPARK-10577 Author: Jian Feng <jzhang.chs@gmail.com> Closes #8801 from Jianfeng-chs/master.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r--python/pyspark/sql/tests.py18
1 files changed, 18 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 3e680f1030..645133b2b2 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1075,6 +1075,24 @@ class SQLTests(ReusedPySparkTestCase):
self.assertRaises(TypeError, foo)
+ # add test for SPARK-10577 (test broadcast join hint)
+ def test_functions_broadcast(self):
+ from pyspark.sql.functions import broadcast
+
+ df1 = self.sqlCtx.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))
+ df2 = self.sqlCtx.createDataFrame([(1, "1"), (2, "2")], ("key", "value"))
+
+ # equijoin - should be converted into broadcast join
+ plan1 = df1.join(broadcast(df2), "key")._jdf.queryExecution().executedPlan()
+ self.assertEqual(1, plan1.toString().count("BroadcastHashJoin"))
+
+ # no join key -- should not be a broadcast join
+ plan2 = df1.join(broadcast(df2))._jdf.queryExecution().executedPlan()
+ self.assertEqual(0, plan2.toString().count("BroadcastHashJoin"))
+
+ # planner should not crash without a join
+ broadcast(df1)._jdf.queryExecution().executedPlan()
+
class HiveContextSQLTests(ReusedPySparkTestCase):