diff options
author | Jian Feng <jzhang.chs@gmail.com> | 2015-09-21 23:36:41 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-09-21 23:36:41 -0700 |
commit | 0180b849dbaf191826231eda7dfaaf146a19602b (patch) | |
tree | e04903f9aed018f9891760f0d5c7c00c8b6c7e35 /python/pyspark/sql/tests.py | |
parent | bf20d6c9f9e478a5de24b45bbafd4dd89666c4cf (diff) | |
download | spark-0180b849dbaf191826231eda7dfaaf146a19602b.tar.gz spark-0180b849dbaf191826231eda7dfaaf146a19602b.tar.bz2 spark-0180b849dbaf191826231eda7dfaaf146a19602b.zip |
[SPARK-10577] [PYSPARK] DataFrame hint for broadcast join
https://issues.apache.org/jira/browse/SPARK-10577
Author: Jian Feng <jzhang.chs@gmail.com>
Closes #8801 from Jianfeng-chs/master.
Diffstat (limited to 'python/pyspark/sql/tests.py')
-rw-r--r-- | python/pyspark/sql/tests.py | 18 |
1 files changed, 18 insertions, 0 deletions
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 3e680f1030..645133b2b2 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -1075,6 +1075,24 @@ class SQLTests(ReusedPySparkTestCase): self.assertRaises(TypeError, foo) + # add test for SPARK-10577 (test broadcast join hint) + def test_functions_broadcast(self): + from pyspark.sql.functions import broadcast + + df1 = self.sqlCtx.createDataFrame([(1, "1"), (2, "2")], ("key", "value")) + df2 = self.sqlCtx.createDataFrame([(1, "1"), (2, "2")], ("key", "value")) + + # equijoin - should be converted into broadcast join + plan1 = df1.join(broadcast(df2), "key")._jdf.queryExecution().executedPlan() + self.assertEqual(1, plan1.toString().count("BroadcastHashJoin")) + + # no join key -- should not be a broadcast join + plan2 = df1.join(broadcast(df2))._jdf.queryExecution().executedPlan() + self.assertEqual(0, plan2.toString().count("BroadcastHashJoin")) + + # planner should not crash without a join + broadcast(df1)._jdf.queryExecution().executedPlan() + class HiveContextSQLTests(ReusedPySparkTestCase): |