aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/dataframe.py
diff options
context:
space:
mode:
author: Reynold Xin <rxin@databricks.com>, 2015-04-22 15:26:58 -0700
committer: Michael Armbrust <michael@databricks.com>, 2015-04-22 15:26:58 -0700
commit: baf865ddc2cff9b99d6aeab9861e030da511257f (patch)
tree: ce88dbc7f041bdb2ca896070a2bee382bb215b8f /python/pyspark/sql/dataframe.py
parent: fbe7106d75c6a1624d10793fba6759703bc5c6e6 (diff)
download: spark-baf865ddc2cff9b99d6aeab9861e030da511257f.tar.gz
spark-baf865ddc2cff9b99d6aeab9861e030da511257f.tar.bz2
spark-baf865ddc2cff9b99d6aeab9861e030da511257f.zip
[SPARK-7059][SQL] Create a DataFrame join API to facilitate equijoin.
Author: Reynold Xin <rxin@databricks.com>

Closes #5638 from rxin/joinUsing and squashes the following commits:

13e9cc9 [Reynold Xin] Code review + Python.
b1bd914 [Reynold Xin] [SPARK-7059][SQL] Create a DataFrame join API to facilitate equijoin and self join.
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- python/pyspark/sql/dataframe.py 9
1 file changed, 8 insertions, 1 deletion
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index ca9bf8efb9..c8c30ce402 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -459,16 +459,23 @@ class DataFrame(object):
The following performs a full outer join between ``df1`` and ``df2``.
:param other: Right side of the join
- :param joinExprs: Join expression
+ :param joinExprs: a string for join column name, or a join expression (Column).
+ If joinExprs is a string indicating the name of the join column,
+ the column must exist on both sides, and this performs an inner equi-join.
:param joinType: str, default 'inner'.
One of `inner`, `outer`, `left_outer`, `right_outer`, `semijoin`.
>>> df.join(df2, df.name == df2.name, 'outer').select(df.name, df2.height).collect()
[Row(name=None, height=80), Row(name=u'Alice', height=None), Row(name=u'Bob', height=85)]
+
+ >>> df.join(df2, 'name').select(df.name, df2.height).collect()
+ [Row(name=u'Bob', height=85)]
"""
if joinExprs is None:
jdf = self._jdf.join(other._jdf)
+ elif isinstance(joinExprs, basestring):
+ jdf = self._jdf.join(other._jdf, joinExprs)
else:
assert isinstance(joinExprs, Column), "joinExprs should be Column"
if joinType is None: