diff options
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 14 |
1 files changed, 12 insertions, 2 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 7e1854c43b..5cfc348a69 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -911,14 +911,24 @@ class DataFrame(object): """ return self.groupBy().agg(*exprs) + @since(2.0) + def union(self, other): + """ Return a new :class:`DataFrame` containing union of rows in this + frame and another frame. + + This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union + (that does deduplication of elements), use this function followed by a distinct. + """ + return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + @since(1.3) def unionAll(self, other): """ Return a new :class:`DataFrame` containing union of rows in this frame and another frame. - This is equivalent to `UNION ALL` in SQL. + .. note:: Deprecated in 2.0, use union instead. """ - return DataFrame(self._jdf.unionAll(other._jdf), self.sql_ctx) + return self.union(other) @since(1.3) def intersect(self, other): |