From c1ad373f26053e1906fce7681c03d130a642bf33 Mon Sep 17 00:00:00 2001
From: asokadiggs
Date: Tue, 29 Sep 2015 17:45:18 -0400
Subject: [SPARK-10782] [PYTHON] Update dropDuplicates documentation

Documentation for dropDuplicates() and drop_duplicates() is one and the same. Resolved the error in the example for drop_duplicates using the same approach used for groupby and groupBy, by indicating that dropDuplicates and drop_duplicates are aliases.

Author: asokadiggs

Closes #8930 from asokadiggs/jira-10782.
---
 python/pyspark/sql/dataframe.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'python')

diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b09422aade..033b31983f 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -931,6 +931,8 @@ class DataFrame(object):
         """Return a new :class:`DataFrame` with duplicate rows removed,
         optionally only considering certain columns.
 
+        :func:`drop_duplicates` is an alias for :func:`dropDuplicates`.
+
         >>> from pyspark.sql import Row
         >>> df = sc.parallelize([ \
             Row(name='Alice', age=5, height=80), \
-- 
cgit v1.2.3