aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorWeichenXu <WeichenXu123@outlook.com>2016-07-14 22:55:49 -0700
committerReynold Xin <rxin@databricks.com>2016-07-14 22:55:49 -0700
commit1832423827fd518853b63f91c321e4568a39107d (patch)
tree1ba1f0973010ac852de22a9f94ebac714bb0ebb7 /python/pyspark
parent2e4075e2ece9574100c79558cab054485e25c2ee (diff)
downloadspark-1832423827fd518853b63f91c321e4568a39107d.tar.gz
spark-1832423827fd518853b63f91c321e4568a39107d.tar.bz2
spark-1832423827fd518853b63f91c321e4568a39107d.zip
[SPARK-16546][SQL][PYSPARK] update python dataframe.drop
## What changes were proposed in this pull request? Make the `dataframe.drop` API in Python accept multiple column names as arguments, so that it is consistent with the Scala API. ## How was this patch tested? With the doc tests. Author: WeichenXu <WeichenXu123@outlook.com> Closes #14203 from WeichenXu123/drop_python_api.
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/sql/dataframe.py27
1 files changed, 19 insertions, 8 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index ab41e88620..adf549dd02 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1399,11 +1399,11 @@ class DataFrame(object):
@since(1.4)
@ignore_unicode_prefix
- def drop(self, col):
+ def drop(self, *cols):
"""Returns a new :class:`DataFrame` that drops the specified column.
- :param col: a string name of the column to drop, or a
- :class:`Column` to drop.
+ :param cols: a string name of the column to drop, or a
+ :class:`Column` to drop, or a list of string name of the columns to drop.
>>> df.drop('age').collect()
[Row(name=u'Alice'), Row(name=u'Bob')]
@@ -1416,13 +1416,24 @@ class DataFrame(object):
>>> df.join(df2, df.name == df2.name, 'inner').drop(df2.name).collect()
[Row(age=5, name=u'Bob', height=85)]
+
+ >>> df.join(df2, 'name', 'inner').drop('age', 'height').collect()
+ [Row(name=u'Bob')]
"""
- if isinstance(col, basestring):
- jdf = self._jdf.drop(col)
- elif isinstance(col, Column):
- jdf = self._jdf.drop(col._jc)
+ if len(cols) == 1:
+ col = cols[0]
+ if isinstance(col, basestring):
+ jdf = self._jdf.drop(col)
+ elif isinstance(col, Column):
+ jdf = self._jdf.drop(col._jc)
+ else:
+ raise TypeError("col should be a string or a Column")
else:
- raise TypeError("col should be a string or a Column")
+ for col in cols:
+ if not isinstance(col, basestring):
+ raise TypeError("each col in the param list should be a string")
+ jdf = self._jdf.drop(self._jseq(cols))
+
return DataFrame(jdf, self.sql_ctx)
@ignore_unicode_prefix