diff options
author | Reynold Xin <rxin@databricks.com> | 2015-08-06 10:39:16 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-08-06 10:39:25 -0700 |
commit | 6b8d2d7edcdc780fa97a03cd8e2adec6921fd426 (patch) | |
tree | ab3009ea1064126070fe3514de62c75257e15058 /python | |
parent | 78f168e97238316e33ce0d3763ba655603928c32 (diff) | |
download | spark-6b8d2d7edcdc780fa97a03cd8e2adec6921fd426.tar.gz spark-6b8d2d7edcdc780fa97a03cd8e2adec6921fd426.tar.bz2 spark-6b8d2d7edcdc780fa97a03cd8e2adec6921fd426.zip |
[SPARK-9659][SQL] Rename inSet to isin to match Pandas function.
Inspiration drawn from this blog post: https://lab.getbase.com/pandarize-spark-dataframes/
Author: Reynold Xin <rxin@databricks.com>
Closes #7977 from rxin/isin and squashes the following commits:
9b1d3d6 [Reynold Xin] Added return.
2197d37 [Reynold Xin] Fixed test case.
7c1b6cf [Reynold Xin] Import warnings.
4f4a35d [Reynold Xin] [SPARK-9659][SQL] Rename inSet to isin to match Pandas function.
(cherry picked from commit 5e1b0ef07942a041195b3decd05d86c289bc8d2b)
Signed-off-by: Reynold Xin <rxin@databricks.com>
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/column.py | 20 |
1 files changed, 19 insertions, 1 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 0a85da7443..8af8637cf9 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -16,6 +16,7 @@
 #
 
 import sys
+import warnings
 
 if sys.version >= '3':
     basestring = str
@@ -254,12 +255,29 @@ class Column(object):
         [Row(age=5, name=u'Bob')]
         >>> df[df.age.inSet([1, 2, 3])].collect()
         [Row(age=2, name=u'Alice')]
+
+        .. note:: Deprecated in 1.5, use :func:`Column.isin` instead.
+        """
+        warnings.warn("inSet is deprecated. Use isin() instead.")
+        return self.isin(*cols)
+
+    @ignore_unicode_prefix
+    @since(1.5)
+    def isin(self, *cols):
+        """
+        A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.isin("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.isin([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
         """
         if len(cols) == 1 and isinstance(cols[0], (list, set)):
             cols = cols[0]
         cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
         sc = SparkContext._active_spark_context
-        jc = getattr(self._jc, "in")(_to_seq(sc, cols))
+        jc = getattr(self._jc, "isin")(_to_seq(sc, cols))
         return Column(jc)
 
     # order