From f535802977c5a3ce45894d89fdf59f8723f023c8 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Thu, 26 Mar 2015 00:01:24 -0700 Subject: [SPARK-6536] [PySpark] Column.inSet() in Python ``` >>> df[df.name.inSet("Bob", "Mike")].collect() [Row(age=5, name=u'Bob')] >>> df[df.age.inSet([1, 2, 3])].collect() [Row(age=2, name=u'Alice')] ``` Author: Davies Liu Closes #5190 from davies/in and squashes the following commits: 6b73a47 [Davies Liu] Column.inSet() in Python --- python/pyspark/sql/dataframe.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'python') diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 5cb89da7a8..bf7c47b726 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -985,6 +985,23 @@ class Column(object): __getslice__ = substr + def inSet(self, *cols): + """ A boolean expression that is evaluated to true if the value of this + expression is contained by the evaluated values of the arguments. + + >>> df[df.name.inSet("Bob", "Mike")].collect() + [Row(age=5, name=u'Bob')] + >>> df[df.age.inSet([1, 2, 3])].collect() + [Row(age=2, name=u'Alice')] + """ + if len(cols) == 1 and isinstance(cols[0], (list, set)): + cols = cols[0] + cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols] + sc = SparkContext._active_spark_context + jcols = ListConverter().convert(cols, sc._gateway._gateway_client) + jc = getattr(self._jc, "in")(sc._jvm.PythonUtils.toSeq(jcols)) + return Column(jc) + # order asc = _unary_op("asc", "Returns a sort expression based on the" " ascending order of the given column name.") -- cgit v1.2.3