[SPARK-9659][SQL] Rename inSet to isin to match Pandas function.

Inspiration drawn from this blog post: https://lab.getbase.com/pandarize-spark-dataframes/

Author: Reynold Xin <rxin@databricks.com>

Closes #7977 from rxin/isin and squashes the following commits:

9b1d3d6 [Reynold Xin] Added return.
2197d37 [Reynold Xin] Fixed test case.
7c1b6cf [Reynold Xin] Import warnings.
4f4a35d [Reynold Xin] [SPARK-9659][SQL] Rename inSet to isin to match Pandas function.
author: Reynold Xin <rxin@databricks.com> 2015-08-06 10:39:16 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-08-06 10:39:16 -0700
commit: 5e1b0ef07942a041195b3decd05d86c289bc8d2b (patch)
tree: 3ee8fa52d14bce8b62e152da4aa560eae780338b /python
parent: 98e69467d4fda2c26a951409b5b7c6f1e9345ce4 (diff)
download: spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.tar.gz
spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.tar.bz2
spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.zip
1 files changed, 19 insertions, 1 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 0a85da7443..8af8637cf9 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -16,6 +16,7 @@
 #
 
 import sys
+import warnings
 
 if sys.version >= '3':
     basestring = str
@@ -254,12 +255,29 @@ class Column(object):
         [Row(age=5, name=u'Bob')]
         >>> df[df.age.inSet([1, 2, 3])].collect()
         [Row(age=2, name=u'Alice')]
+
+        .. note:: Deprecated in 1.5, use :func:`Column.isin` instead.
+        """
+        warnings.warn("inSet is deprecated. Use isin() instead.")
+        return self.isin(*cols)
+
+    @ignore_unicode_prefix
+    @since(1.5)
+    def isin(self, *cols):
+        """
+        A boolean expression that is evaluated to true if the value of this
+        expression is contained by the evaluated values of the arguments.
+
+        >>> df[df.name.isin("Bob", "Mike")].collect()
+        [Row(age=5, name=u'Bob')]
+        >>> df[df.age.isin([1, 2, 3])].collect()
+        [Row(age=2, name=u'Alice')]
         """
         if len(cols) == 1 and isinstance(cols[0], (list, set)):
             cols = cols[0]
         cols = [c._jc if isinstance(c, Column) else _create_column_from_literal(c) for c in cols]
         sc = SparkContext._active_spark_context
-        jc = getattr(self._jc, "in")(_to_seq(sc, cols))
+        jc = getattr(self._jc, "isin")(_to_seq(sc, cols))
         return Column(jc)
 
     # order
author	Reynold Xin <rxin@databricks.com>	2015-08-06 10:39:16 -0700
committer	Reynold Xin <rxin@databricks.com>	2015-08-06 10:39:16 -0700
commit	5e1b0ef07942a041195b3decd05d86c289bc8d2b (patch)
tree	3ee8fa52d14bce8b62e152da4aa560eae780338b /python
parent	98e69467d4fda2c26a951409b5b7c6f1e9345ce4 (diff)
download	spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.tar.gz spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.tar.bz2 spark-5e1b0ef07942a041195b3decd05d86c289bc8d2b.zip