aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Davies Liu <davies@databricks.com>, 2015-06-02 13:38:06 -0700
committer: Reynold Xin <rxin@databricks.com>, 2015-06-02 13:38:14 -0700
commit: 6b0f61563dd93b4225f31aafe6546cf0a11bf9a6 (patch)
tree: 4f63886aea6d7ef45510fb24c345c33f16e10bc9
parent: cbaf595447ae42227516f9220f6a0ed2d9fec54f (diff)
download: spark-6b0f61563dd93b4225f31aafe6546cf0a11bf9a6.tar.gz
spark-6b0f61563dd93b4225f31aafe6546cf0a11bf9a6.tar.bz2
spark-6b0f61563dd93b4225f31aafe6546cf0a11bf9a6.zip
[SPARK-8038] [SQL] [PYSPARK] fix Column.when() and otherwise()
Thanks ogirardot, closes #6580

cc rxin JoshRosen

Author: Davies Liu <davies@databricks.com>

Closes #6590 from davies/when and squashes the following commits:

c0f2069 [Davies Liu] fix Column.when() and otherwise()

(cherry picked from commit 605ddbb27c8482fc0107b21c19d4e4ae19348f35)
Signed-off-by: Reynold Xin <rxin@databricks.com>
-rw-r--r-- python/pyspark/sql/column.py 31
1 files changed, 28 insertions, 3 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 8dc5039f58..1ecec5b126 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -315,6 +315,14 @@ class Column(object):
"""
A boolean expression that is evaluated to true if the value of this
expression is between the given columns.
+
+ >>> df.select(df.name, df.age.between(2, 4)).show()
+ +-----+--------------------------+
+ | name|((age >= 2) && (age <= 4))|
+ +-----+--------------------------+
+ |Alice| true|
+ | Bob| false|
+ +-----+--------------------------+
"""
return (self >= lowerBound) & (self <= upperBound)
@@ -328,12 +336,20 @@ class Column(object):
:param condition: a boolean :class:`Column` expression.
:param value: a literal value, or a :class:`Column` expression.
+
+ >>> from pyspark.sql import functions as F
+ >>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show()
+ +-----+--------------------------------------------------------+
+ | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0|
+ +-----+--------------------------------------------------------+
+ |Alice| -1|
+ | Bob| 1|
+ +-----+--------------------------------------------------------+
"""
- sc = SparkContext._active_spark_context
if not isinstance(condition, Column):
raise TypeError("condition should be a Column")
v = value._jc if isinstance(value, Column) else value
- jc = sc._jvm.functions.when(condition._jc, v)
+ jc = self._jc.when(condition._jc, v)
return Column(jc)
@since(1.4)
@@ -345,9 +361,18 @@ class Column(object):
See :func:`pyspark.sql.functions.when` for example usage.
:param value: a literal value, or a :class:`Column` expression.
+
+ >>> from pyspark.sql import functions as F
+ >>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show()
+ +-----+---------------------------------+
+ | name|CASE WHEN (age > 3) THEN 1 ELSE 0|
+ +-----+---------------------------------+
+ |Alice| 0|
+ | Bob| 1|
+ +-----+---------------------------------+
"""
v = value._jc if isinstance(value, Column) else value
- jc = self._jc.otherwise(value)
+ jc = self._jc.otherwise(v)
return Column(jc)
@since(1.4)