aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark
diff options
context:
space:
mode:
authorReynold Xin <rxin@databricks.com>2016-01-13 12:44:35 -0800
committerReynold Xin <rxin@databricks.com>2016-01-13 12:44:35 -0800
commitcbbcd8e4250aeec700f04c231f8be2f787243f1f (patch)
tree87741859e4b7ca40feac2829f164e3e03bb4b167 /python/pyspark
parentc2ea79f96acd076351b48162644ed1cff4c8e090 (diff)
downloadspark-cbbcd8e4250aeec700f04c231f8be2f787243f1f.tar.gz
spark-cbbcd8e4250aeec700f04c231f8be2f787243f1f.tar.bz2
spark-cbbcd8e4250aeec700f04c231f8be2f787243f1f.zip
[SPARK-12791][SQL] Simplify CaseWhen by breaking "branches" into "conditions" and "values"
This pull request rewrites the CaseWhen expression to break the single, monolithic "branches" field into a sequence of tuples (Seq[(condition, value)]) and an explicit optional elseValue field. Prior to this pull request, each even position in "branches" represented the condition for a branch, and each odd position represented the value for that branch. Using them has been pretty confusing, with a lot of sliding windows or grouped(2) calls. Author: Reynold Xin <rxin@databricks.com> Closes #10734 from rxin/simplify-case.
Diffstat (limited to 'python/pyspark')
-rw-r--r--python/pyspark/sql/column.py24
1 files changed, 12 insertions, 12 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 900def59d2..320451c52c 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -368,12 +368,12 @@ class Column(object):
>>> from pyspark.sql import functions as F
>>> df.select(df.name, F.when(df.age > 4, 1).when(df.age < 3, -1).otherwise(0)).show()
- +-----+--------------------------------------------------------+
- | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0|
- +-----+--------------------------------------------------------+
- |Alice| -1|
- | Bob| 1|
- +-----+--------------------------------------------------------+
+ +-----+------------------------------------------------------------+
+ | name|CASE WHEN (age > 4) THEN 1 WHEN (age < 3) THEN -1 ELSE 0 END|
+ +-----+------------------------------------------------------------+
+ |Alice| -1|
+ | Bob| 1|
+ +-----+------------------------------------------------------------+
"""
if not isinstance(condition, Column):
raise TypeError("condition should be a Column")
@@ -393,12 +393,12 @@ class Column(object):
>>> from pyspark.sql import functions as F
>>> df.select(df.name, F.when(df.age > 3, 1).otherwise(0)).show()
- +-----+---------------------------------+
- | name|CASE WHEN (age > 3) THEN 1 ELSE 0|
- +-----+---------------------------------+
- |Alice| 0|
- | Bob| 1|
- +-----+---------------------------------+
+ +-----+-------------------------------------+
+ | name|CASE WHEN (age > 3) THEN 1 ELSE 0 END|
+ +-----+-------------------------------------+
+ |Alice| 0|
+ | Bob| 1|
+ +-----+-------------------------------------+
"""
v = value._jc if isinstance(value, Column) else value
jc = self._jc.otherwise(v)