diff options
author | Reynold Xin <rxin@databricks.com> | 2015-05-12 21:43:34 -0700 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2015-05-12 21:43:34 -0700 |
commit | 97dee313f23b00f15638cb72a4a80c1f197f8a9d (patch) | |
tree | b6718297822929afef06cb8550e765c8ad637efe /python/pyspark/sql/dataframe.py | |
parent | 8fd55358b7fc1c7545d823bef7b39769f731c1ee (diff) | |
download | spark-97dee313f23b00f15638cb72a4a80c1f197f8a9d.tar.gz spark-97dee313f23b00f15638cb72a4a80c1f197f8a9d.tar.bz2 spark-97dee313f23b00f15638cb72a4a80c1f197f8a9d.zip |
[SPARK-7321][SQL] Add Column expression for conditional statements (when/otherwise)
This builds on https://github.com/apache/spark/pull/5932 and should close https://github.com/apache/spark/pull/5932 as well.
As an example:
```python
df.select(when(df['age'] == 2, 3).otherwise(4).alias("age")).collect()
```
Author: Reynold Xin <rxin@databricks.com>
Author: kaka1992 <kaka_1992@163.com>
Closes #6072 from rxin/when-expr and squashes the following commits:
8f49201 [Reynold Xin] Throw exception if otherwise is applied twice.
0455eda [Reynold Xin] Reset run-tests.
bfb9d9f [Reynold Xin] Updated documentation and test cases.
762f6a5 [Reynold Xin] Merge pull request #5932 from kaka1992/IFCASE
95724c6 [kaka1992] Update
8218d0a [kaka1992] Update
801009e [kaka1992] Update
76d6346 [kaka1992] [SPARK-7321][SQL] Add Column expression for conditional statements (if, case)
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 31 |
1 files changed, 31 insertions, 0 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 078acfdf7e..82cb1c2fdb 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1546,6 +1546,37 @@ class Column(object):
         """
         return (self >= lowerBound) & (self <= upperBound)
 
+    @ignore_unicode_prefix
+    def when(self, condition, value):
+        """Evaluates a list of conditions and returns one of multiple possible result expressions.
+        If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
+
+        See :func:`pyspark.sql.functions.when` for example usage.
+
+        :param condition: a boolean :class:`Column` expression.
+        :param value: a literal value, or a :class:`Column` expression.
+
+        """
+        if not isinstance(condition, Column):
+            raise TypeError("condition should be a Column")
+        v = value._jc if isinstance(value, Column) else value
+        # Chain onto this column so earlier when() branches are kept, not discarded.
+        jc = self._jc.when(condition._jc, v)
+        return Column(jc)
+
+    @ignore_unicode_prefix
+    def otherwise(self, value):
+        """Sets the value returned when no :func:`Column.when` condition matches.
+        If :func:`Column.otherwise` is not invoked, None is returned for unmatched conditions.
+
+        See :func:`pyspark.sql.functions.when` for example usage.
+
+        :param value: a literal value, or a :class:`Column` expression.
+        """
+        v = value._jc if isinstance(value, Column) else value
+        jc = self._jc.otherwise(v)
+        return Column(jc)
+
     def __repr__(self):
         return 'Column<%s>' % self._jc.toString().encode('utf8')
 