about | summary | refs | log | tree | commit | diff
path: root/python/pyspark/sql
diff options
context:
space:
mode:
author: Shiti <ssaxena.ece@gmail.com> 2015-05-07 01:00:29 -0700
committer: Reynold Xin <rxin@databricks.com> 2015-05-07 01:00:29 -0700
commit: fa8fddffd52f8146ccceb72c2990607aaf5b2131 (patch)
tree: 9e01edb034e363e24a49293cd7dff5f1cb59e834 /python/pyspark/sql
parent: 01187f59b3d118495b6cfea965690829b99a36fa (diff)
download: spark-fa8fddffd52f8146ccceb72c2990607aaf5b2131.tar.gz
spark-fa8fddffd52f8146ccceb72c2990607aaf5b2131.tar.bz2
spark-fa8fddffd52f8146ccceb72c2990607aaf5b2131.zip
[SPARK-7295][SQL] bitwise operations for DataFrame DSL
Author: Shiti <ssaxena.ece@gmail.com>
Closes #5867 from Shiti/spark-7295 and squashes the following commits:
71a9913 [Shiti] implementation for bitwise and, or, not and xor on Column with tests and docs
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- python/pyspark/sql/dataframe.py 5
-rw-r--r-- python/pyspark/sql/functions.py 2
-rw-r--r-- python/pyspark/sql/tests.py 13
3 files changed, 20 insertions, 0 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 24f370543d..cee804f5cc 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -1277,6 +1277,11 @@ class Column(object):
__contains__ = _bin_op("contains")
__getitem__ = _bin_op("getItem")
+ # bitwise operators
+ bitwiseOR = _bin_op("bitwiseOR")
+ bitwiseAND = _bin_op("bitwiseAND")
+ bitwiseXOR = _bin_op("bitwiseXOR")
+
def getItem(self, key):
"""An expression that gets an item at position `ordinal` out of a list,
or gets an item by key out of a dict.
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 692af868dd..274c410a1e 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -104,6 +104,8 @@ _functions = {
'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
'measured in radians.',
+ 'bitwiseNOT': 'Computes bitwise not.',
+
'max': 'Aggregate function: returns the maximum value of the expression in a group.',
'min': 'Aggregate function: returns the minimum value of the expression in a group.',
'first': 'Aggregate function: returns the first value in a group.',
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index b232f3a965..45dfedce22 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -645,6 +645,19 @@ class SQLTests(ReusedPySparkTestCase):
self.assertEqual(row.age, None)
self.assertEqual(row.height, None)
+ def test_bitwise_operations(self):
+ from pyspark.sql import functions
+ row = Row(a=170, b=75)
+ df = self.sqlCtx.createDataFrame([row])
+ result = df.select(df.a.bitwiseAND(df.b)).collect()[0].asDict()
+ self.assertEqual(170 & 75, result['(a & b)'])
+ result = df.select(df.a.bitwiseOR(df.b)).collect()[0].asDict()
+ self.assertEqual(170 | 75, result['(a | b)'])
+ result = df.select(df.a.bitwiseXOR(df.b)).collect()[0].asDict()
+ self.assertEqual(170 ^ 75, result['(a ^ b)'])
+ result = df.select(functions.bitwiseNOT(df.b)).collect()[0].asDict()
+ self.assertEqual(~75, result['~b'])
+
class HiveContextSQLTests(ReusedPySparkTestCase):