about summary refs log tree commit diff
path: root/python
diff options
context:
space:
mode:
authorBurak Yavuz <brkyvz@gmail.com>2015-05-05 22:56:01 -0700
committerReynold Xin <rxin@databricks.com>2015-05-05 22:56:01 -0700
commitba2b56614d7ab1bd7409b49e9d85c248d8faa48b (patch)
tree5dd1200d02c6fd88e8da512286959a33f8c63a2e /python
parenta4669443999dc13a1bb34509c827d8b9096ea84f (diff)
downloadspark-ba2b56614d7ab1bd7409b49e9d85c248d8faa48b.tar.gz
spark-ba2b56614d7ab1bd7409b49e9d85c248d8faa48b.tar.bz2
spark-ba2b56614d7ab1bd7409b49e9d85c248d8faa48b.zip
[SPARK-7358][SQL] Move DataFrame mathfunctions into functions
After a discussion on the user mailing list, it was decided to put all UDFs under `o.a.s.sql.functions`. cc rxin Author: Burak Yavuz <brkyvz@gmail.com> Closes #5923 from brkyvz/move-math-funcs and squashes the following commits: a8dc3f7 [Burak Yavuz] address comments; cf7a7bb [Burak Yavuz] [SPARK-7358] Move DataFrame mathfunctions into functions
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/functions.py      |  52
-rw-r--r--  python/pyspark/sql/mathfunctions.py  | 101
-rw-r--r--  python/pyspark/sql/tests.py          |   2
3 files changed, 53 insertions(+), 102 deletions(-)
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 641220a264..692af868dd 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -51,6 +51,19 @@ def _create_function(name, doc=""):
return _
+def _create_binary_mathfunction(name, doc=""):
+ """ Create a binary mathfunction by name"""
+ def _(col1, col2):
+ sc = SparkContext._active_spark_context
+ # users might write ints for simplicity. This would throw an error on the JVM side.
+ jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1),
+ col2._jc if isinstance(col2, Column) else float(col2))
+ return Column(jc)
+ _.__name__ = name
+ _.__doc__ = doc
+ return _
+
+
_functions = {
'lit': 'Creates a :class:`Column` of literal value.',
'col': 'Returns a :class:`Column` based on the given column name.',
@@ -63,6 +76,34 @@ _functions = {
'sqrt': 'Computes the square root of the specified float value.',
'abs': 'Computes the absolute value.',
+ # unary math functions
+ 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' +
+ '0.0 through pi.',
+ 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' +
+ '-pi/2 through pi/2.',
+ 'atan': 'Computes the tangent inverse of the given value.',
+ 'cbrt': 'Computes the cube-root of the given value.',
+ 'ceil': 'Computes the ceiling of the given value.',
+ 'cos': 'Computes the cosine of the given value.',
+ 'cosh': 'Computes the hyperbolic cosine of the given value.',
+ 'exp': 'Computes the exponential of the given value.',
+ 'expm1': 'Computes the exponential of the given value minus one.',
+ 'floor': 'Computes the floor of the given value.',
+ 'log': 'Computes the natural logarithm of the given value.',
+ 'log10': 'Computes the logarithm of the given value in Base 10.',
+ 'log1p': 'Computes the natural logarithm of the given value plus one.',
+ 'rint': 'Returns the double value that is closest in value to the argument and' +
+ ' is equal to a mathematical integer.',
+ 'signum': 'Computes the signum of the given value.',
+ 'sin': 'Computes the sine of the given value.',
+ 'sinh': 'Computes the hyperbolic sine of the given value.',
+ 'tan': 'Computes the tangent of the given value.',
+ 'tanh': 'Computes the hyperbolic tangent of the given value.',
+ 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
+ 'measured in degrees.',
+ 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
+ 'measured in radians.',
+
'max': 'Aggregate function: returns the maximum value of the expression in a group.',
'min': 'Aggregate function: returns the minimum value of the expression in a group.',
'first': 'Aggregate function: returns the first value in a group.',
@@ -74,10 +115,21 @@ _functions = {
'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.',
}
+# math functions that take two arguments as input
+_binary_mathfunctions = {
+ 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
+ 'polar coordinates (r, theta).',
+ 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.',
+ 'pow': 'Returns the value of the first argument raised to the power of the second argument.'
+}
+
for _name, _doc in _functions.items():
globals()[_name] = _create_function(_name, _doc)
+for _name, _doc in _binary_mathfunctions.items():
+ globals()[_name] = _create_binary_mathfunction(_name, _doc)
del _name, _doc
__all__ += _functions.keys()
+__all__ += _binary_mathfunctions.keys()
__all__.sort()
diff --git a/python/pyspark/sql/mathfunctions.py b/python/pyspark/sql/mathfunctions.py
deleted file mode 100644
index 7dbcab8694..0000000000
--- a/python/pyspark/sql/mathfunctions.py
+++ /dev/null
@@ -1,101 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-A collection of builtin math functions
-"""
-
-from pyspark import SparkContext
-from pyspark.sql.dataframe import Column
-
-__all__ = []
-
-
-def _create_unary_mathfunction(name, doc=""):
- """ Create a unary mathfunction by name"""
- def _(col):
- sc = SparkContext._active_spark_context
- jc = getattr(sc._jvm.mathfunctions, name)(col._jc if isinstance(col, Column) else col)
- return Column(jc)
- _.__name__ = name
- _.__doc__ = doc
- return _
-
-
-def _create_binary_mathfunction(name, doc=""):
- """ Create a binary mathfunction by name"""
- def _(col1, col2):
- sc = SparkContext._active_spark_context
- # users might write ints for simplicity. This would throw an error on the JVM side.
- if type(col1) is int:
- col1 = col1 * 1.0
- if type(col2) is int:
- col2 = col2 * 1.0
- jc = getattr(sc._jvm.mathfunctions, name)(col1._jc if isinstance(col1, Column) else col1,
- col2._jc if isinstance(col2, Column) else col2)
- return Column(jc)
- _.__name__ = name
- _.__doc__ = doc
- return _
-
-
-# math functions are found under another object therefore, they need to be handled separately
-_mathfunctions = {
- 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' +
- '0.0 through pi.',
- 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' +
- '-pi/2 through pi/2.',
- 'atan': 'Computes the tangent inverse of the given value.',
- 'cbrt': 'Computes the cube-root of the given value.',
- 'ceil': 'Computes the ceiling of the given value.',
- 'cos': 'Computes the cosine of the given value.',
- 'cosh': 'Computes the hyperbolic cosine of the given value.',
- 'exp': 'Computes the exponential of the given value.',
- 'expm1': 'Computes the exponential of the given value minus one.',
- 'floor': 'Computes the floor of the given value.',
- 'log': 'Computes the natural logarithm of the given value.',
- 'log10': 'Computes the logarithm of the given value in Base 10.',
- 'log1p': 'Computes the natural logarithm of the given value plus one.',
- 'rint': 'Returns the double value that is closest in value to the argument and' +
- ' is equal to a mathematical integer.',
- 'signum': 'Computes the signum of the given value.',
- 'sin': 'Computes the sine of the given value.',
- 'sinh': 'Computes the hyperbolic sine of the given value.',
- 'tan': 'Computes the tangent of the given value.',
- 'tanh': 'Computes the hyperbolic tangent of the given value.',
- 'toDeg': 'Converts an angle measured in radians to an approximately equivalent angle ' +
- 'measured in degrees.',
- 'toRad': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
- 'measured in radians.'
-}
-
-# math functions that take two arguments as input
-_binary_mathfunctions = {
- 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' +
- 'polar coordinates (r, theta).',
- 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.',
- 'pow': 'Returns the value of the first argument raised to the power of the second argument.'
-}
-
-for _name, _doc in _mathfunctions.items():
- globals()[_name] = _create_unary_mathfunction(_name, _doc)
-for _name, _doc in _binary_mathfunctions.items():
- globals()[_name] = _create_binary_mathfunction(_name, _doc)
-del _name, _doc
-__all__ += _mathfunctions.keys()
-__all__ += _binary_mathfunctions.keys()
-__all__.sort()
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index 46c4c88e98..b232f3a965 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -416,7 +416,7 @@ class SQLTests(ReusedPySparkTestCase):
def test_math_functions(self):
df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
- from pyspark.sql import mathfunctions as functions
+ from pyspark.sql import functions
import math
def get_values(l):