From ba2b56614d7ab1bd7409b49e9d85c248d8faa48b Mon Sep 17 00:00:00 2001 From: Burak Yavuz Date: Tue, 5 May 2015 22:56:01 -0700 Subject: [SPARK-7358][SQL] Move DataFrame mathfunctions into functions After a discussion on the user mailing list, it was decided to put all UDFs under `o.a.s.sql.functions`. cc rxin Author: Burak Yavuz Closes #5923 from brkyvz/move-math-funcs and squashes the following commits: a8dc3f7 [Burak Yavuz] address comments cf7a7bb [Burak Yavuz] [SPARK-7358] Move DataFrame mathfunctions into functions --- python/pyspark/sql/functions.py | 52 +++++++++++++++++++ python/pyspark/sql/mathfunctions.py | 101 ------------------------------------ python/pyspark/sql/tests.py | 2 +- 3 files changed, 53 insertions(+), 102 deletions(-) delete mode 100644 python/pyspark/sql/mathfunctions.py (limited to 'python') diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 641220a264..692af868dd 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -51,6 +51,19 @@ def _create_function(name, doc=""): return _ +def _create_binary_mathfunction(name, doc=""): + """ Create a binary mathfunction by name""" + def _(col1, col2): + sc = SparkContext._active_spark_context + # users might write ints for simplicity. This would throw an error on the JVM side. 
+ jc = getattr(sc._jvm.functions, name)(col1._jc if isinstance(col1, Column) else float(col1), + col2._jc if isinstance(col2, Column) else float(col2)) + return Column(jc) + _.__name__ = name + _.__doc__ = doc + return _ + + _functions = { 'lit': 'Creates a :class:`Column` of literal value.', 'col': 'Returns a :class:`Column` based on the given column name.', @@ -63,6 +76,34 @@ _functions = { 'sqrt': 'Computes the square root of the specified float value.', 'abs': 'Computes the absolute value.', + # unary math functions + 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' + + '0.0 through pi.', + 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' + + '-pi/2 through pi/2.', + 'atan': 'Computes the tangent inverse of the given value.', + 'cbrt': 'Computes the cube-root of the given value.', + 'ceil': 'Computes the ceiling of the given value.', + 'cos': 'Computes the cosine of the given value.', + 'cosh': 'Computes the hyperbolic cosine of the given value.', + 'exp': 'Computes the exponential of the given value.', + 'expm1': 'Computes the exponential of the given value minus one.', + 'floor': 'Computes the floor of the given value.', + 'log': 'Computes the natural logarithm of the given value.', + 'log10': 'Computes the logarithm of the given value in Base 10.', + 'log1p': 'Computes the natural logarithm of the given value plus one.', + 'rint': 'Returns the double value that is closest in value to the argument and' + + ' is equal to a mathematical integer.', + 'signum': 'Computes the signum of the given value.', + 'sin': 'Computes the sine of the given value.', + 'sinh': 'Computes the hyperbolic sine of the given value.', + 'tan': 'Computes the tangent of the given value.', + 'tanh': 'Computes the hyperbolic tangent of the given value.', + 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' + + 'measured in degrees.', + 'toRadians': 'Converts an 
angle measured in degrees to an approximately equivalent angle ' + + 'measured in radians.', + 'max': 'Aggregate function: returns the maximum value of the expression in a group.', 'min': 'Aggregate function: returns the minimum value of the expression in a group.', 'first': 'Aggregate function: returns the first value in a group.', @@ -74,10 +115,21 @@ _functions = { 'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.', } +# math functions that take two arguments as input +_binary_mathfunctions = { + 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + + 'polar coordinates (r, theta).', + 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.', + 'pow': 'Returns the value of the first argument raised to the power of the second argument.' +} + for _name, _doc in _functions.items(): globals()[_name] = _create_function(_name, _doc) +for _name, _doc in _binary_mathfunctions.items(): + globals()[_name] = _create_binary_mathfunction(_name, _doc) del _name, _doc __all__ += _functions.keys() +__all__ += _binary_mathfunctions.keys() __all__.sort() diff --git a/python/pyspark/sql/mathfunctions.py b/python/pyspark/sql/mathfunctions.py deleted file mode 100644 index 7dbcab8694..0000000000 --- a/python/pyspark/sql/mathfunctions.py +++ /dev/null @@ -1,101 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" -A collection of builtin math functions -""" - -from pyspark import SparkContext -from pyspark.sql.dataframe import Column - -__all__ = [] - - -def _create_unary_mathfunction(name, doc=""): - """ Create a unary mathfunction by name""" - def _(col): - sc = SparkContext._active_spark_context - jc = getattr(sc._jvm.mathfunctions, name)(col._jc if isinstance(col, Column) else col) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -def _create_binary_mathfunction(name, doc=""): - """ Create a binary mathfunction by name""" - def _(col1, col2): - sc = SparkContext._active_spark_context - # users might write ints for simplicity. This would throw an error on the JVM side. 
- if type(col1) is int: - col1 = col1 * 1.0 - if type(col2) is int: - col2 = col2 * 1.0 - jc = getattr(sc._jvm.mathfunctions, name)(col1._jc if isinstance(col1, Column) else col1, - col2._jc if isinstance(col2, Column) else col2) - return Column(jc) - _.__name__ = name - _.__doc__ = doc - return _ - - -# math functions are found under another object therefore, they need to be handled separately -_mathfunctions = { - 'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range' + - '0.0 through pi.', - 'asin': 'Computes the sine inverse of the given value; the returned angle is in the range' + - '-pi/2 through pi/2.', - 'atan': 'Computes the tangent inverse of the given value.', - 'cbrt': 'Computes the cube-root of the given value.', - 'ceil': 'Computes the ceiling of the given value.', - 'cos': 'Computes the cosine of the given value.', - 'cosh': 'Computes the hyperbolic cosine of the given value.', - 'exp': 'Computes the exponential of the given value.', - 'expm1': 'Computes the exponential of the given value minus one.', - 'floor': 'Computes the floor of the given value.', - 'log': 'Computes the natural logarithm of the given value.', - 'log10': 'Computes the logarithm of the given value in Base 10.', - 'log1p': 'Computes the natural logarithm of the given value plus one.', - 'rint': 'Returns the double value that is closest in value to the argument and' + - ' is equal to a mathematical integer.', - 'signum': 'Computes the signum of the given value.', - 'sin': 'Computes the sine of the given value.', - 'sinh': 'Computes the hyperbolic sine of the given value.', - 'tan': 'Computes the tangent of the given value.', - 'tanh': 'Computes the hyperbolic tangent of the given value.', - 'toDeg': 'Converts an angle measured in radians to an approximately equivalent angle ' + - 'measured in degrees.', - 'toRad': 'Converts an angle measured in degrees to an approximately equivalent angle ' + - 'measured in radians.' 
-} - -# math functions that take two arguments as input -_binary_mathfunctions = { - 'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to' + - 'polar coordinates (r, theta).', - 'hypot': 'Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.', - 'pow': 'Returns the value of the first argument raised to the power of the second argument.' -} - -for _name, _doc in _mathfunctions.items(): - globals()[_name] = _create_unary_mathfunction(_name, _doc) -for _name, _doc in _binary_mathfunctions.items(): - globals()[_name] = _create_binary_mathfunction(_name, _doc) -del _name, _doc -__all__ += _mathfunctions.keys() -__all__ += _binary_mathfunctions.keys() -__all__.sort() diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 46c4c88e98..b232f3a965 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -416,7 +416,7 @@ class SQLTests(ReusedPySparkTestCase): def test_math_functions(self): df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF() - from pyspark.sql import mathfunctions as functions + from pyspark.sql import functions import math def get_values(l): -- cgit v1.2.3