From 2b01d3c701c58f07fa42afd570523dd161384882 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Fri, 7 Oct 2016 11:49:34 +0100 Subject: [SPARK-16960][SQL] Deprecate approxCountDistinct, toDegrees and toRadians according to FunctionRegistry ## What changes were proposed in this pull request? It seems `approxCountDistinct`, `toDegrees` and `toRadians` are also missed while matching the names to the ones in `FunctionRegistry`. (please see [approx_count_distinct](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L244), [degrees](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L203) and [radians](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L222) in `FunctionRegistry`). I took a scan between `functions.scala` and `FunctionRegistry` and it seems these are all left. For `countDistinct` and `sumDistinct`, they are not registered in `FunctionRegistry`. This PR deprecates `approxCountDistinct`, `toDegrees` and `toRadians` and introduces `approx_count_distinct`, `degrees` and `radians`. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwon Author: Hyukjin Kwon Closes #14538 from HyukjinKwon/SPARK-16588-followup. --- python/pyspark/sql/functions.py | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'python/pyspark/sql/functions.py') diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 45d6bf944b..7fa3fd2de7 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -112,11 +112,8 @@ _functions_1_4 = { 'sinh': 'Computes the hyperbolic sine of the given value.', 'tan': 'Computes the tangent of the given value.', 'tanh': 'Computes the hyperbolic tangent of the given value.', - 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' + - 'measured in degrees.', - 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + - 'measured in radians.', - + 'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.', + 'toRadians': '.. note:: Deprecated in 2.1, use radians instead.', 'bitwiseNOT': 'Computes bitwise not.', } @@ -135,7 +132,15 @@ _functions_1_6 = { 'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.', 'collect_list': 'Aggregate function: returns a list of objects with duplicates.', 'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' + - ' eliminated.' + ' eliminated.', +} + +_functions_2_1 = { + # unary math functions + 'degrees': 'Converts an angle measured in radians to an approximately equivalent angle ' + + 'measured in degrees.', + 'radians': 'Converts an angle measured in degrees to an approximately equivalent angle ' + + 'measured in radians.', } # math functions that take two arguments as input @@ -182,21 +187,31 @@ for _name, _doc in _window_functions.items(): globals()[_name] = since(1.6)(_create_window_function(_name, _doc)) for _name, _doc in _functions_1_6.items(): globals()[_name] = since(1.6)(_create_function(_name, _doc)) +for _name, _doc in _functions_2_1.items(): + globals()[_name] = since(2.1)(_create_function(_name, _doc)) del _name, _doc @since(1.3) def approxCountDistinct(col, rsd=None): + """ + .. note:: Deprecated in 2.1, use approx_count_distinct instead. + """ + return approx_count_distinct(col, rsd) + + +@since(2.1) +def approx_count_distinct(col, rsd=None): """Returns a new :class:`Column` for approximate distinct count of ``col``. - >>> df.agg(approxCountDistinct(df.age).alias('c')).collect() + >>> df.agg(approx_count_distinct(df.age).alias('c')).collect() [Row(c=2)] """ sc = SparkContext._active_spark_context if rsd is None: - jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col)) + jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col)) else: - jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col), rsd) + jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd) return Column(jc) -- cgit v1.2.3