aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorhyukjinkwon <gurwls223@gmail.com>2016-10-07 11:49:34 +0100
committerSean Owen <sowen@cloudera.com>2016-10-07 11:49:34 +0100
commit2b01d3c701c58f07fa42afd570523dd161384882 (patch)
treef9c89c3e67e8927b0fe766a2835f3e454cfa5676 /python
parent24097d84743d3e792e395410139e8d486b75a3ef (diff)
downloadspark-2b01d3c701c58f07fa42afd570523dd161384882.tar.gz
spark-2b01d3c701c58f07fa42afd570523dd161384882.tar.bz2
spark-2b01d3c701c58f07fa42afd570523dd161384882.zip
[SPARK-16960][SQL] Deprecate approxCountDistinct, toDegrees and toRadians according to FunctionRegistry
## What changes were proposed in this pull request? It seems `approxCountDistinct`, `toDegrees` and `toRadians` were also missed while matching the names to the ones in `FunctionRegistry`. (please see [approx_count_distinct](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L244), [degrees](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L203) and [radians](https://github.com/apache/spark/blob/5c2ae79bfcf448d8dc9217efafa1409997c739de/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala#L222) in `FunctionRegistry`). I scanned `functions.scala` against `FunctionRegistry` and it seems these are all that remain. For `countDistinct` and `sumDistinct`, they are not registered in `FunctionRegistry`. This PR deprecates `approxCountDistinct`, `toDegrees` and `toRadians` and introduces `approx_count_distinct`, `degrees` and `radians`. ## How was this patch tested? Existing tests should cover this. Author: hyukjinkwon <gurwls223@gmail.com> Author: Hyukjin Kwon <gurwls223@gmail.com> Closes #14538 from HyukjinKwon/SPARK-16588-followup.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py33
1 file changed, 24 insertions, 9 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 45d6bf944b..7fa3fd2de7 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -112,11 +112,8 @@ _functions_1_4 = {
'sinh': 'Computes the hyperbolic sine of the given value.',
'tan': 'Computes the tangent of the given value.',
'tanh': 'Computes the hyperbolic tangent of the given value.',
- 'toDegrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
- 'measured in degrees.',
- 'toRadians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
- 'measured in radians.',
-
+ 'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
+ 'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
'bitwiseNOT': 'Computes bitwise not.',
}
@@ -135,7 +132,15 @@ _functions_1_6 = {
'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
- ' eliminated.'
+ ' eliminated.',
+}
+
+_functions_2_1 = {
+ # unary math functions
+ 'degrees': 'Converts an angle measured in radians to an approximately equivalent angle ' +
+ 'measured in degrees.',
+ 'radians': 'Converts an angle measured in degrees to an approximately equivalent angle ' +
+ 'measured in radians.',
}
# math functions that take two arguments as input
@@ -182,21 +187,31 @@ for _name, _doc in _window_functions.items():
globals()[_name] = since(1.6)(_create_window_function(_name, _doc))
for _name, _doc in _functions_1_6.items():
globals()[_name] = since(1.6)(_create_function(_name, _doc))
+for _name, _doc in _functions_2_1.items():
+ globals()[_name] = since(2.1)(_create_function(_name, _doc))
del _name, _doc
@since(1.3)
def approxCountDistinct(col, rsd=None):
+ """
+ .. note:: Deprecated in 2.1, use approx_count_distinct instead.
+ """
+ return approx_count_distinct(col, rsd)
+
+
+@since(2.1)
+def approx_count_distinct(col, rsd=None):
"""Returns a new :class:`Column` for approximate distinct count of ``col``.
- >>> df.agg(approxCountDistinct(df.age).alias('c')).collect()
+ >>> df.agg(approx_count_distinct(df.age).alias('c')).collect()
[Row(c=2)]
"""
sc = SparkContext._active_spark_context
if rsd is None:
- jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col))
+ jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col))
else:
- jc = sc._jvm.functions.approxCountDistinct(_to_java_column(col), rsd)
+ jc = sc._jvm.functions.approx_count_distinct(_to_java_column(col), rsd)
return Column(jc)