about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- R/pkg/R/functions.R | 6
-rw-r--r-- python/pyspark/sql/functions.py | 8
-rw-r--r-- python/pyspark/sql/tests.py | 8
3 files changed, 15 insertions, 7 deletions
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 2cff3ac08c..449476dec5 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -2632,8 +2632,8 @@ setMethod("date_sub", signature(y = "Column", x = "numeric"),
#' format_number
#'
-#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places,
-#' and returns the result as a string column.
+#' Formats numeric column y to a format like '#,###,###.##', rounded to x decimal places
+#' with HALF_EVEN round mode, and returns the result as a string column.
#'
#' If x is 0, the result has no decimal point or fractional part.
#' If x < 0, the result will be null.
@@ -3548,7 +3548,7 @@ setMethod("row_number",
#' array_contains
#'
-#' Returns true if the array contain the value.
+#' Returns null if the array is null, true if the array contains the value, and false otherwise.
#'
#' @param x A Column
#' @param value A value to be checked if contained in the column
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index f9121e60f3..843ae3816f 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1327,8 +1327,8 @@ def encode(col, charset):
@since(1.5)
def format_number(col, d):
"""
- Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places,
- and returns the result as a string.
+ Formats the number X to a format like '#,--#,--#.--', rounded to d decimal places
+ with HALF_EVEN round mode, and returns the result as a string.
:param col: the column name of the numeric value to be formatted
:param d: the N decimal places
@@ -1675,8 +1675,8 @@ def array(*cols):
@since(1.5)
def array_contains(col, value):
"""
- Collection function: returns True if the array contains the given value. The collection
- elements and value must be of the same type.
+ Collection function: returns null if the array is null, true if the array contains the
+ given value, and false otherwise.
:param col: name of column containing array
:param value: value to check for in array
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index b93b7ed192..db41b4edb6 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1129,6 +1129,14 @@ class SQLTests(ReusedPySparkTestCase):
rndn2 = df.select('key', functions.randn(0)).collect()
self.assertEqual(sorted(rndn1), sorted(rndn2))
+ def test_array_contains_function(self):
+ from pyspark.sql.functions import array_contains
+
+ df = self.spark.createDataFrame([(["1", "2", "3"],), ([],)], ['data'])
+ actual = df.select(array_contains(df.data, 1).alias('b')).collect()
+ # The value argument only needs to be implicitly castable to the array's element type.
+ self.assertEqual([Row(b=True), Row(b=False)], actual)
+
def test_between_function(self):
df = self.sc.parallelize([
Row(a=1, b=2, c=3),