aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/functions.py
diff options
context:
space:
mode:
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r-- python/pyspark/sql/functions.py 15
1 files changed, 15 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 3c134faa0a..719e623a1a 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -50,6 +50,7 @@ __all__ = [
'regexp_replace',
'sha1',
'sha2',
+ 'size',
'sparkPartitionId',
'struct',
'udf',
@@ -825,6 +826,20 @@ def weekofyear(col):
return Column(sc._jvm.functions.weekofyear(col))
+@since(1.5)
+def size(col):
+ """Collection function: returns the length of the array or map stored in the column.
+
+ :param col: name of column or expression
+
+ >>> df = sqlContext.createDataFrame([([1, 2, 3],),([1],),([],)], ['data'])
+ >>> df.select(size(df.data)).collect()
+ [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)]
+ """
+ sc = SparkContext._active_spark_context  # the active context supplies the _jvm handle used below
+ return Column(sc._jvm.functions.size(_to_java_column(col)))  # wrap the JVM-side result in a Column
+
+
class UserDefinedFunction(object):
"""
User defined function in Python