diff options
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r-- | python/pyspark/sql/functions.py | 15 |
1 file changed, 15 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 3c134faa0a..719e623a1a 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -50,6 +50,7 @@ __all__ = [ 'regexp_replace', 'sha1', 'sha2', + 'size', 'sparkPartitionId', 'struct', 'udf', @@ -825,6 +826,20 @@ def weekofyear(col): return Column(sc._jvm.functions.weekofyear(col)) +@since(1.5) +def size(col): + """ + Collection function: returns the number of elements in the array or map stored in the column. + :param col: name of the column, or a column expression, to measure + + >>> df = sqlContext.createDataFrame([([1, 2, 3],),([1],),([],)], ['data']) + >>> df.select(size(df.data)).collect() + [Row(size(data)=3), Row(size(data)=1), Row(size(data)=0)] + """ + sc = SparkContext._active_spark_context + return Column(sc._jvm.functions.size(_to_java_column(col))) + + class UserDefinedFunction(object): """ User defined function in Python |