diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-03-25 09:50:06 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-03-25 09:50:06 -0700 |
commit | 43b15e01c46ea1971569f74c9201a55de39e8917 (patch) | |
tree | 699a834d375d70d2a2de9cd46f8eda394e4dea6c /python | |
parent | 6603d9f7e283cf8199cfddfeea30d9db39669726 (diff) | |
download | spark-43b15e01c46ea1971569f74c9201a55de39e8917.tar.gz spark-43b15e01c46ea1971569f74c9201a55de39e8917.tar.bz2 spark-43b15e01c46ea1971569f74c9201a55de39e8917.zip |
[SPARK-14061][SQL] implement CreateMap
## What changes were proposed in this pull request?
As we have `CreateArray` and `CreateStruct`, we should also have `CreateMap`. This PR adds the `CreateMap` expression, and the DataFrame API, and python API.
## How was this patch tested?
various new tests.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #11879 from cloud-fan/create_map.
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql/functions.py | 20 |
1 files changed, 20 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index dee3d536be..f5d959ef98 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -1498,6 +1498,26 @@ def translate(srcCol, matching, replace): # ---------------------- Collection functions ------------------------------ +@ignore_unicode_prefix +@since(2.0) +def create_map(*cols): + """Creates a new map column. + + :param cols: list of column names (string) or list of :class:`Column` expressions that grouped + as key-value pairs, e.g. (key1, value1, key2, value2, ...). + + >>> df.select(create_map('name', 'age').alias("map")).collect() + [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})] + >>> df.select(create_map([df.name, df.age]).alias("map")).collect() + [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})] + """ + sc = SparkContext._active_spark_context + if len(cols) == 1 and isinstance(cols[0], (list, set)): + cols = cols[0] + jc = sc._jvm.functions.map(_to_seq(sc, cols, _to_java_column)) + return Column(jc) + + @since(1.4) def array(*cols): """Creates a new array column. |