aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorWenchen Fan <wenchen@databricks.com>2016-03-25 09:50:06 -0700
committerYin Huai <yhuai@databricks.com>2016-03-25 09:50:06 -0700
commit43b15e01c46ea1971569f74c9201a55de39e8917 (patch)
tree699a834d375d70d2a2de9cd46f8eda394e4dea6c /python
parent6603d9f7e283cf8199cfddfeea30d9db39669726 (diff)
downloadspark-43b15e01c46ea1971569f74c9201a55de39e8917.tar.gz
spark-43b15e01c46ea1971569f74c9201a55de39e8917.tar.bz2
spark-43b15e01c46ea1971569f74c9201a55de39e8917.zip
[SPARK-14061][SQL] implement CreateMap
## What changes were proposed in this pull request? As we have `CreateArray` and `CreateStruct`, we should also have `CreateMap`. This PR adds the `CreateMap` expression, and the DataFrame API, and python API. ## How was this patch tested? various new tests. Author: Wenchen Fan <wenchen@databricks.com> Closes #11879 from cloud-fan/create_map.
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/sql/functions.py20
1 files changed, 20 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index dee3d536be..f5d959ef98 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1498,6 +1498,26 @@ def translate(srcCol, matching, replace):
# ---------------------- Collection functions ------------------------------
+@ignore_unicode_prefix
+@since(2.0)
+def create_map(*cols):
+ """Creates a new map column.
+
+ :param cols: list of column names (string) or list of :class:`Column` expressions that grouped
+ as key-value pairs, e.g. (key1, value1, key2, value2, ...).
+
+ >>> df.select(create_map('name', 'age').alias("map")).collect()
+ [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]
+ >>> df.select(create_map([df.name, df.age]).alias("map")).collect()
+ [Row(map={u'Alice': 2}), Row(map={u'Bob': 5})]
+ """
+ sc = SparkContext._active_spark_context
+ if len(cols) == 1 and isinstance(cols[0], (list, set)):
+ cols = cols[0]
+ jc = sc._jvm.functions.map(_to_seq(sc, cols, _to_java_column))
+ return Column(jc)
+
+
@since(1.4)
def array(*cols):
"""Creates a new array column.