aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/functions.py
diff options
context:
space:
mode:
author: hyukjinkwon <gurwls223@gmail.com> 2016-11-01 12:46:41 -0700
committer: Michael Armbrust <michael@databricks.com> 2016-11-01 12:46:41 -0700
commit: 01dd0083011741c2bbe5ae1d2a25f2c9a1302b76 (patch)
tree: 7b9993165b1a4f48e64d566d93c7883a3096403d /python/pyspark/sql/functions.py
parent: cfac17ee1cec414663b957228e469869eb7673c1 (diff)
download: spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.tar.gz
spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.tar.bz2
spark-01dd0083011741c2bbe5ae1d2a25f2c9a1302b76.zip
[SPARK-17764][SQL] Add `to_json` supporting to convert nested struct column to JSON string
## What changes were proposed in this pull request? This PR proposes to add `to_json` function in contrast with `from_json` in Scala, Java and Python. It'd be useful if we can convert a same column from/to json. Also, some datasources do not support nested types. If we are forced to save a dataframe into those data sources, we might be able to work around by this function. The usage is as below: ``` scala val df = Seq(Tuple1(Tuple1(1))).toDF("a") df.select(to_json($"a").as("json")).show() ``` ``` bash +--------+ | json| +--------+ |{"_1":1}| +--------+ ``` ## How was this patch tested? Unit tests in `JsonFunctionsSuite` and `JsonExpressionsSuite`. Author: hyukjinkwon <gurwls223@gmail.com> Closes #15354 from HyukjinKwon/SPARK-17764.
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r--  python/pyspark/sql/functions.py  23
1 file changed, 23 insertions, 0 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 7fa3fd2de7..45e3c22bfc 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1744,6 +1744,29 @@ def from_json(col, schema, options={}):
return Column(jc)
+@ignore_unicode_prefix
+@since(2.1)
+def to_json(col, options={}):
+ """
+ Converts a column containing a [[StructType]] into a JSON string. Throws an exception,
+ in the case of an unsupported type.
+
+ :param col: name of column containing the struct
+ :param options: options to control converting. accepts the same options as the json datasource
+
+ >>> from pyspark.sql import Row
+ >>> from pyspark.sql.types import *
+ >>> data = [(1, Row(name='Alice', age=2))]
+ >>> df = spark.createDataFrame(data, ("key", "value"))
+ >>> df.select(to_json(df.value).alias("json")).collect()
+ [Row(json=u'{"age":2,"name":"Alice"}')]
+ """
+
+ sc = SparkContext._active_spark_context
+ jc = sc._jvm.functions.to_json(_to_java_column(col), options)
+ return Column(jc)
+
+
@since(1.5)
def size(col):
"""