From 27a8d4ce39aa620a5926b33371fcf03bbcb18698 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Wed, 6 Aug 2014 11:08:12 -0700 Subject: [SPARK-2875] [PySpark] [SQL] handle null in schemaRDD() Handle null in schemaRDD during converting them into Python. Author: Davies Liu Closes #1802 from davies/json and squashes the following commits: 88e6b1f [Davies Liu] handle null in schemaRDD() (cherry picked from commit 48789117c2dd6d38e0bd8d21cdbcb989913205a6) Signed-off-by: Michael Armbrust --- python/pyspark/sql.py | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'python') diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index f1093701dd..adc56e7ec0 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -1231,6 +1231,13 @@ class SQLContext: ... "field3.field5[0] as f3 from table3") >>> srdd6.collect() [Row(f1=u'row1', f2=None,...Row(f1=u'row3', f2=[], f3=None)] + + >>> sqlCtx.jsonRDD(sc.parallelize(['{}', + ... '{"key0": {"key1": "value1"}}'])).collect() + [Row(key0=None), Row(key0=Row(key1=u'value1'))] + >>> sqlCtx.jsonRDD(sc.parallelize(['{"key0": null}', + ... '{"key0": {"key1": "value1"}}'])).collect() + [Row(key0=None), Row(key0=Row(key1=u'value1'))] """ def func(iterator): -- cgit v1.2.3