Diffstat (limited to 'python')
-rw-r--r--    python/pyspark/sql.py    18
1 file changed, 14 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 0e8b398fc6..014ac1791c 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -807,14 +807,14 @@ def _create_converter(dataType):
             return
 
         if isinstance(obj, tuple):
-            if hasattr(obj, "fields"):
-                d = dict(zip(obj.fields, obj))
-            if hasattr(obj, "__FIELDS__"):
+            if hasattr(obj, "_fields"):
+                d = dict(zip(obj._fields, obj))
+            elif hasattr(obj, "__FIELDS__"):
                 d = dict(zip(obj.__FIELDS__, obj))
             elif all(isinstance(x, tuple) and len(x) == 2 for x in obj):
                 d = dict(obj)
             else:
-                raise ValueError("unexpected tuple: %s" % obj)
+                raise ValueError("unexpected tuple: %s" % str(obj))
 
         elif isinstance(obj, dict):
             d = obj
@@ -1327,6 +1327,16 @@ class SQLContext(object):
         >>> srdd = sqlCtx.inferSchema(nestedRdd2)
         >>> srdd.collect()
         [Row(f1=[[1, 2], [2, 3]], f2=[1, 2]), ..., f2=[2, 3])]
+
+        >>> from collections import namedtuple
+        >>> CustomRow = namedtuple('CustomRow', 'field1 field2')
+        >>> rdd = sc.parallelize(
+        ...     [CustomRow(field1=1, field2="row1"),
+        ...      CustomRow(field1=2, field2="row2"),
+        ...      CustomRow(field1=3, field2="row3")])
+        >>> srdd = sqlCtx.inferSchema(rdd)
+        >>> srdd.collect()[0]
+        Row(field1=1, field2=u'row1')
         """
         if isinstance(rdd, SchemaRDD):
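
A note on the converter fix in the first hunk: collections.namedtuple exposes its field names through the underscore-prefixed class attribute _fields; there is no public "fields" attribute, so the old hasattr(obj, "fields") check could never match a namedtuple. Turning the second check into elif also keeps the tests a single if/elif chain, so a tuple that matches _fields no longer falls through to the "unexpected tuple" branch. A minimal standalone sketch of the behavior the patch relies on (Point is an illustrative name, not from the patch):

    from collections import namedtuple

    # namedtuple publishes its field names via _fields, not "fields"
    Point = namedtuple('Point', 'x y')
    p = Point(x=1, y=2)

    print(hasattr(p, 'fields'))        # False: the old check never fired
    print(p._fields)                   # ('x', 'y')
    print(dict(zip(p._fields, p)))     # {'x': 1, 'y': 2}, what the converter builds
    print(hasattr((1, 2), '_fields'))  # False: plain tuples take the other branches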
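
The str(obj) change in the raise line fixes a second, subtler bug: the % operator treats a tuple right-hand operand as its full argument list, so "unexpected tuple: %s" % obj itself raises TypeError whenever obj has more than one element (and for a one-element tuple it silently formats the element rather than the tuple). Converting with str() first hands % a single string argument. A short illustration of the pitfall:

    t = (1, 2)
    try:
        "unexpected tuple: %s" % t          # % unpacks t as two format args
    except TypeError as e:
        print(e)                            # not all arguments converted during string formatting
    print("unexpected tuple: %s" % str(t))  # unexpected tuple: (1, 2)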