diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-08-13 14:56:11 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-08-13 14:56:20 -0700 |
commit | 99360208792cb68aca6d26258be6c679c58f1cc8 (patch) | |
tree | 976a6be53b356213ef3fb527d405cd0e5cffd5be /python | |
parent | 78f2f99f1a36c2d01ccf7a709bf19b1a1a0f53fc (diff) | |
download | spark-99360208792cb68aca6d26258be6c679c58f1cc8.tar.gz spark-99360208792cb68aca6d26258be6c679c58f1cc8.tar.bz2 spark-99360208792cb68aca6d26258be6c679c58f1cc8.zip |
[SPARK-3013] [SQL] [PySpark] convert array into list
because Pyrolite does not support the array type from Python 2.6
Author: Davies Liu <davies.liu@gmail.com>
Closes #1928 from davies/fix_array and squashes the following commits:
858e6c5 [Davies Liu] convert array into list
(cherry picked from commit c974a716e17c9fe2628b1ba1d4309ead1bd855ad)
Signed-off-by: Michael Armbrust <michael@databricks.com>
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql.py | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 27f1d2ddf9..46540ca3f1 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -498,10 +498,7 @@ def _infer_schema(row): def _create_converter(obj, dataType): """Create an converter to drop the names of fields in obj """ - if not _has_struct(dataType): - return lambda x: x - - elif isinstance(dataType, ArrayType): + if isinstance(dataType, ArrayType): conv = _create_converter(obj[0], dataType.elementType) return lambda row: map(conv, row) @@ -510,6 +507,9 @@ def _create_converter(obj, dataType): conv = _create_converter(value, dataType.valueType) return lambda row: dict((k, conv(v)) for k, v in row.iteritems()) + elif not isinstance(dataType, StructType): + return lambda x: x + # dataType must be StructType names = [f.name for f in dataType.fields] @@ -529,8 +529,7 @@ def _create_converter(obj, dataType): elif hasattr(obj, "__dict__"): # object conv = lambda o: [o.__dict__.get(n, None) for n in names] - nested = any(_has_struct(f.dataType) for f in dataType.fields) - if not nested: + if all(isinstance(f.dataType, PrimitiveType) for f in dataType.fields): return conv row = conv(obj) @@ -1037,7 +1036,8 @@ class SQLContext: raise ValueError("The first row in RDD is empty, " "can not infer schema") if type(first) is dict: - warnings.warn("Using RDD of dict to inferSchema is deprecated") + warnings.warn("Using RDD of dict to inferSchema is deprecated," + "please use pyspark.Row instead") schema = _infer_schema(first) rdd = rdd.mapPartitions(lambda rows: _drop_schema(rows, schema)) |