diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-08-13 14:56:11 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-08-13 14:56:11 -0700 |
commit | c974a716e17c9fe2628b1ba1d4309ead1bd855ad (patch) | |
tree | fa7673b00c04b4160a3c5f7abcfcdc0580522f0d | |
parent | 869f06c759c29b09c8dc72e0e4034c03f908ba30 (diff) | |
download | spark-c974a716e17c9fe2628b1ba1d4309ead1bd855ad.tar.gz spark-c974a716e17c9fe2628b1ba1d4309ead1bd855ad.tar.bz2 spark-c974a716e17c9fe2628b1ba1d4309ead1bd855ad.zip |
[SPARK-3013] [SQL] [PySpark] convert array into list
because Pyrolite does not support the `array` type when serializing from Python 2.6
Author: Davies Liu <davies.liu@gmail.com>
Closes #1928 from davies/fix_array and squashes the following commits:
858e6c5 [Davies Liu] convert array into list
-rw-r--r-- | python/pyspark/sql.py | 14 |
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 27f1d2ddf9..46540ca3f1 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -498,10 +498,7 @@ def _infer_schema(row): def _create_converter(obj, dataType): """Create an converter to drop the names of fields in obj """ - if not _has_struct(dataType): - return lambda x: x - - elif isinstance(dataType, ArrayType): + if isinstance(dataType, ArrayType): conv = _create_converter(obj[0], dataType.elementType) return lambda row: map(conv, row) @@ -510,6 +507,9 @@ def _create_converter(obj, dataType): conv = _create_converter(value, dataType.valueType) return lambda row: dict((k, conv(v)) for k, v in row.iteritems()) + elif not isinstance(dataType, StructType): + return lambda x: x + # dataType must be StructType names = [f.name for f in dataType.fields] @@ -529,8 +529,7 @@ def _create_converter(obj, dataType): elif hasattr(obj, "__dict__"): # object conv = lambda o: [o.__dict__.get(n, None) for n in names] - nested = any(_has_struct(f.dataType) for f in dataType.fields) - if not nested: + if all(isinstance(f.dataType, PrimitiveType) for f in dataType.fields): return conv row = conv(obj) @@ -1037,7 +1036,8 @@ class SQLContext: raise ValueError("The first row in RDD is empty, " "can not infer schema") if type(first) is dict: - warnings.warn("Using RDD of dict to inferSchema is deprecated") + warnings.warn("Using RDD of dict to inferSchema is deprecated," + "please use pyspark.Row instead") schema = _infer_schema(first) rdd = rdd.mapPartitions(lambda rows: _drop_schema(rows, schema)) |