diff options
author | Davies Liu <davies.liu@gmail.com> | 2014-08-13 14:56:11 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2014-08-13 14:56:20 -0700 |
commit | 99360208792cb68aca6d26258be6c679c58f1cc8 (patch) | |
tree | 976a6be53b356213ef3fb527d405cd0e5cffd5be /python | |
parent | 78f2f99f1a36c2d01ccf7a709bf19b1a1a0f53fc (diff) | |
download | spark-99360208792cb68aca6d26258be6c679c58f1cc8.tar.gz spark-99360208792cb68aca6d26258be6c679c58f1cc8.tar.bz2 spark-99360208792cb68aca6d26258be6c679c58f1cc8.zip |
[SPARK-3013] [SQL] [PySpark] convert array into list
because Pyrolite does not support the array type from Python 2.6
Author: Davies Liu <davies.liu@gmail.com>
Closes #1928 from davies/fix_array and squashes the following commits:
858e6c5 [Davies Liu] convert array into list
(cherry picked from commit c974a716e17c9fe2628b1ba1d4309ead1bd855ad)
Signed-off-by: Michael Armbrust <michael@databricks.com>
Diffstat (limited to 'python')
-rw-r--r-- | python/pyspark/sql.py | 14 |
1 file changed, 7 insertions, 7 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py index 27f1d2ddf9..46540ca3f1 100644 --- a/python/pyspark/sql.py +++ b/python/pyspark/sql.py @@ -498,10 +498,7 @@ def _infer_schema(row): def _create_converter(obj, dataType): """Create an converter to drop the names of fields in obj """ - if not _has_struct(dataType): - return lambda x: x - - elif isinstance(dataType, ArrayType): + if isinstance(dataType, ArrayType): conv = _create_converter(obj[0], dataType.elementType) return lambda row: map(conv, row) @@ -510,6 +507,9 @@ def _create_converter(obj, dataType): conv = _create_converter(value, dataType.valueType) return lambda row: dict((k, conv(v)) for k, v in row.iteritems()) + elif not isinstance(dataType, StructType): + return lambda x: x + # dataType must be StructType names = [f.name for f in dataType.fields] @@ -529,8 +529,7 @@ def _create_converter(obj, dataType): elif hasattr(obj, "__dict__"): # object conv = lambda o: [o.__dict__.get(n, None) for n in names] - nested = any(_has_struct(f.dataType) for f in dataType.fields) - if not nested: + if all(isinstance(f.dataType, PrimitiveType) for f in dataType.fields): return conv row = conv(obj) @@ -1037,7 +1036,8 @@ class SQLContext: raise ValueError("The first row in RDD is empty, " "can not infer schema") if type(first) is dict: - warnings.warn("Using RDD of dict to inferSchema is deprecated") + warnings.warn("Using RDD of dict to inferSchema is deprecated," + "please use pyspark.Row instead") schema = _infer_schema(first) rdd = rdd.mapPartitions(lambda rows: _drop_schema(rows, schema)) |