From 8ead999fd627b12837fb2f082a0e76e9d121d269 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Tue, 13 Jan 2015 12:50:31 -0800 Subject: [SPARK-5223] [MLlib] [PySpark] fix MapConverter and ListConverter in MLlib It will introduce problems if the object in dict/list/tuple can not support by py4j, such as Vector. Also, pickle may have better performance for larger object (less RPC). In some cases that the object in dict/list can not be pickled (such as JavaObject), we should still use MapConvert/ListConvert. This PR should be ported into branch-1.2 Author: Davies Liu Closes #4023 from davies/listconvert and squashes the following commits: 55d4ab2 [Davies Liu] fix MapConverter and ListConverter in MLlib --- python/pyspark/mllib/common.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'python/pyspark/mllib/common.py') diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py index 33c49e2399..3c5ee66cd8 100644 --- a/python/pyspark/mllib/common.py +++ b/python/pyspark/mllib/common.py @@ -18,7 +18,7 @@ import py4j.protocol from py4j.protocol import Py4JJavaError from py4j.java_gateway import JavaObject -from py4j.java_collections import MapConverter, ListConverter, JavaArray, JavaList +from py4j.java_collections import ListConverter, JavaArray, JavaList from pyspark import RDD, SparkContext from pyspark.serializers import PickleSerializer, AutoBatchedSerializer @@ -70,9 +70,7 @@ def _py2java(sc, obj): obj = _to_java_object_rdd(obj) elif isinstance(obj, SparkContext): obj = obj._jsc - elif isinstance(obj, dict): - obj = MapConverter().convert(obj, sc._gateway._gateway_client) - elif isinstance(obj, (list, tuple)): + elif isinstance(obj, list) and (obj or isinstance(obj[0], JavaObject)): obj = ListConverter().convert(obj, sc._gateway._gateway_client) elif isinstance(obj, JavaObject): pass -- cgit v1.2.3