aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
authorDavies Liu <davies@databricks.com>2015-01-13 12:50:31 -0800
committerXiangrui Meng <meng@databricks.com>2015-01-13 12:50:31 -0800
commit8ead999fd627b12837fb2f082a0e76e9d121d269 (patch)
treeb12ae116038cbf72b3b89de5f4be5cf7ba956c85 /python
parent39e333ec4350ddafe29ee0958c37eec07bec85df (diff)
downloadspark-8ead999fd627b12837fb2f082a0e76e9d121d269.tar.gz
spark-8ead999fd627b12837fb2f082a0e76e9d121d269.tar.bz2
spark-8ead999fd627b12837fb2f082a0e76e9d121d269.zip
[SPARK-5223] [MLlib] [PySpark] fix MapConverter and ListConverter in MLlib
It will introduce problems if the objects in a dict/list/tuple are not supported by py4j, such as Vector. Also, pickle may have better performance for larger objects (fewer RPCs). In cases where the objects in a dict/list cannot be pickled (such as JavaObject), we should still use MapConverter/ListConverter. This PR should be ported to branch-1.2. Author: Davies Liu <davies@databricks.com> Closes #4023 from davies/listconvert and squashes the following commits: 55d4ab2 [Davies Liu] fix MapConverter and ListConverter in MLlib
Diffstat (limited to 'python')
-rw-r--r--python/pyspark/mllib/common.py6
1 files changed, 2 insertions, 4 deletions
diff --git a/python/pyspark/mllib/common.py b/python/pyspark/mllib/common.py
index 33c49e2399..3c5ee66cd8 100644
--- a/python/pyspark/mllib/common.py
+++ b/python/pyspark/mllib/common.py
@@ -18,7 +18,7 @@
import py4j.protocol
from py4j.protocol import Py4JJavaError
from py4j.java_gateway import JavaObject
-from py4j.java_collections import MapConverter, ListConverter, JavaArray, JavaList
+from py4j.java_collections import ListConverter, JavaArray, JavaList
from pyspark import RDD, SparkContext
from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
@@ -70,9 +70,7 @@ def _py2java(sc, obj):
obj = _to_java_object_rdd(obj)
elif isinstance(obj, SparkContext):
obj = obj._jsc
- elif isinstance(obj, dict):
- obj = MapConverter().convert(obj, sc._gateway._gateway_client)
- elif isinstance(obj, (list, tuple)):
+ elif isinstance(obj, list) and (obj or isinstance(obj[0], JavaObject)):
obj = ListConverter().convert(obj, sc._gateway._gateway_client)
elif isinstance(obj, JavaObject):
pass