diff options
author | Ai He <ai.he@ussuning.com> | 2015-06-29 14:36:26 -0700 |
---|---|---|
committer | Davies Liu <davies@databricks.com> | 2015-06-29 14:36:26 -0700 |
commit | ecd3aacf2805bb231cfb44bab079319cfe73c3f1 (patch) | |
tree | 6530ab15ba48883305be6db44bafcf626463c4fb /python/pyspark/rdd.py | |
parent | f6fc254ec4ce5f103d45da6d007b4066ce751236 (diff) | |
download | spark-ecd3aacf2805bb231cfb44bab079319cfe73c3f1.tar.gz spark-ecd3aacf2805bb231cfb44bab079319cfe73c3f1.tar.bz2 spark-ecd3aacf2805bb231cfb44bab079319cfe73c3f1.zip |
[SPARK-7810] [PYSPARK] solve python rdd socket connection problem
Method "_load_from_socket" in rdd.py cannot load data from jvm socket when ipv6 is used. The current method only works well with ipv4. New modification should work around both two protocols.
Author: Ai He <ai.he@ussuning.com>
Author: AiHe <ai.he@ussuning.com>
Closes #6338 from AiHe/pyspark-networking-issue and squashes the following commits:
d4fc9c4 [Ai He] handle code review 2
e75c5c8 [Ai He] handle code review
5644953 [AiHe] solve python rdd socket connection problem to jvm
Diffstat (limited to 'python/pyspark/rdd.py')
-rw-r--r-- | python/pyspark/rdd.py | 18 |
1 files changed, 15 insertions, 3 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 1b64be23a6..cb20bc8b54 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -121,10 +121,22 @@ def _parse_memory(s): def _load_from_socket(port, serializer): - sock = socket.socket() - sock.settimeout(3) + sock = None + # Support for both IPv4 and IPv6. + # On most of IPv6-ready systems, IPv6 will take precedence. + for res in socket.getaddrinfo("localhost", port, socket.AF_UNSPEC, socket.SOCK_STREAM): + af, socktype, proto, canonname, sa = res + try: + sock = socket.socket(af, socktype, proto) + sock.settimeout(3) + sock.connect(sa) + except socket.error: + sock = None + continue + break + if not sock: + raise Exception("could not open socket") try: - sock.connect(("localhost", port)) rf = sock.makefile("rb", 65536) for item in serializer.load_stream(rf): yield item |