aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/rdd.py
diff options
context:
space:
mode:
authorJosh Rosen <joshrosen@apache.org>2014-07-26 17:37:05 -0700
committerMatei Zaharia <matei@databricks.com>2014-07-26 17:37:05 -0700
commitba46bbed5d32aec0f11f0b71c82bba8dbe19f05a (patch)
tree5826bc60fdb70aebf9b0a9e3887dbce96d526851 /python/pyspark/rdd.py
parent12901643b7e808aa75cf0b19e2d0c3d40b1a978d (diff)
downloadspark-ba46bbed5d32aec0f11f0b71c82bba8dbe19f05a.tar.gz
spark-ba46bbed5d32aec0f11f0b71c82bba8dbe19f05a.tar.bz2
spark-ba46bbed5d32aec0f11f0b71c82bba8dbe19f05a.zip
[SPARK-2601] [PySpark] Fix Py4J error when transforming pickleFiles
Similar to SPARK-1034, the problem was that Py4J didn’t cope well with the fake ClassTags used in the Java API. It doesn’t look like there’s any reason why PythonRDD needs to take a ClassTag, since it just ignores the type of the previous RDD, so I removed the type parameter and we no longer pass ClassTags from Python. Author: Josh Rosen <joshrosen@apache.org> Closes #1605 from JoshRosen/spark-2601 and squashes the following commits: b68e118 [Josh Rosen] Fix Py4J error when transforming pickleFiles [SPARK-2601]
Diffstat (limited to 'python/pyspark/rdd.py')
-rw-r--r--python/pyspark/rdd.py4
1 files changed, 1 insertions, 3 deletions
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 113a082e16..b84d976114 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -1687,7 +1687,6 @@ class PipelinedRDD(RDD):
[x._jbroadcast for x in self.ctx._pickled_broadcast_vars],
self.ctx._gateway._gateway_client)
self.ctx._pickled_broadcast_vars.clear()
- class_tag = self._prev_jrdd.classTag()
env = MapConverter().convert(self.ctx.environment,
self.ctx._gateway._gateway_client)
includes = ListConverter().convert(self.ctx._python_includes,
@@ -1696,8 +1695,7 @@ class PipelinedRDD(RDD):
bytearray(pickled_command),
env, includes, self.preservesPartitioning,
self.ctx.pythonExec,
- broadcast_vars, self.ctx._javaAccumulator,
- class_tag)
+ broadcast_vars, self.ctx._javaAccumulator)
self._jrdd_val = python_rdd.asJavaRDD()
return self._jrdd_val