diff options
author | Liang-Chi Hsieh <simonh@tw.ibm.com> | 2016-05-24 10:10:41 -0700 |
---|---|---|
committer | Davies Liu <davies.liu@gmail.com> | 2016-05-24 10:10:41 -0700 |
commit | 695d9a0fd461070ee2684b2210fb69d0b6ed1a95 (patch) | |
tree | 6785c7908cbd2e045cbaa051eedfdda1f7a10417 | |
parent | f8763b80ecd9968566018396c8cdc1851e7f8a46 (diff) | |
download | spark-695d9a0fd461070ee2684b2210fb69d0b6ed1a95.tar.gz spark-695d9a0fd461070ee2684b2210fb69d0b6ed1a95.tar.bz2 spark-695d9a0fd461070ee2684b2210fb69d0b6ed1a95.zip |
[SPARK-15433] [PYSPARK] PySpark core test should not use SerDe from PythonMLLibAPI
## What changes were proposed in this pull request?
Currently the PySpark core tests use the `SerDe` from `PythonMLLibAPI`, which pulls in many MLlib-specific dependencies. They should use `SerDeUtil` instead.
## How was this patch tested?
Existing tests.
Author: Liang-Chi Hsieh <simonh@tw.ibm.com>
Closes #13214 from viirya/pycore-use-serdeutil.
-rw-r--r-- | core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala | 2 | ||||
-rw-r--r-- | python/pyspark/tests.py | 4 |
2 files changed, 3 insertions, 3 deletions
diff --git a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala index 1c632ebdf9..6e4eab4b80 100644 --- a/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala +++ b/core/src/main/scala/org/apache/spark/api/python/SerDeUtil.scala @@ -137,7 +137,7 @@ private[spark] object SerDeUtil extends Logging { * Convert an RDD of Java objects to an RDD of serialized Python objects, that is usable by * PySpark. */ - private[spark] def javaToPython(jRDD: JavaRDD[_]): JavaRDD[Array[Byte]] = { + def javaToPython(jRDD: JavaRDD[_]): JavaRDD[Array[Byte]] = { jRDD.rdd.mapPartitions { iter => new AutoBatchedPickler(iter) } } diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 97ea39dde0..222c5ca5f4 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -960,13 +960,13 @@ class RDDTests(ReusedPySparkTestCase): ] data_rdd = self.sc.parallelize(data) data_java_rdd = data_rdd._to_java_object_rdd() - data_python_rdd = self.sc._jvm.SerDe.javaToPython(data_java_rdd) + data_python_rdd = self.sc._jvm.SerDeUtil.javaToPython(data_java_rdd) converted_rdd = RDD(data_python_rdd, self.sc) self.assertEqual(2, converted_rdd.count()) # conversion between python and java RDD threw exceptions data_java_rdd = converted_rdd._to_java_object_rdd() - data_python_rdd = self.sc._jvm.SerDe.javaToPython(data_java_rdd) + data_python_rdd = self.sc._jvm.SerDeUtil.javaToPython(data_java_rdd) converted_rdd = RDD(data_python_rdd, self.sc) self.assertEqual(2, converted_rdd.count()) |