author    Sean Owen <sowen@cloudera.com>  2015-08-25 12:33:13 +0100
committer Sean Owen <sowen@cloudera.com>  2015-08-25 12:33:13 +0100
commit    69c9c177160e32a2fbc9b36ecc52156077fca6fc (patch)
tree      57345aaf19c3149038bfca5c4ddccf33d41bdd5b /python
parent    7f1e507bf7e82bff323c5dec3c1ee044687c4173 (diff)
[SPARK-9613] [CORE] Ban use of JavaConversions and migrate all existing uses to JavaConverters
Replace `JavaConversions` implicits with `JavaConverters`. Most occurrences I've seen so far are necessary conversions; a few have been avoidable. None are in critical code as far as I can see, yet.

Author: Sean Owen <sowen@cloudera.com>

Closes #8033 from srowen/SPARK-9613.
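For context, the pattern this commit bans versus the one it adopts looks roughly like the sketch below. This is a hedged illustration, not code from the patch; `javaList` is a stand-in for any `java.util.List` crossing the JVM/Scala boundary.

import java.util.{Arrays, List => JList}

object ConvertersSketch {
  val javaList: JList[String] = Arrays.asList("a", "b", "c")

  // Before (banned): JavaConversions converts implicitly, so the boundary
  // crossing is invisible at the call site and easy to trigger by accident.
  //   import scala.collection.JavaConversions._
  //   val implicitSeq: Seq[String] = javaList

  // After (adopted): JavaConverters requires an explicit call, so every
  // Java <-> Scala conversion is visible where it happens.
  import scala.collection.JavaConverters._
  val explicitSeq: Seq[String] = javaList.asScala
  val backToJava: JList[String] = explicitSeq.asJava
}

The point of the migration is that each boundary crossing becomes an explicit `.asScala`/`.asJava` call at the use site rather than an implicit conversion that can fire unintentionally.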
Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/sql/column.py    | 12 ++++++++++++
-rw-r--r--  python/pyspark/sql/dataframe.py |  4 ++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 8af8637cf9..0948f9b27c 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -61,6 +61,18 @@ def _to_seq(sc, cols, converter=None):
     return sc._jvm.PythonUtils.toSeq(cols)
 
 
+def _to_list(sc, cols, converter=None):
+    """
+    Convert a list of Column (or names) into a JVM (Scala) List of Column.
+
+    An optional `converter` could be used to convert items in `cols`
+    into JVM Column objects.
+    """
+    if converter:
+        cols = [converter(c) for c in cols]
+    return sc._jvm.PythonUtils.toList(cols)
+
+
 def _unary_op(name, doc="unary operator"):
     """ Create a method for given unary operator """
     def _(self):
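The new `_to_list` mirrors `_to_seq` but routes to `PythonUtils.toList`, so py4j receives a `java.util.List` instead of a Scala `Seq`. The JVM side is not part of this diff; the following is only a rough sketch, using `JavaConverters`, of what such helpers can look like. The object and method bodies are assumptions for illustration, not the actual Spark source.

import java.util.{List => JList}
import scala.collection.JavaConverters._

// Hypothetical sketch of JVM-side helpers of the kind _to_seq/_to_list
// call into; the real PythonUtils lives in Spark core, outside this diff.
object PythonUtilsSketch {
  // Expose the incoming Java list to Scala callers as a Seq.
  def toSeq[T](vs: JList[T]): Seq[T] = vs.asScala

  // Round-trip through an immutable Scala List and hand back a
  // java.util.List, which is what py4j-facing signatures expect.
  def toList[T](vs: JList[T]): JList[T] = vs.asScala.toList.asJava
}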
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 025811f519..e269ef4304 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -32,7 +32,7 @@ from pyspark.storagelevel import StorageLevel
 from pyspark.traceback_utils import SCCallSiteSync
 from pyspark.sql import since
 from pyspark.sql.types import _parse_datatype_json_string
-from pyspark.sql.column import Column, _to_seq, _to_java_column
+from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
 from pyspark.sql.readwriter import DataFrameWriter
 from pyspark.sql.types import *
@@ -494,7 +494,7 @@ class DataFrame(object):
             if w < 0.0:
                 raise ValueError("Weights must be positive. Found weight value: %s" % w)
         seed = seed if seed is not None else random.randint(0, sys.maxsize)
-        rdd_array = self._jdf.randomSplit(_to_seq(self.sql_ctx._sc, weights), long(seed))
+        rdd_array = self._jdf.randomSplit(_to_list(self.sql_ctx._sc, weights), long(seed))
         return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array]
 
     @property
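Why the one-word change above matters: py4j selects a JVM method by matching the runtime types of the arguments it is handed, so if the entry point used by Python expects a `java.util.List` of weights, the Scala `Seq` produced by `_to_seq` would not match. The sketch below shows a hypothetical Java-friendly wrapper of that kind; the name and signature are assumptions for illustration, not the real `DataFrame.randomSplit` declaration.

import java.util.{List => JList}
import scala.collection.JavaConverters._

object RandomSplitSketch {
  // Hypothetical Java-friendly wrapper that py4j could invoke with the
  // java.util.List produced by _to_list; not the real Spark signature.
  def weightsFromPython(weights: JList[java.lang.Double]): Array[Double] = {
    // Unbox once at the boundary; a real wrapper would pass this array on
    // to an Array[Double]-based randomSplit overload.
    weights.asScala.map(_.doubleValue()).toArray
  }
}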