author    | Sean Owen <sowen@cloudera.com> | 2015-08-25 12:33:13 +0100
committer | Sean Owen <sowen@cloudera.com> | 2015-08-25 12:33:13 +0100
commit    | 69c9c177160e32a2fbc9b36ecc52156077fca6fc (patch)
tree      | 57345aaf19c3149038bfca5c4ddccf33d41bdd5b /python/pyspark/sql
parent    | 7f1e507bf7e82bff323c5dec3c1ee044687c4173 (diff)
[SPARK-9613] [CORE] Ban use of JavaConversions and migrate all existing uses to JavaConverters
Replace `JavaConversions` implicits with `JavaConverters`
Most occurrences I've seen so far are necessary conversions; a few have been avoidable. None are in critical code as far as I see, yet.
Author: Sean Owen <sowen@cloudera.com>
Closes #8033 from srowen/SPARK-9613.
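The gist of the migration: `JavaConversions` performs Java/Scala collection conversions through implicits that fire silently, while `JavaConverters` makes each conversion an explicit `.asScala` / `.asJava` call at the use site. A minimal, self-contained sketch of the two styles (not part of this patch; names are illustrative):

```scala
// Sketch only: contrasts implicit JavaConversions with explicit JavaConverters.
import java.util.{ArrayList => JArrayList}

object ConversionStyles {
  def main(args: Array[String]): Unit = {
    val javaList = new JArrayList[String]()
    javaList.add("a")
    javaList.add("b")

    // Old style: with `import scala.collection.JavaConversions._` in scope,
    // a java.util.List can be used as a Scala collection without any visible
    // conversion, e.g. `javaList.map(_.length).sum` would compile as-is.

    // New style: JavaConverters requires an explicit .asScala / .asJava call,
    // so every cross-language conversion is visible at the call site.
    import scala.collection.JavaConverters._
    val total = javaList.asScala.map(_.length).sum
    println(total) // 2

    // Going the other way is just as explicit.
    val backToJava: java.util.List[String] = Seq("x", "y").asJava
    println(backToJava)
  }
}
```

Making the conversions explicit is the point of the change: it removes surprise allocations and ambiguity about which side of the Java/Scala boundary a collection lives on.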
Diffstat (limited to 'python/pyspark/sql')
-rw-r--r-- | python/pyspark/sql/column.py    | 12
-rw-r--r-- | python/pyspark/sql/dataframe.py |  4
2 files changed, 14 insertions, 2 deletions
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index 8af8637cf9..0948f9b27c 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -61,6 +61,18 @@ def _to_seq(sc, cols, converter=None):
     return sc._jvm.PythonUtils.toSeq(cols)


+def _to_list(sc, cols, converter=None):
+    """
+    Convert a list of Column (or names) into a JVM (Scala) List of Column.
+
+    An optional `converter` could be used to convert items in `cols`
+    into JVM Column objects.
+    """
+    if converter:
+        cols = [converter(c) for c in cols]
+    return sc._jvm.PythonUtils.toList(cols)
+
+
 def _unary_op(name, doc="unary operator"):
     """ Create a method for given unary operator """
     def _(self):
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 025811f519..e269ef4304 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -32,7 +32,7 @@ from pyspark.storagelevel import StorageLevel
 from pyspark.traceback_utils import SCCallSiteSync
 from pyspark.sql import since
 from pyspark.sql.types import _parse_datatype_json_string
-from pyspark.sql.column import Column, _to_seq, _to_java_column
+from pyspark.sql.column import Column, _to_seq, _to_list, _to_java_column
 from pyspark.sql.readwriter import DataFrameWriter
 from pyspark.sql.types import *

@@ -494,7 +494,7 @@ class DataFrame(object):
             if w < 0.0:
                 raise ValueError("Weights must be positive. Found weight value: %s" % w)
         seed = seed if seed is not None else random.randint(0, sys.maxsize)
-        rdd_array = self._jdf.randomSplit(_to_seq(self.sql_ctx._sc, weights), long(seed))
+        rdd_array = self._jdf.randomSplit(_to_list(self.sql_ctx._sc, weights), long(seed))
         return [DataFrame(rdd, self.sql_ctx) for rdd in rdd_array]

     @property
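On the JVM side, these Python helpers delegate through py4j to `sc._jvm.PythonUtils.toSeq` and the newly used `sc._jvm.PythonUtils.toList`. As a hedged sketch only (the real `org.apache.spark.api.python.PythonUtils` implementation may differ), helpers like these written in the `JavaConverters` style could look as follows:

```scala
// Hypothetical sketch of JVM-side helpers like the ones _to_seq/_to_list call.
// Spark's actual PythonUtils may differ; this only illustrates the
// JavaConverters style this commit migrates to.
import scala.collection.JavaConverters._

object PythonUtilsSketch {
  // py4j hands a Python list over as a java.util.List; converting it with an
  // explicit .asScala keeps the Java/Scala boundary visible.
  def toSeq[T](vs: java.util.List[T]): Seq[T] = vs.asScala.toSeq

  def toList[T](vs: java.util.List[T]): List[T] = vs.asScala.toList

  def main(args: Array[String]): Unit = {
    val weights = new java.util.ArrayList[Double]()
    weights.add(0.25)
    weights.add(0.75)
    println(toList(weights)) // List(0.25, 0.75)
  }
}
```

The Python change in `randomSplit` simply switches which of these helpers is invoked, passing the weights to the JVM as a Scala `List` rather than a `Seq`.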