author    Davies Liu <davies@databricks.com>    2015-04-17 11:29:27 -0500
committer Reynold Xin <rxin@databricks.com>     2015-04-17 11:29:27 -0500
commit    c84d91692aa25c01882bcc3f9fd5de3cfa786195 (patch)
tree      7951bc6429ae21eb62de4ed6c6de658b379affde /python/pyspark/sql/functions.py
parent    dc48ba9f9f7449dd2f12cbad288b65c8119d9284 (diff)
[SPARK-6957] [SPARK-6958] [SQL] improve API compatibility to pandas
```
select(['cola', 'colb'])
groupby(['colA', 'colB'])
groupby([df.colA, df.colB])
df.sort('A', ascending=True)
df.sort(['A', 'B'], ascending=True)
df.sort(['A', 'B'], ascending=[1, 0])
```

cc rxin

Author: Davies Liu <davies@databricks.com>

Closes #5544 from davies/compatibility and squashes the following commits:

4944058 [Davies Liu] add docstrings
adb2816 [Davies Liu] Merge branch 'master' of github.com:apache/spark into compatibility
bcbbcab [Davies Liu] support ascending as list
8dabdf0 [Davies Liu] improve API compatibility to pandas
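For readers skimming the change, a minimal illustration of the pandas-style calls listed above (assuming a SparkContext is running and `df` is a DataFrame with the named columns; `count()` is just a placeholder aggregation):

```python
# Illustrative only: the pandas-compatible forms this commit accepts.
df.select(['colA', 'colB'])               # list of column names
df.groupby(['colA', 'colB']).count()      # groupby with a list of names
df.groupby([df.colA, df.colB]).count()    # ...or a list of Column objects
df.sort('A', ascending=True)              # pandas-style ascending flag
df.sort(['A', 'B'], ascending=[1, 0])     # per-column sort direction
```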
Diffstat (limited to 'python/pyspark/sql/functions.py')
-rw-r--r--  python/pyspark/sql/functions.py | 11
1 file changed, 3 insertions, 8 deletions
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 1d65369528..bb47923f24 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -23,13 +23,11 @@ import sys
 
 if sys.version < "3":
     from itertools import imap as map
 
-from py4j.java_collections import ListConverter
-
 from pyspark import SparkContext
 from pyspark.rdd import _prepare_for_python_RDD
 from pyspark.serializers import PickleSerializer, AutoBatchedSerializer
 from pyspark.sql.types import StringType
-from pyspark.sql.dataframe import Column, _to_java_column
+from pyspark.sql.dataframe import Column, _to_java_column, _to_seq
 
 __all__ = ['countDistinct', 'approxCountDistinct', 'udf']
@@ -87,8 +85,7 @@ def countDistinct(col, *cols):
     [Row(c=2)]
     """
     sc = SparkContext._active_spark_context
-    jcols = ListConverter().convert([_to_java_column(c) for c in cols], sc._gateway._gateway_client)
-    jc = sc._jvm.functions.countDistinct(_to_java_column(col), sc._jvm.PythonUtils.toSeq(jcols))
+    jc = sc._jvm.functions.countDistinct(_to_java_column(col), _to_seq(sc, cols, _to_java_column))
     return Column(jc)
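The public behavior of `countDistinct` is unchanged by this hunk; a hedged usage sketch mirroring the docstring context above (assuming a DataFrame `df` with `age` and `name` columns):

```python
from pyspark.sql.functions import countDistinct

# Extra columns now flow through _to_seq instead of a hand-rolled
# ListConverter call; the returned Column is the same as before.
df.agg(countDistinct(df.age, df.name).alias('c')).collect()
# e.g. [Row(c=2)], matching the docstring example
```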
@@ -138,9 +135,7 @@ class UserDefinedFunction(object):
 
     def __call__(self, *cols):
         sc = SparkContext._active_spark_context
-        jcols = ListConverter().convert([_to_java_column(c) for c in cols],
-                                        sc._gateway._gateway_client)
-        jc = self._judf.apply(sc._jvm.PythonUtils.toSeq(jcols))
+        jc = self._judf.apply(_to_seq(sc, cols, _to_java_column))
         return Column(jc)
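For context, `_to_seq` lives in `pyspark/sql/dataframe.py` and centralizes the list-to-Seq conversion that both hunks previously inlined. A rough sketch of its shape, inferred from the removed lines in this diff rather than from code shown here:

```python
def _to_seq(sc, cols, converter=None):
    """Sketch: optionally map each item through `converter`
    (e.g. _to_java_column), then hand the Python list to the JVM
    to build a Scala Seq -- the same PythonUtils.toSeq call the
    removed lines above invoked directly."""
    if converter:
        cols = [converter(c) for c in cols]
    return sc._jvm.PythonUtils.toSeq(cols)
```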