diff options
author | Reynold Xin <rxin@databricks.com> | 2016-03-14 19:25:49 -0700 |
---|---|---|
committer | Michael Armbrust <michael@databricks.com> | 2016-03-14 19:25:49 -0700 |
commit | 8e0b030606927741f91317660cd14a8a5ed6e5f9 (patch) | |
tree | 254ce8cf1ff726c561a34ce82f1dbad4b0e99c51 /python/pyspark | |
parent | 4bf460979562a7d8cec403f0bd603f88517fdb2b (diff) | |
download | spark-8e0b030606927741f91317660cd14a8a5ed6e5f9.tar.gz spark-8e0b030606927741f91317660cd14a8a5ed6e5f9.tar.bz2 spark-8e0b030606927741f91317660cd14a8a5ed6e5f9.zip |
[SPARK-10380][SQL] Fix confusing documentation examples for astype/drop_duplicates.
## What changes were proposed in this pull request?
We have seen users getting confused by the documentation for astype and drop_duplicates, because the examples in them do not use these functions (but do use their aliases). This patch simply removes all examples for these functions, and says that they are aliases.
## How was this patch tested?
Existing PySpark unit tests.
Closes #11543.
Author: Reynold Xin <rxin@databricks.com>
Closes #11698 from rxin/SPARK-10380.
Diffstat (limited to 'python/pyspark')
-rw-r--r-- | python/pyspark/__init__.py | 20 | ||||
-rw-r--r-- | python/pyspark/sql/column.py | 4 | ||||
-rw-r--r-- | python/pyspark/sql/dataframe.py | 20 |
3 files changed, 37 insertions, 7 deletions
diff --git a/python/pyspark/__init__.py b/python/pyspark/__init__.py index d530723ca9..111ebaafee 100644 --- a/python/pyspark/__init__.py +++ b/python/pyspark/__init__.py @@ -37,6 +37,8 @@ Public classes: """ +import types + from pyspark.conf import SparkConf from pyspark.context import SparkContext from pyspark.rdd import RDD @@ -64,6 +66,24 @@ def since(version): return deco +def copy_func(f, name=None, sinceversion=None, doc=None): + """ + Returns a function with same code, globals, defaults, closure, and + name (or provide a new name). + """ + # See + # http://stackoverflow.com/questions/6527633/how-can-i-make-a-deepcopy-of-a-function-in-python + fn = types.FunctionType(f.__code__, f.__globals__, name or f.__name__, f.__defaults__, + f.__closure__) + # in case f was given attrs (note this dict is a shallow copy): + fn.__dict__.update(f.__dict__) + if doc is not None: + fn.__doc__ = doc + if sinceversion is not None: + fn = since(sinceversion)(fn) + return fn + + # for back compatibility from pyspark.sql import SQLContext, HiveContext, Row diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py index 3866a49c0b..19ec6fcc5d 100644 --- a/python/pyspark/sql/column.py +++ b/python/pyspark/sql/column.py @@ -22,7 +22,7 @@ if sys.version >= '3': basestring = str long = int -from pyspark import since +from pyspark import copy_func, since from pyspark.context import SparkContext from pyspark.rdd import ignore_unicode_prefix from pyspark.sql.types import * @@ -337,7 +337,7 @@ class Column(object): raise TypeError("unexpected type: %s" % type(dataType)) return Column(jc) - astype = cast + astype = copy_func(cast, sinceversion=1.4, doc=":func:`astype` is an alias for :func:`cast`.") @since(1.3) def between(self, lowerBound, upperBound): diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 7008e8fadf..7e1854c43b 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -26,7 +26,7 @@ if sys.version 
>= '3': else: from itertools import imap as map -from pyspark import since +from pyspark import copy_func, since from pyspark.rdd import RDD, _load_from_socket, ignore_unicode_prefix from pyspark.serializers import BatchedSerializer, PickleSerializer, UTF8Deserializer from pyspark.storagelevel import StorageLevel @@ -829,8 +829,6 @@ class DataFrame(object): raise TypeError("condition should be string or Column") return DataFrame(jdf, self.sql_ctx) - where = filter - @ignore_unicode_prefix @since(1.3) def groupBy(self, *cols): @@ -1361,8 +1359,20 @@ class DataFrame(object): # Pandas compatibility ########################################################################################## - groupby = groupBy - drop_duplicates = dropDuplicates + groupby = copy_func( + groupBy, + sinceversion=1.4, + doc=":func:`groupby` is an alias for :func:`groupBy`.") + + drop_duplicates = copy_func( + dropDuplicates, + sinceversion=1.4, + doc=":func:`drop_duplicates` is an alias for :func:`dropDuplicates`.") + + where = copy_func( + filter, + sinceversion=1.3, + doc=":func:`where` is an alias for :func:`filter`.") def _to_scala_map(sc, jm): |