author     Wenchen Fan <wenchen@databricks.com>   2016-03-02 15:26:34 -0800
committer  Reynold Xin <rxin@databricks.com>      2016-03-02 15:26:34 -0800
commit     4dd24811d9035c52c5965fca2fc6431aac6963fc (patch)
tree       49771acf8775633de5fe38cf9b38c929b70c99c6 /python/pyspark/sql/dataframe.py
parent     e2780ce8252ded93a695125c0a745d8b93193cca (diff)
[SPARK-13594][SQL] remove typed operations (e.g. map, flatMap) from python DataFrame

## What changes were proposed in this pull request?

Remove `map`, `flatMap`, and `mapPartitions` from the Python DataFrame, to prepare for the Dataset API in the future.

## How was this patch tested?

Existing tests.

Author: Wenchen Fan <wenchen@databricks.com>

Closes #11445 from cloud-fan/python-clean.
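For code that used the removed methods, the migration is to go through `df.rdd` explicitly. A minimal sketch, assuming a DataFrame `df` with a `name` column as in the doctests removed below:

```python
# The removed DataFrame.map/flatMap/mapPartitions were thin shorthands for the
# corresponding RDD methods; converting explicitly via df.rdd keeps the same behavior.

# df.map(lambda p: p.name)      ->  df.rdd.map(lambda p: p.name)
names = df.rdd.map(lambda p: p.name).collect()

# df.flatMap(lambda p: p.name)  ->  df.rdd.flatMap(lambda p: p.name)
chars = df.rdd.flatMap(lambda p: p.name).collect()

# df.mapPartitions(f)           ->  df.rdd.mapPartitions(f)
def count_one(iterator):
    # yield a single 1 per partition, so summing counts the partitions
    yield 1

num_partitions = df.rdd.mapPartitions(count_one).sum()
```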
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r--  python/pyspark/sql/dataframe.py  42
1 file changed, 2 insertions(+), 40 deletions(-)
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 76fbb0c9aa..99d665fafe 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -267,44 +267,6 @@ class DataFrame(object):
self._jdf, num)
return list(_load_from_socket(port, BatchedSerializer(PickleSerializer())))
- @ignore_unicode_prefix
- @since(1.3)
- def map(self, f):
- """ Returns a new :class:`RDD` by applying a the ``f`` function to each :class:`Row`.
-
- This is a shorthand for ``df.rdd.map()``.
-
- >>> df.map(lambda p: p.name).collect()
- [u'Alice', u'Bob']
- """
- return self.rdd.map(f)
-
- @ignore_unicode_prefix
- @since(1.3)
- def flatMap(self, f):
- """ Returns a new :class:`RDD` by first applying the ``f`` function to each :class:`Row`,
- and then flattening the results.
-
- This is a shorthand for ``df.rdd.flatMap()``.
-
- >>> df.flatMap(lambda p: p.name).collect()
- [u'A', u'l', u'i', u'c', u'e', u'B', u'o', u'b']
- """
- return self.rdd.flatMap(f)
-
- @since(1.3)
- def mapPartitions(self, f, preservesPartitioning=False):
- """Returns a new :class:`RDD` by applying the ``f`` function to each partition.
-
- This is a shorthand for ``df.rdd.mapPartitions()``.
-
- >>> rdd = sc.parallelize([1, 2, 3, 4], 4)
- >>> def f(iterator): yield 1
- >>> rdd.mapPartitions(f).sum()
- 4
- """
- return self.rdd.mapPartitions(f, preservesPartitioning)
-
@since(1.3)
def foreach(self, f):
"""Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`.
@@ -315,7 +277,7 @@ class DataFrame(object):
... print(person.name)
>>> df.foreach(f)
"""
- return self.rdd.foreach(f)
+ self.rdd.foreach(f)
@since(1.3)
def foreachPartition(self, f):
@@ -328,7 +290,7 @@ class DataFrame(object):
... print(person.name)
>>> df.foreachPartition(f)
"""
- return self.rdd.foreachPartition(f)
+ self.rdd.foreachPartition(f)
@since(1.3)
def cache(self):