diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-03-02 15:26:34 -0800 |
---|---|---|
committer | Reynold Xin <rxin@databricks.com> | 2016-03-02 15:26:34 -0800 |
commit | 4dd24811d9035c52c5965fca2fc6431aac6963fc (patch) | |
tree | 49771acf8775633de5fe38cf9b38c929b70c99c6 /python/pyspark/sql/dataframe.py | |
parent | e2780ce8252ded93a695125c0a745d8b93193cca (diff) | |
download | spark-4dd24811d9035c52c5965fca2fc6431aac6963fc.tar.gz spark-4dd24811d9035c52c5965fca2fc6431aac6963fc.tar.bz2 spark-4dd24811d9035c52c5965fca2fc6431aac6963fc.zip |
[SPARK-13594][SQL] remove typed operations(e.g. map, flatMap) from python DataFrame
## What changes were proposed in this pull request?
Remove `map`, `flatMap`, `mapPartitions` from python DataFrame, to prepare for Dataset API in the future.
## How was this patch tested?
existing tests
Author: Wenchen Fan <wenchen@databricks.com>
Closes #11445 from cloud-fan/python-clean.
Diffstat (limited to 'python/pyspark/sql/dataframe.py')
-rw-r--r-- | python/pyspark/sql/dataframe.py | 42 |
1 file changed, 2 insertions, 40 deletions
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 76fbb0c9aa..99d665fafe 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -267,44 +267,6 @@ class DataFrame(object): self._jdf, num) return list(_load_from_socket(port, BatchedSerializer(PickleSerializer()))) - @ignore_unicode_prefix - @since(1.3) - def map(self, f): - """ Returns a new :class:`RDD` by applying a the ``f`` function to each :class:`Row`. - - This is a shorthand for ``df.rdd.map()``. - - >>> df.map(lambda p: p.name).collect() - [u'Alice', u'Bob'] - """ - return self.rdd.map(f) - - @ignore_unicode_prefix - @since(1.3) - def flatMap(self, f): - """ Returns a new :class:`RDD` by first applying the ``f`` function to each :class:`Row`, - and then flattening the results. - - This is a shorthand for ``df.rdd.flatMap()``. - - >>> df.flatMap(lambda p: p.name).collect() - [u'A', u'l', u'i', u'c', u'e', u'B', u'o', u'b'] - """ - return self.rdd.flatMap(f) - - @since(1.3) - def mapPartitions(self, f, preservesPartitioning=False): - """Returns a new :class:`RDD` by applying the ``f`` function to each partition. - - This is a shorthand for ``df.rdd.mapPartitions()``. - - >>> rdd = sc.parallelize([1, 2, 3, 4], 4) - >>> def f(iterator): yield 1 - >>> rdd.mapPartitions(f).sum() - 4 - """ - return self.rdd.mapPartitions(f, preservesPartitioning) - @since(1.3) def foreach(self, f): """Applies the ``f`` function to all :class:`Row` of this :class:`DataFrame`. @@ -315,7 +277,7 @@ class DataFrame(object): ... print(person.name) >>> df.foreach(f) """ - return self.rdd.foreach(f) + self.rdd.foreach(f) @since(1.3) def foreachPartition(self, f): @@ -328,7 +290,7 @@ class DataFrame(object): ... print(person.name) >>> df.foreachPartition(f) """ - return self.rdd.foreachPartition(f) + self.rdd.foreachPartition(f) @since(1.3) def cache(self): |