aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/context.py
diff options
context:
space:
mode:
authorDaoyuan Wang <daoyuan.wang@intel.com>2015-05-18 21:43:12 -0700
committerReynold Xin <rxin@databricks.com>2015-05-18 21:43:12 -0700
commitc2437de1899e09894df4ec27adfaa7fac158fd3a (patch)
tree6e357bd2dd9a21d619aaf4279eeac4edf7e73866 /python/pyspark/context.py
parentd03638cc2d414cee9ac7481084672e454495dfc1 (diff)
downloadspark-c2437de1899e09894df4ec27adfaa7fac158fd3a.tar.gz
spark-c2437de1899e09894df4ec27adfaa7fac158fd3a.tar.bz2
spark-c2437de1899e09894df4ec27adfaa7fac158fd3a.zip
[SPARK-7150] SparkContext.range() and SQLContext.range()
This PR is based on #6081, thanks adrian-wang. Closes #6081 Author: Daoyuan Wang <daoyuan.wang@intel.com> Author: Davies Liu <davies@databricks.com> Closes #6230 from davies/range and squashes the following commits: d3ce5fe [Davies Liu] add tests 789eda5 [Davies Liu] add range() in Python 4590208 [Davies Liu] Merge commit 'refs/pull/6081/head' of github.com:apache/spark into range cbf5200 [Daoyuan Wang] let's add python support in a separate PR f45e3b2 [Daoyuan Wang] remove redundant toLong 617da76 [Daoyuan Wang] fix safe margin for corner cases 867c417 [Daoyuan Wang] fix 13dbe84 [Daoyuan Wang] update bd998ba [Daoyuan Wang] update comments d3a0c1b [Daoyuan Wang] add range api()
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r--python/pyspark/context.py16
1 files changed, 16 insertions, 0 deletions
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index d25ee85523..1f2b40b29f 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -319,6 +319,22 @@ class SparkContext(object):
with SparkContext._lock:
SparkContext._active_spark_context = None
+ def range(self, start, end, step=1, numSlices=None):
+ """
+ Create a new RDD of int containing elements from `start` to `end`
+ (exclusive), increased by `step` every element.
+
+ :param start: the start value
+ :param end: the end value (exclusive)
+ :param step: the incremental step (default: 1)
+ :param numSlices: the number of partitions of the new RDD
+ :return: An RDD of int
+
+ >>> sc.range(1, 7, 2).collect()
+ [1, 3, 5]
+ """
+ return self.parallelize(xrange(start, end, step), numSlices)
+
def parallelize(self, c, numSlices=None):
"""
Distribute a local Python collection to form an RDD. Using xrange