diff options
author | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-12-29 22:22:56 +0000 |
---|---|---|
committer | Josh Rosen <joshrosen@eecs.berkeley.edu> | 2012-12-29 22:25:34 +0000 |
commit | 6ee1ff2663cf1f776dd33e448548a8ddcf974dc6 (patch) | |
tree | f814fa77775cf1f0d9c15a0dbc0bb6dc2ec3c32e /pyspark | |
parent | c2b105af34f7241ac0597d9c35fbf66633a3eaf6 (diff) | |
download | spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.tar.gz spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.tar.bz2 spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.zip |
Fix bug in pyspark.serializers.batch; add .gitignore.
Diffstat (limited to 'pyspark')
-rw-r--r-- | pyspark/.gitignore | 2 | ||||
-rw-r--r-- | pyspark/pyspark/rdd.py | 4 | ||||
-rw-r--r-- | pyspark/pyspark/serializers.py | 2 |
3 files changed, 6 insertions, 2 deletions
```diff
diff --git a/pyspark/.gitignore b/pyspark/.gitignore
new file mode 100644
index 0000000000..5c56e638f9
--- /dev/null
+++ b/pyspark/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+docs/
diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py
index 111476d274..20f84b2dd0 100644
--- a/pyspark/pyspark/rdd.py
+++ b/pyspark/pyspark/rdd.py
@@ -695,7 +695,9 @@ def _test():
     import doctest
     from pyspark.context import SparkContext
     globs = globals().copy()
-    globs['sc'] = SparkContext('local[4]', 'PythonTest')
+    # The small batch size here ensures that we see multiple batches,
+    # even in these small test examples:
+    globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
     doctest.testmod(globs=globs)
     globs['sc'].stop()
 
diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py
index 4ed925697c..8b08f7ef0f 100644
--- a/pyspark/pyspark/serializers.py
+++ b/pyspark/pyspark/serializers.py
@@ -24,7 +24,7 @@ def batched(iterator, batchSize):
             if count == batchSize:
                 yield Batch(items)
                 items = []
-                count = []
+                count = 0
         if items:
             yield Batch(items)
 
```