about | summary | refs | log | tree | commit | diff
path: root/pyspark
diff options
context:
space:
mode:
author: Josh Rosen <joshrosen@eecs.berkeley.edu> 2012-12-29 22:22:56 +0000
committer: Josh Rosen <joshrosen@eecs.berkeley.edu> 2012-12-29 22:25:34 +0000
commit: 6ee1ff2663cf1f776dd33e448548a8ddcf974dc6 (patch)
tree: f814fa77775cf1f0d9c15a0dbc0bb6dc2ec3c32e /pyspark
parent: c2b105af34f7241ac0597d9c35fbf66633a3eaf6 (diff)
download: spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.tar.gz
          spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.tar.bz2
          spark-6ee1ff2663cf1f776dd33e448548a8ddcf974dc6.zip
Fix bug in pyspark.serializers.batch; add .gitignore.
Diffstat (limited to 'pyspark')
-rw-r--r--  pyspark/.gitignore  2
-rw-r--r--  pyspark/pyspark/rdd.py  4
-rw-r--r--  pyspark/pyspark/serializers.py  2
3 files changed, 6 insertions, 2 deletions
diff --git a/pyspark/.gitignore b/pyspark/.gitignore
new file mode 100644
index 0000000000..5c56e638f9
--- /dev/null
+++ b/pyspark/.gitignore
@@ -0,0 +1,2 @@
+*.pyc
+docs/
diff --git a/pyspark/pyspark/rdd.py b/pyspark/pyspark/rdd.py
index 111476d274..20f84b2dd0 100644
--- a/pyspark/pyspark/rdd.py
+++ b/pyspark/pyspark/rdd.py
@@ -695,7 +695,9 @@ def _test():
import doctest
from pyspark.context import SparkContext
globs = globals().copy()
- globs['sc'] = SparkContext('local[4]', 'PythonTest')
+ # The small batch size here ensures that we see multiple batches,
+ # even in these small test examples:
+ globs['sc'] = SparkContext('local[4]', 'PythonTest', batchSize=2)
doctest.testmod(globs=globs)
globs['sc'].stop()
diff --git a/pyspark/pyspark/serializers.py b/pyspark/pyspark/serializers.py
index 4ed925697c..8b08f7ef0f 100644
--- a/pyspark/pyspark/serializers.py
+++ b/pyspark/pyspark/serializers.py
@@ -24,7 +24,7 @@ def batched(iterator, batchSize):
if count == batchSize:
yield Batch(items)
items = []
- count = []
+ count = 0
if items:
yield Batch(items)