path: root/python/pyspark/rdd.py
author     Nicholas Chammas <nicholas.chammas@gmail.com>  2014-08-06 12:58:24 -0700
committer  Reynold Xin <rxin@apache.org>                  2014-08-06 12:58:24 -0700
commit     d614967b0bad1e6c5277d612602ec0a653a00258 (patch)
tree       8df1a52cbe074af4f928c0ac8f08a63075882d0b /python/pyspark/rdd.py
parent     a6cd31108f0d73ce6823daafe8447677e03cfd13 (diff)
[SPARK-2627] [PySpark] have the build enforce PEP 8 automatically
As described in [SPARK-2627](https://issues.apache.org/jira/browse/SPARK-2627), we'd like Python code to automatically be checked for PEP 8 compliance by Jenkins. This pull request aims to do that.

Notes:

* We may need to install [`pep8`](https://pypi.python.org/pypi/pep8) on the build server.
* I'm expecting tests to fail now that PEP 8 compliance is being checked as part of the build. I'm fine with cleaning up any remaining PEP 8 violations as part of this pull request.
* I did not understand why the RAT and scalastyle reports are saved to text files. I did the same for the PEP 8 check, but only so that the console output style can match those for the RAT and scalastyle checks. The PEP 8 report is removed right after the check is complete.
* Updates to the ["Contributing to Spark"](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) guide will be submitted elsewhere, as I don't believe that text is part of the Spark repo.

Author: Nicholas Chammas <nicholas.chammas@gmail.com>
Author: nchammas <nicholas.chammas@gmail.com>

Closes #1744 from nchammas/master and squashes the following commits:

274b238 [Nicholas Chammas] [SPARK-2627] [PySpark] minor indentation changes
983d963 [nchammas] Merge pull request #5 from apache/master
1db5314 [nchammas] Merge pull request #4 from apache/master
0e0245f [Nicholas Chammas] [SPARK-2627] undo erroneous whitespace fixes
bf30942 [Nicholas Chammas] [SPARK-2627] PEP8: comment spacing
6db9a44 [nchammas] Merge pull request #3 from apache/master
7b4750e [Nicholas Chammas] merge upstream changes
91b7584 [Nicholas Chammas] [SPARK-2627] undo unnecessary line breaks
44e3e56 [Nicholas Chammas] [SPARK-2627] use tox.ini to exclude files
b09fae2 [Nicholas Chammas] don't wrap comments unnecessarily
bfb9f9f [Nicholas Chammas] [SPARK-2627] keep up with the PEP 8 fixes
9da347f [nchammas] Merge pull request #2 from apache/master
aa5b4b5 [Nicholas Chammas] [SPARK-2627] follow Spark bash style for if blocks
d0a83b9 [Nicholas Chammas] [SPARK-2627] check that pep8 downloaded fine
dffb5dd [Nicholas Chammas] [SPARK-2627] download pep8 at runtime
a1ce7ae [Nicholas Chammas] [SPARK-2627] space out test report sections
21da538 [Nicholas Chammas] [SPARK-2627] it's PEP 8, not PEP8
6f4900b [Nicholas Chammas] [SPARK-2627] more misc PEP 8 fixes
fe57ed0 [Nicholas Chammas] removing merge conflict backups
9c01d4c [nchammas] Merge pull request #1 from apache/master
9a66cb0 [Nicholas Chammas] resolving merge conflicts
a31ccc4 [Nicholas Chammas] [SPARK-2627] miscellaneous PEP 8 fixes
beaa9ac [Nicholas Chammas] [SPARK-2627] fail check on non-zero status
723ed39 [Nicholas Chammas] always delete the report file
0541ebb [Nicholas Chammas] [SPARK-2627] call Python linter from run-tests
12440fa [Nicholas Chammas] [SPARK-2627] add Scala linter
61c07b9 [Nicholas Chammas] [SPARK-2627] add Python linter
75ad552 [Nicholas Chammas] make check output style consistent
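For context, the check amounts to: run `pep8` over the Python sources and fail the build on any violation. Below is a rough sketch of that logic using the `pep8` package's Python API; the actual check shells out to the `pep8` command from a lint script, and the `tox.ini` path and target directory here are illustrative assumptions, not the script's exact contents:

```python
# Sketch only: approximates what the build's PEP 8 check does. The real
# check invokes the pep8 command-line tool; config and paths are assumed.
import pep8

# tox.ini carries the exclude list (see "use tox.ini to exclude files").
style = pep8.StyleGuide(config_file='tox.ini')
report = style.check_files(['python/pyspark'])  # recursively checks .py files

# Mirrors "fail check on non-zero status": any violation fails the build.
if report.total_errors:
    raise SystemExit('PEP 8 checks failed: %d violation(s)' % report.total_errors)
print('PEP 8 checks passed.')
```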
Diffstat (limited to 'python/pyspark/rdd.py')
 python/pyspark/rdd.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 309f5a9b60..30b834d208 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -233,7 +233,7 @@ class RDD(object):
 
     def _toPickleSerialization(self):
         if (self._jrdd_deserializer == PickleSerializer() or
-            self._jrdd_deserializer == BatchedSerializer(PickleSerializer())):
+                self._jrdd_deserializer == BatchedSerializer(PickleSerializer())):
             return self
         else:
             return self._reserialize(BatchedSerializer(PickleSerializer(), 10))
@@ -1079,7 +1079,9 @@ class RDD(object):
         pickledRDD = self._toPickleSerialization()
         batched = isinstance(pickledRDD._jrdd_deserializer, BatchedSerializer)
         self.ctx._jvm.PythonRDD.saveAsNewAPIHadoopFile(pickledRDD._jrdd, batched, path,
-            outputFormatClass, keyClass, valueClass, keyConverter, valueConverter, jconf)
+                                                       outputFormatClass,
+                                                       keyClass, valueClass,
+                                                       keyConverter, valueConverter, jconf)
 
     def saveAsHadoopDataset(self, conf, keyConverter=None, valueConverter=None):
         """
@@ -1125,8 +1127,10 @@ class RDD(object):
         pickledRDD = self._toPickleSerialization()
         batched = isinstance(pickledRDD._jrdd_deserializer, BatchedSerializer)
         self.ctx._jvm.PythonRDD.saveAsHadoopFile(pickledRDD._jrdd, batched, path,
-            outputFormatClass, keyClass, valueClass, keyConverter, valueConverter,
-            jconf, compressionCodecClass)
+                                                 outputFormatClass,
+                                                 keyClass, valueClass,
+                                                 keyConverter, valueConverter,
+                                                 jconf, compressionCodecClass)
 
     def saveAsSequenceFile(self, path, compressionCodecClass=None):
         """
@@ -1348,7 +1352,7 @@ class RDD(object):
 
         outputSerializer = self.ctx._unbatched_serializer
 
         limit = (_parse_memory(self.ctx._conf.get(
-                "spark.python.worker.memory", "512m")) / 2)
+            "spark.python.worker.memory", "512m")) / 2)
 
         def add_shuffle_key(split, iterator):
@@ -1430,12 +1434,12 @@ class RDD(object):
         spill = (self.ctx._conf.get("spark.shuffle.spill", 'True').lower()
                  == 'true')
         memory = _parse_memory(self.ctx._conf.get(
-                "spark.python.worker.memory", "512m"))
+            "spark.python.worker.memory", "512m"))
         agg = Aggregator(createCombiner, mergeValue, mergeCombiners)
 
         def combineLocally(iterator):
             merger = ExternalMerger(agg, memory * 0.9, serializer) \
-                     if spill else InMemoryMerger(agg)
+                if spill else InMemoryMerger(agg)
             merger.mergeValues(iterator)
             return merger.iteritems()
 
@@ -1444,7 +1448,7 @@ class RDD(object):
 
         def _mergeCombiners(iterator):
             merger = ExternalMerger(agg, memory, serializer) \
-                     if spill else InMemoryMerger(agg)
+                if spill else InMemoryMerger(agg)
             merger.mergeCombiners(iterator)
             return merger.iteritems()
 
@@ -1588,7 +1592,7 @@ class RDD(object):
         """
         for fraction in fractions.values():
             assert fraction >= 0.0, "Negative fraction value: %s" % fraction
-        return self.mapPartitionsWithIndex( \
+        return self.mapPartitionsWithIndex(
             RDDStratifiedSampler(withReplacement, fractions, seed).func, True)
 
     def subtractByKey(self, other, numPartitions=None):
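Aside: every hunk above except the last changes whitespace only, and the last also drops a redundant line-continuation backslash. For readers unfamiliar with the continuation-line conventions pep8 enforces, here is a small self-contained illustration; every name in it is hypothetical and none of it comes from rdd.py:

```python
# Hypothetical names throughout, for illustrating PEP 8 continuation styles.

def save_file(rdd, batched, path, output_format, key_class):
    """Stand-in for a long-named call like saveAsNewAPIHadoopFile."""
    return (rdd, batched, path, output_format, key_class)


def parse_memory(text):
    """Stand-in for _parse_memory: '512m' -> bytes."""
    return int(text.rstrip('m')) * 1024 * 1024


# Visual indent: wrapped arguments line up under the opening parenthesis,
# as in the saveAsHadoopFile/saveAsNewAPIHadoopFile hunks above.
result = save_file([1, 2, 3], True, '/tmp/out',
                   'TextOutputFormat', 'Text')

# Hanging indent: nothing follows the opening parenthesis, so the
# continuation is indented one level past the statement instead.
limit = (parse_memory(
    '512m') / 2)

# A continued condition takes an extra indent level so it stays visually
# distinct from the suite below it (pep8's E129), as in the
# _toPickleSerialization hunk above.
serializer = 'pickle'
if (serializer == 'pickle' or
        serializer == 'batched-pickle'):
    print(result, limit)
```

The extra indent in the last case is the point of the `_toPickleSerialization` hunk: without it, the continued condition and the `return` beneath it would start in the same column.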