author    Nicholas Chammas <nicholas.chammas@gmail.com>  2014-08-06 12:58:24 -0700
committer Reynold Xin <rxin@apache.org>  2014-08-06 12:58:24 -0700
commit    d614967b0bad1e6c5277d612602ec0a653a00258 (patch)
tree      8df1a52cbe074af4f928c0ac8f08a63075882d0b /python/pyspark/context.py
parent    a6cd31108f0d73ce6823daafe8447677e03cfd13 (diff)
[SPARK-2627] [PySpark] have the build enforce PEP 8 automatically
As described in [SPARK-2627](https://issues.apache.org/jira/browse/SPARK-2627), we'd like Python code to automatically be checked for PEP 8 compliance by Jenkins. This pull request aims to do that.

Notes:
* We may need to install [`pep8`](https://pypi.python.org/pypi/pep8) on the build server.
* I'm expecting tests to fail now that PEP 8 compliance is being checked as part of the build. I'm fine with cleaning up any remaining PEP 8 violations as part of this pull request.
* I did not understand why the RAT and scalastyle reports are saved to text files. I did the same for the PEP 8 check, but only so that the console output style can match those for the RAT and scalastyle checks. The PEP 8 report is removed right after the check is complete.
* Updates to the ["Contributing to Spark"](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark) guide will be submitted elsewhere, as I don't believe that text is part of the Spark repo.

Author: Nicholas Chammas <nicholas.chammas@gmail.com>
Author: nchammas <nicholas.chammas@gmail.com>

Closes #1744 from nchammas/master and squashes the following commits:

274b238 [Nicholas Chammas] [SPARK-2627] [PySpark] minor indentation changes
983d963 [nchammas] Merge pull request #5 from apache/master
1db5314 [nchammas] Merge pull request #4 from apache/master
0e0245f [Nicholas Chammas] [SPARK-2627] undo erroneous whitespace fixes
bf30942 [Nicholas Chammas] [SPARK-2627] PEP8: comment spacing
6db9a44 [nchammas] Merge pull request #3 from apache/master
7b4750e [Nicholas Chammas] merge upstream changes
91b7584 [Nicholas Chammas] [SPARK-2627] undo unnecessary line breaks
44e3e56 [Nicholas Chammas] [SPARK-2627] use tox.ini to exclude files
b09fae2 [Nicholas Chammas] don't wrap comments unnecessarily
bfb9f9f [Nicholas Chammas] [SPARK-2627] keep up with the PEP 8 fixes
9da347f [nchammas] Merge pull request #2 from apache/master
aa5b4b5 [Nicholas Chammas] [SPARK-2627] follow Spark bash style for if blocks
d0a83b9 [Nicholas Chammas] [SPARK-2627] check that pep8 downloaded fine
dffb5dd [Nicholas Chammas] [SPARK-2627] download pep8 at runtime
a1ce7ae [Nicholas Chammas] [SPARK-2627] space out test report sections
21da538 [Nicholas Chammas] [SPARK-2627] it's PEP 8, not PEP8
6f4900b [Nicholas Chammas] [SPARK-2627] more misc PEP 8 fixes
fe57ed0 [Nicholas Chammas] removing merge conflict backups
9c01d4c [nchammas] Merge pull request #1 from apache/master
9a66cb0 [Nicholas Chammas] resolving merge conflicts
a31ccc4 [Nicholas Chammas] [SPARK-2627] miscellaneous PEP 8 fixes
beaa9ac [Nicholas Chammas] [SPARK-2627] fail check on non-zero status
723ed39 [Nicholas Chammas] always delete the report file
0541ebb [Nicholas Chammas] [SPARK-2627] call Python linter from run-tests
12440fa [Nicholas Chammas] [SPARK-2627] add Scala linter
61c07b9 [Nicholas Chammas] [SPARK-2627] add Python linter
75ad552 [Nicholas Chammas] make check output style consistent
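The commit message only sketches the check itself: download `pep8`, run it over the Python sources, write a report so the console output matches the RAT and scalastyle checks, delete the report, and fail the build on a non-zero status. Spark's actual implementation lives in its shell-based dev/test scripts, not in this file; the snippet below is just a minimal Python sketch of that flow, and the report filename, target directory, and line-length flag are illustrative assumptions.

```python
# Minimal sketch of a PEP 8 gate for a build (illustrative only; Spark's real
# check is implemented in its shell-based run-tests scripts).
import os
import subprocess
import sys

REPORT = "pep8-report.txt"    # assumed report file name
TARGET = "python/pyspark"     # assumed directory to lint


def main():
    # Run the pep8 checker and capture its output in a report file, mirroring
    # how the RAT and scalastyle checks write theirs to text files.
    with open(REPORT, "w") as report:
        status = subprocess.call(
            [sys.executable, "-m", "pep8", "--max-line-length=100", TARGET],
            stdout=report,
            stderr=subprocess.STDOUT,
        )
    try:
        if status != 0:
            # Echo the violations so they appear in the console output.
            with open(REPORT) as report:
                sys.stdout.write(report.read())
            print("PEP 8 checks failed.")
        else:
            print("PEP 8 checks passed.")
    finally:
        # The report file is only a formatting convenience; always delete it.
        os.remove(REPORT)
    sys.exit(status)


if __name__ == "__main__":
    main()
```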
Diffstat (limited to 'python/pyspark/context.py')
-rw-r--r--  python/pyspark/context.py  25
1 files changed, 14 insertions, 11 deletions
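Every hunk in the diff below is the same category of change: continuation lines of wrapped calls are re-aligned so the pep8 checker accepts them. A small, self-contained sketch of that style follows; the function and arguments are made up, and the usual error codes for this (E127/E128, continuation-line indentation) are stated here as an assumption rather than taken from the commit.

```python
def sequence_file(path, key_class, value_class, key_converter=None,
                  value_converter=None, min_splits=1, batch_size=1):
    """Stand-in for the wrapped PythonRDD.* calls re-indented in the diff."""
    return (path, key_class, value_class, key_converter,
            value_converter, min_splits, batch_size)


# Non-compliant: the continuation line drifts away from the opening
# parenthesis, which pep8 flags as a continuation-line indentation error.
#     record = sequence_file("hdfs:///tmp/data", "Text", "Text",
#             None, None, 2, 1)

# Compliant: continuation arguments line up under the first argument,
# the style every hunk below converges on.
record = sequence_file("hdfs:///tmp/data", "Text", "Text",
                       None, None,
                       2, 1)
print(record)
```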
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 2e80eb50f2..4001ecab5e 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -47,6 +47,7 @@ DEFAULT_CONFIGS = {
class SparkContext(object):
+
"""
Main entry point for Spark functionality. A SparkContext represents the
connection to a Spark cluster, and can be used to create L{RDD}s and
@@ -213,7 +214,7 @@ class SparkContext(object):
if instance:
if (SparkContext._active_spark_context and
- SparkContext._active_spark_context != instance):
+ SparkContext._active_spark_context != instance):
currentMaster = SparkContext._active_spark_context.master
currentAppName = SparkContext._active_spark_context.appName
callsite = SparkContext._active_spark_context._callsite
@@ -406,7 +407,7 @@ class SparkContext(object):
batchSize = max(1, batchSize or self._default_batch_size_for_serialized_input)
ser = BatchedSerializer(PickleSerializer()) if (batchSize > 1) else PickleSerializer()
jrdd = self._jvm.PythonRDD.sequenceFile(self._jsc, path, keyClass, valueClass,
- keyConverter, valueConverter, minSplits, batchSize)
+ keyConverter, valueConverter, minSplits, batchSize)
return RDD(jrdd, self, ser)
def newAPIHadoopFile(self, path, inputFormatClass, keyClass, valueClass, keyConverter=None,
@@ -437,7 +438,8 @@ class SparkContext(object):
batchSize = max(1, batchSize or self._default_batch_size_for_serialized_input)
ser = BatchedSerializer(PickleSerializer()) if (batchSize > 1) else PickleSerializer()
jrdd = self._jvm.PythonRDD.newAPIHadoopFile(self._jsc, path, inputFormatClass, keyClass,
- valueClass, keyConverter, valueConverter, jconf, batchSize)
+ valueClass, keyConverter, valueConverter,
+ jconf, batchSize)
return RDD(jrdd, self, ser)
def newAPIHadoopRDD(self, inputFormatClass, keyClass, valueClass, keyConverter=None,
@@ -465,7 +467,8 @@ class SparkContext(object):
batchSize = max(1, batchSize or self._default_batch_size_for_serialized_input)
ser = BatchedSerializer(PickleSerializer()) if (batchSize > 1) else PickleSerializer()
jrdd = self._jvm.PythonRDD.newAPIHadoopRDD(self._jsc, inputFormatClass, keyClass,
- valueClass, keyConverter, valueConverter, jconf, batchSize)
+ valueClass, keyConverter, valueConverter,
+ jconf, batchSize)
return RDD(jrdd, self, ser)
def hadoopFile(self, path, inputFormatClass, keyClass, valueClass, keyConverter=None,
@@ -496,7 +499,8 @@ class SparkContext(object):
batchSize = max(1, batchSize or self._default_batch_size_for_serialized_input)
ser = BatchedSerializer(PickleSerializer()) if (batchSize > 1) else PickleSerializer()
jrdd = self._jvm.PythonRDD.hadoopFile(self._jsc, path, inputFormatClass, keyClass,
- valueClass, keyConverter, valueConverter, jconf, batchSize)
+ valueClass, keyConverter, valueConverter,
+ jconf, batchSize)
return RDD(jrdd, self, ser)
def hadoopRDD(self, inputFormatClass, keyClass, valueClass, keyConverter=None,
@@ -523,8 +527,9 @@ class SparkContext(object):
jconf = self._dictToJavaMap(conf)
batchSize = max(1, batchSize or self._default_batch_size_for_serialized_input)
ser = BatchedSerializer(PickleSerializer()) if (batchSize > 1) else PickleSerializer()
- jrdd = self._jvm.PythonRDD.hadoopRDD(self._jsc, inputFormatClass, keyClass, valueClass,
- keyConverter, valueConverter, jconf, batchSize)
+ jrdd = self._jvm.PythonRDD.hadoopRDD(self._jsc, inputFormatClass, keyClass,
+ valueClass, keyConverter, valueConverter,
+ jconf, batchSize)
return RDD(jrdd, self, ser)
def _checkpointFile(self, name, input_deserializer):
@@ -555,8 +560,7 @@ class SparkContext(object):
first = rdds[0]._jrdd
rest = [x._jrdd for x in rdds[1:]]
rest = ListConverter().convert(rest, self._gateway._gateway_client)
- return RDD(self._jsc.union(first, rest), self,
- rdds[0]._jrdd_deserializer)
+ return RDD(self._jsc.union(first, rest), self, rdds[0]._jrdd_deserializer)
def broadcast(self, value):
"""
@@ -568,8 +572,7 @@ class SparkContext(object):
pickleSer = PickleSerializer()
pickled = pickleSer.dumps(value)
jbroadcast = self._jsc.broadcast(bytearray(pickled))
- return Broadcast(jbroadcast.id(), value, jbroadcast,
- self._pickled_broadcast_vars)
+ return Broadcast(jbroadcast.id(), value, jbroadcast, self._pickled_broadcast_vars)
def accumulator(self, value, accum_param=None):
"""