Diffstat (limited to 'python')
-rw-r--r--  python/pyspark/mllib/util.py    | 8 ++++----
-rw-r--r--  python/pyspark/rdd.py           | 4 ++--
-rw-r--r--  python/pyspark/sql/dataframe.py | 2 +-
-rw-r--r--  python/pyspark/sql/streaming.py | 6 +++---
4 files changed, 10 insertions(+), 10 deletions(-)
diff --git a/python/pyspark/mllib/util.py b/python/pyspark/mllib/util.py
index 48867a08db..ed6fd4bca4 100644
--- a/python/pyspark/mllib/util.py
+++ b/python/pyspark/mllib/util.py
@@ -140,8 +140,8 @@ class MLUtils(object):
>>> from pyspark.mllib.regression import LabeledPoint
>>> from glob import glob
>>> from pyspark.mllib.util import MLUtils
- >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, 1.23), (2, 4.56)])), \
- LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
+ >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, 1.23), (2, 4.56)])),
+ ... LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
>>> tempFile = NamedTemporaryFile(delete=True)
>>> tempFile.close()
>>> MLUtils.saveAsLibSVMFile(sc.parallelize(examples), tempFile.name)
@@ -166,8 +166,8 @@ class MLUtils(object):
>>> from tempfile import NamedTemporaryFile
>>> from pyspark.mllib.util import MLUtils
>>> from pyspark.mllib.regression import LabeledPoint
- >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, -1.23), (2, 4.56e-7)])), \
- LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
+ >>> examples = [LabeledPoint(1.1, Vectors.sparse(3, [(0, -1.23), (2, 4.56e-7)])),
+ ... LabeledPoint(0.0, Vectors.dense([1.01, 2.02, 3.03]))]
>>> tempFile = NamedTemporaryFile(delete=True)
>>> tempFile.close()
>>> sc.parallelize(examples, 1).saveAsTextFile(tempFile.name)
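The two mllib/util.py hunks above replace a trailing backslash with explicit "... " continuation prompts. In a regular (non-raw) docstring a lone backslash before the newline is consumed by the string literal itself, so doctest never sees a real continuation line; the "... " prompt form keeps the example an honest multi-line statement. A minimal, hypothetical sketch of the same pattern (plain Python, not part of the Spark source):

import doctest

def make_examples():
    """
    Build (label, features) pairs, split across two doctest lines with
    explicit "... " continuation prompts (hypothetical helper, not Spark's).

    >>> examples = [(1.1, [1.23, 0.0, 4.56]),
    ...             (0.0, [1.01, 2.02, 3.03])]
    >>> len(examples)
    2
    """
    return [(1.1, [1.23, 0.0, 4.56]), (0.0, [1.01, 2.02, 3.03])]

if __name__ == "__main__":
    doctest.testmod()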
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index 0508235c1c..5fb10f86f4 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -754,8 +754,8 @@ class RDD(object):
Applies a function to each partition of this RDD.
>>> def f(iterator):
- ... for x in iterator:
- ... print(x)
+ ... for x in iterator:
+ ... print(x)
>>> sc.parallelize([1, 2, 3, 4, 5]).foreachPartition(f)
"""
def func(it):
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 0f7d8fba3b..0ac481a8a8 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -61,7 +61,7 @@ class DataFrame(object):
people = sqlContext.read.parquet("...")
department = sqlContext.read.parquet("...")
- people.filter(people.age > 30).join(department, people.deptId == department.id)\
+ people.filter(people.age > 30).join(department, people.deptId == department.id) \\
.groupBy(department.name, "gender").agg({"salary": "avg", "age": "max"})
.. versionadded:: 1.3
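The dataframe.py hunk above only doubles the backslash: inside a regular (non-raw) docstring "\\" is the escape for one literal backslash, while a single "\" before the newline is swallowed by the string literal and the rendered example loses its line break. A hypothetical illustration (not Spark code) of the difference:

# Hypothetical illustration, not Spark code: how the two docstring forms render.
joined = """people.filter(people.age > 30).join(department) \
    .groupBy(department.name)"""
escaped = """people.filter(people.age > 30).join(department) \\
    .groupBy(department.name)"""

print(joined)    # one physical line: the lone backslash-newline is eaten by the string literal
print(escaped)   # two lines, with the continuation backslash still visible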
diff --git a/python/pyspark/sql/streaming.py b/python/pyspark/sql/streaming.py
index cbd827950b..4e438fd5be 100644
--- a/python/pyspark/sql/streaming.py
+++ b/python/pyspark/sql/streaming.py
@@ -315,9 +315,9 @@ class DataStreamReader(OptionUtils):
:param schema: optional :class:`pyspark.sql.types.StructType` for the input schema.
:param options: all other string options
- >>> json_sdf = spark.readStream.format("json")\
- .schema(sdf_schema)\
- .load(tempfile.mkdtemp())
+ >>> json_sdf = spark.readStream.format("json") \\
+ ... .schema(sdf_schema) \\
+ ... .load(tempfile.mkdtemp())
>>> json_sdf.isStreaming
True
>>> json_sdf.schema == sdf_schema
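The streaming.py hunk above combines both fixes: the doubled backslash leaves a literal continuation character in the docstring, and the "... " prompts tell doctest that the chained calls form one statement. A self-contained, hypothetical sketch of the same doctest pattern (toy Reader class, no Spark dependency):

import doctest

class Reader(object):
    """
    Toy stand-in for a fluent reader API (hypothetical, not Spark's).

    >>> r = Reader().format("json") \\
    ...     .schema("id INT") \\
    ...     .load("/tmp/input")
    >>> r.is_loaded
    True
    """

    def __init__(self):
        self.is_loaded = False

    def format(self, fmt):
        self.fmt = fmt
        return self

    def schema(self, schema):
        self.schema_str = schema
        return self

    def load(self, path):
        self.path = path
        self.is_loaded = True
        return self

if __name__ == "__main__":
    doctest.testmod()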