about | summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author: hyukjinkwon <gurwls223@gmail.com> 2017-01-02 15:23:19 +0000
committer: Sean Owen <sowen@cloudera.com> 2017-01-02 15:23:19 +0000
commit: 46b212602428f1f11c184c836b4e09c150d0ee30 (patch)
tree: b30420dbdfe979f65c390edbfe2d103572c07501 /examples
parent: f1330b1d9e7b1d5de611e59eecae1bf0b0616d81 (diff)
download: spark-46b212602428f1f11c184c836b4e09c150d0ee30.tar.gz
spark-46b212602428f1f11c184c836b4e09c150d0ee30.tar.bz2
spark-46b212602428f1f11c184c836b4e09c150d0ee30.zip
[SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts
## What changes were proposed in this pull request? This PR proposes to check pep8 against all other Python scripts and fix the errors as below: ```bash ./dev/create-release/generate-contributors.py ./dev/create-release/releaseutils.py ./dev/create-release/translate-contributors.py ./dev/lint-python ./python/docs/epytext.py ./examples/src/main/python/mllib/decision_tree_classification_example.py ./examples/src/main/python/mllib/decision_tree_regression_example.py ./examples/src/main/python/mllib/gradient_boosting_classification_example.py ./examples/src/main/python/mllib/gradient_boosting_regression_example.py ./examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./examples/src/main/python/mllib/naive_bayes_example.py ./examples/src/main/python/mllib/random_forest_classification_example.py ./examples/src/main/python/mllib/random_forest_regression_example.py ./examples/src/main/python/mllib/svm_with_sgd_example.py ./examples/src/main/python/streaming/network_wordjoinsentiments.py ./sql/hive/src/test/resources/data/scripts/cat.py ./sql/hive/src/test/resources/data/scripts/cat_error.py ./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py ./sql/hive/src/test/resources/data/scripts/dumpdata_script.py ./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py ./sql/hive/src/test/resources/data/scripts/escapednewline.py ./sql/hive/src/test/resources/data/scripts/escapedtab.py ./sql/hive/src/test/resources/data/scripts/input20_script.py ./sql/hive/src/test/resources/data/scripts/newline.py ``` ## How was this patch tested? 
- `./python/docs/epytext.py` ```bash cd ./python/docs && make html ``` - pep8 check (Python 2.7 / Python 3.3.6) ``` ./dev/lint-python ``` - `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python -m doctest -v ./dev/merge_spark_pr.py ``` - `./dev/create-release/releaseutils.py` `./dev/create-release/generate-contributors.py` `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python generate-contributors.py python translate-contributors.py ``` - Examples (Python 2.7 / Python 3.3.6) ```bash ./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.py ./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py ``` - Examples (Python 2.7 only / Python 3.3.6 not working) ``` ./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py ./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py ``` - `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes) Manually tested only changed ones. - `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working) Manually tested this after disabling actually adding comments and links. And also via Jenkins tests. Author: hyukjinkwon <gurwls223@gmail.com> Closes #16405 from HyukjinKwon/minor-pep8.
Diffstat (limited to 'examples')
-rw-r--r--examples/src/main/python/mllib/decision_tree_classification_example.py3
-rw-r--r--examples/src/main/python/mllib/decision_tree_regression_example.py2
-rw-r--r--examples/src/main/python/mllib/gradient_boosting_classification_example.py3
-rw-r--r--examples/src/main/python/mllib/gradient_boosting_regression_example.py2
-rw-r--r--examples/src/main/python/mllib/linear_regression_with_sgd_example.py2
-rw-r--r--examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py2
-rw-r--r--examples/src/main/python/mllib/naive_bayes_example.py4
-rw-r--r--examples/src/main/python/mllib/random_forest_classification_example.py3
-rw-r--r--examples/src/main/python/mllib/random_forest_regression_example.py2
-rw-r--r--examples/src/main/python/mllib/svm_with_sgd_example.py2
-rw-r--r--examples/src/main/python/streaming/network_wordjoinsentiments.py4
11 files changed, 16 insertions, 13 deletions
diff --git a/examples/src/main/python/mllib/decision_tree_classification_example.py b/examples/src/main/python/mllib/decision_tree_classification_example.py
index 1b529768b6..7eecf50058 100644
--- a/examples/src/main/python/mllib/decision_tree_classification_example.py
+++ b/examples/src/main/python/mllib/decision_tree_classification_example.py
@@ -44,7 +44,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+ testErr = labelsAndPredictions.filter(
+ lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification tree model:')
print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/decision_tree_regression_example.py b/examples/src/main/python/mllib/decision_tree_regression_example.py
index cf518eac67..acf9e25fdf 100644
--- a/examples/src/main/python/mllib/decision_tree_regression_example.py
+++ b/examples/src/main/python/mllib/decision_tree_regression_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+ testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression tree model:')
diff --git a/examples/src/main/python/mllib/gradient_boosting_classification_example.py b/examples/src/main/python/mllib/gradient_boosting_classification_example.py
index b204cd1b31..65a03572be 100644
--- a/examples/src/main/python/mllib/gradient_boosting_classification_example.py
+++ b/examples/src/main/python/mllib/gradient_boosting_classification_example.py
@@ -43,7 +43,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+ testErr = labelsAndPredictions.filter(
+ lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification GBT model:')
print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/gradient_boosting_regression_example.py b/examples/src/main/python/mllib/gradient_boosting_regression_example.py
index 758e224a9e..877f8ab461 100644
--- a/examples/src/main/python/mllib/gradient_boosting_regression_example.py
+++ b/examples/src/main/python/mllib/gradient_boosting_regression_example.py
@@ -43,7 +43,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+ testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression GBT model:')
diff --git a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
index 6fbaeff0cd..6744463d40 100644
--- a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
+++ b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluate the model on training data
valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
MSE = valuesAndPreds \
- .map(lambda (v, p): (v - p)**2) \
+ .map(lambda vp: (vp[0] - vp[1])**2) \
.reduce(lambda x, y: x + y) / valuesAndPreds.count()
print("Mean Squared Error = " + str(MSE))
diff --git a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
index e030b74ba6..c9b768b314 100644
--- a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
+++ b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
- trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
+ trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
# Save and load model
diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 749353b20e..a29fcccac5 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -50,7 +50,7 @@ if __name__ == "__main__":
# Make prediction and test accuracy.
predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
- accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+ accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
print('model accuracy {}'.format(accuracy))
# Save and load model
@@ -59,7 +59,7 @@ if __name__ == "__main__":
model.save(sc, output_dir)
sameModel = NaiveBayesModel.load(sc, output_dir)
predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
- accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+ accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
print('sameModel accuracy {}'.format(accuracy))
# $example off$
diff --git a/examples/src/main/python/mllib/random_forest_classification_example.py b/examples/src/main/python/mllib/random_forest_classification_example.py
index 9e5a8dcaab..5ac67520da 100644
--- a/examples/src/main/python/mllib/random_forest_classification_example.py
+++ b/examples/src/main/python/mllib/random_forest_classification_example.py
@@ -45,7 +45,8 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+ testErr = labelsAndPredictions.filter(
+ lambda lp: lp[0] != lp[1]).count() / float(testData.count())
print('Test Error = ' + str(testErr))
print('Learned classification forest model:')
print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/random_forest_regression_example.py b/examples/src/main/python/mllib/random_forest_regression_example.py
index 2e1be34c1a..7e986a0d30 100644
--- a/examples/src/main/python/mllib/random_forest_regression_example.py
+++ b/examples/src/main/python/mllib/random_forest_regression_example.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
# Evaluate model on test instances and compute test error
predictions = model.predict(testData.map(lambda x: x.features))
labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
- testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+ testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
float(testData.count())
print('Test Mean Squared Error = ' + str(testMSE))
print('Learned regression forest model:')
diff --git a/examples/src/main/python/mllib/svm_with_sgd_example.py b/examples/src/main/python/mllib/svm_with_sgd_example.py
index 309ab09cc3..24b8f431e0 100644
--- a/examples/src/main/python/mllib/svm_with_sgd_example.py
+++ b/examples/src/main/python/mllib/svm_with_sgd_example.py
@@ -38,7 +38,7 @@ if __name__ == "__main__":
# Evaluating the model on training data
labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
- trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
+ trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
print("Training Error = " + str(trainErr))
# Save and load model
diff --git a/examples/src/main/python/streaming/network_wordjoinsentiments.py b/examples/src/main/python/streaming/network_wordjoinsentiments.py
index b85517dfdd..b309d9fad3 100644
--- a/examples/src/main/python/streaming/network_wordjoinsentiments.py
+++ b/examples/src/main/python/streaming/network_wordjoinsentiments.py
@@ -67,8 +67,8 @@ if __name__ == "__main__":
# with the static RDD inside the transform() method and then multiplying
# the frequency of the words by its sentiment value
happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \
- .map(lambda (word, tuple): (word, float(tuple[0]) * tuple[1])) \
- .map(lambda (word, happiness): (happiness, word)) \
+ .map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \
+ .map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \
.transform(lambda rdd: rdd.sortByKey(False))
happiest_words.foreachRDD(print_happiest_words)