From 46b212602428f1f11c184c836b4e09c150d0ee30 Mon Sep 17 00:00:00 2001 From: hyukjinkwon Date: Mon, 2 Jan 2017 15:23:19 +0000 Subject: [SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts ## What changes were proposed in this pull request? This PR proposes to check pep8 against all other Python scripts and fix the errors as below: ```bash ./dev/create-release/generate-contributors.py ./dev/create-release/releaseutils.py ./dev/create-release/translate-contributors.py ./dev/lint-python ./python/docs/epytext.py ./examples/src/main/python/mllib/decision_tree_classification_example.py ./examples/src/main/python/mllib/decision_tree_regression_example.py ./examples/src/main/python/mllib/gradient_boosting_classification_example.py ./examples/src/main/python/mllib/gradient_boosting_regression_example.py ./examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./examples/src/main/python/mllib/naive_bayes_example.py ./examples/src/main/python/mllib/random_forest_classification_example.py ./examples/src/main/python/mllib/random_forest_regression_example.py ./examples/src/main/python/mllib/svm_with_sgd_example.py ./examples/src/main/python/streaming/network_wordjoinsentiments.py ./sql/hive/src/test/resources/data/scripts/cat.py ./sql/hive/src/test/resources/data/scripts/cat_error.py ./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py ./sql/hive/src/test/resources/data/scripts/dumpdata_script.py ./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py ./sql/hive/src/test/resources/data/scripts/escapednewline.py ./sql/hive/src/test/resources/data/scripts/escapedtab.py ./sql/hive/src/test/resources/data/scripts/input20_script.py ./sql/hive/src/test/resources/data/scripts/newline.py ``` ## How was this patch tested? - `./python/docs/epytext.py` ```bash cd ./python/docs $$ make html ``` - pep8 check (Python 2.7 / Python 3.3.6) ``` ./dev/lint-python ``` - `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python -m doctest -v ./dev/merge_spark_pr.py ``` - `./dev/create-release/releaseutils.py` `./dev/create-release/generate-contributors.py` `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working) ```bash python generate-contributors.py python translate-contributors.py ``` - Examples (Python 2.7 / Python 3.3.6) ```bash ./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.p ./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py ./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py ``` - Examples (Python 2.7 only / Python 3.3.6 not working) ``` ./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py ./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py ./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py ./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py ``` - `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes) Manually tested only changed ones. - `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working) Manually tested this after disabling actually adding comments and links. And also via Jenkins tests. Author: hyukjinkwon Closes #16405 from HyukjinKwon/minor-pep8. --- .../src/main/python/mllib/decision_tree_classification_example.py | 3 ++- examples/src/main/python/mllib/decision_tree_regression_example.py | 2 +- .../src/main/python/mllib/gradient_boosting_classification_example.py | 3 ++- .../src/main/python/mllib/gradient_boosting_regression_example.py | 2 +- examples/src/main/python/mllib/linear_regression_with_sgd_example.py | 2 +- .../src/main/python/mllib/logistic_regression_with_lbfgs_example.py | 2 +- examples/src/main/python/mllib/naive_bayes_example.py | 4 ++-- .../src/main/python/mllib/random_forest_classification_example.py | 3 ++- examples/src/main/python/mllib/random_forest_regression_example.py | 2 +- examples/src/main/python/mllib/svm_with_sgd_example.py | 2 +- examples/src/main/python/streaming/network_wordjoinsentiments.py | 4 ++-- 11 files changed, 16 insertions(+), 13 deletions(-) (limited to 'examples/src') diff --git a/examples/src/main/python/mllib/decision_tree_classification_example.py b/examples/src/main/python/mllib/decision_tree_classification_example.py index 1b529768b6..7eecf50058 100644 --- a/examples/src/main/python/mllib/decision_tree_classification_example.py +++ b/examples/src/main/python/mllib/decision_tree_classification_example.py @@ -44,7 +44,8 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count()) + testErr = labelsAndPredictions.filter( + lambda lp: lp[0] != lp[1]).count() / float(testData.count()) print('Test Error = ' + str(testErr)) print('Learned classification tree model:') print(model.toDebugString()) diff --git a/examples/src/main/python/mllib/decision_tree_regression_example.py b/examples/src/main/python/mllib/decision_tree_regression_example.py index cf518eac67..acf9e25fdf 100644 --- a/examples/src/main/python/mllib/decision_tree_regression_example.py +++ b/examples/src/main/python/mllib/decision_tree_regression_example.py @@ -44,7 +44,7 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\ + testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\ float(testData.count()) print('Test Mean Squared Error = ' + str(testMSE)) print('Learned regression tree model:') diff --git a/examples/src/main/python/mllib/gradient_boosting_classification_example.py b/examples/src/main/python/mllib/gradient_boosting_classification_example.py index b204cd1b31..65a03572be 100644 --- a/examples/src/main/python/mllib/gradient_boosting_classification_example.py +++ b/examples/src/main/python/mllib/gradient_boosting_classification_example.py @@ -43,7 +43,8 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count()) + testErr = labelsAndPredictions.filter( + lambda lp: lp[0] != lp[1]).count() / float(testData.count()) print('Test Error = ' + str(testErr)) print('Learned classification GBT model:') print(model.toDebugString()) diff --git a/examples/src/main/python/mllib/gradient_boosting_regression_example.py b/examples/src/main/python/mllib/gradient_boosting_regression_example.py index 758e224a9e..877f8ab461 100644 --- a/examples/src/main/python/mllib/gradient_boosting_regression_example.py +++ b/examples/src/main/python/mllib/gradient_boosting_regression_example.py @@ -43,7 +43,7 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\ + testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\ float(testData.count()) print('Test Mean Squared Error = ' + str(testMSE)) print('Learned regression GBT model:') diff --git a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py index 6fbaeff0cd..6744463d40 100644 --- a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py +++ b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py @@ -44,7 +44,7 @@ if __name__ == "__main__": # Evaluate the model on training data valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features))) MSE = valuesAndPreds \ - .map(lambda (v, p): (v - p)**2) \ + .map(lambda vp: (vp[0] - vp[1])**2) \ .reduce(lambda x, y: x + y) / valuesAndPreds.count() print("Mean Squared Error = " + str(MSE)) diff --git a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py index e030b74ba6..c9b768b314 100644 --- a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py +++ b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py @@ -44,7 +44,7 @@ if __name__ == "__main__": # Evaluating the model on training data labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features))) - trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count()) + trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count()) print("Training Error = " + str(trainErr)) # Save and load model diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py index 749353b20e..a29fcccac5 100644 --- a/examples/src/main/python/mllib/naive_bayes_example.py +++ b/examples/src/main/python/mllib/naive_bayes_example.py @@ -50,7 +50,7 @@ if __name__ == "__main__": # Make prediction and test accuracy. predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label)) - accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count() + accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count() print('model accuracy {}'.format(accuracy)) # Save and load model @@ -59,7 +59,7 @@ if __name__ == "__main__": model.save(sc, output_dir) sameModel = NaiveBayesModel.load(sc, output_dir) predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label)) - accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count() + accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count() print('sameModel accuracy {}'.format(accuracy)) # $example off$ diff --git a/examples/src/main/python/mllib/random_forest_classification_example.py b/examples/src/main/python/mllib/random_forest_classification_example.py index 9e5a8dcaab..5ac67520da 100644 --- a/examples/src/main/python/mllib/random_forest_classification_example.py +++ b/examples/src/main/python/mllib/random_forest_classification_example.py @@ -45,7 +45,8 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count()) + testErr = labelsAndPredictions.filter( + lambda lp: lp[0] != lp[1]).count() / float(testData.count()) print('Test Error = ' + str(testErr)) print('Learned classification forest model:') print(model.toDebugString()) diff --git a/examples/src/main/python/mllib/random_forest_regression_example.py b/examples/src/main/python/mllib/random_forest_regression_example.py index 2e1be34c1a..7e986a0d30 100644 --- a/examples/src/main/python/mllib/random_forest_regression_example.py +++ b/examples/src/main/python/mllib/random_forest_regression_example.py @@ -45,7 +45,7 @@ if __name__ == "__main__": # Evaluate model on test instances and compute test error predictions = model.predict(testData.map(lambda x: x.features)) labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions) - testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\ + testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\ float(testData.count()) print('Test Mean Squared Error = ' + str(testMSE)) print('Learned regression forest model:') diff --git a/examples/src/main/python/mllib/svm_with_sgd_example.py b/examples/src/main/python/mllib/svm_with_sgd_example.py index 309ab09cc3..24b8f431e0 100644 --- a/examples/src/main/python/mllib/svm_with_sgd_example.py +++ b/examples/src/main/python/mllib/svm_with_sgd_example.py @@ -38,7 +38,7 @@ if __name__ == "__main__": # Evaluating the model on training data labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features))) - trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count()) + trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count()) print("Training Error = " + str(trainErr)) # Save and load model diff --git a/examples/src/main/python/streaming/network_wordjoinsentiments.py b/examples/src/main/python/streaming/network_wordjoinsentiments.py index b85517dfdd..b309d9fad3 100644 --- a/examples/src/main/python/streaming/network_wordjoinsentiments.py +++ b/examples/src/main/python/streaming/network_wordjoinsentiments.py @@ -67,8 +67,8 @@ if __name__ == "__main__": # with the static RDD inside the transform() method and then multiplying # the frequency of the words by its sentiment value happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \ - .map(lambda (word, tuple): (word, float(tuple[0]) * tuple[1])) \ - .map(lambda (word, happiness): (happiness, word)) \ + .map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \ + .map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \ .transform(lambda rdd: rdd.sortByKey(False)) happiest_words.foreachRDD(print_happiest_words) -- cgit v1.2.3