From 46b212602428f1f11c184c836b4e09c150d0ee30 Mon Sep 17 00:00:00 2001
From: hyukjinkwon <gurwls223@gmail.com>
Date: Mon, 2 Jan 2017 15:23:19 +0000
Subject: [SPARK-19002][BUILD][PYTHON] Check pep8 against all Python scripts

## What changes were proposed in this pull request?

This PR proposes to check pep8 against all other Python scripts and fix the errors as below:

```bash
./dev/create-release/generate-contributors.py
./dev/create-release/releaseutils.py
./dev/create-release/translate-contributors.py
./dev/lint-python
./python/docs/epytext.py
./examples/src/main/python/mllib/decision_tree_classification_example.py
./examples/src/main/python/mllib/decision_tree_regression_example.py
./examples/src/main/python/mllib/gradient_boosting_classification_example.py
./examples/src/main/python/mllib/gradient_boosting_regression_example.py
./examples/src/main/python/mllib/linear_regression_with_sgd_example.py
./examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
./examples/src/main/python/mllib/naive_bayes_example.py
./examples/src/main/python/mllib/random_forest_classification_example.py
./examples/src/main/python/mllib/random_forest_regression_example.py
./examples/src/main/python/mllib/svm_with_sgd_example.py
./examples/src/main/python/streaming/network_wordjoinsentiments.py
./sql/hive/src/test/resources/data/scripts/cat.py
./sql/hive/src/test/resources/data/scripts/cat_error.py
./sql/hive/src/test/resources/data/scripts/doubleescapedtab.py
./sql/hive/src/test/resources/data/scripts/dumpdata_script.py
./sql/hive/src/test/resources/data/scripts/escapedcarriagereturn.py
./sql/hive/src/test/resources/data/scripts/escapednewline.py
./sql/hive/src/test/resources/data/scripts/escapedtab.py
./sql/hive/src/test/resources/data/scripts/input20_script.py
./sql/hive/src/test/resources/data/scripts/newline.py
```

## How was this patch tested?

- `./python/docs/epytext.py`

  ```bash
  cd ./python/docs $$ make html
  ```

- pep8 check (Python 2.7 / Python 3.3.6)

  ```
  ./dev/lint-python
  ```

- `./dev/merge_spark_pr.py` (Python 2.7 only / Python 3.3.6 not working)

  ```bash
  python -m doctest -v ./dev/merge_spark_pr.py
  ```

- `./dev/create-release/releaseutils.py` `./dev/create-release/generate-contributors.py` `./dev/create-release/translate-contributors.py` (Python 2.7 only / Python 3.3.6 not working)

  ```bash
  python generate-contributors.py
  python translate-contributors.py
  ```

- Examples (Python 2.7 / Python 3.3.6)

  ```bash
  ./bin/spark-submit examples/src/main/python/mllib/decision_tree_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/decision_tree_regression_example.py
  ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/gradient_boosting_regression_example.p
  ./bin/spark-submit examples/src/main/python/mllib/random_forest_classification_example.py
  ./bin/spark-submit examples/src/main/python/mllib/random_forest_regression_example.py
  ```

- Examples (Python 2.7 only / Python 3.3.6 not working)
  ```
  ./bin/spark-submit examples/src/main/python/mllib/linear_regression_with_sgd_example.py
  ./bin/spark-submit examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
  ./bin/spark-submit examples/src/main/python/mllib/naive_bayes_example.py
  ./bin/spark-submit examples/src/main/python/mllib/svm_with_sgd_example.py
  ```

- `sql/hive/src/test/resources/data/scripts/*.py` (Python 2.7 / Python 3.3.6 within suggested changes)

  Manually tested only changed ones.

- `./dev/github_jira_sync.py` (Python 2.7 only / Python 3.3.6 not working)

  Manually tested this after disabling actually adding comments and links.

And also via Jenkins tests.

Author: hyukjinkwon <gurwls223@gmail.com>

Closes #16405 from HyukjinKwon/minor-pep8.
---
 .../src/main/python/mllib/decision_tree_classification_example.py     | 3 ++-
 examples/src/main/python/mllib/decision_tree_regression_example.py    | 2 +-
 .../src/main/python/mllib/gradient_boosting_classification_example.py | 3 ++-
 .../src/main/python/mllib/gradient_boosting_regression_example.py     | 2 +-
 examples/src/main/python/mllib/linear_regression_with_sgd_example.py  | 2 +-
 .../src/main/python/mllib/logistic_regression_with_lbfgs_example.py   | 2 +-
 examples/src/main/python/mllib/naive_bayes_example.py                 | 4 ++--
 .../src/main/python/mllib/random_forest_classification_example.py     | 3 ++-
 examples/src/main/python/mllib/random_forest_regression_example.py    | 2 +-
 examples/src/main/python/mllib/svm_with_sgd_example.py                | 2 +-
 examples/src/main/python/streaming/network_wordjoinsentiments.py      | 4 ++--
 11 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'examples/src')

diff --git a/examples/src/main/python/mllib/decision_tree_classification_example.py b/examples/src/main/python/mllib/decision_tree_classification_example.py
index 1b529768b6..7eecf50058 100644
--- a/examples/src/main/python/mllib/decision_tree_classification_example.py
+++ b/examples/src/main/python/mllib/decision_tree_classification_example.py
@@ -44,7 +44,8 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
     print('Test Error = ' + str(testErr))
     print('Learned classification tree model:')
     print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/decision_tree_regression_example.py b/examples/src/main/python/mllib/decision_tree_regression_example.py
index cf518eac67..acf9e25fdf 100644
--- a/examples/src/main/python/mllib/decision_tree_regression_example.py
+++ b/examples/src/main/python/mllib/decision_tree_regression_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
         float(testData.count())
     print('Test Mean Squared Error = ' + str(testMSE))
     print('Learned regression tree model:')
diff --git a/examples/src/main/python/mllib/gradient_boosting_classification_example.py b/examples/src/main/python/mllib/gradient_boosting_classification_example.py
index b204cd1b31..65a03572be 100644
--- a/examples/src/main/python/mllib/gradient_boosting_classification_example.py
+++ b/examples/src/main/python/mllib/gradient_boosting_classification_example.py
@@ -43,7 +43,8 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
     print('Test Error = ' + str(testErr))
     print('Learned classification GBT model:')
     print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/gradient_boosting_regression_example.py b/examples/src/main/python/mllib/gradient_boosting_regression_example.py
index 758e224a9e..877f8ab461 100644
--- a/examples/src/main/python/mllib/gradient_boosting_regression_example.py
+++ b/examples/src/main/python/mllib/gradient_boosting_regression_example.py
@@ -43,7 +43,7 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
         float(testData.count())
     print('Test Mean Squared Error = ' + str(testMSE))
     print('Learned regression GBT model:')
diff --git a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
index 6fbaeff0cd..6744463d40 100644
--- a/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
+++ b/examples/src/main/python/mllib/linear_regression_with_sgd_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
     # Evaluate the model on training data
     valuesAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
     MSE = valuesAndPreds \
-        .map(lambda (v, p): (v - p)**2) \
+        .map(lambda vp: (vp[0] - vp[1])**2) \
         .reduce(lambda x, y: x + y) / valuesAndPreds.count()
     print("Mean Squared Error = " + str(MSE))
 
diff --git a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
index e030b74ba6..c9b768b314 100644
--- a/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
+++ b/examples/src/main/python/mllib/logistic_regression_with_lbfgs_example.py
@@ -44,7 +44,7 @@ if __name__ == "__main__":
 
     # Evaluating the model on training data
     labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
-    trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
+    trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
     print("Training Error = " + str(trainErr))
 
     # Save and load model
diff --git a/examples/src/main/python/mllib/naive_bayes_example.py b/examples/src/main/python/mllib/naive_bayes_example.py
index 749353b20e..a29fcccac5 100644
--- a/examples/src/main/python/mllib/naive_bayes_example.py
+++ b/examples/src/main/python/mllib/naive_bayes_example.py
@@ -50,7 +50,7 @@ if __name__ == "__main__":
 
     # Make prediction and test accuracy.
     predictionAndLabel = test.map(lambda p: (model.predict(p.features), p.label))
-    accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
     print('model accuracy {}'.format(accuracy))
 
     # Save and load model
@@ -59,7 +59,7 @@ if __name__ == "__main__":
     model.save(sc, output_dir)
     sameModel = NaiveBayesModel.load(sc, output_dir)
     predictionAndLabel = test.map(lambda p: (sameModel.predict(p.features), p.label))
-    accuracy = 1.0 * predictionAndLabel.filter(lambda (x, v): x == v).count() / test.count()
+    accuracy = 1.0 * predictionAndLabel.filter(lambda pl: pl[0] == pl[1]).count() / test.count()
     print('sameModel accuracy {}'.format(accuracy))
 
     # $example off$
diff --git a/examples/src/main/python/mllib/random_forest_classification_example.py b/examples/src/main/python/mllib/random_forest_classification_example.py
index 9e5a8dcaab..5ac67520da 100644
--- a/examples/src/main/python/mllib/random_forest_classification_example.py
+++ b/examples/src/main/python/mllib/random_forest_classification_example.py
@@ -45,7 +45,8 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testErr = labelsAndPredictions.filter(lambda (v, p): v != p).count() / float(testData.count())
+    testErr = labelsAndPredictions.filter(
+        lambda lp: lp[0] != lp[1]).count() / float(testData.count())
     print('Test Error = ' + str(testErr))
     print('Learned classification forest model:')
     print(model.toDebugString())
diff --git a/examples/src/main/python/mllib/random_forest_regression_example.py b/examples/src/main/python/mllib/random_forest_regression_example.py
index 2e1be34c1a..7e986a0d30 100644
--- a/examples/src/main/python/mllib/random_forest_regression_example.py
+++ b/examples/src/main/python/mllib/random_forest_regression_example.py
@@ -45,7 +45,7 @@ if __name__ == "__main__":
     # Evaluate model on test instances and compute test error
     predictions = model.predict(testData.map(lambda x: x.features))
     labelsAndPredictions = testData.map(lambda lp: lp.label).zip(predictions)
-    testMSE = labelsAndPredictions.map(lambda (v, p): (v - p) * (v - p)).sum() /\
+    testMSE = labelsAndPredictions.map(lambda lp: (lp[0] - lp[1]) * (lp[0] - lp[1])).sum() /\
         float(testData.count())
     print('Test Mean Squared Error = ' + str(testMSE))
     print('Learned regression forest model:')
diff --git a/examples/src/main/python/mllib/svm_with_sgd_example.py b/examples/src/main/python/mllib/svm_with_sgd_example.py
index 309ab09cc3..24b8f431e0 100644
--- a/examples/src/main/python/mllib/svm_with_sgd_example.py
+++ b/examples/src/main/python/mllib/svm_with_sgd_example.py
@@ -38,7 +38,7 @@ if __name__ == "__main__":
 
     # Evaluating the model on training data
     labelsAndPreds = parsedData.map(lambda p: (p.label, model.predict(p.features)))
-    trainErr = labelsAndPreds.filter(lambda (v, p): v != p).count() / float(parsedData.count())
+    trainErr = labelsAndPreds.filter(lambda lp: lp[0] != lp[1]).count() / float(parsedData.count())
     print("Training Error = " + str(trainErr))
 
     # Save and load model
diff --git a/examples/src/main/python/streaming/network_wordjoinsentiments.py b/examples/src/main/python/streaming/network_wordjoinsentiments.py
index b85517dfdd..b309d9fad3 100644
--- a/examples/src/main/python/streaming/network_wordjoinsentiments.py
+++ b/examples/src/main/python/streaming/network_wordjoinsentiments.py
@@ -67,8 +67,8 @@ if __name__ == "__main__":
     # with the static RDD inside the transform() method and then multiplying
     # the frequency of the words by its sentiment value
     happiest_words = word_counts.transform(lambda rdd: word_sentiments.join(rdd)) \
-        .map(lambda (word, tuple): (word, float(tuple[0]) * tuple[1])) \
-        .map(lambda (word, happiness): (happiness, word)) \
+        .map(lambda word_tuples: (word_tuples[0], float(word_tuples[1][0]) * word_tuples[1][1])) \
+        .map(lambda word_happiness: (word_happiness[1], word_happiness[0])) \
         .transform(lambda rdd: rdd.sortByKey(False))
 
     happiest_words.foreachRDD(print_happiest_words)
-- 
cgit v1.2.3