diff options
author | Sean Owen <sowen@cloudera.com> | 2016-03-03 09:54:09 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2016-03-03 09:54:09 +0000 |
commit | e97fc7f176f8bf501c9b3afd8410014e3b0e1602 (patch) | |
tree | 23a11a3646b13195aaf50078a0f35fad96190618 /examples/src | |
parent | 02b7677e9584f5ccd68869abdb0bf980dc847ce1 (diff) | |
download | spark-e97fc7f176f8bf501c9b3afd8410014e3b0e1602.tar.gz spark-e97fc7f176f8bf501c9b3afd8410014e3b0e1602.tar.bz2 spark-e97fc7f176f8bf501c9b3afd8410014e3b0e1602.zip |
[SPARK-13423][WIP][CORE][SQL][STREAMING] Static analysis fixes for 2.x
## What changes were proposed in this pull request?
Make some cross-cutting code improvements according to static analysis. These are individually up for discussion since they exist in separate commits that can be reverted. The changes are broadly:
- Inner class should be static
- Mismatched hashCode/equals
- Overflow in compareTo
- Unchecked warnings
- Misuse of assert, vs junit.assert
- get(a) + getOrElse(b) -> getOrElse(a,b)
- Array/String .size -> .length (occasionally, -> .isEmpty / .nonEmpty) to avoid implicit conversions
- Dead code
- tailrec
- exists(_ == ) -> contains find + nonEmpty -> exists filter + size -> count
- reduce(_+_) -> sum map + flatten -> map
The most controversial may be .size -> .length simply because of its size. It is intended to avoid implicits that might be expensive in some places.
## How was the this patch tested?
Existing Jenkins unit tests.
Author: Sean Owen <sowen@cloudera.com>
Closes #11292 from srowen/SPARK-13423.
Diffstat (limited to 'examples/src')
8 files changed, 11 insertions, 9 deletions
diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java index cf774667f6..7bb70d0c06 100644 --- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java +++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java @@ -60,7 +60,9 @@ class JavaSampleActorReceiver<T> extends JavaActorReceiver { @Override public void onReceive(Object msg) throws Exception { - store((T) msg); + @SuppressWarnings("unchecked") + T msgT = (T) msg; + store(msgT); } @Override diff --git a/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala b/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala index b26db0b246..e37a3fa69d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala @@ -88,7 +88,7 @@ object DFSReadWriteTest { def runLocalWordCount(fileContents: List[String]): Int = { fileContents.flatMap(_.split(" ")) .flatMap(_.split("\t")) - .filter(_.size > 0) + .filter(_.nonEmpty) .groupBy(w => w) .mapValues(_.size) .values @@ -119,7 +119,7 @@ object DFSReadWriteTest { val dfsWordCount = readFileRDD .flatMap(_.split(" ")) .flatMap(_.split("\t")) - .filter(_.size > 0) + .filter(_.nonEmpty) .map(w => (w, 1)) .countByKey() .values diff --git a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala index bec61f3cd4..a2d59a1c95 100644 --- a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala @@ -26,7 +26,7 @@ import org.apache.spark.util.Utils * test driver submission in the standalone scheduler. */ object DriverSubmissionTest { def main(args: Array[String]) { - if (args.size < 1) { + if (args.length < 1) { println("Usage: DriverSubmissionTest <seconds-to-sleep>") System.exit(0) } diff --git a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala index a797111dba..134c3d1d63 100644 --- a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala @@ -46,7 +46,7 @@ object MultiBroadcastTest { val barr1 = sc.broadcast(arr1) val barr2 = sc.broadcast(arr2) val observedSizes: RDD[(Int, Int)] = sc.parallelize(1 to 10, slices).map { _ => - (barr1.value.size, barr2.value.size) + (barr1.value.length, barr2.value.length) } // Collect the small RDD so we can print the observed sizes locally. observedSizes.collect().foreach(i => println(i)) diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 69799b7c2b..4263680c6f 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -58,7 +58,7 @@ object SparkALS { } def update(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = { - val U = us.size + val U = us.length val F = us(0).getDimension var XtX: RealMatrix = new Array2DRowRealMatrix(F, F) var Xty: RealVector = new ArrayRealVector(F) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index 038b2fe611..e89d555884 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -118,7 +118,7 @@ object LDAExample { preprocess(sc, params.input, params.vocabSize, params.stopwordFile) corpus.cache() val actualCorpusSize = corpus.count() - val actualVocabSize = vocabArray.size + val actualVocabSize = vocabArray.length val preprocessElapsed = (System.nanoTime() - preprocessStart) / 1e9 println() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala index c4e5e965b8..011db4fd0c 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala @@ -79,7 +79,7 @@ object SampledRDDs { val sampledRDD = examples.sample(withReplacement = true, fraction = fraction) println(s" RDD.sample(): sample has ${sampledRDD.count()} examples") val sampledArray = examples.takeSample(withReplacement = true, num = expectedSampleSize) - println(s" RDD.takeSample(): sample has ${sampledArray.size} examples") + println(s" RDD.takeSample(): sample has ${sampledArray.length} examples") println() diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala index 4b43550a06..773a2e5fc2 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala @@ -69,7 +69,7 @@ object PageViewStream { .groupByKey() val errorRatePerZipCode = statusesPerZipCode.map{ case(zip, statuses) => - val normalCount = statuses.filter(_ == 200).size + val normalCount = statuses.count(_ == 200) val errorCount = statuses.size - normalCount val errorRatio = errorCount.toFloat / statuses.size if (errorRatio > 0.05) { |