author     Sean Owen <sowen@cloudera.com>    2016-03-03 09:54:09 +0000
committer  Sean Owen <sowen@cloudera.com>    2016-03-03 09:54:09 +0000
commit     e97fc7f176f8bf501c9b3afd8410014e3b0e1602 (patch)
tree       23a11a3646b13195aaf50078a0f35fad96190618 /examples
parent     02b7677e9584f5ccd68869abdb0bf980dc847ce1 (diff)
[SPARK-13423][WIP][CORE][SQL][STREAMING] Static analysis fixes for 2.x

## What changes were proposed in this pull request?

Make some cross-cutting code improvements according to static analysis. These are individually up for discussion, since they exist in separate commits that can be reverted. The changes are broadly:

- Inner classes should be static
- Mismatched hashCode/equals
- Overflow in compareTo
- Unchecked warnings
- Misuse of assert vs. junit.Assert
- get(a) + getOrElse(b) -> getOrElse(a, b)
- Array/String .size -> .length (occasionally -> .isEmpty / .nonEmpty) to avoid implicit conversions
- Dead code
- @tailrec
- exists(_ == x) -> contains(x)
- find + nonEmpty -> exists
- filter + size -> count
- reduce(_ + _) -> sum
- map + flatten -> flatMap

The most controversial change may be .size -> .length, simply because of the sheer number of call sites it touches. It is intended to avoid implicit conversions that might be expensive in some places.

## How was this patch tested?

Existing Jenkins unit tests.

Author: Sean Owen <sowen@cloudera.com>

Closes #11292 from srowen/SPARK-13423.
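As a hedged illustration of the one-line collection rewrites listed above (the values `xs` and `m` are invented for the sketch, not taken from the patch), each pair below is behaviorally equivalent in Scala 2.x, with the right-hand form avoiding an intermediate `Option`/collection or an extra traversal:

```scala
// A minimal, runnable sketch (e.g. pasted into a Scala 2 REPL); xs and m are illustrative.
val xs = Seq(1, 2, 3, 200)
val m = Map("a" -> 1)

assert(xs.exists(_ == 200) == xs.contains(200))         // exists(_ == x) -> contains(x)
assert(xs.find(_ > 2).nonEmpty == xs.exists(_ > 2))     // find + nonEmpty -> exists
assert(m.get("b").getOrElse(0) == m.getOrElse("b", 0))  // get(a) + getOrElse(b) -> getOrElse(a, b)
assert(xs.reduce(_ + _) == xs.sum)                      // reduce(_ + _) -> sum
assert(xs.map(n => Seq(n, n)).flatten ==
  xs.flatMap(n => Seq(n, n)))                           // map + flatten -> flatMap
```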
Diffstat (limited to 'examples')
 examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java           | 4
 examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala                     | 4
 examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala                 | 2
 examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala                   | 2
 examples/src/main/scala/org/apache/spark/examples/SparkALS.scala                             | 2
 examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala                     | 2
 examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala                    | 2
 examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala | 2
 8 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java b/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java
index cf774667f6..7bb70d0c06 100644
--- a/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java
+++ b/examples/src/main/java/org/apache/spark/examples/streaming/JavaActorWordCount.java
@@ -60,7 +60,9 @@ class JavaSampleActorReceiver<T> extends JavaActorReceiver {
   @Override
   public void onReceive(Object msg) throws Exception {
-    store((T) msg);
+    @SuppressWarnings("unchecked")
+    T msgT = (T) msg;
+    store(msgT);
   }

   @Override
diff --git a/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala b/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala
index b26db0b246..e37a3fa69d 100644
--- a/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/DFSReadWriteTest.scala
@@ -88,7 +88,7 @@ object DFSReadWriteTest {
   def runLocalWordCount(fileContents: List[String]): Int = {
     fileContents.flatMap(_.split(" "))
       .flatMap(_.split("\t"))
-      .filter(_.size > 0)
+      .filter(_.nonEmpty)
       .groupBy(w => w)
       .mapValues(_.size)
       .values
@@ -119,7 +119,7 @@ object DFSReadWriteTest {
     val dfsWordCount = readFileRDD
       .flatMap(_.split(" "))
       .flatMap(_.split("\t"))
-      .filter(_.size > 0)
+      .filter(_.nonEmpty)
       .map(w => (w, 1))
       .countByKey()
       .values
diff --git a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala
index bec61f3cd4..a2d59a1c95 100644
--- a/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/DriverSubmissionTest.scala
@@ -26,7 +26,7 @@ import org.apache.spark.util.Utils
  * test driver submission in the standalone scheduler. */
 object DriverSubmissionTest {
   def main(args: Array[String]) {
-    if (args.size < 1) {
+    if (args.length < 1) {
       println("Usage: DriverSubmissionTest <seconds-to-sleep>")
       System.exit(0)
     }
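For context on this and the later `.size -> .length` hunks: on a bare `Array` (and on `String`), `size` is not defined by the type itself in Scala 2.x; the call compiles only because `Predef` implicitly wraps the receiver in an `ArrayOps` (or `StringOps`) object, which can mean a wrapper allocation per call. `.length` reads the underlying JVM array length (or `java.lang.String.length()`) directly. A minimal sketch, with illustrative values:

```scala
val cliArgs: Array[String] = Array("--seconds", "10")  // illustrative stand-in for args

val n1 = cliArgs.size    // compiles via Predef's implicit ArrayOps wrapper
val n2 = cliArgs.length  // direct read of the JVM array's length, no wrapper
assert(n1 == n2)

// Same story for String: .size goes through StringOps,
// .length is java.lang.String.length()
assert("spark".size == "spark".length)
```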
diff --git a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
index a797111dba..134c3d1d63 100644
--- a/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/MultiBroadcastTest.scala
@@ -46,7 +46,7 @@ object MultiBroadcastTest {
     val barr1 = sc.broadcast(arr1)
     val barr2 = sc.broadcast(arr2)
     val observedSizes: RDD[(Int, Int)] = sc.parallelize(1 to 10, slices).map { _ =>
-      (barr1.value.size, barr2.value.size)
+      (barr1.value.length, barr2.value.length)
     }
     // Collect the small RDD so we can print the observed sizes locally.
     observedSizes.collect().foreach(i => println(i))
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
index 69799b7c2b..4263680c6f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala
@@ -58,7 +58,7 @@ object SparkALS {
   }

   def update(i: Int, m: RealVector, us: Array[RealVector], R: RealMatrix) : RealVector = {
-    val U = us.size
+    val U = us.length
     val F = us(0).getDimension
     var XtX: RealMatrix = new Array2DRowRealMatrix(F, F)
     var Xty: RealVector = new ArrayRealVector(F)
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
index 038b2fe611..e89d555884 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala
@@ -118,7 +118,7 @@ object LDAExample {
       preprocess(sc, params.input, params.vocabSize, params.stopwordFile)
     corpus.cache()
     val actualCorpusSize = corpus.count()
-    val actualVocabSize = vocabArray.size
+    val actualVocabSize = vocabArray.length
     val preprocessElapsed = (System.nanoTime() - preprocessStart) / 1e9

     println()
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
index c4e5e965b8..011db4fd0c 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/SampledRDDs.scala
@@ -79,7 +79,7 @@ object SampledRDDs {
     val sampledRDD = examples.sample(withReplacement = true, fraction = fraction)
     println(s" RDD.sample(): sample has ${sampledRDD.count()} examples")
     val sampledArray = examples.takeSample(withReplacement = true, num = expectedSampleSize)
-    println(s" RDD.takeSample(): sample has ${sampledArray.size} examples")
+    println(s" RDD.takeSample(): sample has ${sampledArray.length} examples")

     println()
diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
index 4b43550a06..773a2e5fc2 100644
--- a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewStream.scala
@@ -69,7 +69,7 @@ object PageViewStream {
       .groupByKey()
     val errorRatePerZipCode = statusesPerZipCode.map{
       case(zip, statuses) =>
-        val normalCount = statuses.filter(_ == 200).size
+        val normalCount = statuses.count(_ == 200)
         val errorCount = statuses.size - normalCount
         val errorRatio = errorCount.toFloat / statuses.size
         if (errorRatio > 0.05) {
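This last hunk is the `filter + size -> count` rewrite from the list in the commit message: `statuses.filter(_ == 200).size` materializes an intermediate collection just to measure it, while `count` answers in one pass. A minimal sketch of the equivalence, with invented status codes:

```scala
val statuses = Seq(200, 404, 200, 500, 200)  // illustrative values

val viaFilter = statuses.filter(_ == 200).size  // builds an intermediate Seq, then measures it
val viaCount  = statuses.count(_ == 200)        // single traversal, no intermediate collection
assert(viaFilter == viaCount)                   // both are 3
```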