author    wangyang <wangyang@haizhi.com>       2016-06-10 13:10:03 -0700
committer Reynold Xin <rxin@databricks.com>    2016-06-10 13:10:03 -0700
commit    026eb90644be7685971dacaabae67a293edd0133 (patch)
tree      cacc1ed1fb398d122bb5a46fd20b94574203fd58
parent    865ec32dd997e63aea01a871d1c7b4947f43c111 (diff)
[SPARK-15875] Try to use Seq.isEmpty and Seq.nonEmpty instead of Seq.length == 0 and Seq.length > 0
## What changes were proposed in this pull request?

In Scala, immutable.List.length is an O(n) operation, so we should avoid writing Seq.length == 0 or Seq.length > 0 and use Seq.isEmpty and Seq.nonEmpty instead.

## How was this patch tested?

Existing tests.

Author: wangyang <wangyang@haizhi.com>

Closes #13601 from yangw1234/isEmpty.
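The cost difference is easy to demonstrate outside this patch: immutable.List stores no size field, so length must walk every cons cell before it can be compared against zero, while isEmpty only inspects the head. A minimal sketch (the object name, list size, and timing helper below are illustrative, not part of the change):

```scala
object IsEmptyVsLength {
  // Illustrative timing helper (hypothetical, not part of the patch).
  def time[A](label: String)(body: => A): A = {
    val start = System.nanoTime()
    val result = body
    println(s"$label took ${(System.nanoTime() - start) / 1000} us")
    result
  }

  def main(args: Array[String]): Unit = {
    val xs: List[Int] = List.fill(10000000)(0) // ten million elements

    time("xs.length == 0")(xs.length == 0) // O(n): traverses the whole list
    time("xs.isEmpty")(xs.isEmpty)         // O(1): checks only the head cell
  }
}
```

Since isEmpty is at least as cheap as length on every Seq implementation, including those where length is already O(1), the rewrite is never a pessimization, and it reads more clearly besides.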
-rw-r--r--  core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala          | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala            | 2
-rw-r--r--  core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala   | 6
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala                | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala  | 2
-rw-r--r--  sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala | 4
-rw-r--r--  sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala     | 2
-rw-r--r--  streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala | 2
9 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
index 3df87f62f2..6a5e6f7c5a 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonWorkerFactory.scala
@@ -235,7 +235,7 @@ private[spark] class PythonWorkerFactory(pythonExec: String, envVars: Map[String
}
private def cleanupIdleWorkers() {
- while (idleWorkers.length > 0) {
+ while (idleWorkers.nonEmpty) {
val worker = idleWorkers.dequeue()
try {
// the worker will exit after closing the socket
diff --git a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
index b6366f3e68..d744d67592 100644
--- a/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/PartitionerAwareUnionRDD.scala
@@ -60,7 +60,7 @@ class PartitionerAwareUnionRDD[T: ClassTag](
sc: SparkContext,
var rdds: Seq[RDD[T]]
) extends RDD[T](sc, rdds.map(x => new OneToOneDependency(x))) {
- require(rdds.length > 0)
+ require(rdds.nonEmpty)
require(rdds.forall(_.partitioner.isDefined))
require(rdds.flatMap(_.partitioner).toSet.size == 1,
"Parent RDDs have different partitioners: " + rdds.flatMap(_.partitioner))
diff --git a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
index fc71f8365c..6ddc72afde 100644
--- a/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
+++ b/core/src/main/scala/org/apache/spark/util/collection/ExternalAppendOnlyMap.scala
@@ -375,14 +375,14 @@ class ExternalAppendOnlyMap[K, V, C](
/**
* Return true if there exists an input stream that still has unvisited pairs.
*/
- override def hasNext: Boolean = mergeHeap.length > 0
+ override def hasNext: Boolean = mergeHeap.nonEmpty
/**
* Select a key with the minimum hash, then combine all values with the same key from all
* input streams.
*/
override def next(): (K, C) = {
- if (mergeHeap.length == 0) {
+ if (mergeHeap.isEmpty) {
throw new NoSuchElementException
}
// Select a key from the StreamBuffer that holds the lowest key hash
@@ -397,7 +397,7 @@ class ExternalAppendOnlyMap[K, V, C](
// For all other streams that may have this key (i.e. have the same minimum key hash),
// merge in the corresponding value (if any) from that stream
val mergedBuffers = ArrayBuffer[StreamBuffer](minBuffer)
- while (mergeHeap.length > 0 && mergeHeap.head.minKeyHash == minHash) {
+ while (mergeHeap.nonEmpty && mergeHeap.head.minKeyHash == minHash) {
val newBuffer = mergeHeap.dequeue()
minCombiner = mergeIfKeyExists(minKey, minCombiner, newBuffer)
mergedBuffers += newBuffer
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
index 38728f2693..871b1c7d21 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala
@@ -441,7 +441,7 @@ class KMeans private (
val rs = (0 until runs).filter { r =>
rand.nextDouble() < 2.0 * c(r) * k / sumCosts(r)
}
- if (rs.length > 0) Some((p, rs)) else None
+ if (rs.nonEmpty) Some((p, rs)) else None
}
}.collect()
mergeNewCenters()
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 58f3904183..be527005bc 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1667,7 +1667,7 @@ class Analyzer(
// We do a final check and see if we only have a single Window Spec defined in an
// expressions.
- if (distinctWindowSpec.length == 0 ) {
+ if (distinctWindowSpec.isEmpty) {
failAnalysis(s"$expr does not have any WindowExpression.")
} else if (distinctWindowSpec.length > 1) {
// newExpressionsWithWindowFunctions only have expressions with a single
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 1e10d73a4b..e342c23d19 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -617,7 +617,7 @@ object NullPropagation extends Rule[LogicalPlan] {
// For Coalesce, remove null literals.
case e @ Coalesce(children) =>
val newChildren = children.filter(nonNullLiteral)
- if (newChildren.length == 0) {
+ if (newChildren.isEmpty) {
Literal.create(null, e.dataType)
} else if (newChildren.length == 1) {
newChildren.head
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
index 17126519eb..05dbacf07a 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortAggregateExec.scala
@@ -56,8 +56,8 @@ case class SortAggregateExec(
override def requiredChildDistribution: List[Distribution] = {
requiredChildDistributionExpressions match {
- case Some(exprs) if exprs.length == 0 => AllTuples :: Nil
- case Some(exprs) if exprs.length > 0 => ClusteredDistribution(exprs) :: Nil
+ case Some(exprs) if exprs.isEmpty => AllTuples :: Nil
+ case Some(exprs) if exprs.nonEmpty => ClusteredDistribution(exprs) :: Nil
case None => UnspecifiedDistribution :: Nil
}
}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
index e63c7c581e..edfdf7cd6b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/metric/SQLMetrics.scala
@@ -113,7 +113,7 @@ private[sql] object SQLMetrics {
val validValues = values.filter(_ >= 0)
val Seq(sum, min, med, max) = {
- val metric = if (validValues.length == 0) {
+ val metric = if (validValues.isEmpty) {
Seq.fill(4)(0L)
} else {
val sorted = validValues.sorted
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
index 47eb9b806f..0dde120927 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala
@@ -29,7 +29,7 @@ class TransformedDStream[U: ClassTag] (
transformFunc: (Seq[RDD[_]], Time) => RDD[U]
) extends DStream[U](parents.head.ssc) {
- require(parents.length > 0, "List of DStreams to transform is empty")
+ require(parents.nonEmpty, "List of DStreams to transform is empty")
require(parents.map(_.ssc).distinct.size == 1, "Some of the DStreams have different contexts")
require(parents.map(_.slideDuration).distinct.size == 1,
"Some of the DStreams have different slide durations")