From 26c1089c37149061f838129bb53330ded68ff4c9 Mon Sep 17 00:00:00 2001
From: Josh Rosen <joshrosen@databricks.com>
Date: Sun, 5 Jun 2016 16:51:00 -0700
Subject: [SPARK-15748][SQL] Replace inefficient foldLeft() call with flatMap()
 in PartitionStatistics

`PartitionStatistics` uses `foldLeft` and list concatenation (`++`) to flatten an iterator of lists, but this is extremely inefficient compared to simply doing `flatMap`/`flatten` because it performs many unnecessary object allocations. Simply replacing this `foldLeft` by a `flatMap` results in decent performance gains when constructing PartitionStatistics instances for tables with many columns.

This patch fixes this and also makes two similar changes in MLlib and streaming to try to fix all known occurrences of this pattern.

Author: Josh Rosen <joshrosen@databricks.com>

Closes #13491 from JoshRosen/foldleft-to-flatmap.
---
 .../src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'streaming')
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
index b97e24f28b..46cd3092e9 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
@@ -396,11 +396,11 @@ private[ui] class StreamingPage(parent: StreamingTab)
       .map(_.ceil.toLong)
       .getOrElse(0L)
 
-    val content = listener.receivedRecordRateWithBatchTime.toList.sortBy(_._1).map {
+    val content: Seq[Node] = listener.receivedRecordRateWithBatchTime.toList.sortBy(_._1).flatMap {
       case (streamId, recordRates) =>
         generateInputDStreamRow(
           jsCollector, streamId, recordRates, minX, maxX, minY, maxYCalculated)
-    }.foldLeft[Seq[Node]](Nil)(_ ++ _)
+    }
 
     // scalastyle:off
     <table class="table table-bordered" style="width: auto">
-- 
cgit v1.2.3