diff options
author | Wenchen Fan <wenchen@databricks.com> | 2016-08-11 11:02:11 -0700 |
---|---|---|
committer | Yin Huai <yhuai@databricks.com> | 2016-08-11 11:02:11 -0700 |
commit | acaf2a81ad5238fd1bc81e7be2c328f40c07e755 (patch) | |
tree | 347a73148aa4bfe1d01bb6eadf97d7a68ae14575 /sql | |
parent | 0f72e4f04b227b9cd5d7ae5958e09b1def49420a (diff) | |
download | spark-acaf2a81ad5238fd1bc81e7be2c328f40c07e755.tar.gz spark-acaf2a81ad5238fd1bc81e7be2c328f40c07e755.tar.bz2 spark-acaf2a81ad5238fd1bc81e7be2c328f40c07e755.zip |
[SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries
## What changes were proposed in this pull request?
1. `sampled` doesn't need to be `ArrayBuffer`, we never update it, but assign new value
2. `count` doesn't need to be `var`, we never mutate it.
3. `headSampled` doesn't need to be in constructor, we never pass a non-empty `headSampled` to constructor
## How was this patch tested?
existing tests.
Author: Wenchen Fan <wenchen@databricks.com>
Closes #14603 from cloud-fan/simply.
Diffstat (limited to 'sql')
-rw-r--r-- | sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala | 21 |
1 files changed, 11 insertions, 10 deletions
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 50eecb4098..7c58c4897f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -114,14 +114,15 @@ object StatFunctions extends Logging { * See the G-K article for more details. * @param count the count of all the elements *inserted in the sampled buffer* * (excluding the head buffer) - * @param headSampled a buffer of latest samples seen so far */ class QuantileSummaries( val compressThreshold: Int, val relativeError: Double, - val sampled: ArrayBuffer[Stats] = ArrayBuffer.empty, - private[stat] var count: Long = 0L, - val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty) extends Serializable { + val sampled: Array[Stats] = Array.empty, + val count: Long = 0L) extends Serializable { + + // a buffer of latest samples seen so far + private val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty import QuantileSummaries._ @@ -186,7 +187,7 @@ object StatFunctions extends Logging { newSamples.append(sampled(sampleIdx)) sampleIdx += 1 } - new QuantileSummaries(compressThreshold, relativeError, newSamples, currentCount) + new QuantileSummaries(compressThreshold, relativeError, newSamples.toArray, currentCount) } /** @@ -207,7 +208,7 @@ object StatFunctions extends Logging { } private def shallowCopy: QuantileSummaries = { - new QuantileSummaries(compressThreshold, relativeError, sampled, count, headSampled) + new QuantileSummaries(compressThreshold, relativeError, sampled, count) } /** @@ -305,11 +306,11 @@ object StatFunctions extends Logging { private def compressImmut( currentSamples: IndexedSeq[Stats], - mergeThreshold: Double): ArrayBuffer[Stats] = { - val res: ArrayBuffer[Stats] = ArrayBuffer.empty + mergeThreshold: Double): Array[Stats] = { if (currentSamples.isEmpty) { - return res + return Array.empty[Stats] } + val res: ArrayBuffer[Stats] = ArrayBuffer.empty // Start for the last element, which is always part of the set. // The head contains the current new head, that may be merged with the current element. var head = currentSamples.last @@ -332,7 +333,7 @@ object StatFunctions extends Logging { res.prepend(head) // If necessary, add the minimum element: res.prepend(currentSamples.head) - res + res.toArray } } |