author    Xiangrui Meng <meng@databricks.com>  2015-02-13 16:43:49 -0800
committer Xiangrui Meng <meng@databricks.com>  2015-02-13 16:43:49 -0800
commit    d50a91d529b0913364b483c511397d4af308a435
tree      7e16f6d31dc663cfb9fc66bb6adcbb3ac00c95a6
parent    cc56c8729a76af85aa6eb5d2f99787cca5e5b38f
[SPARK-5803][MLLIB] use ArrayBuilder to build primitive arrays
because ArrayBuffer is not specialized.

Author: Xiangrui Meng <meng@databricks.com>

Closes #4594 from mengxr/SPARK-5803 and squashes the following commits:

1261bd5 [Xiangrui Meng] merge master
a4ea872 [Xiangrui Meng] use ArrayBuilder to build primitive arrays
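For background (not part of the patch): scala.collection.mutable.ArrayBuffer is a generic collection, so an ArrayBuffer[Int] boxes each element as a java.lang.Integer and toArray must unbox them again into a new Array[Int]. ArrayBuilder.make[Int] is specialized for primitives and appends directly into an underlying Array[Int]. A minimal sketch of the difference, assuming only the standard library (PrimitiveArraySketch is an illustrative name):

import scala.collection.mutable.{ArrayBuffer, ArrayBuilder}

object PrimitiveArraySketch {
  def main(args: Array[String]): Unit = {
    // Generic buffer: each Int is boxed on append and unboxed by toArray.
    val buffer = new ArrayBuffer[Int]
    (1 to 5).foreach(buffer += _)
    val viaBuffer: Array[Int] = buffer.toArray

    // Specialized builder: elements go straight into a primitive Array[Int].
    val builder = ArrayBuilder.make[Int]
    (1 to 5).foreach(builder += _)
    val viaBuilder: Array[Int] = builder.result()

    assert(viaBuffer.sameElements(viaBuilder))
  }
}

Both produce the same array; the builder simply avoids per-element boxing while the array is being grown, which is the point of the three changes below.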
Diffstat (limited to 'mllib')
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala   |  6 +++---
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala  | 10 +++++-----
-rw-r--r--  mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala | 10 +++++-----
3 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index a3e40200bc..59a79e5c6a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -21,7 +21,7 @@ import java.lang.{Iterable => JavaIterable}
import scala.collection.JavaConverters._
import scala.collection.mutable
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable.ArrayBuilder
import com.github.fommil.netlib.BLAS.{getInstance => blas}
@@ -272,7 +272,7 @@ class Word2Vec extends Serializable with Logging {
def hasNext: Boolean = iter.hasNext
def next(): Array[Int] = {
- var sentence = new ArrayBuffer[Int]
+ val sentence = ArrayBuilder.make[Int]
var sentenceLength = 0
while (iter.hasNext && sentenceLength < MAX_SENTENCE_LENGTH) {
val word = bcVocabHash.value.get(iter.next())
@@ -283,7 +283,7 @@ class Word2Vec extends Serializable with Logging {
case None =>
}
}
- sentence.toArray
+ sentence.result()
}
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
index 9a586b9d9c..f1f85994e6 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/DecisionTree.scala
@@ -17,9 +17,9 @@
package org.apache.spark.mllib.tree
-import scala.collection.mutable
import scala.collection.JavaConverters._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable
+import scala.collection.mutable.ArrayBuilder
import org.apache.spark.Logging
import org.apache.spark.annotation.Experimental
@@ -1136,7 +1136,7 @@ object DecisionTree extends Serializable with Logging {
logDebug("stride = " + stride)
// iterate `valueCount` to find splits
- val splits = new ArrayBuffer[Double]
+ val splitsBuilder = ArrayBuilder.make[Double]
var index = 1
// currentCount: sum of counts of values that have been visited
var currentCount = valueCounts(0)._2
@@ -1154,13 +1154,13 @@ object DecisionTree extends Serializable with Logging {
// makes the gap between currentCount and targetCount smaller,
// previous value is a split threshold.
if (previousGap < currentGap) {
- splits.append(valueCounts(index - 1)._1)
+ splitsBuilder += valueCounts(index - 1)._1
targetCount += stride
}
index += 1
}
- splits.toArray
+ splitsBuilder.result()
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
index f7cba6c6cb..308f7f3578 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
@@ -19,7 +19,7 @@ package org.apache.spark.mllib.util
import java.util.StringTokenizer
-import scala.collection.mutable.{ArrayBuffer, ListBuffer}
+import scala.collection.mutable.{ArrayBuilder, ListBuffer}
import org.apache.spark.SparkException
@@ -51,7 +51,7 @@ private[mllib] object NumericParser {
}
private def parseArray(tokenizer: StringTokenizer): Array[Double] = {
- val values = ArrayBuffer.empty[Double]
+ val values = ArrayBuilder.make[Double]
var parsing = true
var allowComma = false
var token: String = null
@@ -67,14 +67,14 @@ private[mllib] object NumericParser {
}
} else {
// expecting a number
- values.append(parseDouble(token))
+ values += parseDouble(token)
allowComma = true
}
}
if (parsing) {
throw new SparkException(s"An array must end with ']'.")
}
- values.toArray
+ values.result()
}
private def parseTuple(tokenizer: StringTokenizer): Seq[_] = {
@@ -114,7 +114,7 @@ private[mllib] object NumericParser {
try {
java.lang.Double.parseDouble(s)
} catch {
- case e: Throwable =>
+ case e: NumberFormatException =>
throw new SparkException(s"Cannot parse a double from: $s", e)
}
}
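The final hunk is a separate tightening: catching Throwable would also swallow fatal JVM errors such as OutOfMemoryError, while java.lang.Double.parseDouble signals bad input only via NumberFormatException. A self-contained sketch of the narrowed pattern (parseDoubleOrFail and the IllegalArgumentException stand-in are illustrative; the patched helper wraps the cause in SparkException instead):

object ParseDoubleSketch {
  // Mirrors the patched helper: translate only the expected failure,
  // letting fatal errors (e.g. OutOfMemoryError) propagate untouched.
  def parseDoubleOrFail(s: String): Double = {
    try {
      java.lang.Double.parseDouble(s)
    } catch {
      case e: NumberFormatException =>
        throw new IllegalArgumentException(s"Cannot parse a double from: $s", e)
    }
  }

  def main(args: Array[String]): Unit = {
    println(parseDoubleOrFail("3.14"))  // prints 3.14
    // parseDoubleOrFail("abc")         // would throw IllegalArgumentException
  }
}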