aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jakub Dubovsky <dubovsky@avast.com> 2014-12-30 14:19:07 -0800
committer Xiangrui Meng <meng@databricks.com> 2014-12-30 14:19:07 -0800
commit 0f31992c61f6662e5347745f6a1ac272a5fd63c9 (patch)
treeab53848611b13d4e62bd53b2399feb1eca74f0de
parentb239ea1c31aeaa752d5dc8f45423df1f8c0924ca (diff)
downloadspark-0f31992c61f6662e5347745f6a1ac272a5fd63c9.tar.gz
spark-0f31992c61f6662e5347745f6a1ac272a5fd63c9.tar.bz2
spark-0f31992c61f6662e5347745f6a1ac272a5fd63c9.zip
[SPARK-4995] Replace Vector.toBreeze.activeIterator with foreachActive
The new foreachActive method of Vector was introduced by SPARK-4431 as a more efficient alternative to vector.toBreeze.activeIterator. Some parts of the codebase had not yet been switched over to it; this change replaces the remaining uses. dbtsai Author: Jakub Dubovsky <dubovsky@avast.com> Closes #3846 from james64/SPARK-4995-foreachActive and squashes the following commits: 3eb7e37 [Jakub Dubovsky] Scalastyle fix 32fe6c6 [Jakub Dubovsky] activeIterator removed - IndexedRowMatrix.toBreeze 47a4777 [Jakub Dubovsky] activeIterator removed in RowMatrix.toBreeze 90a7d98 [Jakub Dubovsky] activeIterator removed in MLUtils.saveAsLibSVMFile
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala 2
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala 4
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala 8
3 files changed, 8 insertions, 6 deletions
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
index 5c1acca0ec..36d8cadd2b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala
@@ -142,7 +142,7 @@ class IndexedRowMatrix(
val mat = BDM.zeros[Double](m, n)
rows.collect().foreach { case IndexedRow(rowIndex, vector) =>
val i = rowIndex.toInt
- vector.toBreeze.activeIterator.foreach { case (j, v) =>
+ vector.foreachActive { case (j, v) =>
mat(i, j) = v
}
}
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index 10a515af88..a3fca53929 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -588,8 +588,8 @@ class RowMatrix(
val n = numCols().toInt
val mat = BDM.zeros[Double](m, n)
var i = 0
- rows.collect().foreach { v =>
- v.toBreeze.activeIterator.foreach { case (j, v) =>
+ rows.collect().foreach { vector =>
+ vector.foreachActive { case (j, v) =>
mat(i, j) = v
}
i += 1
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index 1d07b5dab8..da0da0a168 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -154,10 +154,12 @@ object MLUtils {
def saveAsLibSVMFile(data: RDD[LabeledPoint], dir: String) {
// TODO: allow to specify label precision and feature precision.
val dataStr = data.map { case LabeledPoint(label, features) =>
- val featureStrings = features.toBreeze.activeIterator.map { case (i, v) =>
- s"${i + 1}:$v"
+ val sb = new StringBuilder(label.toString)
+ features.foreachActive { case (i, v) =>
+ sb += ' '
+ sb ++= s"${i + 1}:$v"
}
- (Iterator(label) ++ featureStrings).mkString(" ")
+ sb.mkString
}
dataStr.saveAsTextFile(dir)
}