aboutsummaryrefslogtreecommitdiff
path: root/mllib
diff options
context:
space:
mode:
author: Joseph E. Gonzalez <joseph.e.gonzalez@gmail.com> 2014-10-30 00:05:57 -0700
committer: Xiangrui Meng <meng@databricks.com> 2014-10-30 00:05:57 -0700
commit: c7ad0852084dc28f3ebc144adfd4928b23f1c8ea (patch)
tree: 1f6e9f61eb7413f9933eafccb76415f97e8d0b9d /mllib
parent: 6db3157464e36f7a572ada5f1e7f88730aa23dbd (diff)
download: spark-c7ad0852084dc28f3ebc144adfd4928b23f1c8ea.tar.gz
spark-c7ad0852084dc28f3ebc144adfd4928b23f1c8ea.tar.bz2
spark-c7ad0852084dc28f3ebc144adfd4928b23f1c8ea.zip
[SPARK-4130][MLlib] Fixing libSVM parser bug with extra whitespace
This simple patch filters out extra whitespace entries.

Author: Joseph E. Gonzalez <joseph.e.gonzalez@gmail.com>
Author: Joey <joseph.e.gonzalez@gmail.com>

Closes #2996 from jegonzal/loadLibSVM and squashes the following commits:

e0227ab [Joey] improving readability
e028e84 [Joseph E. Gonzalez] fixing whitespace bug in loadLibSVMFile when parsing libSVM files
Diffstat (limited to 'mllib')
-rw-r--r-- mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala | 2
1 file changed, 1 insertion, 1 deletion
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
index dce0adffa6..b88e08bf14 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala
@@ -76,7 +76,7 @@ object MLUtils {
.map { line =>
val items = line.split(' ')
val label = items.head.toDouble
- val (indices, values) = items.tail.map { item =>
+ val (indices, values) = items.tail.filter(_.nonEmpty).map { item =>
val indexAndValue = item.split(':')
val index = indexAndValue(0).toInt - 1 // Convert 1-based indices to 0-based.
val value = indexAndValue(1).toDouble