[SPARK-11622][MLLIB] Make LibSVMRelation extend HadoopFsRelation and…

… Add LibSVMOutputWriter. The behavior of LibSVMRelation is not changed except for adding LibSVMOutputWriter: * Partitioning is still not supported * Multiple input paths are not supported. Author: Jeff Zhang <zjffdu@apache.org>. Closes #9595 from zjffdu/SPARK-11622.
author: Jeff Zhang <zjffdu@apache.org> 2016-01-26 17:31:19 -0800
committer: Xiangrui Meng <meng@databricks.com> 2016-01-26 17:31:19 -0800
commit: 1dac964c1b996d38c65818414fc8401961a1de8a (patch)
tree: 9fa21c83ff1737851ab87e1e044eef73be5c588e /mllib/src/test
parent: 22662b241629b56205719ede2f801a476e10a3cd (diff)
download: spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.gz
spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.bz2
spark-1dac964c1b996d38c65818414fc8401961a1de8a.zip
1 files changed, 22 insertions, 1 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
index 5f4d5f11bd..528d9e21cb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.ml.source.libsvm
 
-import java.io.File
+import java.io.{File, IOException}
 
 import com.google.common.base.Charsets
 import com.google.common.io.Files
@@ -25,6 +25,7 @@ import com.google.common.io.Files
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.sql.SaveMode
 import org.apache.spark.util.Utils
 
 class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -82,4 +83,24 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
     val v = row1.getAs[SparseVector](1)
     assert(v == Vectors.sparse(100, Seq((0, 1.0), (2, 2.0), (4, 3.0))))
   }
+
+  test("write libsvm data and read it again") {
+    val df = sqlContext.read.format("libsvm").load(path)  // `path` is defined outside this hunk
+    val tempDir2 = Utils.createTempDir()  // presumably exists already, hence Overwrite below
+    val writepath = tempDir2.toURI.toString
+    df.write.format("libsvm").mode(SaveMode.Overwrite).save(writepath)
+    // Reload the written data; expected size 6 below is presumably inferred on read.
+    val df2 = sqlContext.read.format("libsvm").load(writepath)
+    val row1 = df2.first()
+    val v = row1.getAs[SparseVector](1)  // column index 1 is fetched as a SparseVector
+    assert(v == Vectors.sparse(6, Seq((0, 1.0), (2, 2.0), (4, 3.0))))
+  }
+
+  test("write libsvm data failed due to invalid schema") {
+    val df = sqlContext.read.format("text").load(path)  // same input, read via the text source
+    val e = intercept[IOException] {
+      df.write.format("libsvm").save(path + "_2")  // the save itself must throw
+    }
+    assert(e.getMessage.contains("Illegal schema for libsvm data"))  // substring match only
+  }
 }
author	Jeff Zhang <zjffdu@apache.org>	2016-01-26 17:31:19 -0800
committer	Xiangrui Meng <meng@databricks.com>	2016-01-26 17:31:19 -0800
commit	1dac964c1b996d38c65818414fc8401961a1de8a (patch)
tree	9fa21c83ff1737851ab87e1e044eef73be5c588e /mllib/src/test
parent	22662b241629b56205719ede2f801a476e10a3cd (diff)
download	spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.gz spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.bz2 spark-1dac964c1b996d38c65818414fc8401961a1de8a.zip