diff options
author | Jeff Zhang <zjffdu@apache.org> | 2016-01-26 17:31:19 -0800 |
---|---|---|
committer | Xiangrui Meng <meng@databricks.com> | 2016-01-26 17:31:19 -0800 |
commit | 1dac964c1b996d38c65818414fc8401961a1de8a (patch) | |
tree | 9fa21c83ff1737851ab87e1e044eef73be5c588e /mllib/src/test | |
parent | 22662b241629b56205719ede2f801a476e10a3cd (diff) | |
download | spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.gz spark-1dac964c1b996d38c65818414fc8401961a1de8a.tar.bz2 spark-1dac964c1b996d38c65818414fc8401961a1de8a.zip |
[SPARK-11622][MLLIB] Make LibSVMRelation extend HadoopFsRelation and add LibSVMOutputWriter
The behavior of LibSVMRelation is unchanged apart from the addition of LibSVMOutputWriter.
* Partitioning is still not supported
* Multiple input paths are not supported
Author: Jeff Zhang <zjffdu@apache.org>
Closes #9595 from zjffdu/SPARK-11622.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala | 23 |
1 files changed, 22 insertions, 1 deletions
```diff
diff --git a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
index 5f4d5f11bd..528d9e21cb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/source/libsvm/LibSVMRelationSuite.scala
@@ -17,7 +17,7 @@
 
 package org.apache.spark.ml.source.libsvm
 
-import java.io.File
+import java.io.{File, IOException}
 
 import com.google.common.base.Charsets
 import com.google.common.io.Files
@@ -25,6 +25,7 @@ import com.google.common.io.Files
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vectors}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
+import org.apache.spark.sql.SaveMode
 import org.apache.spark.util.Utils
 
 class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
@@ -82,4 +83,24 @@ class LibSVMRelationSuite extends SparkFunSuite with MLlibTestSparkContext {
     val v = row1.getAs[SparseVector](1)
     assert(v == Vectors.sparse(100, Seq((0, 1.0), (2, 2.0), (4, 3.0))))
   }
+
+  test("write libsvm data and read it again") {
+    val df = sqlContext.read.format("libsvm").load(path)
+    val tempDir2 = Utils.createTempDir()
+    val writepath = tempDir2.toURI.toString
+    df.write.format("libsvm").mode(SaveMode.Overwrite).save(writepath)
+
+    val df2 = sqlContext.read.format("libsvm").load(writepath)
+    val row1 = df2.first()
+    val v = row1.getAs[SparseVector](1)
+    assert(v == Vectors.sparse(6, Seq((0, 1.0), (2, 2.0), (4, 3.0))))
+  }
+
+  test("write libsvm data failed due to invalid schema") {
+    val df = sqlContext.read.format("text").load(path)
+    val e = intercept[IOException] {
+      df.write.format("libsvm").save(path + "_2")
+    }
+    assert(e.getMessage.contains("Illegal schema for libsvm data"))
+  }
 }
```