diff options
author | Yuhao Yang <hhbyyh@gmail.com> | 2015-07-26 14:02:20 +0100 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-07-26 14:02:20 +0100 |
commit | b79bf1df6238c087c3ec524344f1fc179719c5de (patch) | |
tree | f30cb1f2771e24b5327e066d715ca80ee2e8efce /mllib/src | |
parent | 4a01bfc2a2e664186028ea32095d32d29c9f9e38 (diff) | |
download | spark-b79bf1df6238c087c3ec524344f1fc179719c5de.tar.gz spark-b79bf1df6238c087c3ec524344f1fc179719c5de.tar.bz2 spark-b79bf1df6238c087c3ec524344f1fc179719c5de.zip |
[SPARK-9337] [MLLIB] Add a unit test for Word2Vec to verify the empty vocabulary check
jira: https://issues.apache.org/jira/browse/SPARK-9337
Word2Vec should throw an exception when the vocabulary is empty.
Author: Yuhao Yang <hhbyyh@gmail.com>
Closes #7660 from hhbyyh/ut4Word2vec and squashes the following commits:
17a18cb [Yuhao Yang] add ut for word2vec
Diffstat (limited to 'mllib/src')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala | 10 |
1 files changed, 10 insertions, 0 deletions
```diff
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
index 4cc8d1129b..a864eec460 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
@@ -45,6 +45,16 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq))
   }
 
+  test("Word2Vec throws exception when vocabulary is empty") {
+    intercept[IllegalArgumentException] {
+      val sentence = "a b c"
+      val localDoc = Seq(sentence, sentence)
+      val doc = sc.parallelize(localDoc)
+        .map(line => line.split(" ").toSeq)
+      new Word2Vec().setMinCount(10).fit(doc)
+    }
+  }
+
   test("Word2VecModel") {
     val num = 2
     val word2VecMap = Map(
```