From b79bf1df6238c087c3ec524344f1fc179719c5de Mon Sep 17 00:00:00 2001
From: Yuhao Yang
Date: Sun, 26 Jul 2015 14:02:20 +0100
Subject: [SPARK-9337] [MLLIB] Add an ut for Word2Vec to verify the empty vocabulary check

jira: https://issues.apache.org/jira/browse/SPARK-9337

Word2Vec should throw exception when vocabulary is empty

Author: Yuhao Yang

Closes #7660 from hhbyyh/ut4Word2vec and squashes the following commits:

17a18cb [Yuhao Yang] add ut for word2vec
---
 .../scala/org/apache/spark/mllib/feature/Word2VecSuite.scala | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'mllib/src')

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
index 4cc8d1129b..a864eec460 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
@@ -45,6 +45,16 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(newModel.getVectors.mapValues(_.toSeq) === word2VecMap.mapValues(_.toSeq))
   }
 
+  test("Word2Vec throws exception when vocabulary is empty") {
+    intercept[IllegalArgumentException] {
+      val sentence = "a b c"
+      val localDoc = Seq(sentence, sentence)
+      val doc = sc.parallelize(localDoc)
+        .map(line => line.split(" ").toSeq)
+      new Word2Vec().setMinCount(10).fit(doc)
+    }
+  }
+
   test("Word2VecModel") {
     val num = 2
     val word2VecMap = Map(
-- 
cgit v1.2.3