diff options
author | Antonio Murgia <antonio.murgia2@studio.unibo.it> | 2015-12-05 15:42:02 +0000 |
---|---|---|
committer | Sean Owen <sowen@cloudera.com> | 2015-12-05 15:42:02 +0000 |
commit | e9c9ae22b96e08e5bb40a029e84d342efb1aec0c (patch) | |
tree | 90bb3086977860ddac182ef8d04672aff28fd328 /mllib/src/test | |
parent | ee94b70ce56661ea26c5aad17778ade32f3f1d3d (diff) | |
download | spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.gz spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.bz2 spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.zip |
[SPARK-11994][MLLIB] Word2VecModel load and save cause SparkException when model is bigger than spark.kryoserializer.buffer.max
Author: Antonio Murgia <antonio.murgia2@studio.unibo.it>
Closes #9989 from tmnd1991/SPARK-11932.
Diffstat (limited to 'mllib/src/test')
-rw-r--r-- | mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala | 19 |
1 files changed, 19 insertions, 0 deletions
```diff
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
index a864eec460..37d01e2876 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
@@ -92,4 +92,23 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
     }
   }
 
+
+  test("big model load / save") {
+    // create a model bigger than 32MB since 9000 * 1000 * 4 > 2^25
+    val word2VecMap = Map((0 to 9000).map(i => s"$i" -> Array.fill(1000)(0.1f)): _*)
+    val model = new Word2VecModel(word2VecMap)
+
+    val tempDir = Utils.createTempDir()
+    val path = tempDir.toURI.toString
+
+    try {
+      model.save(sc, path)
+      val sameModel = Word2VecModel.load(sc, path)
+      assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq))
+    } finally {
+      Utils.deleteRecursively(tempDir)
+    }
+  }
+
+
 }
```