[SPARK-11994][MLLIB] Word2VecModel load and save cause SparkException when model is bigger than spark.kryoserializer.buffer.max

Author: Antonio Murgia <antonio.murgia2@studio.unibo.it>
Closes #9989 from tmnd1991/SPARK-11932.
author: Antonio Murgia <antonio.murgia2@studio.unibo.it> 2015-12-05 15:42:02 +0000
committer: Sean Owen <sowen@cloudera.com> 2015-12-05 15:42:02 +0000
commit: e9c9ae22b96e08e5bb40a029e84d342efb1aec0c (patch)
tree: 90bb3086977860ddac182ef8d04672aff28fd328 /mllib/src/test
parent: ee94b70ce56661ea26c5aad17778ade32f3f1d3d (diff)
download: spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.gz
spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.bz2
spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.zip
1 file changed, 19 insertions, 0 deletions
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
index a864eec460..37d01e2876 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/feature/Word2VecSuite.scala
@@ -92,4 +92,23 @@ class Word2VecSuite extends SparkFunSuite with MLlibTestSparkContext {
     }
 
   }
+
+  test("big model load / save") {
+    // create a model bigger than 32MB: 9001 vectors * 1000 floats * 4 bytes > 2^25 bytes
+    val word2VecMap = Map((0 to 9000).map(i => s"$i" -> Array.fill(1000)(0.1f)): _*)
+    val model = new Word2VecModel(word2VecMap)
+
+    val tempDir = Utils.createTempDir()
+    val path = tempDir.toURI.toString
+
+    try {
+      model.save(sc, path)
+      val sameModel = Word2VecModel.load(sc, path)
+      assert(sameModel.getVectors.mapValues(_.toSeq) === model.getVectors.mapValues(_.toSeq))
+    } finally {
+      Utils.deleteRecursively(tempDir)
+    }
+  }
+
+
 }
author	Antonio Murgia <antonio.murgia2@studio.unibo.it>	2015-12-05 15:42:02 +0000
committer	Sean Owen <sowen@cloudera.com>	2015-12-05 15:42:02 +0000
commit	e9c9ae22b96e08e5bb40a029e84d342efb1aec0c (patch)
tree	90bb3086977860ddac182ef8d04672aff28fd328 /mllib/src/test
parent	ee94b70ce56661ea26c5aad17778ade32f3f1d3d (diff)
download	spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.gz spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.tar.bz2 spark-e9c9ae22b96e08e5bb40a029e84d342efb1aec0c.zip