From 51ade51a9fd64fc2fe651c505a286e6f29f59d40 Mon Sep 17 00:00:00 2001 From: Sean Owen Date: Wed, 13 Jul 2016 11:39:32 +0100 Subject: [SPARK-16440][MLLIB] Undeleted broadcast variables in Word2Vec causing OoM for long runs ## What changes were proposed in this pull request? Unpersist broadcasted vars in Word2Vec.fit for more timely / reliable resource cleanup ## How was this patch tested? Jenkins tests Author: Sean Owen Closes #14153 from srowen/SPARK-16440. --- mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mllib/src') diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index f2211df3f9..6b9c8ee2e3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -434,6 +434,9 @@ class Word2Vec extends Serializable with Logging { bcSyn1Global.unpersist(false) } newSentences.unpersist() + expTable.unpersist() + bcVocab.unpersist() + bcVocabHash.unpersist() val wordArray = vocab.map(_.word) new Word2VecModel(wordArray.zipWithIndex.toMap, syn0Global) -- cgit v1.2.3