From 57c07204ca452564b930085cfa9e8b099e45b2a9 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Fri, 6 Feb 2015 13:55:49 +0100 Subject: Limit the size of the ByteCodeRepository cache I observed cases (eg Scaladoc tests) where we end up with 17k+ ClassNodes, which makes 500 MB. --- .../tools/nsc/backend/jvm/BCodeSkelBuilder.scala | 2 +- .../tools/nsc/backend/jvm/BTypesFromSymbols.scala | 3 +- .../nsc/backend/jvm/opt/ByteCodeRepository.scala | 37 ++++++++++++++++++++-- 3 files changed, 38 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala index e40e928761..61606419bd 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BCodeSkelBuilder.scala @@ -133,7 +133,7 @@ abstract class BCodeSkelBuilder extends BCodeHelpers { if (settings.YoptInlinerEnabled) { // The inliner needs to find all classes in the code repo, also those being compiled - byteCodeRepository.classes(cnode.name) = Some((cnode, ByteCodeRepository.CompilationUnit)) + byteCodeRepository.add(cnode, ByteCodeRepository.CompilationUnit) } assert(cd.symbol == claszSymbol, "Someone messed up BCodePhase.claszSymbol during genPlainClass().") diff --git a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala index a217e54ed8..b90030dd8c 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/BTypesFromSymbols.scala @@ -413,7 +413,8 @@ class BTypesFromSymbols[G <: Global](val global: G) extends BTypes { // phase travel required, see implementation of `compiles`. for nested classes, it checks if the // enclosingTopLevelClass is being compiled. after flatten, all classes are considered top-level, // so `compiles` would return `false`. 
- if (exitingPickler(currentRun.compiles(classSym))) buildFromSymbol + if (exitingPickler(currentRun.compiles(classSym))) buildFromSymbol // InlineInfo required for classes being compiled, we have to create the classfile attribute + else if (!inlinerEnabled) BTypes.EmptyInlineInfo // For other classes, we need the InlineInfo only if the inliner is enabled. else { // For classes not being compiled, the InlineInfo is read from the classfile attribute. This // fixes an issue with mixed-in methods: the mixin phase enters mixin methods only to class diff --git a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala index fb58f1b189..0958601d73 100644 --- a/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala +++ b/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala @@ -17,6 +17,7 @@ import OptimizerReporting._ import BytecodeUtils._ import ByteCodeRepository._ import BTypes.InternalName +import java.util.concurrent.atomic.AtomicLong /** * The ByteCodeRepository provides utilities to read the bytecode of classfiles from the compilation @@ -26,16 +27,48 @@ import BTypes.InternalName * @param classes Cache for parsed ClassNodes. Also stores the source of the bytecode: * [[Classfile]] if read from `classPath`, [[CompilationUnit]] if the bytecode * corresponds to a class being compiled. + * The `Long` field encodes the age of the node in the map, which allows removing + * old entries when the map grows too large. * For Java classes in mixed compilation, the map contains `None`: there is no * ClassNode generated by the backend and also no classfile that could be parsed. 
*/ -class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val classes: collection.concurrent.Map[InternalName, Option[(ClassNode, Source)]]) { +class ByteCodeRepository(val classPath: ClassFileLookup[AbstractFile], val classes: collection.concurrent.Map[InternalName, Option[(ClassNode, Source, Long)]]) { + + private val maxCacheSize = 1500 + private val targetSize = 500 + + private val idCounter = new AtomicLong(0) + + /** + * Prevent the code repository from growing too large. Profiling reveals that the average size + * of a ClassNode is about 30 KB. I observed having 17k+ classes in the cache, i.e., 500 MB. + * + * We can only remove classes with `Source == Classfile`, those can be parsed again if requested. + */ + private def limitCacheSize(): Unit = { + if (classes.count(c => c._2.isDefined && c._2.get._2 == Classfile) > maxCacheSize) { + val removeId = idCounter.get - targetSize + val toRemove = classes.iterator.collect({ + case (name, Some((_, Classfile, id))) if id < removeId => name + }).toList + toRemove foreach classes.remove + } + } + + def add(classNode: ClassNode, source: Source) = { + classes(classNode.name) = Some((classNode, source, idCounter.incrementAndGet())) + } + /** * The class node and source for an internal name. If the class node is not yet available, it is * parsed from the classfile on the compile classpath. */ def classNodeAndSource(internalName: InternalName): Option[(ClassNode, Source)] = { - classes.getOrElseUpdate(internalName, parseClass(internalName).map((_, Classfile))) + val r = classes.getOrElseUpdate(internalName, { + limitCacheSize() + parseClass(internalName).map((_, Classfile, idCounter.incrementAndGet())) + }) + r.map(v => (v._1, v._2)) } /** -- cgit v1.2.3