summaryrefslogtreecommitdiff
path: root/src/compiler/scala/tools/nsc/backend/jvm/opt/ByteCodeRepository.scala
blob: f2ff73c44d52b4dd06695d9b018981d25be17d09 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/* NSC -- new Scala compiler
 * Copyright 2005-2014 LAMP/EPFL
 * @author  Martin Odersky
 */

package scala.tools.nsc
package backend.jvm
package opt

import scala.tools.asm
import asm.tree._
import scala.collection.JavaConverters._
import scala.collection.{concurrent, mutable}
import scala.tools.asm.Attribute
import scala.tools.nsc.backend.jvm.BackendReporting._
import scala.tools.nsc.util.ClassPath
import BytecodeUtils._
import BTypes.InternalName
import java.util.concurrent.atomic.AtomicLong

/**
 * The ByteCodeRepository provides utilities to read the bytecode of classfiles from the compilation
 * classpath. Parsed classes are cached in the `classes` map.
 *
 * @param classPath The compiler classpath where classfiles are searched and read from.
 */
class ByteCodeRepository[BT <: BTypes](val classPath: ClassPath, val btypes: BT) {
  import btypes._

  /**
   * Contains ClassNodes and the canonical path of the source file path of classes being compiled in
   * the current compilation run.
   */
  val compilingClasses: concurrent.Map[InternalName, (ClassNode, String)] = recordPerRunCache(concurrent.TrieMap.empty)

  /**
   * Cache for parsed ClassNodes.
   * The `Long` field encodes the age of the node in the map, which allows removing old entries when
   * the map grows too large (see limitCacheSize).
   * For Java classes in mixed compilation, the map contains an error message: no ClassNode is
   * generated by the backend and also no classfile that could be parsed.
   */
  val parsedClasses: concurrent.Map[InternalName, Either[ClassNotFound, (ClassNode, Long)]] = recordPerRunCache(concurrent.TrieMap.empty)

  private val maxCacheSize = 1500
  private val targetSize   = 500

  private object lruCounter extends AtomicLong(0l) with collection.generic.Clearable {
    def clear(): Unit = { this.set(0l) }
  }
  recordPerRunCache(lruCounter)

  /**
   * Prevent the code repository from growing too large. Profiling reveals that the average size
   * of a ClassNode is about 30 kb. I observed having 17k+ classes in the cache, i.e., 500 mb.
   */
  private def limitCacheSize(): Unit = {
    if (parsedClasses.size > maxCacheSize) {
      // OK if multiple threads get here
      val minimalLRU = parsedClasses.valuesIterator.collect({
        case Right((_, lru)) => lru
      }).toList.sorted(Ordering.Long.reverse).drop(targetSize).headOption.getOrElse(Long.MaxValue)
      parsedClasses retain {
        case (_, Right((_, lru))) => lru > minimalLRU
        case _ => false
      }
    }
  }

  def add(classNode: ClassNode, sourceFilePath: Option[String]) = sourceFilePath match {
    case Some(path) if path != "<no file>" => compilingClasses(classNode.name) = (classNode, path)
    case _                                 => parsedClasses(classNode.name) = Right((classNode, lruCounter.incrementAndGet()))
  }

  private def parsedClassNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = {
    val r = parsedClasses.get(internalName) match {
      case Some(l @ Left(_)) => l
      case Some(r @ Right((classNode, _))) =>
        parsedClasses(internalName) = Right((classNode, lruCounter.incrementAndGet()))
        r
      case None =>
        limitCacheSize()
        val res = parseClass(internalName).map((_, lruCounter.incrementAndGet()))
        parsedClasses(internalName) = res
        res
    }
    r.map(_._1)
  }

  /**
   * The class node and source file path (if the class is being compiled) for an internal name. If
   * the class node is not yet available, it is parsed from the classfile on the compile classpath.
   */
  def classNodeAndSourceFilePath(internalName: InternalName): Either[ClassNotFound, (ClassNode, Option[String])] = {
    compilingClasses.get(internalName) match {
      case Some((c, p)) => Right((c, Some(p)))
      case _            => parsedClassNode(internalName).map((_, None))
    }
  }

  /**
   * The class node for an internal name. If the class node is not yet available, it is parsed from
   * the classfile on the compile classpath.
   */
  def classNode(internalName: InternalName): Either[ClassNotFound, ClassNode] = {
    compilingClasses.get(internalName) match {
      case Some((c, _)) => Right(c)
      case None         => parsedClassNode(internalName)
    }
  }

  /**
   * The field node for a field matching `name` and `descriptor`, accessed in class `classInternalName`.
   * The declaration of the field may be in one of the superclasses.
   *
   * @return The [[FieldNode]] of the requested field and the [[InternalName]] of its declaring
   *         class, or an error message if the field could not be found
   */
  def fieldNode(classInternalName: InternalName, name: String, descriptor: String): Either[FieldNotFound, (FieldNode, InternalName)] = {
    def fieldNodeImpl(parent: InternalName): Either[FieldNotFound, (FieldNode, InternalName)] = {
      classNode(parent) match {
        case Left(e)  => Left(FieldNotFound(name, descriptor, classInternalName, Some(e)))
        case Right(c) =>
          c.fields.asScala.find(f => f.name == name && f.desc == descriptor) match {
            case Some(f) => Right((f, parent))
            case None    =>
              if (c.superName == null) Left(FieldNotFound(name, descriptor, classInternalName, None))
              else fieldNode(c.superName, name, descriptor)
          }
      }
    }
    fieldNodeImpl(classInternalName)
  }

  /**
   * The method node for a method matching `name` and `descriptor`, accessed in class `ownerInternalNameOrArrayDescriptor`.
   * The declaration of the method may be in one of the parents.
   *
   * Note that the JVM spec performs method lookup in two steps: resolution and selection.
   *
   * Method resolution, defined in jvms-5.4.3.3 and jvms-5.4.3.4, is the first step and is identical
   * for all invocation styles (virtual, interface, special, static). If C is the receiver class
   * in the invocation instruction:
   *   1 find a matching method (name and descriptor) in C
   *   2 then in C's superclasses
   *   3 then find the maximally-specific matching superinterface methods, succeed if there's a
   *     single non-abstract one. static and private methods in superinterfaces are not considered.
   *   4 then pick a random non-static, non-private superinterface method.
   *   5 then fail.
   *
   * Note that for an `invokestatic` instruction, a method reference `B.m` may resolve to `A.m`, if
   * class `B` doesn't specify a matching method `m`, but the parent `A` does.
   *
   * Selection depends on the invocation style and is defined in jvms-6.5.
   *   - invokestatic: invokes the resolved method
   *   - invokevirtual / invokeinterface: searches for an override of the resolved method starting
   *     at the dynamic receiver type. the search procedure is basically the same as in resolution,
   *     but it fails at 4 instead of picking a superinterface method at random.
   *   - invokespecial: if C is the receiver in the invocation instruction, searches for an override
   *     of the resolved method starting at
   *       - the superclass of the current class, if C is a superclass of the current class
   *       - C otherwise
   *     again, the search procedure is the same.
   *
   * In the method here we implement method *resolution*. Whether or not the returned method is
   * actually invoked at runtime depends on the invocation instruction and the class hierarchy, so
   * the users (e.g. the inliner) have to be aware of method selection.
   *
   * Note that the returned method may be abstract (ACC_ABSTRACT), native (ACC_NATIVE) or signature
   * polymorphic (methods `invoke` and `invokeExact` in class `MethodHandles`).
   *
   * @return The [[MethodNode]] of the requested method and the [[InternalName]] of its declaring
   *         class, or an error message if the method could not be found. An error message is also
   *         returned if method resolution results in multiple default methods.
   */
  def methodNode(ownerInternalNameOrArrayDescriptor: String, name: String, descriptor: String): Either[MethodNotFound, (MethodNode, InternalName)] = {
    def findMethod(c: ClassNode): Option[MethodNode] = c.methods.asScala.find(m => m.name == name && m.desc == descriptor)

    // https://docs.oracle.com/javase/specs/jvms/se8/html/jvms-2.html#jvms-2.9: "In Java SE 8, the only
    // signature polymorphic methods are the invoke and invokeExact methods of the class MethodHandle.
    def isSignaturePolymorphic(owner: InternalName) = owner == coreBTypes.jliMethodHandleRef.internalName && (name == "invoke" || name == "invokeExact")

    // Note: if `owner` is an interface, in the first iteration we search for a matching member in the interface itself.
    // If that fails, the recursive invocation checks in the superclass (which is Object) with `publicInstanceOnly == true`.
    // This is specified in jvms-5.4.3.4: interface method resolution only returns public, non-static methods of Object.
    def findInSuperClasses(owner: ClassNode, publicInstanceOnly: Boolean = false): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = {
      findMethod(owner) match {
        case Some(m) if !publicInstanceOnly || (isPublicMethod(m) && !isStaticMethod(m)) => Right(Some((m, owner.name)))
        case None =>
          if (isSignaturePolymorphic(owner.name)) Right(Some((owner.methods.asScala.find(_.name == name).get, owner.name)))
          else if (owner.superName == null) Right(None)
          else classNode(owner.superName).flatMap(findInSuperClasses(_, isInterface(owner)))
      }
    }

    def findInInterfaces(initialOwner: ClassNode): Either[ClassNotFound, Option[(MethodNode, InternalName)]] = {
      val visited = mutable.Set.empty[InternalName]
      val found = mutable.ListBuffer.empty[(MethodNode, ClassNode)]

      def findIn(owner: ClassNode): Option[ClassNotFound] = {
        for (i <- owner.interfaces.asScala if !visited(i)) classNode(i) match {
          case Left(e) => return Some(e)
          case Right(c) =>
            visited += i
            // abstract and static methods are excluded, see jvms-5.4.3.3
            for (m <- findMethod(c) if !isPrivateMethod(m) && !isStaticMethod(m)) found += ((m, c))
            val recursionResult = findIn(c)
            if (recursionResult.isDefined) return recursionResult
        }
        None
      }

      findIn(initialOwner)

      val result =
        if (found.size <= 1) found.headOption
        else {
          val maxSpecific = found.filterNot({
            case (method, owner) =>
              isAbstractMethod(method) || {
                val ownerTp = classBTypeFromClassNode(owner)
                found exists {
                  case (other, otherOwner) =>
                    (other ne method) && {
                      val otherTp = classBTypeFromClassNode(otherOwner)
                      otherTp.isSubtypeOf(ownerTp).get
                    }
                }
              }
          })
          // (*) note that if there's no single, non-abstract, maximally-specific method, the jvm
          // method resolution (jvms-5.4.3.3) returns any of the non-private, non-static parent
          // methods at random (abstract or concrete).
          // we chose not to do this here, to prevent the inliner from potentially inlining the
          // wrong method. in other words, we guarantee that a concrete method is only returned if
          // it resolves deterministically.
          // however, there may be multiple abstract methods inherited. in this case we *do* want
          // to return a result to allow performing accessibility checks in the inliner. note that
          // for accessibility it does not matter which of these methods is return, as they are all
          // non-private (i.e., public, protected is not possible, jvms-4.1).
          // the remaining case (when there's no max-specific method, but some non-abstract one)
          // does not occur in bytecode generated by scalac or javac. we return no result in this
          // case. this may at worst prevent some optimizations from happening.
          if (maxSpecific.size == 1) maxSpecific.headOption
          else if (found.forall(p => isAbstractMethod(p._1))) found.headOption // (*)
          else None
        }
      Right(result.map(p => (p._1, p._2.name)))
    }

    // In a MethodInsnNode, the `owner` field may be an array descriptor, for example when invoking `clone`. We don't have a method node to return in this case.
    if (ownerInternalNameOrArrayDescriptor.charAt(0) == '[') {
      Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, None))
    } else {
      def notFound(cnf: Option[ClassNotFound]) = Left(MethodNotFound(name, descriptor, ownerInternalNameOrArrayDescriptor, cnf))
      val res: Either[ClassNotFound, Option[(MethodNode, InternalName)]] = classNode(ownerInternalNameOrArrayDescriptor).flatMap(c =>
        findInSuperClasses(c) flatMap {
          case None => findInInterfaces(c)
          case res => Right(res)
        }
      )
      res match {
        case Left(e) => notFound(Some(e))
        case Right(None) => notFound(None)
        case Right(Some(res)) => Right(res)
      }
    }
  }

  private def parseClass(internalName: InternalName): Either[ClassNotFound, ClassNode] = {
    val fullName = internalName.replace('/', '.')
    classPath.findClassFile(fullName) map { classFile =>
      val classNode = new asm.tree.ClassNode()
      val classReader = new asm.ClassReader(classFile.toByteArray)

      // Passing the InlineInfoAttributePrototype makes the ClassReader invoke the specific `read`
      // method of the InlineInfoAttribute class, instead of putting the byte array into a generic
      // Attribute.
      // We don't need frames when inlining, but we want to keep the local variable table, so we
      // don't use SKIP_DEBUG.
      classReader.accept(classNode, Array[Attribute](InlineInfoAttributePrototype), asm.ClassReader.SKIP_FRAMES)
      // SKIP_FRAMES leaves line number nodes. Remove them because they are not correct after
      // inlining.
      // TODO: we need to remove them also for classes that are not parsed from classfiles, why not simplify and do it once when inlining?
      // OR: instead of skipping line numbers for inlined code, use write a SourceDebugExtension
      // attribute that contains JSR-45 data that encodes debugging info.
      //   http://docs.oracle.com/javase/specs/jvms/se7/html/jvms-4.html#jvms-4.7.11
      //   https://jcp.org/aboutJava/communityprocess/final/jsr045/index.html
      removeLineNumberNodes(classNode)
      classNode
    } match {
      case Some(node) => Right(node)
      case None       => Left(ClassNotFound(internalName, javaDefinedClasses(internalName)))
    }
  }
}