diff options
Diffstat (limited to 'compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala')
-rw-r--r-- | compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala | 132 |
1 files changed, 132 insertions, 0 deletions
package dotty.tools
package dotc
package parsing

import scala.reflect.internal.Chars._

/** A cursor over a character buffer that yields one character at a time.
 *
 *  While advancing it can decode \\uxxxx escapes, collapse CR;LF pairs into a
 *  single LF, and keep track of the offsets at which lines start. Concrete
 *  subclasses supply the buffer and the error reporting routine.
 */
abstract class CharArrayReader { self =>

  /** The characters being read. */
  val buf: Array[Char]

  /** The offset at which reading begins; used to initialise all offset fields. */
  protected def startFrom: Int = 0

  /** Whether \\uxxxx escapes should be decoded while reading. */
  protected def decodeUni: Boolean = true

  /** Called when a \\uxxxx escape is malformed or cut off by the end of the buffer.
   *
   *  @param msg    description of the problem
   *  @param offset buffer offset the problem is reported at
   */
  protected def error(msg: String, offset: Int): Unit

  /** The character that was read most recently. */
  var ch: Char = _

  /** The offset just past the most recently read character. */
  var charOffset: Int = startFrom

  /** The offset at which the most recently read character started. */
  var lastCharOffset: Int = startFrom

  /** The offset at which the current line starts. */
  var lineStartOffset: Int = startFrom

  /** The offset at which the line preceding the current one starts. */
  var lastLineStartOffset: Int = startFrom

  /** Value of `charOffset` right after the last decoded escape, or -1 if none was decoded. */
  private var lastUnicodeOffset: Int = -1

  /** Was the most recently read character produced by a \\uxxxx escape? */
  def isUnicodeEscape: Boolean = lastUnicodeOffset == charOffset

  /** Reads the next character into `ch`, turning a CR;LF pair into a single LF.
   *
   *  At the end of the buffer `ch` becomes `SU` and `charOffset` stays put.
   */
  final def nextChar(): Unit = {
    val start = charOffset
    lastCharOffset = start
    if (start < buf.length) {
      val c = buf(start)
      ch = c
      charOffset = start + 1
      if (c == '\\') potentialUnicode()
      else if (c < ' ') {
        // Only control characters can be (part of) a line terminator.
        skipCR()
        potentialLineEnd()
      }
    }
    else ch = SU
  }

  /** Advances with `nextChar()` and returns the character that was read. */
  def getc(): Char = {
    nextChar()
    ch
  }

  /** Reads the next character into `ch` without touching CR;LF pairs.
   *
   *  Meant for multi-line strings: no line-end bookkeeping is done here,
   *  but \\uxxxx escapes are still decoded.
   */
  final def nextRawChar(): Unit = {
    val start = charOffset
    lastCharOffset = start
    if (start < buf.length) {
      val c = buf(start)
      ch = c
      charOffset = start + 1
      if (c == '\\') potentialUnicode()
    }
    else ch = SU
  }

  /** Decodes a \\uxxxx escape whose leading backslash has just been read.
   *
   *  Does nothing unless the backslash is followed by `u`, decoding is switched
   *  on, and the backslash is not itself escaped by a preceding backslash.
   */
  private def potentialUnicode(): Unit = {
    // True when an even number of backslashes directly precedes the one just read.
    def evenSlashPrefix: Boolean = {
      var preceding = 0
      var p = charOffset - 2
      while (p >= 0 && buf(p) == '\\') {
        preceding += 1
        p -= 1
      }
      preceding % 2 == 0
    }

    // Consumes one hex digit and returns its value. A non-digit is reported,
    // left unconsumed and yields the negative result of `digit2int`.
    def udigit: Int =
      if (charOffset < buf.length) {
        val d = digit2int(buf(charOffset), 16)
        if (d < 0) error("error in unicode escape", charOffset)
        else charOffset += 1
        d
      }
      else {
        // The positioning code is very insistent about throwing exceptions and
        // we are one past EOF here, so report at the previous offset to keep
        // the message visible. This happens with e.g. val x = \ u 1 <EOF>
        error("incomplete unicode escape", charOffset - 1)
        SU
      }

    if (charOffset < buf.length && buf(charOffset) == 'u' && decodeUni && evenSlashPrefix) {
      // Skip the first 'u' and any further 'u's that follow it.
      charOffset += 1
      while (charOffset < buf.length && buf(charOffset) == 'u') charOffset += 1

      // Four hex digits, most significant first.
      val d3 = udigit
      val d2 = udigit
      val d1 = udigit
      val d0 = udigit
      val code = (d3 << 12) | (d2 << 8) | (d1 << 4) | d0

      lastUnicodeOffset = charOffset
      ch = code.toChar
    }
  }

  /** Collapses a CR that is immediately followed by LF into a single LF. */
  private def skipCR(): Unit =
    if (ch == CR && charOffset < buf.length && buf(charOffset) == LF) {
      charOffset += 1
      ch = LF
    }

  /** Updates the line start offsets if the current character ends a line (LF or FF). */
  private def potentialLineEnd(): Unit =
    if (ch == LF || ch == FF) {
      lastLineStartOffset = lineStartOffset
      lineStartOffset = charOffset
    }

  /** Has the whole buffer been consumed? */
  def isAtEnd: Boolean = charOffset >= buf.length

  /** Creates a reader over the same buffer that continues from the current position. */
  def lookaheadReader: CharArrayLookaheadReader = new CharArrayLookaheadReader

  /** A reader that shares the enclosing reader's buffer, starts from its current
   *  `charOffset` and `ch`, and forwards `decodeUni` and `error` to it.
   */
  class CharArrayLookaheadReader extends CharArrayReader {
    val buf: Array[Char] = self.buf
    charOffset = self.charOffset
    ch = self.ch
    override def decodeUni: Boolean = self.decodeUni
    def error(msg: String, offset: Int): Unit = self.error(msg, offset)
  }
}