aboutsummaryrefslogtreecommitdiff
path: root/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala')
-rw-r--r--compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala132
1 files changed, 132 insertions, 0 deletions
diff --git a/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala b/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala
new file mode 100644
index 000000000..b84e2eb47
--- /dev/null
+++ b/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala
@@ -0,0 +1,132 @@
+package dotty.tools
+package dotc
+package parsing
+
+import scala.reflect.internal.Chars._
+
+/** A character-level cursor over an in-memory character buffer.
+ *
+ *  Every call to `nextChar()` or `nextRawChar()` stores the character that was
+ *  read in `ch` and advances `charOffset`. Unicode escapes \\uxxxx are decoded on
+ *  the fly as long as `decodeUni` is true. `nextChar()` additionally folds CR;LF
+ *  pairs into a single LF and records line start offsets. Once the buffer is
+ *  exhausted, `ch` is set to `SU` and the offsets stop advancing.
+ */
+abstract class CharArrayReader { self =>
+
+  /** The characters to read from */
+  val buf: Array[Char]
+
+  /** The offset at which reading starts; used as the initial value of all offset fields */
+  protected def startFrom: Int = 0
+
+  /** Switch whether unicode should be decoded */
+  protected def decodeUni: Boolean = true
+
+  /** An error routine to call on bad unicode escapes \\uxxxx. */
+  protected def error(msg: String, offset: Int): Unit
+
+  /** the last read character */
+  var ch: Char = _
+
+  /** The offset one past the last read character */
+  var charOffset: Int = startFrom
+
+  /** The offset before the last read character */
+  var lastCharOffset: Int = startFrom
+
+  /** The start offset of the current line */
+  var lineStartOffset: Int = startFrom
+
+  /** The start offset of the line before the current one */
+  var lastLineStartOffset: Int = startFrom
+
+  /** Value of `charOffset` right after the most recent unicode escape was decoded; -1 if none yet */
+  private var lastUnicodeOffset: Int = -1
+
+  /** Is last character a unicode escape \\uxxxx? */
+  def isUnicodeEscape: Boolean = charOffset == lastUnicodeOffset
+
+  /** Advance one character; reducing CR;LF pairs to just LF.
+   *
+   *  At the end of the buffer `ch` becomes `SU` and `charOffset` is left unchanged.
+   */
+  final def nextChar(): Unit = {
+    val idx = charOffset
+    lastCharOffset = idx
+    if (idx >= buf.length) {
+      ch = SU
+    } else {
+      val c = buf(idx)
+      ch = c
+      charOffset = idx + 1
+      if (c == '\\') potentialUnicode()
+      // Only control characters can be CR, LF or FF, so the line-end handling
+      // is skipped for everything else.
+      else if (c < ' ') { skipCR(); potentialLineEnd() }
+    }
+  }
+
+  /** Advance one character (see `nextChar()`) and return it */
+  def getc(): Char = { nextChar() ; ch }
+
+  /** Advance one character, leaving CR;LF pairs intact.
+   *  This is for use in multi-line strings, so there are no
+   *  "potential line ends" here.
+   */
+  final def nextRawChar(): Unit = {
+    val idx = charOffset
+    lastCharOffset = idx
+    if (idx >= buf.length) {
+      ch = SU
+    } else {
+      val c = buf(idx)
+      ch = c
+      charOffset = idx + 1
+      if (c == '\\') potentialUnicode()
+    }
+  }
+
+  /** Interpret \\uxxxx escapes.
+   *
+   *  Called right after a backslash has been read, so `charOffset` is one past
+   *  that backslash. If an escape starts here, it is consumed entirely, `ch` is
+   *  replaced by the decoded character and `lastUnicodeOffset` is updated.
+   *  Otherwise nothing changes.
+   */
+  private def potentialUnicode(): Unit = {
+    /** Is the backslash just read preceded by an even number (possibly zero)
+     *  of backslashes, i.e. is it not itself escaped?
+     */
+    def evenSlashPrefix: Boolean = {
+      var p = charOffset - 2
+      while (p >= 0 && buf(p) == '\\') p -= 1
+      (charOffset - p) % 2 == 0
+    }
+    /** Read one hexadecimal digit at `charOffset`.
+     *  A valid digit is consumed and its value returned. Otherwise an error is
+     *  reported, nothing is consumed, and the result is the negative value from
+     *  `digit2int` (bad digit) or `SU` (end of buffer).
+     */
+    def udigit: Int = {
+      if (charOffset >= buf.length) {
+        // Since the positioning code is very insistent about throwing exceptions,
+        // we have to decrement the position so our error message can be seen, since
+        // we are one past EOF. This happens with e.g. val x = \ u 1 <EOF>
+        error("incomplete unicode escape", charOffset - 1)
+        SU
+      }
+      else {
+        val d = digit2int(buf(charOffset), 16)
+        if (d >= 0) charOffset += 1
+        else error("error in unicode escape", charOffset)
+        d
+      }
+    }
+    if (charOffset < buf.length && buf(charOffset) == 'u' && decodeUni && evenSlashPrefix) {
+      // Any number of consecutive 'u's may follow the backslash; skip them all.
+      do charOffset += 1
+      while (charOffset < buf.length && buf(charOffset) == 'u')
+      // Four hex digits, most significant first.
+      val code = udigit << 12 | udigit << 8 | udigit << 4 | udigit
+      lastUnicodeOffset = charOffset
+      ch = code.toChar
+    }
+  }
+
+  /** replace CR;LF by LF: if `ch` is CR and the next buffer character is LF,
+   *  consume the LF and make it the current character.
+   */
+  private def skipCR(): Unit = {
+    if (ch == CR)
+      if (charOffset < buf.length && buf(charOffset) == LF) {
+        charOffset += 1
+        ch = LF
+      }
+  }
+
+  /** Handle line ends: after an LF or FF the current line start moves to `charOffset` */
+  private def potentialLineEnd(): Unit = {
+    if (ch == LF || ch == FF) {
+      lastLineStartOffset = lineStartOffset
+      lineStartOffset = charOffset
+    }
+  }
+
+  /** Has every character of the buffer been consumed? */
+  def isAtEnd: Boolean = charOffset >= buf.length
+
+  /** A new reader that takes off at the current character position */
+  def lookaheadReader: CharArrayLookaheadReader = new CharArrayLookaheadReader
+
+  /** A reader over the same buffer as the enclosing reader, starting from the
+   *  enclosing reader's current `charOffset` and `ch`. Advancing it does not
+   *  affect the enclosing reader. Only `charOffset` and `ch` are copied; the
+   *  other offset fields start from this reader's own `startFrom`.
+   */
+  class CharArrayLookaheadReader extends CharArrayReader {
+    val buf: Array[Char] = self.buf
+    charOffset = self.charOffset
+    ch = self.ch
+    override def decodeUni: Boolean = self.decodeUni
+    def error(msg: String, offset: Int): Unit = self.error(msg, offset)
+  }
+}