aboutsummaryrefslogtreecommitdiff
path: root/compiler/src/dotty/tools/dotc/parsing/CharArrayReader.scala
blob: e20eb392d52cae11bf2b1905a0fd9e925dd814d3 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package dotty.tools
package dotc
package parsing

import util.Chars._

/** A reader that steps through the characters of an in-memory `Array[Char]`.
 *
 *  The most recently read character is exposed in `ch`; several offset fields
 *  record where the reader currently is in `buf`. Unicode escapes of the form
 *  \\uxxxx are decoded on the fly when `decodeUni` is true, and `nextChar`
 *  additionally collapses CR;LF pairs and tracks line starts.
 *
 *  Concrete subclasses supply the buffer and the error-reporting routine.
 */
abstract class CharArrayReader { self =>

  /** The characters to read from. */
  val buf: Array[Char]

  /** The initial value of all offset fields of this reader. */
  protected def startFrom: Int = 0

  /** Switch whether unicode should be decoded */
  protected def decodeUni: Boolean = true

  /** An error routine to call on bad unicode escapes \\uxxxx.
   *
   *  @param msg     the error message
   *  @param offset  the offset in `buf` the message refers to
   */
  protected def error(msg: String, offset: Int): Unit

  /** the last read character */
  var ch: Char = _

  /** The offset one past the last read character */
  var charOffset: Int = startFrom

  /** The offset before the last read character */
  var lastCharOffset: Int = startFrom

  /** The start offset of the current line */
  var lineStartOffset: Int = startFrom

  /** The start offset of the line before the current one */
  var lastLineStartOffset: Int = startFrom

  /** Value of `charOffset` right after the most recent unicode escape was decoded,
   *  or -1 if none has been decoded yet.
   */
  private var lastUnicodeOffset = -1

  /** Is last character a unicode escape \\uxxxx? */
  def isUnicodeEscape: Boolean = charOffset == lastUnicodeOffset

  /** Advance one character; reducing CR;LF pairs to just LF.
   *
   *  At the end of the buffer `ch` is set to `SU` and `charOffset` is left unchanged.
   *  A backslash triggers unicode-escape decoding; any other character below ' '
   *  goes through CR;LF reduction and line-start bookkeeping.
   */
  final def nextChar(): Unit = {
    val idx = charOffset
    lastCharOffset = idx
    if (idx >= buf.length) {
      ch = SU
    } else {
      val c = buf(idx)
      ch = c
      charOffset = idx + 1
      if (c == '\\') potentialUnicode()
      else if (c < ' ') { skipCR(); potentialLineEnd() }
    }
  }

  /** Advance one character (as `nextChar` does) and return it. */
  def getc(): Char = { nextChar(); ch }

  /** Advance one character, leaving CR;LF pairs intact.
   *  This is for use in multi-line strings, so there are no
   *  "potential line ends" here.
   */
  final def nextRawChar(): Unit = {
    val idx = charOffset
    lastCharOffset = idx
    if (idx >= buf.length) {
      ch = SU
    } else {
      val c = buf(idx)
      ch = c
      charOffset = idx + 1
      if (c == '\\') potentialUnicode()
    }
  }

  /** Interpret \\uxxxx escapes.
   *
   *  Called right after a backslash has been read (so `charOffset` is one past it).
   *  If the backslash starts a unicode escape, `ch` is replaced by the decoded
   *  character and `charOffset` is moved past the consumed digits. A malformed
   *  escape is reported once through `error`, and `ch` becomes `(-1).toChar`.
   */
  private def potentialUnicode(): Unit = {
    // True when the backslash just read is preceded by an even number (possibly
    // zero) of consecutive backslashes; only then may it start an escape.
    def evenSlashPrefix: Boolean = {
      var p = charOffset - 2
      while (p >= 0 && buf(p) == '\\') p -= 1
      (charOffset - p) % 2 == 0
    }
    // Reads one hex digit and returns its value; on failure reports an error,
    // consumes nothing and returns a negative number.
    def udigit: Int = {
      if (charOffset >= buf.length) {
        // Since the positioning code is very insistent about throwing exceptions,
        // we have to decrement the position so our error message can be seen, since
        // we are one past EOF.  This happens with e.g. val x = \ u 1 <EOF>
        error("incomplete unicode escape", charOffset - 1)
        // Signal failure with a negative value: `SU` is a character, not a digit
        // value, and must not be mixed into the decoded code point.
        -1
      }
      else {
        // presumably digit2int yields a negative value for a non-hex character,
        // which is what the check below relies on
        val d = digit2int(buf(charOffset), 16)
        if (d >= 0) charOffset += 1
        else error("error in unicode escape", charOffset)
        d
      }
    }
    if (charOffset < buf.length && buf(charOffset) == 'u' && decodeUni && evenSlashPrefix) {
      // Skip the 'u' and any further 'u's that follow it.
      charOffset += 1
      while (charOffset < buf.length && buf(charOffset) == 'u') charOffset += 1
      // Accumulate up to four hex digits, most significant first. Stop at the
      // first bad digit so that one malformed escape produces one error report.
      var code = 0
      var digitsLeft = 4
      while (digitsLeft > 0 && code >= 0) {
        val d = udigit
        code = if (d >= 0) (code << 4) | d else -1
        digitsLeft -= 1
      }
      lastUnicodeOffset = charOffset
      ch = code.toChar
    }
  }

  /** replace CR;LF by LF */
  private def skipCR(): Unit = {
    if (ch == CR)
      if (charOffset < buf.length && buf(charOffset) == LF) {
        // Consume the LF as well and present the pair as a single LF.
        charOffset += 1
        ch = LF
      }
  }

  /** Handle line ends: after an LF or FF, the next line starts at `charOffset`. */
  private def potentialLineEnd(): Unit = {
    if (ch == LF || ch == FF) {
      lastLineStartOffset = lineStartOffset
      lineStartOffset = charOffset
    }
  }

  /** Has `charOffset` reached or passed the end of `buf`? */
  def isAtEnd: Boolean = charOffset >= buf.length

  /** A new reader that takes off at the current character position */
  def lookaheadReader: CharArrayLookaheadReader = new CharArrayLookaheadReader

  /** A reader over the same buffer as the enclosing reader.
   *
   *  It starts with the enclosing reader's `charOffset` and `ch`; the other offset
   *  fields keep their initial values. Unicode decoding and error reporting are
   *  delegated to the enclosing reader.
   */
  class CharArrayLookaheadReader extends CharArrayReader {
    val buf: Array[Char] = self.buf
    charOffset = self.charOffset
    ch = self.ch
    override def decodeUni: Boolean = self.decodeUni
    def error(msg: String, offset: Int): Unit = self.error(msg, offset)
  }
}