diff options
author | Martin Odersky <odersky@gmail.com> | 2013-04-17 09:48:22 +0200 |
---|---|---|
committer | Martin Odersky <odersky@gmail.com> | 2013-04-17 10:16:22 +0200 |
commit | ca8dc7ada663e44aafe470944dd17256dbde151c (patch) | |
tree | d15939e204042e358e0c83064250f1f18c1c4f25 /src/dotty/tools/dotc/parsing | |
parent | e32fedb6844eab11a27e365a570b2033a0f6f78d (diff) | |
download | dotty-ca8dc7ada663e44aafe470944dd17256dbde151c.tar.gz dotty-ca8dc7ada663e44aafe470944dd17256dbde151c.tar.bz2 dotty-ca8dc7ada663e44aafe470944dd17256dbde151c.zip |
Scanners added.
Moving Positions, Chars to new packages.
Added Source positions.
Added untyped trees module.
Factored out behavior between typed and untyped trees.
Diffstat (limited to 'src/dotty/tools/dotc/parsing')
-rw-r--r-- | src/dotty/tools/dotc/parsing/CharArrayReader.scala | 131 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Scanners.scala | 958 | ||||
-rw-r--r-- | src/dotty/tools/dotc/parsing/Tokens.scala | 171 |
3 files changed, 1260 insertions, 0 deletions
diff --git a/src/dotty/tools/dotc/parsing/CharArrayReader.scala b/src/dotty/tools/dotc/parsing/CharArrayReader.scala new file mode 100644 index 000000000..29346b78a --- /dev/null +++ b/src/dotty/tools/dotc/parsing/CharArrayReader.scala @@ -0,0 +1,131 @@ +package dotty.tools +package dotc +package parsing + +import scala.reflect.internal.Chars._ + +abstract class CharArrayReader { self => + + val buf: Array[Char] + + /** Switch whether unicode should be decoded */ + protected def decodeUni: Boolean = true + + /** An error routine to call on bad unicode escapes \\uxxxx. */ + protected def error(msg: String, offset: Int): Unit + + /** the last read character */ + var ch: Char = _ + + /** The offset one past the last read character */ + var charOffset: Int = 0 + + /** The offset before the last read character */ + var lastCharOffset: Int = 0 + + /** The start offset of the current line */ + var lineStartOffset: Int = 0 + + /** The start offset of the line before the current one */ + var lastLineStartOffset: Int = 0 + + private var lastUnicodeOffset = -1 + + /** Is last character a unicode escape \\uxxxx? */ + def isUnicodeEscape = charOffset == lastUnicodeOffset + + /** Advance one character; reducing CR;LF pairs to just LF */ + final def nextChar(): Unit = { + val idx = charOffset + lastCharOffset = idx + if (idx >= buf.length) { + ch = SU + } else { + val c = buf(idx) + ch = c + charOffset = idx + 1 + if (c == '\\') potentialUnicode() + else if (c < ' ') { skipCR(); potentialLineEnd() } + } + } + + def getc() = { nextChar() ; ch } + + /** Advance one character, leaving CR;LF pairs intact. + * This is for use in multi-line strings, so there are no + * "potential line ends" here. 
+ */ + final def nextRawChar(): Unit = { + val idx = charOffset + lastCharOffset = idx + if (idx >= buf.length) { + ch = SU + } else { + val c = buf(charOffset) + ch = c + charOffset = idx + 1 + if (c == '\\') potentialUnicode() + } + } + + /** Interpret \\uxxxx escapes */ + private def potentialUnicode() { + def evenSlashPrefix: Boolean = { + var p = charOffset - 2 + while (p >= 0 && buf(p) == '\\') p -= 1 + (charOffset - p) % 2 == 0 + } + def udigit: Int = { + if (charOffset >= buf.length) { + // Since the positioning code is very insistent about throwing exceptions, + // we have to decrement the position so our error message can be seen, since + // we are one past EOF. This happens with e.g. val x = \ u 1 <EOF> + error("incomplete unicode escape", charOffset - 1) + SU + } + else { + val d = digit2int(buf(charOffset), 16) + if (d >= 0) charOffset += 1 + else error("error in unicode escape", charOffset) + d + } + } + if (charOffset < buf.length && buf(charOffset) == 'u' && decodeUni && evenSlashPrefix) { + do charOffset += 1 + while (charOffset < buf.length && buf(charOffset) == 'u') + val code = udigit << 12 | udigit << 8 | udigit << 4 | udigit + lastUnicodeOffset = charOffset + ch = code.toChar + } + } + + /** replace CR;LF by LF */ + private def skipCR() { + if (ch == CR) + if (charOffset < buf.length && buf(charOffset) == LF) { + charOffset += 1 + ch = LF + } + } + + /** Handle line ends */ + private def potentialLineEnd() { + if (ch == LF || ch == FF) { + lastLineStartOffset = lineStartOffset + lineStartOffset = charOffset + } + } + + def isAtEnd = charOffset >= buf.length + + /** A new reader that takes off at the current character position */ + def lookaheadReader = new CharArrayLookaheadReader + + class CharArrayLookaheadReader extends CharArrayReader { + val buf = self.buf + charOffset = self.charOffset + ch = self.ch + override def decodeUni = self.decodeUni + def error(msg: String, offset: Int) = self.error(msg, offset) + } +} diff --git 
a/src/dotty/tools/dotc/parsing/Scanners.scala b/src/dotty/tools/dotc/parsing/Scanners.scala new file mode 100644 index 000000000..2b3ec9bc2 --- /dev/null +++ b/src/dotty/tools/dotc/parsing/Scanners.scala @@ -0,0 +1,958 @@ +package dotty.tools +package dotc +package parsing + +import Tokens._ +import core.Names._, core.Contexts._, core.Decorators._, util.Positions._ +import core.StdNames._ +import util.SourceFile +import java.lang.Character.isDigit +import scala.reflect.internal.Chars._ +import Tokens._ +import scala.annotation.{ switch, tailrec } +import scala.collection.{ mutable, immutable } +import mutable.{ ListBuffer, ArrayBuffer } +import scala.xml.Utility.isNameStart + +object Scanners { + + /** Offset into source character array */ + type Offset = Int + + /** An undefined offset */ + val NoOffset: Offset = -1 + + case class Comment(pos: Position, chrs: String) { + def isDocComment = chrs.startsWith("/**") + } + + type Token = Int + + trait TokenData { + + /** the next token */ + var token: Token = EMPTY + + /** the offset of the first character of the current token */ + var offset: Offset = 0 + + /** the offset of the character following the token preceding this one */ + var lastOffset: Offset = 0 + + /** the name of an identifier */ + var name: TermName = null + + /** the string value of a literal */ + var strVal: String = null + + /** the base of a number */ + var base: Int = 0 + + def copyFrom(td: TokenData) = { + this.token = td.token + this.offset = td.offset + this.lastOffset = td.lastOffset + this.name = td.name + this.strVal = td.strVal + this.base = td.base + } + } + + class Scanner(source: SourceFile)(implicit ctx: Context) extends CharArrayReader with TokenData { + + val buf = source.content + + var keepComments = false + + /** All comments in the reverse order of their position in the source. + * set only when `keepComments` is true. 
+ */ + var revComments: List[Comment] = Nil + + /** the last error offset + */ + var errOffset: Offset = NoOffset + + /** A buffer for comments */ + val commentBuf = new StringBuilder + + /** A character buffer for literals + */ + val litBuf = new StringBuilder + + /** append Unicode character to "litBuf" buffer + */ + protected def putChar(c: Char): Unit = litBuf.append(c) + + /** Clear buffer and set string */ + private def setStrVal() = flushBuf(litBuf) + + private class TokenData0 extends TokenData + + /** we need one token lookahead and one token history + */ + private val next : TokenData = new TokenData0 + private val prev : TokenData = new TokenData0 + + /** a stack of tokens which indicates whether line-ends can be statement separators + * also used for keeping track of nesting levels. + * We keep track of the closing symbol of a region. This can be + * RPAREN if region starts with '(' + * RBRACKET if region starts with '[' + * RBRACE if region starts with '{' + * ARROW if region starts with `case' + * STRINGLIT if region is a string interpolation expression starting with '${' + * (the STRINGLIT appears twice in succession on the stack iff the + * expression is a multiline string literal). + */ + var sepRegions: List[Token] = List() + +// Get next token ------------------------------------------------------------ + + /** Are we directly in a string interpolation expression? + */ + private def inStringInterpolation = + sepRegions.nonEmpty && sepRegions.head == STRINGLIT + + /** Are we directly in a multiline string interpolation expression? 
+ * @pre inStringInterpolation + */ + private def inMultiLineInterpolation = + inStringInterpolation && sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART + + /** read next token and return last offset + */ + def skipToken(): Offset = { + val off = offset + nextToken() + off + } + + def adjustSepRegions(lastToken: Token): Unit = (lastToken: @switch) match { + case LPAREN => + sepRegions = RPAREN :: sepRegions + case LBRACKET => + sepRegions = RBRACKET :: sepRegions + case LBRACE => + sepRegions = RBRACE :: sepRegions + case CASE => + sepRegions = ARROW :: sepRegions + case RBRACE => + while (!sepRegions.isEmpty && sepRegions.head != RBRACE) + sepRegions = sepRegions.tail + if (!sepRegions.isEmpty) sepRegions = sepRegions.tail + case RBRACKET | RPAREN => + if (!sepRegions.isEmpty && sepRegions.head == lastToken) + sepRegions = sepRegions.tail + case ARROW => + if (!sepRegions.isEmpty && sepRegions.head == lastToken) + sepRegions = sepRegions.tail + case STRINGLIT => + if (inMultiLineInterpolation) + sepRegions = sepRegions.tail.tail + else if (inStringInterpolation) + sepRegions = sepRegions.tail + case _ => + } + + /** Produce next token, filling TokenData fields of Scanner. + */ + def nextToken() { + val lastToken = token + adjustSepRegions(lastToken) + + // Read a token or copy it from `next` tokenData + if (next.token == EMPTY) { + lastOffset = lastCharOffset + if (inStringInterpolation) fetchStringPart() + else fetchToken() + if (token == ERROR) adjustSepRegions(STRINGLIT) + } else { + this copyFrom next + next.token = EMPTY + } + + /** Insert NEWLINE or NEWLINES if + * - we are after a newline + * - we are within a { ... 
} or on toplevel (wrt sepRegions) + * - the current token can start a statement and the one before can end it + * insert NEWLINES if we are past a blank line, NEWLINE otherwise + */ + if (isAfterLineEnd() && + (canEndStatTokens contains lastToken) && + (canStartStatTokens contains token) && + (sepRegions.isEmpty || sepRegions.head == RBRACE)) { + next copyFrom this + offset = lineStartOffset min lastLineStartOffset + token = if (pastBlankLine()) NEWLINES else NEWLINE + } + + postProcessToken() +// print("["+this+"]") + } + + def postProcessToken() = { + // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE + if (token == CASE) { + prev copyFrom this + val nextLastOffset = lastCharOffset + fetchToken() + def resetOffset() { + offset = prev.offset + lastOffset = prev.lastOffset + } + if (token == CLASS) { + token = CASECLASS + resetOffset() + } else if (token == OBJECT) { + token = CASEOBJECT + resetOffset() + } else { + lastOffset = nextLastOffset + next copyFrom this + this copyFrom prev + } + } else if (token == SEMI) { + prev copyFrom this + fetchToken() + if (token != ELSE) { + next copyFrom this + this copyFrom prev + } + } + } + + /** Is current token first one after a newline? */ + def isAfterLineEnd(): Boolean = + lastOffset < lineStartOffset && + (lineStartOffset <= offset || + lastOffset < lastLineStartOffset && lastLineStartOffset <= offset) + + /** Is there a blank line between the current token and the last one? + * @pre afterLineEnd(). + */ + private def pastBlankLine(): Boolean = { + val end = offset + def recur(idx: Offset, isBlank: Boolean): Boolean = + idx < end && { + val ch = buf(idx) + if (ch == LF || ch == FF) isBlank || recur(idx + 1, true) + else recur(idx + 1, isBlank && ch <= ' ') + } + recur(lastOffset, false) + } + + /** read next token, filling TokenData fields of Scanner. 
+ */ + protected final def fetchToken() { + offset = charOffset - 1 + (ch: @switch) match { + case ' ' | '\t' | CR | LF | FF => + nextChar() + fetchToken() + case 'A' | 'B' | 'C' | 'D' | 'E' | + 'F' | 'G' | 'H' | 'I' | 'J' | + 'K' | 'L' | 'M' | 'N' | 'O' | + 'P' | 'Q' | 'R' | 'S' | 'T' | + 'U' | 'V' | 'W' | 'X' | 'Y' | + 'Z' | '$' | '_' | + 'a' | 'b' | 'c' | 'd' | 'e' | + 'f' | 'g' | 'h' | 'i' | 'j' | + 'k' | 'l' | 'm' | 'n' | 'o' | + 'p' | 'q' | 'r' | 's' | 't' | + 'u' | 'v' | 'w' | 'x' | 'y' | + 'z' => + putChar(ch) + nextChar() + getIdentRest() + if (ch == '"' && token == IDENTIFIER) + token = INTERPOLATIONID + case '<' => // is XMLSTART? + def fetchLT() = { + val last = if (charOffset >= 2) buf(charOffset - 2) else ' ' + nextChar() + last match { + case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' => + token = XMLSTART + case _ => + // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG + putChar('<') + getOperatorRest() + } + } + fetchLT + case '~' | '!' | '@' | '#' | '%' | + '^' | '*' | '+' | '-' | /*'<' | */ + '>' | '?' | ':' | '=' | '&' | + '|' | '\\' => + putChar(ch) + nextChar() + getOperatorRest() + case '/' => + if (skipComment()) { + fetchToken() + } else { + putChar('/') + getOperatorRest() + } + case '0' => + def fetchZero() = { + putChar(ch) + nextChar() + if (ch == 'x' || ch == 'X') { + nextChar() + base = 16 + } else { + /** + * What should leading 0 be in the future? It is potentially dangerous + * to let it be base-10 because of history. Should it be an error? Is + * there a realistic situation where one would need it? 
+ */ + if (isDigit(ch)) + error("Non-zero numbers may not have a leading zero.") + } + getNumber() + } + fetchZero + case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + base = 10 + getNumber() + case '`' => + getBackquotedIdent() + case '\"' => + def fetchDoubleQuote() = { + if (token == INTERPOLATIONID) { + nextRawChar() + if (ch == '\"') { + nextRawChar() + if (ch == '\"') { + nextRawChar() + getStringPart(multiLine = true) + sepRegions = STRINGPART :: sepRegions // indicate string part + sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part + } else { + token = STRINGLIT + strVal = "" + } + } else { + getStringPart(multiLine = false) + sepRegions = STRINGLIT :: sepRegions // indicate single line string part + } + } else { + nextChar() + if (ch == '\"') { + nextChar() + if (ch == '\"') { + nextRawChar() + getRawStringLit() + } else { + token = STRINGLIT + strVal = "" + } + } else { + getStringLit() + } + } + } + fetchDoubleQuote + case '\'' => + def fetchSingleQuote() = { + nextChar() + if (isIdentifierStart(ch)) + charLitOr(getIdentRest) + else if (isOperatorPart(ch) && (ch != '\\')) + charLitOr(getOperatorRest) + else { + getLitChar() + if (ch == '\'') { + nextChar() + token = CHARLIT + setStrVal() + } else { + error("unclosed character literal") + } + } + } + fetchSingleQuote + case '.' 
=> + nextChar() + if ('0' <= ch && ch <= '9') { + putChar('.'); getFraction(); setStrVal() + } else { + token = DOT + } + case ';' => + nextChar(); token = SEMI + case ',' => + nextChar(); token = COMMA + case '(' => + nextChar(); token = LPAREN + case '{' => + nextChar(); token = LBRACE + case ')' => + nextChar(); token = RPAREN + case '}' => + nextChar(); token = RBRACE + case '[' => + nextChar(); token = LBRACKET + case ']' => + nextChar(); token = RBRACKET + case SU => + if (isAtEnd) token = EOF + else { + error("illegal character") + nextChar() + } + case _ => + def fetchOther() = { + if (ch == '\u21D2') { + nextChar(); token = ARROW + } else if (ch == '\u2190') { + nextChar(); token = LARROW + } else if (Character.isUnicodeIdentifierStart(ch)) { + putChar(ch) + nextChar() + getIdentRest() + } else if (isSpecial(ch)) { + putChar(ch) + nextChar() + getOperatorRest() + } else { + error(f"illegal character '\\u${ch: Int}%04x'") + nextChar() + } + } + fetchOther + } + } + + private def skipComment(): Boolean = { + def appendToComment(ch: Char) = + if (keepComments) commentBuf.append(ch) + def nextChar() = { + appendToComment(ch) + Scanner.this.nextChar() + } + def skipLine(): Unit = { + nextChar() + if ((ch != CR) && (ch != LF) && (ch != SU)) skipLine() + } + @tailrec + def skipBlock(openComments: Int): Unit = { + val last = ch + nextChar() + if (ch == '/') + if (last == '*') { + if (openComments > 0) skipBlock(openComments - 1) + } else { + nextChar() + if (ch == '*') { nextChar(); skipBlock(openComments + 1) } + else skipBlock(openComments) + } + else if (ch == SU) incompleteInputError("unclosed comment") + else skipBlock(openComments) + } + val start = lastCharOffset + def finishComment(): Boolean = { + if (keepComments) { + val pos = Position(start, charOffset) + nextChar() + revComments = Comment(pos, flushBuf(commentBuf)) :: revComments + } + true + } + nextChar() + if (ch == '/') { skipLine(); finishComment() } + else if (ch == '*') { nextChar(); 
skipBlock(0); finishComment() } + else false + } + +// Identifiers --------------------------------------------------------------- + + private def getBackquotedIdent() { + nextChar() + getLitChars('`') + if (ch == '`') { + nextChar() + finishNamed(BACKQUOTED_IDENT) + if (name.length == 0) + error("empty quoted identifier") + else if (name == nme.WILDCARD) + error("wildcard invalid as backquoted identifier") + } + else error("unclosed quoted identifier") + } + + private def getIdentRest(): Unit = (ch: @switch) match { + case 'A' | 'B' | 'C' | 'D' | 'E' | + 'F' | 'G' | 'H' | 'I' | 'J' | + 'K' | 'L' | 'M' | 'N' | 'O' | + 'P' | 'Q' | 'R' | 'S' | 'T' | + 'U' | 'V' | 'W' | 'X' | 'Y' | + 'Z' | '$' | + 'a' | 'b' | 'c' | 'd' | 'e' | + 'f' | 'g' | 'h' | 'i' | 'j' | + 'k' | 'l' | 'm' | 'n' | 'o' | + 'p' | 'q' | 'r' | 's' | 't' | + 'u' | 'v' | 'w' | 'x' | 'y' | + 'z' | + '0' | '1' | '2' | '3' | '4' | + '5' | '6' | '7' | '8' | '9' => + putChar(ch) + nextChar() + getIdentRest() + case '_' => + putChar(ch) + nextChar() + getIdentOrOperatorRest() + case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true! + finishNamed() + case _ => + if (Character.isUnicodeIdentifierPart(ch)) { + putChar(ch) + nextChar() + getIdentRest() + } else { + finishNamed() + } + } + + private def getOperatorRest(): Unit = (ch: @switch) match { + case '~' | '!' | '@' | '#' | '%' | + '^' | '*' | '+' | '-' | '<' | + '>' | '?' | ':' | '=' | '&' | + '|' | '\\' => + putChar(ch); nextChar(); getOperatorRest() + case '/' => + if (skipComment()) finishNamed() + else { putChar('/'); getOperatorRest() } + case _ => + if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() } + else finishNamed() + } + + private def getIdentOrOperatorRest() { + if (isIdentifierPart(ch)) + getIdentRest() + else ch match { + case '~' | '!' | '@' | '#' | '%' | + '^' | '*' | '+' | '-' | '<' | + '>' | '?' 
| ':' | '=' | '&' | + '|' | '\\' | '/' => + getOperatorRest() + case _ => + if (isSpecial(ch)) getOperatorRest() + else finishNamed() + } + } + + +// Literals ----------------------------------------------------------------- + + private def getStringLit() = { + getLitChars('"') + if (ch == '"') { + setStrVal() + nextChar() + token = STRINGLIT + } else error("unclosed string literal") + } + + private def getRawStringLit(): Unit = { + if (ch == '\"') { + nextRawChar() + if (isTripleQuote()) { + setStrVal() + token = STRINGLIT + } else + getRawStringLit() + } else if (ch == SU) { + incompleteInputError("unclosed multi-line string literal") + } else { + putChar(ch) + nextRawChar() + getRawStringLit() + } + } + + @annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = { + def finishStringPart() = { + setStrVal() + token = STRINGPART + next.lastOffset = charOffset - 1 + next.offset = charOffset - 1 + } + if (ch == '"') { + if (multiLine) { + nextRawChar() + if (isTripleQuote()) { + setStrVal() + token = STRINGLIT + } else + getStringPart(multiLine) + } else { + nextChar() + setStrVal() + token = STRINGLIT + } + } else if (ch == '$') { + nextRawChar() + if (ch == '$') { + putChar(ch) + nextRawChar() + getStringPart(multiLine) + } else if (ch == '{') { + finishStringPart() + nextRawChar() + next.token = LBRACE + } else if (Character.isUnicodeIdentifierStart(ch)) { + finishStringPart() + do { + putChar(ch) + nextRawChar() + } while (ch != SU && Character.isUnicodeIdentifierPart(ch)) + finishNamed(target = next) + } else { + error("invalid string interpolation: `$$', `$'ident or `$'BlockExpr expected") + } + } else { + val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == CR || ch == LF))) + if (isUnclosedLiteral) { + if (multiLine) + incompleteInputError("unclosed multi-line string literal") + else + error("unclosed string literal") + } + else { + putChar(ch) + nextRawChar() + getStringPart(multiLine) + } + } + } + + private def 
fetchStringPart() = { + offset = charOffset - 1 + getStringPart(multiLine = inMultiLineInterpolation) + } + + private def isTripleQuote(): Boolean = + if (ch == '"') { + nextRawChar() + if (ch == '"') { + nextChar() + while (ch == '"') { + putChar('"') + nextChar() + } + true + } else { + putChar('"') + putChar('"') + false + } + } else { + putChar('"') + false + } + + /** copy current character into litBuf, interpreting any escape sequences, + * and advance to next character. + */ + protected def getLitChar(): Unit = + if (ch == '\\') { + nextChar() + if ('0' <= ch && ch <= '7') { + val leadch: Char = ch + var oct: Int = digit2int(ch, 8) + nextChar() + if ('0' <= ch && ch <= '7') { + oct = oct * 8 + digit2int(ch, 8) + nextChar() + if (leadch <= '3' && '0' <= ch && ch <= '7') { + oct = oct * 8 + digit2int(ch, 8) + nextChar() + } + } + putChar(oct.toChar) + } else { + ch match { + case 'b' => putChar('\b') + case 't' => putChar('\t') + case 'n' => putChar('\n') + case 'f' => putChar('\f') + case 'r' => putChar('\r') + case '\"' => putChar('\"') + case '\'' => putChar('\'') + case '\\' => putChar('\\') + case _ => invalidEscape() + } + nextChar() + } + } else { + putChar(ch) + nextChar() + } + + protected def invalidEscape(): Unit = { + error("invalid escape character", charOffset - 1) + putChar(ch) + } + + private def getLitChars(delimiter: Char) = { + while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape)) + getLitChar() + } + + /** read fractional part and exponent of floating point number + * if one is present. 
+ */ + protected def getFraction() { + token = DOUBLELIT + while ('0' <= ch && ch <= '9') { + putChar(ch) + nextChar() + } + if (ch == 'e' || ch == 'E') { + val lookahead = lookaheadReader + lookahead.nextChar() + if (lookahead.ch == '+' || lookahead.ch == '-') { + lookahead.nextChar() + } + if ('0' <= lookahead.ch && lookahead.ch <= '9') { + putChar(ch) + nextChar() + if (ch == '+' || ch == '-') { + putChar(ch) + nextChar() + } + while ('0' <= ch && ch <= '9') { + putChar(ch) + nextChar() + } + } + token = DOUBLELIT + } + if (ch == 'd' || ch == 'D') { + putChar(ch) + nextChar() + token = DOUBLELIT + } else if (ch == 'f' || ch == 'F') { + putChar(ch) + nextChar() + token = FLOATLIT + } + checkNoLetter() + } + def checkNoLetter() { + if (isIdentifierPart(ch) && ch >= ' ') + error("Invalid literal number") + } + + /** Read a number into strVal and set base + */ + protected def getNumber() { + while (digit2int(ch, base) >= 0) { + putChar(ch) + nextChar() + } + token = INTLIT + if (base == 10 && ch == '.') { + val isDefinitelyNumber = { + val lookahead = lookaheadReader + val c = lookahead.getc() + (c: @switch) match { + /** Another digit is a giveaway. */ + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + true + + /** Backquoted idents like 22.`foo`. */ + case '`' => + false + + /** These letters may be part of a literal, or a method invocation on an Int. + */ + case 'd' | 'D' | 'f' | 'F' => + !isIdentifierPart(lookahead.getc()) + + /** A little more special handling for e.g. 
5e7 */ + case 'e' | 'E' => + val ch = lookahead.getc() + !isIdentifierPart(ch) || (isDigit(ch) || ch == '+' || ch == '-') + + case x => + !isIdentifierStart(x) + } + } + if (isDefinitelyNumber) { + putChar(ch) + nextChar() + getFraction() + } + } else (ch: @switch) match { + case 'e' | 'E' | 'f' | 'F' | 'd' | 'D' if base == 10 => + getFraction() + case 'l' | 'L' => + nextChar() + token = LONGLIT + case _ => + } + setStrVal() + } + + /** Parse character literal if current character is followed by \', + * or follow with given op and return a symbol literal token + */ + def charLitOr(op: () => Unit) { + putChar(ch) + nextChar() + if (ch == '\'') { + nextChar() + token = CHARLIT + setStrVal() + } else { + op() + token = SYMBOLLIT + strVal = name.toString + } + } + +// Setting token data ---------------------------------------------------- + + /** Clear buffer and set name and token */ + def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = { + target.name = flushBuf(litBuf).toTermName + target.token = idtoken + if (idtoken == IDENTIFIER) { + val idx = target.name.start + if (idx >= 0 && idx < kwArray.length) target.token = kwArray(idx) + } + } + + /** Return buffer contents and clear */ + def flushBuf(buf: StringBuilder): String = { + val str = buf.toString + buf.clear() + str + } + + /** Convert current strVal to char value + */ + def charVal: Char = if (strVal.length > 0) strVal.charAt(0) else 0 + + /** Convert current strVal, base to long value + * This is tricky because of max negative value. 
+ */ + def intVal(negated: Boolean): Long = { + if (token == CHARLIT && !negated) { + charVal + } else { + var value: Long = 0 + val divider = if (base == 10) 1 else 2 + val limit: Long = + if (token == LONGLIT) Long.MaxValue else Int.MaxValue + var i = 0 + val len = strVal.length + while (i < len) { + val d = digit2int(strVal charAt i, base) + if (d < 0) { + error("malformed integer number") + return 0 + } + if (value < 0 || + limit / (base / divider) < value || + limit - (d / divider) < value * (base / divider) && + !(negated && limit == value * base - 1 + d)) { + error("integer number too large") + return 0 + } + value = value * base + d + i += 1 + } + if (negated) -value else value + } + } + + def intVal: Long = intVal(false) + + /** Convert current strVal, base to double value + */ + def floatVal(negated: Boolean): Double = { + val limit: Double = + if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue + try { + val value: Double = java.lang.Double.valueOf(strVal).doubleValue() + if (value > limit) + error("floating point number too large") + if (negated) -value else value + } catch { + case _: NumberFormatException => + error("malformed floating point number") + 0.0 + } + } + + def floatVal: Double = floatVal(false) + + override def toString = showTokenDetailed(token) + + def show: String = token match { + case IDENTIFIER | BACKQUOTED_IDENT => s"id($name)" + case CHARLIT => s"char($intVal)" + case INTLIT => s"int($intVal)" + case LONGLIT => s"long($intVal)" + case FLOATLIT => s"float($floatVal)" + case DOUBLELIT => s"double($floatVal)" + case STRINGLIT => s"string($strVal)" + case STRINGPART => s"stringpart($strVal)" + case INTERPOLATIONID => s"interpolationid($name)" + case SEMI => ";" + case NEWLINE => ";" + case NEWLINES => ";;" + case COMMA => "," + case _ => showToken(token) + } + +// (does not seem to be needed) def flush = { charOffset = offset; nextChar(); this } + + /* Resume normal scanning after XML */ + def resume(lastToken: Token) = { + 
token = lastToken + if (next.token != EMPTY && !ctx.reporter.hasErrors) + error("unexpected end of input: possible missing '}' in XML block") + + nextToken() + } + +// Errors ----------------------------------------------------------------- + + /** Generate an error at the given offset */ + def error(msg: String, off: Offset = offset) = { + ctx.error(msg, source atPos Position(off)) + token = ERROR + errOffset = off + } + + /** signal an error where the input ended in the middle of a token */ + def incompleteInputError(msg: String) { + ctx.reporter.incompleteInputError(msg, source atPos Position(offset)) + token = EOF + errOffset = offset + } + + /* Initialization: read first char, then first token */ + nextChar() + nextToken() + } // end Scanner + + // ------------- keyword configuration ----------------------------------- + + private val kwArray: Array[Token] = { + def start(tok: Token) = tok.toString.toTermName.start + val sourceKeywords = keywords.filterNot(_.toString contains " ") + val lastIdx = sourceKeywords.map(start).max + val arr = Array.fill(lastIdx + 1)(IDENTIFIER) + for (kw <- sourceKeywords) arr(start(kw)) = kw + arr + } +} diff --git a/src/dotty/tools/dotc/parsing/Tokens.scala b/src/dotty/tools/dotc/parsing/Tokens.scala new file mode 100644 index 000000000..f573df49d --- /dev/null +++ b/src/dotty/tools/dotc/parsing/Tokens.scala @@ -0,0 +1,171 @@ +package dotty.tools +package dotc +package parsing + +import collection.mutable +import collection.immutable.BitSet +import scala.annotation.switch + +object Tokens { + + final val minToken = EMPTY + final val maxToken = XMLSTART + + type TokenSet = BitSet + + def tokenRange(lo: Int, hi: Int): TokenSet = BitSet(lo to hi: _*) + + def showTokenDetailed(token: Int) = debugString(token) + + def showToken(token: Int) = { + val str = tokenString(token) + if (keywords contains token) s"'$str'" else str + } + + val tokenString, debugString = new Array[String](maxToken + 1) + + def enter(token: Int, str: String, 
debugStr: String = ""): Unit = { + tokenString(token) = str + debugString(token) = if (debugStr.isEmpty) str else debugStr + } + + /** special tokens */ + final val EMPTY = 0; enter(EMPTY, "<empty>") // a missing token, used in lookahead + final val ERROR = 1; enter(ERROR, "erroneous token") // an erroneous token + final val EOF = 2; enter(EOF, "eof") + + /** literals */ + final val CHARLIT = 3; enter(CHARLIT, "character literal") + final val INTLIT = 4; enter(INTLIT, "integer literal") + final val LONGLIT = 5; enter(LONGLIT, "long literal") + final val FLOATLIT = 6; enter(FLOATLIT, "float literal") + final val DOUBLELIT = 7; enter(DOUBLELIT, "double literal") + final val STRINGLIT = 8; enter(STRINGLIT, "string literal") + final val STRINGPART = 9; enter(STRINGPART, "string literal", "string literal part") + final val INTERPOLATIONID = 10; enter(INTERPOLATIONID, "string interpolator") + final val SYMBOLLIT = 11; enter(SYMBOLLIT, "symbol literal") // TODO: deprecate + + final val literalTokens = tokenRange(CHARLIT, SYMBOLLIT) + + /** identifiers */ + final val IDENTIFIER = 12; enter(IDENTIFIER, "identifier") + final val BACKQUOTED_IDENT = 13; enter(BACKQUOTED_IDENT, "identifier", "backquoted ident") + + final val identifierTokens = BitSet(IDENTIFIER, BACKQUOTED_IDENT) + + def isIdentifier(token : Int) = + token >= IDENTIFIER && token <= BACKQUOTED_IDENT + + /** alphabetic keywords */ + final val IF = 20; enter(IF, "if") + final val FOR = 21; enter(FOR, "for") + final val ELSE = 22; enter(ELSE, "else") + final val THIS = 23; enter(THIS, "this") + final val NULL = 24; enter(NULL, "null") + final val NEW = 25; enter(NEW, "new") + final val WITH = 26; enter(WITH, "with") + final val SUPER = 27; enter(SUPER, "super") + final val CASE = 28; enter(CASE, "case") + final val CASECLASS = 29; enter(CASECLASS, "case class") + final val CASEOBJECT = 30; enter(CASEOBJECT, "case object") + final val VAL = 31; enter(VAL, "val") + final val ABSTRACT = 32; enter(ABSTRACT, "abstract") 
+ final val FINAL = 33; enter(FINAL, "final") + final val PRIVATE = 34; enter(PRIVATE, "private") + final val PROTECTED = 35; enter(PROTECTED, "protected") + final val OVERRIDE = 36; enter(OVERRIDE, "override") + final val IMPLICIT = 37; enter(IMPLICIT, "implicit") + final val VAR = 38; enter(VAR, "var") + final val DEF = 39; enter(DEF, "def") + final val TYPE = 40; enter(TYPE, "type") + final val EXTENDS = 41; enter(EXTENDS, "extends") + final val TRUE = 42; enter(TRUE, "true") + final val FALSE = 43; enter(FALSE, "false") + final val OBJECT = 44; enter(OBJECT, "object") + final val CLASS = 45; enter(CLASS, "class") + final val IMPORT = 46; enter(IMPORT, "import") + final val PACKAGE = 47; enter(PACKAGE, "package") + final val YIELD = 48; enter(YIELD, "yield") + final val DO = 49; enter(DO, "do") + final val TRAIT = 50; enter(TRAIT, "trait") + final val SEALED = 51; enter(SEALED, "sealed") + final val THROW = 52; enter(THROW, "throw") + final val TRY = 53; enter(TRY, "try") + final val CATCH = 54; enter(CATCH, "catch") + final val FINALLY = 55; enter(FINALLY, "finally") + final val WHILE = 56; enter(WHILE, "while") + final val RETURN = 57; enter(RETURN, "return") + final val MATCH = 58; enter(MATCH, "match") + final val FORSOME = 59; enter(FORSOME, "forSome") // TODO: deprecate + final val LAZY = 61; enter(LAZY, "lazy") + final val THEN = 62; enter(THEN, "then") + + final val alphaKeywords = tokenRange(IF, LAZY) + + /** special symbols */ + final val COMMA = 70; enter(COMMA, "','") + final val SEMI = 71; enter(SEMI, "';'") // register each token on its own declaration line (was cross-registered with DOT) + final val DOT = 72; enter(DOT, "'.'") + final val NEWLINE = 78; enter(NEWLINE, "';'", "new line") + final val NEWLINES = 79; enter(NEWLINES, "';'", "new lines") + + /** special keywords */ + final val USCORE = 73; enter(USCORE, "_") + final val COLON = 74; enter(COLON, ":") + final val EQUALS = 75; enter(EQUALS, "==") + final val LARROW = 76; enter(LARROW, "<-") + final val ARROW = 77; enter(ARROW, "=>") + final val SUBTYPE = 80; 
enter(SUBTYPE, "<:") + final val SUPERTYPE = 81; enter(SUPERTYPE, ">:") + final val HASH = 82; enter(HASH, "#") + final val AT = 83; enter(AT, "@") + final val VIEWBOUND = 84; enter(VIEWBOUND, "<%") // TODO: deprecate + + final val symbolicKeywords = tokenRange(USCORE, VIEWBOUND) + final val symbolicTokens = tokenRange(COMMA, VIEWBOUND) + final val keywords = alphaKeywords | symbolicKeywords + + /** parentheses */ + final val LPAREN = 90; enter(LPAREN, "'('") + final val RPAREN = 91; enter(RPAREN, "')'") + final val LBRACKET = 92; enter(LBRACKET, "'['") + final val RBRACKET = 93; enter(RBRACKET, "']'") + final val LBRACE = 94; enter(LBRACE, "'{'") + final val RBRACE = 95; enter(RBRACE, "'}'") + + /** XML mode */ + final val XMLSTART = 96; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate + + final val allTokens = tokenRange(minToken, maxToken) + + final val atomicExprTokens = literalTokens | identifierTokens | BitSet( + USCORE, NULL, THIS, SUPER, TRUE, FALSE, RETURN, XMLSTART) + + final val canStartExpressionTokens = atomicExprTokens | BitSet( + LBRACE, LPAREN, IF, DO, WHILE, FOR, NEW, TRY, THROW) + + final val canStartTypeTokens = literalTokens | identifierTokens | BitSet( + THIS, SUPER, USCORE, LPAREN, AT) + + final val templateIntroTokens = BitSet(CLASS, TRAIT, OBJECT, CASECLASS, CASEOBJECT) + + final val dclIntroTokens = BitSet(DEF, VAL, VAR, TYPE) + + final val defIntroTokens = templateIntroTokens | dclIntroTokens + + final val localModifierTokens = BitSet( + ABSTRACT, FINAL, SEALED, IMPLICIT, LAZY) + + final val modifierTokens = localModifierTokens | BitSet( + PRIVATE, PROTECTED, OVERRIDE) + + /** Is token only legal as start of statement (eof also included)? */ + final val mustStartStatTokens = defIntroTokens | modifierTokens | BitSet( + CASE, IMPORT, PACKAGE) + + final val canStartStatTokens = canStartExpressionTokens | mustStartStatTokens | BitSet( + AT) + + final val canEndStatTokens = atomicExprTokens | BitSet( + TYPE, RPAREN, RBRACE, RBRACKET) +} |