path: root/src/dotty/tools/dotc/parsing
author     Martin Odersky <odersky@gmail.com>  2013-04-17 09:48:22 +0200
committer  Martin Odersky <odersky@gmail.com>  2013-04-17 10:16:22 +0200
commit     ca8dc7ada663e44aafe470944dd17256dbde151c (patch)
tree       d15939e204042e358e0c83064250f1f18c1c4f25 /src/dotty/tools/dotc/parsing
parent     e32fedb6844eab11a27e365a570b2033a0f6f78d (diff)
Scanners added.
Moved Positions and Chars to new packages. Added source positions. Added an untyped trees module. Factored out behavior shared between typed and untyped trees.
Diffstat (limited to 'src/dotty/tools/dotc/parsing')
-rw-r--r--  src/dotty/tools/dotc/parsing/CharArrayReader.scala  131
-rw-r--r--  src/dotty/tools/dotc/parsing/Scanners.scala          958
-rw-r--r--  src/dotty/tools/dotc/parsing/Tokens.scala            171
3 files changed, 1260 insertions(+), 0 deletions(-)
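
The new Scanner is self-starting: its initializer reads the first character and the first token, and clients then loop on nextToken(). A minimal driver sketch (not part of this commit; it assumes a dotc SourceFile and an implicit Context are already available from the surrounding compiler):

import dotty.tools.dotc.core.Contexts.Context
import dotty.tools.dotc.parsing.Scanners.Scanner
import dotty.tools.dotc.parsing.Tokens.EOF
import dotty.tools.dotc.util.SourceFile

object TokenDump {
  def dump(source: SourceFile)(implicit ctx: Context): Unit = {
    val scanner = new Scanner(source)       // the constructor already fetches the first token
    while (scanner.token != EOF) {
      println(s"${scanner.offset}: ${scanner.show}")   // e.g. id(foo), '(', int(42)
      scanner.nextToken()
    }
  }
}
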
diff --git a/src/dotty/tools/dotc/parsing/CharArrayReader.scala b/src/dotty/tools/dotc/parsing/CharArrayReader.scala
new file mode 100644
index 000000000..29346b78a
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/CharArrayReader.scala
@@ -0,0 +1,131 @@
+package dotty.tools
+package dotc
+package parsing
+
+import scala.reflect.internal.Chars._
+
+abstract class CharArrayReader { self =>
+
+ val buf: Array[Char]
+
+ /** Switch whether unicode should be decoded */
+ protected def decodeUni: Boolean = true
+
+ /** An error routine to call on bad unicode escapes \\uxxxx. */
+ protected def error(msg: String, offset: Int): Unit
+
+ /** the last read character */
+ var ch: Char = _
+
+ /** The offset one past the last read character */
+ var charOffset: Int = 0
+
+ /** The offset before the last read character */
+ var lastCharOffset: Int = 0
+
+ /** The start offset of the current line */
+ var lineStartOffset: Int = 0
+
+ /** The start offset of the line before the current one */
+ var lastLineStartOffset: Int = 0
+
+ private var lastUnicodeOffset = -1
+
+ /** Is last character a unicode escape \\uxxxx? */
+ def isUnicodeEscape = charOffset == lastUnicodeOffset
+
+ /** Advance one character; reducing CR;LF pairs to just LF */
+ final def nextChar(): Unit = {
+ val idx = charOffset
+ lastCharOffset = idx
+ if (idx >= buf.length) {
+ ch = SU
+ } else {
+ val c = buf(idx)
+ ch = c
+ charOffset = idx + 1
+ if (c == '\\') potentialUnicode()
+ else if (c < ' ') { skipCR(); potentialLineEnd() }
+ }
+ }
+
+ def getc() = { nextChar() ; ch }
+
+ /** Advance one character, leaving CR;LF pairs intact.
+ * This is for use in multi-line strings, so there are no
+ * "potential line ends" here.
+ */
+ final def nextRawChar(): Unit = {
+ val idx = charOffset
+ lastCharOffset = idx
+ if (idx >= buf.length) {
+ ch = SU
+ } else {
+ val c = buf(charOffset)
+ ch = c
+ charOffset = idx + 1
+ if (c == '\\') potentialUnicode()
+ }
+ }
+
+ /** Interpret \\uxxxx escapes */
+ private def potentialUnicode() {
+ def evenSlashPrefix: Boolean = {
+ var p = charOffset - 2
+ while (p >= 0 && buf(p) == '\\') p -= 1
+ (charOffset - p) % 2 == 0
+ }
+ def udigit: Int = {
+ if (charOffset >= buf.length) {
+ // Since the positioning code is very insistent about throwing exceptions,
+ // we have to decrement the position so our error message can be seen, since
+ // we are one past EOF. This happens with e.g. val x = \ u 1 <EOF>
+ error("incomplete unicode escape", charOffset - 1)
+ SU
+ }
+ else {
+ val d = digit2int(buf(charOffset), 16)
+ if (d >= 0) charOffset += 1
+ else error("error in unicode escape", charOffset)
+ d
+ }
+ }
+ if (charOffset < buf.length && buf(charOffset) == 'u' && decodeUni && evenSlashPrefix) {
+ do charOffset += 1
+ while (charOffset < buf.length && buf(charOffset) == 'u')
+ val code = udigit << 12 | udigit << 8 | udigit << 4 | udigit
+ lastUnicodeOffset = charOffset
+ ch = code.toChar
+ }
+ }
+
+ /** replace CR;LF by LF */
+ private def skipCR() {
+ if (ch == CR)
+ if (charOffset < buf.length && buf(charOffset) == LF) {
+ charOffset += 1
+ ch = LF
+ }
+ }
+
+ /** Handle line ends */
+ private def potentialLineEnd() {
+ if (ch == LF || ch == FF) {
+ lastLineStartOffset = lineStartOffset
+ lineStartOffset = charOffset
+ }
+ }
+
+ def isAtEnd = charOffset >= buf.length
+
+ /** A new reader that takes off at the current character position */
+ def lookaheadReader = new CharArrayLookaheadReader
+
+ class CharArrayLookaheadReader extends CharArrayReader {
+ val buf = self.buf
+ charOffset = self.charOffset
+ ch = self.ch
+ override def decodeUni = self.decodeUni
+ def error(msg: String, offset: Int) = self.error(msg, offset)
+ }
+}
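
A quick way to see the reader in action is to instantiate it over an in-memory buffer. The sketch below is not part of this commit (StringCharReader is a hypothetical test helper); it shows a \uxxxx escape being folded into a single character, since decodeUni defaults to true:

import dotty.tools.dotc.parsing.CharArrayReader

class StringCharReader(source: String) extends CharArrayReader {
  val buf: Array[Char] = source.toCharArray
  protected def error(msg: String, offset: Int): Unit =
    println(s"bad unicode escape at $offset: $msg")
}

object CharReaderDemo extends App {
  // the input holds the eight characters  a \ u 0 0 4 1 b ;
  // nextChar() decodes the escape into the single character 'A'
  val reader = new StringCharReader("a\\u0041b")
  while (!reader.isAtEnd) { reader.nextChar(); print(reader.ch) }   // prints aAb
  println()
}
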
diff --git a/src/dotty/tools/dotc/parsing/Scanners.scala b/src/dotty/tools/dotc/parsing/Scanners.scala
new file mode 100644
index 000000000..2b3ec9bc2
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/Scanners.scala
@@ -0,0 +1,958 @@
+package dotty.tools
+package dotc
+package parsing
+
+import Tokens._
+import core.Names._, core.Contexts._, core.Decorators._, util.Positions._
+import core.StdNames._
+import util.SourceFile
+import java.lang.Character.isDigit
+import scala.reflect.internal.Chars._
+import scala.annotation.{ switch, tailrec }
+import scala.collection.{ mutable, immutable }
+import mutable.{ ListBuffer, ArrayBuffer }
+import scala.xml.Utility.isNameStart
+
+object Scanners {
+
+ /** Offset into source character array */
+ type Offset = Int
+
+ /** An undefined offset */
+ val NoOffset: Offset = -1
+
+ case class Comment(pos: Position, chrs: String) {
+ def isDocComment = chrs.startsWith("/**")
+ }
+
+ type Token = Int
+
+ trait TokenData {
+
+ /** the next token */
+ var token: Token = EMPTY
+
+ /** the offset of the first character of the current token */
+ var offset: Offset = 0
+
+ /** the offset of the character following the token preceding this one */
+ var lastOffset: Offset = 0
+
+ /** the name of an identifier */
+ var name: TermName = null
+
+ /** the string value of a literal */
+ var strVal: String = null
+
+ /** the base of a number */
+ var base: Int = 0
+
+ def copyFrom(td: TokenData) = {
+ this.token = td.token
+ this.offset = td.offset
+ this.lastOffset = td.lastOffset
+ this.name = td.name
+ this.strVal = td.strVal
+ this.base = td.base
+ }
+ }
+
+ class Scanner(source: SourceFile)(implicit ctx: Context) extends CharArrayReader with TokenData {
+
+ val buf = source.content
+
+ var keepComments = false
+
+ /** All comments in the reverse order of their position in the source.
+ * set only when `keepComments` is true.
+ */
+ var revComments: List[Comment] = Nil
+
+ /** the last error offset
+ */
+ var errOffset: Offset = NoOffset
+
+ /** A buffer for comments */
+ val commentBuf = new StringBuilder
+
+ /** A character buffer for literals
+ */
+ val litBuf = new StringBuilder
+
+ /** append Unicode character to "litBuf" buffer
+ */
+ protected def putChar(c: Char): Unit = litBuf.append(c)
+
+ /** Clear buffer and set string */
+ private def setStrVal() = flushBuf(litBuf)
+
+ private class TokenData0 extends TokenData
+
+ /** we need one token lookahead and one token history
+ */
+ private val next : TokenData = new TokenData0
+ private val prev : TokenData = new TokenData0
+
+ /** a stack of tokens which indicates whether line-ends can be statement separators
+ * also used for keeping track of nesting levels.
+ * We keep track of the closing symbol of a region. This can be
+ * RPAREN if region starts with '('
+ * RBRACKET if region starts with '['
+ * RBRACE if region starts with '{'
+ * ARROW if region starts with `case'
+ * STRINGLIT if region is a string interpolation expression starting with '${'
+ * (the STRINGLIT appears twice in succession on the stack iff the
+ * expression is a multiline string literal).
+ */
+ var sepRegions: List[Token] = List()
+
+// Get next token ------------------------------------------------------------
+
+ /** Are we directly in a string interpolation expression?
+ */
+ private def inStringInterpolation =
+ sepRegions.nonEmpty && sepRegions.head == STRINGLIT
+
+ /** Are we directly in a multiline string interpolation expression?
+ * @pre inStringInterpolation
+ */
+ private def inMultiLineInterpolation =
+ inStringInterpolation && sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART
+
+ /** read next token and return last offset
+ */
+ def skipToken(): Offset = {
+ val off = offset
+ nextToken()
+ off
+ }
+
+ def adjustSepRegions(lastToken: Token): Unit = (lastToken: @switch) match {
+ case LPAREN =>
+ sepRegions = RPAREN :: sepRegions
+ case LBRACKET =>
+ sepRegions = RBRACKET :: sepRegions
+ case LBRACE =>
+ sepRegions = RBRACE :: sepRegions
+ case CASE =>
+ sepRegions = ARROW :: sepRegions
+ case RBRACE =>
+ while (!sepRegions.isEmpty && sepRegions.head != RBRACE)
+ sepRegions = sepRegions.tail
+ if (!sepRegions.isEmpty) sepRegions = sepRegions.tail
+ case RBRACKET | RPAREN =>
+ if (!sepRegions.isEmpty && sepRegions.head == lastToken)
+ sepRegions = sepRegions.tail
+ case ARROW =>
+ if (!sepRegions.isEmpty && sepRegions.head == lastToken)
+ sepRegions = sepRegions.tail
+ case STRINGLIT =>
+ if (inMultiLineInterpolation)
+ sepRegions = sepRegions.tail.tail
+ else if (inStringInterpolation)
+ sepRegions = sepRegions.tail
+ case _ =>
+ }
+
+ /** Produce next token, filling TokenData fields of Scanner.
+ */
+ def nextToken() {
+ val lastToken = token
+ adjustSepRegions(lastToken)
+
+ // Read a token or copy it from `next` tokenData
+ if (next.token == EMPTY) {
+ lastOffset = lastCharOffset
+ if (inStringInterpolation) fetchStringPart()
+ else fetchToken()
+ if (token == ERROR) adjustSepRegions(STRINGLIT)
+ } else {
+ this copyFrom next
+ next.token = EMPTY
+ }
+
+ /** Insert NEWLINE or NEWLINES if
+ * - we are after a newline
+ * - we are within a { ... } or on toplevel (wrt sepRegions)
+ * - the current token can start a statement and the one before can end it
+ * insert NEWLINES if we are past a blank line, NEWLINE otherwise
+ */
+ if (isAfterLineEnd() &&
+ (canEndStatTokens contains lastToken) &&
+ (canStartStatTokens contains token) &&
+ (sepRegions.isEmpty || sepRegions.head == RBRACE)) {
+ next copyFrom this
+ offset = lineStartOffset min lastLineStartOffset
+ token = if (pastBlankLine()) NEWLINES else NEWLINE
+ }
+
+ postProcessToken()
+// print("["+this+"]")
+ }
+
+ def postProcessToken() = {
+ // Join CASE + CLASS => CASECLASS, CASE + OBJECT => CASEOBJECT, SEMI + ELSE => ELSE
+ if (token == CASE) {
+ prev copyFrom this
+ val nextLastOffset = lastCharOffset
+ fetchToken()
+ def resetOffset() {
+ offset = prev.offset
+ lastOffset = prev.lastOffset
+ }
+ if (token == CLASS) {
+ token = CASECLASS
+ resetOffset()
+ } else if (token == OBJECT) {
+ token = CASEOBJECT
+ resetOffset()
+ } else {
+ lastOffset = nextLastOffset
+ next copyFrom this
+ this copyFrom prev
+ }
+ } else if (token == SEMI) {
+ prev copyFrom this
+ fetchToken()
+ if (token != ELSE) {
+ next copyFrom this
+ this copyFrom prev
+ }
+ }
+ }
+
+ /** Is current token first one after a newline? */
+ def isAfterLineEnd(): Boolean =
+ lastOffset < lineStartOffset &&
+ (lineStartOffset <= offset ||
+ lastOffset < lastLineStartOffset && lastLineStartOffset <= offset)
+
+ /** Is there a blank line between the current token and the last one?
+ * @pre afterLineEnd().
+ */
+ private def pastBlankLine(): Boolean = {
+ val end = offset
+ def recur(idx: Offset, isBlank: Boolean): Boolean =
+ idx < end && {
+ val ch = buf(idx)
+ if (ch == LF || ch == FF) isBlank || recur(idx + 1, true)
+ else recur(idx + 1, isBlank && ch <= ' ')
+ }
+ recur(lastOffset, false)
+ }
+
+ /** read next token, filling TokenData fields of Scanner.
+ */
+ protected final def fetchToken() {
+ offset = charOffset - 1
+ (ch: @switch) match {
+ case ' ' | '\t' | CR | LF | FF =>
+ nextChar()
+ fetchToken()
+ case 'A' | 'B' | 'C' | 'D' | 'E' |
+ 'F' | 'G' | 'H' | 'I' | 'J' |
+ 'K' | 'L' | 'M' | 'N' | 'O' |
+ 'P' | 'Q' | 'R' | 'S' | 'T' |
+ 'U' | 'V' | 'W' | 'X' | 'Y' |
+ 'Z' | '$' | '_' |
+ 'a' | 'b' | 'c' | 'd' | 'e' |
+ 'f' | 'g' | 'h' | 'i' | 'j' |
+ 'k' | 'l' | 'm' | 'n' | 'o' |
+ 'p' | 'q' | 'r' | 's' | 't' |
+ 'u' | 'v' | 'w' | 'x' | 'y' |
+ 'z' =>
+ putChar(ch)
+ nextChar()
+ getIdentRest()
+ if (ch == '"' && token == IDENTIFIER)
+ token = INTERPOLATIONID
+ case '<' => // is XMLSTART?
+ def fetchLT() = {
+ val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
+ nextChar()
+ last match {
+ case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
+ token = XMLSTART
+ case _ =>
+ // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
+ putChar('<')
+ getOperatorRest()
+ }
+ }
+ fetchLT()
+ case '~' | '!' | '@' | '#' | '%' |
+ '^' | '*' | '+' | '-' | /*'<' | */
+ '>' | '?' | ':' | '=' | '&' |
+ '|' | '\\' =>
+ putChar(ch)
+ nextChar()
+ getOperatorRest()
+ case '/' =>
+ if (skipComment()) {
+ fetchToken()
+ } else {
+ putChar('/')
+ getOperatorRest()
+ }
+ case '0' =>
+ def fetchZero() = {
+ putChar(ch)
+ nextChar()
+ if (ch == 'x' || ch == 'X') {
+ nextChar()
+ base = 16
+ } else {
+ /**
+ * What should leading 0 be in the future? It is potentially dangerous
+ * to let it be base-10 because of history. Should it be an error? Is
+ * there a realistic situation where one would need it?
+ */
+ if (isDigit(ch))
+ error("Non-zero numbers may not have a leading zero.")
+ }
+ getNumber()
+ }
+ fetchZero()
+ case '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ base = 10
+ getNumber()
+ case '`' =>
+ getBackquotedIdent()
+ case '\"' =>
+ def fetchDoubleQuote() = {
+ if (token == INTERPOLATIONID) {
+ nextRawChar()
+ if (ch == '\"') {
+ nextRawChar()
+ if (ch == '\"') {
+ nextRawChar()
+ getStringPart(multiLine = true)
+ sepRegions = STRINGPART :: sepRegions // indicate string part
+ sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part
+ } else {
+ token = STRINGLIT
+ strVal = ""
+ }
+ } else {
+ getStringPart(multiLine = false)
+ sepRegions = STRINGLIT :: sepRegions // indicate single line string part
+ }
+ } else {
+ nextChar()
+ if (ch == '\"') {
+ nextChar()
+ if (ch == '\"') {
+ nextRawChar()
+ getRawStringLit()
+ } else {
+ token = STRINGLIT
+ strVal = ""
+ }
+ } else {
+ getStringLit()
+ }
+ }
+ }
+ fetchDoubleQuote()
+ case '\'' =>
+ def fetchSingleQuote() = {
+ nextChar()
+ if (isIdentifierStart(ch))
+ charLitOr(getIdentRest)
+ else if (isOperatorPart(ch) && (ch != '\\'))
+ charLitOr(getOperatorRest)
+ else {
+ getLitChar()
+ if (ch == '\'') {
+ nextChar()
+ token = CHARLIT
+ setStrVal()
+ } else {
+ error("unclosed character literal")
+ }
+ }
+ }
+ fetchSingleQuote()
+ case '.' =>
+ nextChar()
+ if ('0' <= ch && ch <= '9') {
+ putChar('.'); getFraction(); setStrVal()
+ } else {
+ token = DOT
+ }
+ case ';' =>
+ nextChar(); token = SEMI
+ case ',' =>
+ nextChar(); token = COMMA
+ case '(' =>
+ nextChar(); token = LPAREN
+ case '{' =>
+ nextChar(); token = LBRACE
+ case ')' =>
+ nextChar(); token = RPAREN
+ case '}' =>
+ nextChar(); token = RBRACE
+ case '[' =>
+ nextChar(); token = LBRACKET
+ case ']' =>
+ nextChar(); token = RBRACKET
+ case SU =>
+ if (isAtEnd) token = EOF
+ else {
+ error("illegal character")
+ nextChar()
+ }
+ case _ =>
+ def fetchOther() = {
+ if (ch == '\u21D2') {
+ nextChar(); token = ARROW
+ } else if (ch == '\u2190') {
+ nextChar(); token = LARROW
+ } else if (Character.isUnicodeIdentifierStart(ch)) {
+ putChar(ch)
+ nextChar()
+ getIdentRest()
+ } else if (isSpecial(ch)) {
+ putChar(ch)
+ nextChar()
+ getOperatorRest()
+ } else {
+ error(f"illegal character '\\u${ch: Int}%04x'")
+ nextChar()
+ }
+ }
+ fetchOther()
+ }
+ }
+
+ private def skipComment(): Boolean = {
+ def appendToComment(ch: Char) =
+ if (keepComments) commentBuf.append(ch)
+ def nextChar() = {
+ appendToComment(ch)
+ Scanner.this.nextChar()
+ }
+ def skipLine(): Unit = {
+ nextChar()
+ if ((ch != CR) && (ch != LF) && (ch != SU)) skipLine()
+ }
+ @tailrec
+ def skipBlock(openComments: Int): Unit = {
+ val last = ch
+ nextChar()
+ if (ch == '/')
+ if (last == '*') {
+ if (openComments > 0) skipBlock(openComments - 1)
+ } else {
+ nextChar()
+ if (ch == '*') { nextChar(); skipBlock(openComments + 1) }
+ else skipBlock(openComments)
+ }
+ else if (ch == SU) incompleteInputError("unclosed comment")
+ else skipBlock(openComments)
+ }
+ val start = lastCharOffset
+ def finishComment(): Boolean = {
+ if (keepComments) {
+ val pos = Position(start, charOffset)
+ nextChar()
+ revComments = Comment(pos, flushBuf(commentBuf)) :: revComments
+ }
+ true
+ }
+ nextChar()
+ if (ch == '/') { skipLine(); finishComment() }
+ else if (ch == '*') { nextChar(); skipBlock(0); finishComment() }
+ else false
+ }
+
+// Identifiers ---------------------------------------------------------------
+
+ private def getBackquotedIdent() {
+ nextChar()
+ getLitChars('`')
+ if (ch == '`') {
+ nextChar()
+ finishNamed(BACKQUOTED_IDENT)
+ if (name.length == 0)
+ error("empty quoted identifier")
+ else if (name == nme.WILDCARD)
+ error("wildcard invalid as backquoted identifier")
+ }
+ else error("unclosed quoted identifier")
+ }
+
+ private def getIdentRest(): Unit = (ch: @switch) match {
+ case 'A' | 'B' | 'C' | 'D' | 'E' |
+ 'F' | 'G' | 'H' | 'I' | 'J' |
+ 'K' | 'L' | 'M' | 'N' | 'O' |
+ 'P' | 'Q' | 'R' | 'S' | 'T' |
+ 'U' | 'V' | 'W' | 'X' | 'Y' |
+ 'Z' | '$' |
+ 'a' | 'b' | 'c' | 'd' | 'e' |
+ 'f' | 'g' | 'h' | 'i' | 'j' |
+ 'k' | 'l' | 'm' | 'n' | 'o' |
+ 'p' | 'q' | 'r' | 's' | 't' |
+ 'u' | 'v' | 'w' | 'x' | 'y' |
+ 'z' |
+ '0' | '1' | '2' | '3' | '4' |
+ '5' | '6' | '7' | '8' | '9' =>
+ putChar(ch)
+ nextChar()
+ getIdentRest()
+ case '_' =>
+ putChar(ch)
+ nextChar()
+ getIdentOrOperatorRest()
+ case SU => // strangely enough, Character.isUnicodeIdentifierPart(SU) returns true!
+ finishNamed()
+ case _ =>
+ if (Character.isUnicodeIdentifierPart(ch)) {
+ putChar(ch)
+ nextChar()
+ getIdentRest()
+ } else {
+ finishNamed()
+ }
+ }
+
+ private def getOperatorRest(): Unit = (ch: @switch) match {
+ case '~' | '!' | '@' | '#' | '%' |
+ '^' | '*' | '+' | '-' | '<' |
+ '>' | '?' | ':' | '=' | '&' |
+ '|' | '\\' =>
+ putChar(ch); nextChar(); getOperatorRest()
+ case '/' =>
+ if (skipComment()) finishNamed()
+ else { putChar('/'); getOperatorRest() }
+ case _ =>
+ if (isSpecial(ch)) { putChar(ch); nextChar(); getOperatorRest() }
+ else finishNamed()
+ }
+
+ private def getIdentOrOperatorRest() {
+ if (isIdentifierPart(ch))
+ getIdentRest()
+ else ch match {
+ case '~' | '!' | '@' | '#' | '%' |
+ '^' | '*' | '+' | '-' | '<' |
+ '>' | '?' | ':' | '=' | '&' |
+ '|' | '\\' | '/' =>
+ getOperatorRest()
+ case _ =>
+ if (isSpecial(ch)) getOperatorRest()
+ else finishNamed()
+ }
+ }
+
+
+// Literals -----------------------------------------------------------------
+
+ private def getStringLit() = {
+ getLitChars('"')
+ if (ch == '"') {
+ setStrVal()
+ nextChar()
+ token = STRINGLIT
+ } else error("unclosed string literal")
+ }
+
+ private def getRawStringLit(): Unit = {
+ if (ch == '\"') {
+ nextRawChar()
+ if (isTripleQuote()) {
+ setStrVal()
+ token = STRINGLIT
+ } else
+ getRawStringLit()
+ } else if (ch == SU) {
+ incompleteInputError("unclosed multi-line string literal")
+ } else {
+ putChar(ch)
+ nextRawChar()
+ getRawStringLit()
+ }
+ }
+
+ @annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = {
+ def finishStringPart() = {
+ setStrVal()
+ token = STRINGPART
+ next.lastOffset = charOffset - 1
+ next.offset = charOffset - 1
+ }
+ if (ch == '"') {
+ if (multiLine) {
+ nextRawChar()
+ if (isTripleQuote()) {
+ setStrVal()
+ token = STRINGLIT
+ } else
+ getStringPart(multiLine)
+ } else {
+ nextChar()
+ setStrVal()
+ token = STRINGLIT
+ }
+ } else if (ch == '$') {
+ nextRawChar()
+ if (ch == '$') {
+ putChar(ch)
+ nextRawChar()
+ getStringPart(multiLine)
+ } else if (ch == '{') {
+ finishStringPart()
+ nextRawChar()
+ next.token = LBRACE
+ } else if (Character.isUnicodeIdentifierStart(ch)) {
+ finishStringPart()
+ do {
+ putChar(ch)
+ nextRawChar()
+ } while (ch != SU && Character.isUnicodeIdentifierPart(ch))
+ finishNamed(target = next)
+ } else {
+ error("invalid string interpolation: `$$', `$'ident or `$'BlockExpr expected")
+ }
+ } else {
+ val isUnclosedLiteral = !isUnicodeEscape && (ch == SU || (!multiLine && (ch == CR || ch == LF)))
+ if (isUnclosedLiteral) {
+ if (multiLine)
+ incompleteInputError("unclosed multi-line string literal")
+ else
+ error("unclosed string literal")
+ }
+ else {
+ putChar(ch)
+ nextRawChar()
+ getStringPart(multiLine)
+ }
+ }
+ }
+
+ private def fetchStringPart() = {
+ offset = charOffset - 1
+ getStringPart(multiLine = inMultiLineInterpolation)
+ }
+
+ private def isTripleQuote(): Boolean =
+ if (ch == '"') {
+ nextRawChar()
+ if (ch == '"') {
+ nextChar()
+ while (ch == '"') {
+ putChar('"')
+ nextChar()
+ }
+ true
+ } else {
+ putChar('"')
+ putChar('"')
+ false
+ }
+ } else {
+ putChar('"')
+ false
+ }
+
+ /** copy current character into litBuf, interpreting any escape sequences,
+ * and advance to next character.
+ */
+ protected def getLitChar(): Unit =
+ if (ch == '\\') {
+ nextChar()
+ if ('0' <= ch && ch <= '7') {
+ val leadch: Char = ch
+ var oct: Int = digit2int(ch, 8)
+ nextChar()
+ if ('0' <= ch && ch <= '7') {
+ oct = oct * 8 + digit2int(ch, 8)
+ nextChar()
+ if (leadch <= '3' && '0' <= ch && ch <= '7') {
+ oct = oct * 8 + digit2int(ch, 8)
+ nextChar()
+ }
+ }
+ putChar(oct.toChar)
+ } else {
+ ch match {
+ case 'b' => putChar('\b')
+ case 't' => putChar('\t')
+ case 'n' => putChar('\n')
+ case 'f' => putChar('\f')
+ case 'r' => putChar('\r')
+ case '\"' => putChar('\"')
+ case '\'' => putChar('\'')
+ case '\\' => putChar('\\')
+ case _ => invalidEscape()
+ }
+ nextChar()
+ }
+ } else {
+ putChar(ch)
+ nextChar()
+ }
+
+ protected def invalidEscape(): Unit = {
+ error("invalid escape character", charOffset - 1)
+ putChar(ch)
+ }
+
+ private def getLitChars(delimiter: Char) = {
+ while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
+ getLitChar()
+ }
+
+ /** read fractional part and exponent of floating point number
+ * if one is present.
+ */
+ protected def getFraction() {
+ token = DOUBLELIT
+ while ('0' <= ch && ch <= '9') {
+ putChar(ch)
+ nextChar()
+ }
+ if (ch == 'e' || ch == 'E') {
+ val lookahead = lookaheadReader
+ lookahead.nextChar()
+ if (lookahead.ch == '+' || lookahead.ch == '-') {
+ lookahead.nextChar()
+ }
+ if ('0' <= lookahead.ch && lookahead.ch <= '9') {
+ putChar(ch)
+ nextChar()
+ if (ch == '+' || ch == '-') {
+ putChar(ch)
+ nextChar()
+ }
+ while ('0' <= ch && ch <= '9') {
+ putChar(ch)
+ nextChar()
+ }
+ }
+ token = DOUBLELIT
+ }
+ if (ch == 'd' || ch == 'D') {
+ putChar(ch)
+ nextChar()
+ token = DOUBLELIT
+ } else if (ch == 'f' || ch == 'F') {
+ putChar(ch)
+ nextChar()
+ token = FLOATLIT
+ }
+ checkNoLetter()
+ }
+ def checkNoLetter() {
+ if (isIdentifierPart(ch) && ch >= ' ')
+ error("Invalid literal number")
+ }
+
+ /** Read a number into strVal and set base
+ */
+ protected def getNumber() {
+ while (digit2int(ch, base) >= 0) {
+ putChar(ch)
+ nextChar()
+ }
+ token = INTLIT
+ if (base == 10 && ch == '.') {
+ val isDefinitelyNumber = {
+ val lookahead = lookaheadReader
+ val c = lookahead.getc()
+ (c: @switch) match {
+ /** Another digit is a giveaway. */
+ case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ true
+
+ /** Backquoted idents like 22.`foo`. */
+ case '`' =>
+ false
+
+ /** These letters may be part of a literal, or a method invocation on an Int.
+ */
+ case 'd' | 'D' | 'f' | 'F' =>
+ !isIdentifierPart(lookahead.getc())
+
+ /** A little more special handling for e.g. 5e7 */
+ case 'e' | 'E' =>
+ val ch = lookahead.getc()
+ !isIdentifierPart(ch) || (isDigit(ch) || ch == '+' || ch == '-')
+
+ case x =>
+ !isIdentifierStart(x)
+ }
+ }
+ if (isDefinitelyNumber) {
+ putChar(ch)
+ nextChar()
+ getFraction()
+ }
+ } else (ch: @switch) match {
+ case 'e' | 'E' | 'f' | 'F' | 'd' | 'D' if base == 10 =>
+ getFraction()
+ case 'l' | 'L' =>
+ nextChar()
+ token = LONGLIT
+ case _ =>
+ }
+ setStrVal()
+ }
+
+ /** Parse character literal if current character is followed by \',
+ * or follow with given op and return a symbol literal token
+ */
+ def charLitOr(op: () => Unit) {
+ putChar(ch)
+ nextChar()
+ if (ch == '\'') {
+ nextChar()
+ token = CHARLIT
+ setStrVal()
+ } else {
+ op()
+ token = SYMBOLLIT
+ strVal = name.toString
+ }
+ }
+
+// Setting token data ----------------------------------------------------
+
+ /** Clear buffer and set name and token */
+ def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = {
+ target.name = flushBuf(litBuf).toTermName
+ target.token = idtoken
+ if (idtoken == IDENTIFIER) {
+ val idx = target.name.start
+ if (idx >= 0 && idx < kwArray.length) target.token = kwArray(idx)
+ }
+ }
+
+ /** Return buffer contents and clear */
+ def flushBuf(buf: StringBuilder): String = {
+ val str = buf.toString
+ buf.clear()
+ str
+ }
+
+ /** Convert current strVal to char value
+ */
+ def charVal: Char = if (strVal.length > 0) strVal.charAt(0) else 0
+
+ /** Convert current strVal, base to long value
+ * This is tricky because of max negative value.
+ */
+ def intVal(negated: Boolean): Long = {
+ if (token == CHARLIT && !negated) {
+ charVal
+ } else {
+ var value: Long = 0
+ val divider = if (base == 10) 1 else 2
+ val limit: Long =
+ if (token == LONGLIT) Long.MaxValue else Int.MaxValue
+ var i = 0
+ val len = strVal.length
+ while (i < len) {
+ val d = digit2int(strVal charAt i, base)
+ if (d < 0) {
+ error("malformed integer number")
+ return 0
+ }
+ if (value < 0 ||
+ limit / (base / divider) < value ||
+ limit - (d / divider) < value * (base / divider) &&
+ !(negated && limit == value * base - 1 + d)) {
+ error("integer number too large")
+ return 0
+ }
+ value = value * base + d
+ i += 1
+ }
+ if (negated) -value else value
+ }
+ }
+
+ def intVal: Long = intVal(false)
+
+ /** Convert current strVal, base to double value
+ */
+ def floatVal(negated: Boolean): Double = {
+ val limit: Double =
+ if (token == DOUBLELIT) Double.MaxValue else Float.MaxValue
+ try {
+ val value: Double = java.lang.Double.valueOf(strVal).doubleValue()
+ if (value > limit)
+ error("floating point number too large")
+ if (negated) -value else value
+ } catch {
+ case _: NumberFormatException =>
+ error("malformed floating point number")
+ 0.0
+ }
+ }
+
+ def floatVal: Double = floatVal(false)
+
+ override def toString = showTokenDetailed(token)
+
+ def show: String = token match {
+ case IDENTIFIER | BACKQUOTED_IDENT => s"id($name)"
+ case CHARLIT => s"char($intVal)"
+ case INTLIT => s"int($intVal)"
+ case LONGLIT => s"long($intVal)"
+ case FLOATLIT => s"float($floatVal)"
+ case DOUBLELIT => s"double($floatVal)"
+ case STRINGLIT => s"string($strVal)"
+ case STRINGPART => s"stringpart($strVal)"
+ case INTERPOLATIONID => s"interpolationid($name)"
+ case SEMI => ";"
+ case NEWLINE => ";"
+ case NEWLINES => ";;"
+ case COMMA => ","
+ case _ => showToken(token)
+ }
+
+// (does not seem to be needed) def flush = { charOffset = offset; nextChar(); this }
+
+ /* Resume normal scanning after XML */
+ def resume(lastToken: Token) = {
+ token = lastToken
+ if (next.token != EMPTY && !ctx.reporter.hasErrors)
+ error("unexpected end of input: possible missing '}' in XML block")
+
+ nextToken()
+ }
+
+// Errors -----------------------------------------------------------------
+
+ /** Generate an error at the given offset */
+ def error(msg: String, off: Offset = offset) = {
+ ctx.error(msg, source atPos Position(off))
+ token = ERROR
+ errOffset = off
+ }
+
+ /** signal an error where the input ended in the middle of a token */
+ def incompleteInputError(msg: String) {
+ ctx.reporter.incompleteInputError(msg, source atPos Position(offset))
+ token = EOF
+ errOffset = offset
+ }
+
+ /* Initialization: read first char, then first token */
+ nextChar()
+ nextToken()
+ } // end Scanner
+
+ // ------------- keyword configuration -----------------------------------
+
+ private val kwArray: Array[Token] = {
+ def start(tok: Token) = tok.toString.toTermName.start
+ val sourceKeywords = keywords.filterNot(_.toString contains " ")
+ val lastIdx = sourceKeywords.map(start).max
+ val arr = Array.fill(lastIdx + 1)(IDENTIFIER)
+ for (kw <- sourceKeywords) arr(start(kw)) = kw
+ arr
+ }
+}
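
postProcessToken uses the one-token lookahead in `next` to fuse CASE CLASS into CASECLASS, CASE OBJECT into CASEOBJECT, and to drop a SEMI that directly precedes ELSE. A standalone sketch of that joining idea, on plain strings rather than the dotty token machinery (hypothetical, for illustration only):

object JoinTokensDemo extends App {
  def join(tokens: List[String]): List[String] = tokens match {
    case "case" :: "class" :: rest  => "caseclass" :: join(rest)
    case "case" :: "object" :: rest => "caseobject" :: join(rest)
    case ";" :: "else" :: rest      => "else" :: join(rest)   // SEMI before ELSE is dropped
    case t :: rest                  => t :: join(rest)
    case Nil                        => Nil
  }

  println(join(List("case", "class", "Foo", ";", "else")))
  // List(caseclass, Foo, else)
}
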
diff --git a/src/dotty/tools/dotc/parsing/Tokens.scala b/src/dotty/tools/dotc/parsing/Tokens.scala
new file mode 100644
index 000000000..f573df49d
--- /dev/null
+++ b/src/dotty/tools/dotc/parsing/Tokens.scala
@@ -0,0 +1,171 @@
+package dotty.tools
+package dotc
+package parsing
+
+import collection.mutable
+import collection.immutable.BitSet
+import scala.annotation.switch
+
+object Tokens {
+
+ final val minToken = EMPTY
+ final val maxToken = XMLSTART
+
+ type TokenSet = BitSet
+
+ def tokenRange(lo: Int, hi: Int): TokenSet = BitSet(lo to hi: _*)
+
+ def showTokenDetailed(token: Int) = debugString(token)
+
+ def showToken(token: Int) = {
+ val str = tokenString(token)
+ if (keywords contains token) s"'$str'" else str
+ }
+
+ val tokenString, debugString = new Array[String](maxToken + 1)
+
+ def enter(token: Int, str: String, debugStr: String = ""): Unit = {
+ tokenString(token) = str
+ debugString(token) = if (debugStr.isEmpty) str else debugStr
+ }
+
+ /** special tokens */
+ final val EMPTY = 0; enter(EMPTY, "<empty>") // a missing token, used in lookahead
+ final val ERROR = 1; enter(ERROR, "erroneous token") // an erroneous token
+ final val EOF = 2; enter(EOF, "eof")
+
+ /** literals */
+ final val CHARLIT = 3; enter(CHARLIT, "character literal")
+ final val INTLIT = 4; enter(INTLIT, "integer literal")
+ final val LONGLIT = 5; enter(LONGLIT, "long literal")
+ final val FLOATLIT = 6; enter(FLOATLIT, "float literal")
+ final val DOUBLELIT = 7; enter(DOUBLELIT, "double literal")
+ final val STRINGLIT = 8; enter(STRINGLIT, "string literal")
+ final val STRINGPART = 9; enter(STRINGPART, "string literal", "string literal part")
+ final val INTERPOLATIONID = 10; enter(INTERPOLATIONID, "string interpolator")
+ final val SYMBOLLIT = 11; enter(SYMBOLLIT, "symbol literal") // TODO: deprecate
+
+ final val literalTokens = tokenRange(CHARLIT, SYMBOLLIT)
+
+ /** identifiers */
+ final val IDENTIFIER = 12; enter(IDENTIFIER, "identifier")
+ final val BACKQUOTED_IDENT = 13; enter(BACKQUOTED_IDENT, "identifier", "backquoted ident")
+
+ final val identifierTokens = BitSet(IDENTIFIER, BACKQUOTED_IDENT)
+
+ def isIdentifier(token : Int) =
+ token >= IDENTIFIER && token <= BACKQUOTED_IDENT
+
+ /** alphabetic keywords */
+ final val IF = 20; enter(IF, "if")
+ final val FOR = 21; enter(FOR, "for")
+ final val ELSE = 22; enter(ELSE, "else")
+ final val THIS = 23; enter(THIS, "this")
+ final val NULL = 24; enter(NULL, "null")
+ final val NEW = 25; enter(NEW, "new")
+ final val WITH = 26; enter(WITH, "with")
+ final val SUPER = 27; enter(SUPER, "super")
+ final val CASE = 28; enter(CASE, "case")
+ final val CASECLASS = 29; enter(CASECLASS, "case class")
+ final val CASEOBJECT = 30; enter(CASEOBJECT, "case object")
+ final val VAL = 31; enter(VAL, "val")
+ final val ABSTRACT = 32; enter(ABSTRACT, "abstract")
+ final val FINAL = 33; enter(FINAL, "final")
+ final val PRIVATE = 34; enter(PRIVATE, "private")
+ final val PROTECTED = 35; enter(PROTECTED, "protected")
+ final val OVERRIDE = 36; enter(OVERRIDE, "override")
+ final val IMPLICIT = 37; enter(IMPLICIT, "implicit")
+ final val VAR = 38; enter(VAR, "var")
+ final val DEF = 39; enter(DEF, "def")
+ final val TYPE = 40; enter(TYPE, "type")
+ final val EXTENDS = 41; enter(EXTENDS, "extends")
+ final val TRUE = 42; enter(TRUE, "true")
+ final val FALSE = 43; enter(FALSE, "false")
+ final val OBJECT = 44; enter(OBJECT, "object")
+ final val CLASS = 45; enter(CLASS, "class")
+ final val IMPORT = 46; enter(IMPORT, "import")
+ final val PACKAGE = 47; enter(PACKAGE, "package")
+ final val YIELD = 48; enter(YIELD, "yield")
+ final val DO = 49; enter(DO, "do")
+ final val TRAIT = 50; enter(TRAIT, "trait")
+ final val SEALED = 51; enter(SEALED, "sealed")
+ final val THROW = 52; enter(THROW, "throw")
+ final val TRY = 53; enter(TRY, "try")
+ final val CATCH = 54; enter(CATCH, "catch")
+ final val FINALLY = 55; enter(FINALLY, "finally")
+ final val WHILE = 56; enter(WHILE, "while")
+ final val RETURN = 57; enter(RETURN, "return")
+ final val MATCH = 58; enter(MATCH, "match")
+ final val FORSOME = 59; enter(FORSOME, "forSome") // TODO: deprecate
+ final val LAZY = 61; enter(LAZY, "lazy")
+ final val THEN = 62; enter(THEN, "then")
+
+ final val alphaKeywords = tokenRange(IF, LAZY)
+
+ /** special symbols */
+ final val COMMA = 70; enter(COMMA, "','")
+ final val SEMI = 71; enter(SEMI, "';'")
+ final val DOT = 72; enter(DOT, "'.'")
+ final val NEWLINE = 78; enter(NEWLINE, "';'", "new line")
+ final val NEWLINES = 79; enter(NEWLINES, "';'", "new lines")
+
+ /** special keywords */
+ final val USCORE = 73; enter(USCORE, "_")
+ final val COLON = 74; enter(COLON, ":")
+ final val EQUALS = 75; enter(EQUALS, "=")
+ final val LARROW = 76; enter(LARROW, "<-")
+ final val ARROW = 77; enter(ARROW, "=>")
+ final val SUBTYPE = 80; enter(SUBTYPE, "<:")
+ final val SUPERTYPE = 81; enter(SUPERTYPE, ">:")
+ final val HASH = 82; enter(HASH, "#")
+ final val AT = 83; enter(AT, "@")
+ final val VIEWBOUND = 84; enter(VIEWBOUND, "<%") // TODO: deprecate
+
+ final val symbolicKeywords = tokenRange(USCORE, VIEWBOUND)
+ final val symbolicTokens = tokenRange(COMMA, VIEWBOUND)
+ final val keywords = alphaKeywords | symbolicKeywords
+
+ /** parentheses */
+ final val LPAREN = 90; enter(LPAREN, "'('")
+ final val RPAREN = 91; enter(RPAREN, "')'")
+ final val LBRACKET = 92; enter(LBRACKET, "'['")
+ final val RBRACKET = 93; enter(RBRACKET, "']'")
+ final val LBRACE = 94; enter(LBRACE, "'{'")
+ final val RBRACE = 95; enter(RBRACE, "'}'")
+
+ /** XML mode */
+ final val XMLSTART = 96; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate
+
+ final val allTokens = tokenRange(minToken, maxToken)
+
+ final val atomicExprTokens = literalTokens | identifierTokens | BitSet(
+ USCORE, NULL, THIS, SUPER, TRUE, FALSE, RETURN, XMLSTART)
+
+ final val canStartExpressionTokens = atomicExprTokens | BitSet(
+ LBRACE, LPAREN, IF, DO, WHILE, FOR, NEW, TRY, THROW)
+
+ final val canStartTypeTokens = literalTokens | identifierTokens | BitSet(
+ THIS, SUPER, USCORE, LPAREN, AT)
+
+ final val templateIntroTokens = BitSet(CLASS, TRAIT, OBJECT, CASECLASS, CASEOBJECT)
+
+ final val dclIntroTokens = BitSet(DEF, VAL, VAR, TYPE)
+
+ final val defIntroTokens = templateIntroTokens | dclIntroTokens
+
+ final val localModifierTokens = BitSet(
+ ABSTRACT, FINAL, SEALED, IMPLICIT, LAZY)
+
+ final val modifierTokens = localModifierTokens | BitSet(
+ PRIVATE, PROTECTED, OVERRIDE)
+
+ /** Is token only legal as start of statement (eof also included)? */
+ final val mustStartStatTokens = defIntroTokens | modifierTokens | BitSet(
+ CASE, IMPORT, PACKAGE)
+
+ final val canStartStatTokens = canStartExpressionTokens | mustStartStatTokens | BitSet(
+ AT)
+
+ final val canEndStatTokens = atomicExprTokens | BitSet(
+ TYPE, RPAREN, RBRACE, RBRACKET)
+}
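
Because each token class is a BitSet over token ids, membership checks used throughout the scanner and parser (for example the canStartStatTokens / canEndStatTokens tests around newline insertion) are simple bit tests. A small sketch against the definitions above (not part of this commit):

import dotty.tools.dotc.parsing.Tokens._

object TokensDemo extends App {
  println(canStartStatTokens contains IF)    // true:  `if` can begin a statement
  println(canEndStatTokens contains IF)      // false: `if` cannot end one
  println(modifierTokens contains PRIVATE)   // true
  println(showToken(CASECLASS))              // 'case class'
}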