diff options
author | Ondrej Lhotak <olhotak@uwaterloo.ca> | 2014-09-04 17:06:02 +0200 |
---|---|---|
committer | Dmitry Petrashko <dmitry.petrashko@gmail.com> | 2014-11-22 20:10:20 +0100 |
commit | 03627e71d9fdc4b2211d244cc8fd844d57997357 (patch) | |
tree | b0062570c12f5713c09d0aa2519819c202c1f8cb /src/dotty/tools/dotc/parsing/Tokens.scala | |
parent | cc1d36f64d324b972ec3116f93c4f62fafd706bc (diff) | |
download | dotty-03627e71d9fdc4b2211d244cc8fd844d57997357.tar.gz dotty-03627e71d9fdc4b2211d244cc8fd844d57997357.tar.bz2 dotty-03627e71d9fdc4b2211d244cc8fd844d57997357.zip |
Initial implementation of Java scanner and parser
Ported from scalac 2.11.x branch SHA 9753f23f9362b25a9f481b11dd8d51187187882a
This is mostly a direct port, with only a few significant dotty-specific
changes needed. The two most significant changes are:
- In dotty, the first constructor of a class is pulled out separately from
  the other stats in the Template.
- The keyword detection code (buildKeywordArray) was moved into Tokens so
  that it can more cleanly be shared by the Scala and Java scanners.
Diffstat (limited to 'src/dotty/tools/dotc/parsing/Tokens.scala')
-rw-r--r-- | src/dotty/tools/dotc/parsing/Tokens.scala | 143 |
1 files changed, 100 insertions, 43 deletions
diff --git a/src/dotty/tools/dotc/parsing/Tokens.scala b/src/dotty/tools/dotc/parsing/Tokens.scala index 09124d0d1..226a3710d 100644 --- a/src/dotty/tools/dotc/parsing/Tokens.scala +++ b/src/dotty/tools/dotc/parsing/Tokens.scala @@ -3,11 +3,10 @@ package dotc package parsing import collection.immutable.BitSet +import core.Decorators._ -object Tokens { - - final val minToken = EMPTY - final val maxToken = XMLSTART +abstract class TokensCommon { + val maxToken: Int type Token = Int type TokenSet = BitSet @@ -24,6 +23,7 @@ object Tokens { val tokenString, debugString = new Array[String](maxToken + 1) def enter(token: Int, str: String, debugStr: String = ""): Unit = { + assert(tokenString(token) == null) tokenString(token) = str debugString(token) = if (debugStr.isEmpty) str else debugStr } @@ -41,17 +41,12 @@ object Tokens { final val DOUBLELIT = 7; enter(DOUBLELIT, "double literal") final val STRINGLIT = 8; enter(STRINGLIT, "string literal") final val STRINGPART = 9; enter(STRINGPART, "string literal", "string literal part") - final val INTERPOLATIONID = 10; enter(INTERPOLATIONID, "string interpolator") - final val SYMBOLLIT = 11; enter(SYMBOLLIT, "symbol literal") // TODO: deprecate + //final val INTERPOLATIONID = 10; enter(INTERPOLATIONID, "string interpolator") + //final val SYMBOLLIT = 11; enter(SYMBOLLIT, "symbol literal") // TODO: deprecate /** identifiers */ final val IDENTIFIER = 12; enter(IDENTIFIER, "identifier") - final val BACKQUOTED_IDENT = 13; enter(BACKQUOTED_IDENT, "identifier", "backquoted ident") - - final val identifierTokens = BitSet(IDENTIFIER, BACKQUOTED_IDENT) - - def isIdentifier(token : Int) = - token >= IDENTIFIER && token <= BACKQUOTED_IDENT + //final val BACKQUOTED_IDENT = 13; enter(BACKQUOTED_IDENT, "identifier", "backquoted ident") /** alphabetic keywords */ final val IF = 20; enter(IF, "if") @@ -60,67 +55,63 @@ object Tokens { final val THIS = 23; enter(THIS, "this") final val NULL = 24; enter(NULL, "null") final val NEW = 25; 
enter(NEW, "new") - final val WITH = 26; enter(WITH, "with") + //final val WITH = 26; enter(WITH, "with") final val SUPER = 27; enter(SUPER, "super") - final val CASE = 28; enter(CASE, "case") - final val CASECLASS = 29; enter(CASECLASS, "case class") - final val CASEOBJECT = 30; enter(CASEOBJECT, "case object") - final val VAL = 31; enter(VAL, "val") + //final val CASE = 28; enter(CASE, "case") + //final val CASECLASS = 29; enter(CASECLASS, "case class") + //final val CASEOBJECT = 30; enter(CASEOBJECT, "case object") + //final val VAL = 31; enter(VAL, "val") final val ABSTRACT = 32; enter(ABSTRACT, "abstract") final val FINAL = 33; enter(FINAL, "final") final val PRIVATE = 34; enter(PRIVATE, "private") final val PROTECTED = 35; enter(PROTECTED, "protected") final val OVERRIDE = 36; enter(OVERRIDE, "override") - final val IMPLICIT = 37; enter(IMPLICIT, "implicit") - final val VAR = 38; enter(VAR, "var") - final val DEF = 39; enter(DEF, "def") - final val TYPE = 40; enter(TYPE, "type") + //final val IMPLICIT = 37; enter(IMPLICIT, "implicit") + //final val VAR = 38; enter(VAR, "var") + //final val DEF = 39; enter(DEF, "def") + //final val TYPE = 40; enter(TYPE, "type") final val EXTENDS = 41; enter(EXTENDS, "extends") final val TRUE = 42; enter(TRUE, "true") final val FALSE = 43; enter(FALSE, "false") - final val OBJECT = 44; enter(OBJECT, "object") + //final val OBJECT = 44; enter(OBJECT, "object") final val CLASS = 45; enter(CLASS, "class") final val IMPORT = 46; enter(IMPORT, "import") final val PACKAGE = 47; enter(PACKAGE, "package") - final val YIELD = 48; enter(YIELD, "yield") + //final val YIELD = 48; enter(YIELD, "yield") final val DO = 49; enter(DO, "do") - final val TRAIT = 50; enter(TRAIT, "trait") - final val SEALED = 51; enter(SEALED, "sealed") + //final val TRAIT = 50; enter(TRAIT, "trait") + //final val SEALED = 51; enter(SEALED, "sealed") final val THROW = 52; enter(THROW, "throw") final val TRY = 53; enter(TRY, "try") final val CATCH = 54; 
enter(CATCH, "catch") final val FINALLY = 55; enter(FINALLY, "finally") final val WHILE = 56; enter(WHILE, "while") final val RETURN = 57; enter(RETURN, "return") - final val MATCH = 58; enter(MATCH, "match") - final val LAZY = 59; enter(LAZY, "lazy") - final val THEN = 60; enter(THEN, "then") - final val FORSOME = 61; enter(FORSOME, "forSome") // TODO: deprecate - - final val alphaKeywords = tokenRange(IF, FORSOME) + //final val MATCH = 58; enter(MATCH, "match") + //final val LAZY = 59; enter(LAZY, "lazy") + //final val THEN = 60; enter(THEN, "then") + //final val FORSOME = 61; enter(FORSOME, "forSome") // TODO: deprecate /** special symbols */ final val COMMA = 70; enter(COMMA, "','") final val SEMI = 71; enter(DOT, "'.'") final val DOT = 72; enter(SEMI, "';'") - final val NEWLINE = 78; enter(NEWLINE, "end of statement", "new line") - final val NEWLINES = 79; enter(NEWLINES, "end of statement", "new lines") + //final val NEWLINE = 78; enter(NEWLINE, "end of statement", "new line") + //final val NEWLINES = 79; enter(NEWLINES, "end of statement", "new lines") /** special keywords */ - final val USCORE = 73; enter(USCORE, "_") + //final val USCORE = 73; enter(USCORE, "_") final val COLON = 74; enter(COLON, ":") final val EQUALS = 75; enter(EQUALS, "=") - final val LARROW = 76; enter(LARROW, "<-") - final val ARROW = 77; enter(ARROW, "=>") - final val SUBTYPE = 80; enter(SUBTYPE, "<:") - final val SUPERTYPE = 81; enter(SUPERTYPE, ">:") - final val HASH = 82; enter(HASH, "#") + //final val LARROW = 76; enter(LARROW, "<-") + //final val ARROW = 77; enter(ARROW, "=>") + //final val SUBTYPE = 80; enter(SUBTYPE, "<:") + //final val SUPERTYPE = 81; enter(SUPERTYPE, ">:") + //final val HASH = 82; enter(HASH, "#") final val AT = 83; enter(AT, "@") - final val VIEWBOUND = 84; enter(VIEWBOUND, "<%") // TODO: deprecate + //final val VIEWBOUND = 84; enter(VIEWBOUND, "<%") // TODO: deprecate - final val symbolicKeywords = tokenRange(USCORE, VIEWBOUND) - final val symbolicTokens = 
tokenRange(COMMA, VIEWBOUND) - final val keywords = alphaKeywords | symbolicKeywords + val keywords: TokenSet /** parentheses */ final val LPAREN = 90; enter(LPAREN, "'('") @@ -133,9 +124,75 @@ object Tokens { final val firstParen = LPAREN final val lastParen = RBRACE + def buildKeywordArray(keywords: TokenSet) = { + def start(tok: Token) = tokenString(tok).toTermName.start + def sourceKeywords = keywords.toList.filter { (kw: Token) => + val ts = tokenString(kw) + (ts != null) && !ts.contains(' ') + } + + val lastKeywordStart = sourceKeywords.map(start).max + + val arr = Array.fill(lastKeywordStart + 1)(IDENTIFIER) + for (kw <- sourceKeywords) arr(start(kw)) = kw + (lastKeywordStart, arr) + } +} + +object Tokens extends TokensCommon { + final val minToken = EMPTY + final val maxToken = XMLSTART + + final val INTERPOLATIONID = 10; enter(INTERPOLATIONID, "string interpolator") + final val SYMBOLLIT = 11; enter(SYMBOLLIT, "symbol literal") // TODO: deprecate + + final val BACKQUOTED_IDENT = 13; enter(BACKQUOTED_IDENT, "identifier", "backquoted ident") + + final val identifierTokens = BitSet(IDENTIFIER, BACKQUOTED_IDENT) + + def isIdentifier(token : Int) = + token >= IDENTIFIER && token <= BACKQUOTED_IDENT + + /** alphabetic keywords */ + final val WITH = 26; enter(WITH, "with") + final val CASE = 28; enter(CASE, "case") + final val CASECLASS = 29; enter(CASECLASS, "case class") + final val CASEOBJECT = 30; enter(CASEOBJECT, "case object") + final val VAL = 31; enter(VAL, "val") + final val IMPLICIT = 37; enter(IMPLICIT, "implicit") + final val VAR = 38; enter(VAR, "var") + final val DEF = 39; enter(DEF, "def") + final val TYPE = 40; enter(TYPE, "type") + final val OBJECT = 44; enter(OBJECT, "object") + final val YIELD = 48; enter(YIELD, "yield") + final val TRAIT = 50; enter(TRAIT, "trait") + final val SEALED = 51; enter(SEALED, "sealed") + final val MATCH = 58; enter(MATCH, "match") + final val LAZY = 59; enter(LAZY, "lazy") + final val THEN = 60; enter(THEN, "then") 
+ final val FORSOME = 61; enter(FORSOME, "forSome") // TODO: deprecate + + /** special symbols */ + final val NEWLINE = 78; enter(NEWLINE, "end of statement", "new line") + final val NEWLINES = 79; enter(NEWLINES, "end of statement", "new lines") + + /** special keywords */ + final val USCORE = 73; enter(USCORE, "_") + final val LARROW = 76; enter(LARROW, "<-") + final val ARROW = 77; enter(ARROW, "=>") + final val SUBTYPE = 80; enter(SUBTYPE, "<:") + final val SUPERTYPE = 81; enter(SUPERTYPE, ">:") + final val HASH = 82; enter(HASH, "#") + final val VIEWBOUND = 84; enter(VIEWBOUND, "<%") // TODO: deprecate + /** XML mode */ final val XMLSTART = 96; enter(XMLSTART, "$XMLSTART$<") // TODO: deprecate + final val alphaKeywords = tokenRange(IF, FORSOME) + final val symbolicKeywords = tokenRange(USCORE, VIEWBOUND) + final val symbolicTokens = tokenRange(COMMA, VIEWBOUND) + final val keywords = alphaKeywords | symbolicKeywords + final val allTokens = tokenRange(minToken, maxToken) final val literalTokens = tokenRange(CHARLIT, SYMBOLLIT) | BitSet(TRUE, FALSE, NULL) |