diff options
Diffstat (limited to 'sources/scalac/ast/parser/Scanner.java')
-rw-r--r-- | sources/scalac/ast/parser/Scanner.java | 793 |
1 files changed, 793 insertions, 0 deletions
diff --git a/sources/scalac/ast/parser/Scanner.java b/sources/scalac/ast/parser/Scanner.java new file mode 100644 index 0000000000..0e4af3a09d --- /dev/null +++ b/sources/scalac/ast/parser/Scanner.java @@ -0,0 +1,793 @@ +/* ____ ____ ____ ____ ______ *\ +** / __// __ \/ __// __ \/ ____/ SOcos COmpiles Scala ** +** __\_ \/ /_/ / /__/ /_/ /\_ \ (c) 2002, LAMP/EPFL ** +** /_____/\____/\___/\____/____/ ** +** ** +** $Id$ +\* */ + +package scalac.ast.parser; + +import scalac.*; +import scalac.util.Name; +import scalac.util.Position; + +/** A scanner for the programming language Scala. + * + * @author Matthias Zenger, Martin Odersky + * @version 1.0 + */ +public class Scanner extends TokenData { + + /** layout & character constants + */ + public int tabinc = 8; + public final static byte LF = 0xA; + protected final static byte FF = 0xC; + protected final static byte CR = 0xD; + protected final static byte SU = Sourcefile.SU; + + /** the names of all tokens + */ + public Name[] tokenName = new Name[128]; + public int numToken = 0; + + /** keyword array; maps from name indices to tokens + */ + protected byte[] key; + protected int maxKey = 0; + + /** we need one token lookahead + */ + protected TokenData next = new TokenData(); + protected TokenData prev = new TokenData(); + + /** the first character position after the previous token + */ + public int lastpos = 0; + + /** the last error position + */ + public int errpos = -1; + + /** the input buffer: + */ + protected byte[] buf; + protected int bp; + + /** the current character + */ + protected byte ch; + + /** the line and column position of the current character + */ + public int cline; + public int ccol; + + /** the current sourcefile + */ + public Sourcefile currentSource; + + /** a buffer for character and string literals + */ + protected byte[] lit = new byte[64]; + protected int litlen; + + /** the compilation unit + */ + public Unit unit; + + + /** Construct a scanner from a file input stream. + */ + public Scanner(Unit unit) { + this.unit = unit; + buf = (currentSource = unit.source).getBuffer(); + cline = 1; + bp = -1; + ccol = 0; + nextch(); + token = EMPTY; + init(); + nextToken(); + } + + private void nextch() { + ch = buf[++bp]; ccol++; + } + + /** read next token and return last position + */ + public int skipToken() { + int p = pos; + nextToken(); + return p; + } + + public void nextToken() { + if (token == RBRACE) { + int prevpos = pos; + fetchToken(); + switch (token) { + case ELSE: case EXTENDS: case WITH: + case YIELD: case DO: + case COMMA: case SEMI: case DOT: + case COLON: case EQUALS: case ARROW: + case LARROW: case SUBTYPE: case AT: + case HASH: case AS: case IS: + case RPAREN: case RBRACKET: case RBRACE: + break; + default: + if (token == EOF || + ((pos >>> Position.LINESHIFT) > + (prevpos >>> Position.LINESHIFT))) { + next.copyFrom(this); + this.token = SEMI; + this.pos = prevpos; + } + } + } else { + if (next.token == EMPTY) { + fetchToken(); + } else { + copyFrom(next); + next.token = EMPTY; + } + if (token == CASE) { + prev.copyFrom(this); + fetchToken(); + if (token == CLASS) { + token = CASECLASS; + } else { + next.copyFrom(this); + this.copyFrom(prev); + } + } else if (token == SEMI) { + prev.copyFrom(this); + fetchToken(); + if (token != ELSE) { + next.copyFrom(this); + this.copyFrom(prev); + } + } + } + //System.out.println("<" + token2string(token) + ">");//DEBUG + } + + /** read next token + */ + public void fetchToken() { + if (token == EOF) return; + lastpos = Position.encode(cline, ccol, currentSource.id); + int index = bp; + while(true) { + switch (ch) { + case ' ': + nextch(); + break; + case '\t': + ccol = ((ccol - 1) / tabinc * tabinc) + tabinc; + nextch(); + break; + case CR: + cline++; + ccol = 0; + nextch(); + if (ch == LF) { + ccol = 0; + nextch(); + } + break; + case LF: + case FF: + cline++; + ccol = 0; + nextch(); + break; + default: + pos = Position.encode(cline, ccol, currentSource.id); + index = bp; + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': case '$': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + nextch(); + getIdentRest(index); + return; + case '~': case '!': case '@': case '#': case '%': + case '^': case '*': case '+': case '-': case '<': + case '>': case '?': case ':': + case '=': case '&': case '|': + nextch(); + getOperatorRest(index); + return; + case '/': + nextch(); + if (!skipComment()) { + getOperatorRest(index); + return; + } + break; + case '_': + nextch(); + getIdentOrOperatorRest(index); + return; + case '0': + nextch(); + if (ch == 'x' || ch == 'X') { + nextch(); + getNumber(index + 2, 16); + } else + getNumber(index, 8); + return; + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + getNumber(index, 10); + return; + case '\"': + nextch(); + litlen = 0; + while (ch != '\"' && ch != CR && ch != LF && ch != SU) + getlitch(); + if (ch == '\"') { + token = STRINGLIT; + name = Name.fromSource(lit, 0, litlen); + nextch(); + } + else + syntaxError("unclosed character literal"); + return; + case '\'': + nextch(); + litlen = 0; + getlitch(); + if (ch == '\'') { + nextch(); + token = CHARLIT; + byte[] ascii = new byte[litlen * 2]; + int alen = SourceRepresentation.source2ascii(lit, 0, litlen, ascii); + if (alen > 0) + intVal = SourceRepresentation.ascii2string(ascii, 0, alen).charAt(0); + else + intVal = 0; + } else + syntaxError("unclosed character literal"); + return; + case '.': + nextch(); + if (('0' <= ch) && (ch <= '9')) getFraction(index); + else token = DOT; + return; + case ';': + nextch(); token = SEMI; + return; + case ',': + nextch(); token = COMMA; + return; + case '(': + nextch(); token = LPAREN; + return; + case '{': + nextch(); token = LBRACE; + return; + case ')': + nextch(); token = RPAREN; + return; + case '}': + nextch(); token = RBRACE; + return; + case '[': + nextch(); token = LBRACKET; + return; + case ']': + nextch(); token = RBRACKET; + return; + case SU: + token = EOF; + currentSource.lines = cline; + return; + default: + nextch(); + syntaxError("illegal character"); + return; + } + } + } + } + + private boolean skipComment() { + if (ch == '/') { + do { + nextch(); + } while ((ch != CR) && (ch != LF) && (ch != SU)); + return true; + } else if (ch == '*') { + int openComments = 1; + while (openComments > 0) { + do { + do { + if (ch == CR) { + cline++; + ccol = 0; + nextch(); + if (ch == LF) { + ccol = 0; + nextch(); + } + } else if (ch == LF) { + cline++; + ccol = 0; + nextch(); + } + else if (ch == '\t') { + ccol = ((ccol - 1) / tabinc * tabinc) + tabinc; + nextch(); + } else if (ch == '/') { + nextch(); + if (ch == '*') { + nextch(); + openComments++; + } + } else { + nextch(); + } + } while ((ch != '*') && (ch != SU)); + while (ch == '*') { + nextch(); + } + } while (ch != '/' && ch != SU); + if (ch == '/') { + nextch(); + openComments--; + } else { + syntaxError("unclosed comment"); + return true; + } + } + return true; + } else { + return false; + } + } + + private void getIdentRest(int index) { + while (true) { + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': case '$': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + nextch(); + break; + case '_': + nextch(); + getIdentOrOperatorRest(index); + return; + default: + treatIdent(index, bp); + return; + } + } + } + + private void getOperatorRest(int index) { + while (true) { + switch (ch) { + case '~': case '!': case '@': case '#': case '%': + case '^': case '*': case '+': case '-': case '<': + case '>': case '?': case ':': + case '=': case '&': case '|': + nextch(); + break; + case '/': + int lastbp = bp; + nextch(); + if (skipComment()) { + treatIdent(index, lastbp); + return; + } else { + break; + } + case '_': + nextch(); + getIdentOrOperatorRest(index); + return; + default: + treatIdent(index, bp); + return; + } + } + } + + private void getIdentOrOperatorRest(int index) { + switch (ch) { + case 'A': case 'B': case 'C': case 'D': case 'E': + case 'F': case 'G': case 'H': case 'I': case 'J': + case 'K': case 'L': case 'M': case 'N': case 'O': + case 'P': case 'Q': case 'R': case 'S': case 'T': + case 'U': case 'V': case 'W': case 'X': case 'Y': + case 'Z': case '$': + case 'a': case 'b': case 'c': case 'd': case 'e': + case 'f': case 'g': case 'h': case 'i': case 'j': + case 'k': case 'l': case 'm': case 'n': case 'o': + case 'p': case 'q': case 'r': case 's': case 't': + case 'u': case 'v': case 'w': case 'x': case 'y': + case 'z': + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + getIdentRest(index); + return; + case '~': case '!': case '@': case '#': case '%': + case '^': case '*': case '+': case '-': case '<': + case '>': case '?': case ':': + case '=': case '&': case '|': + case '/': + getOperatorRest(index); + return; + case '_': + nextch(); + getIdentOrOperatorRest(index); + return; + default: + treatIdent(index, bp); + return; + } + } + + void treatIdent(int start, int end) { + name = Name.fromAscii(buf, start, end - start); + if (name.index <= maxKey) + token = key[name.index]; + else + token = IDENTIFIER; + } + + /** generate an error at the given position + */ + void syntaxError(int pos, String msg) { + unit.error(pos, msg); + token = ERROR; + errpos = pos; + } + + /** generate an error at the current token position + */ + void syntaxError(String msg) { + syntaxError(pos, msg); + } + + /** append characteter to "lit" buffer + */ + protected void putch(byte c) { + if (litlen == lit.length) { + byte[] newlit = new byte[lit.length * 2]; + System.arraycopy(lit, 0, newlit, 0, lit.length); + lit = newlit; + } + lit[litlen++] = c; + } + + /** return true iff next 6 characters are a valid unicode sequence: + */ + protected boolean isUnicode() { + return + (bp + 6) < buf.length && + (buf[bp] == '\\') && + (buf[bp+1] == 'u') && + (SourceRepresentation.digit2int(buf[bp+2], 16) >= 0) && + (SourceRepresentation.digit2int(buf[bp+3], 16) >= 0) && + (SourceRepresentation.digit2int(buf[bp+4], 16) >= 0) && + (SourceRepresentation.digit2int(buf[bp+5], 16) >= 0); + } + + /** read next character in character or string literal: + */ + protected void getlitch() { + if (ch == '\\') { + if (isUnicode()) { + putch(ch); nextch(); + putch(ch); nextch(); + putch(ch); nextch(); + putch(ch); nextch(); + putch(ch); nextch(); + putch(ch); nextch(); + } else { + nextch(); + if ('0' <= ch && ch <= '7') { + byte leadch = ch; + int oct = SourceRepresentation.digit2int(ch, 8); + nextch(); + if ('0' <= ch && ch <= '7') { + oct = oct * 8 + SourceRepresentation.digit2int(ch, 8); + nextch(); + if (leadch <= '3' && '0' <= ch && ch <= '7') { + oct = oct * 8 + SourceRepresentation.digit2int(ch, 8); + nextch(); + } + } + putch((byte)oct); + } else if (ch != SU) { + switch (ch) { + case 'b': case 't': case 'n': + case 'f': case 'r': case '\"': + case '\'': case '\\': + putch((byte)'\\'); + putch(ch); + break; + default: + syntaxError(Position.encode(cline, ccol, currentSource.id) - 1, "invalid escape character"); + putch(ch); + } + nextch(); + } + } + } else if (ch != SU) { + putch(ch); + nextch(); + } + } + + /** read fractional part of floating point number; + * Then floatVal := buf[index..], converted to a floating point number. + */ + protected void getFraction(int index) { + while (SourceRepresentation.digit2int(ch, 10) >= 0) { + nextch(); + } + token = DOUBLELIT; + if ((ch == 'e') || (ch == 'E')) { + nextch(); + if ((ch == '+') || (ch == '-')) { + byte sign = ch; + nextch(); + if (('0' > ch) || (ch > '9')) { + ch = sign; + bp--; + ccol--; + } + } + while (SourceRepresentation.digit2int(ch, 10) >= 0) { + nextch(); + } + } + double limit = Double.MAX_VALUE; + if ((ch == 'd') || (ch == 'D')) { + nextch(); + } else if ((ch == 'f') || (ch == 'F')) { + token = FLOATLIT; + limit = Float.MAX_VALUE; + nextch(); + } + try { + floatVal = Double.valueOf(new String(buf, index, bp - index)).doubleValue(); + if (floatVal > limit) + syntaxError("floating point number too large"); + } catch (NumberFormatException e) { + syntaxError("malformed floating point number"); + } + } + + /** intVal := buf[index..index+len-1], converted to an integer number. + * base = the base of the number; one of 8, 10, 16. + * max = the maximal number before an overflow. + */ + protected void makeInt (int index, int len, int base, long max) { + intVal = 0; + int divider = (base == 10 ? 1 : 2); + for (int i = 0; i < len; i++) { + int d = SourceRepresentation.digit2int(buf[index + i], base); + if (d < 0) { + syntaxError("malformed integer number"); + return; + } + if (intVal < 0 || + max / (base / divider) < intVal || + max - (d / divider) < (intVal * (base / divider) - 0)) { + syntaxError("integer number too large"); + return; + } + intVal = intVal * base + d; + } + } + + /** read a number, + * and convert buf[index..], setting either intVal or floatVal. + * base = the base of the number; one of 8, 10, 16. + */ + protected void getNumber(int index, int base) { + while (SourceRepresentation.digit2int(ch, base == 8 ? 10 : base) >= 0) { + nextch(); + } + if (base <= 10 && ch == '.') { + nextch(); + if ((ch >= '0') && (ch <= '9')) + getFraction(index); + else { + ch = buf[--bp]; ccol--; + makeInt(index, bp - index, base, Integer.MAX_VALUE); + intVal = (int)intVal; + token = INTLIT; + } + } else if (base <= 10 && + (ch == 'e' || ch == 'E' || + ch == 'f' || ch == 'F' || + ch == 'd' || ch == 'D')) + getFraction(index); + else { + if (ch == 'l' || ch == 'L') { + makeInt(index, bp - index, base, Long.MAX_VALUE); + nextch(); + token = LONGLIT; + } else { + makeInt(index, bp - index, base, Integer.MAX_VALUE); + intVal = (int)intVal; + token = INTLIT; + } + } + } + + public int name2token(Name name) { + if (name.index <= maxKey) + return key[name.index]; + else + return IDENTIFIER; + } + + public String token2string(int token) { + switch (token) { + case IDENTIFIER: + return "identifier"; + case CHARLIT: + return "character literal"; + case INTLIT: + return "integer literal"; + case LONGLIT: + return "long literal"; + case FLOATLIT: + return "float literal"; + case DOUBLELIT: + return "double literal"; + case STRINGLIT: + return "string literal"; + case LPAREN: + return "'('"; + case RPAREN: + return "')'"; + case LBRACE: + return "'{'"; + case RBRACE: + return "'}'"; + case LBRACKET: + return "'['"; + case RBRACKET: + return "']'"; + case EOF: + return "eof"; + case ERROR: + return "something"; + case SEMI: + return "';'"; + case COMMA: + return "','"; + default: + try { + return "'" + tokenName[token].toString() + "'"; + } catch (ArrayIndexOutOfBoundsException e) { + return "'<" + token + ">'"; + } + } + } + + public String toString() { + switch (token) { + case IDENTIFIER: + return "id(" + name + ")"; + case CHARLIT: + return "char(" + intVal + ")"; + case INTLIT: + return "int(" + intVal + ")"; + case LONGLIT: + return "long(" + intVal + ")"; + case FLOATLIT: + return "float(" + floatVal + ")"; + case DOUBLELIT: + return "double(" + floatVal + ")"; + case STRINGLIT: + return "string(" + name + ")"; + case SEMI: + return ";"; + case COMMA: + return ","; + default: + return token2string(token); + } + } + + protected void enterKeyword(String s, int tokenId) { + while (tokenId > tokenName.length) { + Name[] newTokName = new Name[tokenName.length * 2]; + System.arraycopy(tokenName, 0, newTokName, 0, newTokName.length); + tokenName = newTokName; + } + Name n = Name.fromString(s); + tokenName[tokenId] = n; + if (n.index > maxKey) + maxKey = n.index; + if (tokenId >= numToken) + numToken = tokenId + 1; + } + + protected void init() { + initKeywords(); + key = new byte[maxKey+1]; + for (int i = 0; i <= maxKey; i++) + key[i] = IDENTIFIER; + for (byte j = 0; j < numToken; j++) + if (tokenName[j] != null) + key[tokenName[j].index] = j; + } + + protected void initKeywords() { + enterKeyword("if", IF); + enterKeyword("for", FOR); + enterKeyword("else", ELSE); + enterKeyword("this", THIS); + enterKeyword("null", NULL); + enterKeyword("new", NEW); + enterKeyword("with", WITH); + enterKeyword("super", SUPER); + enterKeyword("case", CASE); + enterKeyword("val", VAL); + enterKeyword("abstract", ABSTRACT); + enterKeyword("final", FINAL); + enterKeyword("private", PRIVATE); + enterKeyword("protected", PROTECTED); + enterKeyword("qualified", QUALIFIED); + enterKeyword("override", OVERRIDE); + enterKeyword("var", VAR); + enterKeyword("def", DEF); + enterKeyword("type", TYPE); + enterKeyword("extends", EXTENDS); + enterKeyword("let", LET); + enterKeyword("module", MODULE); + enterKeyword("class",CLASS); + enterKeyword("constr",CONSTR); + enterKeyword("import", IMPORT); + enterKeyword("package", PACKAGE); + enterKeyword(".", DOT); + enterKeyword("_", USCORE); + enterKeyword(":", COLON); + enterKeyword("=", EQUALS); + enterKeyword("=>", ARROW); + enterKeyword("<-", LARROW); + enterKeyword("<:", SUBTYPE); + enterKeyword("yield", YIELD); + enterKeyword("do", DO); + enterKeyword("@", AT); + enterKeyword("#", HASH); + enterKeyword("trait", TRAIT); + enterKeyword("as", AS); + enterKeyword("is", IS); + } +} + + |