summaryrefslogtreecommitdiff
path: root/sources/scalac/ast/parser/Scanner.java
diff options
context:
space:
mode:
Diffstat (limited to 'sources/scalac/ast/parser/Scanner.java')
-rw-r--r--sources/scalac/ast/parser/Scanner.java793
1 files changed, 793 insertions, 0 deletions
diff --git a/sources/scalac/ast/parser/Scanner.java b/sources/scalac/ast/parser/Scanner.java
new file mode 100644
index 0000000000..0e4af3a09d
--- /dev/null
+++ b/sources/scalac/ast/parser/Scanner.java
@@ -0,0 +1,793 @@
+/* ____ ____ ____ ____ ______ *\
+** / __// __ \/ __// __ \/ ____/ SOcos COmpiles Scala **
+** __\_ \/ /_/ / /__/ /_/ /\_ \ (c) 2002, LAMP/EPFL **
+** /_____/\____/\___/\____/____/ **
+** **
+** $Id$
+\* */
+
+package scalac.ast.parser;
+
+import scalac.*;
+import scalac.util.Name;
+import scalac.util.Position;
+
+/** A scanner for the programming language Scala.
+ *
+ * @author Matthias Zenger, Martin Odersky
+ * @version 1.0
+ */
+public class Scanner extends TokenData {
+
+ /** layout & character constants
+ */
+ // Width of a tab stop; used to advance ccol when a '\t' is scanned.
+ public int tabinc = 8;
+ public final static byte LF = 0xA; // line feed
+ protected final static byte FF = 0xC; // form feed
+ protected final static byte CR = 0xD; // carriage return
+ // End-of-input marker taken from Sourcefile; fetchToken() turns it into EOF.
+ protected final static byte SU = Sourcefile.SU;
+
+ /** the names of all tokens, indexed by token id (filled by enterKeyword);
+ * numToken is one more than the largest token id entered so far
+ */
+ public Name[] tokenName = new Name[128];
+ public int numToken = 0;
+
+ /** keyword array; maps from name indices to tokens (built by init());
+ * maxKey is the largest name index of any entered keyword
+ */
+ protected byte[] key;
+ protected int maxKey = 0;
+
+ /** we need one token lookahead: `next` holds a token that was fetched
+ * ahead of time, `prev` is scratch space used by nextToken() to save
+ * the current token while it peeks at the following one
+ */
+ protected TokenData next = new TokenData();
+ protected TokenData prev = new TokenData();
+
+ /** the first character position after the previous token
+ * (encoded with Position.encode at the start of fetchToken())
+ */
+ public int lastpos = 0;
+
+ /** the last error position (set by syntaxError); -1 until an error occurs
+ */
+ public int errpos = -1;
+
+ /** the input buffer, and the index in it of the current character
+ */
+ protected byte[] buf;
+ protected int bp;
+
+ /** the current character, i.e. buf[bp]
+ */
+ protected byte ch;
+
+ /** the line and column position of the current character
+ */
+ public int cline;
+ public int ccol;
+
+ /** the current sourcefile
+ */
+ public Sourcefile currentSource;
+
+ /** a buffer for character and string literals; litlen is the number of
+ * bytes in use (putch() doubles the buffer when it is full)
+ */
+ protected byte[] lit = new byte[64];
+ protected int litlen;
+
+ /** the compilation unit; syntax errors are reported through it
+ */
+ public Unit unit;
+
+
+    /** Construct a scanner over the source of the given compilation unit
+     *  and position it on the first token.
+     */
+    public Scanner(Unit unit) {
+        this.unit = unit;
+        currentSource = unit.source;
+        buf = currentSource.getBuffer();
+        // start just in front of the buffer; nextch() steps onto the first character
+        bp = -1;
+        cline = 1;
+        ccol = 0;
+        nextch();
+        token = EMPTY;
+        init();
+        nextToken();
+    }
+
+    /** Step to the next byte of the input buffer, make it the current
+     *  character and advance the column counter by one.
+     */
+    private void nextch() {
+        bp++;
+        ch = buf[bp];
+        ccol++;
+    }
+
+    /** Advance to the next token.
+     *
+     *  @return the position of the token that was current before the call
+     */
+    public int skipToken() {
+        final int skippedPos = pos;
+        nextToken();
+        return skippedPos;
+    }
+
+ /** Advance to the next token, applying three adjustments on top of fetchToken():
+ * - after a '}' a SEMI is inserted when the following token is EOF or starts
+ * on a later line, unless that token is one of those listed in the switch;
+ * - CASE directly followed by CLASS is merged into the single token CASECLASS;
+ * - a SEMI directly followed by ELSE is dropped, so ELSE becomes the current token.
+ * A token that was fetched ahead of time is parked in `next`; `prev` is scratch space.
+ */
+ public void nextToken() {
+ // the token being left is '}': decide whether a SEMI has to follow it
+ if (token == RBRACE) {
+ int prevpos = pos;
+ fetchToken();
+ switch (token) {
+ // these tokens never get a SEMI inserted in front of them
+ case ELSE: case EXTENDS: case WITH:
+ case YIELD: case DO:
+ case COMMA: case SEMI: case DOT:
+ case COLON: case EQUALS: case ARROW:
+ case LARROW: case SUBTYPE: case AT:
+ case HASH: case AS: case IS:
+ case RPAREN: case RBRACKET: case RBRACE:
+ break;
+ default:
+ // shifting by Position.LINESHIFT presumably leaves the line number,
+ // so this asks whether the new token starts on a later line
+ if (token == EOF ||
+ ((pos >>> Position.LINESHIFT) >
+ (prevpos >>> Position.LINESHIFT))) {
+ // park the fetched token as lookahead and report a SEMI
+ // at the position of the '}' instead
+ next.copyFrom(this);
+ this.token = SEMI;
+ this.pos = prevpos;
+ }
+ }
+ } else {
+ // take the parked lookahead token if there is one, otherwise scan a new one
+ if (next.token == EMPTY) {
+ fetchToken();
+ } else {
+ copyFrom(next);
+ next.token = EMPTY;
+ }
+ if (token == CASE) {
+ // peek one token ahead: "case class" becomes CASECLASS
+ prev.copyFrom(this);
+ fetchToken();
+ if (token == CLASS) {
+ token = CASECLASS;
+ } else {
+ // not a case class: park the peeked token and restore CASE
+ next.copyFrom(this);
+ this.copyFrom(prev);
+ }
+ } else if (token == SEMI) {
+ // peek one token ahead: a SEMI in front of ELSE is swallowed
+ prev.copyFrom(this);
+ fetchToken();
+ if (token != ELSE) {
+ next.copyFrom(this);
+ this.copyFrom(prev);
+ }
+ }
+ }
+ //System.out.println("<" + token2string(token) + ">");//DEBUG
+ }
+
+    /** Read the next token from the input buffer into this scanner's token fields.
+     *
+     *  Blanks, tabs, line breaks and comments are skipped while cline/ccol are
+     *  kept up to date; the first significant character then selects how the
+     *  token is scanned. Does nothing once EOF has been reached.
+     */
+    public void fetchToken() {
+        if (token == EOF) return;
+        lastpos = Position.encode(cline, ccol, currentSource.id);
+        int index = bp;
+        while (true) {
+            switch (ch) {
+            case ' ':
+                nextch();
+                break;
+            case '\t':
+                // jump to the next tab stop; nextch() adds the final +1
+                ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
+                nextch();
+                break;
+            case CR:
+                cline++;
+                ccol = 0;
+                nextch();
+                // CR LF counts as a single line break
+                if (ch == LF) {
+                    ccol = 0;
+                    nextch();
+                }
+                break;
+            case LF:
+            case FF:
+                cline++;
+                ccol = 0;
+                nextch();
+                break;
+            default:
+                // start of a token (or of a comment): remember where it begins
+                pos = Position.encode(cline, ccol, currentSource.id);
+                index = bp;
+                switch (ch) {
+                case 'A': case 'B': case 'C': case 'D': case 'E':
+                case 'F': case 'G': case 'H': case 'I': case 'J':
+                case 'K': case 'L': case 'M': case 'N': case 'O':
+                case 'P': case 'Q': case 'R': case 'S': case 'T':
+                case 'U': case 'V': case 'W': case 'X': case 'Y':
+                case 'Z': case '$':
+                case 'a': case 'b': case 'c': case 'd': case 'e':
+                case 'f': case 'g': case 'h': case 'i': case 'j':
+                case 'k': case 'l': case 'm': case 'n': case 'o':
+                case 'p': case 'q': case 'r': case 's': case 't':
+                case 'u': case 'v': case 'w': case 'x': case 'y':
+                case 'z':
+                    nextch();
+                    getIdentRest(index);
+                    return;
+                case '~': case '!': case '@': case '#': case '%':
+                case '^': case '*': case '+': case '-': case '<':
+                case '>': case '?': case ':':
+                case '=': case '&': case '|':
+                    nextch();
+                    getOperatorRest(index);
+                    return;
+                case '/':
+                    nextch();
+                    // either a comment (skip it and keep scanning) or an operator
+                    if (!skipComment()) {
+                        getOperatorRest(index);
+                        return;
+                    }
+                    break;
+                case '_':
+                    nextch();
+                    getIdentOrOperatorRest(index);
+                    return;
+                case '0':
+                    nextch();
+                    if (ch == 'x' || ch == 'X') {
+                        nextch();
+                        // the digits of a hex literal start after the "0x" prefix
+                        getNumber(index + 2, 16);
+                    } else
+                        getNumber(index, 8);
+                    return;
+                case '1': case '2': case '3': case '4':
+                case '5': case '6': case '7': case '8': case '9':
+                    getNumber(index, 10);
+                    return;
+                case '\"':
+                    nextch();
+                    litlen = 0;
+                    // a string literal may not extend past the end of the line
+                    while (ch != '\"' && ch != CR && ch != LF && ch != SU)
+                        getlitch();
+                    if (ch == '\"') {
+                        token = STRINGLIT;
+                        name = Name.fromSource(lit, 0, litlen);
+                        nextch();
+                    }
+                    else
+                        // was misreported as "unclosed character literal"
+                        syntaxError("unclosed string literal");
+                    return;
+                case '\'':
+                    nextch();
+                    litlen = 0;
+                    getlitch();
+                    if (ch == '\'') {
+                        nextch();
+                        token = CHARLIT;
+                        byte[] ascii = new byte[litlen * 2];
+                        int alen = SourceRepresentation.source2ascii(lit, 0, litlen, ascii);
+                        if (alen > 0)
+                            intVal = SourceRepresentation.ascii2string(ascii, 0, alen).charAt(0);
+                        else
+                            intVal = 0;
+                    } else
+                        syntaxError("unclosed character literal");
+                    return;
+                case '.':
+                    nextch();
+                    // ".5" is a floating point literal, a lone '.' is DOT
+                    if (('0' <= ch) && (ch <= '9')) getFraction(index);
+                    else token = DOT;
+                    return;
+                case ';':
+                    nextch(); token = SEMI;
+                    return;
+                case ',':
+                    nextch(); token = COMMA;
+                    return;
+                case '(':
+                    nextch(); token = LPAREN;
+                    return;
+                case '{':
+                    nextch(); token = LBRACE;
+                    return;
+                case ')':
+                    nextch(); token = RPAREN;
+                    return;
+                case '}':
+                    nextch(); token = RBRACE;
+                    return;
+                case '[':
+                    nextch(); token = LBRACKET;
+                    return;
+                case ']':
+                    nextch(); token = RBRACKET;
+                    return;
+                case SU:
+                    // end of input: do not advance past the marker
+                    token = EOF;
+                    currentSource.lines = cline;
+                    return;
+                default:
+                    nextch();
+                    syntaxError("illegal character");
+                    return;
+                }
+            }
+        }
+    }
+
+    /** Skip a comment, if there is one. Must be called with the current
+     *  character being the one that follows an already consumed '/'.
+     *
+     *  A second '/' starts a comment that runs up to (not including) the end
+     *  of the line; a '*' starts a block comment, which may be nested. An
+     *  unterminated block comment is reported as a syntax error.
+     *
+     *  @return true if a comment was skipped (or an unclosed one reported),
+     *          false if the current character does not start a comment
+     */
+    private boolean skipComment() {
+        if (ch == '/') {
+            do {
+                nextch();
+            } while ((ch != CR) && (ch != LF) && (ch != SU));
+            return true;
+        }
+        if (ch != '*') {
+            return false;
+        }
+        // consume the '*' of the opener so that "/*/" is not taken as a complete comment
+        nextch();
+        int openComments = 1;
+        while (openComments > 0) {
+            if (ch == SU) {
+                // never read past the end-of-input marker, whatever the nesting depth
+                syntaxError("unclosed comment");
+                return true;
+            } else if (ch == CR) {
+                cline++;
+                ccol = 0;
+                nextch();
+                if (ch == LF) {
+                    ccol = 0;
+                    nextch();
+                }
+            } else if (ch == LF) {
+                cline++;
+                ccol = 0;
+                nextch();
+            } else if (ch == '\t') {
+                ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
+                nextch();
+            } else if (ch == '/') {
+                nextch();
+                if (ch == '*') {
+                    // nested comment opener
+                    nextch();
+                    openComments++;
+                }
+            } else if (ch == '*') {
+                nextch();
+                // in a run of stars only the last one can pair with '/';
+                // the others are re-examined by the next loop iteration
+                if (ch == '/') {
+                    nextch();
+                    openComments--;
+                }
+            } else {
+                nextch();
+            }
+        }
+        return true;
+    }
+
+    /** Scan the remainder of an alphanumeric identifier that starts at
+     *  buf[index]. Letters, '$' and digits are consumed; an underscore hands
+     *  over to getIdentOrOperatorRest; anything else ends the identifier.
+     */
+    private void getIdentRest(int index) {
+        for (;;) {
+            boolean letter =
+                ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == '$';
+            boolean digit = '0' <= ch && ch <= '9';
+            if (letter || digit) {
+                nextch();
+                continue;
+            }
+            if (ch == '_') {
+                nextch();
+                getIdentOrOperatorRest(index);
+            } else {
+                treatIdent(index, bp);
+            }
+            return;
+        }
+    }
+
+    /** Scan the remainder of an operator identifier that starts at buf[index].
+     *  A '/' that turns out to open a comment ends the operator just before
+     *  it; an underscore hands over to getIdentOrOperatorRest.
+     */
+    private void getOperatorRest(int index) {
+        for (;;) {
+            if ("~!@#%^*+-<>?:=&|".indexOf(ch) >= 0) {
+                nextch();
+            } else if (ch == '/') {
+                int slashPos = bp;
+                nextch();
+                if (skipComment()) {
+                    // the operator ends in front of the comment's '/'
+                    treatIdent(index, slashPos);
+                    return;
+                }
+            } else if (ch == '_') {
+                nextch();
+                getIdentOrOperatorRest(index);
+                return;
+            } else {
+                treatIdent(index, bp);
+                return;
+            }
+        }
+    }
+
+    /** Continue an identifier after an underscore: depending on the current
+     *  character the rest is scanned as an alphanumeric identifier or as an
+     *  operator; further underscores are consumed; anything else ends it.
+     */
+    private void getIdentOrOperatorRest(int index) {
+        boolean alphanumeric =
+            ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') ||
+            ('0' <= ch && ch <= '9') || ch == '$';
+        if (alphanumeric) {
+            getIdentRest(index);
+        } else if ("~!@#%^*+-<>?:=&|/".indexOf(ch) >= 0) {
+            getOperatorRest(index);
+        } else if (ch == '_') {
+            nextch();
+            getIdentOrOperatorRest(index);
+        } else {
+            treatIdent(index, bp);
+        }
+    }
+
+    /** Turn buf[start..end-1] into the current name and set the token to the
+     *  keyword registered for that name, or to IDENTIFIER if there is none.
+     */
+    void treatIdent(int start, int end) {
+        name = Name.fromAscii(buf, start, end - start);
+        if (name.index > maxKey)
+            token = IDENTIFIER;
+        else
+            token = key[name.index];
+    }
+
+ /** generate an error at the given position
+ */
+ void syntaxError(int pos, String msg) {
+ unit.error(pos, msg);
+ token = ERROR;
+ errpos = pos;
+ }
+
+ /** generate an error at the current token position
+ */
+ void syntaxError(String msg) {
+ syntaxError(pos, msg);
+ }
+
+    /** Append a character to the "lit" buffer, doubling the buffer first
+     *  when it is full.
+     */
+    protected void putch(byte c) {
+        if (litlen == lit.length) {
+            byte[] grown = new byte[2 * lit.length];
+            System.arraycopy(lit, 0, grown, 0, lit.length);
+            lit = grown;
+        }
+        lit[litlen++] = c;
+    }
+
+    /** Return true iff the six characters starting at the current one form a
+     *  unicode escape: a backslash, a 'u' and four hexadecimal digits.
+     */
+    protected boolean isUnicode() {
+        if (bp + 6 >= buf.length)
+            return false;
+        if (buf[bp] != '\\' || buf[bp + 1] != 'u')
+            return false;
+        for (int i = 2; i <= 5; i++) {
+            if (SourceRepresentation.digit2int(buf[bp + i], 16) < 0)
+                return false;
+        }
+        return true;
+    }
+
+ /** read next character in character or string literal and append it to
+ * the "lit" buffer:
+ * - a unicode escape is copied verbatim (all six characters);
+ * - an octal escape is replaced by the byte value it denotes;
+ * - the escapes \b \t \n \f \r \" \' \\ are copied as backslash + character;
+ * - any other escape is reported as an error and the character kept;
+ * - any other character is copied as is.
+ * Nothing is appended or consumed when the end-of-input marker is reached.
+ */
+ protected void getlitch() {
+ if (ch == '\\') {
+ if (isUnicode()) {
+ // copy '\', 'u' and the four hex digits unchanged
+ putch(ch); nextch();
+ putch(ch); nextch();
+ putch(ch); nextch();
+ putch(ch); nextch();
+ putch(ch); nextch();
+ putch(ch); nextch();
+ } else {
+ nextch();
+ if ('0' <= ch && ch <= '7') {
+ // octal escape: up to three digits, the third one only
+ // when the first digit is at most '3'
+ byte leadch = ch;
+ int oct = SourceRepresentation.digit2int(ch, 8);
+ nextch();
+ if ('0' <= ch && ch <= '7') {
+ oct = oct * 8 + SourceRepresentation.digit2int(ch, 8);
+ nextch();
+ if (leadch <= '3' && '0' <= ch && ch <= '7') {
+ oct = oct * 8 + SourceRepresentation.digit2int(ch, 8);
+ nextch();
+ }
+ }
+ putch((byte)oct);
+ } else if (ch != SU) {
+ switch (ch) {
+ case 'b': case 't': case 'n':
+ case 'f': case 'r': case '\"':
+ case '\'': case '\\':
+ // keep the escape in source form (backslash + character)
+ putch((byte)'\\');
+ putch(ch);
+ break;
+ default:
+ // the error position is the encoded current position minus one,
+ // presumably pointing at the backslash -- TODO confirm
+ syntaxError(Position.encode(cline, ccol, currentSource.id) - 1, "invalid escape character");
+ putch(ch);
+ }
+ nextch();
+ }
+ }
+ } else if (ch != SU) {
+ putch(ch);
+ nextch();
+ }
+ }
+
+    /** Read the fractional part, optional exponent and optional type suffix
+     *  of a floating point number. Afterwards floatVal is the value of
+     *  buf[index..bp-1] and the token is DOUBLELIT, or FLOATLIT when the
+     *  number carries an 'f'/'F' suffix.
+     */
+    protected void getFraction(int index) {
+        while (SourceRepresentation.digit2int(ch, 10) >= 0)
+            nextch();
+        token = DOUBLELIT;
+        if (ch == 'e' || ch == 'E') {
+            nextch();
+            if (ch == '+' || ch == '-') {
+                byte sign = ch;
+                nextch();
+                if (ch < '0' || ch > '9') {
+                    // no digit after the sign: step back onto the sign
+                    bp--;
+                    ccol--;
+                    ch = sign;
+                }
+            }
+            while (SourceRepresentation.digit2int(ch, 10) >= 0)
+                nextch();
+        }
+        double limit = Double.MAX_VALUE;
+        switch (ch) {
+        case 'd': case 'D':
+            nextch();
+            break;
+        case 'f': case 'F':
+            token = FLOATLIT;
+            limit = Float.MAX_VALUE;
+            nextch();
+            break;
+        default:
+            break;
+        }
+        String text = new String(buf, index, bp - index);
+        try {
+            floatVal = Double.parseDouble(text);
+            if (floatVal > limit)
+                syntaxError("floating point number too large");
+        } catch (NumberFormatException e) {
+            syntaxError("malformed floating point number");
+        }
+    }
+
+ /** intVal := buf[index..index+len-1], converted to an integer number.
+ * base = the base of the number; one of 8, 10, 16.
+ * max = the maximal number before an overflow.
+ */
+ protected void makeInt (int index, int len, int base, long max) {
+ intVal = 0;
+ int divider = (base == 10 ? 1 : 2);
+ for (int i = 0; i < len; i++) {
+ int d = SourceRepresentation.digit2int(buf[index + i], base);
+ if (d < 0) {
+ syntaxError("malformed integer number");
+ return;
+ }
+ if (intVal < 0 ||
+ max / (base / divider) < intVal ||
+ max - (d / divider) < (intVal * (base / divider) - 0)) {
+ syntaxError("integer number too large");
+ return;
+ }
+ intVal = intVal * base + d;
+ }
+ }
+
+ /** read a number,
+ * and convert buf[index..], setting either intVal or floatVal.
+ * base = the base of the number; one of 8, 10, 16.
+ */
+ protected void getNumber(int index, int base) {
+ while (SourceRepresentation.digit2int(ch, base == 8 ? 10 : base) >= 0) {
+ nextch();
+ }
+ if (base <= 10 && ch == '.') {
+ nextch();
+ if ((ch >= '0') && (ch <= '9'))
+ getFraction(index);
+ else {
+ ch = buf[--bp]; ccol--;
+ makeInt(index, bp - index, base, Integer.MAX_VALUE);
+ intVal = (int)intVal;
+ token = INTLIT;
+ }
+ } else if (base <= 10 &&
+ (ch == 'e' || ch == 'E' ||
+ ch == 'f' || ch == 'F' ||
+ ch == 'd' || ch == 'D'))
+ getFraction(index);
+ else {
+ if (ch == 'l' || ch == 'L') {
+ makeInt(index, bp - index, base, Long.MAX_VALUE);
+ nextch();
+ token = LONGLIT;
+ } else {
+ makeInt(index, bp - index, base, Integer.MAX_VALUE);
+ intVal = (int)intVal;
+ token = INTLIT;
+ }
+ }
+ }
+
+ public int name2token(Name name) {
+ if (name.index <= maxKey)
+ return key[name.index];
+ else
+ return IDENTIFIER;
+ }
+
+    /** Return a printable description of the given token for use in
+     *  messages. Tokens with a registered spelling are shown quoted; a token
+     *  id that is out of range or has no registered spelling is shown as
+     *  '<id>' instead of raising an exception.
+     */
+    public String token2string(int token) {
+        switch (token) {
+        case IDENTIFIER:
+            return "identifier";
+        case CHARLIT:
+            return "character literal";
+        case INTLIT:
+            return "integer literal";
+        case LONGLIT:
+            return "long literal";
+        case FLOATLIT:
+            return "float literal";
+        case DOUBLELIT:
+            return "double literal";
+        case STRINGLIT:
+            return "string literal";
+        case LPAREN:
+            return "'('";
+        case RPAREN:
+            return "')'";
+        case LBRACE:
+            return "'{'";
+        case RBRACE:
+            return "'}'";
+        case LBRACKET:
+            return "'['";
+        case RBRACKET:
+            return "']'";
+        case EOF:
+            return "eof";
+        case ERROR:
+            return "something";
+        case SEMI:
+            return "';'";
+        case COMMA:
+            return "','";
+        default:
+            // tokenName only has entries for tokens passed to enterKeyword;
+            // every other slot is null and used to cause a NullPointerException
+            if (token >= 0 && token < tokenName.length && tokenName[token] != null)
+                return "'" + tokenName[token].toString() + "'";
+            return "'<" + token + ">'";
+        }
+    }
+
+    /** Return a printable form of the current token, including its name or
+     *  value where it has one.
+     */
+    public String toString() {
+        if (token == IDENTIFIER) return "id(" + name + ")";
+        if (token == CHARLIT)    return "char(" + intVal + ")";
+        if (token == INTLIT)     return "int(" + intVal + ")";
+        if (token == LONGLIT)    return "long(" + intVal + ")";
+        if (token == FLOATLIT)   return "float(" + floatVal + ")";
+        if (token == DOUBLELIT)  return "double(" + floatVal + ")";
+        if (token == STRINGLIT)  return "string(" + name + ")";
+        if (token == SEMI)       return ";";
+        if (token == COMMA)      return ",";
+        return token2string(token);
+    }
+
+    /** Register the spelling s for the token tokenId: record its name in
+     *  tokenName (growing the table as needed) and update maxKey and
+     *  numToken so that init() can build the keyword table.
+     */
+    protected void enterKeyword(String s, int tokenId) {
+        // grow until tokenId is a valid index (tokenId == length is out of range too)
+        while (tokenId >= tokenName.length) {
+            Name[] newTokName = new Name[tokenName.length * 2];
+            // copy only the existing entries; the old array is the shorter one
+            System.arraycopy(tokenName, 0, newTokName, 0, tokenName.length);
+            tokenName = newTokName;
+        }
+        Name n = Name.fromString(s);
+        tokenName[tokenId] = n;
+        if (n.index > maxKey)
+            maxKey = n.index;
+        if (tokenId >= numToken)
+            numToken = tokenId + 1;
+    }
+
+    /** Register all keywords and build the table that maps name indices to
+     *  tokens; every name index without a keyword maps to IDENTIFIER.
+     */
+    protected void init() {
+        initKeywords();
+        key = new byte[maxKey + 1];
+        int k = 0;
+        while (k <= maxKey) {
+            key[k] = IDENTIFIER;
+            k++;
+        }
+        for (byte tok = 0; tok < numToken; tok++) {
+            Name spelling = tokenName[tok];
+            if (spelling != null)
+                key[spelling.index] = tok;
+        }
+    }
+
+ /** Register the spelling of every reserved word and reserved symbol
+ * together with its token, by calling enterKeyword once per entry.
+ */
+ protected void initKeywords() {
+ // reserved words
+ enterKeyword("if", IF);
+ enterKeyword("for", FOR);
+ enterKeyword("else", ELSE);
+ enterKeyword("this", THIS);
+ enterKeyword("null", NULL);
+ enterKeyword("new", NEW);
+ enterKeyword("with", WITH);
+ enterKeyword("super", SUPER);
+ enterKeyword("case", CASE);
+ enterKeyword("val", VAL);
+ enterKeyword("abstract", ABSTRACT);
+ enterKeyword("final", FINAL);
+ enterKeyword("private", PRIVATE);
+ enterKeyword("protected", PROTECTED);
+ enterKeyword("qualified", QUALIFIED);
+ enterKeyword("override", OVERRIDE);
+ enterKeyword("var", VAR);
+ enterKeyword("def", DEF);
+ enterKeyword("type", TYPE);
+ enterKeyword("extends", EXTENDS);
+ enterKeyword("let", LET);
+ enterKeyword("module", MODULE);
+ enterKeyword("class",CLASS);
+ enterKeyword("constr",CONSTR);
+ enterKeyword("import", IMPORT);
+ enterKeyword("package", PACKAGE);
+ // reserved symbols
+ enterKeyword(".", DOT);
+ enterKeyword("_", USCORE);
+ enterKeyword(":", COLON);
+ enterKeyword("=", EQUALS);
+ enterKeyword("=>", ARROW);
+ enterKeyword("<-", LARROW);
+ enterKeyword("<:", SUBTYPE);
+ // further reserved words and symbols
+ enterKeyword("yield", YIELD);
+ enterKeyword("do", DO);
+ enterKeyword("@", AT);
+ enterKeyword("#", HASH);
+ enterKeyword("trait", TRAIT);
+ enterKeyword("as", AS);
+ enterKeyword("is", IS);
+ }
+}
+
+