path: root/sources/scalac/ast/parser/Scanner.java



/*     ____ ____  ____ ____  ______                                     *\
**    / __// __ \/ __// __ \/ ____/    SOcos COmpiles Scala             **
**  __\_ \/ /_/ / /__/ /_/ /\_ \       (c) 2002, LAMP/EPFL              **
** /_____/\____/\___/\____/____/                                        **
**                                                                      **
** $Id$
\*                                                                      */

package scalac.ast.parser;

import scalac.*;
import scalac.util.Name;
import scalac.util.Position;

/** A scanner for the programming language Scala.
 *
 *  @author     Matthias Zenger, Martin Odersky
 *  @version    1.0
 */
public class Scanner extends TokenData {

    /** Layout and character constants.
     *  tabinc is the tab-stop width used when the column is advanced over
     *  a tab character; SU is the byte that marks the end of the input.
     */
    public int tabinc = 8;
    public final static byte LF = 0xA;
    protected final static byte FF = 0xC;
    protected final static byte CR = 0xD;
    protected final static byte SU = Sourcefile.SU;

    /** The names of all tokens, indexed by token id (filled by
     *  enterKeyword); numToken is one more than the largest id entered.
     */
    public Name[]       tokenName = new Name[128];
    public int          numToken = 0;

    /** Keyword array; maps from name indices to tokens. It is built by
     *  init(); maxKey is the largest name index of any entered keyword.
     */
    protected byte[]    key;
    protected int       maxKey = 0;

    /** We need one token of lookahead: next holds a token that was fetched
     *  ahead of time (its token field is EMPTY when nothing is buffered),
     *  prev is scratch space used to restore the current token afterwards.
     */
    protected TokenData next = new TokenData();
    protected TokenData prev = new TokenData();

    /** The first character position after the previous token
     *  (recorded at the start of every fetchToken call).
     */
    public int          lastpos = 0;

    /** The position of the last reported syntax error; -1 if none so far.
     */
    public int          errpos = -1;

    /** The input buffer and the index of the current character in it.
     */
    protected byte[]    buf;
    protected int       bp;

    /** The current character, i.e. buf[bp].
     */
    protected byte      ch;

    /** The line and column position of the current character.
     */
    public int          cline;
    public int          ccol;

    /** The current source file.
     */
    public Sourcefile   currentSource;

    /** A growable buffer for the contents of character and string
     *  literals; litlen is the number of bytes currently in use.
     */
    protected byte[]    lit = new byte[64];
    protected int       litlen;

    /** The compilation unit being scanned; errors are reported to it.
     */
    public Unit unit;


    /** Creates a scanner over the source of the given compilation unit,
     *  builds the keyword table and reads the first token.
     */
    public Scanner(Unit unit) {
        this.unit = unit;
        this.currentSource = unit.source;
        this.buf = this.currentSource.getBuffer();
        this.cline = 1;
        this.bp = -1;
        this.ccol = 0;
        // load the first character; this moves bp to 0 and ccol to 1
        nextch();
        this.token = EMPTY;
        init();
        nextToken();
    }

    /** Advances to the next byte of the input buffer and bumps the column.
     */
    private void nextch() {
        bp = bp + 1;
        ch = buf[bp];
        ccol = ccol + 1;
    }

    /** Advances to the next token and returns the position of the token
     *  that was current before the call.
     */
    public int skipToken() {
        final int skippedPos = pos;
        nextToken();
        return skippedPos;
    }

    /** Advances to the next token as seen by the parser. On top of
     *  fetchToken this
     *  - merges CASE followed by CLASS into a single CASECLASS token,
     *  - drops a SEMI that is directly followed by ELSE,
     *  - after a closing brace, yields a SEMI at the brace's position when
     *    the following token is EOF or lies on a later line, unless that
     *    token is one of the listed continuation tokens.
     */
    public void nextToken() {
        if (token != RBRACE) {
            // take the buffered lookahead token if there is one
            if (next.token != EMPTY) {
                copyFrom(next);
                next.token = EMPTY;
            } else {
                fetchToken();
            }
            if (token == CASE) {
                prev.copyFrom(this);
                fetchToken();
                if (token != CLASS) {
                    // not "case class": buffer what we read, restore CASE
                    next.copyFrom(this);
                    this.copyFrom(prev);
                } else {
                    token = CASECLASS;
                }
            } else if (token == SEMI) {
                prev.copyFrom(this);
                fetchToken();
                if (token != ELSE) {
                    // keep the SEMI and buffer the token that follows it
                    next.copyFrom(this);
                    this.copyFrom(prev);
                }
            }
            return;
        }

        // the current token is a closing brace
        final int bracePos = pos;
        fetchToken();
        switch (token) {
        case ELSE:   case EXTENDS:  case WITH:
        case YIELD:  case DO:
        case COMMA:  case SEMI:     case DOT:
        case COLON:  case EQUALS:   case ARROW:
        case LARROW: case SUBTYPE:
        case HASH:   case AS:       case IS:
        case RPAREN: case RBRACKET: case RBRACE:
            // these tokens never get a SEMI put in front of them
            return;
        default:
            break;
        }
        final boolean onLaterLine =
            (pos >>> Position.LINESHIFT) > (bracePos >>> Position.LINESHIFT);
        if (token == EOF || onLaterLine) {
            next.copyFrom(this);
            this.token = SEMI;
            this.pos = bracePos;
        }
        //System.out.println("<" + token2string(token) + ">");//DEBUG
    }

    /** Reads the next raw token from the input buffer.
     *
     *  Blanks, tabs, line breaks and comments are skipped. The start
     *  position of the token is then stored in pos, and token is set
     *  together with whichever of name, intVal or floatVal applies.
     *  lastpos is set to the position of the current character on entry.
     *  Once token is EOF the method returns without doing anything.
     */
    public void fetchToken() {
        if (token == EOF) return;
        lastpos = Position.encode(cline, ccol, currentSource.id);
        int index = bp;
        while (true) {
            switch (ch) {
            case ' ':
                nextch();
                break;
            case '\t':
                // move to the column just before the next tab stop;
                // nextch() then adds the final step
                ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                nextch();
                break;
            case CR:
                cline++;
                ccol = 0;
                nextch();
                if (ch == LF) {
                    // CR LF counts as a single line break
                    ccol = 0;
                    nextch();
                }
                break;
            case LF:
            case FF:
                cline++;
                ccol = 0;
                nextch();
                break;
            default:
                // first character of a token (or of a comment)
                pos = Position.encode(cline, ccol, currentSource.id);
                index = bp;
                switch (ch) {
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z': case '$':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                    nextch();
                    getIdentRest(index);
                    return;
                case '~': case '!': case '@': case '#': case '%':
                case '^': case '*': case '+': case '-': case '<':
                case '>': case '?': case ':':
                case '=': case '&': case '|':
                    nextch();
                    getOperatorRest(index);
                    return;
                case '/':
                    nextch();
                    if (!skipComment()) {
                        // a plain '/' starts an operator
                        getOperatorRest(index);
                        return;
                    }
                    // a comment was skipped: keep looking for a token
                    break;
                case '_':
                    nextch();
                    getIdentOrOperatorRest(index);
                    return;
                case '0':
                    nextch();
                    if (ch == 'x' || ch == 'X') {
                        nextch();
                        // the hexadecimal digits start after the "0x" prefix
                        getNumber(index + 2, 16);
                    } else
                        getNumber(index, 8);
                    return;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    getNumber(index, 10);
                    return;
                case '\"':
                    nextch();
                    litlen = 0;
                    // a string literal must be closed on the line it starts on
                    while (ch != '\"' && ch != CR && ch != LF && ch != SU)
                        getlitch();
                    if (ch == '\"') {
                        token = STRINGLIT;
                        name = Name.fromSource(lit, 0, litlen);
                        nextch();
                    }
                    else
                        syntaxError("unclosed string literal");
                    return;
                case '\'':
                    nextch();
                    litlen = 0;
                    switch (ch) {
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F': case 'G': case 'H': case 'I': case 'J':
                    case 'K': case 'L': case 'M': case 'N': case 'O':
                    case 'P': case 'Q': case 'R': case 'S': case 'T':
                    case 'U': case 'V': case 'W': case 'X': case 'Y':
                    case 'Z': case '$':
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f': case 'g': case 'h': case 'i': case 'j':
                    case 'k': case 'l': case 'm': case 'n': case 'o':
                    case 'p': case 'q': case 'r': case 's': case 't':
                    case 'u': case 'v': case 'w': case 'x': case 'y':
                    case 'z':
                        index = bp;
                        putch(ch);
                        nextch();
                        if (ch != '\'') {
                            // quote + identifier without a closing quote:
                            // a symbol literal named by the identifier
                            getIdentRest(index);
                            token = SYMBOLLIT;
                            return;
                        }
                        break;
                    default:
                        getlitch();
                    }
                    if (ch == '\'') {
                        nextch();
                        token = CHARLIT;
                        byte[] ascii = new byte[litlen * 2];
                        int alen = SourceRepresentation.source2ascii(lit, 0, litlen, ascii);
                        if (alen > 0)
                            intVal = SourceRepresentation.ascii2string(ascii, 0, alen).charAt(0);
                        else
                            intVal = 0;
                    } else
                        syntaxError("unclosed character literal");
                    return;
                case '.':
                    nextch();
                    // ".5" is a floating point literal, a lone '.' is DOT
                    if (('0' <= ch) && (ch <= '9')) getFraction(index);
                    else token = DOT;
                    return;
                case ';':
                    nextch(); token = SEMI;
                    return;
                case ',':
                    nextch(); token = COMMA;
                    return;
                case '(':
                    nextch(); token = LPAREN;
                    return;
                case '{':
                    nextch(); token = LBRACE;
                    return;
                case ')':
                    nextch(); token = RPAREN;
                    return;
                case '}':
                    nextch(); token = RBRACE;
                    return;
                case '[':
                    nextch(); token = LBRACKET;
                    return;
                case ']':
                    nextch(); token = RBRACKET;
                    return;
                case SU:
                    // end of input: the end marker is not consumed
                    token = EOF;
                    currentSource.lines = cline;
                    return;
                default:
                    nextch();
                    syntaxError("illegal character");
                    return;
                }
            }
        }
    }

    /** Skips a comment, if there is one. It is called after a '/' has been
     *  consumed, with ch being the character that follows it.
     *
     *  A second '/' starts a line comment, which is skipped up to (not
     *  including) the line break or the end of input. A '*' starts a block
     *  comment; block comments nest, and line and column counters are kept
     *  up to date while skipping. An unterminated block comment is reported
     *  as a syntax error.
     *
     *  @return true if a comment was skipped (also when it was unclosed),
     *          false if the '/' did not start a comment; in that case no
     *          further input has been consumed.
     */
    private boolean skipComment() {
        if (ch == '/') {
            do {
                nextch();
            } while ((ch != CR) && (ch != LF) && (ch != SU));
            return true;
        } else if (ch == '*') {
            int openComments = 1;
            while (openComments > 0) {
                do {
                    do {
                        if (ch == CR) {
                            cline++;
                            ccol = 0;
                            nextch();
                            if (ch == LF) {
                                ccol = 0;
                                nextch();
                            }
                        } else if (ch == LF) {
                            cline++;
                            ccol = 0;
                            nextch();
                        }
                        else if (ch == '\t') {
                            ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                            nextch();
                        } else if (ch == '/') {
                            nextch();
                            if (ch == '*') {
                                // opening of a nested comment
                                nextch();
                                openComments++;
                            }
                        } else if (ch != SU) {
                            // Never step past the end marker. ch can be SU
                            // here when an inner comment was closed right at
                            // the end of the input while an outer comment
                            // is still open.
                            nextch();
                        }
                    } while ((ch != '*') && (ch != SU));
                    // swallow a run of stars; a '/' right after it closes
                    while (ch == '*') {
                        nextch();
                    }
                } while (ch != '/' && ch != SU);
                if (ch == '/') {
                    nextch();
                    openComments--;
                } else {
                    syntaxError("unclosed comment");
                    return true;
                }
            }
            return true;
        } else {
            return false;
        }
    }

    /** Scans the rest of an alphanumeric identifier that starts at
     *  buf[index]. Letters, '$' and digits are consumed; an underscore
     *  hands over to getIdentOrOperatorRest; any other character ends the
     *  identifier, which is then classified by treatIdent.
     */
    private void getIdentRest(int index) {
        for (;;) {
            final boolean letter =
                ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') || ch == '$';
            final boolean digit = '0' <= ch && ch <= '9';
            if (letter || digit) {
                nextch();
            } else if (ch == '_') {
                nextch();
                getIdentOrOperatorRest(index);
                return;
            } else {
                treatIdent(index, bp);
                return;
            }
        }
    }

    /** Scans the rest of an operator identifier that starts at buf[index].
     *  Operator characters are consumed. A '/' that opens a comment ends
     *  the operator just before that '/'. An underscore hands over to
     *  getIdentOrOperatorRest; any other character ends the operator.
     */
    private void getOperatorRest(int index) {
        for (;;) {
            if (ch == '/') {
                final int slashAt = bp;
                nextch();
                if (skipComment()) {
                    // the comment is not part of the operator
                    treatIdent(index, slashAt);
                    return;
                }
                // otherwise the '/' belongs to the operator; keep scanning
            } else if (ch == '_') {
                nextch();
                getIdentOrOperatorRest(index);
                return;
            } else if ("~!@#%^*+-<>?:=&|".indexOf(ch) >= 0) {
                nextch();
            } else {
                treatIdent(index, bp);
                return;
            }
        }
    }

    /** Continues an identifier after an underscore has been consumed.
     *  Further underscores are consumed; after that a letter, '$' or digit
     *  continues with getIdentRest, an operator character (including '/')
     *  continues with getOperatorRest, and anything else ends the
     *  identifier, which started at buf[index].
     */
    private void getIdentOrOperatorRest(int index) {
        while (ch == '_') {
            nextch();
        }
        final boolean alphaNumeric =
            ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') ||
            ('0' <= ch && ch <= '9') || ch == '$';
        if (alphaNumeric) {
            getIdentRest(index);
        } else if ("~!@#%^*+-<>?:=&|/".indexOf(ch) >= 0) {
            getOperatorRest(index);
        } else {
            treatIdent(index, bp);
        }
    }

    /** Turns buf[start..end-1] into the current name and sets token to the
     *  keyword token registered for that name, or to IDENTIFIER if the
     *  name's index lies beyond the keyword table.
     */
    void treatIdent(int start, int end) {
        final int length = end - start;
        name = Name.fromAscii(buf, start, length);
        token = (name.index <= maxKey) ? key[name.index] : IDENTIFIER;
    }

    /** Reports msg as an error at the given position to the compilation
     *  unit, turns the current token into ERROR and remembers the position
     *  in errpos.
     */
    void syntaxError(int pos, String msg) {
        this.unit.error(pos, msg);
        this.token = ERROR;
        this.errpos = pos;
    }

    /** Reports msg as an error at the position of the current token.
     */
    void syntaxError(String msg) {
        syntaxError(this.pos, msg);
    }

    /** Appends a character to the "lit" buffer, doubling the buffer's
     *  capacity first when it is full.
     */
    protected void putch(byte c) {
        if (litlen == lit.length) {
            final byte[] grown = new byte[2 * lit.length];
            System.arraycopy(lit, 0, grown, 0, lit.length);
            lit = grown;
        }
        lit[litlen] = c;
        litlen++;
    }

    /** Returns true iff the six characters starting at the current one form
     *  a unicode escape: a backslash, a 'u' and four hexadecimal digits.
     */
    protected boolean isUnicode() {
        if ((bp + 6) >= buf.length)
            return false;
        if (buf[bp] != '\\' || buf[bp+1] != 'u')
            return false;
        for (int k = 2; k <= 5; k++) {
            if (SourceRepresentation.digit2int(buf[bp + k], 16) < 0)
                return false;
        }
        return true;
    }

    /** Reads the next character of a character or string literal and
     *  appends it to the "lit" buffer.
     *
     *  A unicode escape is copied verbatim (all six characters). An octal
     *  escape of up to three digits is stored as the single byte it
     *  denotes. The escapes b, t, n, f, r, double quote, single quote and
     *  backslash are stored as a backslash followed by the character. Any
     *  other escape is reported as an error and the character itself is
     *  stored. The end-of-input marker is never consumed.
     */
    protected void getlitch() {
        if (ch != '\\') {
            if (ch != SU) {
                putch(ch);
                nextch();
            }
            return;
        }
        if (isUnicode()) {
            // copy backslash, 'u' and the four hex digits unchanged
            for (int k = 0; k < 6; k++) {
                putch(ch);
                nextch();
            }
            return;
        }
        nextch();
        if ('0' <= ch && ch <= '7') {
            final byte first = ch;
            int value = SourceRepresentation.digit2int(ch, 8);
            nextch();
            if ('0' <= ch && ch <= '7') {
                value = value * 8 + SourceRepresentation.digit2int(ch, 8);
                nextch();
                // a third digit is only taken if the first one is at most 3
                if (first <= '3' && '0' <= ch && ch <= '7') {
                    value = value * 8 + SourceRepresentation.digit2int(ch, 8);
                    nextch();
                }
            }
            putch((byte)value);
            return;
        }
        if (ch == SU)
            return;
        switch (ch) {
            case 'b': case 't': case 'n':
            case 'f': case 'r': case '\"':
            case '\'': case '\\':
                putch((byte)'\\');
                putch(ch);
                break;
            default:
                syntaxError(Position.encode(cline, ccol, currentSource.id) - 1, "invalid escape character");
                putch(ch);
        }
        nextch();
    }

    /** Reads the fractional part of a floating point number, an optional
     *  exponent and an optional type suffix. Then
     *  floatVal := buf[index..bp-1], converted to a floating point number.
     *  The token becomes FLOATLIT for an 'f'/'F' suffix and DOUBLELIT
     *  otherwise. Values above the limit of the literal's type and texts
     *  that do not parse are reported as syntax errors.
     */
    protected void getFraction(int index) {
        while (SourceRepresentation.digit2int(ch, 10) >= 0)
            nextch();
        token = DOUBLELIT;
        if (ch == 'e' || ch == 'E') {
            nextch();
            if (ch == '+' || ch == '-') {
                final byte signCh = ch;
                nextch();
                final boolean digitFollows = ('0' <= ch) && (ch <= '9');
                if (!digitFollows) {
                    // no exponent digits: step back onto the sign
                    bp--;
                    ccol--;
                    ch = signCh;
                }
            }
            while (SourceRepresentation.digit2int(ch, 10) >= 0)
                nextch();
        }
        double limit;
        switch (ch) {
            case 'd': case 'D':
                limit = Double.MAX_VALUE;
                nextch();
                break;
            case 'f': case 'F':
                token = FLOATLIT;
                limit = Float.MAX_VALUE;
                nextch();
                break;
            default:
                limit = Double.MAX_VALUE;
        }
        try {
            final String text = new String(buf, index, bp - index);
            floatVal = Double.parseDouble(text);
            if (floatVal > limit)
                syntaxError("floating point number too large");
        } catch (NumberFormatException e) {
            syntaxError("malformed floating point number");
        }
    }

    /** intVal := buf[index..index+len-1], converted to an integer number.
     *  base = the base of the number; one of 8, 10, 16.
     *  max  = the maximal number before an overflow.
     *  For bases other than 10 the overflow test works on halved values.
     *  On a bad digit or an overflow a syntax error is reported and the
     *  conversion stops.
     */
    protected void makeInt (int index, int len, int base, long max) {
        final int divider = (base == 10) ? 1 : 2;
        final int step = base / divider;
        intVal = 0;
        int i = 0;
        while (i < len) {
            final int d = SourceRepresentation.digit2int(buf[index + i], base);
            if (d < 0) {
                syntaxError("malformed integer number");
                return;
            }
            final boolean tooLarge =
                intVal < 0 ||
                max / step < intVal ||
                max - (d / divider) < intVal * step;
            if (tooLarge) {
                syntaxError("integer number too large");
                return;
            }
            intVal = intVal * base + d;
            i++;
        }
    }

    /** Reads a number and converts buf[index..], setting either intVal or
     *  floatVal.
     *  base = the base of the number; one of 8, 10, 16.
     *  For bases up to 10 a fraction, exponent or float suffix turns the
     *  literal into a floating point number; an 'l'/'L' suffix makes it a
     *  LONGLIT; everything else is an INTLIT.
     */
    protected void getNumber(int index, int base) {
        // octal literals are scanned with decimal digits; makeInt then
        // rejects the digits that are not valid in base 8
        final int scanBase = (base == 8) ? 10 : base;
        while (SourceRepresentation.digit2int(ch, scanBase) >= 0)
            nextch();

        final boolean mayBeFloat = base <= 10;
        if (mayBeFloat && ch == '.') {
            nextch();
            if ((ch >= '0') && (ch <= '9')) {
                getFraction(index);
                return;
            }
            // the '.' is not part of the number: step back onto it
            bp--;
            ch = buf[bp];
            ccol--;
        } else if (mayBeFloat &&
                   (ch == 'e' || ch == 'E' ||
                    ch == 'f' || ch == 'F' ||
                    ch == 'd' || ch == 'D')) {
            getFraction(index);
            return;
        } else if (ch == 'l' || ch == 'L') {
            makeInt(index, bp - index, base, Long.MAX_VALUE);
            nextch();
            token = LONGLIT;
            return;
        }
        makeInt(index, bp - index, base, Integer.MAX_VALUE);
        intVal = (int)intVal;
        token = INTLIT;
    }

    /** Returns the keyword token registered for the given name, or
     *  IDENTIFIER if the name's index lies beyond the keyword table.
     */
    public int name2token(Name name) {
        final int idx = name.index;
        return (idx <= maxKey) ? key[idx] : IDENTIFIER;
    }

    /** Returns a description of the given token class for use in messages.
     *  Token classes without a fixed description are rendered as their
     *  registered name in single quotes. A token id outside the tokenName
     *  table yields "'<id>'", an id without a registered name "'<(id)>'".
     */
    public String token2string(int token) {
        switch (token) {
            case IDENTIFIER:
                return "identifier";
            case CHARLIT:
                return "character literal";
            case INTLIT:
                return "integer literal";
            case LONGLIT:
                return "long literal";
            case FLOATLIT:
                return "float literal";
            case DOUBLELIT:
                return "double literal";
            case STRINGLIT:
                return "string literal";
            case SYMBOLLIT:
                return "symbol literal";
            case LPAREN:
                return "'('";
            case RPAREN:
                return "')'";
            case LBRACE:
                return "'{'";
            case RBRACE:
                return "'}'";
            case LBRACKET:
                return "'['";
            case RBRACKET:
                return "']'";
            case EOF:
                return "eof";
            case ERROR:
                return "something";
            case SEMI:
                return "';'";
            case COMMA:
                return "','";
            case CASECLASS:
                return "case class";
            default:
                // explicit checks instead of catching runtime exceptions
                final Name[] names = tokenName;
                if (names != null && (token < 0 || token >= names.length))
                    return "'<" + token + ">'";
                final Name tokName = (names == null) ? null : names[token];
                if (tokName == null)
                    return "'<(" + token + ")>'";
                return "'" + tokName.toString() + "'";
        }
    }

    /** Returns a printable form of the current token: identifiers and
     *  literals are shown with their value, ';' and ',' as themselves, and
     *  every other token as described by token2string.
     */
    public String toString() {
        final int t = token;
        if (t == IDENTIFIER) return "id(" + name + ")";
        if (t == CHARLIT)    return "char(" + intVal + ")";
        if (t == INTLIT)     return "int(" + intVal + ")";
        if (t == LONGLIT)    return "long(" + intVal + ")";
        if (t == FLOATLIT)   return "float(" + floatVal + ")";
        if (t == DOUBLELIT)  return "double(" + floatVal + ")";
        if (t == STRINGLIT)  return "string(" + name + ")";
        if (t == SEMI)       return ";";
        if (t == COMMA)      return ",";
        return token2string(t);
    }

    /** Registers s as the spelling of the token tokenId: stores its name in
     *  tokenName[tokenId], growing the table if necessary, and updates
     *  maxKey and numToken.
     *
     *  @param s        the spelling of the keyword or reserved operator
     *  @param tokenId  the token id; must not be negative
     */
    protected void enterKeyword(String s, int tokenId) {
        // Grow until tokenId is a valid index (hence ">="), copying only
        // the entries that exist in the old, shorter table.
        while (tokenId >= tokenName.length) {
            Name[]  newTokName = new Name[tokenName.length * 2];
            System.arraycopy(tokenName, 0, newTokName, 0, tokenName.length);
            tokenName = newTokName;
        }
        Name n = Name.fromString(s);
        tokenName[tokenId] = n;
        if (n.index > maxKey)
            maxKey = n.index;
        if (tokenId >= numToken)
            numToken = tokenId + 1;
    }

    /** Builds the keyword table: after all keywords have been entered,
     *  key maps every name index up to maxKey to IDENTIFIER, except for the
     *  indices of keyword names, which map to their token id.
     */
    protected void init() {
        initKeywords();
        key = new byte[maxKey+1];
        for (int i = 0; i <= maxKey; i++)
            key[i] = IDENTIFIER;
        // An int counter is used on purpose: a byte counter would wrap
        // around to -128 once numToken reaches 128 and index tokenName
        // with a negative value. Token ids are stored as bytes in key.
        for (int id = 0; id < numToken; id++) {
            Name n = tokenName[id];
            if (n != null)
                key[n.index] = (byte) id;
        }
    }

    /** Enters all keywords and reserved operators. The two tables below
     *  are parallel: spellings[i] is entered for the token ids[i], in
     *  table order.
     */
    protected void initKeywords() {
        final String[] spellings = {
            "if", "for", "else", "this", "null", "new", "with", "super",
            "case", "val", "abstract", "final", "private", "protected",
            "override", "var", "def", "type", "extends", "module",
            "class", "constr", "import", "package", "true", "false",
            ".", "_", ":", "=", "=>", "<-", "<:",
            "yield", "do", "#", "trait", "as", "is"
        };
        final int[] ids = {
            IF, FOR, ELSE, THIS, NULL, NEW, WITH, SUPER,
            CASE, VAL, ABSTRACT, FINAL, PRIVATE, PROTECTED,
            OVERRIDE, VAR, DEF, TYPE, EXTENDS, MODULE,
            CLASS, CONSTR, IMPORT, PACKAGE, TRUE, FALSE,
            DOT, USCORE, COLON, EQUALS, ARROW, LARROW, SUBTYPE,
            YIELD, DO, HASH, TRAIT, AS, IS
        };
        for (int i = 0; i < spellings.length; i++)
            enterKeyword(spellings[i], ids[i]);
    }
}
/*     ____ ____  ____ ____  ______                                     *\
**    / __// __ \/ __// __ \/ ____/    SOcos COmpiles Scala             **
**  __\_ \/ /_/ / /__/ /_/ /\_ \       (c) 2002, LAMP/EPFL              **
** /_____/\____/\___/\____/____/                                        **
**                                                                      **
** $Id$
\*                                                                      */

package scalac.ast.parser;

import scalac.*;
import scalac.util.Name;
import scalac.util.Position;

/** A scanner for the programming language Scala.
 *
 *  @author     Matthias Zenger, Martin Odersky
 *  @version    1.0
 */
public class Scanner extends TokenData {

    /** Layout and character constants.
     *  tabinc is the tab-stop width used when the column is advanced over
     *  a tab character; SU is the byte that marks the end of the input.
     */
    public int tabinc = 8;
    public final static byte LF = 0xA;
    protected final static byte FF = 0xC;
    protected final static byte CR = 0xD;
    protected final static byte SU = Sourcefile.SU;

    /** The names of all tokens, indexed by token id (filled by
     *  enterKeyword); numToken is one more than the largest id entered.
     */
    public Name[]       tokenName = new Name[128];
    public int          numToken = 0;

    /** Keyword array; maps from name indices to tokens. It is built by
     *  init(); maxKey is the largest name index of any entered keyword.
     */
    protected byte[]    key;
    protected int       maxKey = 0;

    /** We need one token of lookahead: next holds a token that was fetched
     *  ahead of time (its token field is EMPTY when nothing is buffered),
     *  prev is scratch space used to restore the current token afterwards.
     */
    protected TokenData next = new TokenData();
    protected TokenData prev = new TokenData();

    /** The first character position after the previous token
     *  (recorded at the start of every fetchToken call).
     */
    public int          lastpos = 0;

    /** The position of the last reported syntax error; -1 if none so far.
     */
    public int          errpos = -1;

    /** The input buffer and the index of the current character in it.
     */
    protected byte[]    buf;
    protected int       bp;

    /** The current character, i.e. buf[bp].
     */
    protected byte      ch;

    /** The line and column position of the current character.
     */
    public int          cline;
    public int          ccol;

    /** The current source file.
     */
    public Sourcefile   currentSource;

    /** A growable buffer for the contents of character and string
     *  literals; litlen is the number of bytes currently in use.
     */
    protected byte[]    lit = new byte[64];
    protected int       litlen;

    /** The compilation unit being scanned; errors are reported to it.
     */
    public Unit unit;


    /** Creates a scanner over the source of the given compilation unit,
     *  builds the keyword table and reads the first token.
     */
    public Scanner(Unit unit) {
        this.unit = unit;
        this.currentSource = unit.source;
        this.buf = this.currentSource.getBuffer();
        this.cline = 1;
        this.bp = -1;
        this.ccol = 0;
        // load the first character; this moves bp to 0 and ccol to 1
        nextch();
        this.token = EMPTY;
        init();
        nextToken();
    }

    /** Advances to the next byte of the input buffer and bumps the column.
     */
    private void nextch() {
        bp = bp + 1;
        ch = buf[bp];
        ccol = ccol + 1;
    }

    /** Advances to the next token and returns the position of the token
     *  that was current before the call.
     */
    public int skipToken() {
        final int skippedPos = pos;
        nextToken();
        return skippedPos;
    }

    /** Advances to the next token as seen by the parser. On top of
     *  fetchToken this
     *  - merges CASE followed by CLASS into a single CASECLASS token,
     *  - drops a SEMI that is directly followed by ELSE,
     *  - after a closing brace, yields a SEMI at the brace's position when
     *    the following token is EOF or lies on a later line, unless that
     *    token is one of the listed continuation tokens.
     */
    public void nextToken() {
        if (token != RBRACE) {
            // take the buffered lookahead token if there is one
            if (next.token != EMPTY) {
                copyFrom(next);
                next.token = EMPTY;
            } else {
                fetchToken();
            }
            if (token == CASE) {
                prev.copyFrom(this);
                fetchToken();
                if (token != CLASS) {
                    // not "case class": buffer what we read, restore CASE
                    next.copyFrom(this);
                    this.copyFrom(prev);
                } else {
                    token = CASECLASS;
                }
            } else if (token == SEMI) {
                prev.copyFrom(this);
                fetchToken();
                if (token != ELSE) {
                    // keep the SEMI and buffer the token that follows it
                    next.copyFrom(this);
                    this.copyFrom(prev);
                }
            }
            return;
        }

        // the current token is a closing brace
        final int bracePos = pos;
        fetchToken();
        switch (token) {
        case ELSE:   case EXTENDS:  case WITH:
        case YIELD:  case DO:
        case COMMA:  case SEMI:     case DOT:
        case COLON:  case EQUALS:   case ARROW:
        case LARROW: case SUBTYPE:
        case HASH:   case AS:       case IS:
        case RPAREN: case RBRACKET: case RBRACE:
            // these tokens never get a SEMI put in front of them
            return;
        default:
            break;
        }
        final boolean onLaterLine =
            (pos >>> Position.LINESHIFT) > (bracePos >>> Position.LINESHIFT);
        if (token == EOF || onLaterLine) {
            next.copyFrom(this);
            this.token = SEMI;
            this.pos = bracePos;
        }
        //System.out.println("<" + token2string(token) + ">");//DEBUG
    }

    /** Reads the next raw token from the input buffer.
     *
     *  Blanks, tabs, line breaks and comments are skipped. The start
     *  position of the token is then stored in pos, and token is set
     *  together with whichever of name, intVal or floatVal applies.
     *  lastpos is set to the position of the current character on entry.
     *  Once token is EOF the method returns without doing anything.
     */
    public void fetchToken() {
        if (token == EOF) return;
        lastpos = Position.encode(cline, ccol, currentSource.id);
        int index = bp;
        while (true) {
            switch (ch) {
            case ' ':
                nextch();
                break;
            case '\t':
                // move to the column just before the next tab stop;
                // nextch() then adds the final step
                ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                nextch();
                break;
            case CR:
                cline++;
                ccol = 0;
                nextch();
                if (ch == LF) {
                    // CR LF counts as a single line break
                    ccol = 0;
                    nextch();
                }
                break;
            case LF:
            case FF:
                cline++;
                ccol = 0;
                nextch();
                break;
            default:
                // first character of a token (or of a comment)
                pos = Position.encode(cline, ccol, currentSource.id);
                index = bp;
                switch (ch) {
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z': case '$':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                    nextch();
                    getIdentRest(index);
                    return;
                case '~': case '!': case '@': case '#': case '%':
                case '^': case '*': case '+': case '-': case '<':
                case '>': case '?': case ':':
                case '=': case '&': case '|':
                    nextch();
                    getOperatorRest(index);
                    return;
                case '/':
                    nextch();
                    if (!skipComment()) {
                        // a plain '/' starts an operator
                        getOperatorRest(index);
                        return;
                    }
                    // a comment was skipped: keep looking for a token
                    break;
                case '_':
                    nextch();
                    getIdentOrOperatorRest(index);
                    return;
                case '0':
                    nextch();
                    if (ch == 'x' || ch == 'X') {
                        nextch();
                        // the hexadecimal digits start after the "0x" prefix
                        getNumber(index + 2, 16);
                    } else
                        getNumber(index, 8);
                    return;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    getNumber(index, 10);
                    return;
                case '\"':
                    nextch();
                    litlen = 0;
                    // a string literal must be closed on the line it starts on
                    while (ch != '\"' && ch != CR && ch != LF && ch != SU)
                        getlitch();
                    if (ch == '\"') {
                        token = STRINGLIT;
                        name = Name.fromSource(lit, 0, litlen);
                        nextch();
                    }
                    else
                        syntaxError("unclosed string literal");
                    return;
                case '\'':
                    nextch();
                    litlen = 0;
                    switch (ch) {
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F': case 'G': case 'H': case 'I': case 'J':
                    case 'K': case 'L': case 'M': case 'N': case 'O':
                    case 'P': case 'Q': case 'R': case 'S': case 'T':
                    case 'U': case 'V': case 'W': case 'X': case 'Y':
                    case 'Z': case '$':
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f': case 'g': case 'h': case 'i': case 'j':
                    case 'k': case 'l': case 'm': case 'n': case 'o':
                    case 'p': case 'q': case 'r': case 's': case 't':
                    case 'u': case 'v': case 'w': case 'x': case 'y':
                    case 'z':
                        index = bp;
                        putch(ch);
                        nextch();
                        if (ch != '\'') {
                            // quote + identifier without a closing quote:
                            // a symbol literal named by the identifier
                            getIdentRest(index);
                            token = SYMBOLLIT;
                            return;
                        }
                        break;
                    default:
                        getlitch();
                    }
                    if (ch == '\'') {
                        nextch();
                        token = CHARLIT;
                        byte[] ascii = new byte[litlen * 2];
                        int alen = SourceRepresentation.source2ascii(lit, 0, litlen, ascii);
                        if (alen > 0)
                            intVal = SourceRepresentation.ascii2string(ascii, 0, alen).charAt(0);
                        else
                            intVal = 0;
                    } else
                        syntaxError("unclosed character literal");
                    return;
                case '.':
                    nextch();
                    // ".5" is a floating point literal, a lone '.' is DOT
                    if (('0' <= ch) && (ch <= '9')) getFraction(index);
                    else token = DOT;
                    return;
                case ';':
                    nextch(); token = SEMI;
                    return;
                case ',':
                    nextch(); token = COMMA;
                    return;
                case '(':
                    nextch(); token = LPAREN;
                    return;
                case '{':
                    nextch(); token = LBRACE;
                    return;
                case ')':
                    nextch(); token = RPAREN;
                    return;
                case '}':
                    nextch(); token = RBRACE;
                    return;
                case '[':
                    nextch(); token = LBRACKET;
                    return;
                case ']':
                    nextch(); token = RBRACKET;
                    return;
                case SU:
                    // end of input: the end marker is not consumed
                    token = EOF;
                    currentSource.lines = cline;
                    return;
                default:
                    nextch();
                    syntaxError("illegal character");
                    return;
                }
            }
        }
    }

    /** Skips a comment, if there is one. It is called after a '/' has been
     *  consumed, with ch being the character that follows it.
     *
     *  A second '/' starts a line comment, which is skipped up to (not
     *  including) the line break or the end of input. A '*' starts a block
     *  comment; block comments nest, and line and column counters are kept
     *  up to date while skipping. An unterminated block comment is reported
     *  as a syntax error.
     *
     *  @return true if a comment was skipped (also when it was unclosed),
     *          false if the '/' did not start a comment; in that case no
     *          further input has been consumed.
     */
    private boolean skipComment() {
        if (ch == '/') {
            do {
                nextch();
            } while ((ch != CR) && (ch != LF) && (ch != SU));
            return true;
        } else if (ch == '*') {
            int openComments = 1;
            while (openComments > 0) {
                do {
                    do {
                        if (ch == CR) {
                            cline++;
                            ccol = 0;
                            nextch();
                            if (ch == LF) {
                                ccol = 0;
                                nextch();
                            }
                        } else if (ch == LF) {
                            cline++;
                            ccol = 0;
                            nextch();
                        }
                        else if (ch == '\t') {
                            ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                            nextch();
                        } else if (ch == '/') {
                            nextch();
                            if (ch == '*') {
                                // opening of a nested comment
                                nextch();
                                openComments++;
                            }
                        } else if (ch != SU) {
                            // Never step past the end marker. ch can be SU
                            // here when an inner comment was closed right at
                            // the end of the input while an outer comment
                            // is still open.
                            nextch();
                        }
                    } while ((ch != '*') && (ch != SU));
                    // swallow a run of stars; a '/' right after it closes
                    while (ch == '*') {
                        nextch();
                    }
                } while (ch != '/' && ch != SU);
                if (ch == '/') {
                    nextch();
                    openComments--;
                } else {
                    syntaxError("unclosed comment");
                    return true;
                }
            }
            return true;
        } else {
            return false;
        }
    }

    /** Scan the remainder of an alphanumeric identifier.
     *  Letters, digits and '$' are consumed one at a time. An underscore is
     *  consumed and scanning continues in getIdentOrOperatorRest. Any other
     *  character ends the identifier, which is then handed to treatIdent
     *  as buf[index..bp-1].
     *
     *  @param index  position in buf of the identifier's first character.
     */
    private void getIdentRest(int index) {
        for (;;) {
            boolean upper = ('A' <= ch) && (ch <= 'Z');
            boolean lower = ('a' <= ch) && (ch <= 'z');
            boolean digit = ('0' <= ch) && (ch <= '9');
            if (upper || lower || digit || ch == '$') {
                nextch();
            } else if (ch == '_') {
                nextch();
                getIdentOrOperatorRest(index);
                return;
            } else {
                treatIdent(index, bp);
                return;
            }
        }
    }

    /** Scan the remainder of an operator identifier.
     *  Operator characters are consumed one at a time. A '/' is only part
     *  of the operator if it does not start a comment; if it does, the
     *  comment is skipped and the identifier ends just before that '/'.
     *  An underscore is consumed and scanning continues in
     *  getIdentOrOperatorRest. Any other character ends the identifier.
     *
     *  @param index  position in buf of the identifier's first character.
     */
    private void getOperatorRest(int index) {
        for (;;) {
            if (ch == '/') {
                // remember where the '/' sits in case it opens a comment
                final int slashPos = bp;
                nextch();
                if (skipComment()) {
                    treatIdent(index, slashPos);
                    return;
                }
            } else if (ch == '_') {
                nextch();
                getIdentOrOperatorRest(index);
                return;
            } else if ("~!@#%^*+-<>?:=&|".indexOf(ch) >= 0) {
                nextch();
            } else {
                treatIdent(index, bp);
                return;
            }
        }
    }

    /** Continue an identifier after an underscore, where either an
     *  alphanumeric part or an operator part may follow.
     *  Further underscores are consumed first. Then a letter, digit or '$'
     *  dispatches to getIdentRest, an operator character (including '/')
     *  dispatches to getOperatorRest, and anything else ends the
     *  identifier as buf[index..bp-1].
     *
     *  @param index  position in buf of the identifier's first character.
     */
    private void getIdentOrOperatorRest(int index) {
        while (ch == '_') {
            nextch();
        }
        boolean letter = (('A' <= ch) && (ch <= 'Z')) || (('a' <= ch) && (ch <= 'z'));
        boolean digit = ('0' <= ch) && (ch <= '9');
        if (letter || digit || ch == '$') {
            getIdentRest(index);
        } else if (ch == '/' || "~!@#%^*+-<>?:=&|".indexOf(ch) >= 0) {
            getOperatorRest(index);
        } else {
            treatIdent(index, bp);
        }
    }

    /** name := buf[start..end-1]; token := the keyword token registered
     *  for that name in "key", or IDENTIFIER if the name's index lies
     *  beyond maxKey.
     */
    void treatIdent(int start, int end) {
        final int length = end - start;
        name = Name.fromAscii(buf, start, length);
        token = (name.index <= maxKey) ? key[name.index] : IDENTIFIER;
    }

    /** Report a syntax error at the given position.
     *  The message is passed to unit.error; afterwards the current token
     *  is ERROR and errpos holds the reported position.
     *
     *  @param pos  the position to report the error at.
     *  @param msg  the error message.
     */
    void syntaxError(int pos, String msg) {
        unit.error(pos, msg);
        this.errpos = pos;
        this.token = ERROR;
    }

    /** Report a syntax error at the position of the current token ("pos").
     *
     *  @param msg  the error message.
     */
    void syntaxError(String msg) {
        syntaxError(this.pos, msg);
    }

    /** Append a character to the "lit" buffer, doubling the buffer's
     *  capacity first if it is full.
     *
     *  @param c  the character to append.
     */
    protected void putch(byte c) {
        final int capacity = lit.length;
        if (litlen == capacity) {
            byte[] grown = new byte[capacity * 2];
            System.arraycopy(lit, 0, grown, 0, capacity);
            lit = grown;
        }
        lit[litlen++] = c;
    }

    /** return true iff next 6 characters are a valid unicode sequence:
     */
    protected boolean isUnicode() {
        return
            (bp + 6) < buf.length &&
            (buf[bp] == '\\') &&
            (buf[bp+1] == 'u') &&
            (SourceRepresentation.digit2int(buf[bp+2], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+3], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+4], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+5], 16) >= 0);
    }

    /** Read the next character of a character or string literal and append
     *  it to the "lit" buffer:
     *  - an ordinary character is appended as is;
     *  - a unicode escape (see isUnicode) is appended unchanged, all six
     *    characters of it;
     *  - an octal escape of up to three digits (three only if the first
     *    digit is at most '3') is appended as the single byte it denotes;
     *  - one of the escapes b t n f r " ' \ is appended as a backslash
     *    followed by the escape character;
     *  - any other escape is reported as "invalid escape character" and
     *    the character after the backslash is appended.
     *  Nothing is appended or consumed when the current character is SU.
     */
    protected void getlitch() {
        if (ch != '\\') {
            if (ch != SU) {
                putch(ch);
                nextch();
            }
            return;
        }
        if (isUnicode()) {
            for (int k = 0; k < 6; k++) {
                putch(ch);
                nextch();
            }
            return;
        }
        // step over the backslash and look at the escape character
        nextch();
        if ('0' <= ch && ch <= '7') {
            final byte firstDigit = ch;
            int value = SourceRepresentation.digit2int(ch, 8);
            nextch();
            if ('0' <= ch && ch <= '7') {
                value = value * 8 + SourceRepresentation.digit2int(ch, 8);
                nextch();
                // a third digit is only taken if the first one is at most '3'
                if (firstDigit <= '3' && '0' <= ch && ch <= '7') {
                    value = value * 8 + SourceRepresentation.digit2int(ch, 8);
                    nextch();
                }
            }
            putch((byte)value);
            return;
        }
        if (ch == SU) {
            return;
        }
        boolean knownEscape =
            ch == 'b' || ch == 't' || ch == 'n' || ch == 'f' || ch == 'r' ||
            ch == '\"' || ch == '\'' || ch == '\\';
        if (knownEscape) {
            putch((byte)'\\');
            putch(ch);
        } else {
            syntaxError(Position.encode(cline, ccol, currentSource.id) - 1, "invalid escape character");
            putch(ch);
        }
        nextch();
    }

    /** Read the fractional part of a floating point number: further
     *  decimal digits, an optional exponent ('e' or 'E', optional sign,
     *  digits) and an optional type suffix ('d', 'D', 'f' or 'F').
     *  Then floatVal := buf[index..bp-1] converted to a floating point
     *  number. The token is FLOATLIT if the suffix is 'f' or 'F' and
     *  DOUBLELIT otherwise. A value above the maximum of the chosen type,
     *  or text that cannot be converted, is reported as a syntax error.
     *
     *  @param index  position in buf of the first character of the number.
     */
    protected void getFraction(int index) {
        for (; SourceRepresentation.digit2int(ch, 10) >= 0; nextch()) {
            // skip the digits of the fraction
        }
        token = DOUBLELIT;
        if ((ch == 'e') || (ch == 'E')) {
            nextch();
            if ((ch == '+') || (ch == '-')) {
                final byte sign = ch;
                nextch();
                boolean digitFollows = ('0' <= ch) && (ch <= '9');
                if (!digitFollows) {
                    // no exponent digits after the sign: step back onto the sign
                    ch = sign;
                    bp--;
                    ccol--;
                }
            }
            for (; SourceRepresentation.digit2int(ch, 10) >= 0; nextch()) {
                // skip the digits of the exponent
            }
        }
        double limit = Double.MAX_VALUE;
        switch (ch) {
        case 'd': case 'D':
            nextch();
            break;
        case 'f': case 'F':
            token = FLOATLIT;
            limit = Float.MAX_VALUE;
            nextch();
            break;
        default:
            break;
        }
        try {
            String text = new String(buf, index, bp - index);
            floatVal = Double.parseDouble(text);
            if (floatVal > limit) {
                syntaxError("floating point number too large");
            }
        } catch (NumberFormatException e) {
            syntaxError("malformed floating point number");
        }
    }

    /** intVal := buf[index..index+len-1], converted to an integer number.
     *  base = the base of the number; one of 8, 10, 16.
     *  max  = the maximal number before an overflow.
     *
     *  On a character that is not a digit of the given base, reports
     *  "malformed integer number"; on overflow, reports "integer number
     *  too large". In both cases conversion stops immediately and intVal
     *  keeps the value accumulated so far.
     */
    protected void makeInt (int index, int len, int base, long max) {
        intVal = 0;
        // The overflow test below works with base/divider and d/divider.
        // For base 10 that is the plain test against max; for the other
        // bases the halved factors admit values up to roughly twice max.
        // NOTE(review): presumably so that octal/hex literals may use the
        // full unsigned bit width - confirm against the callers.
        int divider = (base == 10 ? 1 : 2);
        for (int i = 0; i < len; i++) {
            int d = SourceRepresentation.digit2int(buf[index + i], base);
            if (d < 0) {
                syntaxError("malformed integer number");
                return;
            }
            // intVal < 0 means an earlier step already wrapped around;
            // the other two tests check that appending digit d stays
            // within the (possibly doubled) limit.
            if (intVal < 0 ||
                max / (base / divider) < intVal ||
                max - (d / divider) < (intVal * (base / divider) - 0)) {
                syntaxError("integer number too large");
                return;
            }
            intVal = intVal * base + d;
        }
    }

    /** Read a number and convert buf[index..], setting either intVal or
     *  floatVal and the matching literal token.
     *  For bases 8 and 10, a '.' followed by a digit, or one of the
     *  characters e E f F d D, turns the number into a floating point
     *  literal (see getFraction). Otherwise an 'l' or 'L' suffix yields a
     *  LONGLIT, and anything else an INTLIT whose value is narrowed to int.
     *
     *  @param index  position in buf of the first digit.
     *  @param base   the base of the number; one of 8, 10, 16.
     */
    protected void getNumber(int index, int base) {
        // octal literals are scanned with decimal digits
        final int scanBase = (base == 8) ? 10 : base;
        while (SourceRepresentation.digit2int(ch, scanBase) >= 0) {
            nextch();
        }
        if (base <= 10) {
            if (ch == '.') {
                nextch();
                if ((ch >= '0') && (ch <= '9')) {
                    getFraction(index);
                    return;
                }
                // the '.' is not part of the number: step back by one character
                ch = buf[--bp]; ccol--;
                makeInt(index, bp - index, base, Integer.MAX_VALUE);
                intVal = (int)intVal;
                token = INTLIT;
                return;
            }
            switch (ch) {
            case 'e': case 'E':
            case 'f': case 'F':
            case 'd': case 'D':
                getFraction(index);
                return;
            default:
                break;
            }
        }
        if (ch == 'l' || ch == 'L') {
            makeInt(index, bp - index, base, Long.MAX_VALUE);
            nextch();
            token = LONGLIT;
        } else {
            makeInt(index, bp - index, base, Integer.MAX_VALUE);
            intVal = (int)intVal;
            token = INTLIT;
        }
    }

    /** Return the token registered for the given name in "key", or
     *  IDENTIFIER if the name's index lies beyond maxKey.
     */
    public int name2token(Name name) {
        return (name.index <= maxKey) ? key[name.index] : IDENTIFIER;
    }

    /** Return a human-readable description of a token class.
     *  Literal, bracket and a few other tokens have fixed descriptions;
     *  every other token is shown as its entry in tokenName, in single
     *  quotes. A token outside the bounds of tokenName yields
     *  "'<n>'" and a token without an entry yields "'<(n)>'",
     *  where n is the token number.
     *
     *  @param token  the token class to describe.
     *  @return the description; never null.
     */
    public String token2string(int token) {
        switch (token) {
            case IDENTIFIER:
                return "identifier";
            case CHARLIT:
                return "character literal";
            case INTLIT:
                return "integer literal";
            case LONGLIT:
                return "long literal";
            case FLOATLIT:
                return "float literal";
            case DOUBLELIT:
                return "double literal";
            case STRINGLIT:
                return "string literal";
            case SYMBOLLIT:
                return "symbol literal";
            case LPAREN:
                return "'('";
            case RPAREN:
                return "')'";
            case LBRACE:
                return "'{'";
            case RBRACE:
                return "'}'";
            case LBRACKET:
                return "'['";
            case RBRACKET:
                return "']'";
            case EOF:
                return "eof";
            case ERROR:
                return "something";
            case SEMI:
                return "';'";
            case COMMA:
                return "','";
            case CASECLASS:
                return "case class";
            default: {
                // Explicit checks instead of catching index and null
                // pointer exceptions; the resulting strings are unchanged.
                Name[] names = tokenName;
                if (names != null && (token < 0 || token >= names.length)) {
                    return "'<" + token + ">'";
                }
                if (names == null || names[token] == null) {
                    return "'<(" + token + ")>'";
                }
                return "'" + names[token].toString() + "'";
            }
        }
    }

    /** Return a description of the current token, including its value for
     *  identifiers and literals; all remaining tokens are described by
     *  token2string.
     */
    public String toString() {
        if (token == IDENTIFIER) {
            return "id(" + name + ")";
        }
        if (token == CHARLIT) {
            return "char(" + intVal + ")";
        }
        if (token == INTLIT) {
            return "int(" + intVal + ")";
        }
        if (token == LONGLIT) {
            return "long(" + intVal + ")";
        }
        if (token == FLOATLIT) {
            return "float(" + floatVal + ")";
        }
        if (token == DOUBLELIT) {
            return "double(" + floatVal + ")";
        }
        if (token == STRINGLIT) {
            return "string(" + name + ")";
        }
        if (token == SEMI) {
            return ";";
        }
        if (token == COMMA) {
            return ",";
        }
        return token2string(token);
    }

    /** Register the spelling s for the token tokenId.
     *  Stores the name in tokenName[tokenId], growing the array if needed,
     *  and updates maxKey (largest name index of any registered spelling)
     *  and numToken (one more than the largest registered token).
     *
     *  @param s        the spelling of the keyword or operator.
     *  @param tokenId  the token it stands for.
     */
    protected void enterKeyword(String s, int tokenId) {
        if (tokenId >= tokenName.length) {
            // Grow by doubling until tokenId is a valid index. Only the
            // old array's elements are copied; copying the new length
            // would read past the end of the old array.
            int newLength = Math.max(1, tokenName.length);
            while (tokenId >= newLength) {
                newLength *= 2;
            }
            Name[] newTokName = new Name[newLength];
            System.arraycopy(tokenName, 0, newTokName, 0, tokenName.length);
            tokenName = newTokName;
        }
        Name n = Name.fromString(s);
        tokenName[tokenId] = n;
        if (n.index > maxKey)
            maxKey = n.index;
        if (tokenId >= numToken)
            numToken = tokenId + 1;
    }

    /** Build the keyword table.
     *  Registers all keywords through initKeywords, then fills "key" so
     *  that it maps the name index of every registered spelling to its
     *  token and every other index up to maxKey to IDENTIFIER.
     *
     *  @throws IllegalStateException if a registered token does not fit
     *          into a byte and so cannot be stored in "key".
     */
    protected void init() {
        initKeywords();
        key = new byte[maxKey+1];
        for (int i = 0; i <= maxKey; i++)
            key[i] = IDENTIFIER;
        // The counter is an int: a byte counter wraps to -128 once
        // numToken reaches 128 and then indexes tokenName out of bounds.
        for (int j = 0; j < numToken; j++) {
            if (tokenName[j] != null) {
                if (j > Byte.MAX_VALUE)
                    throw new IllegalStateException(
                        "token " + j + " does not fit into the keyword table");
                key[tokenName[j].index] = (byte)j;
            }
        }
    }

    /** Register all reserved words and reserved operators with their
     *  tokens. The two tables below are parallel: spellings[k] is entered
     *  for tokens[k], in table order.
     */
    protected void initKeywords() {
        final String[] spellings = {
            "if", "for", "else", "this",
            "null", "new", "with", "super",
            "case", "val", "abstract", "final",
            "private", "protected", "override", "var",
            "def", "type", "extends", "module",
            "class", "constr", "import", "package",
            "true", "false", ".", "_",
            ":", "=", "=>", "<-",
            "<:", "yield", "do", "#",
            "trait", "as", "is"
        };
        final int[] tokens = {
            IF, FOR, ELSE, THIS,
            NULL, NEW, WITH, SUPER,
            CASE, VAL, ABSTRACT, FINAL,
            PRIVATE, PROTECTED, OVERRIDE, VAR,
            DEF, TYPE, EXTENDS, MODULE,
            CLASS, CONSTR, IMPORT, PACKAGE,
            TRUE, FALSE, DOT, USCORE,
            COLON, EQUALS, ARROW, LARROW,
            SUBTYPE, YIELD, DO, HASH,
            TRAIT, AS, IS
        };
        for (int k = 0; k < spellings.length; k++) {
            enterKeyword(spellings[k], tokens[k]);
        }
    }
}