/*     ____ ____  ____ ____  ______                                     *\
**    / __// __ \/ __// __ \/ ____/    SOcos COmpiles Scala             **
**  __\_ \/ /_/ / /__/ /_/ /\_ \       (c) 2002, LAMP/EPFL              **
** /_____/\____/\___/\____/____/                                        **
**                                                                      **
** $Id$
\*                                                                      */

package scalac.ast.parser;

import ch.epfl.lamp.util.Position;
import ch.epfl.lamp.util.SourceFile;

import scalac.*;
import scalac.util.Name;


/** A scanner for the programming language Scala.
 *
 *  @author     Matthias Zenger, Martin Odersky
 *  @version    1.0
 */
public class Scanner extends TokenData {

    /** buffer for the documentation comment
     */
    protected StringBuffer docBuffer = null;

    /** add the given character to the documentation buffer
     */
    protected void addCharToDoc(byte ch) {
        if (docBuffer != null) docBuffer.append((char) ch);
    }

    /** layout & character constants
     */
    public int tabinc = 8;
    protected final static byte LF = SourceFile.LF;
    protected final static byte FF = SourceFile.FF;
    protected final static byte CR = SourceFile.CR;
    protected final static byte SU = SourceFile.SU;

    /** the names of all tokens
     */
    public Name[]       tokenName = new Name[128];
    public int          numToken = 0;

    /** keyword array; maps from name indices to tokens
     */
    protected byte[]    key;
    protected int       maxKey = 0;

    /** we need one token lookahead
     */
    protected TokenData next = new TokenData();
    protected TokenData prev = new TokenData();

    /** the first character position after the previous token
     */
    public int          lastpos = 0;

    /** the last error position
     */
    public int          errpos = -1;

    /** the input buffer:
     */
    protected byte[]    buf;
    protected int       bp;

    /** the current character
     */
    protected byte      ch;

    /** the line and column position of the current character
     */
    public int          cline;
    public int          ccol;

    /** a buffer for character and string literals
     */
    protected byte[]    lit = new byte[64];
    protected int       litlen;

    /** the compilation unit
     */
    public Unit unit;


    /** Construct a scanner from a file input stream.
     */
    public Scanner(Unit unit) {
        this.unit = unit;
        buf = unit.source.bytes();
        cline = 1;
        bp = -1;
        ccol = 0;
        nextch();
        token = EMPTY;
        init();
        nextToken();
    }

    /** only used to determine keywords. used in dtd2scala tool */
    public Scanner() {
        initKeywords();
    }

    private void nextch() {
        ch = buf[++bp]; ccol++;
    }

    /** read next token and return last position
     */
    public int skipToken() {
        int p = pos;
        nextToken();
        return p;
    }

    public void nextToken() {
        if (token == RBRACE) {
            int prevpos = pos;
            fetchToken();
            switch (token) {
            case ELSE:   case EXTENDS:  case WITH:
            case YIELD:  case CATCH:    case FINALLY:
            case COMMA:  case SEMI:     case DOT:
            case COLON:  case EQUALS:   case ARROW:
            case LARROW: case SUBTYPE:  case SUPERTYPE:
            case HASH:   case AT:
            case RPAREN: case RBRACKET: case RBRACE:
                break;
            default:
                if (token == EOF ||
                    ((pos >>> Position.COLUMN_BITS) >
                     (prevpos >>> Position.COLUMN_BITS))) {
                    next.copyFrom(this);
                    this.token = SEMI;
                    this.pos = prevpos;
                }
            }
        } else {
            if (next.token == EMPTY) {
                fetchToken();
            } else {
                copyFrom(next);
                next.token = EMPTY;
            }
            if (token == CASE) {
                prev.copyFrom(this);
                fetchToken();
                if (token == CLASS) {
                    token = CASECLASS;
                } else if (token == OBJECT) {
                    token = CASEOBJECT;
                } else {
                    next.copyFrom(this);
                    this.copyFrom(prev);
                }
            } else if (token == SEMI) {
                prev.copyFrom(this);
                fetchToken();
                if (token != ELSE) {
                    next.copyFrom(this);
                    this.copyFrom(prev);
                }
            }
        }
        //System.out.println("<" + token2string(token) + ">");//DEBUG
    }

    /** read next token
     */
    public void fetchToken() {
        if (token == EOF) return;
        lastpos = Position.encode(cline, ccol);
        int index = bp;
        while(true) {
            switch (ch) {
            case ' ':
                nextch();
                break;
            case '\t':
                ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                nextch();
                break;
            case CR:
                cline++;
                ccol = 0;
                nextch();
                if (ch == LF) {
                    ccol = 0;
                    nextch();
                }
                break;
            case LF:
            case FF:
                cline++;
                ccol = 0;
                nextch();
                break;
            default:
                pos = Position.encode(cline, ccol);
                index = bp;
                switch (ch) {
                case 'A': case 'B': case 'C': case 'D': case 'E':
                case 'F': case 'G': case 'H': case 'I': case 'J':
                case 'K': case 'L': case 'M': case 'N': case 'O':
                case 'P': case 'Q': case 'R': case 'S': case 'T':
                case 'U': case 'V': case 'W': case 'X': case 'Y':
                case 'Z': case '$':
                case 'a': case 'b': case 'c': case 'd': case 'e':
                case 'f': case 'g': case 'h': case 'i': case 'j':
                case 'k': case 'l': case 'm': case 'n': case 'o':
                case 'p': case 'q': case 'r': case 's': case 't':
                case 'u': case 'v': case 'w': case 'x': case 'y':
                case 'z':
                    nextch();
                    getIdentRest(index);
                    return;
                case '~': case '!': case '@': case '#': case '%':
                case '^': case '*': case '+': case '-': case '<':
                case '>': case '?': case ':':
                case '=': case '&': case '|':
                    nextch();
                    getOperatorRest(index);
                    return;
                case '/':
                    nextch();
                    if (!skipComment()) {
                        getOperatorRest(index);
                        return;
                    }
                    break;
                case '_':
                    nextch();
                    getIdentRest(index);
                    return;
                case '0':
                    nextch();
                    if (ch == 'x' || ch == 'X') {
                        nextch();
                        getNumber(index + 2, 16);
                    } else
                        getNumber(index, 8);
                    return;
                case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                    getNumber(index, 10);
                    return;
                case '\"':
                    nextch();
                    litlen = 0;
                    while (ch != '\"' && ch != CR && ch != LF && ch != SU)
                        getlitch();
                    if (ch == '\"') {
                        token = STRINGLIT;
                        name = Name.fromSource(lit, 0, litlen);
                        nextch();
                    }
                    else
                        syntaxError("unclosed character literal");
                    return;
                case '\'':
                    nextch();
                    litlen = 0;
                    switch (ch) {
                    case 'A': case 'B': case 'C': case 'D': case 'E':
                    case 'F': case 'G': case 'H': case 'I': case 'J':
                    case 'K': case 'L': case 'M': case 'N': case 'O':
                    case 'P': case 'Q': case 'R': case 'S': case 'T':
                    case 'U': case 'V': case 'W': case 'X': case 'Y':
                    case 'Z': case '$':
                    case 'a': case 'b': case 'c': case 'd': case 'e':
                    case 'f': case 'g': case 'h': case 'i': case 'j':
                    case 'k': case 'l': case 'm': case 'n': case 'o':
                    case 'p': case 'q': case 'r': case 's': case 't':
                    case 'u': case 'v': case 'w': case 'x': case 'y':
                    case 'z':
                        index = bp;
                        putch(ch);
                        nextch();
                        if (ch != '\'') {
                            getIdentRest(index);
                            token = SYMBOLLIT;
                            return;
                        }
                        break;
                    default:
                        getlitch();
                    }
                    if (ch == '\'') {
                        nextch();
                        token = CHARLIT;
                        byte[] ascii = new byte[litlen * 2];
                        int alen = SourceRepresentation.source2ascii(lit, 0, litlen, ascii);
                        if (alen > 0)
                            intVal = SourceRepresentation.ascii2string(ascii, 0, alen).charAt(0);
                        else
                            intVal = 0;
                    } else
                        syntaxError("unclosed character literal");
                    return;
                case '.':
                    nextch();
                    if (('0' <= ch) && (ch <= '9')) getFraction(index);
                    else token = DOT;
                    return;
                case ';':
                    nextch(); token = SEMI;
                    return;
                case ',':
                    nextch(); token = COMMA;
                    return;
                case '(':
                    nextch(); token = LPAREN;
                    return;
                case '{':
                    nextch(); token = LBRACE;
                    return;
                case ')':
                    nextch(); token = RPAREN;
                    return;
                case '}':
                    nextch(); token = RBRACE;
                    return;
                case '[':
                    nextch(); token = LBRACKET;
                    return;
                case ']':
                    nextch(); token = RBRACKET;
                    return;
                case SU:
                    token = EOF;
                    return;
                default:
                    nextch();
                    syntaxError("illegal character");
                    return;
                }
            }
        }
    }

    private boolean skipComment() {
        if (ch == '/') {
            do {
                nextch();
            } while ((ch != CR) && (ch != LF) && (ch != SU));
            return true;
        } else if (ch == '*') {
            docBuffer = null;
            int openComments = 1;
            nextch();
            if (ch == '*') {
                docBuffer = new StringBuffer("/**");
            }
            while (openComments > 0) {
                do {
                    do {
                        if (ch == CR) {
                            cline++;
                            ccol = 0;
                            nextch(); addCharToDoc(ch);
                            if (ch == LF) {
                                ccol = 0;
                                nextch(); addCharToDoc(ch);
                            }
                        } else if (ch == LF) {
                            cline++;
                            ccol = 0;
                            nextch(); addCharToDoc(ch);
                        }
                        else if (ch == '\t') {
                            ccol = ((ccol - 1) / tabinc * tabinc) + tabinc;
                            nextch(); addCharToDoc(ch);
                        } else if (ch == '/') {
                            nextch(); addCharToDoc(ch);
                            if (ch == '*') {
                                nextch(); addCharToDoc(ch);
                                openComments++;
                            }
                        } else {
                            nextch(); addCharToDoc(ch);
                        }
                    } while ((ch != '*') && (ch != SU));
                    while (ch == '*') {
                        nextch(); addCharToDoc(ch);
                    }
                } while (ch != '/' && ch != SU);
                if (ch == '/') {
                    nextch();
                    openComments--;
                } else {
                    syntaxError("unclosed comment");
                    return true;
                }
            }
            return true;
        } else {
            return false;
        }
    }

    private void getIdentRest(int index) {
        while (true) {
            switch (ch) {
            case 'A': case 'B': case 'C': case 'D': case 'E':
            case 'F': case 'G': case 'H': case 'I': case 'J':
            case 'K': case 'L': case 'M': case 'N': case 'O':
            case 'P': case 'Q': case 'R': case 'S': case 'T':
            case 'U': case 'V': case 'W': case 'X': case 'Y':
            case 'Z': case '$':
            case 'a': case 'b': case 'c': case 'd': case 'e':
            case 'f': case 'g': case 'h': case 'i': case 'j':
            case 'k': case 'l': case 'm': case 'n': case 'o':
            case 'p': case 'q': case 'r': case 's': case 't':
            case 'u': case 'v': case 'w': case 'x': case 'y':
            case 'z':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
                nextch();
                break;
            case '_':
                nextch();
                getIdentOrOperatorRest(index);
                return;
            default:
                treatIdent(index, bp);
                return;
            }
        }
    }

    private void getOperatorRest(int index) {
        while (true) {
            switch (ch) {
            case '~': case '!': case '@': case '#': case '%':
            case '^': case '*': case '+': case '-': case '<':
            case '>': case '?': case ':':
            case '=': case '&': case '|':
                nextch();
                break;
            case '/':
                int lastbp = bp;
                nextch();
                if (skipComment()) {
                    treatIdent(index, lastbp);
                    return;
                } else {
                    break;
                }
            /* case '_':
                nextch();
                getIdentOrOperatorRest(index);
                return; */
            default:
                treatIdent(index, bp);
                return;
            }
        }
    }

    private void getIdentOrOperatorRest(int index) {
        switch (ch) {
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
       	case 'Z': case '$':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
            getIdentRest(index);
            return;
        case '~': case '!': case '@': case '#': case '%':
        case '^': case '*': case '+': case '-': case '<':
        case '>': case '?': case ':':
        case '=': case '&': case '|':
        case '/':
            getOperatorRest(index);
            return;
        case '_':
            nextch();
            getIdentOrOperatorRest(index);
            return;
        default:
            treatIdent(index, bp);
            return;
        }
    }

    /** returns true if argument corresponds to a keyword.
     *  Used in dtd2scala tool.
     */
    public boolean isKeyword(String str) {
        Name name = Name.fromString(str);
        return (name.index <= maxKey);
    }

    void treatIdent(int start, int end) {
        name = Name.fromAscii(buf, start, end - start);
        if (name.index <= maxKey) {
            token = key[name.index];
        }
        else
            token = IDENTIFIER;
    }

    /** generate an error at the given position
     */
    void syntaxError(int pos, String msg) {
        unit.error(pos, msg);
        token = ERROR;
        errpos = pos;
    }

    /** generate an error at the current token position
     */
    void syntaxError(String msg) {
        syntaxError(pos, msg);
    }

    /** append characteter to "lit" buffer
     */
    protected void putch(byte c) {
        if (litlen == lit.length) {
            byte[] newlit = new byte[lit.length * 2];
            System.arraycopy(lit, 0, newlit, 0, lit.length);
            lit = newlit;
        }
        lit[litlen++] = c;
    }

    /** return true iff next 6 characters are a valid unicode sequence:
     */
    protected boolean isUnicode() {
        return
            (bp + 6) < buf.length &&
            (buf[bp] == '\\') &&
            (buf[bp+1] == 'u') &&
            (SourceRepresentation.digit2int(buf[bp+2], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+3], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+4], 16) >= 0) &&
            (SourceRepresentation.digit2int(buf[bp+5], 16) >= 0);
    }

    /** read next character in character or string literal:
     */
    protected void getlitch() {
        if (ch == '\\') {
            if (isUnicode()) {
                putch(ch); nextch();
                putch(ch); nextch();
                putch(ch); nextch();
                putch(ch); nextch();
                putch(ch); nextch();
                putch(ch); nextch();
            } else {
                nextch();
                if ('0' <= ch && ch <= '7') {
                    byte leadch = ch;
                    int oct = SourceRepresentation.digit2int(ch, 8);
                    nextch();
                    if ('0' <= ch && ch <= '7') {
                        oct = oct * 8 + SourceRepresentation.digit2int(ch, 8);
                        nextch();
                        if (leadch <= '3' && '0' <= ch && ch <= '7') {
                            oct = oct * 8 + SourceRepresentation.digit2int(ch, 8);
                            nextch();
                        }
                    }
                    putch((byte)oct);
                } else if (ch != SU) {
                    switch (ch) {
                        case 'b': case 't': case 'n':
                        case 'f': case 'r': case '\"':
                        case '\'': case '\\':
                            putch((byte)'\\');
                            putch(ch);
                            break;
                        default:
                            syntaxError(Position.encode(cline, ccol) - 1, "invalid escape character");
                            putch(ch);
                    }
                    nextch();
                }
            }
        } else if (ch != SU) {
            putch(ch);
            nextch();
        }
    }

    /** read fractional part of floating point number;
     *  Then floatVal := buf[index..], converted to a floating point number.
     */
    protected void getFraction(int index) {
        while (SourceRepresentation.digit2int(ch, 10) >= 0) {
            nextch();
        }
        token = DOUBLELIT;
        if ((ch == 'e') || (ch == 'E')) {
            nextch();
            if ((ch == '+') || (ch == '-')) {
                byte sign = ch;
                nextch();
                if (('0' > ch) || (ch > '9')) {
                    ch = sign;
                    bp--;
                    ccol--;
                }
            }
            while (SourceRepresentation.digit2int(ch, 10) >= 0) {
                nextch();
            }
        }
        double limit = Double.MAX_VALUE;
        if ((ch == 'd') || (ch == 'D')) {
            nextch();
        } else if ((ch == 'f') || (ch == 'F')) {
            token = FLOATLIT;
            limit = Float.MAX_VALUE;
            nextch();
        }
        try {
            floatVal = Double.valueOf(new String(buf, index, bp - index)).doubleValue();
            if (floatVal > limit)
                syntaxError("floating point number too large");
        } catch (NumberFormatException e) {
            syntaxError("malformed floating point number");
        }
    }

    /** intVal := buf[index..index+len-1], converted to an integer number.
     *  base = the base of the number; one of 8, 10, 16.
     *  max  = the maximal number before an overflow.
     */
    protected void makeInt (int index, int len, int base, long max) {
        intVal = 0;
        int divider = (base == 10 ? 1 : 2);
        for (int i = 0; i < len; i++) {
            int d = SourceRepresentation.digit2int(buf[index + i], base);
            if (d < 0) {
                syntaxError("malformed integer number");
                return;
            }
            if (intVal < 0 ||
                max / (base / divider) < intVal ||
                max - (d / divider) < (intVal * (base / divider) - 0)) {
                syntaxError("integer number too large");
                return;
            }
            intVal = intVal * base + d;
        }
    }

    /** read a number,
     *  and convert buf[index..], setting either intVal or floatVal.
     *  base = the base of the number; one of 8, 10, 16.
     */
    protected void getNumber(int index, int base) {
        while (SourceRepresentation.digit2int(ch, base == 8 ? 10 : base) >= 0) {
            nextch();
        }
        if (base <= 10 && ch == '.') {
            nextch();
            if ((ch >= '0') && (ch <= '9'))
                getFraction(index);
            else {
                ch = buf[--bp]; ccol--;
                makeInt(index, bp - index, base, Integer.MAX_VALUE);
                intVal = (int)intVal;
                token = INTLIT;
            }
        } else if (base <= 10 &&
           (ch == 'e' || ch == 'E' ||
            ch == 'f' || ch == 'F' ||
            ch == 'd' || ch == 'D'))
            getFraction(index);
        else {
            if (ch == 'l' || ch == 'L') {
                makeInt(index, bp - index, base, Long.MAX_VALUE);
                nextch();
                token = LONGLIT;
            } else {
                makeInt(index, bp - index, base, Integer.MAX_VALUE);
                intVal = (int)intVal;
                token = INTLIT;
            }
        }
    }

    public int name2token(Name name) {
        if (name.index <= maxKey)
            return key[name.index];
        else
            return IDENTIFIER;
    }

    public String token2string(int token) {
        switch (token) {
            case IDENTIFIER:
                return "identifier";
            case CHARLIT:
                return "character literal";
            case INTLIT:
                return "integer literal";
            case LONGLIT:
                return "long literal";
            case FLOATLIT:
                return "float literal";
            case DOUBLELIT:
                return "double literal";
            case STRINGLIT:
                return "string literal";
            case SYMBOLLIT:
                return "symbol literal";
            case LPAREN:
                return "'('";
            case RPAREN:
                return "')'";
            case LBRACE:
                return "'{'";
            case RBRACE:
                return "'}'";
            case LBRACKET:
                return "'['";
            case RBRACKET:
                return "']'";
            case EOF:
                return "eof";
            case ERROR:
                return "something";
            case SEMI:
                return "';'";
            case COMMA:
                return "','";
            case CASECLASS:
                return "case class";
            case CASEOBJECT:
                return "case object";
            default:
                try {
                    return "'" + tokenName[token].toString() + "'";
                } catch (ArrayIndexOutOfBoundsException e) {
                    return "'<" + token + ">'";
                } catch (NullPointerException e) {
                    return "'<(" + token + ")>'";
                }
        }
    }

    public String toString() {
        switch (token) {
            case IDENTIFIER:
                return "id(" + name + ")";
            case CHARLIT:
                return "char(" + intVal + ")";
            case INTLIT:
                return "int(" + intVal + ")";
            case LONGLIT:
                return "long(" + intVal + ")";
            case FLOATLIT:
                return "float(" + floatVal + ")";
            case DOUBLELIT:
                return "double(" + floatVal + ")";
            case STRINGLIT:
                return "string(" + name + ")";
            case SEMI:
                return ";";
            case COMMA:
                return ",";
            default:
                return token2string(token);
        }
    }

    protected void enterKeyword(String s, int tokenId) {
        while (tokenId > tokenName.length) {
            Name[]  newTokName = new Name[tokenName.length * 2];
            System.arraycopy(tokenName, 0, newTokName, 0, newTokName.length);
            tokenName = newTokName;
        }
        Name n = Name.fromString(s);
        tokenName[tokenId] = n;
        if (n.index > maxKey)
            maxKey = n.index;
        if (tokenId >= numToken)
            numToken = tokenId + 1;
    }

    protected void init() {
        initKeywords();
        key = new byte[maxKey+1];
        for (int i = 0; i <= maxKey; i++)
            key[i] = IDENTIFIER;
        for (byte j = 0; j < numToken; j++)
            if (tokenName[j] != null)
                key[tokenName[j].index] = j;
    }

    protected void initKeywords() {
        enterKeyword("abstract", ABSTRACT);
        enterKeyword("case", CASE);
        enterKeyword("class", CLASS);
        enterKeyword("catch", CATCH);
        enterKeyword("def", DEF);
        enterKeyword("do", DO);
        enterKeyword("else", ELSE);
        enterKeyword("extends", EXTENDS);
        enterKeyword("false", FALSE);
        enterKeyword("final", FINAL);
        enterKeyword("finally", FINALLY);
        enterKeyword("for", FOR);
        enterKeyword("if", IF);
        enterKeyword("import", IMPORT);
        enterKeyword("new", NEW);
        enterKeyword("null", NULL);
        enterKeyword("object", OBJECT);
        enterKeyword("override", OVERRIDE);
        enterKeyword("package", PACKAGE);
        enterKeyword("private", PRIVATE);
        enterKeyword("protected", PROTECTED);
        enterKeyword("return", RETURN);
        enterKeyword("sealed", SEALED);
        enterKeyword("super", SUPER);
        enterKeyword("this", THIS);
        enterKeyword("throw", THROW);
        enterKeyword("trait", TRAIT);
        enterKeyword("true", TRUE);
        enterKeyword("try", TRY);
        enterKeyword("type", TYPE);
        enterKeyword("val", VAL);
        enterKeyword("var", VAR);
        enterKeyword("with", WITH);
        enterKeyword("while", WHILE);
        enterKeyword("yield", YIELD);
        enterKeyword(".", DOT);
        enterKeyword("_", USCORE);
        enterKeyword(":", COLON);
        enterKeyword("=", EQUALS);
        enterKeyword("=>", ARROW);
        enterKeyword("<-", LARROW);
        enterKeyword("<:", SUBTYPE);
        enterKeyword(">:", SUPERTYPE);
        enterKeyword("#", HASH);
        enterKeyword("@", AT);
    }
}