From 7465e94917326f52367e8c8c294d50bff307ad1d Mon Sep 17 00:00:00 2001 From: paltherr Date: Sun, 21 Mar 2004 15:31:22 +0000 Subject: - Changed Scanner and SourceFile to work with c... - Changed Scanner and SourceFile to work with chars instead of bytes --- .../tools/scalac/ast/parser/MarkupParser.scala | 9 ++- .../scala/tools/scalac/ast/parser/Scanner.scala | 65 +++++++++------------- sources/scala/tools/util/Position.java | 2 +- sources/scala/tools/util/SourceFile.java | 43 +++++--------- sources/scalac/Global.java | 42 ++++++++++++-- sources/scalac/util/SourceRepresentation.java | 5 +- 6 files changed, 87 insertions(+), 79 deletions(-) diff --git a/sources/scala/tools/scalac/ast/parser/MarkupParser.scala b/sources/scala/tools/scalac/ast/parser/MarkupParser.scala index 1e88bebd53..f52f30092e 100644 --- a/sources/scala/tools/scalac/ast/parser/MarkupParser.scala +++ b/sources/scala/tools/scalac/ast/parser/MarkupParser.scala @@ -1,3 +1,10 @@ +/* ____ ____ ____ ____ ______ *\ +** / __// __ \/ __// __ \/ ____/ SOcos COmpiles Scala ** +** __\_ \/ /_/ / /__/ /_/ /\_ \ (c) 2002-2004, LAMP/EPFL ** +** /_____/\____/\___/\____/____/ ** +\* */ + +// $Id$ import scalac.ast._; import scalac.atree.AConstant; @@ -211,7 +218,7 @@ class MarkupParser( unit:Unit, s:Scanner, p:Parser ) { var aMap = ListMap.Empty[Name,Tree]; while( s.xIsNameStart ) { val key = s.xName; s.xEQ; - val endch = s.ch.asInstanceOf[char]; + val endch = s.ch; val value = endch match { case '"' | '\'' => val pos = s.pos; diff --git a/sources/scala/tools/scalac/ast/parser/Scanner.scala b/sources/scala/tools/scalac/ast/parser/Scanner.scala index 022bc8a082..d323f87f0c 100644 --- a/sources/scala/tools/scalac/ast/parser/Scanner.scala +++ b/sources/scala/tools/scalac/ast/parser/Scanner.scala @@ -35,9 +35,8 @@ class Scanner(_unit: Unit) extends TokenData { /** add the given character to the documentation buffer */ - protected def addCharToDoc(ch: byte): unit = - if (docBuffer != null) - docBuffer.append(ch.asInstanceOf[char]); + protected def addCharToDoc(ch: char): unit = + if (docBuffer != null) docBuffer.append(ch); /** layout & character constants */ @@ -62,12 +61,12 @@ class Scanner(_unit: Unit) extends TokenData { /** the input buffer: */ - var buf: Array[byte] = unit.source.getContent(); + var buf: Array[char] = unit.source.getContent(); var bp: int = -1; /** the current character */ - var ch: byte = _; + var ch: char = _; /** the line and column position of the current character */ @@ -76,7 +75,7 @@ class Scanner(_unit: Unit) extends TokenData { /** a buffer for character and string literals */ - var lit = new Array[byte](64); + var lit = new Array[char](64); var litlen: int = _; @@ -89,7 +88,7 @@ class Scanner(_unit: Unit) extends TokenData { def nextch(): unit = { bp = bp + 1; ch = buf(bp); ccol = ccol + 1; - //System.out.print(bp + "[" + (ch.asInstanceOf[char]) + "]");//DEBUG + //System.out.print(bp + "[" + ch + "]");//DEBUG } /** read next token and return last position @@ -243,7 +242,7 @@ class Scanner(_unit: Unit) extends TokenData { 'u' | 'v' | 'w' | 'x' | 'y' | 'z' => index = bp; - putAscii(ch); + putChar(ch); nextch(); if (ch != '\'') { getIdentRest(index); @@ -441,7 +440,7 @@ class Scanner(_unit: Unit) extends TokenData { getlitch(); if (ch == delimiter) { token = STRINGLIT; - name = Name.fromAscii(lit, 0, litlen); + name = Name.fromString(new String(lit, 0, litlen)); nextch(); } else { syntaxError("unclosed character literal"); @@ -449,7 +448,7 @@ class Scanner(_unit: Unit) extends TokenData { } def treatIdent(start: int, end: int) = { - name = Name.fromAscii(buf, start, end - start); + name = Name.fromString(new String(buf, start, end - start)); token = name2token(name); } @@ -465,31 +464,17 @@ class Scanner(_unit: Unit) extends TokenData { */ def syntaxError(msg: String): unit = syntaxError(pos, msg); - /** append ascii character to "lit" buffer + /** append Unicode character to "lit" buffer */ - private def putAscii(c: byte) = { + private def putChar(c: char) = { if (litlen == lit.length) { - val newlit = new Array[byte](lit.length * 2); + val newlit = new Array[char](lit.length * 2); System.arraycopy(lit, 0, newlit, 0, lit.length); lit = newlit; } lit(litlen) = c; litlen = litlen + 1; } - /** append Unicode character to "lit" buffer - */ - private def putChar(c: char) = { - if (c <= 0x7F) { - putAscii(c.asInstanceOf[byte]); - } else if (c <= 0x3FF) { - putAscii((0xC0 | (c >> 6)).asInstanceOf[byte]); - putAscii((0x80 | (c & 0x3F)).asInstanceOf[byte]); - } else { - putAscii((0xE0 | (c >> 12)).asInstanceOf[byte]); - putAscii((0x80 | ((c >> 6) & 0x3F)).asInstanceOf[byte]); - putAscii((0x80 | (c & 0x3F)).asInstanceOf[byte]); - } - } /** return true iff next 6 characters are a valid unicode sequence: */ @@ -518,7 +503,7 @@ class Scanner(_unit: Unit) extends TokenData { } else { nextch(); if ('0' <= ch && ch <= '7') { - val leadch: byte = ch; + val leadch: char = ch; var oct: int = SourceRepresentation.digit2int(ch, 8); nextch(); if ('0' <= ch && ch <= '7') { @@ -542,13 +527,13 @@ class Scanner(_unit: Unit) extends TokenData { case '\\' => putChar('\\') case _ => syntaxError(Position.encode(cline, ccol) - 1, "invalid escape character"); - putAscii(ch); + putChar(ch); } nextch(); } } } else if (ch != SU) { - putAscii(ch); + putChar(ch); nextch(); } @@ -562,7 +547,7 @@ class Scanner(_unit: Unit) extends TokenData { if ((ch == 'e') || (ch == 'E')) { nextch(); if ((ch == '+') || (ch == '-')) { - val sign: byte = ch; + val sign: char = ch; nextch(); if (('0' > ch) || (ch > '9')) { ch = sign; @@ -580,7 +565,7 @@ class Scanner(_unit: Unit) extends TokenData { token = FLOATLIT; nextch(); } - name = Name.fromAscii(buf, index, bp - index); + name = Name.fromString(new String(buf, index, bp - index)); } /** convert name, base to long value @@ -589,7 +574,7 @@ class Scanner(_unit: Unit) extends TokenData { def intVal(negated: boolean): long = { if (token == CHARLIT && !negated) { if (litlen > 0) - SourceRepresentation.ascii2string(lit, 0, litlen).charAt(0) + lit(0) else 0 } else { @@ -599,7 +584,7 @@ class Scanner(_unit: Unit) extends TokenData { var i = 0; val len = name.length(); while (i < len) { - val d = SourceRepresentation.digit2int(name.charAt(i).asInstanceOf[byte], base); + val d = SourceRepresentation.digit2int(name.charAt(i), base); if (d < 0) { syntaxError("malformed integer number"); return 0; @@ -658,7 +643,7 @@ class Scanner(_unit: Unit) extends TokenData { (ch1 >= 'A' && ch1 <= 'Z') || (ch1 >= '0' && ch1 <= '9') || ch1 == '$' || ch1 == '_') { - name = Name.fromAscii(buf, index, bp - index); + name = Name.fromString(new String(buf, index, bp - index)); token = INTLIT; } else getFraction(index); @@ -668,7 +653,7 @@ class Scanner(_unit: Unit) extends TokenData { bp = bp - 1; ch = buf(bp); ccol = ccol - 1; - name = Name.fromAscii(buf, index, bp - index); + name = Name.fromString(new String(buf, index, bp - index)); token = INTLIT; } else getFraction(index); @@ -679,11 +664,11 @@ class Scanner(_unit: Unit) extends TokenData { getFraction(index); } else { if (ch == 'l' || ch == 'L') { - name = Name.fromAscii(buf, index, bp - index); + name = Name.fromString(new String(buf, index, bp - index)); nextch(); token = LONGLIT; } else { - name = Name.fromAscii(buf, index, bp - index); + name = Name.fromString(new String(buf, index, bp - index)); token = INTLIT; } } @@ -789,7 +774,7 @@ class Scanner(_unit: Unit) extends TokenData { if( xIsNameStart ) { val index = bp; while( xIsNameChar ) { xNext; } - Name.fromAscii( buf, index, bp - index ); + Name.fromString(new String( buf, index, bp - index )); } else { xSyntaxError( "name expected" ); Names.EMPTY @@ -848,7 +833,7 @@ class Scanner(_unit: Unit) extends TokenData { if( ch == that ) { xNext; } else { - xSyntaxError("'"+that+"' expected instead of '"+ch.asInstanceOf[char]+"'"); + xSyntaxError("'" + that + "' expected instead of '" + ch + "'"); } } /* end XML tokenizing */ diff --git a/sources/scala/tools/util/Position.java b/sources/scala/tools/util/Position.java index 1e431b19a0..25d356c144 100644 --- a/sources/scala/tools/util/Position.java +++ b/sources/scala/tools/util/Position.java @@ -100,7 +100,7 @@ public class Position { /** Initializes a new instance. */ public Position(String sourcename) { - this(new SourceFile(sourcename, new byte[0])); + this(new SourceFile(sourcename, new char[0])); } /** Initializes a new instance. */ diff --git a/sources/scala/tools/util/SourceFile.java b/sources/scala/tools/util/SourceFile.java index f3a17c1d86..520dc23099 100644 --- a/sources/scala/tools/util/SourceFile.java +++ b/sources/scala/tools/util/SourceFile.java @@ -8,9 +8,6 @@ package scala.tools.util; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; - /** This class represents a single source file. */ public class SourceFile { @@ -19,10 +16,10 @@ public class SourceFile { // Public Constants /** Constants used for source parsing */ - public static final byte LF = 0x0A; - public static final byte FF = 0x0C; - public static final byte CR = 0x0D; - public static final byte SU = 0x1A; + public static final char LF = '\u000A'; + public static final char FF = '\u000C'; + public static final char CR = '\u000D'; + public static final char SU = '\u001A'; //######################################################################## // Private Fields @@ -31,10 +28,7 @@ public class SourceFile { private final AbstractFile file; /** The content of this source file */ - private final byte[] content; - - /** The encoding of this source file or null if unspecified */ - private String encoding; + private final char[] content; /** The position of the last line returned by getLine */ private int lineNumber = 0; @@ -46,12 +40,12 @@ public class SourceFile { // Public Constructors /** Initializes this instance with given name and content. */ - public SourceFile(String sourcename, byte[] content) { - this(new ByteArrayFile(sourcename, content), content); + public SourceFile(String sourcename, char[] content) { + this(new CharArrayFile(sourcename, content), content); } /** Initializes this instance with given file and content. */ - public SourceFile(AbstractFile file, byte[] content) { + public SourceFile(AbstractFile file, char[] content) { this.file = file; this.content = normalize(content); } @@ -65,15 +59,10 @@ public class SourceFile { } /** Returns the content of this source file. */ - public byte[] getContent() { + public char[] getContent() { return content; } - /** Sets the encoding of the file. */ - public void setEncoding(String encoding) { - this.encoding = encoding; - } - /** * Returns an instance of Position representing the given line and * column of this source file. @@ -99,13 +88,7 @@ public class SourceFile { if (content[index - 1] == CR && content[index] == LF) index++; } nextIndex = index; - try { - return encoding != null ? - new String(content, lineStart, lineLength, encoding) : - new String(content, lineStart, lineLength); - } catch (UnsupportedEncodingException exception) { - throw new Error(exception); // !!! use ApplicationError - } + return new String(content, lineStart, lineLength); } /** Returns the path of the underlying file. */ @@ -116,11 +99,11 @@ public class SourceFile { //######################################################################## // Private Functions - /** Ensures that the last byte of the array is SU. */ - private static byte[] normalize(byte[] input) { + /** Ensures that the last char of the array is SU. */ + private static char[] normalize(char[] input) { if (input.length > 0 && input[input.length - 1] == SU) return input; - byte[] content = new byte[input.length + 1]; + char[] content = new char[input.length + 1]; System.arraycopy(input, 0, content, 0, input.length); content[input.length] = SU; return content; diff --git a/sources/scalac/Global.java b/sources/scalac/Global.java index c7b2001a33..088332e0f7 100644 --- a/sources/scalac/Global.java +++ b/sources/scalac/Global.java @@ -16,11 +16,16 @@ import java.io.OutputStream; import java.io.PrintWriter; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.IllegalCharsetNameException; +import java.nio.charset.UnsupportedCharsetException; import java.util.*; import scala.tools.util.AbstractFile; import scala.tools.util.Position; import scala.tools.util.SourceFile; +import scala.tools.util.SourceReader; import scalac.ast.*; import scalac.ast.parser.*; @@ -70,6 +75,18 @@ public abstract class Global { */ private final Stack startTimes = new Stack(); + /** the source file charset + */ + private final Charset charset; + + /** the source file decoder + */ + private final CharsetDecoder decoder; + + /** the source file reader + */ + private final SourceReader reader; + /** all compilation units */ public Unit[] units; @@ -194,7 +211,23 @@ public abstract class Global { this.printtokens = args.print.tokens; this.classPath = args.classpath(); this.outpath = args.outpath(); - this.encoding = args.encoding.value; + String encoding = args.encoding.value; + Charset charset = null; + try { + charset = Charset.forName(encoding); + } catch (IllegalCharsetNameException exception) { + args.encoding.error("illegal charset name '" + encoding + "'"); + } catch (UnsupportedCharsetException exception) { + args.encoding.error("unsupported charset '" + encoding + "'"); + } + if (charset == null) { + encoding = "ISO-8859-1"; // A mandatory charset + charset = Charset.forName(encoding); + } + this.encoding = encoding; + this.charset = charset; + this.decoder = charset.newDecoder(); + this.reader = new SourceReader(decoder); this.target = interpret ? TARGET_INT : args.target.value.intern(); this.separate = args.separate.value.equals("yes") || args.separate.value.equals("default") && !this.target.equals(TARGET_INT); @@ -259,7 +292,7 @@ public abstract class Global { /** Creates a virtual source file with given name and content. */ public SourceFile getSourceFile(String sourcename, String content) { - return new SourceFile(sourcename, content.getBytes()); + return new SourceFile(sourcename, content.toCharArray()); } /** Reads and returns the source file in file with given name. */ @@ -273,9 +306,8 @@ public abstract class Global { /** Reads and returns the source file in given abstract file. */ public SourceFile getSourceFile(AbstractFile file) throws IOException { if (!file.exists()) throw new FileNotFoundException( - "source file '" + file.getPath() + "' could not be found"); - byte[] content = file.read(); - return new SourceFile(file, content); + "source file '" + file + "' could not be found"); + return new SourceFile(file, reader.read(file)); } /** Reads and returns the source file of given clasz. */ diff --git a/sources/scalac/util/SourceRepresentation.java b/sources/scalac/util/SourceRepresentation.java index becd53b26c..3ddbae6812 100644 --- a/sources/scalac/util/SourceRepresentation.java +++ b/sources/scalac/util/SourceRepresentation.java @@ -20,7 +20,7 @@ public final class SourceRepresentation { .setAllSeparators(File.separatorChar) .setRootSeparator('\0'); - public static int digit2int(byte ch, int base) { + public static int digit2int(char ch, int base) { if ('0' <= ch && ch <= '9' && ch < '0' + base) return ch - '0'; else if ('A' <= ch && ch < 'A' + base - 10) @@ -72,7 +72,8 @@ public final class SourceRepresentation { int k = 1; int d = 0; while (k <= 4 && d >= 0) { - d = digit2int(source[offset + i + k], 16); + // !!! (char) + d = digit2int((char)source[offset + i + k], 16); code = code * 16 + d; k++; } -- cgit v1.2.3