summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: paltherr <paltherr@epfl.ch> 2004-03-21 15:31:22 +0000
committer: paltherr <paltherr@epfl.ch> 2004-03-21 15:31:22 +0000
commit: 7465e94917326f52367e8c8c294d50bff307ad1d (patch)
tree: 4e3e6e7df6fb05ee6f92be99ac0fd6eae0fe4711
parent: 9cfde36da89989890b1c4123806578560ed0fe4c (diff)
download: scala-7465e94917326f52367e8c8c294d50bff307ad1d.tar.gz
scala-7465e94917326f52367e8c8c294d50bff307ad1d.tar.bz2
scala-7465e94917326f52367e8c8c294d50bff307ad1d.zip
- Changed Scanner and SourceFile to work with c...
- Changed Scanner and SourceFile to work with chars instead of bytes
-rw-r--r-- sources/scala/tools/scalac/ast/parser/MarkupParser.scala 9
-rw-r--r-- sources/scala/tools/scalac/ast/parser/Scanner.scala 65
-rw-r--r-- sources/scala/tools/util/Position.java 2
-rw-r--r-- sources/scala/tools/util/SourceFile.java 43
-rw-r--r-- sources/scalac/Global.java 42
-rw-r--r-- sources/scalac/util/SourceRepresentation.java 5
6 files changed, 87 insertions, 79 deletions
diff --git a/sources/scala/tools/scalac/ast/parser/MarkupParser.scala b/sources/scala/tools/scalac/ast/parser/MarkupParser.scala
index 1e88bebd53..f52f30092e 100644
--- a/sources/scala/tools/scalac/ast/parser/MarkupParser.scala
+++ b/sources/scala/tools/scalac/ast/parser/MarkupParser.scala
@@ -1,3 +1,10 @@
+/* ____ ____ ____ ____ ______ *\
+** / __// __ \/ __// __ \/ ____/ SOcos COmpiles Scala **
+** __\_ \/ /_/ / /__/ /_/ /\_ \ (c) 2002-2004, LAMP/EPFL **
+** /_____/\____/\___/\____/____/ **
+\* */
+
+// $Id$
import scalac.ast._;
import scalac.atree.AConstant;
@@ -211,7 +218,7 @@ class MarkupParser( unit:Unit, s:Scanner, p:Parser ) {
var aMap = ListMap.Empty[Name,Tree];
while( s.xIsNameStart ) {
val key = s.xName; s.xEQ;
- val endch = s.ch.asInstanceOf[char];
+ val endch = s.ch;
val value = endch match {
case '"' | '\'' =>
val pos = s.pos;
diff --git a/sources/scala/tools/scalac/ast/parser/Scanner.scala b/sources/scala/tools/scalac/ast/parser/Scanner.scala
index 022bc8a082..d323f87f0c 100644
--- a/sources/scala/tools/scalac/ast/parser/Scanner.scala
+++ b/sources/scala/tools/scalac/ast/parser/Scanner.scala
@@ -35,9 +35,8 @@ class Scanner(_unit: Unit) extends TokenData {
/** add the given character to the documentation buffer
*/
- protected def addCharToDoc(ch: byte): unit =
- if (docBuffer != null)
- docBuffer.append(ch.asInstanceOf[char]);
+ protected def addCharToDoc(ch: char): unit =
+ if (docBuffer != null) docBuffer.append(ch);
/** layout & character constants
*/
@@ -62,12 +61,12 @@ class Scanner(_unit: Unit) extends TokenData {
/** the input buffer:
*/
- var buf: Array[byte] = unit.source.getContent();
+ var buf: Array[char] = unit.source.getContent();
var bp: int = -1;
/** the current character
*/
- var ch: byte = _;
+ var ch: char = _;
/** the line and column position of the current character
*/
@@ -76,7 +75,7 @@ class Scanner(_unit: Unit) extends TokenData {
/** a buffer for character and string literals
*/
- var lit = new Array[byte](64);
+ var lit = new Array[char](64);
var litlen: int = _;
@@ -89,7 +88,7 @@ class Scanner(_unit: Unit) extends TokenData {
def nextch(): unit = {
bp = bp + 1; ch = buf(bp); ccol = ccol + 1;
- //System.out.print(bp + "[" + (ch.asInstanceOf[char]) + "]");//DEBUG
+ //System.out.print(bp + "[" + ch + "]");//DEBUG
}
/** read next token and return last position
@@ -243,7 +242,7 @@ class Scanner(_unit: Unit) extends TokenData {
'u' | 'v' | 'w' | 'x' | 'y' |
'z' =>
index = bp;
- putAscii(ch);
+ putChar(ch);
nextch();
if (ch != '\'') {
getIdentRest(index);
@@ -441,7 +440,7 @@ class Scanner(_unit: Unit) extends TokenData {
getlitch();
if (ch == delimiter) {
token = STRINGLIT;
- name = Name.fromAscii(lit, 0, litlen);
+ name = Name.fromString(new String(lit, 0, litlen));
nextch();
} else {
syntaxError("unclosed character literal");
@@ -449,7 +448,7 @@ class Scanner(_unit: Unit) extends TokenData {
}
def treatIdent(start: int, end: int) = {
- name = Name.fromAscii(buf, start, end - start);
+ name = Name.fromString(new String(buf, start, end - start));
token = name2token(name);
}
@@ -465,31 +464,17 @@ class Scanner(_unit: Unit) extends TokenData {
*/
def syntaxError(msg: String): unit = syntaxError(pos, msg);
- /** append ascii character to "lit" buffer
+ /** append Unicode character to "lit" buffer
*/
- private def putAscii(c: byte) = {
+ private def putChar(c: char) = {
if (litlen == lit.length) {
- val newlit = new Array[byte](lit.length * 2);
+ val newlit = new Array[char](lit.length * 2);
System.arraycopy(lit, 0, newlit, 0, lit.length);
lit = newlit;
}
lit(litlen) = c;
litlen = litlen + 1;
}
- /** append Unicode character to "lit" buffer
- */
- private def putChar(c: char) = {
- if (c <= 0x7F) {
- putAscii(c.asInstanceOf[byte]);
- } else if (c <= 0x3FF) {
- putAscii((0xC0 | (c >> 6)).asInstanceOf[byte]);
- putAscii((0x80 | (c & 0x3F)).asInstanceOf[byte]);
- } else {
- putAscii((0xE0 | (c >> 12)).asInstanceOf[byte]);
- putAscii((0x80 | ((c >> 6) & 0x3F)).asInstanceOf[byte]);
- putAscii((0x80 | (c & 0x3F)).asInstanceOf[byte]);
- }
- }
/** return true iff next 6 characters are a valid unicode sequence:
*/
@@ -518,7 +503,7 @@ class Scanner(_unit: Unit) extends TokenData {
} else {
nextch();
if ('0' <= ch && ch <= '7') {
- val leadch: byte = ch;
+ val leadch: char = ch;
var oct: int = SourceRepresentation.digit2int(ch, 8);
nextch();
if ('0' <= ch && ch <= '7') {
@@ -542,13 +527,13 @@ class Scanner(_unit: Unit) extends TokenData {
case '\\' => putChar('\\')
case _ =>
syntaxError(Position.encode(cline, ccol) - 1, "invalid escape character");
- putAscii(ch);
+ putChar(ch);
}
nextch();
}
}
} else if (ch != SU) {
- putAscii(ch);
+ putChar(ch);
nextch();
}
@@ -562,7 +547,7 @@ class Scanner(_unit: Unit) extends TokenData {
if ((ch == 'e') || (ch == 'E')) {
nextch();
if ((ch == '+') || (ch == '-')) {
- val sign: byte = ch;
+ val sign: char = ch;
nextch();
if (('0' > ch) || (ch > '9')) {
ch = sign;
@@ -580,7 +565,7 @@ class Scanner(_unit: Unit) extends TokenData {
token = FLOATLIT;
nextch();
}
- name = Name.fromAscii(buf, index, bp - index);
+ name = Name.fromString(new String(buf, index, bp - index));
}
/** convert name, base to long value
@@ -589,7 +574,7 @@ class Scanner(_unit: Unit) extends TokenData {
def intVal(negated: boolean): long = {
if (token == CHARLIT && !negated) {
if (litlen > 0)
- SourceRepresentation.ascii2string(lit, 0, litlen).charAt(0)
+ lit(0)
else
0
} else {
@@ -599,7 +584,7 @@ class Scanner(_unit: Unit) extends TokenData {
var i = 0;
val len = name.length();
while (i < len) {
- val d = SourceRepresentation.digit2int(name.charAt(i).asInstanceOf[byte], base);
+ val d = SourceRepresentation.digit2int(name.charAt(i), base);
if (d < 0) {
syntaxError("malformed integer number");
return 0;
@@ -658,7 +643,7 @@ class Scanner(_unit: Unit) extends TokenData {
(ch1 >= 'A' && ch1 <= 'Z') ||
(ch1 >= '0' && ch1 <= '9') ||
ch1 == '$' || ch1 == '_') {
- name = Name.fromAscii(buf, index, bp - index);
+ name = Name.fromString(new String(buf, index, bp - index));
token = INTLIT;
} else
getFraction(index);
@@ -668,7 +653,7 @@ class Scanner(_unit: Unit) extends TokenData {
bp = bp - 1;
ch = buf(bp);
ccol = ccol - 1;
- name = Name.fromAscii(buf, index, bp - index);
+ name = Name.fromString(new String(buf, index, bp - index));
token = INTLIT;
} else
getFraction(index);
@@ -679,11 +664,11 @@ class Scanner(_unit: Unit) extends TokenData {
getFraction(index);
} else {
if (ch == 'l' || ch == 'L') {
- name = Name.fromAscii(buf, index, bp - index);
+ name = Name.fromString(new String(buf, index, bp - index));
nextch();
token = LONGLIT;
} else {
- name = Name.fromAscii(buf, index, bp - index);
+ name = Name.fromString(new String(buf, index, bp - index));
token = INTLIT;
}
}
@@ -789,7 +774,7 @@ class Scanner(_unit: Unit) extends TokenData {
if( xIsNameStart ) {
val index = bp;
while( xIsNameChar ) { xNext; }
- Name.fromAscii( buf, index, bp - index );
+ Name.fromString(new String( buf, index, bp - index ));
} else {
xSyntaxError( "name expected" );
Names.EMPTY
@@ -848,7 +833,7 @@ class Scanner(_unit: Unit) extends TokenData {
if( ch == that ) {
xNext;
} else {
- xSyntaxError("'"+that+"' expected instead of '"+ch.asInstanceOf[char]+"'");
+ xSyntaxError("'" + that + "' expected instead of '" + ch + "'");
}
}
/* end XML tokenizing */
diff --git a/sources/scala/tools/util/Position.java b/sources/scala/tools/util/Position.java
index 1e431b19a0..25d356c144 100644
--- a/sources/scala/tools/util/Position.java
+++ b/sources/scala/tools/util/Position.java
@@ -100,7 +100,7 @@ public class Position {
/** Initializes a new instance. */
public Position(String sourcename) {
- this(new SourceFile(sourcename, new byte[0]));
+ this(new SourceFile(sourcename, new char[0]));
}
/** Initializes a new instance. */
diff --git a/sources/scala/tools/util/SourceFile.java b/sources/scala/tools/util/SourceFile.java
index f3a17c1d86..520dc23099 100644
--- a/sources/scala/tools/util/SourceFile.java
+++ b/sources/scala/tools/util/SourceFile.java
@@ -8,9 +8,6 @@
package scala.tools.util;
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-
/** This class represents a single source file. */
public class SourceFile {
@@ -19,10 +16,10 @@ public class SourceFile {
// Public Constants
/** Constants used for source parsing */
- public static final byte LF = 0x0A;
- public static final byte FF = 0x0C;
- public static final byte CR = 0x0D;
- public static final byte SU = 0x1A;
+ public static final char LF = '\u000A';
+ public static final char FF = '\u000C';
+ public static final char CR = '\u000D';
+ public static final char SU = '\u001A';
//########################################################################
// Private Fields
@@ -31,10 +28,7 @@ public class SourceFile {
private final AbstractFile file;
/** The content of this source file */
- private final byte[] content;
-
- /** The encoding of this source file or null if unspecified */
- private String encoding;
+ private final char[] content;
/** The position of the last line returned by getLine */
private int lineNumber = 0;
@@ -46,12 +40,12 @@ public class SourceFile {
// Public Constructors
/** Initializes this instance with given name and content. */
- public SourceFile(String sourcename, byte[] content) {
- this(new ByteArrayFile(sourcename, content), content);
+ public SourceFile(String sourcename, char[] content) {
+ this(new CharArrayFile(sourcename, content), content);
}
/** Initializes this instance with given file and content. */
- public SourceFile(AbstractFile file, byte[] content) {
+ public SourceFile(AbstractFile file, char[] content) {
this.file = file;
this.content = normalize(content);
}
@@ -65,15 +59,10 @@ public class SourceFile {
}
/** Returns the content of this source file. */
- public byte[] getContent() {
+ public char[] getContent() {
return content;
}
- /** Sets the encoding of the file. */
- public void setEncoding(String encoding) {
- this.encoding = encoding;
- }
-
/**
* Returns an instance of Position representing the given line and
* column of this source file.
@@ -99,13 +88,7 @@ public class SourceFile {
if (content[index - 1] == CR && content[index] == LF) index++;
}
nextIndex = index;
- try {
- return encoding != null ?
- new String(content, lineStart, lineLength, encoding) :
- new String(content, lineStart, lineLength);
- } catch (UnsupportedEncodingException exception) {
- throw new Error(exception); // !!! use ApplicationError
- }
+ return new String(content, lineStart, lineLength);
}
/** Returns the path of the underlying file. */
@@ -116,11 +99,11 @@ public class SourceFile {
//########################################################################
// Private Functions
- /** Ensures that the last byte of the array is SU. */
- private static byte[] normalize(byte[] input) {
+ /** Ensures that the last char of the array is SU. */
+ private static char[] normalize(char[] input) {
if (input.length > 0 && input[input.length - 1] == SU)
return input;
- byte[] content = new byte[input.length + 1];
+ char[] content = new char[input.length + 1];
System.arraycopy(input, 0, content, 0, input.length);
content[input.length] = SU;
return content;
diff --git a/sources/scalac/Global.java b/sources/scalac/Global.java
index c7b2001a33..088332e0f7 100644
--- a/sources/scalac/Global.java
+++ b/sources/scalac/Global.java
@@ -16,11 +16,16 @@ import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
import java.util.*;
import scala.tools.util.AbstractFile;
import scala.tools.util.Position;
import scala.tools.util.SourceFile;
+import scala.tools.util.SourceReader;
import scalac.ast.*;
import scalac.ast.parser.*;
@@ -70,6 +75,18 @@ public abstract class Global {
*/
private final Stack startTimes = new Stack();
+ /** the source file charset
+ */
+ private final Charset charset;
+
+ /** the source file decoder
+ */
+ private final CharsetDecoder decoder;
+
+ /** the source file reader
+ */
+ private final SourceReader reader;
+
/** all compilation units
*/
public Unit[] units;
@@ -194,7 +211,23 @@ public abstract class Global {
this.printtokens = args.print.tokens;
this.classPath = args.classpath();
this.outpath = args.outpath();
- this.encoding = args.encoding.value;
+ String encoding = args.encoding.value;
+ Charset charset = null;
+ try {
+ charset = Charset.forName(encoding);
+ } catch (IllegalCharsetNameException exception) {
+ args.encoding.error("illegal charset name '" + encoding + "'");
+ } catch (UnsupportedCharsetException exception) {
+ args.encoding.error("unsupported charset '" + encoding + "'");
+ }
+ if (charset == null) {
+ encoding = "ISO-8859-1"; // A mandatory charset
+ charset = Charset.forName(encoding);
+ }
+ this.encoding = encoding;
+ this.charset = charset;
+ this.decoder = charset.newDecoder();
+ this.reader = new SourceReader(decoder);
this.target = interpret ? TARGET_INT : args.target.value.intern();
this.separate = args.separate.value.equals("yes") ||
args.separate.value.equals("default") && !this.target.equals(TARGET_INT);
@@ -259,7 +292,7 @@ public abstract class Global {
/** Creates a virtual source file with given name and content. */
public SourceFile getSourceFile(String sourcename, String content) {
- return new SourceFile(sourcename, content.getBytes());
+ return new SourceFile(sourcename, content.toCharArray());
}
/** Reads and returns the source file in file with given name. */
@@ -273,9 +306,8 @@ public abstract class Global {
/** Reads and returns the source file in given abstract file. */
public SourceFile getSourceFile(AbstractFile file) throws IOException {
if (!file.exists()) throw new FileNotFoundException(
- "source file '" + file.getPath() + "' could not be found");
- byte[] content = file.read();
- return new SourceFile(file, content);
+ "source file '" + file + "' could not be found");
+ return new SourceFile(file, reader.read(file));
}
/** Reads and returns the source file of given clasz. */
diff --git a/sources/scalac/util/SourceRepresentation.java b/sources/scalac/util/SourceRepresentation.java
index becd53b26c..3ddbae6812 100644
--- a/sources/scalac/util/SourceRepresentation.java
+++ b/sources/scalac/util/SourceRepresentation.java
@@ -20,7 +20,7 @@ public final class SourceRepresentation {
.setAllSeparators(File.separatorChar)
.setRootSeparator('\0');
- public static int digit2int(byte ch, int base) {
+ public static int digit2int(char ch, int base) {
if ('0' <= ch && ch <= '9' && ch < '0' + base)
return ch - '0';
else if ('A' <= ch && ch < 'A' + base - 10)
@@ -72,7 +72,8 @@ public final class SourceRepresentation {
int k = 1;
int d = 0;
while (k <= 4 && d >= 0) {
- d = digit2int(source[offset + i + k], 16);
+ // !!! (char)
+ d = digit2int((char)source[offset + i + k], 16);
code = code * 16 + d;
k++;
}