diff options
author | Paul Phillips <paulp@improving.org> | 2012-01-31 00:14:47 -0800 |
---|---|---|
committer | Paul Phillips <paulp@improving.org> | 2012-02-01 14:09:23 -0800 |
commit | 264ff5d5e8dbec4ae2e13bf52e66a965d884b25c (patch) | |
tree | 4af5300946ce2fe20064f153b703b1317769093e | |
parent | fbd5efe49cf23b446762dfa5026e8bac82ab04fc (diff) | |
download | scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.gz scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.bz2 scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.zip |
Fix for parser OOM.
The scanner performs some sketchy heuristics when it sees an ASCII 0x1A (SU)
since it may be EOF or it may be part of a literal. Due to this, it
failed to detect an unterminated string literal if the opening quote was
unicode-escaped, leading to memory exhaustion as it read SUs until the
universe ended.
We're parsing a fixed input with known length! There's no reason to be
guessing about whether a char is EOF. If we're at the end of the file,
it's the end of file. Otherwise, it is not the end of the file.
-rw-r--r-- | src/compiler/scala/tools/nsc/ast/parser/Scanners.scala | 10 | ||||
-rw-r--r-- | test/files/neg/unicode-unterminated-quote.check | 4 | ||||
-rw-r--r-- | test/files/neg/unicode-unterminated-quote.scala | 2 |
3 files changed, 12 insertions, 4 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 4478fb6128..dae264fffe 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -84,6 +84,8 @@ trait Scanners extends ScannersCommon {
 
   abstract class Scanner extends CharArrayReader with TokenData with ScannerCommon {
     private def isDigit(c: Char) = java.lang.Character isDigit c
+
+    def isAtEnd = charOffset >= buf.length
 
     def flush = { charOffset = offset; nextChar(); this }
 
@@ -449,7 +451,7 @@ trait Scanners extends ScannersCommon {
         case ']' =>
           nextChar(); token = RBRACKET
         case SU =>
-          if (charOffset >= buf.length) token = EOF
+          if (isAtEnd) token = EOF
           else {
             syntaxError("illegal character")
             nextChar()
@@ -771,10 +773,10 @@ trait Scanners extends ScannersCommon {
         putChar(ch)
       }
 
-    private def getLitChars(delimiter: Char) =
-      while (ch != delimiter && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) {
+    private def getLitChars(delimiter: Char) = {
+      while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
         getLitChar()
-      }
+    }
 
     /** read fractional part and exponent of floating point number
      *  if one is present.
diff --git a/test/files/neg/unicode-unterminated-quote.check b/test/files/neg/unicode-unterminated-quote.check
new file mode 100644
index 0000000000..fc5caa6d7e
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.check
@@ -0,0 +1,4 @@
+unicode-unterminated-quote.scala:2: error: unclosed string literal
+  val x = /u0022
+          ^
+one error found
diff --git a/test/files/neg/unicode-unterminated-quote.scala b/test/files/neg/unicode-unterminated-quote.scala
new file mode 100644
index 0000000000..bb6eab667f
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.scala
@@ -0,0 +1,2 @@
+class A {
+  val x = \u0022
\ No newline at end of file |