From 264ff5d5e8dbec4ae2e13bf52e66a965d884b25c Mon Sep 17 00:00:00 2001 From: Paul Phillips Date: Tue, 31 Jan 2012 00:14:47 -0800 Subject: Fix for parser OOM. The scanner performs some sketchy heuristics when it sees an ASCII 0x1A (SUB) character since it may be EOF or it may be part of a literal. Due to this, it failed to detect an unterminated string literal if the opening quote was unicode-escaped, leading to memory exhaustion as it read SUs until the universe ended. We're parsing a fixed input with known length! There's no reason to be guessing about whether a char is EOF. If we're at the end of the file, it's the end of the file. Otherwise, it is not the end of the file. --- src/compiler/scala/tools/nsc/ast/parser/Scanners.scala | 10 ++++++---- test/files/neg/unicode-unterminated-quote.check | 4 ++++ test/files/neg/unicode-unterminated-quote.scala | 2 ++ 3 files changed, 12 insertions(+), 4 deletions(-) create mode 100644 test/files/neg/unicode-unterminated-quote.check create mode 100644 test/files/neg/unicode-unterminated-quote.scala diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala index 4478fb6128..dae264fffe 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala @@ -84,6 +84,8 @@ trait Scanners extends ScannersCommon { abstract class Scanner extends CharArrayReader with TokenData with ScannerCommon { private def isDigit(c: Char) = java.lang.Character isDigit c + + def isAtEnd = charOffset >= buf.length def flush = { charOffset = offset; nextChar(); this } @@ -449,7 +451,7 @@ trait Scanners extends ScannersCommon { case ']' => nextChar(); token = RBRACKET case SU => - if (charOffset >= buf.length) token = EOF + if (isAtEnd) token = EOF else { syntaxError("illegal character") nextChar() @@ -771,10 +773,10 @@ trait Scanners extends ScannersCommon { putChar(ch) } - private def getLitChars(delimiter: Char) = - while (ch != delimiter && 
(ch != CR && ch != LF && ch != SU || isUnicodeEscape)) { + private def getLitChars(delimiter: Char) = { + while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape)) getLitChar() - } + } /** read fractional part and exponent of floating point number * if one is present. diff --git a/test/files/neg/unicode-unterminated-quote.check b/test/files/neg/unicode-unterminated-quote.check new file mode 100644 index 0000000000..fc5caa6d7e --- /dev/null +++ b/test/files/neg/unicode-unterminated-quote.check @@ -0,0 +1,4 @@ +unicode-unterminated-quote.scala:2: error: unclosed string literal + val x = /u0022 + ^ +one error found diff --git a/test/files/neg/unicode-unterminated-quote.scala b/test/files/neg/unicode-unterminated-quote.scala new file mode 100644 index 0000000000..bb6eab667f --- /dev/null +++ b/test/files/neg/unicode-unterminated-quote.scala @@ -0,0 +1,2 @@ +class A { + val x = \u0022 \ No newline at end of file -- cgit v1.2.3