Fix for parser OOM.

The scanner performs some sketchy heuristics when it sees an ASCII 0x1A character (SU), since it may be EOF or it may be part of a literal. Because of this, it failed to detect an unterminated string literal if the opening quote was unicode-escaped, leading to memory exhaustion as it read SUs until the universe ended. We're parsing a fixed input with known length! There's no reason to be guessing about whether a char is EOF. If we're at the end of the file, it's the end of the file. Otherwise, it is not the end of the file.
author: Paul Phillips <paulp@improving.org> 2012-01-31 00:14:47 -0800
committer: Paul Phillips <paulp@improving.org> 2012-02-01 14:09:23 -0800
commit: 264ff5d5e8dbec4ae2e13bf52e66a965d884b25c (patch)
tree: 4af5300946ce2fe20064f153b703b1317769093e
parent: fbd5efe49cf23b446762dfa5026e8bac82ab04fc (diff)
download: scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.gz
scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.bz2
scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.zip
3 files changed, 12 insertions, 4 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 4478fb6128..dae264fffe 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -84,6 +84,8 @@ trait Scanners extends ScannersCommon {
 
   abstract class Scanner extends CharArrayReader with TokenData with ScannerCommon {
     private def isDigit(c: Char) = java.lang.Character isDigit c
+    
+    def isAtEnd = charOffset >= buf.length
 
     def flush = { charOffset = offset; nextChar(); this }
 
@@ -449,7 +451,7 @@ trait Scanners extends ScannersCommon {
         case ']' =>
           nextChar(); token = RBRACKET
         case SU =>
-          if (charOffset >= buf.length) token = EOF
+          if (isAtEnd) token = EOF
           else {
             syntaxError("illegal character")
             nextChar()
@@ -771,10 +773,10 @@ trait Scanners extends ScannersCommon {
       putChar(ch)
     }
 
-    private def getLitChars(delimiter: Char) =
-      while (ch != delimiter && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) {
+    private def getLitChars(delimiter: Char) = {
+      while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
         getLitChar()
-      }
+    }
 
     /** read fractional part and exponent of floating point number
      *  if one is present.
diff --git a/test/files/neg/unicode-unterminated-quote.check b/test/files/neg/unicode-unterminated-quote.check
new file mode 100644
index 0000000000..fc5caa6d7e
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.check
@@ -0,0 +1,4 @@
+unicode-unterminated-quote.scala:2: error: unclosed string literal
+  val x = /u0022
+               ^
+one error found
diff --git a/test/files/neg/unicode-unterminated-quote.scala b/test/files/neg/unicode-unterminated-quote.scala
new file mode 100644
index 0000000000..bb6eab667f
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.scala
@@ -0,0 +1,2 @@
+class A {
+  val x = \u0022
\ No newline at end of file
author	Paul Phillips <paulp@improving.org>	2012-01-31 00:14:47 -0800
committer	Paul Phillips <paulp@improving.org>	2012-02-01 14:09:23 -0800
commit	264ff5d5e8dbec4ae2e13bf52e66a965d884b25c (patch)
tree	4af5300946ce2fe20064f153b703b1317769093e
parent	fbd5efe49cf23b446762dfa5026e8bac82ab04fc (diff)
download	scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.gz scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.tar.bz2 scala-264ff5d5e8dbec4ae2e13bf52e66a965d884b25c.zip