From 264ff5d5e8dbec4ae2e13bf52e66a965d884b25c Mon Sep 17 00:00:00 2001
From: Paul Phillips <paulp@improving.org>
Date: Tue, 31 Jan 2012 00:14:47 -0800
Subject: Fix for parser OOM.

The scanner performs some sketchy heuristics when it sees an ASCII 0x1A
(SU) character, since it may mean EOF or it may be part of a literal.
Because of this, it failed to detect an unterminated string literal if the
opening quote was unicode-escaped, leading to memory exhaustion as it kept
reading SU characters until the universe ended.

We're parsing a fixed input with known length! There's no reason to be
guessing about whether a char is EOF. If we're at the end of the file,
it's the end of file.  Otherwise, it is not the end of the file.
---
 src/compiler/scala/tools/nsc/ast/parser/Scanners.scala | 10 ++++++----
 test/files/neg/unicode-unterminated-quote.check        |  4 ++++
 test/files/neg/unicode-unterminated-quote.scala        |  2 ++
 3 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 test/files/neg/unicode-unterminated-quote.check
 create mode 100644 test/files/neg/unicode-unterminated-quote.scala

diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 4478fb6128..dae264fffe 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -84,6 +84,8 @@ trait Scanners extends ScannersCommon {
 
   abstract class Scanner extends CharArrayReader with TokenData with ScannerCommon {
     private def isDigit(c: Char) = java.lang.Character isDigit c
+
+    def isAtEnd: Boolean = charOffset >= buf.length  // true once charOffset has reached or passed the end of buf
 
     def flush = { charOffset = offset; nextChar(); this }
 
@@ -449,7 +451,7 @@ trait Scanners extends ScannersCommon {
         case ']' =>
           nextChar(); token = RBRACKET
         case SU =>
-          if (charOffset >= buf.length) token = EOF
+          if (isAtEnd) token = EOF
           else {
             syntaxError("illegal character")
             nextChar()
@@ -771,10 +773,10 @@ trait Scanners extends ScannersCommon {
       putChar(ch)
     }
 
-    private def getLitChars(delimiter: Char) =
-      while (ch != delimiter && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) {
+    private def getLitChars(delimiter: Char) = {  // calls getLitChar() until ch is the delimiter, isAtEnd holds, or ch is SU/CR/LF while isUnicodeEscape is false
+      while (ch != delimiter && !isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape))
         getLitChar()
-      }
+    }
 
     /** read fractional part and exponent of floating point number
      *  if one is present.
diff --git a/test/files/neg/unicode-unterminated-quote.check b/test/files/neg/unicode-unterminated-quote.check
new file mode 100644
index 0000000000..fc5caa6d7e
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.check
@@ -0,0 +1,4 @@
+unicode-unterminated-quote.scala:2: error: unclosed string literal
+  val x = /u0022
+               ^
+one error found
diff --git a/test/files/neg/unicode-unterminated-quote.scala b/test/files/neg/unicode-unterminated-quote.scala
new file mode 100644
index 0000000000..bb6eab667f
--- /dev/null
+++ b/test/files/neg/unicode-unterminated-quote.scala
@@ -0,0 +1,2 @@
+class A {
+  val x = \u0022
\ No newline at end of file
-- 
cgit v1.2.3