From 6ea9f72ffd0e1b606cb5ad4670b4db8330fa29b9 Mon Sep 17 00:00:00 2001
From: Martin Odersky <odersky@gmail.com>
Date: Mon, 16 Jan 2012 17:49:12 +0100
Subject: A string interpolation implementation of SIP-11.

This is the complete implementation of SIP-11, in its version of 15-Jan-2012. For now, the interpolations are enabled only under the -Xexperimental flag.
---
 src/compiler/scala/reflect/internal/StdNames.scala |   2 +-
 .../scala/tools/nsc/ast/parser/Parsers.scala       |  63 +++---
 .../scala/tools/nsc/ast/parser/Scanners.scala      | 245 +++++++++++++--------
 .../scala/tools/nsc/ast/parser/Tokens.scala        |  10 +-
 4 files changed, 192 insertions(+), 128 deletions(-)

(limited to 'src/compiler')

diff --git a/src/compiler/scala/reflect/internal/StdNames.scala b/src/compiler/scala/reflect/internal/StdNames.scala
index 2871ba59f6..507621ea42 100644
--- a/src/compiler/scala/reflect/internal/StdNames.scala
+++ b/src/compiler/scala/reflect/internal/StdNames.scala
@@ -271,6 +271,7 @@ trait StdNames extends NameManglers { self: SymbolTable =>
     // Compiler utilized names
     // val productElementName: NameType = "productElementName"
     val Ident: NameType                = "Ident"
+    val StringContext: NameType        = "StringContext"
     val TYPE_ : NameType               = "TYPE"
     val TypeTree: NameType             = "TypeTree"
     val UNIT : NameType                = "UNIT"
@@ -320,7 +321,6 @@ trait StdNames extends NameManglers { self: SymbolTable =>
     val find_ : NameType               = "find"
     val flatMap: NameType              = "flatMap"
     val foreach: NameType              = "foreach"
-    val formatted: NameType            = "formatted"
     val freeValue : NameType           = "freeValue"
     val genericArrayOps: NameType      = "genericArrayOps"
     val get: NameType                  = "get"
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala b/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala
index 580b2a16eb..ce41bc456e 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala
@@ -617,7 +617,7 @@ self =>
 
     def isLiteralToken(token: Int) = token match {
       case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT |
-           STRINGLIT | STRINGPART | SYMBOLLIT | TRUE | FALSE | NULL => true
+           STRINGLIT | INTERPOLATIONID | SYMBOLLIT | TRUE | FALSE | NULL => true
       case _                                                        => false
     }
     def isLiteral = isLiteralToken(in.token)
@@ -1103,7 +1103,7 @@ self =>
      *  }}}
      *  @note  The returned tree does not yet have a position
      */
-    def literal(isNegated: Boolean): Tree = {
+    def literal(isNegated: Boolean = false): Tree = {
       def finish(value: Any): Tree = {
         val t = Literal(Constant(value))
         in.nextToken()
@@ -1111,19 +1111,19 @@ self =>
       }
       if (in.token == SYMBOLLIT)
         Apply(scalaDot(nme.Symbol), List(finish(in.strVal)))
-      else if (in.token == STRINGPART)
+      else if (in.token == INTERPOLATIONID)
         interpolatedString()
       else finish(in.token match {
-        case CHARLIT               => in.charVal
-        case INTLIT                => in.intVal(isNegated).toInt
-        case LONGLIT               => in.intVal(isNegated)
-        case FLOATLIT              => in.floatVal(isNegated).toFloat
-        case DOUBLELIT             => in.floatVal(isNegated)
-        case STRINGLIT             => in.strVal.intern()
-        case TRUE                  => true
-        case FALSE                 => false
-        case NULL                  => null
-        case _                     =>
+        case CHARLIT   => in.charVal
+        case INTLIT    => in.intVal(isNegated).toInt
+        case LONGLIT   => in.intVal(isNegated)
+        case FLOATLIT  => in.floatVal(isNegated).toFloat
+        case DOUBLELIT => in.floatVal(isNegated)
+        case STRINGLIT | STRINGPART => in.strVal.intern()
+        case TRUE      => true
+        case FALSE     => false
+        case NULL      => null
+        case _         =>
           syntaxErrorOrIncomplete("illegal literal", true)
           null
       })
@@ -1138,16 +1138,27 @@ self =>
       }
     }
 
-    private def interpolatedString(): Tree = {
-      var t = atPos(o2p(in.offset))(New(TypeTree(definitions.StringBuilderClass.tpe), List(List())))
+    private def interpolatedString(): Tree = atPos(in.offset) {
+      val start = in.offset
+      val interpolator = in.name
+      
+      val partsBuf = new ListBuffer[Tree]
+      val exprBuf = new ListBuffer[Tree]
+      in.nextToken()
       while (in.token == STRINGPART) {
-        t = stringOp(t, nme.append)
-        var e = expr()
-        if (in.token == STRINGFMT) e = stringOp(e, nme.formatted)
-        t = atPos(t.pos.startOrPoint)(Apply(Select(t, nme.append), List(e)))
+        partsBuf += literal()
+        exprBuf += {
+          if (in.token == IDENTIFIER) atPos(in.offset)(Ident(ident()))
+          else expr()
+        }
       }
-      if (in.token == STRINGLIT) t = stringOp(t, nme.append)
-      atPos(t.pos)(Select(t, nme.toString_))
+      if (in.token == STRINGLIT) partsBuf += literal()
+      
+      val t1 = atPos(o2p(start)) { Ident(nme.StringContext) }
+      val t2 = atPos(start) { Apply(t1, partsBuf.toList) }
+      t2 setPos t2.pos.makeTransparent
+      val t3 = Select(t2, interpolator) setPos t2.pos
+      atPos(start) { Apply(t3, exprBuf.toList) }
     }
 
 /* ------------- NEW LINES ------------------------------------------------- */
@@ -1469,7 +1480,7 @@ self =>
         atPos(in.offset) {
           val name = nme.toUnaryName(rawIdent())
           // val name = nme.toUnaryName(ident())  // val name: Name = "unary_" + ident()
-          if (name == nme.UNARY_- && isNumericLit) simpleExprRest(atPos(in.offset)(literal(true)), true)
+          if (name == nme.UNARY_- && isNumericLit) simpleExprRest(atPos(in.offset)(literal(isNegated = true)), true)
           else Select(stripParens(simpleExpr()), name)
         }
       }
@@ -1493,7 +1504,7 @@ self =>
     def simpleExpr(): Tree = {
       var canApply = true
       val t =
-        if (isLiteral) atPos(in.offset)(literal(false))
+        if (isLiteral) atPos(in.offset)(literal())
         else in.token match {
           case XMLSTART =>
             xmlLiteral()
@@ -1827,7 +1838,7 @@ self =>
               case INTLIT | LONGLIT | FLOATLIT | DOUBLELIT =>
                 t match {
                   case Ident(nme.MINUS) =>
-                    return atPos(start) { literal(true) }
+                    return atPos(start) { literal(isNegated = true) }
                   case _ =>
                 }
               case _ =>
@@ -1844,8 +1855,8 @@ self =>
             in.nextToken()
             atPos(start, start) { Ident(nme.WILDCARD) }
           case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT |
-               STRINGLIT | STRINGPART | SYMBOLLIT | TRUE | FALSE | NULL =>
-            atPos(start) { literal(false) }
+               STRINGLIT | INTERPOLATIONID | SYMBOLLIT | TRUE | FALSE | NULL =>
+            atPos(start) { literal() }
           case LPAREN =>
             atPos(start)(makeParens(noSeq.patterns()))
           case XMLSTART =>
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index a25b3afbc6..a2a577a7ab 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -161,12 +161,25 @@ trait Scanners extends ScannersCommon {
      *  RBRACKET  if region starts with '['
      *  RBRACE    if region starts with '{'
      *  ARROW     if region starts with `case'
-     *  STRINGFMT if region is a string interpolation expression starting with '\{'
+     *  STRINGLIT if region is a string interpolation expression starting with '${'
+     *            (the STRINGLIT appears twice in succession on the stack iff the 
+     *             expression is a multiline string literal).
      */
     var sepRegions: List[Int] = List()
 
 // Get next token ------------------------------------------------------------
 
+    /** Is the innermost open region a string interpolation, i.e. is a
+     *  STRINGLIT marker on top of the `sepRegions` stack? */
+    @inline private def inStringInterpolation =
+      sepRegions match { case STRINGLIT :: _ => true; case _ => false }
+    
+    /** Are we directly in a multiline string interpolation expression, i.e. is the
+     *  STRINGLIT region marker on top of `sepRegions` followed by a second one?
+     *  @pre: inStringInterpolation (so `sepRegions` is non-empty) */
+    @inline private def inMultiLineInterpolation =
+      sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGLIT
+    
     /** read next token and return last offset
      */
     def skipToken(): Offset = {
@@ -189,29 +202,31 @@ trait Scanners extends ScannersCommon {
           sepRegions = RBRACE :: sepRegions
         case CASE =>
           sepRegions = ARROW :: sepRegions
-        case STRINGPART =>
-          sepRegions = STRINGFMT :: sepRegions
         case RBRACE =>
-          sepRegions = sepRegions dropWhile (_ != RBRACE)
+          while (!sepRegions.isEmpty && sepRegions.head != RBRACE) 
+            sepRegions = sepRegions.tail
           if (!sepRegions.isEmpty) sepRegions = sepRegions.tail
-        case RBRACKET | RPAREN | ARROW | STRINGFMT =>
+          docBuffer = null
+        case RBRACKET | RPAREN =>
           if (!sepRegions.isEmpty && sepRegions.head == lastToken)
             sepRegions = sepRegions.tail
-        case _ =>
-      }
-      (lastToken: @switch) match {
-        case RBRACE | RBRACKET | RPAREN =>
           docBuffer = null
+        case ARROW =>
+          if (!sepRegions.isEmpty && sepRegions.head == lastToken)
+            sepRegions = sepRegions.tail
+        case STRINGLIT =>
+          if (inStringInterpolation)
+            sepRegions = sepRegions.tail
         case _ =>
       }
-
+ 
       // Read a token or copy it from `next` tokenData
       if (next.token == EMPTY) {
         lastOffset = charOffset - 1
-        if(lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') {
+        if (lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') {
           lastOffset -= 1
         }
-        fetchToken()
+        if (inStringInterpolation) fetchStringPart() else fetchToken()
       } else {
         this copyFrom next
         next.token = EMPTY
@@ -308,7 +323,9 @@ trait Scanners extends ScannersCommon {
              'z' =>
           putChar(ch)
           nextChar()
-          getIdentRest()  // scala-mode: wrong indent for multi-line case blocks
+          getIdentRest()  
+          if (ch == '"' && token == IDENTIFIER && settings.Xexperimental.value) 
+            token = INTERPOLATIONID
         case '<' => // is XMLSTART?
           val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
           nextChar()
@@ -360,18 +377,37 @@ trait Scanners extends ScannersCommon {
         case '`' =>
           getBackquotedIdent()
         case '\"' =>
-          nextChar()
-          if (ch == '\"') {
-            nextChar()
+          if (token == INTERPOLATIONID) {
+            nextRawChar()
             if (ch == '\"') {
               nextRawChar()
-              getMultiLineStringLit()
+              if (ch == '\"') {
+                nextRawChar()
+                getStringPart(multiLine = true)
+                sepRegions = STRINGLIT :: sepRegions // indicate string part
+                sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part
+              } else {
+                token = STRINGLIT
+                strVal = ""
+              }
             } else {
-              token = STRINGLIT
-              strVal = ""
+              getStringPart(multiLine = false)
+              sepRegions = STRINGLIT :: sepRegions // indicate single line string part
             }
           } else {
-            getStringPart()
+            nextChar()
+            if (ch == '\"') {
+              nextChar()
+              if (ch == '\"') {
+                nextRawChar()
+                getRawStringLit()
+              } else {
+                token = STRINGLIT
+                strVal = ""
+              }
+            } else { 
+              getStringLit()
+            }
           }
         case '\'' =>
           nextChar()
@@ -397,9 +433,7 @@ trait Scanners extends ScannersCommon {
             token = DOT
           }
         case ';' =>
-          nextChar()
-          if (inStringInterpolation) getFormatString()
-          else token = SEMI
+          nextChar(); token = SEMI
         case ',' =>
           nextChar(); token = COMMA
         case '(' =>
@@ -409,16 +443,7 @@ trait Scanners extends ScannersCommon {
         case ')' =>
           nextChar(); token = RPAREN
         case '}' =>
-          if (token == STRINGFMT) {
-            nextChar()
-            getStringPart()
-          } else if (inStringInterpolation) {
-            strVal = "";
-            token = STRINGFMT
-          } else {
-            nextChar();
-            token = RBRACE
-          }
+          nextChar(); token = RBRACE
         case '[' =>
           nextChar(); token = LBRACKET
         case ']' =>
@@ -506,11 +531,6 @@ trait Scanners extends ScannersCommon {
       }
     }
 
-    /** Are we directly in a string interpolation expression?
-     */
-    private def inStringInterpolation =
-      sepRegions.nonEmpty && sepRegions.head == STRINGFMT
-
     /** Can token start a statement? */
     def inFirstOfStat(token: Int) = token match {
       case EOF | CATCH | ELSE | EXTENDS | FINALLY | FORSOME | MATCH | WITH | YIELD |
@@ -608,71 +628,110 @@ trait Scanners extends ScannersCommon {
           else finishNamed()
       }
     }
+    
+ 
+// Literals -----------------------------------------------------------------
 
-    def getFormatString() = {
-      getLitChars('}', '"', ' ', '\t')
-      if (ch == '}') {
-        setStrVal()
-        if (strVal.length > 0) strVal = "%" + strVal
-        token = STRINGFMT
-      } else {
-        syntaxError("unclosed format string")
-      }
-    }
-
-    def getStringPart() = {
-      while (ch != '"' && (ch != CR && ch != LF && ch != SU || isUnicodeEscape) && maybeGetLitChar()) {}
+    private def getStringLit() = {
+      getLitChars('"')
       if (ch == '"') {
         setStrVal()
         nextChar()
         token = STRINGLIT
-      } else if (ch == '{' && settings.Xexperimental.value) {
-        setStrVal()
-        nextChar()
-        token = STRINGPART
-      } else {
-        syntaxError("unclosed string literal")
-      }
+      } else syntaxError("unclosed string literal")
     }
 
-    private def getMultiLineStringLit() {
+    private def getRawStringLit(): Unit = {
       if (ch == '\"') {
         nextRawChar()
-        if (ch == '\"') {
+        if (isTripleQuote()) {
+          setStrVal()
+          token = STRINGLIT
+        } else
+          getRawStringLit()
+      } else if (ch == SU) {
+        incompleteInputError("unclosed multi-line string literal")
+      } else {
+        putChar(ch)
+        nextRawChar()
+        getRawStringLit()
+      }
+    }
+   
+    @annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = {
+      def finishStringPart() = {
+        setStrVal()
+        token = STRINGPART
+        next.lastOffset = charOffset - 1
+        next.offset = charOffset - 1
+      }   
+      if (ch == '"') {
+        nextRawChar()
+        if (!multiLine || isTripleQuote()) {
+          setStrVal()
+          token = STRINGLIT
+        } else 
+          getStringPart(multiLine)
+      } else if (ch == '$') {
+        nextRawChar()
+        if (ch == '$') {
+          putChar(ch)
           nextRawChar()
-          if (ch == '\"') {
-            nextChar()
-            while (ch == '\"') {
-              putChar('\"')
-              nextChar()
-            }
-            token = STRINGLIT
-            setStrVal()
-          } else {
-            putChar('\"')
-            putChar('\"')
-            getMultiLineStringLit()
-          }
+          getStringPart(multiLine)
+        } else if (ch == '{') {
+          finishStringPart()
+          nextRawChar()
+          next.token = LBRACE
+        } else if (Character.isUnicodeIdentifierStart(ch)) {
+          finishStringPart()
+          do {
+            putChar(ch)
+            nextRawChar()
+          } while (Character.isUnicodeIdentifierPart(ch))
+          next.token = IDENTIFIER
+          next.name = newTermName(cbuf.toString)
+          cbuf.clear()
         } else {
-          putChar('\"')
-          getMultiLineStringLit()
+          syntaxError("invalid string interpolation")
         }
-      } else if (ch == SU) {
-        incompleteInputError("unclosed multi-line string literal")
+      } else if ((ch == CR || ch == LF || ch == SU) && !isUnicodeEscape) {
+        syntaxError("unclosed string literal")
       } else {
         putChar(ch)
         nextRawChar()
-        getMultiLineStringLit()
+        getStringPart(multiLine)
       }
     }
+  
+    private def fetchStringPart() = {  // used in place of fetchToken() while inStringInterpolation holds
+      offset = charOffset - 1  // token start; presumably the position of the current character `ch`
+      getStringPart(multiLine = inMultiLineInterpolation)
+    }
+    
+    private def isTripleQuote(): Boolean =  // callers invoke this right after consuming one `"`
+      if (ch == '"') {
+        nextRawChar()
+        if (ch == '"') {  // three quotes in a row: the closing delimiter
+          nextChar()
+          while (ch == '"') {  // any further quotes are buffered as string content
+            putChar('"')
+            nextChar()
+          }
+          true
+        } else {  // only two quotes: buffer both as ordinary content
+          putChar('"')
+          putChar('"')
+          false
+        }
+      } else {  // a lone quote: buffer it as ordinary content
+        putChar('"')
+        false
+      }
 
-// Literals -----------------------------------------------------------------
-
-    /** read next character in character or string literal:
-     *  if character sequence is a \{ escape, do not copy it into the string and return false.
-     *  otherwise return true.
+    /** copy current character into cbuf, interpreting any escape sequences, 
+     *  and advance to next character.
      */
-    protected def maybeGetLitChar(): Boolean = {
+    protected def getLitChar(): Unit =
       if (ch == '\\') {
         nextChar()
         if ('0' <= ch && ch <= '7') {
@@ -698,7 +757,6 @@ trait Scanners extends ScannersCommon {
             case '\"' => putChar('\"')
             case '\'' => putChar('\'')
             case '\\' => putChar('\\')
-            case '{'  => return false
             case _    => invalidEscape()
           }
           nextChar()
@@ -707,22 +765,16 @@ trait Scanners extends ScannersCommon {
         putChar(ch)
         nextChar()
       }
-      true
-    }
 
     protected def invalidEscape(): Unit = {
       syntaxError(charOffset - 1, "invalid escape character")
       putChar(ch)
     }
 
-    protected def getLitChar(): Unit =
-      if (!maybeGetLitChar()) invalidEscape()
-
-    private def getLitChars(delimiters: Char*) {
-      while (!(delimiters contains ch) && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) {
+    private def getLitChars(delimiter: Char) =
+      while (ch != delimiter && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) {
         getLitChar()
       }
-    }
 
     /** read fractional part and exponent of floating point number
      *  if one is present.
@@ -971,8 +1023,8 @@ trait Scanners extends ScannersCommon {
         "string(" + strVal + ")"
       case STRINGPART =>
         "stringpart(" + strVal + ")"
-      case STRINGFMT =>
-        "stringfmt(" + strVal + ")"
+      case INTERPOLATIONID =>
+        "interpolationid(" + name + ")"
       case SEMI =>
         ";"
       case NEWLINE =>
@@ -1088,8 +1140,7 @@ trait Scanners extends ScannersCommon {
     case LONGLIT => "long literal"
     case FLOATLIT => "float literal"
     case DOUBLELIT => "double literal"
-    case STRINGLIT | STRINGPART => "string literal"
-    case STRINGFMT => "format string"
+    case STRINGLIT | STRINGPART | INTERPOLATIONID => "string literal"
     case SYMBOLLIT => "symbol literal"
     case LPAREN => "'('"
     case RPAREN => "')'"
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala b/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala
index 07970bb36e..091f333c27 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala
@@ -45,18 +45,20 @@ abstract class Tokens {
 }
 
 object Tokens extends Tokens {
-  final val STRINGPART = 7
+  final val STRINGPART = 7  // a part of an interpolated string
   final val SYMBOLLIT = 8
-  final val STRINGFMT = 9
+  final val INTERPOLATIONID = 9 // the lead identifier of an interpolated string
+
   def isLiteral(code: Int) =
-    code >= CHARLIT && code <= SYMBOLLIT
+    code >= CHARLIT && code <= INTERPOLATIONID
+
 
   /** identifiers */
   final val IDENTIFIER = 10
   final val BACKQUOTED_IDENT = 11
   def isIdentifier(code: Int) =
     code >= IDENTIFIER && code <= BACKQUOTED_IDENT
-
+    
   @switch def canBeginExpression(code: Int) = code match {
     case IDENTIFIER|BACKQUOTED_IDENT|USCORE       => true
     case LBRACE|LPAREN|LBRACKET|COMMENT           => true
-- 
cgit v1.2.3