From 6ea9f72ffd0e1b606cb5ad4670b4db8330fa29b9 Mon Sep 17 00:00:00 2001 From: Martin Odersky Date: Mon, 16 Jan 2012 17:49:12 +0100 Subject: A string interpolation implementation of SIP-11. This is the complete implementation of SIP-11, in its version of 15-Jan-2012. For now, the interpolations are enabled only under the -Xexperimental flag. --- src/compiler/scala/reflect/internal/StdNames.scala | 2 +- .../scala/tools/nsc/ast/parser/Parsers.scala | 63 +++--- .../scala/tools/nsc/ast/parser/Scanners.scala | 245 +++++++++++++-------- .../scala/tools/nsc/ast/parser/Tokens.scala | 10 +- 4 files changed, 192 insertions(+), 128 deletions(-) (limited to 'src/compiler') diff --git a/src/compiler/scala/reflect/internal/StdNames.scala b/src/compiler/scala/reflect/internal/StdNames.scala index 2871ba59f6..507621ea42 100644 --- a/src/compiler/scala/reflect/internal/StdNames.scala +++ b/src/compiler/scala/reflect/internal/StdNames.scala @@ -271,6 +271,7 @@ trait StdNames extends NameManglers { self: SymbolTable => // Compiler utilized names // val productElementName: NameType = "productElementName" val Ident: NameType = "Ident" + val StringContext: NameType = "StringContext" val TYPE_ : NameType = "TYPE" val TypeTree: NameType = "TypeTree" val UNIT : NameType = "UNIT" @@ -320,7 +321,6 @@ trait StdNames extends NameManglers { self: SymbolTable => val find_ : NameType = "find" val flatMap: NameType = "flatMap" val foreach: NameType = "foreach" - val formatted: NameType = "formatted" val freeValue : NameType = "freeValue" val genericArrayOps: NameType = "genericArrayOps" val get: NameType = "get" diff --git a/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala b/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala index 580b2a16eb..ce41bc456e 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Parsers.scala @@ -617,7 +617,7 @@ self => def isLiteralToken(token: Int) = token match { case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT | - STRINGLIT | STRINGPART | SYMBOLLIT | TRUE | FALSE | NULL => true + STRINGLIT | INTERPOLATIONID | SYMBOLLIT | TRUE | FALSE | NULL => true case _ => false } def isLiteral = isLiteralToken(in.token) @@ -1103,7 +1103,7 @@ self => * }}} * @note The returned tree does not yet have a position */ - def literal(isNegated: Boolean): Tree = { + def literal(isNegated: Boolean = false): Tree = { def finish(value: Any): Tree = { val t = Literal(Constant(value)) in.nextToken() @@ -1111,19 +1111,19 @@ self => } if (in.token == SYMBOLLIT) Apply(scalaDot(nme.Symbol), List(finish(in.strVal))) - else if (in.token == STRINGPART) + else if (in.token == INTERPOLATIONID) interpolatedString() else finish(in.token match { - case CHARLIT => in.charVal - case INTLIT => in.intVal(isNegated).toInt - case LONGLIT => in.intVal(isNegated) - case FLOATLIT => in.floatVal(isNegated).toFloat - case DOUBLELIT => in.floatVal(isNegated) - case STRINGLIT => in.strVal.intern() - case TRUE => true - case FALSE => false - case NULL => null - case _ => + case CHARLIT => in.charVal + case INTLIT => in.intVal(isNegated).toInt + case LONGLIT => in.intVal(isNegated) + case FLOATLIT => in.floatVal(isNegated).toFloat + case DOUBLELIT => in.floatVal(isNegated) + case STRINGLIT | STRINGPART => in.strVal.intern() + case TRUE => true + case FALSE => false + case NULL => null + case _ => syntaxErrorOrIncomplete("illegal literal", true) null }) @@ -1138,16 +1138,27 @@ self => } } - private def interpolatedString(): Tree = { - var t = atPos(o2p(in.offset))(New(TypeTree(definitions.StringBuilderClass.tpe), List(List()))) + private def interpolatedString(): Tree = atPos(in.offset) { + val start = in.offset + val interpolator = in.name + + val partsBuf = new ListBuffer[Tree] + val exprBuf = new ListBuffer[Tree] + in.nextToken() while (in.token == STRINGPART) { - t = stringOp(t, nme.append) - var e = expr() - if (in.token == STRINGFMT) e = stringOp(e, nme.formatted) - t = atPos(t.pos.startOrPoint)(Apply(Select(t, nme.append), List(e))) + partsBuf += literal() + exprBuf += { + if (in.token == IDENTIFIER) atPos(in.offset)(Ident(ident())) + else expr() + } } - if (in.token == STRINGLIT) t = stringOp(t, nme.append) - atPos(t.pos)(Select(t, nme.toString_)) + if (in.token == STRINGLIT) partsBuf += literal() + + val t1 = atPos(o2p(start)) { Ident(nme.StringContext) } + val t2 = atPos(start) { Apply(t1, partsBuf.toList) } + t2 setPos t2.pos.makeTransparent + val t3 = Select(t2, interpolator) setPos t2.pos + atPos(start) { Apply(t3, exprBuf.toList) } } /* ------------- NEW LINES ------------------------------------------------- */ @@ -1469,7 +1480,7 @@ self => atPos(in.offset) { val name = nme.toUnaryName(rawIdent()) // val name = nme.toUnaryName(ident()) // val name: Name = "unary_" + ident() - if (name == nme.UNARY_- && isNumericLit) simpleExprRest(atPos(in.offset)(literal(true)), true) + if (name == nme.UNARY_- && isNumericLit) simpleExprRest(atPos(in.offset)(literal(isNegated = true)), true) else Select(stripParens(simpleExpr()), name) } } @@ -1493,7 +1504,7 @@ self => def simpleExpr(): Tree = { var canApply = true val t = - if (isLiteral) atPos(in.offset)(literal(false)) + if (isLiteral) atPos(in.offset)(literal()) else in.token match { case XMLSTART => xmlLiteral() @@ -1827,7 +1838,7 @@ self => case INTLIT | LONGLIT | FLOATLIT | DOUBLELIT => t match { case Ident(nme.MINUS) => - return atPos(start) { literal(true) } + return atPos(start) { literal(isNegated = true) } case _ => } case _ => @@ -1844,8 +1855,8 @@ self => in.nextToken() atPos(start, start) { Ident(nme.WILDCARD) } case CHARLIT | INTLIT | LONGLIT | FLOATLIT | DOUBLELIT | - STRINGLIT | STRINGPART | SYMBOLLIT | TRUE | FALSE | NULL => - atPos(start) { literal(false) } + STRINGLIT | INTERPOLATIONID | SYMBOLLIT | TRUE | FALSE | NULL => + atPos(start) { literal() } case LPAREN => atPos(start)(makeParens(noSeq.patterns())) case XMLSTART => diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala index a25b3afbc6..a2a577a7ab 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala @@ -161,12 +161,25 @@ trait Scanners extends ScannersCommon { * RBRACKET if region starts with '[' * RBRACE if region starts with '{' * ARROW if region starts with `case' - * STRINGFMT if region is a string interpolation expression starting with '\{' + * STRINGLIT if region is a string interpolation expression starting with '${' + * (the STRINGLIT appears twice in succession on the stack iff the + * expression is a multiline string literal). */ var sepRegions: List[Int] = List() // Get next token ------------------------------------------------------------ + /** Are we directly in a string interpolation expression? + */ + @inline private def inStringInterpolation = + sepRegions.nonEmpty && sepRegions.head == STRINGLIT + + /** Are we directly in a multiline string interpolation expression? + * @pre: inStringInterpolation + */ + @inline private def inMultiLineInterpolation = + sepRegions.tail.nonEmpty && sepRegions.tail.head == STRINGPART + /** read next token and return last offset */ def skipToken(): Offset = { @@ -189,29 +202,31 @@ trait Scanners extends ScannersCommon { sepRegions = RBRACE :: sepRegions case CASE => sepRegions = ARROW :: sepRegions - case STRINGPART => - sepRegions = STRINGFMT :: sepRegions case RBRACE => - sepRegions = sepRegions dropWhile (_ != RBRACE) + while (!sepRegions.isEmpty && sepRegions.head != RBRACE) + sepRegions = sepRegions.tail if (!sepRegions.isEmpty) sepRegions = sepRegions.tail - case RBRACKET | RPAREN | ARROW | STRINGFMT => + docBuffer = null + case RBRACKET | RPAREN => if (!sepRegions.isEmpty && sepRegions.head == lastToken) sepRegions = sepRegions.tail - case _ => - } - (lastToken: @switch) match { - case RBRACE | RBRACKET | RPAREN => docBuffer = null + case ARROW => + if (!sepRegions.isEmpty && sepRegions.head == lastToken) + sepRegions = sepRegions.tail + case STRINGLIT => + if (inStringInterpolation) + sepRegions = sepRegions.tail case _ => } - + // Read a token or copy it from `next` tokenData if (next.token == EMPTY) { lastOffset = charOffset - 1 - if(lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') { + if (lastOffset > 0 && buf(lastOffset) == '\n' && buf(lastOffset - 1) == '\r') { lastOffset -= 1 } - fetchToken() + if (inStringInterpolation) fetchStringPart() else fetchToken() } else { this copyFrom next next.token = EMPTY @@ -308,7 +323,9 @@ trait Scanners extends ScannersCommon { 'z' => putChar(ch) nextChar() - getIdentRest() // scala-mode: wrong indent for multi-line case blocks + getIdentRest() + if (ch == '"' && token == IDENTIFIER && settings.Xexperimental.value) + token = INTERPOLATIONID case '<' => // is XMLSTART? val last = if (charOffset >= 2) buf(charOffset - 2) else ' ' nextChar() @@ -360,18 +377,37 @@ trait Scanners extends ScannersCommon { case '`' => getBackquotedIdent() case '\"' => - nextChar() - if (ch == '\"') { - nextChar() + if (token == INTERPOLATIONID) { + nextRawChar() if (ch == '\"') { nextRawChar() - getMultiLineStringLit() + if (ch == '\"') { + nextRawChar() + getStringPart(multiLine = true) + sepRegions = STRINGLIT :: sepRegions // indicate string part + sepRegions = STRINGLIT :: sepRegions // once more to indicate multi line string part + } else { + token = STRINGLIT + strVal = "" + } } else { - token = STRINGLIT - strVal = "" + getStringPart(multiLine = false) + sepRegions = STRINGLIT :: sepRegions // indicate single line string part } } else { - getStringPart() + nextChar() + if (ch == '\"') { + nextChar() + if (ch == '\"') { + nextRawChar() + getRawStringLit() + } else { + token = STRINGLIT + strVal = "" + } + } else { + getStringLit() + } } case '\'' => nextChar() @@ -397,9 +433,7 @@ trait Scanners extends ScannersCommon { token = DOT } case ';' => - nextChar() - if (inStringInterpolation) getFormatString() - else token = SEMI + nextChar(); token = SEMI case ',' => nextChar(); token = COMMA case '(' => @@ -409,16 +443,7 @@ trait Scanners extends ScannersCommon { case ')' => nextChar(); token = RPAREN case '}' => - if (token == STRINGFMT) { - nextChar() - getStringPart() - } else if (inStringInterpolation) { - strVal = ""; - token = STRINGFMT - } else { - nextChar(); - token = RBRACE - } + nextChar(); token = RBRACE case '[' => nextChar(); token = LBRACKET case ']' => @@ -506,11 +531,6 @@ trait Scanners extends ScannersCommon { } } - /** Are we directly in a string interpolation expression? - */ - private def inStringInterpolation = - sepRegions.nonEmpty && sepRegions.head == STRINGFMT - /** Can token start a statement? */ def inFirstOfStat(token: Int) = token match { case EOF | CATCH | ELSE | EXTENDS | FINALLY | FORSOME | MATCH | WITH | YIELD | @@ -608,71 +628,110 @@ trait Scanners extends ScannersCommon { else finishNamed() } } + + +// Literals ----------------------------------------------------------------- - def getFormatString() = { - getLitChars('}', '"', ' ', '\t') - if (ch == '}') { - setStrVal() - if (strVal.length > 0) strVal = "%" + strVal - token = STRINGFMT - } else { - syntaxError("unclosed format string") - } - } - - def getStringPart() = { - while (ch != '"' && (ch != CR && ch != LF && ch != SU || isUnicodeEscape) && maybeGetLitChar()) {} + private def getStringLit() = { + getLitChars('"') if (ch == '"') { setStrVal() nextChar() token = STRINGLIT - } else if (ch == '{' && settings.Xexperimental.value) { - setStrVal() - nextChar() - token = STRINGPART - } else { - syntaxError("unclosed string literal") - } + } else syntaxError("unclosed string literal") } - private def getMultiLineStringLit() { + private def getRawStringLit(): Unit = { if (ch == '\"') { nextRawChar() - if (ch == '\"') { + if (isTripleQuote()) { + setStrVal() + token = STRINGLIT + } else + getRawStringLit() + } else if (ch == SU) { + incompleteInputError("unclosed multi-line string literal") + } else { + putChar(ch) + nextRawChar() + getRawStringLit() + } + } + + @annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = { + def finishStringPart() = { + setStrVal() + token = STRINGPART + next.lastOffset = charOffset - 1 + next.offset = charOffset - 1 + } + if (ch == '"') { + nextRawChar() + if (!multiLine || isTripleQuote()) { + setStrVal() + token = STRINGLIT + } else + getStringPart(multiLine) + } else if (ch == '$') { + nextRawChar() + if (ch == '$') { + putChar(ch) nextRawChar() - if (ch == '\"') { - nextChar() - while (ch == '\"') { - putChar('\"') - nextChar() - } - token = STRINGLIT - setStrVal() - } else { - putChar('\"') - putChar('\"') - getMultiLineStringLit() - } + getStringPart(multiLine) + } else if (ch == '{') { + finishStringPart() + nextRawChar() + next.token = LBRACE + } else if (Character.isUnicodeIdentifierStart(ch)) { + finishStringPart() + do { + putChar(ch) + nextRawChar() + } while (Character.isUnicodeIdentifierPart(ch)) + next.token = IDENTIFIER + next.name = newTermName(cbuf.toString) + cbuf.clear() } else { - putChar('\"') - getMultiLineStringLit() + syntaxError("invalid string interpolation") } - } else if (ch == SU) { - incompleteInputError("unclosed multi-line string literal") + } else if ((ch == CR || ch == LF || ch == SU) && !isUnicodeEscape) { + syntaxError("unclosed string literal") } else { putChar(ch) nextRawChar() - getMultiLineStringLit() + getStringPart(multiLine) } } + + private def fetchStringPart() = { + offset = charOffset - 1 + getStringPart(multiLine = inMultiLineInterpolation) + } + + private def isTripleQuote(): Boolean = + if (ch == '"') { + nextRawChar() + if (ch == '"') { + nextChar() + while (ch == '"') { + putChar('"') + nextChar() + } + true + } else { + putChar('"') + putChar('"') + false + } + } else { + putChar('"') + false + } -// Literals ----------------------------------------------------------------- - - /** read next character in character or string literal: - * if character sequence is a \{ escape, do not copy it into the string and return false. - * otherwise return true. + /** copy current character into cbuf, interpreting any escape sequences, + * and advance to next character. */ - protected def maybeGetLitChar(): Boolean = { + protected def getLitChar(): Unit = if (ch == '\\') { nextChar() if ('0' <= ch && ch <= '7') { @@ -698,7 +757,6 @@ trait Scanners extends ScannersCommon { case '\"' => putChar('\"') case '\'' => putChar('\'') case '\\' => putChar('\\') - case '{' => return false case _ => invalidEscape() } nextChar() @@ -707,22 +765,16 @@ trait Scanners extends ScannersCommon { putChar(ch) nextChar() } - true - } protected def invalidEscape(): Unit = { syntaxError(charOffset - 1, "invalid escape character") putChar(ch) } - protected def getLitChar(): Unit = - if (!maybeGetLitChar()) invalidEscape() - - private def getLitChars(delimiters: Char*) { - while (!(delimiters contains ch) && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) { + private def getLitChars(delimiter: Char) = + while (ch != delimiter && (ch != CR && ch != LF && ch != SU || isUnicodeEscape)) { getLitChar() } - } /** read fractional part and exponent of floating point number * if one is present. @@ -971,8 +1023,8 @@ trait Scanners extends ScannersCommon { "string(" + strVal + ")" case STRINGPART => "stringpart(" + strVal + ")" - case STRINGFMT => - "stringfmt(" + strVal + ")" + case INTERPOLATIONID => + "interpolationid(" + name + ")" case SEMI => ";" case NEWLINE => @@ -1088,8 +1140,7 @@ trait Scanners extends ScannersCommon { case LONGLIT => "long literal" case FLOATLIT => "float literal" case DOUBLELIT => "double literal" - case STRINGLIT | STRINGPART => "string literal" - case STRINGFMT => "format string" + case STRINGLIT | STRINGPART | INTERPOLATIONID => "string literal" case SYMBOLLIT => "symbol literal" case LPAREN => "'('" case RPAREN => "')'" diff --git a/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala b/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala index 07970bb36e..091f333c27 100644 --- a/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/Tokens.scala @@ -45,18 +45,20 @@ abstract class Tokens { } object Tokens extends Tokens { - final val STRINGPART = 7 + final val STRINGPART = 7 // a part of an interpolated string final val SYMBOLLIT = 8 - final val STRINGFMT = 9 + final val INTERPOLATIONID = 9 // the lead identifier of an interpolated string + def isLiteral(code: Int) = - code >= CHARLIT && code <= SYMBOLLIT + code >= CHARLIT && code <= INTERPOLATIONID + /** identifiers */ final val IDENTIFIER = 10 final val BACKQUOTED_IDENT = 11 def isIdentifier(code: Int) = code >= IDENTIFIER && code <= BACKQUOTED_IDENT - + @switch def canBeginExpression(code: Int) = code match { case IDENTIFIER|BACKQUOTED_IDENT|USCORE => true case LBRACE|LPAREN|LBRACKET|COMMENT => true -- cgit v1.2.3