summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--spec/01-lexical-syntax.md49
-rw-r--r--spec/13-syntax-summary.md5
-rw-r--r--src/compiler/scala/tools/nsc/ast/parser/Scanners.scala8
-rw-r--r--test/files/neg/t6810.check28
-rw-r--r--test/files/neg/t6810.scala26
5 files changed, 89 insertions, 27 deletions
diff --git a/spec/01-lexical-syntax.md b/spec/01-lexical-syntax.md
index e26cb796c8..06e3a458a4 100644
--- a/spec/01-lexical-syntax.md
+++ b/spec/01-lexical-syntax.md
@@ -398,40 +398,46 @@ members of type `Boolean`.
### Character Literals
```ebnf
-characterLiteral ::= ‘'’ (printableChar | charEscapeSeq) ‘'’
+characterLiteral ::= ‘'’ (charNoQuoteOrNewline | UnicodeEscape | charEscapeSeq) ‘'’
```
A character literal is a single character enclosed in quotes.
-The character is either a printable unicode character or is described
-by an [escape sequence](#escape-sequences).
+The character can be any Unicode character except the single quote
+delimiter or `\u000A` (LF) or `\u000D` (CR);
+or any Unicode character represented by either a
+[Unicode escape](01-lexical-syntax.html) or by an [escape sequence](#escape-sequences).
> ```scala
> 'a' '\u0041' '\n' '\t'
> ```
-Note that `'\u000A'` is _not_ a valid character literal because
-Unicode conversion is done before literal parsing and the Unicode
-character `\u000A` (line feed) is not a printable
-character. One can use instead the escape sequence `'\n'` or
-the octal escape `'\12'` ([see here](#escape-sequences)).
+Note that although Unicode conversion is done early during parsing,
+so that Unicode characters are generally equivalent to their escaped
+expansion in the source text, literal parsing accepts arbitrary
+Unicode escapes, including the character literal `'\u000A'`,
+which can also be written using the escape sequence `'\n'`.
### String Literals
```ebnf
stringLiteral ::= ‘"’ {stringElement} ‘"’
-stringElement ::= printableCharNoDoubleQuote | charEscapeSeq
+stringElement ::= charNoDoubleQuoteOrNewline | UnicodeEscape | charEscapeSeq
```
-A string literal is a sequence of characters in double quotes. The
-characters are either printable unicode character or are described by
-[escape sequences](#escape-sequences). If the string literal
-contains a double quote character, it must be escaped,
-i.e. `"\""`. The value of a string literal is an instance of
-class `String`.
+A string literal is a sequence of characters in double quotes.
+The characters can be any Unicode character except the double quote
+delimiter or `\u000A` (LF) or `\u000D` (CR);
+or any Unicode character represented by either a
+[Unicode escape](01-lexical-syntax.html) or by an [escape sequence](#escape-sequences).
+
+If the string literal contains a double quote character, it must be escaped using
+`"\""`.
+
+The value of a string literal is an instance of class `String`.
> ```scala
-> "Hello,\nWorld!"
-> "This string contains a \" character."
+> "Hello, world!\n"
+> "\"Hello,\" replied the world."
> ```
#### Multi-Line String Literals
@@ -443,11 +449,10 @@ multiLineChars ::= {[‘"’] [‘"’] charNoDoubleQuote} {‘"’}
A multi-line string literal is a sequence of characters enclosed in
triple quotes `""" ... """`. The sequence of characters is
-arbitrary, except that it may contain three or more consuctive quote characters
-only at the very end. Characters
-must not necessarily be printable; newlines or other
-control characters are also permitted. Unicode escapes work as everywhere else, but none
-of the escape sequences [here](#escape-sequences) are interpreted.
+arbitrary, except that it may contain three or more consecutive quote characters
+only at the very end. In particular, embedded newlines
+are permitted. Unicode escapes work as everywhere else, but none
+of the [escape sequences](#escape-sequences) are interpreted.
> ```scala
> """the present string
diff --git a/spec/13-syntax-summary.md b/spec/13-syntax-summary.md
index 7f73e107de..a4b4aae570 100644
--- a/spec/13-syntax-summary.md
+++ b/spec/13-syntax-summary.md
@@ -57,11 +57,12 @@ floatType ::= ‘F’ | ‘f’ | ‘D’ | ‘d’
booleanLiteral ::= ‘true’ | ‘false’
-characterLiteral ::= ‘'’ (printableChar | charEscapeSeq) ‘'’
+characterLiteral ::= ‘'’ (charNoQuoteOrNewline | UnicodeEscape | charEscapeSeq) ‘'’
stringLiteral ::= ‘"’ {stringElement} ‘"’
| ‘"""’ multiLineChars ‘"""’
-stringElement ::= (printableChar except ‘"’)
+stringElement ::= charNoDoubleQuoteOrNewline
+ | UnicodeEscape
| charEscapeSeq
multiLineChars ::= {[‘"’] [‘"’] charNoDoubleQuote} {‘"’}
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index cd41c75298..51bb0d3c5b 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -515,7 +515,7 @@ trait Scanners extends ScannersCommon {
charLitOr(getIdentRest)
else if (isOperatorPart(ch) && (ch != '\\'))
charLitOr(getOperatorRest)
- else {
+ else if (!isAtEnd && (ch != SU && ch != CR && ch != LF || isUnicodeEscape)) {
getLitChar()
if (ch == '\'') {
nextChar()
@@ -525,6 +525,8 @@ trait Scanners extends ScannersCommon {
syntaxError("unclosed character literal")
}
}
+ else
+ syntaxError("unclosed character literal")
}
fetchSingleQuote()
case '.' =>
@@ -690,7 +692,7 @@ trait Scanners extends ScannersCommon {
private def unclosedStringLit(): Unit = syntaxError("unclosed string literal")
- private def getRawStringLit(): Unit = {
+ @tailrec private def getRawStringLit(): Unit = {
if (ch == '\"') {
nextRawChar()
if (isTripleQuote()) {
@@ -707,7 +709,7 @@ trait Scanners extends ScannersCommon {
}
}
- @scala.annotation.tailrec private def getStringPart(multiLine: Boolean): Unit = {
+ @tailrec private def getStringPart(multiLine: Boolean): Unit = {
def finishStringPart() = {
setStrVal()
token = STRINGPART
diff --git a/test/files/neg/t6810.check b/test/files/neg/t6810.check
new file mode 100644
index 0000000000..497ef35070
--- /dev/null
+++ b/test/files/neg/t6810.check
@@ -0,0 +1,28 @@
+t6810.scala:4: error: unclosed character literal
+ val y = '
+ ^
+t6810.scala:5: error: unclosed character literal
+' // but not embedded EOL sequences not represented as escapes
+^
+t6810.scala:9: error: unclosed string literal
+ val Y = "
+ ^
+t6810.scala:10: error: unclosed string literal
+" // obviously not
+^
+t6810.scala:20: error: unclosed quoted identifier
+ val `
+ ^
+t6810.scala:21: error: unclosed quoted identifier
+` = EOL // not raw string literals aka triple-quoted, multiline strings
+^
+t6810.scala:24: error: unclosed character literal
+ val b = '
+ ^
+t6810.scala:25: error: unclosed character literal
+' // CR seen as EOL by scanner
+^
+t6810.scala:24: error: '=' expected but ';' found.
+ val b = '
+^
+9 errors found
diff --git a/test/files/neg/t6810.scala b/test/files/neg/t6810.scala
new file mode 100644
index 0000000000..50c305d70c
--- /dev/null
+++ b/test/files/neg/t6810.scala
@@ -0,0 +1,26 @@
+
+trait t6810 {
+ val x = '\u000A' // char literals accept arbitrary unicode escapes
+ val y = '
+' // but not embedded EOL sequences not represented as escapes
+ val z = '\n' // normally, expect this escape
+
+ val X = "\u000A" // it's the same as ordinary string literals
+ val Y = "
+" // obviously not
+ val Z = "\n" // normally, expect this escape
+
+ val A = """
+""" // which is what these are for
+ val B = s"""
+""" // or the same for interpolated strings
+
+ import scala.compat.Platform.EOL
+ val `\u000A` = EOL // backquoted identifiers are arbitrary string literals
+ val `
+` = EOL // not raw string literals aka triple-quoted, multiline strings
+
+ val a = '\u000D' // similar treatment of CR
+ val b = ' ' // CR seen as EOL by scanner
+ val c = '\r' // traditionally
+}