From 5135bae5a7c2d156dd55dfc0aabf8b41d393f4a2 Mon Sep 17 00:00:00 2001
From: Adriaan Moors <adriaan.moors@typesafe.com>
Date: Wed, 12 Mar 2014 16:24:33 -0700
Subject: SI-5086 clean up EBNF

- sequences of block statements were wrong
  (btw, note that BlockStat matches the empty sequence of tokens)
- lexical syntax was pretty messy: clarified, removed extraneous backslashes
---
 15-scala-syntax-summary.md | 36 +++++++++++++++++++++++-------------
 1 file changed, 23 insertions(+), 13 deletions(-)

(limited to '15-scala-syntax-summary.md')

diff --git a/15-scala-syntax-summary.md b/15-scala-syntax-summary.md
index 7d6fc10d3e..d4d5166312 100644
--- a/15-scala-syntax-summary.md
+++ b/15-scala-syntax-summary.md
@@ -1,17 +1,28 @@
 # Scala Syntax Summary
 
-<!-- TODO: introduce SeqPattern syntax -->
+The following descriptions of Scala tokens use literal characters `‘c’` when referring to the ASCII fragment `\u0000` – `\u007F`.
 
-The lexical syntax of Scala is given by the following grammar in EBNF
-form.
+_Unicode escapes_ are used to represent the Unicode character with the given hexadecimal code:
+
+```
+UnicodeEscape ::= ‘\’ ‘u’ {‘u’} hexDigit hexDigit hexDigit hexDigit
+hexDigit      ::= ‘0’ | … | ‘9’ | ‘A’ | … | ‘F’ | ‘a’ | … | ‘f’
+```
+
+The lexical syntax of Scala is given by the following grammar in EBNF form:
 
 ``` 
-upper            ::=  ‘A’ | … | ‘Z’ | ‘\$’ | ‘_’  // and Unicode category Lu
+whiteSpace       ::=  ‘\u0020’ | ‘\u0009’ | ‘\u000D’ | ‘\u000A’
+upper            ::=  ‘A’ | … | ‘Z’ | ‘$’ | ‘_’  // and Unicode category Lu
 lower            ::=  ‘a’ | … | ‘z’ // and Unicode category Ll
 letter           ::=  upper | lower // and Unicode categories Lo, Lt, Nl
 digit            ::=  ‘0’ | … | ‘9’
-opchar           ::= // “all other characters in \u0020-\u007F and Unicode
-                     // categories Sm, So except parentheses ([{}]) and periods”
+paren            ::=  ‘(’ | ‘)’ | ‘[’ | ‘]’ | ‘{’ | ‘}’
+delim            ::=  ‘`’ | ‘'’ | ‘"’ | ‘.’ | ‘;’ | ‘,’
+opchar           ::= // printableChar not matched by (whiteSpace | upper | lower |
+                     // letter | digit | paren | delim), plus Unicode categories Sm and So
+printableChar    ::= // all characters in [\u0020, \u007F] inclusive
+charEscapeSeq    ::= ‘\’ (‘b’ | ‘t’ | ‘n’ | ‘f’ | ‘r’ | ‘"’ | ‘'’ | ‘\’)
 
 op               ::=  opchar {opchar} 
 varid            ::=  lower idrest
@@ -19,7 +30,7 @@ plainid          ::=  upper idrest
                  |  varid
                  |  op
 id               ::=  plainid
-                 |  ‘\`’ stringLiteral ‘\`’
+                 |  ‘`’ stringLiteral ‘`’
 idrest           ::=  {letter | digit} [‘_’ op]
 
 integerLiteral   ::=  (decimalNumeral | hexNumeral) [‘L’ | ‘l’]
@@ -38,18 +49,17 @@ floatType        ::=  ‘F’ | ‘f’ | ‘D’ | ‘d’
 
 booleanLiteral   ::=  ‘true’ | ‘false’
 
-characterLiteral ::=  ‘\'‘ printableChar ‘\'’
-                 |  ‘\’ charEscapeSeq ‘\'’
+characterLiteral ::=  ‘'’ (printableChar | charEscapeSeq) ‘'’
 
 stringLiteral    ::=  ‘"’ {stringElement} ‘"’
                  |  ‘"""’ multiLineChars ‘"""’
-stringElement    ::=  printableCharNoDoubleQuote 
+stringElement    ::=  (printableChar except ‘"’)
                  |  charEscapeSeq
 multiLineChars   ::=  {[‘"’] [‘"’] charNoDoubleQuote} {‘"’}
 
 symbolLiteral    ::=  ‘'’ plainid
 
-comment          ::=  ‘/*’ “any sequence of characters” ‘*/’
+comment          ::=  ‘/*’ “any sequence of characters; nested comments are allowed” ‘*/’
                  |  ‘//’ “any sequence of characters up to end of line”
 
 nl               ::=  $\mathit{“new line character”}$
@@ -141,7 +151,7 @@ grammar.
                       |  [nl] BlockExpr
   BlockExpr         ::=  ‘{’ CaseClauses ‘}’
                       |  ‘{’ Block ‘}’
-  Block             ::=  {BlockStat semi} [ResultExpr]
+  Block             ::=  BlockStat {semi BlockStat} [ResultExpr]
   BlockStat         ::=  Import
                       |  {Annotation} [‘implicit’ | ‘lazy’] Def
                       |  {Annotation} {LocalModifier} TmplDef
@@ -210,7 +220,6 @@ grammar.
 
   Annotation        ::=  ‘@’ SimpleType {ArgumentExprs}
   ConstrAnnotation  ::=  ‘@’ SimpleType ArgumentExprs
-  NameValuePair     ::=  ‘val’ id ‘=’ PrefixExpr
 
   TemplateBody      ::=  [nl] ‘{’ [SelfType] TemplateStat {semi TemplateStat} ‘}’
   TemplateStat      ::=  Import
@@ -287,6 +296,7 @@ grammar.
 ```
 
 <!-- TODO add:
+SeqPattern ::= ...
 
 SimplePattern    ::= StableId  [TypePatArgs] [‘(’ [SeqPatterns] ‘)’]
 TypePatArgs ::= ‘[’ TypePatArg {‘,’ TypePatArg} ‘]’
-- 
cgit v1.2.3