From 5135bae5a7c2d156dd55dfc0aabf8b41d393f4a2 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Wed, 12 Mar 2014 16:24:33 -0700 Subject: SI-5086 clean up EBNF - sequences of block statements were wrong (btw, note that BlockStat matches the empty sequence of tokens) - lexical syntax was pretty messy: clarified, removed extraneous backslashes --- 15-scala-syntax-summary.md | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to '15-scala-syntax-summary.md') diff --git a/15-scala-syntax-summary.md b/15-scala-syntax-summary.md index 7d6fc10d3e..d4d5166312 100644 --- a/15-scala-syntax-summary.md +++ b/15-scala-syntax-summary.md @@ -1,17 +1,28 @@ # Scala Syntax Summary - +The following descriptions of Scala tokens use literal characters `‘c’` when referring to the ASCII fragment `\u0000` – `\u007F`. -The lexical syntax of Scala is given by the following grammar in EBNF -form. +_Unicode escapes_ are used to represent the Unicode character with the given hexadecimal code: + +``` +UnicodeEscape ::= ‘\’ ‘u’ {‘u’} hexDigit hexDigit hexDigit hexDigit +hexDigit ::= ‘0’ | … | ‘9’ | ‘A’ | … | ‘F’ | ‘a’ | … | ‘f’ +``` + +The lexical syntax of Scala is given by the following grammar in EBNF form: ``` -upper ::= ‘A’ | … | ‘Z’ | ‘\$’ | ‘_’ // and Unicode category Lu +whiteSpace ::= ‘\u0020’ | ‘\u0009’ | ‘\u000D’ | ‘\u000A’ +upper ::= ‘A’ | … | ‘Z’ | ‘$’ | ‘_’ // and Unicode category Lu lower ::= ‘a’ | … | ‘z’ // and Unicode category Ll letter ::= upper | lower // and Unicode categories Lo, Lt, Nl digit ::= ‘0’ | … | ‘9’ -opchar ::= // “all other characters in \u0020-\u007F and Unicode - // categories Sm, So except parentheses ([{}]) and periods” +paren ::= ‘(’ | ‘)’ | ‘[’ | ‘]’ | ‘{’ | ‘}’ +delim ::= ‘`’ | ‘'’ | ‘"’ | ‘.’ | ‘;’ | ‘,’ +opchar ::= // printableChar not matched by (whiteSpace | upper | lower | + // letter | digit | paren | delim | opchar | Unicode_Sm | Unicode_So) +printableChar ::= // all characters in [\u0020, \u007F] 
inclusive +charEscapeSeq ::= ‘\’ (‘b’ | ‘t’ | ‘n’ | ‘f’ | ‘r’ | ‘"’ | ‘'’ | ‘\’) op ::= opchar {opchar} varid ::= lower idrest @@ -19,7 +30,7 @@ plainid ::= upper idrest | varid | op id ::= plainid - | ‘\`’ stringLiteral ‘\`’ + | ‘`’ stringLiteral ‘`’ idrest ::= {letter | digit} [‘_’ op] integerLiteral ::= (decimalNumeral | hexNumeral) [‘L’ | ‘l’] @@ -38,18 +49,17 @@ floatType ::= ‘F’ | ‘f’ | ‘D’ | ‘d’ booleanLiteral ::= ‘true’ | ‘false’ -characterLiteral ::= ‘\'‘ printableChar ‘\'’ - | ‘\’ charEscapeSeq ‘\'’ +characterLiteral ::= ‘'’ (printableChar | charEscapeSeq) ‘'’ stringLiteral ::= ‘"’ {stringElement} ‘"’ | ‘"""’ multiLineChars ‘"""’ -stringElement ::= printableCharNoDoubleQuote +stringElement ::= (printableChar except ‘"’) | charEscapeSeq multiLineChars ::= {[‘"’] [‘"’] charNoDoubleQuote} {‘"’} symbolLiteral ::= ‘'’ plainid -comment ::= ‘/*’ “any sequence of characters” ‘*/’ +comment ::= ‘/*’ “any sequence of characters; nested comments are allowed” ‘*/’ | ‘//’ “any sequence of characters up to end of line” nl ::= $\mathit{“new line character”}$ @@ -141,7 +151,7 @@ grammar. | [nl] BlockExpr BlockExpr ::= ‘{’ CaseClauses ‘}’ | ‘{’ Block ‘}’ - Block ::= {BlockStat semi} [ResultExpr] + Block ::= BlockStat {semi BlockStat} [ResultExpr] BlockStat ::= Import | {Annotation} [‘implicit’ | ‘lazy’] Def | {Annotation} {LocalModifier} TmplDef @@ -210,7 +220,6 @@ grammar. Annotation ::= ‘@’ SimpleType {ArgumentExprs} ConstrAnnotation ::= ‘@’ SimpleType ArgumentExprs - NameValuePair ::= ‘val’ id ‘=’ PrefixExpr TemplateBody ::= [nl] ‘{’ [SelfType] TemplateStat {semi TemplateStat} ‘}’ TemplateStat ::= Import @@ -287,6 +296,7 @@ grammar. ```