diff options
Diffstat (limited to 'docs/docs')
-rw-r--r-- | docs/docs/index.md | 1 | ||||
-rw-r--r-- | docs/docs/internals/syntax.md | 356 |
2 files changed, 357 insertions, 0 deletions
diff --git a/docs/docs/index.md b/docs/docs/index.md index 8fceedbd0..0d9bc6b6f 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -20,6 +20,7 @@ Index - [Eclipse](contributing/eclipse.md) setting up dev environment - [Intellij-IDEA](contributing/intellij-idea.md) setting up dev environment * Internals document the compiler internals + - [Syntax Summary](internals/syntax.md) - [Project Structure](internals/overall-structure.md) of the project - [Backend](internals/backend.md) details on the bytecode backend diff --git a/docs/docs/internals/syntax.md b/docs/docs/internals/syntax.md new file mode 100644 index 000000000..7c8cb1ea2 --- /dev/null +++ b/docs/docs/internals/syntax.md @@ -0,0 +1,356 @@ +--- +layout: default +title: "Scala Syntax Summary" +--- + +The following descriptions of Scala tokens use literal characters `‘c’` when +referring to the ASCII fragment `\u0000` – `\u007F`. + +_Unicode escapes_ are used to represent the Unicode character with the given +hexadecimal code: + +```ebnf +UnicodeEscape ::= ‘\’ ‘u’ {‘u’} hexDigit hexDigit hexDigit hexDigit +hexDigit ::= ‘0’ | … | ‘9’ | ‘A’ | … | ‘F’ | ‘a’ | … | ‘f’ +``` + +Informal descriptions are typeset as `“some comment”`. + +## Lexical Syntax +The lexical syntax of Scala is given by the following grammar in EBNF +form. 
+ +```ebnf +whiteSpace ::= ‘\u0020’ | ‘\u0009’ | ‘\u000D’ | ‘\u000A’ +upper ::= ‘A’ | … | ‘Z’ | ‘\$’ | ‘_’ “… and Unicode category Lu” +lower ::= ‘a’ | … | ‘z’ “… and Unicode category Ll” +letter ::= upper | lower “… and Unicode categories Lo, Lt, Nl” +digit ::= ‘0’ | … | ‘9’ +paren ::= ‘(’ | ‘)’ | ‘[’ | ‘]’ | ‘{’ | ‘}’ +delim ::= ‘`’ | ‘'’ | ‘"’ | ‘.’ | ‘;’ | ‘,’ +opchar ::= “printableChar not matched by (whiteSpace | upper | lower | + letter | digit | paren | delim | opchar | Unicode_Sm | + Unicode_So)” +printableChar ::= “all characters in [\u0020, \u007F] inclusive” +charEscapeSeq ::= ‘\’ (‘b’ | ‘t’ | ‘n’ | ‘f’ | ‘r’ | ‘"’ | ‘'’ | ‘\’) + +op ::= opchar {opchar} +varid ::= lower idrest +alphaid ::= upper idrest + | varid +plainid ::= alphaid + | op +id ::= plainid + | ‘`’ { charNoBackQuoteOrNewline | UnicodeEscape | charEscapeSeq } ‘`’ + | INT // interpolation id, only for quasi-quotes +idrest ::= {letter | digit} [‘_’ op] + +integerLiteral ::= (decimalNumeral | hexNumeral) [‘L’ | ‘l’] +decimalNumeral ::= ‘0’ | nonZeroDigit {digit} +hexNumeral ::= ‘0’ (‘x’ | ‘X’) hexDigit {hexDigit} +digit ::= ‘0’ | nonZeroDigit +nonZeroDigit ::= ‘1’ | … | ‘9’ + +floatingPointLiteral + ::= digit {digit} ‘.’ {digit} [exponentPart] [floatType] + | ‘.’ digit {digit} [exponentPart] [floatType] + | digit {digit} exponentPart [floatType] + | digit {digit} [exponentPart] floatType +exponentPart ::= (‘E’ | ‘e’) [‘+’ | ‘-’] digit {digit} +floatType ::= ‘F’ | ‘f’ | ‘D’ | ‘d’ + +booleanLiteral ::= ‘true’ | ‘false’ + +characterLiteral ::= ‘'’ (printableChar | charEscapeSeq) ‘'’ + +stringLiteral ::= ‘"’ {stringElement} ‘"’ + | ‘"""’ multiLineChars ‘"""’ +stringElement ::= printableChar \ (‘"’ | ‘\’) + | UnicodeEscape + | charEscapeSeq +multiLineChars ::= {[‘"’] [‘"’] char \ ‘"’} {‘"’} +processedStringLiteral + ::= alphaid ‘"’ {printableChar \ (‘"’ | ‘$’) | escape} ‘"’ + | alphaid ‘"""’ {[‘"’] [‘"’] char \ (‘"’ | ‘$’) | escape} {‘"’} ‘"""’ +escape ::= ‘$$’ + | ‘$’ letter { letter | digit } + 
| ‘{’ Block [‘;’ whiteSpace stringFormat whiteSpace] ‘}’ +stringFormat ::= {printableChar \ (‘"’ | ‘}’ | ‘ ’ | ‘\t’ | ‘\n’)} + +symbolLiteral ::= ‘'’ plainid + +comment ::= ‘/*’ “any sequence of characters; nested comments are allowed” ‘*/’ + | ‘//’ “any sequence of characters up to end of line” + +nl ::= “new line character” +semi ::= ‘;’ | nl {nl} +``` + + +## Context-free Syntax + +The context-free syntax of Scala is given by the following EBNF +grammar: + +### Literals and Paths +```ebnf +SimpleLiteral ::= [‘-’] integerLiteral + | [‘-’] floatingPointLiteral + | booleanLiteral + | characterLiteral + | stringLiteral +Literal ::= SimpleLiteral + | processedStringLiteral + | symbolLiteral + | ‘null’ + +QualId ::= id {‘.’ id} +ids ::= id {‘,’ id} + +Path ::= StableId + | [id ‘.’] ‘this’ +StableId ::= id + | Path ‘.’ id + | [id ‘.’] ‘super’ [ClassQualifier] ‘.’ id +ClassQualifier ::= ‘[’ id ‘]’ +``` + +### Types +```ebnf +Type ::= [‘implicit’] FunArgTypes ‘=>’ Type Function(ts, t) + | HkTypeParamClause ‘=>’ Type TypeLambda(ps, t) + | InfixType +FunArgTypes ::= InfixType + | ‘(’ [ FunArgType {‘,’ FunArgType } ] ‘)’ +InfixType ::= RefinedType {id [nl] RefinedType} InfixOp(t1, op, t2) +RefinedType ::= WithType {[nl] Refinement} RefinedTypeTree(t, ds) +WithType ::= AnnotType {‘with’ AnnotType} (deprecated) +AnnotType ::= SimpleType {Annotation} Annotated(t, annot) +SimpleType ::= SimpleType (TypeArgs | NamedTypeArgs) AppliedTypeTree(t, args) + | SimpleType ‘#’ id Select(t, name) + | StableId + | Path ‘.’ ‘type’ SingletonTypeTree(p) + | ‘(’ ArgTypes ‘)’ Tuple(ts) + | ‘_’ TypeBounds + | Refinement RefinedTypeTree(EmptyTree, refinement) + | SimpleLiteral SingletonTypeTree(l) +ArgTypes ::= Type {‘,’ Type} + | NamedTypeArg {‘,’ NamedTypeArg} +FunArgType ::= Type + | ‘=>’ Type PrefixOp(=>, t) +ParamType ::= [‘=>’] ParamValueType +ParamValueType ::= Type [‘*’] PostfixOp(t, "*") +TypeArgs ::= ‘[’ ArgTypes ‘]’ ts +NamedTypeArg ::= id ‘=’ Type NamedArg(id, t) +NamedTypeArgs ::= 
‘[’ NamedTypeArg {‘,’ NamedTypeArg} ‘]’ nts +Refinement ::= ‘{’ [Dcl] {semi [Dcl]} ‘}’ ds +TypeBounds ::= [‘>:’ Type] [‘<:’ Type] | INT TypeBoundsTree(lo, hi) +TypeParamBounds ::= TypeBounds {‘<%’ Type} {‘:’ Type} ContextBounds(typeBounds, tps) +``` + +### Expressions +```ebnf +Expr ::= [‘implicit’] FunParams ‘=>’ Expr Function(args, expr), Function(ValDef([implicit], id, TypeTree(), EmptyTree), expr) + | Expr1 +BlockResult ::= [‘implicit’] FunParams ‘=>’ Block + | Expr1 +FunParams ::= Bindings + | id + | ‘_’ +Expr1 ::= ‘if’ ‘(’ Expr ‘)’ {nl} Expr [[semi] ‘else’ Expr] If(Parens(cond), thenp, elsep?) + | ‘if’ Expr ‘then’ Expr [[semi] ‘else’ Expr] If(cond, thenp, elsep?) + | ‘while’ ‘(’ Expr ‘)’ {nl} Expr WhileDo(Parens(cond), body) + | ‘while’ Expr ‘do’ Expr WhileDo(cond, body) + | ‘do’ Expr [semi] ‘while’ Expr DoWhile(expr, cond) + | ‘try’ Expr Catches [‘finally’ Expr] Try(expr, catches, expr?) + | ‘try’ Expr [‘finally’ Expr] Try(expr, Nil, expr?) + | ‘throw’ Expr Throw(expr) + | ‘return’ [Expr] Return(expr?) 
+ | ForExpr + | [SimpleExpr ‘.’] id ‘=’ Expr Assign(expr, expr) + | SimpleExpr1 ArgumentExprs ‘=’ Expr Assign(expr, expr) + | PostfixExpr [Ascription] + | PostfixExpr ‘match’ ‘{’ CaseClauses ‘}’ Match(expr, cases) -- point on match +Ascription ::= ‘:’ InfixType Typed(expr, tp) + | ‘:’ Annotation {Annotation} Typed(expr, Annotated(EmptyTree, annot)*) +Catches ::= ‘catch’ Expr +PostfixExpr ::= InfixExpr [id] PostfixOp(expr, op) +InfixExpr ::= PrefixExpr + | InfixExpr id [nl] InfixExpr InfixOp(expr, op, expr) +PrefixExpr ::= [‘-’ | ‘+’ | ‘~’ | ‘!’] SimpleExpr PrefixOp(expr, op) +SimpleExpr ::= ‘new’ Template New(templ) + | BlockExpr + | SimpleExpr1 [‘_’] PostfixOp(expr, _) +SimpleExpr1 ::= Literal + | Path + | ‘_’ + | ‘(’ ExprsInParens ‘)’ Parens(exprs) + | SimpleExpr ‘.’ id Select(expr, id) + | SimpleExpr (TypeArgs | NamedTypeArgs) TypeApply(expr, args) + | SimpleExpr1 ArgumentExprs Apply(expr, args) + | XmlExpr +ExprsInParens ::= ExprInParens {‘,’ ExprInParens} +ExprInParens ::= PostfixExpr ‘:’ Type + | Expr +ParArgumentExprs ::= ‘(’ ExprsInParens ‘)’ exprs + | ‘(’ [ExprsInParens] PostfixExpr ‘:’ ‘_’ ‘*’ ‘)’ exprs :+ Typed(expr, Ident(wildcardStar)) +ArgumentExprs ::= ParArgumentExprs + | [nl] BlockExpr +BlockExpr ::= ‘{’ CaseClauses ‘}’ Match(EmptyTree, cases) + | ‘{’ Block ‘}’ block // starts at { +Block ::= {BlockStat semi} [BlockResult] Block(stats, expr?) 
+BlockStat ::= Import + | {Annotation} [‘implicit’ | ‘lazy’] Def + | {Annotation} {LocalModifier} TmplDef + | Expr1 + +ForExpr ::= ‘for’ (‘(’ Enumerators ‘)’ | ‘{’ Enumerators ‘}’) ForYield(enums, expr) + {nl} [‘yield’] Expr + | ‘for’ Enumerators (‘do’ Expr | ‘yield’ Expr) ForDo(enums, expr) +Enumerators ::= Generator {semi Enumerator | Guard} +Enumerator ::= Generator + | Guard + | Pattern1 ‘=’ Expr GenAlias(pat, expr) +Generator ::= Pattern1 ‘<-’ Expr GenFrom(pat, expr) +Guard ::= ‘if’ PostfixExpr + +CaseClauses ::= CaseClause { CaseClause } CaseDef(pat, guard?, block) // block starts at => +CaseClause ::= ‘case’ (Pattern [Guard] ‘=>’ Block | INT) + +Pattern ::= Pattern1 { ‘|’ Pattern1 } Alternative(pats) +Pattern1 ::= PatVar ‘:’ RefinedType Bind(name, Typed(Ident(wildcard), tpe)) + | Pattern2 +Pattern2 ::= [varid ‘@’] InfixPattern Bind(name, pat) +InfixPattern ::= SimplePattern { id [nl] SimplePattern } InfixOp(pat, op, pat) +SimplePattern ::= PatVar Ident(wildcard) + | Literal Bind(name, Ident(wildcard)) + | ‘(’ [Patterns] ‘)’ Parens(pats) Tuple(pats) + | XmlPattern + | SimplePattern1 [TypeArgs] [ArgumentPatterns] +SimplePattern1 ::= Path + | ‘{’ Block ‘}’ + | SimplePattern1 ‘.’ id +PatVar ::= varid + | ‘_’ +Patterns ::= Pattern {‘,’ Pattern} +ArgumentPatterns ::= ‘(’ [Patterns] ‘)’ Apply(fn, pats) + | ‘(’ [Patterns ‘,’] Pattern2 ‘:’ ‘_’ ‘*’ ‘)’ +``` + +### Type and Value Parameters +```ebnf +ClsTypeParamClause::= ‘[’ ClsTypeParam {‘,’ ClsTypeParam} ‘]’ +ClsTypeParam ::= {Annotation} [{Modifier} type] [‘+’ | ‘-’] TypeDef(Modifiers, name, tparams, bounds) + id [HkTypeParamClause] TypeParamBounds Bound(below, above, context) + +DefTypeParamClause::= ‘[’ DefTypeParam {‘,’ DefTypeParam} ‘]’ +DefTypeParam ::= {Annotation} id [HkTypeParamClause] TypeParamBounds + +TypTypeParamClause::= ‘[’ TypTypeParam {‘,’ TypTypeParam} ‘]’ +TypTypeParam ::= {Annotation} id [HkTypeParamClause] TypeBounds + +HkTypeParamClause ::= ‘[’ HkTypeParam {‘,’ HkTypeParam} ‘]’ +HkTypeParam ::= 
{Annotation} [‘+’ | ‘-’] (id [HkTypeParamClause] | ‘_’) + TypeBounds + +ClsParamClauses ::= {ClsParamClause} [[nl] ‘(’ ‘implicit’ ClsParams ‘)’] +ClsParamClause ::= [nl] ‘(’ [ClsParams] ‘)’ +ClsParams ::= ClsParam {‘,’ ClsParam} +ClsParam ::= {Annotation} ValDef(mods, id, tpe, expr) -- point of mods on val/var + [{Modifier} (‘val’ | ‘var’) | ‘inline’] Param +Param ::= id ‘:’ ParamType [‘=’ Expr] + | INT + +DefParamClauses ::= {DefParamClause} [[nl] ‘(’ ‘implicit’ DefParams ‘)’] +DefParamClause ::= [nl] ‘(’ [DefParams] ‘)’ +DefParams ::= DefParam {‘,’ DefParam} +DefParam ::= {Annotation} [‘inline’] Param ValDef(mods, id, tpe, expr) -- point of mods at id. +``` + +### Bindings and Imports +```ebnf +Bindings ::= ‘(’ Binding {‘,’ Binding} ‘)’ +Binding ::= (id | ‘_’) [‘:’ Type] ValDef(_, id, tpe, EmptyTree) + +Modifier ::= LocalModifier + | AccessModifier + | ‘override’ +LocalModifier ::= ‘abstract’ + | ‘final’ + | ‘sealed’ + | ‘implicit’ + | ‘lazy’ +AccessModifier ::= (‘private’ | ‘protected’) [AccessQualifier] +AccessQualifier ::= ‘[’ (id | ‘this’) ‘]’ + +Annotation ::= ‘@’ SimpleType {ParArgumentExprs} Apply(tpe, args) + +TemplateBody ::= [nl] ‘{’ [SelfType] TemplateStat {semi TemplateStat} ‘}’ (self, stats) +TemplateStat ::= Import + | {Annotation [nl]} {Modifier} Def + | {Annotation [nl]} {Modifier} Dcl + | Expr1 +SelfType ::= id [‘:’ InfixType] ‘=>’ ValDef(_, name, tpt, _) + | ‘this’ ‘:’ InfixType ‘=>’ + +Import ::= ‘import’ ImportExpr {‘,’ ImportExpr} +ImportExpr ::= StableId ‘.’ (id | ‘_’ | ImportSelectors) Import(expr, sels) +ImportSelectors ::= ‘{’ {ImportSelector ‘,’} (ImportSelector | ‘_’) ‘}’ +ImportSelector ::= id [‘=>’ id | ‘=>’ ‘_’] Ident(name), Pair(id, id) +``` + +### Declarations and Definitions +```ebnf +Dcl ::= ‘val’ ValDcl + | ‘var’ VarDcl + | ‘def’ DefDcl + | ‘type’ {nl} TypeDcl + | INT + +ValDcl ::= ids ‘:’ Type PatDef(_, ids, tpe, EmptyTree) +VarDcl ::= ids ‘:’ Type PatDef(_, ids, tpe, EmptyTree) +DefDcl ::= DefSig [‘:’ Type] DefDef(_, name, 
tparams, vparamss, tpe, EmptyTree) +DefSig ::= id [DefTypeParamClause] DefParamClauses +TypeDcl ::= id [TypTypeParamClause] [‘=’ Type] TypeDefTree(_, name, tparams, tpt) + | id [HkTypeParamClause] TypeBounds TypeDefTree(_, name, tparams, bounds) + +Def ::= ‘val’ PatDef + | ‘var’ VarDef + | ‘def’ DefDef + | ‘type’ {nl} TypeDcl + | TmplDef + | INT +PatDef ::= Pattern2 {‘,’ Pattern2} [‘:’ Type] ‘=’ Expr PatDef(_, pats, tpe?, expr) +VarDef ::= PatDef + | ids ‘:’ Type ‘=’ ‘_’ +DefDef ::= DefSig [‘:’ Type] ‘=’ Expr DefDef(_, name, tparams, vparamss, tpe, expr) + | DefSig [nl] ‘{’ Block ‘}’ DefDef(_, name, tparams, vparamss, tpe, Block) + | ‘this’ DefParamClause DefParamClauses DefDef(_, <init>, Nil, vparamss, EmptyTree, expr | Block) + (‘=’ ConstrExpr | [nl] ConstrBlock) + +TmplDef ::= ([‘case’] ‘class’ | ‘trait’) ClassDef + | [‘case’] ‘object’ ObjectDef +ClassDef ::= id [ClsTypeParamClause] ClassDef(mods, name, tparams, templ) + [ConstrMods] ClsParamClauses TemplateOpt with DefDef(_, <init>, Nil, vparamss, EmptyTree, EmptyTree) as first stat +ConstrMods ::= AccessModifier + | Annotation {Annotation} (AccessModifier | ‘this’) +ObjectDef ::= id TemplateOpt ModuleDef(mods, name, template) // no constructor +TemplateOpt ::= [‘extends’ Template | [nl] TemplateBody] +Template ::= ConstrApps [TemplateBody] | TemplateBody Template(constr, parents, self, stats) +ConstrApps ::= ConstrApp {‘with’ ConstrApp} +ConstrApp ::= AnnotType {ArgumentExprs} Apply(tp, args) +ConstrExpr ::= SelfInvocation + | ConstrBlock +SelfInvocation ::= ‘this’ ArgumentExprs {ArgumentExprs} +ConstrBlock ::= ‘{’ SelfInvocation {semi BlockStat} ‘}’ + +TopStatSeq ::= TopStat {semi TopStat} +TopStat ::= {Annotation [nl]} {Modifier} TmplDef + | Import + | Packaging + | PackageObject +Packaging ::= ‘package’ QualId [nl] ‘{’ TopStatSeq ‘}’ Package(qid, stats) +PackageObject ::= ‘package’ ‘object’ ObjectDef object with package in mods. 
+ +CompilationUnit ::= {‘package’ QualId semi} TopStatSeq Package(qid, stats) +``` |