diff options
author | lihaoyi <haoyi.sg@gmail.com> | 2014-11-24 02:48:13 -0800 |
---|---|---|
committer | lihaoyi <haoyi.sg@gmail.com> | 2014-11-24 02:48:13 -0800 |
commit | 6829fb683e1e0ab3a14272a756af63a1a40ebfd7 (patch) | |
tree | b716835039cf23356d083fb4a97ca4475afe1a90 | |
parent | c6e266f8d0f8d8ce948ddf6b8539e28606e9b009 (diff) | |
download | hands-on-scala-js-6829fb683e1e0ab3a14272a756af63a1a40ebfd7.tar.gz hands-on-scala-js-6829fb683e1e0ab3a14272a756af63a1a40ebfd7.tar.bz2 hands-on-scala-js-6829fb683e1e0ab3a14272a756af63a1a40ebfd7.zip |
finish move
5 files changed, 956 insertions, 0 deletions
diff --git a/scalaParser/src/main/scala/scalaParser/ScalaSyntax.scala b/scalaParser/src/main/scala/scalaParser/ScalaSyntax.scala new file mode 100644 index 0000000..60b504b --- /dev/null +++ b/scalaParser/src/main/scala/scalaParser/ScalaSyntax.scala @@ -0,0 +1,412 @@ +package scalaParser +import acyclic.file +import language.implicitConversions +import syntax._ +import org.parboiled2._ + +/** + * Parser for Scala syntax. + * + * The `G` parameter that gets passed in to each rule stands for + * "Greedy", and determines whether or not that rule is to consume + * newlines after the last terminal in that rule. We need to pass it + * everywhere so it can go all the way to the last terminal deep + * inside the parse tree, which can then decide whether or not to + * consume whitespace. + * + * The vast majority of terminals will consume newlines; only rules + * which occur in {} blocks won't have their terminals consume newlines, + * and only the *last* terminal in the rule will be affected. + * That's why the parser does terminals-consume-newlines-by-default, + * and leaves it up to the dev to thread the `G` variable where-ever + * we want the opposite behavior. + */ +class ScalaSyntax(val input: ParserInput) extends Parser with Basic with Identifiers with Literals { + // Aliases for common things. These things are used in almost every parser + // in the file, so it makes sense to keep them short. + type B = Boolean + val t = true + type R0 = Rule0 + /** + * Parses all whitespace, excluding newlines. This is only + * really useful in e.g. {} blocks, where we want to avoid + * capturing newlines so semicolon-inference would work + */ + def WS = rule { zeroOrMore(Basic.WhitespaceChar | Literals.Comment) } + + /** + * Parses whitespace, including newlines. 
+ * This is the default for most things + */ + def WL = rule{ zeroOrMore(Basic.WhitespaceChar | Literals.Comment | Basic.Newline) } + + + /** + * Whitespace which captures or doesn't-capture + * newlines depending on the G that gets passed in + */ + def W(G: B = t) = + if (G) WL + else WS + + /** + * By default, all strings and characters greedily + * capture all whitespace immediately after the token. + */ + implicit private[this] def wspStr(s: String): R0 = rule { str(s) ~ WL } + implicit private[this] def wspChar(s: Char): R0 = rule { ch(s) ~ WL } + + /** + * Most keywords don't just require the correct characters to match, + * they have to ensure that subsequent characters *don't* match in + * order for it to be a keyword. This enforces that rule for key-words + * (W) and key-operators (O) which have different non-match criteria. + */ + object K { + def W(s: String) = rule { + Key.W(s) ~ WL + } + + def O(s: String) = rule { + Key.O(s) ~ WL + } + } + + /** + * Occasionally, you want to decide whether or not to + * capture newlines based on the context, so use this + * and pass in G manually. + */ + def StrW(s: String, G: B): R0 = rule { str(s) ~ W(G) } + + def pos = cursor -> cursorChar + + /** + * helper printing function + */ + def pr(s: String) = rule { run(println(s"LOGGING $cursor: $s")) } + + def Id(G: B = t) = rule { Identifiers.Id ~ W(G) } + def VarId(G: B = t) = rule { Identifiers.VarId ~ W(G) } + def Literal(G: B = t) = rule { Literals.Literal ~ W(G) } + def Semi = rule { Basic.Semi ~ WL } + def Newline = rule { Basic.Newline ~ WL } + + def QualId(G: B = t) = rule { oneOrMore(Id(false)) separatedBy '.' ~ W(G) } + def Ids = rule { oneOrMore(Id()) separatedBy ',' } + + def Path(G: B = t): R0 = rule { + zeroOrMore(Id(G) ~ '.') ~ K.W("this") ~ zeroOrMore(Id(G)).separatedBy('.') | + StableId(G) + } + def StableId(G: B = t): R0 = rule { + zeroOrMore(Id() ~ '.') ~ (K.W("this") | K.W("super") ~ optional(ClassQualifier)) ~ '.' 
~ oneOrMore(Id(G)).separatedBy('.') | + Id(false) ~ zeroOrMore(WL ~ '.' ~ WL ~ Id(false)) ~ W(G) + } + + def ClassQualifier = rule { '[' ~ Id() ~ ']' } + + def Type(G: B = t): R0 = rule { + FunctionArgTypes ~ K.O("=>") ~ Type(G) | InfixType(false) ~ optional(WL ~ ExistentialClause) ~ W(G) + } + def FunctionArgTypes = rule { + InfixType() | '(' ~ optional(oneOrMore(ParamType) separatedBy ',') ~ ')' + } + + /** `forSome { dcl; dcl ... }` clause: one or more Semi-separated declarations, closed by `}`. The closing brace does not consume newlines itself; the caller (Type) applies W(G) afterwards. */ def ExistentialClause = rule { "forSome" ~ '{' ~ oneOrMore(ExistentialDcl(false)).separatedBy(Semi) ~ WL ~ StrW("}", false) } + def ExistentialDcl(G: B = t) = rule { K.W("type") ~ TypeDcl(G) | K.W("val") ~ ValDcl(G) } + + def InfixType(G: B = t) = rule { + CompoundType(false) ~ zeroOrMore(WL ~ Id() ~ optional(Newline) ~ CompoundType(false)) ~ W(G) + } + def CompoundType(G: B = t) = rule { + oneOrMore(AnnotType(false)).separatedBy(WL ~ K.W("with")) ~ optional(Refinement(false)) ~ W(G) + } + def AnnotType(G: B = t) = rule { + SimpleType(false) ~ zeroOrMore(WL ~ Annotation(false)) ~ W(G) + } + def SimpleType(G: B = t): R0 = rule { + BasicType(false) ~ + optional(WL ~ '#' ~ Id(false)) ~ + optional(WL ~ TypeArgs(false)) ~ + W(G) + } + def BasicType(G: B = t): R0 = rule { + '(' ~ Types ~ ')' | + Path() ~ '.' 
~ K.W("type") | + StableId(G) + } + def TypeArgs(G: B = t) = rule { '[' ~ Types ~ StrW("]", G) } + def Types = rule { oneOrMore(Type()).separatedBy(',') } + def Refinement(G: B = t) = rule { + optional(Newline) ~ '{' ~ oneOrMore(RefineStat).separatedBy(Semi) ~ StrW("}", G) + } + def RefineStat = rule { "type" ~ TypeDef(false) | Dcl(false) | MATCH } + def TypePat = rule { CompoundType() } + def Ascription(G: B = t) = rule { + ":" ~ ("_" ~ StrW("*", G) | InfixType(G) | oneOrMore(Annotation(G))) + } + + def ParamType = rule { K.O("=>") ~ Type() | Type() ~ "*" | Type() } + + def Expr(G: B = t): R0 = rule { (Bindings | optional(K.W("implicit")) ~ Id() | "_") ~ K.O("=>") ~ Expr(G) | Expr1(G) } + def Expr1(G: B = t): R0 = rule { + IfCFlow(G) | + WhileCFlow(G) | + TryCFlow(G) | + DoWhileCFlow(G) | + ForCFlow(G) | + K.W("throw") ~ Expr(G) | + K.W("return") ~ optional(Expr(G)) | + SimpleExpr() ~ K.O("=") ~ Expr(G) | + PostfixExpr(false) ~ optional("match" ~ '{' ~ CaseClauses ~ StrW("}", false) | Ascription(false)) ~ W(G) + } + def IfCFlow(G: B = t) = rule { "if" ~ '(' ~ Expr() ~ ')' ~ zeroOrMore(Newline) ~ Expr(G) ~ optional(optional(Semi) ~ K.W("else") ~ Expr(G)) } + def WhileCFlow(G: B = t) = rule { "while" ~ '(' ~ Expr() ~ ')' ~ zeroOrMore(Newline) ~ Expr(G) } + def TryCFlow(G: B = t) = rule { + K.W("try") ~ Expr(false) ~ + optional(WL ~ K.W("catch") ~ Expr(false)) ~ + optional(WL ~ K.W("finally") ~ Expr(false)) ~ + W(G) + } + + def DoWhileCFlow(G: B = t) = rule { K.W("do") ~ Expr() ~ optional(Semi) ~ "while" ~ '(' ~ Expr() ~ StrW(")", G) } + def ForCFlow(G: B = t) = rule { + "for" ~ + ('(' ~ Enumerators ~ ')' | '{' ~ Enumerators ~ '}') ~ + zeroOrMore(Newline) ~ + optional(K.W("yield")) ~ + Expr(G) } + def PostfixExpr(G: B = t): R0 = rule { InfixExpr(G) ~ optional(Id() ~ optional(Newline)) } + def InfixExpr(G: B = t): R0 = rule { PrefixExpr(G) ~ zeroOrMore(Id() ~ optional(Newline) ~ PrefixExpr(G)) } + def PrefixExpr(G: B = t) = rule { optional(anyOf("-+~!")) ~ 
SimpleExpr(G) } + + def SimpleExpr(G: B = t): R0 = rule { + SimpleExpr1(false) ~ + zeroOrMore(WL ~ ('.' ~ Id(false) | TypeArgs(false) | ArgumentExprs(false))) ~ + optional(WL ~ StrW("_", false)) ~ + W(G) + } + + def SimpleExpr1(G: B = t) = rule{ + K.W("new") ~ (ClassTemplate(G) | TemplateBody(G)) | + BlockExpr(G) | + Literal(G) ~ drop[String] | + Path(G) | + K.W("_") | + '(' ~ optional(Exprs) ~ StrW(")", G) + } + + + + def Exprs: R0 = rule { oneOrMore(Expr()).separatedBy(',') } + def ArgumentExprs(G: B = t): R0 = rule { + '(' ~ optional(Exprs ~ optional(K.O(":") ~ K.W("_") ~ '*')) ~ StrW(")", G) | + optional(Newline) ~ BlockExpr(G) + } + + def BlockExpr(G: B = t): R0 = rule { '{' ~ (CaseClauses | Block) ~ StrW("}", G) } + def Block: R0 = rule { + zeroOrMore(BlockStat ~ Semi) ~ optional(ResultExpr()) + } + + def BlockStat: R0 = rule { + Semi | + Import(false) | + zeroOrMore(Annotation(false)) ~ (optional(K.W("implicit") | K.W("lazy")) ~ Def(false) | zeroOrMore(LocalModifier) ~ TmplDef(false)) | + Expr1(false) + } + /** Last expression of a block: either a lambda head followed by `=>` and the rest of the block, or a plain Expr1. `=>` is symbolic, so it is matched with the key-operator lookahead K.O (as in Expr and CaseClause), not the key-word lookahead K.W. */ def ResultExpr(G: B = t): R0 = rule { (Bindings | optional(K.W("implicit")) ~ Id() | "_") ~ K.O("=>") ~ Block | Expr1(t) } + def Enumerators: R0 = rule { Generator(false) ~ zeroOrMore(Semi ~ Enumerator(false)) ~ WL } + def Enumerator(G: B = t): R0 = rule { Generator(G) | Guard(G) | Pattern1 ~ K.O("=") ~ Expr(G) } + def Generator(G: B = t): R0 = rule { Pattern1 ~ K.O("<-") ~ Expr(false) ~ optional(WL ~ Guard(false)) ~ W(G) } + def CaseClauses: R0 = rule { oneOrMore(CaseClause) } + def CaseClause: R0 = rule { K.W("case") ~ Pattern ~ optional(Guard(true)) ~ K.O("=>") ~ Block } + def Guard(G: B = t): R0 = rule { K.W("if") ~ PostfixExpr(G) } + def Pattern: R0 = rule { + oneOrMore(Pattern1).separatedBy('|') + } + def Pattern1: R0 = rule { + K.W("_") ~ K.O(":") ~ TypePat | VarId() ~ K.O(":") ~ TypePat | Pattern2 + } + def Pattern2: R0 = rule { + VarId() ~ "@" ~ Pattern3 | Pattern3 | VarId() + } + def Pattern3: R0 = rule { + SimplePattern ~ zeroOrMore(Id() ~ 
SimplePattern) + } + def SimplePattern: R0 = rule { + K.W("_") | + Literal() ~ drop[String] | + '(' ~ optional(Patterns) ~ ')' | + ( + StableId() ~ + optional( + '(' ~ + (optional(Patterns ~ ',') ~ optional(VarId() ~ '@') ~ K.W("_") ~ '*' | optional(Patterns)) ~ + ')' + ) + ) | + VarId() + } + def Patterns: R0 = rule { K.W("_") ~ '*' | oneOrMore(Pattern).separatedBy(',') } + + def TypeParamClause: R0 = rule { '[' ~ oneOrMore(VariantTypeParam).separatedBy(',') ~ ']' } + def FunTypeParamClause: R0 = rule { '[' ~ oneOrMore(TypeParam).separatedBy(',') ~ ']' } + def VariantTypeParam: R0 = rule { zeroOrMore(Annotation()) ~ optional(anyOf("+-")) ~ TypeParam } + def TypeParam: R0 = rule { + (Id() | K.W("_")) ~ + optional(TypeParamClause) ~ + optional(K.O(">:") ~ Type()) ~ + optional(K.O("<:") ~ Type()) ~ + zeroOrMore(K.O("<%") ~ Type()) ~ + zeroOrMore(K.O(":") ~ Type()) + } + def ParamClauses: R0 = rule { zeroOrMore(ParamClause) ~ optional(optional(Newline) ~ '(' ~ K.W("implicit") ~ Params ~ ')') } + def ParamClause: R0 = rule { optional(Newline) ~ '(' ~ optional(Params) ~ ')' } + def Params: R0 = rule { zeroOrMore(Param).separatedBy(',') } + def Param: R0 = rule { zeroOrMore(Annotation()) ~ Id() ~ optional(K.O(":") ~ ParamType) ~ optional(K.O("=") ~ Expr()) } + /** Zero or more class parameter clauses, optionally followed by one `(implicit p1, p2, ...)` clause. The implicit clause takes a comma-separated ClassParams list, mirroring how ParamClauses uses Params. */ def ClassParamClauses(G: B = t): R0 = rule { zeroOrMore(ClassParamClause(G)) ~ optional(optional(Newline) ~ '(' ~ K.W("implicit") ~ ClassParams ~ StrW(")", G)) } + def ClassParamClause(G: B = t): R0 = rule { optional(Newline) ~ '(' ~ optional(ClassParams) ~ StrW(")", G) } + def ClassParams: R0 = rule { oneOrMore(ClassParam).separatedBy(',') } + def ClassParam: R0 = rule { zeroOrMore(Annotation()) ~ optional(zeroOrMore(Modifier) ~ (K.W("val") | K.W("var"))) ~ Id() ~ K.O(":") ~ ParamType ~ optional(K.O("=") ~ Expr()) } + + def Bindings: R0 = rule { '(' ~ oneOrMore(Binding).separatedBy(',') ~ ')' } + def Binding: R0 = rule { (Id() | K.W("_")) ~ optional(K.O(":") ~ Type()) } + + def Modifier: R0 = rule { LocalModifier | 
AccessModifier | K.W("override") } + def LocalModifier: R0 = rule { K.W("abstract") | K.W("final") | K.W("sealed") | K.W("implicit") | K.W("lazy") } + def AccessModifier: R0 = rule { (K.W("private") | K.W("protected")) ~ optional(AccessQualifier) } + def AccessQualifier: R0 = rule { '[' ~ (K.W("this") | Id()) ~ ']' } + + def Annotation(G: B = t): R0 = rule { '@' ~ SimpleType(false) ~ zeroOrMore(WL ~ ArgumentExprs(false)) ~ W(G) } + def ConstrAnnotation: R0 = rule { '@' ~ SimpleType() ~ ArgumentExprs() } + + def TemplateBody(G: B = t): R0 = rule { + WL ~ + '{' ~ + optional(SelfType) ~ + TemplateStat ~ + zeroOrMore(Semi ~ TemplateStat) ~ + WL ~ + StrW("}", G) + } + def TemplateStat: R0 = rule { + Import(false) | + zeroOrMore(Annotation() ~ optional(Newline)) ~ zeroOrMore(Modifier) ~ (Def(false) | Dcl(false)) | + Expr(false) | + MATCH + } + + def SelfType: R0 = rule { K.W("this") ~ K.O(":") ~ Type() ~ K.O("=>") | Id() ~ optional(K.O(":") ~ Type()) ~ K.O("=>") } + + def Import(G: B = t): R0 = rule { K.W("import") ~ oneOrMore(ImportExpr(G)).separatedBy(',') } + + def ImportExpr(G: B = t): R0 = rule { StableId(G) ~ optional('.' 
~ (StrW("_", G) | ImportSelectors(G))) } + def ImportSelectors(G: B = t): R0 = rule { '{' ~ zeroOrMore(ImportSelector ~ ',') ~ (ImportSelector | K.W("_")) ~ StrW("}", G) } + def ImportSelector: R0 = rule { Id() ~ optional(K.O("=>") ~ (Id() | K.W("_"))) } + + def Dcl(G: B = t): R0 = rule { + K.W("val") ~ ValDcl(G) | + K.W("var") ~ VarDcl(G) | + K.W("def") ~ FunDcl(G) | + K.W("type") ~ zeroOrMore(Newline) ~ TypeDcl(G) + } + def ValDcl(G: B = t): R0 = rule { Ids ~ K.O(":") ~ Type(G) } + def VarDcl(G: B = t): R0 = rule { Ids ~ K.O(":") ~ Type(G) } + def FunDcl(G: B = t): R0 = rule { FunSig(false) ~ optional(WL ~ K.O(":") ~ Type(G)) } + def FunSig(G: B = t): R0 = rule { Id() ~ optional(FunTypeParamClause) ~ ParamClauses } + def TypeDcl(G: B = t): R0 = rule { + Id(false) ~ + optional(WL ~ TypeParamClause) ~ + optional(WL ~ K.O(">:") ~ Type(false)) ~ + optional(WL ~ K.O("<:") ~ Type(false)) ~ + W(G) + } + + def PatVarDef(G: B = t): R0 = rule { K.W("val") ~ PatDef(G) | K.W("var") ~ VarDef(G) } + def Def(G: B = t): R0 = rule { K.W("def") ~ FunDef(G) | K.W("type") ~ zeroOrMore(Newline) ~ TypeDef(G) | PatVarDef(G) | TmplDef(G) } + def PatDef(G: B = t): R0 = rule { oneOrMore(Pattern2).separatedBy(',') ~ optional(K.O(":") ~ Type()) ~ K.O("=") ~ Expr(G) } + def VarDef(G: B = t): R0 = rule { Ids ~ K.O(":") ~ Type() ~ K.O("=") ~ K.W("_") | PatDef(G) } + def FunDef(G: B = t): R0 = rule { + K.W("this") ~ ParamClause ~ ParamClauses ~ (K.O("=") ~ ConstrExpr | optional(Newline) ~ ConstrBlock) | + FunSig() ~ + ( + optional(K.O(":") ~ Type()) ~ K.O("=") ~ optional(K.W("macro")) ~ Expr(G) | + optional(Newline) ~ '{' ~ Block ~ StrW("}", G) + ) + } + def TypeDef(G: B = t): R0 = rule { Id() ~ optional(TypeParamClause) ~ K.O("=") ~ Type(G) } + + def TmplDef(G: B = t): R0 = rule { + K.W("trait") ~ TraitDef(G) | + optional(K.W("case")) ~ (K.W("class") ~ ClassDef(G) | + K.W("object") ~ ObjectDef(G)) + } + def ClassDef(G: B = t): R0 = rule { + Id() ~ + optional(TypeParamClause) ~ + 
zeroOrMore(ConstrAnnotation) ~ + optional(AccessModifier) ~ + ClassParamClauses(false) ~ + ClassTemplateOpt(false) ~ + W(G) + + } + def TraitDef(G: B = t): R0 = rule { Id() ~ optional(TypeParamClause) ~ TraitTemplateOpt(G) } + def ObjectDef(G: B = t): R0 = rule { Id() ~ ClassTemplateOpt(G) } + def ClassTemplateOpt(G: B = t): R0 = rule { + WL ~ K.W("extends") ~ ClassTemplate(G) | + optional(WL ~ optional(K.W("extends")) ~ TemplateBody(G)) + } + def TraitTemplateOpt(G: B = t): R0 = rule { K.W("extends") ~ TraitTemplate(G) | optional(optional(K.W("extends")) ~ TemplateBody(G)) } + def ClassTemplate(G: B = t): R0 = rule { + optional(EarlyDefs) ~ + ClassParents(false) ~ + optional(WL ~ TemplateBody(false)) ~ + W(G) + } + + def TraitTemplate(G: B = t): R0 = rule { + optional(EarlyDefs) ~ TraitParents(false) ~ optional(TemplateBody(false)) ~ W(G) + } + def ClassParents(G: B = t): R0 = rule { + Constr(false) ~ zeroOrMore(WL ~ K.W("with") ~ AnnotType(G)) ~ W(G) + } + def TraitParents(G: B = t): R0 = rule { + AnnotType(false) ~ zeroOrMore(WL ~ K.W("with") ~ AnnotType(false)) ~ W(G) + } + def Constr(G: B = t): R0 = rule { + AnnotType(false) ~ zeroOrMore(WL ~ ArgumentExprs(false)) ~ + W(G) + } + def EarlyDefs: R0 = rule { + '{' ~ optional(oneOrMore(EarlyDef).separatedBy(Semi)) ~ '}' ~ K.W("with") + } + def EarlyDef: R0 = rule { + zeroOrMore(Annotation() ~ optional(Newline)) ~ zeroOrMore(Modifier) ~ PatVarDef(false) + } + def ConstrExpr: R0 = rule { ConstrBlock | SelfInvocation } + def ConstrBlock: R0 = rule { '{' ~ SelfInvocation ~ zeroOrMore(Semi ~ BlockStat) ~ '}' } + def SelfInvocation: R0 = rule { K.W("this") ~ oneOrMore(ArgumentExprs()) } + + def TopStatSeq: R0 = rule { zeroOrMore(TopStat).separatedBy(Semi) } + def TopStat: R0 = rule { + Packaging | + PackageObject(false) | + Import(false) | + zeroOrMore(Annotation(false) ~ optional(Newline)) ~ zeroOrMore(Modifier) ~ TmplDef(false) | + MATCH + } + def Packaging: R0 = rule { K.W("package") ~ QualId() ~ '{' ~ TopStatSeq ~ 
'}' } + def PackageObject(G: B = t): R0 = rule { K.W("package") ~ K.W("object") ~ ObjectDef(G) } + def CompilationUnit: Rule1[String] = rule { + capture( + WL ~ + zeroOrMore(Semi) ~ + zeroOrMore(K.W("package") ~ QualId(false)).separatedBy(Semi) ~ + TopStatSeq ~ + EOI + ) + } +} diff --git a/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala b/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala new file mode 100644 index 0000000..8d3232a --- /dev/null +++ b/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala @@ -0,0 +1,51 @@ +package scalaParser +package syntax +import acyclic.file +import org.parboiled2._ + +trait Basic { self: Parser => + object Basic{ + def UnicodeExcape = rule { "\\u" ~ 4.times(HexDigit) } + + + //Numbers and digits + /** One hexadecimal digit: 0-9, a-f or A-F. Used by HexNumeral and UnicodeExcape. */ def HexDigit = rule { Digit | "a" - "f" | "A" - "F" } + def Digit = rule { "0" | NonZeroDigit } + def NonZeroDigit = rule { "1" - "9" } + def HexNumeral = rule { "0x" ~ oneOrMore(HexDigit) } + def DecimalNumeral = rule(oneOrMore(Digit)) + def ExponentPart = rule { anyOf("Ee") ~ optional(anyOf("+-")) ~ oneOrMore(Digit) } + def FloatType = rule { anyOf("FfDd") } + + def Parentheses = rule { "(" | ")" | "[" | "]" | "{" | "}" } + def DelimiterChar = rule { "'" | "\"" | "." | ";" | "," } + + def WhitespaceChar = rule { "\u0020" | "\u0009" } + def Newline = rule { "\r\n" | "\n" } + def Semi = rule { ';' | oneOrMore(Newline) } + def OperatorChar = rule { + anyOf("""!#$%&*+-/:<=>?@\^|~""") | + CharPredicate.from(_.getType match { + case Character.OTHER_SYMBOL | Character.MATH_SYMBOL => true; case _ => false + }) + } + def Letter = rule { Upper | Lower | CharPredicate.from(c => c.isLetter | c.isDigit) } + def Lower = rule { "a" - "z" | "$" | "_" | CharPredicate.from(_.isLower) } + def Upper = rule { "A" - "Z" | CharPredicate.from(_.isUpper) } + } + /** + * Most keywords don't just require the correct characters to match, + * they have to ensure that subsequent characters *don't* match in + * order for it to be a keyword. 
This enforces that rule for key-words + * (W) and key-operators (O) which have different non-match criteria. + */ + object Key { + def W(s: String) = rule { + str(s) ~ !(Basic.Letter | Basic.Digit) + } + + def O(s: String) = rule { + str(s) ~ !Basic.OperatorChar + } + } +} diff --git a/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala b/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala new file mode 100644 index 0000000..4bc972f --- /dev/null +++ b/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala @@ -0,0 +1,35 @@ +package scalaParser +package syntax +import acyclic.file +import org.parboiled2._ + +trait Identifiers { self: Parser with Basic => + object Identifiers{ + import Basic._ + def Operator = rule(oneOrMore(OperatorChar)) + + def VarId = rule { + !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Lower ~ IdRest + } + def PlainId = rule { Upper ~ IdRest | VarId | !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Operator } + def Id = rule { PlainId | ("`" ~ oneOrMore(noneOf("`")) ~ "`") } + def IdRest = rule { + zeroOrMore(zeroOrMore("_") ~ oneOrMore(!"_" ~ Letter | Digit)) ~ + optional(oneOrMore("_") ~ optional(Operator)) + } + + + /** Alphabetic reserved words. The choice is ordered, so a longer word is listed before any word that is its prefix ("finally" before "final", "forSome" before "for"). */ def AlphabetKeywords = rule { + "abstract" | "case" | "catch" | "class" | "def" | "do" | "else" | "extends" | "false" | "finally" | "final" | "forSome" | "for" | "if" | + "implicit" | "import" | "lazy" | "match" | "new" | "null" | "object" | "override" | "package" | "private" | "protected" | "return" | + "sealed" | "super" | "this" | "throw" | "trait" | "try" | "true" | "type" | "val" | "var" | "while" | "with" | "yield" | "_" + } + def SymbolicKeywords = rule{ + ":" | ";" | "=>" | "=" | "<-" | "<:" | "<%" | ">:" | "#" | "@" | "\u21d2" | "\u2190" + } + def Keywords = rule { + AlphabetKeywords ~ !Letter | SymbolicKeywords ~ !OperatorChar + + } + } +} diff --git a/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala 
b/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala new file mode 100644 index 0000000..b8342e2 --- /dev/null +++ b/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala @@ -0,0 +1,57 @@ +package scalaParser +package syntax +import acyclic.file +import org.parboiled2._ + +trait Literals { self: Parser with Basic with Identifiers => + object Literals{ + import Basic._ + def FloatingPointLiteral = rule { + capture( + "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) | + oneOrMore(Digit) ~ ( + "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) | + ExponentPart ~ optional(FloatType) | + optional(ExponentPart) ~ FloatType)) + } + + /** Integer literal with optional L/l suffix. HexNumeral is tried first: the choice is ordered, and DecimalNumeral would otherwise match the leading "0" of "0x..." and stop there. */ def IntegerLiteral = rule { capture((HexNumeral | DecimalNumeral) ~ optional(anyOf("Ll"))) } + + def BooleanLiteral = rule { capture(Key.W("true") | Key.W("false")) } + + def MultilineComment: Rule0 = rule { "/*" ~ zeroOrMore(MultilineComment | !"*/" ~ ANY) ~ "*/" } + def Comment: Rule0 = rule { + MultilineComment | + "//" ~ zeroOrMore(!Basic.Newline ~ ANY) ~ &(Basic.Newline | EOI) + } + + def Literal = rule { + (capture(optional("-")) ~ (FloatingPointLiteral | IntegerLiteral) ~> ((sign: String, number) => sign + number)) | + BooleanLiteral | + CharacterLiteral | + StringLiteral | + SymbolLiteral | + capture(Key.W("null") ~ !(Basic.Letter | Basic.Digit)) + } + + + /** Backslash escape: a backslash followed by one of b, t, n, f, r, backslash, double quote or single quote. */ def EscapedChars = rule { '\\' ~ anyOf("btnfr\\\"'") } + + // Note that symbols can take on the same values as keywords! 
+ def SymbolLiteral = rule { ''' ~ capture(Identifiers.PlainId | Identifiers.Keywords) } + + def CharacterLiteral = rule { ''' ~ capture(UnicodeExcape | EscapedChars | !'\\' ~ CharPredicate.from(isPrintableChar)) ~ ''' } + + def MultiLineChars = rule { zeroOrMore(optional('"') ~ optional('"') ~ noneOf("\"")) } + def StringLiteral = rule { + (optional(Identifiers.Id) ~ "\"\"\"" ~ capture(MultiLineChars) ~ capture("\"\"\"" ~ zeroOrMore('"')) ~> ((multilineChars: String, quotes) => multilineChars + quotes.dropRight(3))) | + (optional(Identifiers.Id) ~ '"' ~ capture(zeroOrMore("\\\"" | noneOf("\n\""))) ~ '"') + } + + def isPrintableChar(c: Char): Boolean = { + val block = Character.UnicodeBlock.of(c) + !Character.isISOControl(c) && !Character.isSurrogate(c) && block != null && block != Character.UnicodeBlock.SPECIALS + } + } +} + diff --git a/scalaParser/src/test/scala/scalaParser/SyntaxTest.scala b/scalaParser/src/test/scala/scalaParser/SyntaxTest.scala new file mode 100644 index 0000000..99ec182 --- /dev/null +++ b/scalaParser/src/test/scala/scalaParser/SyntaxTest.scala @@ -0,0 +1,401 @@ +package scalaParser + +import org.parboiled2.ParseError +import utest._ +import utest.framework.Test +import utest.util.Tree + +import scala.util.{Failure, Success} + +object SyntaxTest extends TestSuite{ + def check[T](input: String) = { + new ScalaSyntax(input).CompilationUnit.run() match{ + case Failure(f: ParseError) => + println(f.position) + println(f.formatExpectedAsString) +// println(f.formatTraces) + throw new Exception(f.position + "\t" + f.formatTraces) + case Success(parsed) => + assert(parsed == input) + } + } + println("running") + def tests = TestSuite{ + 'unit { + * - check( + "package torimatomeru" + + ) + * - check( + """ + |package torimatomeru + | + |import org.parboiled2.ParseError + |import utest._ + |import utest.framework.Test + """.stripMargin + + ) + * - check( + """ + |package torimatomeru + | + |import org.parboiled2.ParseError + |import utest._ + 
|import utest.framework.Test + |import utest.util.Tree + | + |import scala.util.{Failure, Success} + | + |object SyntaxTest extends TestSuite + """.stripMargin + ) + * - check( + """ + |object SyntaxTest extends TestSuite{ + | def check[T](input: String) = { + | + | } + |} + """.stripMargin + ) + * - check( + """ + |object SyntaxTest{ + | a() + | throw 1 + |} + """.stripMargin + ) + * - check( + """ + |object SyntaxTest extends TestSuite{ + | def check[T](input: String) = { + | new ScalaSyntax(input).CompilationUnit.run() match{ + | case Failure(f: ParseError) => + | println(f.position) + | println(f.formatExpectedAsString) + | println(f.formatTraces) + | throw new Exception(f.position + "\t" + f.formatTraces) + | case Success(parsed) => + | assert(parsed == input) + | } + | } + |} + """.stripMargin + ) + * - check( + """package scalatex + | + | + |import org.parboiled2._ + |import torimatomeru.ScalaSyntax + | + |import scalatex.stages.{Trim, Parser, Ast} + |import scalatex.stages.Ast.Block.{IfElse, For, Text} + |import Ast.Chain.Args + | + |object ParserTests extends utest.TestSuite{ + | import Ast._ + | import utest._ + | def check[T](input: String, parse: Parser => scala.util.Try[T], expected: T) = { + | val parsed = parse(new Parser(input)).get + | assert(parsed == expected) + | } + | def tests = TestSuite{} + |} + """.stripMargin + ) + * - check( + """ + |object Moo{ + | a + | .b + | + | c + |} + """.stripMargin + ) + * - check( + """ + |object Moo{ + | filename + | .asInstanceOf[Literal] + |10 + |} + """.stripMargin + ) + * - check( + """ + |object Cow{ + | ().mkString + | + | 1 + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | private[this] val applyMacroFull = 1 + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | private[this] def applyMacroFull(c: Context) + | (expr: c.Expr[String], + | runtimeErrors: Boolean, + | debug: Boolean) + | : c.Expr[Frag] = { + | } + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | class 
DebugFailure extends Exception + | + | 1 + |} + """.stripMargin + ) + * - check( + """ + |package torimatomeru + | + |package syntax + | + |import org.parboiled2._ + | + """.stripMargin + ) + * - check( + """ + |object Foo{ + | 0 match { + | case A | B => 0 + | } + |} + """.stripMargin + ) + * - check( + """ + |object Compiler{ + | + | def apply = { + | def rec = t match { + | case 0 => 0 + | } + | + | rec(tree) + | } + |} + | + """.stripMargin + ) + * - check( + """ + |object O { + | A(A(A(A(A(A(A(A()))))))) + |} + | + """.stripMargin + ) + * - check( + """ + |object O{ + | A(A(A(A(A(A(A(A(A(A(A(A(A(A(A(A()))))))))))))))) + |} + """.stripMargin + ) + * - check( + """ + |object L{ + | a.b = c + | a().b = c + |} + """.stripMargin + ) + * - check( + """/* __ *\ + |** ________ ___ / / ___ __ ____ Scala.js CLI ** + |** / __/ __// _ | / / / _ | __ / // __/ (c) 2013-2014, LAMP/EPFL ** + |** __\ \/ /__/ __ |/ /__/ __ |/_// /_\ \ http://scala-js.org/ ** + |** /____/\___/_/ |_/____/_/ | |__/ /____/ ** + |** |/____/ ** + |\* */ + | + |package scala.scalajs.cli + | + """.stripMargin + ) + * - check( + """ + |object O{ + | for { + | a <- b + | c <- d + | } { + | 1 + | } + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | val jarFile = + | try { 1 } + | catch { case _: F => G } + |} + """.stripMargin + ) + * - check( + """ + |object F{ + | func{ case _: F => fail } + |} + """.stripMargin + ) + * - check( + """ + |object Foo{ + | val a = d // g + | val b = e // h + | val c = f + |} + """.stripMargin + ) + * - check( + """ + |object L{ + | x match{ + | case y.Y(z) => z + | } + |} + """.stripMargin + ) + * - check( + """object K{ + | val a: B { + | val c: D + | } + | + | 1 + |} + """.stripMargin + ) + * - check( + """ + |object LOLS{ + | def run() {} + | + | def apply() {} + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | a =:= b.c + |} + """.stripMargin + ) + * - check( + """ + |object K{ + | a( + | 1: _* + | ) + |} + """.stripMargin + ) + * - check( + """ 
+ |object P{ + | tree match { + | case stats :+ expr => 1 + | } + |} + """.stripMargin + ) + * - check( + """ + |object K{ + | val trueA = 1 + |} + """.stripMargin + ) + * - check( + """ + |object K{ + | val nullo :: cow = 1 + |} + """.stripMargin + ) + * - check( + """ + |object K{ + | val omg_+ = 1 + |} + """.stripMargin + ) + * - check( + """ + |object K{ + | val + = 1 + | var * = 2 + |} + """.stripMargin + ) + * - check( + """ + |object O{ + | c match { + | case b_ => 1 + | } + |} + """.stripMargin + ) + } + def checkFile(path: String) = check(io.Source.fromFile(path).mkString) + 'file{ + + + * - checkFile("scalaParser/src/main/scala/scalaParser/syntax/Basic.scala") + * - checkFile("scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala") + * - checkFile("scalaParser/src/main/scala/scalaParser/syntax/Literals.scala") + * - checkFile("scalaParser/src/main/scala/scalaParser/ScalaSyntax.scala") + + * - checkFile("scalaParser/src/test/scala/scalaParser/SyntaxTest.scala") + + + * - checkFile("scalatexApi/src/main/scala/scalatex/stages/Compiler.scala") + * - checkFile("scalatexApi/src/main/scala/scalatex/stages/Parser.scala") + * - checkFile("scalatexApi/src/main/scala/scalatex/stages/Trim.scala") + * - checkFile("scalatexApi/src/main/scala/scalatex/package.scala") + + * - checkFile("scalatexApi/src/test/scala/scalatex/ParserTests.scala") + * - checkFile("scalatexApi/src/test/scala/scalatex/BasicTests.scala") + * - checkFile("scalatexApi/src/test/scala/scalatex/ErrorTests.scala") + * - checkFile("scalatexApi/src/test/scala/scalatex/TestUtil.scala") + + * - checkFile("scalatexPlugin/src/main/scala/scalatex/ScalaTexPlugin.scala") + } + +// 'omg{ +// val root = new java.io.File("../scala-js/") +// def listFiles(s: java.io.File): Iterator[String] = { +// val (dirs, files) = s.listFiles().toIterator.partition(_.isDirectory) +// files.map(_.getPath) ++ dirs.flatMap(listFiles) +// } +// for(f <- listFiles(root).filter(_.endsWith(".scala"))){ +// println("CHECKING " 
+ f) +// checkFile(f) +// } +// } + } +} |