From 5cf89bb9d5e0d3c42610d3d2be47815ef41ecd65 Mon Sep 17 00:00:00 2001 From: Li Haoyi Date: Sun, 2 Nov 2014 21:03:28 -0800 Subject: Moved new implementation out of test/ folder --- .../src/main/scala/scalatex/ScalatexParser.scala | 37 ++++ .../src/main/scala/torimatomeru/ScalaSyntax.scala | 246 +++++++++++++++++++++ .../src/main/scala/torimatomeru/syntax/Basic.scala | 30 +++ .../scala/torimatomeru/syntax/Identifiers.scala | 22 ++ .../main/scala/torimatomeru/syntax/Literals.scala | 58 +++++ scalatexApi/src/test/scala/scalatex/Main.scala | 44 +--- .../src/test/scala/torimatomeru/ScalaSyntax.scala | 246 --------------------- 7 files changed, 400 insertions(+), 283 deletions(-) create mode 100644 scalatexApi/src/main/scala/scalatex/ScalatexParser.scala create mode 100644 scalatexApi/src/main/scala/torimatomeru/ScalaSyntax.scala create mode 100644 scalatexApi/src/main/scala/torimatomeru/syntax/Basic.scala create mode 100644 scalatexApi/src/main/scala/torimatomeru/syntax/Identifiers.scala create mode 100644 scalatexApi/src/main/scala/torimatomeru/syntax/Literals.scala delete mode 100644 scalatexApi/src/test/scala/torimatomeru/ScalaSyntax.scala diff --git a/scalatexApi/src/main/scala/scalatex/ScalatexParser.scala b/scalatexApi/src/main/scala/scalatex/ScalatexParser.scala new file mode 100644 index 0000000..406f4cd --- /dev/null +++ b/scalatexApi/src/main/scala/scalatex/ScalatexParser.scala @@ -0,0 +1,37 @@ +package scalatex + +import org.parboiled2._ +import torimatomeru.ScalaSyntax + +trait Ast{ + def offset: Int +} +object Ast{ + case class Block(parts: Seq[Block.Sub], offset: Int = 0) extends Chain.Sub + object Block{ + trait Sub + case class Text(txt: String, offset: Int = 0) extends Block.Sub + } + case class Code(code: String, offset: Int = 0) + case class Chain(lhs: Code, parts: Seq[Chain.Sub], offset: Int = 0) extends Block.Sub + object Chain{ + trait Sub + case class Prop(str: String, offset: Int = 0) extends Sub + case class Args(str: String, offset: Int = 0) 
extends Sub + } +} +class ScalatexParser(input: ParserInput) extends ScalaSyntax(input) { + def TextNot(chars: String) = rule { capture(oneOrMore(noneOf(chars) | "@@")) ~> (x => Ast.Block.Text(x.replace("@@", "@"))) } + def Text = TextNot("@") + def Code = rule { + "@" ~ capture(Id | BlockExpr | ('(' ~ optional(Exprs) ~ ')')) ~> (Ast.Code(_)) + } + def ScalaChain = rule { Code ~ zeroOrMore(Extension) ~> (Ast.Chain(_, _)) } + def Extension: Rule1[Ast.Chain.Sub] = rule { + (capture(('.' ~ Id) ~ optional(TypeArgs)) ~> (Ast.Chain.Prop(_))) | + (capture(!BlockExpr ~ ArgumentExprs) ~> (Ast.Chain.Args(_))) | + TBlock + } + def TBlock = rule{ '{' ~ Body ~ '}' } + def Body = rule{ zeroOrMore(TextNot("@}") | ScalaChain) ~> (x => Ast.Block(x)) } +} diff --git a/scalatexApi/src/main/scala/torimatomeru/ScalaSyntax.scala b/scalatexApi/src/main/scala/torimatomeru/ScalaSyntax.scala new file mode 100644 index 0000000..5bbd0af --- /dev/null +++ b/scalatexApi/src/main/scala/torimatomeru/ScalaSyntax.scala @@ -0,0 +1,246 @@ +package torimatomeru + +import language.implicitConversions +import syntax._ +import org.parboiled2._ + +class ScalaSyntax(val input: ParserInput) extends Parser with Basic with Identifiers with Literals { + + def Whitespace = rule { zeroOrMore(WhitespaceChar | Comment) } + + /** + * Every token handles space at the end. 
+ * Don't let it propagate to mixins + */ + implicit private[this] def wspStr(s: String): Rule0 = rule { + str(s) ~ Whitespace + } + implicit private[this] def wspChar(s: Char): Rule0 = rule { + ch(s) ~ Whitespace + } + + def pos = cursor -> cursorChar + + /** + * helper printing function + */ + def pr(s: String) = rule { run(print(s)) } + + ////////////////////////////////////////////////// + // Override rules from dependencies + // in order to handle white spaces + // Note: when you add your AST, make sure to + // only capture super.rule and not the whitespace + ////////////////////////////////////////////////// + + def IdS = rule { super.Id ~ Whitespace } + def VarIdS = rule { super.VarId ~ Whitespace } + def LiteralS = rule { super.Literal ~ Whitespace } + def SemiS = rule { super.Semi ~ Whitespace } + def NewlineS = rule { super.Newline ~ Whitespace } + + /////////////////////////////////////////// + // Qualifiers and Ids + /////////////////////////////////////////// + + def QualId = rule { oneOrMore(IdS) separatedBy '.' } + def Ids = rule { oneOrMore(IdS) separatedBy ',' } + + //path and stableId were refactored (wrt spec) to avoid recursiveness and be more specific + def Path: Rule0 = rule { zeroOrMore(IdS ~ '.') ~ "this" ~ zeroOrMore(IdS).separatedBy('.') | StableId } + def StableId: Rule0 = rule { + zeroOrMore(IdS ~ '.') ~ ("this" | "super" ~ optional(ClassQualifier)) ~ '.' ~ oneOrMore(IdS).separatedBy('.') | + IdS ~ zeroOrMore('.' 
~ IdS) + } +// def StableId: Rule0 = rule { zeroOrMore(Id ~ '.') ~ optional("this" | "super" ~ optional(ClassQualifier)) ~ oneOrMore(Id).separatedBy('.') } + def ClassQualifier = rule { '[' ~ IdS ~ ']' } + + /////////////////////////////////////////// + // Types and more Types + /////////////////////////////////////////// + + def Type: Rule0 = rule { FunctionArgTypes ~ "=>" ~ Type | InfixType ~ optional(ExistentialClause) } + def FunctionArgTypes = rule { InfixType | '(' ~ optional(oneOrMore(ParamType) separatedBy ',') ~ ')' } + + def ExistentialClause = rule { "forSome" ~ '{' ~ oneOrMore(ExistentialDcl).separatedBy(SemiS) } + def ExistentialDcl = rule { "type" ~ TypeDcl | "val" ~ ValDcl } + + def InfixType = rule { CompoundType ~ zeroOrMore(IdS ~ optional(NewlineS) ~ CompoundType) } + def CompoundType = rule { oneOrMore(AnnotType).separatedBy("with") ~ optional(Refinement) } + def AnnotType = rule { SimpleType ~ zeroOrMore(Annotation) } + def SimpleType: Rule0 = rule { + BasicType ~ optional('#' ~ IdS) ~ optional(TypeArgs) + } + def BasicType: Rule0 = rule { + '(' ~ Types ~ ')' | + Path ~ '.' 
~ "type" | + StableId + } + def TypeArgs = rule { '[' ~ Types ~ ']' } + def Types = rule { oneOrMore(Type).separatedBy(',') } + def Refinement = rule { optional(NewlineS) ~ '{' ~ oneOrMore(RefineStat).separatedBy(SemiS) ~ '}' } + def RefineStat = rule { "type" ~ TypeDef | Dcl | MATCH } + def TypePat = rule { Type } + def Ascription = rule { ":" ~ (InfixType | oneOrMore(Annotation) | "_" ~ "*") } + + def ParamType = rule { "=>" ~ Type | Type ~ "*" | Type } + + ///////////////////////////////////////////////// + // Declarations, Expressions and Pattern Matching + ///////////////////////////////////////////////// + + def Expr: Rule0 = rule { (Bindings | optional("implicit") ~ IdS | "_") ~ "=>" ~ Expr | Expr1 } + def Expr1: Rule0 = rule { + IfCFlow | + WhileCFlow | + TryCFlow | + DoWhileCFlow | + ForCFlow | + "throw" ~ Expr | + "return" ~ optional(Expr) | + SimpleExpr1 ~ ArgumentExprs ~ '=' ~ Expr | + optional(SimpleExpr ~ '.') ~ IdS ~ '=' ~ Expr | + PostfixExpr ~ optional("match" ~ '{' ~ CaseClauses ~ '}' | Ascription) + } + + def IfCFlow = rule { "if" ~ '(' ~ Expr ~ ')' ~ zeroOrMore(NewlineS) ~ Expr ~ optional(optional(SemiS) ~ "else" ~ Expr) } + def WhileCFlow = rule { "while" ~ '(' ~ Expr ~ ')' ~ zeroOrMore(NewlineS) ~ Expr } + def TryCFlow = rule { "try" ~ '{' ~ Block ~ '}' ~ optional("catch" ~ '{' ~ CaseClauses ~ '}') ~ optional("finally" ~ Expr) } + def DoWhileCFlow = rule { "do" ~ Expr ~ optional(SemiS) ~ "while" ~ '(' ~ Expr ~ ')' } + def ForCFlow = rule { "for" ~ ('(' ~ Enumerators ~ ')' | '{' ~ Enumerators ~ '}') ~ zeroOrMore(NewlineS) ~ optional("yield") ~ Expr } + def PostfixExpr: Rule0 = rule { InfixExpr ~ optional(IdS ~ optional(NewlineS)) } + def InfixExpr: Rule0 = rule { PrefixExpr ~ zeroOrMore(IdS ~ optional(NewlineS) ~ PrefixExpr) } + def PrefixExpr = rule { optional(anyOf("-+~!")) ~ SimpleExpr } + def SimpleExpr: Rule0 = rule { SimpleExprNoLiteral | SimpleExpr1 ~ optional(ArgumentExprs) ~ optional('_') } + def SimpleExprNoLiteral: Rule0 = rule { 
"new" ~ (ClassTemplate | TemplateBody) | BlockExpr } + def SimpleExpr1: Rule0 = rule { + // run(println("SimpleExpr1 matching on " + pos)) ~ + LiteralS ~ drop[String] | //literal currently captures, so it can be used outside. but since all our rules lack AST, we drop its value in order to be able to compose them + Path | + '_' | + '(' ~ optional(Exprs) ~ ')' | + SimpleExprNoLiteral ~ '.' ~ IdS | + SimpleExprNoLiteral ~ TypeArgs /*| + XmlExpr*/ + } + def Exprs: Rule0 = rule { oneOrMore(Expr) separatedBy ',' } + def ArgumentExprs: Rule0 = rule { + '(' ~ (optional(Exprs ~ ',') ~ PostfixExpr ~ ':' ~ '_' ~ '*' | optional(Exprs)) ~ ')' | + optional(NewlineS) ~ BlockExpr + } + def BlockExpr: Rule0 = rule { '{' ~ (CaseClauses | Block) ~ '}' } + def Block: Rule0 = rule { zeroOrMore(BlockStat ~ SemiS) ~ optional(ResultExpr) } + def BlockStat: Rule0 = rule { + &(SemiS) ~ MATCH | //shortcircuit when Semi is found + Import | + zeroOrMore(Annotation) ~ (optional("implicit" | "lazy") ~ Def | zeroOrMore(LocalModifier) ~ TmplDef) | + Expr1 + } + def ResultExpr: Rule0 = rule { (Bindings | optional("implicit") ~ IdS | "_") ~ "=>" ~ Block | Expr1 } + def Enumerators: Rule0 = rule { Generator ~ zeroOrMore(SemiS ~ Enumerator) } + def Enumerator: Rule0 = rule { Generator | Guard | Pattern1 ~ '=' ~ Expr } + def Generator: Rule0 = rule { Pattern1 ~ "<-" ~ Expr ~ optional(Guard) } + def CaseClauses: Rule0 = rule { oneOrMore(CaseClause) } + def CaseClause: Rule0 = rule { "case" ~ Pattern ~ optional(Guard) ~ "=>" ~ Block } + def Guard: Rule0 = rule { "if" ~ PostfixExpr } + def Pattern: Rule0 = rule { oneOrMore(Pattern1) separatedBy '|' } + def Pattern1: Rule0 = rule { '_' ~ ':' ~ TypePat | VarIdS ~ ':' ~ TypePat | Pattern2 } + def Pattern2: Rule0 = rule { VarIdS ~ optional("@" ~ Pattern3) | Pattern3 } + def Pattern3: Rule0 = rule { SimplePattern ~ zeroOrMore(IdS ~ optional(NewlineS) ~ SimplePattern) } // this pattern doesn't make sense to me... 
+ def SimplePattern: Rule0 = rule { + '_' | + LiteralS ~ drop[String] | //literal currently captures, so it can be used outside. but since all our rules lack AST, we drop its value in order to be able to compose them + '(' ~ optional(Patterns) ~ ')' | + StableId ~ '(' ~ (optional(Patterns ~ ',') ~ optional(VarIdS ~ '@') ~ '_' ~ '*' | optional(Patterns)) ~ ')' | + VarIdS /*| + XmlPattern*/ + } + def Patterns: Rule0 = rule { '_' ~ '*' | oneOrMore(Pattern).separatedBy(',') } + + def TypeParamClause: Rule0 = rule { '[' ~ oneOrMore(VariantTypeParam).separatedBy(',') ~ ']' } + def FunTypeParamClause: Rule0 = rule { '[' ~ oneOrMore(TypeParam).separatedBy(',') ~ ']' } + def VariantTypeParam: Rule0 = rule { zeroOrMore(Annotation) ~ optional(anyOf("+-")) ~ TypeParam } + def TypeParam: Rule0 = rule { (IdS | '_') ~ optional(TypeParamClause) ~ optional(">:" ~ Type) ~ optional("<:" ~ Type) ~ zeroOrMore("<%" ~ Type) ~ zeroOrMore(':' ~ Type) } + def ParamClauses: Rule0 = rule { zeroOrMore(ParamClause) ~ optional(optional(NewlineS) ~ '(' ~ "implicit" ~ Params ~ ')') } + def ParamClause: Rule0 = rule { optional(NewlineS) ~ '(' ~ optional(Params) ~ ')' } + def Params: Rule0 = rule { zeroOrMore(Param).separatedBy(',') } + def Param: Rule0 = rule { zeroOrMore(Annotation) ~ IdS ~ optional(':' ~ ParamType) ~ optional('=' ~ Expr) } + def ClassParamClauses: Rule0 = rule { zeroOrMore(ClassParamClause) ~ optional(optional(NewlineS) ~ '(' ~ "implicit" ~ ClassParam ~ ')') } + def ClassParamClause: Rule0 = rule { optional(NewlineS) ~ '(' ~ optional(ClassParam) ~ ')' } + def ClassParams: Rule0 = rule { oneOrMore(ClassParam).separatedBy(',') } + def ClassParam: Rule0 = rule { zeroOrMore(Annotation) ~ optional(zeroOrMore(Modifier) ~ ("val" | "var")) ~ IdS ~ ":" ~ ParamType ~ optional("=" ~ Expr) } + + def Bindings: Rule0 = rule { '(' ~ oneOrMore(Binding).separatedBy(',') ~ ')' } + def Binding: Rule0 = rule { (IdS | '_') ~ optional(':' ~ Type) } + + def Modifier: Rule0 = rule { LocalModifier | 
AccessModifier | "override" } + def LocalModifier: Rule0 = rule { "abstract" | "final" | "sealed" | "implicit" | "lazy" } + def AccessModifier: Rule0 = rule { ("private" | "protected") ~ optional(AccessQualifier) } + def AccessQualifier: Rule0 = rule { '[' ~ ("this" ~ IdS) ~ ']' } + + def Annotation: Rule0 = rule { '@' ~ SimpleType ~ zeroOrMore(ArgumentExprs) } + def ConstrAnnotation: Rule0 = rule { '@' ~ SimpleType ~ ArgumentExprs } + def NameValuePair: Rule0 = rule { "val" ~ IdS ~ '=' ~ PrefixExpr } + + def TemplateBody: Rule0 = rule { optional(NewlineS) ~ '{' ~ optional(SelfType) ~ TemplateStat ~ zeroOrMore(SemiS ~ TemplateStat) ~ '}' } + def TemplateStat: Rule0 = rule { + Import | + zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ (Def | Dcl) | + Expr | + MATCH + } + + def SelfType: Rule0 = rule { "this" ~ ':' ~ Type ~ "=>" | IdS ~ optional(':' ~ Type) ~ "=>" } + + def Import: Rule0 = rule { "import" ~ oneOrMore(ImportExpr).separatedBy(',') } + + //ImportExpr is slightly changed wrt spec because StableId always consumes all the Ids possible, so there is no need to one at the end + def ImportExpr: Rule0 = rule { StableId ~ optional('.' 
~ ('_' | ImportSelectors)) } + def ImportSelectors: Rule0 = rule { '{' ~ zeroOrMore(ImportSelector ~ ',') ~ (ImportSelector | '_') ~ '}' } + def ImportSelector: Rule0 = rule { IdS ~ optional("=>" ~ (IdS | '_')) } + + def Dcl: Rule0 = rule { + "val" ~ ValDcl | + "var" ~ VarDcl | + "def" ~ FunDcl | + "type" ~ zeroOrMore(NewlineS) ~ TypeDcl + } + def ValDcl: Rule0 = rule { Ids ~ ':' ~ Type } + def VarDcl: Rule0 = rule { Ids ~ ':' ~ Type } + def FunDcl: Rule0 = rule { FunSig ~ optional(':' ~ Type) } + def FunSig: Rule0 = rule { IdS ~ optional(FunTypeParamClause) ~ ParamClauses } + def TypeDcl: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ optional(">:" ~ Type) ~ optional("<:" ~ Type) } + + def PatVarDef: Rule0 = rule { "val" ~ PatDef | "var" ~ VarDef } + def Def: Rule0 = rule { "def" ~ FunDef | "type" ~ zeroOrMore(NewlineS) ~ TypeDef | PatVarDef | TmplDef } + def PatDef: Rule0 = rule { oneOrMore(Pattern2).separatedBy(',') ~ optional(':' ~ Type) ~ '=' ~ Expr } + def VarDef: Rule0 = rule { Ids ~ ':' ~ Type ~ '=' ~ '_' | PatDef } + def FunDef: Rule0 = rule { + "this" ~ ParamClause ~ ParamClauses ~ ('=' ~ ConstrExpr | optional(NewlineS) ~ ConstrBlock) | + FunSig ~ (optional(':' ~ Type) ~ '=' ~ Expr | optional(NewlineS) ~ '{' ~ Block ~ '}') + } + def TypeDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ '=' ~ Type } + + def TmplDef: Rule0 = rule { "trait" ~ TraitDef | optional("case") ~ ("class" ~ ClassDef | "object" ~ ObjectDef) } + def ClassDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ zeroOrMore(ConstrAnnotation) ~ optional(AccessModifier) ~ ClassParamClauses ~ ClassTemplateOpt } + def TraitDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ TraitTemplateOpt } + def ObjectDef: Rule0 = rule { IdS ~ ClassTemplateOpt } + def ClassTemplateOpt: Rule0 = rule { "extends" ~ ClassTemplate | optional(optional("extends") ~ TemplateBody) } + def TraitTemplateOpt: Rule0 = rule { "extends" ~ TraitTemplate | optional(optional("extends") ~ TemplateBody) } + def 
ClassTemplate: Rule0 = rule { optional(EarlyDefs) ~ ClassParents ~ optional(TemplateBody) } + def TraitTemplate: Rule0 = rule { optional(EarlyDefs) ~ TraitParents ~ optional(TemplateBody) } + def ClassParents: Rule0 = rule { Constr ~ zeroOrMore("with" ~ AnnotType) } + def TraitParents: Rule0 = rule { AnnotType ~ zeroOrMore("with" ~ AnnotType) } + def Constr: Rule0 = rule { AnnotType ~ zeroOrMore(ArgumentExprs) } + def EarlyDefs: Rule0 = rule { '{' ~ optional(oneOrMore(EarlyDef).separatedBy(SemiS)) ~ '}' ~ "with" } + def EarlyDef: Rule0 = rule { zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ PatVarDef } + def ConstrExpr: Rule0 = rule { ConstrBlock | SelfInvocation } + def ConstrBlock: Rule0 = rule { '{' ~ SelfInvocation ~ zeroOrMore(SemiS ~ BlockStat) ~ '}' } + def SelfInvocation: Rule0 = rule { "this" ~ oneOrMore(ArgumentExprs) } + + def TopStatSeq: Rule0 = rule { oneOrMore(TopStat).separatedBy(SemiS) } + def TopStat: Rule0 = rule { Packaging | PackageObject | Import | zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ TmplDef | MATCH } + def Packaging: Rule0 = rule { "package" ~ QualId ~ optional(NewlineS) ~ '{' ~ TopStatSeq ~ '}' } + def PackageObject: Rule0 = rule { "package" ~ "object" ~ ObjectDef } + def CompilationUnit: Rule0 = rule { zeroOrMore("package" ~ QualId ~ SemiS) ~ TopStatSeq } +} diff --git a/scalatexApi/src/main/scala/torimatomeru/syntax/Basic.scala b/scalatexApi/src/main/scala/torimatomeru/syntax/Basic.scala new file mode 100644 index 0000000..4240ea9 --- /dev/null +++ b/scalatexApi/src/main/scala/torimatomeru/syntax/Basic.scala @@ -0,0 +1,30 @@ +package torimatomeru +package syntax + +import org.parboiled2._ + +trait Basic { self: ScalaSyntax => + + def UnicodeExcape = rule { "\\u" ~ 4.times(HexDigit) } + + + //Numbers and digits + def HexDigit = rule { Digit | "a" - "f" | "A" - "Z" } + def Digit = rule { "0" | NonZeroDigit } + def NonZeroDigit = rule { "1" - "9" } + def HexNumeral = rule { "0x" ~ 
oneOrMore(HexDigit) } + def DecimalNumeral = rule(oneOrMore(Digit)) + def ExponentPart = rule { anyOf("Ee") ~ optional(anyOf("+-")) ~ oneOrMore(Digit) } + def FloatType = rule { anyOf("FfDd") } + + def Parentheses = rule { "(" | ")" | "[" | "]" | "{" | "}" } + def DelimiterChar = rule { "'" | "\"" | "." | ";" | "," } + + def WhitespaceChar = rule { "\u0020" | "\u0009" } + def Newline = rule { "\r\n" | "\n" } + def Semi = rule { ';' | oneOrMore(NewlineS) } + def OperatorChar = rule { anyOf("""!#$%&*+-/:<=>?@\^|~""") | CharPredicate.from(c => c.getType match { case Character.OTHER_SYMBOL | Character.MATH_SYMBOL => true; case _ => false}) } + def Letter = rule { Upper | Lower | CharPredicate.from(c => c.isLetter | c.isDigit) } + def Lower = rule { "a" - "z" | "$" | "_" | CharPredicate.from(_.isLower) } + def Upper = rule { "A" - "Z" | CharPredicate.from(_.isUpper) } +} diff --git a/scalatexApi/src/main/scala/torimatomeru/syntax/Identifiers.scala b/scalatexApi/src/main/scala/torimatomeru/syntax/Identifiers.scala new file mode 100644 index 0000000..9c39577 --- /dev/null +++ b/scalatexApi/src/main/scala/torimatomeru/syntax/Identifiers.scala @@ -0,0 +1,22 @@ +package torimatomeru +package syntax + +import org.parboiled2._ + +trait Identifiers { self: Parser with Basic => + + def Operator = rule(oneOrMore(OperatorChar)) + + def VarId = rule { !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Lower ~ IdRest } + def PlainId = rule { Upper ~ IdRest | VarId | !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Operator } + def Id = rule { PlainId | ("`" ~ oneOrMore(noneOf("`")) ~ "`") } + def IdRest = rule { zeroOrMore(Letter | Digit) ~ optional("_" ~ Operator) } + + + def Keywords = rule { + "abstract" | "case" | "catch" | "class" | "def" | "do" | "else" | "extends" | "false" | "finally" | "final" | "finally" | "forSome" | "for" | "if" | + "implicit" | "import" | "lazy" | "match" | "new" | "null" | "object" | "override" | "package" | "private" | "protected" | 
"return" | + "sealed" | "super" | "this" | "throw" | "trait" | "try" | "true" | "type" | "val" | "var" | "while" | "with" | "yield" | "_" | + ":" | ";" | "=>" | "=" | "<-" | "<:" | "<%" | ">:" | "#" | "@" | "\u21d2" | "\u2190" + } +} diff --git a/scalatexApi/src/main/scala/torimatomeru/syntax/Literals.scala b/scalatexApi/src/main/scala/torimatomeru/syntax/Literals.scala new file mode 100644 index 0000000..be6f171 --- /dev/null +++ b/scalatexApi/src/main/scala/torimatomeru/syntax/Literals.scala @@ -0,0 +1,58 @@ +package torimatomeru +package syntax + +import org.parboiled2._ + +trait Literals extends StringLiterals { self: ScalaSyntax => + + def FloatingPointLiteral = rule { + capture( + "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) | + oneOrMore(Digit) ~ ( + "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) | + ExponentPart ~ optional(FloatType) | + optional(ExponentPart) ~ FloatType)) + } + + def IntegerLiteral = rule { capture((DecimalNumeral | HexNumeral) ~ optional(anyOf("Ll"))) } + + def BooleanLiteral = rule { capture("true" | "false") } + + def MultilineComment: Rule0 = rule { "/*" ~ zeroOrMore(MultilineComment | !"*/" ~ ANY) ~ "*/" } + def Comment: Rule0 = rule { + MultilineComment | + "//" ~ zeroOrMore(!NewlineS ~ ANY) ~ (NewlineS | EOI) + } + + def Literal = rule { + (capture(optional("-")) ~ (FloatingPointLiteral | IntegerLiteral) ~> ((sign: String, number) => sign + number)) | + BooleanLiteral | + CharacterLiteral | + StringLiteral | + SymbolLiteral | + capture("null") + } +} + +/** + * Placed the string defintions in this trait to isolate them, because they are overly complex. 
+ */ +private[syntax] trait StringLiterals { self: Literals with ScalaSyntax => + + def EscapedChars = rule { '\\' ~ anyOf("rnt\\\"") } + + def SymbolLiteral = rule { ''' ~ capture(PlainId) } + + def CharacterLiteral = rule { ''' ~ capture(UnicodeExcape | EscapedChars | !'\\' ~ CharPredicate.from(isPrintableChar)) ~ ''' } + + def MultiLineChars = rule { zeroOrMore(optional('"') ~ optional('"') ~ noneOf("\"")) } + def StringLiteral = rule { + ("\"\"\"" ~ capture(MultiLineChars) ~ capture("\"\"\"" ~ zeroOrMore('"')) ~> ((multilineChars: String, quotes) => multilineChars + quotes.dropRight(3))) | + ('"' ~ capture(zeroOrMore("\\\"" | noneOf("\n\""))) ~ '"') + } + + def isPrintableChar(c: Char): Boolean = { + val block = Character.UnicodeBlock.of(c) + !Character.isISOControl(c) && !Character.isSurrogate(c) && block != null && block != Character.UnicodeBlock.SPECIALS + } +} diff --git a/scalatexApi/src/test/scala/scalatex/Main.scala b/scalatexApi/src/test/scala/scalatex/Main.scala index 968e34c..fa0d213 100644 --- a/scalatexApi/src/test/scala/scalatex/Main.scala +++ b/scalatexApi/src/test/scala/scalatex/Main.scala @@ -30,7 +30,12 @@ object Main extends utest.TestSuite{ 'Block{ * - check("{i am a cow}", _.TBlock.run(), Block(Seq(Block.Text("i am a cow")))) * - check("{i @am a @cow}", _.TBlock.run(), - Block(Seq(Block.Text("i "), Chain(Code("am"),Seq()), Block.Text(" a "), Chain(Code("cow"),Seq()))) + Block(Seq( + Block.Text("i "), + Chain(Code("am"),Seq()), + Block.Text(" a "), + Chain(Code("cow"),Seq()) + )) ) } 'Chain{ @@ -46,43 +51,8 @@ object Main extends utest.TestSuite{ )) ) } - - } - def p(input: String) = { - new ScalatexParser(input) - } -} -trait Ast{ - def offset: Int -} -object Ast{ - case class Code(code: String, offset: Int = 0) extends Ast - case class Block(parts: Seq[Block.Sub], offset: Int = 0) extends Chain.Sub - object Block{ - trait Sub - case class Text(txt: String, offset: Int = 0) extends Block.Sub } - case class Chain(lhs: Code, parts: 
Seq[Chain.Sub], offset: Int = 0) extends Block.Sub - object Chain{ - trait Sub extends Ast - case class Prop(str: String, offset: Int = 0) extends Sub - case class Args(str: String, offset: Int = 0) extends Sub - } -} -class ScalatexParser(input: ParserInput) extends ScalaSyntax(input) { - def TextNot(chars: String) = rule { capture(oneOrMore(noneOf(chars) | "@@")) ~> (x => Ast.Block.Text(x.replace("@@", "@"))) } - def Text = TextNot("@") - def Code = rule { - "@" ~ capture(Id | BlockExpr | ('(' ~ optional(Exprs) ~ ')')) ~> (Ast.Code(_)) - } - def ScalaChain = rule { Code ~ zeroOrMore(Extension) ~> (Ast.Chain(_, _)) } - def Extension: Rule1[Ast.Chain.Sub] = rule { - (capture(('.' ~ Id) ~ optional(TypeArgs)) ~> (Ast.Chain.Prop(_))) | - (capture(!BlockExpr ~ ArgumentExprs) ~> (Ast.Chain.Args(_))) | - TBlock - } - def TBlock = rule{ '{' ~ Body ~ '}' } - def Body = rule{ zeroOrMore(TextNot("@}") | ScalaChain) ~> (x => Ast.Block(x)) } + } diff --git a/scalatexApi/src/test/scala/torimatomeru/ScalaSyntax.scala b/scalatexApi/src/test/scala/torimatomeru/ScalaSyntax.scala deleted file mode 100644 index 5bbd0af..0000000 --- a/scalatexApi/src/test/scala/torimatomeru/ScalaSyntax.scala +++ /dev/null @@ -1,246 +0,0 @@ -package torimatomeru - -import language.implicitConversions -import syntax._ -import org.parboiled2._ - -class ScalaSyntax(val input: ParserInput) extends Parser with Basic with Identifiers with Literals { - - def Whitespace = rule { zeroOrMore(WhitespaceChar | Comment) } - - /** - * Every token handles space at the end. 
- * Don't let it propagate to mixins - */ - implicit private[this] def wspStr(s: String): Rule0 = rule { - str(s) ~ Whitespace - } - implicit private[this] def wspChar(s: Char): Rule0 = rule { - ch(s) ~ Whitespace - } - - def pos = cursor -> cursorChar - - /** - * helper printing function - */ - def pr(s: String) = rule { run(print(s)) } - - ////////////////////////////////////////////////// - // Override rules from dependencies - // in order to handle white spaces - // Note: when you add your AST, make sure to - // only capture super.rule and not the whitespace - ////////////////////////////////////////////////// - - def IdS = rule { super.Id ~ Whitespace } - def VarIdS = rule { super.VarId ~ Whitespace } - def LiteralS = rule { super.Literal ~ Whitespace } - def SemiS = rule { super.Semi ~ Whitespace } - def NewlineS = rule { super.Newline ~ Whitespace } - - /////////////////////////////////////////// - // Qualifiers and Ids - /////////////////////////////////////////// - - def QualId = rule { oneOrMore(IdS) separatedBy '.' } - def Ids = rule { oneOrMore(IdS) separatedBy ',' } - - //path and stableId were refactored (wrt spec) to avoid recursiveness and be more specific - def Path: Rule0 = rule { zeroOrMore(IdS ~ '.') ~ "this" ~ zeroOrMore(IdS).separatedBy('.') | StableId } - def StableId: Rule0 = rule { - zeroOrMore(IdS ~ '.') ~ ("this" | "super" ~ optional(ClassQualifier)) ~ '.' ~ oneOrMore(IdS).separatedBy('.') | - IdS ~ zeroOrMore('.' 
~ IdS) - } -// def StableId: Rule0 = rule { zeroOrMore(Id ~ '.') ~ optional("this" | "super" ~ optional(ClassQualifier)) ~ oneOrMore(Id).separatedBy('.') } - def ClassQualifier = rule { '[' ~ IdS ~ ']' } - - /////////////////////////////////////////// - // Types and more Types - /////////////////////////////////////////// - - def Type: Rule0 = rule { FunctionArgTypes ~ "=>" ~ Type | InfixType ~ optional(ExistentialClause) } - def FunctionArgTypes = rule { InfixType | '(' ~ optional(oneOrMore(ParamType) separatedBy ',') ~ ')' } - - def ExistentialClause = rule { "forSome" ~ '{' ~ oneOrMore(ExistentialDcl).separatedBy(SemiS) } - def ExistentialDcl = rule { "type" ~ TypeDcl | "val" ~ ValDcl } - - def InfixType = rule { CompoundType ~ zeroOrMore(IdS ~ optional(NewlineS) ~ CompoundType) } - def CompoundType = rule { oneOrMore(AnnotType).separatedBy("with") ~ optional(Refinement) } - def AnnotType = rule { SimpleType ~ zeroOrMore(Annotation) } - def SimpleType: Rule0 = rule { - BasicType ~ optional('#' ~ IdS) ~ optional(TypeArgs) - } - def BasicType: Rule0 = rule { - '(' ~ Types ~ ')' | - Path ~ '.' 
~ "type" | - StableId - } - def TypeArgs = rule { '[' ~ Types ~ ']' } - def Types = rule { oneOrMore(Type).separatedBy(',') } - def Refinement = rule { optional(NewlineS) ~ '{' ~ oneOrMore(RefineStat).separatedBy(SemiS) ~ '}' } - def RefineStat = rule { "type" ~ TypeDef | Dcl | MATCH } - def TypePat = rule { Type } - def Ascription = rule { ":" ~ (InfixType | oneOrMore(Annotation) | "_" ~ "*") } - - def ParamType = rule { "=>" ~ Type | Type ~ "*" | Type } - - ///////////////////////////////////////////////// - // Declarations, Expressions and Pattern Matching - ///////////////////////////////////////////////// - - def Expr: Rule0 = rule { (Bindings | optional("implicit") ~ IdS | "_") ~ "=>" ~ Expr | Expr1 } - def Expr1: Rule0 = rule { - IfCFlow | - WhileCFlow | - TryCFlow | - DoWhileCFlow | - ForCFlow | - "throw" ~ Expr | - "return" ~ optional(Expr) | - SimpleExpr1 ~ ArgumentExprs ~ '=' ~ Expr | - optional(SimpleExpr ~ '.') ~ IdS ~ '=' ~ Expr | - PostfixExpr ~ optional("match" ~ '{' ~ CaseClauses ~ '}' | Ascription) - } - - def IfCFlow = rule { "if" ~ '(' ~ Expr ~ ')' ~ zeroOrMore(NewlineS) ~ Expr ~ optional(optional(SemiS) ~ "else" ~ Expr) } - def WhileCFlow = rule { "while" ~ '(' ~ Expr ~ ')' ~ zeroOrMore(NewlineS) ~ Expr } - def TryCFlow = rule { "try" ~ '{' ~ Block ~ '}' ~ optional("catch" ~ '{' ~ CaseClauses ~ '}') ~ optional("finally" ~ Expr) } - def DoWhileCFlow = rule { "do" ~ Expr ~ optional(SemiS) ~ "while" ~ '(' ~ Expr ~ ')' } - def ForCFlow = rule { "for" ~ ('(' ~ Enumerators ~ ')' | '{' ~ Enumerators ~ '}') ~ zeroOrMore(NewlineS) ~ optional("yield") ~ Expr } - def PostfixExpr: Rule0 = rule { InfixExpr ~ optional(IdS ~ optional(NewlineS)) } - def InfixExpr: Rule0 = rule { PrefixExpr ~ zeroOrMore(IdS ~ optional(NewlineS) ~ PrefixExpr) } - def PrefixExpr = rule { optional(anyOf("-+~!")) ~ SimpleExpr } - def SimpleExpr: Rule0 = rule { SimpleExprNoLiteral | SimpleExpr1 ~ optional(ArgumentExprs) ~ optional('_') } - def SimpleExprNoLiteral: Rule0 = rule { 
"new" ~ (ClassTemplate | TemplateBody) | BlockExpr } - def SimpleExpr1: Rule0 = rule { - // run(println("SimpleExpr1 matching on " + pos)) ~ - LiteralS ~ drop[String] | //literal currently captures, so it can be used outside. but since all our rules lack AST, we drop its value in order to be able to compose them - Path | - '_' | - '(' ~ optional(Exprs) ~ ')' | - SimpleExprNoLiteral ~ '.' ~ IdS | - SimpleExprNoLiteral ~ TypeArgs /*| - XmlExpr*/ - } - def Exprs: Rule0 = rule { oneOrMore(Expr) separatedBy ',' } - def ArgumentExprs: Rule0 = rule { - '(' ~ (optional(Exprs ~ ',') ~ PostfixExpr ~ ':' ~ '_' ~ '*' | optional(Exprs)) ~ ')' | - optional(NewlineS) ~ BlockExpr - } - def BlockExpr: Rule0 = rule { '{' ~ (CaseClauses | Block) ~ '}' } - def Block: Rule0 = rule { zeroOrMore(BlockStat ~ SemiS) ~ optional(ResultExpr) } - def BlockStat: Rule0 = rule { - &(SemiS) ~ MATCH | //shortcircuit when Semi is found - Import | - zeroOrMore(Annotation) ~ (optional("implicit" | "lazy") ~ Def | zeroOrMore(LocalModifier) ~ TmplDef) | - Expr1 - } - def ResultExpr: Rule0 = rule { (Bindings | optional("implicit") ~ IdS | "_") ~ "=>" ~ Block | Expr1 } - def Enumerators: Rule0 = rule { Generator ~ zeroOrMore(SemiS ~ Enumerator) } - def Enumerator: Rule0 = rule { Generator | Guard | Pattern1 ~ '=' ~ Expr } - def Generator: Rule0 = rule { Pattern1 ~ "<-" ~ Expr ~ optional(Guard) } - def CaseClauses: Rule0 = rule { oneOrMore(CaseClause) } - def CaseClause: Rule0 = rule { "case" ~ Pattern ~ optional(Guard) ~ "=>" ~ Block } - def Guard: Rule0 = rule { "if" ~ PostfixExpr } - def Pattern: Rule0 = rule { oneOrMore(Pattern1) separatedBy '|' } - def Pattern1: Rule0 = rule { '_' ~ ':' ~ TypePat | VarIdS ~ ':' ~ TypePat | Pattern2 } - def Pattern2: Rule0 = rule { VarIdS ~ optional("@" ~ Pattern3) | Pattern3 } - def Pattern3: Rule0 = rule { SimplePattern ~ zeroOrMore(IdS ~ optional(NewlineS) ~ SimplePattern) } // this pattern doesn't make sense to me... 
-  // Atomic pattern alternatives, tried in order: `_`, a literal, a parenthesised (possibly
-  // empty) pattern list, a StableId applied to patterns (optionally ending in `_*`, which may
-  // itself be bound with `varid @`), or a plain VarIdS.
-  def SimplePattern: Rule0 = rule {
-    '_' |
-    LiteralS ~ drop[String] | //literal currently captures, so it can be used outside. but since all our rules lack AST, we drop its value in order to be able to compose them
-    '(' ~ optional(Patterns) ~ ')' |
-    StableId ~ '(' ~ (optional(Patterns ~ ',') ~ optional(VarIdS ~ '@') ~ '_' ~ '*' | optional(Patterns)) ~ ')' |
-    VarIdS /*|
-    XmlPattern*/
-  }
-  // Either `_*` or one or more comma-separated Pattern.
-  def Patterns: Rule0 = rule { '_' ~ '*' | oneOrMore(Pattern).separatedBy(',') }
-
-  // `[...]` holding one or more comma-separated VariantTypeParam.
-  def TypeParamClause: Rule0 = rule { '[' ~ oneOrMore(VariantTypeParam).separatedBy(',') ~ ']' }
-  // `[...]` holding one or more comma-separated TypeParam (no variance marker).
-  def FunTypeParamClause: Rule0 = rule { '[' ~ oneOrMore(TypeParam).separatedBy(',') ~ ']' }
-  // Annotations, an optional `+` or `-`, then a TypeParam.
-  def VariantTypeParam: Rule0 = rule { zeroOrMore(Annotation) ~ optional(anyOf("+-")) ~ TypeParam }
-  // An id or `_`, an optional nested TypeParamClause, optional `>:` and `<:` bounds,
-  // then any number of `<%` bounds followed by any number of `:` bounds.
-  def TypeParam: Rule0 = rule { (IdS | '_') ~ optional(TypeParamClause) ~ optional(">:" ~ Type) ~ optional("<:" ~ Type) ~ zeroOrMore("<%" ~ Type) ~ zeroOrMore(':' ~ Type) }
-  // Zero or more ParamClause followed by an optional trailing `(implicit ...)` clause.
-  def ParamClauses: Rule0 = rule { zeroOrMore(ParamClause) ~ optional(optional(NewlineS) ~ '(' ~ "implicit" ~ Params ~ ')') }
-  // A parenthesised, possibly empty, parameter list optionally preceded by NewlineS.
-  def ParamClause: Rule0 = rule { optional(NewlineS) ~ '(' ~ optional(Params) ~ ')' }
-  // Comma-separated Param; uses zeroOrMore, so this rule can also match nothing.
-  def Params: Rule0 = rule { zeroOrMore(Param).separatedBy(',') }
-  // Annotations, an id, an optional `: ParamType` and an optional `= Expr` default.
-  def Param: Rule0 = rule { zeroOrMore(Annotation) ~ IdS ~ optional(':' ~ ParamType) ~ optional('=' ~ Expr) }
-  // Zero or more ClassParamClause followed by an optional trailing `(implicit ...)` clause.
-  // The implicit clause takes a full ClassParams list so that several comma-separated
-  // parameters are accepted.
-  def ClassParamClauses: Rule0 = rule { zeroOrMore(ClassParamClause) ~ optional(optional(NewlineS) ~ '(' ~ "implicit" ~ ClassParams ~ ')') }
-  // A parenthesised, possibly empty, ClassParams list optionally preceded by NewlineS.
-  // ClassParams (not a single ClassParam) is required for `class A(x: Int, y: Int)` to parse.
-  def ClassParamClause: Rule0 = rule { optional(NewlineS) ~ '(' ~ optional(ClassParams) ~ ')' }
-  // One or more comma-separated ClassParam.
-  def ClassParams: Rule0 = rule { oneOrMore(ClassParam).separatedBy(',') }
-  // Annotations, optional modifiers ending in `val` or `var`, an id, a mandatory `: ParamType`
-  // and an optional `= Expr` default.
-  def ClassParam: Rule0 = rule { zeroOrMore(Annotation) ~ optional(zeroOrMore(Modifier) ~ ("val" | "var")) ~ IdS ~ ":" ~ ParamType ~ optional("=" ~ Expr) }
-
-  // `(...)` holding one or more comma-separated Binding.
-  def Bindings: Rule0 = rule { '(' ~ oneOrMore(Binding).separatedBy(',') ~ ')' }
-  // An id or `_` with an optional `: Type` ascription.
-  def Binding: Rule0 = rule { (IdS | '_') ~ optional(':' ~ Type) }
-
-  // Any modifier: a LocalModifier, an AccessModifier, or `override`.
-  def Modifier: Rule0 = rule { LocalModifier | AccessModifier | "override" }
-  def LocalModifier: Rule0 = rule { "abstract" | "final" | "sealed" | "implicit" | "lazy" }
-  // `private` or `protected` with an optional bracketed qualifier.
-  def AccessModifier: Rule0 = rule { ("private" | "protected") ~ optional(AccessQualifier) }
-  // `[id]` or `[this]`. These are alternatives, not a sequence: the qualifier is a single token.
-  // IdS is listed first, following the order in the Scala grammar.
-  // NOTE(review): if IdS already accepts the keyword `this`, the second branch is simply never reached.
-  def AccessQualifier: Rule0 = rule { '[' ~ (IdS | "this") ~ ']' }
-
-  // `@SimpleType` followed by zero or more argument lists.
-  def Annotation: Rule0 = rule { '@' ~ SimpleType ~ zeroOrMore(ArgumentExprs) }
-  // `@SimpleType` followed by exactly one argument list.
-  def ConstrAnnotation: Rule0 = rule { '@' ~ SimpleType ~ ArgumentExprs }
-  // `val id = PrefixExpr`.
-  def NameValuePair: Rule0 = rule { "val" ~ IdS ~ '=' ~ PrefixExpr }
-
-  // A brace-delimited template body: an optional SelfType, then TemplateStats separated by SemiS.
-  def TemplateBody: Rule0 = rule { optional(NewlineS) ~ '{' ~ optional(SelfType) ~ TemplateStat ~ zeroOrMore(SemiS ~ TemplateStat) ~ '}' }
-  // One statement in a template body. The final MATCH alternative always succeeds, so an
-  // empty statement is accepted.
-  def TemplateStat: Rule0 = rule {
-    Import |
-    zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ (Def | Dcl) |
-    Expr |
-    MATCH
-  }
-
-  // `this: Type =>` or `id [: Type] =>`.
-  def SelfType: Rule0 = rule { "this" ~ ':' ~ Type ~ "=>" | IdS ~ optional(':' ~ Type) ~ "=>" }
-
-  // `import` followed by one or more comma-separated ImportExpr.
-  def Import: Rule0 = rule { "import" ~ oneOrMore(ImportExpr).separatedBy(',') }
-
-  //ImportExpr is slightly changed wrt spec because StableId always consumes all the Ids possible, so there is no need for one at the end
-  def ImportExpr: Rule0 = rule { StableId ~ optional('.' ~ ('_' | ImportSelectors)) }
-  // `{ sel, sel, ... }` where the last entry may be `_`.
-  def ImportSelectors: Rule0 = rule { '{' ~ zeroOrMore(ImportSelector ~ ',') ~ (ImportSelector | '_') ~ '}' }
-  // An id optionally followed by `=> id` or `=> _`.
-  def ImportSelector: Rule0 = rule { IdS ~ optional("=>" ~ (IdS | '_')) }
-
-  // A declaration, selected by its leading keyword.
-  def Dcl: Rule0 = rule {
-    "val" ~ ValDcl |
-    "var" ~ VarDcl |
-    "def" ~ FunDcl |
-    "type" ~ zeroOrMore(NewlineS) ~ TypeDcl
-  }
-  def ValDcl: Rule0 = rule { Ids ~ ':' ~ Type }
-  def VarDcl: Rule0 = rule { Ids ~ ':' ~ Type }
-  // A function signature with an optional `: Type` result.
-  def FunDcl: Rule0 = rule { FunSig ~ optional(':' ~ Type) }
-  // An id, an optional FunTypeParamClause, then ParamClauses.
-  def FunSig: Rule0 = rule { IdS ~ optional(FunTypeParamClause) ~ ParamClauses }
-  // An id, an optional TypeParamClause, and optional `>:` and `<:` bounds.
-  def TypeDcl: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ optional(">:" ~ Type) ~ optional("<:" ~ Type) }
-
-  // `val PatDef` or `var VarDef`.
-  def PatVarDef: Rule0 = rule { "val" ~ PatDef | "var" ~ VarDef }
-  // A definition: `def`, `type`, a val/var definition, or a template definition.
-  def Def: Rule0 = rule { "def" ~ FunDef | "type" ~ zeroOrMore(NewlineS) ~ TypeDef | PatVarDef | TmplDef }
-  // One or more comma-separated Pattern2, an optional `: Type`, then `= Expr`.
-  def PatDef: Rule0 = rule { oneOrMore(Pattern2).separatedBy(',') ~ optional(':' ~ Type) ~ '=' ~ Expr }
-  // `ids: Type = _`, or otherwise a PatDef.
-  def VarDef: Rule0 = rule { Ids ~ ':' ~ Type ~ '=' ~ '_' | PatDef }
-  // Either a `this(...)` definition whose body is a ConstrExpr or ConstrBlock, or a FunSig
-  // followed by `[: Type] = Expr` or by a brace-delimited Block.
-  def FunDef: Rule0 = rule {
-    "this" ~ ParamClause ~ ParamClauses ~ ('=' ~ ConstrExpr | optional(NewlineS) ~ ConstrBlock) |
-    FunSig ~ (optional(':' ~ Type) ~ '=' ~ Expr | optional(NewlineS) ~ '{' ~ Block ~ '}')
-  }
-  // An id, an optional TypeParamClause, then `= Type`.
-  def TypeDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ '=' ~ Type }
-
-  // `trait ...`, or an optionally `case`-prefixed `class ...` / `object ...`.
-  def TmplDef: Rule0 = rule { "trait" ~ TraitDef | optional("case") ~ ("class" ~ ClassDef | "object" ~ ObjectDef) }
-  def ClassDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ zeroOrMore(ConstrAnnotation) ~ optional(AccessModifier) ~ ClassParamClauses ~ ClassTemplateOpt }
-  def TraitDef: Rule0 = rule { IdS ~ optional(TypeParamClause) ~ TraitTemplateOpt }
-  def ObjectDef: Rule0 = rule { IdS ~ ClassTemplateOpt }
-  // `extends ClassTemplate`, or an optional TemplateBody that may itself be preceded by `extends`.
-  def ClassTemplateOpt: Rule0 = rule { "extends" ~ ClassTemplate | optional(optional("extends") ~ TemplateBody) }
-  // `extends TraitTemplate`, or an optional TemplateBody that may itself be preceded by `extends`.
-  def TraitTemplateOpt: Rule0 = rule { "extends" ~ TraitTemplate | optional(optional("extends") ~ TemplateBody) }
-  // Optional EarlyDefs, the parents, then an optional TemplateBody.
-  def ClassTemplate: Rule0 = rule { optional(EarlyDefs) ~ ClassParents ~ optional(TemplateBody) }
-  def TraitTemplate: Rule0 = rule { optional(EarlyDefs) ~ TraitParents ~ optional(TemplateBody) }
-  // A Constr followed by zero or more `with AnnotType`.
-  def ClassParents: Rule0 = rule { Constr ~ zeroOrMore("with" ~ AnnotType) }
-  // An AnnotType followed by zero or more `with AnnotType`.
-  def TraitParents: Rule0 = rule { AnnotType ~ zeroOrMore("with" ~ AnnotType) }
-  // An AnnotType followed by zero or more argument lists.
-  def Constr: Rule0 = rule { AnnotType ~ zeroOrMore(ArgumentExprs) }
-  // `{ ... } with`, where the braces hold zero or more EarlyDef separated by SemiS.
-  def EarlyDefs: Rule0 = rule { '{' ~ optional(oneOrMore(EarlyDef).separatedBy(SemiS)) ~ '}' ~ "with" }
-  def EarlyDef: Rule0 = rule { zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ PatVarDef }
-  def ConstrExpr: Rule0 = rule { ConstrBlock | SelfInvocation }
-  // A brace-delimited body that must begin with a SelfInvocation, followed by further BlockStats.
-  def ConstrBlock: Rule0 = rule { '{' ~ SelfInvocation ~ zeroOrMore(SemiS ~ BlockStat) ~ '}' }
-  // `this` followed by one or more argument lists.
-  def SelfInvocation: Rule0 = rule { "this" ~ oneOrMore(ArgumentExprs) }
-
-  // One or more TopStat separated by SemiS.
-  def TopStatSeq: Rule0 = rule { oneOrMore(TopStat).separatedBy(SemiS) }
-  // One top-level statement. The final MATCH alternative always succeeds, so an empty
-  // statement is accepted.
-  def TopStat: Rule0 = rule { Packaging | PackageObject | Import | zeroOrMore(Annotation ~ optional(NewlineS)) ~ zeroOrMore(Modifier) ~ TmplDef | MATCH }
-  // `package QualId { TopStatSeq }`.
-  def Packaging: Rule0 = rule { "package" ~ QualId ~ optional(NewlineS) ~ '{' ~ TopStatSeq ~ '}' }
-  def PackageObject: Rule0 = rule { "package" ~ "object" ~ ObjectDef }
-  // Entry rule for a whole source file: zero or more `package QualId` headers, each followed
-  // by SemiS, then the top-level statements.
-  def CompilationUnit: Rule0 = rule { zeroOrMore("package" ~ QualId ~ SemiS) ~ TopStatSeq }
-}
-- cgit v1.2.3