diff options
Diffstat (limited to 'scalatex/scalaParser/src/main/scala/scalaParser/syntax')
3 files changed, 144 insertions, 0 deletions
diff --git a/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala
new file mode 100644
index 0000000..8d3232a
--- /dev/null
+++ b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Basic.scala
@@ -0,0 +1,63 @@
+package scalaParser
+package syntax
+import acyclic.file
+import org.parboiled2._
+
+/**
+ * Character-level rules shared by the other syntax traits (digits,
+ * whitespace, operator characters, letters) plus the `Key` helpers for
+ * matching keywords only when they are properly terminated.
+ */
+trait Basic { self: Parser =>
+  object Basic{
+    // A backslash, the letter u and exactly four hex digits. The misspelled
+    // name is kept because other traits (e.g. Literals) refer to it.
+    def UnicodeExcape = rule { "\\u" ~ 4.times(HexDigit) }
+
+    // Numbers and digits
+    // Hex digits are 0-9, a-f and A-F; the 0x prefix may use either case.
+    def HexDigit = rule { Digit | "a" - "f" | "A" - "F" }
+    def Digit = rule { "0" | NonZeroDigit }
+    def NonZeroDigit = rule { "1" - "9" }
+    def HexNumeral = rule { "0" ~ anyOf("xX") ~ oneOrMore(HexDigit) }
+    def DecimalNumeral = rule(oneOrMore(Digit))
+    def ExponentPart = rule { anyOf("Ee") ~ optional(anyOf("+-")) ~ oneOrMore(Digit) }
+    def FloatType = rule { anyOf("FfDd") }
+
+    def Parentheses = rule { "(" | ")" | "[" | "]" | "{" | "}" }
+    def DelimiterChar = rule { "'" | "\"" | "." | ";" | "," }
+
+    // Space or tab; line breaks are handled separately by Newline.
+    def WhitespaceChar = rule { "\u0020" | "\u0009" }
+    def Newline = rule { "\r\n" | "\n" }
+    // A statement separator: one semicolon or a run of line breaks.
+    def Semi = rule { ';' | oneOrMore(Newline) }
+    // ASCII operator characters plus any char whose Unicode category is
+    // OTHER_SYMBOL or MATH_SYMBOL.
+    def OperatorChar = rule {
+      anyOf("""!#$%&*+-/:<=>?@\^|~""") |
+      CharPredicate.from(_.getType match {
+        case Character.OTHER_SYMBOL | Character.MATH_SYMBOL => true; case _ => false
+      })
+    }
+    // Note: besides letters this also accepts Unicode digits (c.isDigit).
+    def Letter = rule { Upper | Lower | CharPredicate.from(c => c.isLetter || c.isDigit) }
+    def Lower = rule { "a" - "z" | "$" | "_" | CharPredicate.from(_.isLower) }
+    def Upper = rule { "A" - "Z" | CharPredicate.from(_.isUpper) }
+  }
+  /**
+   * Most keywords don't just require the correct characters to match,
+   * they have to ensure that subsequent characters *don't* match in
+   * order for it to be a keyword. This enforces that rule for key-words
+   * (W) and key-operators (O) which have different non-match criteria.
+   */
+  object Key {
+    def W(s: String) = rule {
+      str(s) ~ !(Basic.Letter | Basic.Digit)
+    }
+
+    def O(s: String) = rule {
+      str(s) ~ !Basic.OperatorChar
+    }
+  }
+}
diff --git a/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala
new file mode 100644
index 0000000..4bc972f
--- /dev/null
+++ b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Identifiers.scala
@@ -0,0 +1,44 @@
+package scalaParser
+package syntax
+import acyclic.file
+import org.parboiled2._
+
+/**
+ * Rules for identifiers, operators and the reserved words that may not
+ * be used as plain identifiers.
+ */
+trait Identifiers { self: Parser with Basic =>
+  object Identifiers{
+    import Basic._
+    def Operator = rule(oneOrMore(OperatorChar))
+
+    // An identifier starting with a Lower char, rejected when the input is a
+    // keyword followed by whitespace, a line break or a comment opener.
+    def VarId = rule {
+      !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Lower ~ IdRest
+    }
+    def PlainId = rule { Upper ~ IdRest | VarId | !(Keywords ~ (WhitespaceChar | Newline | "//" | "/*")) ~ Operator }
+    // A plain identifier, or any non-empty text between backquotes.
+    def Id = rule { PlainId | ("`" ~ oneOrMore(noneOf("`")) ~ "`") }
+    def IdRest = rule {
+      zeroOrMore(zeroOrMore("_") ~ oneOrMore(!"_" ~ Letter | Digit)) ~
+      optional(oneOrMore("_") ~ optional(Operator))
+    }
+
+    // Ordered choice: a keyword that is a prefix of another one must come
+    // after it ("finally" before "final", "forSome" before "for").
+    def AlphabetKeywords = rule {
+      "abstract" | "case" | "catch" | "class" | "def" | "do" | "else" | "extends" | "false" | "finally" | "final" | "forSome" | "for" | "if" |
+      "implicit" | "import" | "lazy" | "match" | "new" | "null" | "object" | "override" | "package" | "private" | "protected" | "return" |
+      "sealed" | "super" | "this" | "throw" | "trait" | "try" | "true" | "type" | "val" | "var" | "while" | "with" | "yield" | "_"
+    }
+    def SymbolicKeywords = rule{
+      ":" | ";" | "=>" | "=" | "<-" | "<:" | "<%" | ">:" | "#" | "@" | "\u21d2" | "\u2190"
+    }
+    // A keyword only counts when it is not continued by a further letter
+    // (alphabetic) or operator character (symbolic).
+    def Keywords = rule {
+      AlphabetKeywords ~ !Letter | SymbolicKeywords ~ !OperatorChar
+    }
+  }
+}
diff --git a/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala
new file mode 100644
index 0000000..9fd9d5b
--- /dev/null
+++ b/scalatex/scalaParser/src/main/scala/scalaParser/syntax/Literals.scala
@@ -0,0 +1,75 @@
+package scalaParser
+package syntax
+import acyclic.file
+import org.parboiled2._
+
+/**
+ * Rules for literals (numbers, booleans, characters, strings, symbols,
+ * null) and for comments.
+ */
+trait Literals { self: Parser with Basic with Identifiers =>
+  object Literals{
+    import Basic._
+    def FloatingPointLiteral = rule {
+      "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) |
+      oneOrMore(Digit) ~ (
+        "." ~ oneOrMore(Digit) ~ optional(ExponentPart) ~ optional(FloatType) |
+        ExponentPart ~ optional(FloatType) |
+        optional(ExponentPart) ~ FloatType
+      )
+    }
+
+    // HexNumeral must be tried first: DecimalNumeral would otherwise match the
+    // leading "0" of a hex literal and the ordered choice would never reach it.
+    def IntegerLiteral = rule { (HexNumeral | DecimalNumeral) ~ optional(anyOf("Ll")) }
+
+    def BooleanLiteral = rule { Key.W("true") | Key.W("false") }
+
+    // Block comments nest, so the rule refers to itself (hence the explicit type).
+    def MultilineComment: Rule0 = rule { "/*" ~ zeroOrMore(MultilineComment | !"*/" ~ ANY) ~ "*/" }
+    // A line comment stops before the line break (or end of input) without
+    // consuming it.
+    def Comment: Rule0 = rule {
+      MultilineComment |
+      "//" ~ zeroOrMore(!Basic.Newline ~ ANY) ~ &(Basic.Newline | EOI)
+    }
+
+    // Key.W already rejects a following letter or digit, so "null" needs no
+    // extra lookahead.
+    def Literal = rule {
+      (optional("-") ~ (FloatingPointLiteral | IntegerLiteral)) |
+      BooleanLiteral |
+      CharacterLiteral |
+      StringLiteral |
+      SymbolLiteral |
+      Key.W("null")
+    }
+
+    // Backslash escapes: b, t, n, f, r, double quote, single quote, backslash.
+    def EscapedChars = rule { '\\' ~ anyOf("btnfr\"'\\") }
+
+    // Note that symbols can take on the same values as keywords!
+    def SymbolLiteral = rule { '\'' ~ (Identifiers.PlainId | Identifiers.Keywords) }
+
+    def CharacterLiteral = rule { '\'' ~ (UnicodeExcape | EscapedChars | !'\\' ~ CharPredicate.from(isPrintableChar)) ~ '\'' }
+
+    // Body of a triple-quoted string: at most two consecutive double quotes
+    // may appear before a non-quote character.
+    def MultiLineChars = rule { zeroOrMore(optional('"') ~ optional('"') ~ noneOf("\"")) }
+    // In a single-line string a backslash escapes whatever char follows it, so
+    // an escaped backslash right before the closing quote does not hide it.
+    def StringLiteral = rule {
+      (optional(Identifiers.Id) ~ "\"\"\"" ~ MultiLineChars ~ ("\"\"\"" ~ zeroOrMore('"'))) |
+      (optional(Identifiers.Id) ~ '"' ~ zeroOrMore(('\\' ~ noneOf("\n")) | noneOf("\\\n\"")) ~ '"')
+    }
+
+    // True for chars that are not ISO control chars or surrogates and that
+    // belong to a Unicode block other than SPECIALS.
+    def isPrintableChar(c: Char): Boolean = {
+      val block = Option(Character.UnicodeBlock.of(c))
+      !Character.isISOControl(c) && !Character.isSurrogate(c) &&
+        block.exists(_ != Character.UnicodeBlock.SPECIALS)
+    }
+  }
+}
+