scalatexApi/src/main/scala/scalatex/stages/Parser.scala


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105

package scalatex
package stages

import org.parboiled2._
import torimatomeru.ScalaSyntax
import Util._

/**
 * Parses the input text into a roughly-structured AST. This AST
 * is much simpler than the real Scala AST, but serves us well
 * enough until we stuff the code-strings into the real Scala
 * parser later
 */
object Parser extends (String => Ast.Block){
  def apply(input: String): Ast.Block = {
    new Parser(input).Body.run().get
  }
}
class Parser(input: ParserInput, indent: Int = 0) extends ScalaSyntax(input) {
  val txt = input.sliceString(0, input.length)
  val indentTable = txt.split('\n').map{ s =>
    if (s.trim == "") -1
    else s.takeWhile(_ == ' ').length
  }
  val nextIndentTable = (0 until indentTable.length).map { i =>
    val index = indentTable.indexWhere(_ != -1, i + 1)
    if (index == -1) 100000
    else indentTable(index)
  }
  def cursorNextIndent() = {
    nextIndentTable(txt.take(cursor).count(_ == '\n'))
  }

  def TextNot(chars: String) = rule {
    capture(oneOrMore(noneOf(chars + "\n") | "@@")) ~> {
      x => Ast.Block.Text(x.replace("@@", "@"))
    }
  }
  def Text = TextNot("@")
  def Code = rule {
    "@" ~ capture(Id | BlockExpr2 | ('(' ~ optional(Exprs) ~ ')'))
  }
  def BlankLine = rule{ '\n' ~ zeroOrMore(' ') ~ &('\n') }
  def Indent = rule{ '\n' ~ indent.times(' ') ~ zeroOrMore(' ') }
  def LoneScalaChain: Rule2[Ast.Block.Text, Ast.Chain] = rule {
    (capture(Indent) ~> (Ast.Block.Text(_))) ~
    ScalaChain ~
    zeroOrMore(BlankLine) ~
    test(cursorNextIndent() > indent) ~
    runSubParser {
      new Parser(_, cursorNextIndent()).Body
    } ~> { (chain: Ast.Chain, body: Ast.Block) =>
      chain.copy(parts = chain.parts :+ body)
    }
  }

  def ScalaChain = rule {
    Code ~ zeroOrMore(Extension) ~> {Ast.Chain(_, _)}
  }
  def Extension: Rule1[Ast.Chain.Sub] = rule {
    ('.' ~ capture(Id) ~> (Ast.Chain.Prop(_))) |
    (capture(TypeArgs2) ~> (Ast.Chain.TypeArgs(_))) |
    (capture(ArgumentExprs2) ~> (Ast.Chain.Args(_))) |
    TBlock
  }
  def Ws = Whitespace
  // clones of the version in ScalaSyntax, but without tailing whitespace or newlines
  def TypeArgs2 = rule { '[' ~ Ws ~ Types ~ ']' }
  def ArgumentExprs2 = rule {
    '(' ~ Ws ~ (optional(Exprs ~ ',' ~ Ws) ~ PostfixExpr ~ ':' ~ Ws ~ '_' ~ Ws ~ '*' ~ Ws | optional(Exprs)) ~ ')'
  }
  def BlockExpr2: Rule0 = rule { '{' ~ Ws ~ (CaseClauses | Block) ~ '}' }
  def TBlock = rule{ '{' ~ Body ~ '}' }

  def Body = rule{
    oneOrMore(
      LoneScalaChain ~> (Seq(_, _)) |
      TextNot("@}") ~> (Seq(_)) |
      (capture(Indent) ~> (x => Seq(Ast.Block.Text(x)))) |
      (capture(BlankLine) ~> (x => Seq(Ast.Block.Text(x)))) |
      ScalaChain ~> (Seq(_))
    ) ~> {x =>
      Ast.Block(x.flatten)
    }
  }
}

trait Ast{
  def offset: Int
}
object Ast{
  case class Block(parts: Seq[Block.Sub], offset: Int = 0) extends Chain.Sub
  object Block{
    trait Sub extends Ast
    case class Text(txt: String, offset: Int = 0) extends Block.Sub
  }

  case class Chain(lhs: String, parts: Seq[Chain.Sub], offset: Int = 0) extends Block.Sub
  object Chain{
    trait Sub extends Ast
    case class Prop(str: String, offset: Int = 0) extends Sub
    case class TypeArgs(str: String, offset: Int = 0) extends Sub
    case class Args(str: String, offset: Int = 0) extends Sub
  }
}