summaryrefslogtreecommitdiff
path: root/src/library/scala/xml/dtd/ContentModelParser.scala
blob: ca84bcad703050ea3dfdd470ed7f213f843a6a56 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://www.scala-lang.org/           **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala.xml
package dtd

/** Parser for regexps (content models in DTD element declarations).
 *
 *  This is a hand-written recursive-descent parser. It consumes the token
 *  stream exposed by the inherited scanner members (`token`, `value`,
 *  `nextToken()`, `initScanner`) and builds `ContentModel` / `RegExp` values.
 *  All syntax errors are reported through `scala.sys.error`.
 */
object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA
  import ContentModel._

  /** Parses the content specification `s` of an element declaration.
   *
   *  @param s the text of the content specification, e.g. `"(a,(b|c)*)"`
   *  @return  the content model produced by [[contentspec]]
   */
  def parse(s: String): ContentModel = { initScanner(s); contentspec }

  /** Checks that the current token is `tok` and advances to the next token.
   *
   *  Raises an error via `scala.sys.error` when the current token differs.
   *  A missing trailing `*` at end of input gets a dedicated message because
   *  it is a common mistake in mixed content models.
   *
   *  @param tok the token kind that is expected at the current position
   */
  def accept(tok: Int) = {
    if (token != tok) {
      if ((tok == STAR) && (token == END))                  // common mistake
        scala.sys.error("in DTDs, \n"+
              "mixed content models must be like (#PCDATA|Name|Name|...)*")
      else
        scala.sys.error("expected "+token2string(tok)+
              ", got unexpected token:"+token2string(token))
    }
    nextToken()
  }

  /** Applies an optional occurrence suffix to `s`.
   *
   *  Grammar: `s [ '+' | '*' | '?' ]`. When the current token is a suffix it
   *  is consumed; otherwise `s` is returned unchanged and nothing is consumed.
   *
   *  @param s the expression the suffix applies to
   *  @return  `Star(s)` for `*`, `Sequ(s, Star(s))` for `+`,
   *           `Alt(Eps, s)` for `?`, else `s`
   */
  def maybeSuffix(s: RegExp) = token match {
    case STAR => nextToken(); Star(s)
    case PLUS => nextToken(); Sequ(s, Star(s))
    case OPT  => nextToken(); Alt(Eps, s)
    case _    => s
  }

  /** Parses a complete content specification.
   *
   *  Grammar: `contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp)`
   *
   *  Mixed content follows
   *  {{{
   *  '(' S? mixed ::= '#PCDATA' S? ')'
   *                 | '#PCDATA' (S? '|' S? atom)* S? ')*'
   *  }}}
   *  except that the alternatives are read with [[particle]], which accepts
   *  more than a plain name.
   *
   *  @return `ANY`, `EMPTY`, `PCDATA`, a `MIXED` model or an `ELEMENTS` model
   */
  def contentspec: ContentModel = token match {

    case NAME => value match {
      case "ANY"   => ANY
      case "EMPTY" => EMPTY
      case _       => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value )
    }
    case LPAREN =>

      nextToken()
      sOpt()
      if (token != TOKEN_PCDATA)
        ELEMENTS(regexp)
      else {
        nextToken()
        // Whitespace is allowed between '#PCDATA' and the following ')' or '|'.
        sOpt()
        token match {
        case RPAREN =>
          // The closing ')' is deliberately left unconsumed here.
          PCDATA
        case CHOICE =>
          val res = MIXED(choiceRest(Eps))
          sOpt()
          accept( RPAREN )
          accept( STAR )
          res
        case _ =>
          scala.sys.error("unexpected token:" + token2string(token) )
        }
      }

    case _ =>
      scala.sys.error("unexpected token:" + token2string(token) )
  }

  /** Skips one optional whitespace token. Grammar: `sopt ::= S?` */
  def sOpt() = if( token == S ) nextToken()

  /** Parses the remainder of a parenthesised group; the opening `'(' S?` has
   *  already been consumed by the caller.
   *
   *  Grammar: `'(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ]`
   *
   *  @return the group's expression with any occurrence suffix applied
   */
  def regexp: RegExp = {
    val p = particle
    sOpt()
    // The group body (including its ')') is consumed before maybeSuffix
    // inspects the current token for a trailing suffix.
    maybeSuffix(token match {
      case RPAREN  => nextToken(); p
      case CHOICE  => val q = choiceRest( p );accept( RPAREN ); q
      case COMMA   => val q = seqRest( p );   accept( RPAREN ); q
      case _       =>
        // Report malformed groups like every other syntax error instead of
        // letting an opaque MatchError escape.
        scala.sys.error("expected ')', '|' or ',', got unexpected token:" +
              token2string(token))
    })
  }

  /** Parses the tail of a sequence whose first member is `p`.
   *
   *  Grammar: `seqRest ::= (',' S? cp S?)+`
   *
   *  @param p the first, already parsed, member of the sequence
   *  @return  the `Sequ` of `p` followed by the parsed members, in source order
   */
  def seqRest(p: RegExp) = {
    var k = List(p)
    while( token == COMMA ) {
      nextToken()
      sOpt()
      k = particle::k // collected in reverse, restored below
      sOpt()
    }
    Sequ( k.reverse:_* )
  }

  /** Parses the tail of a choice whose first alternative is `p`.
   *
   *  Grammar: `choiceRest ::= ('|' S? cp S?)+`
   *
   *  @param p the first, already parsed, alternative
   *  @return  the `Alt` of `p` followed by the parsed alternatives, in source order
   */
  def choiceRest( p:RegExp ) = {
    var k = List( p )
    while( token == CHOICE ) {
      nextToken()
      sOpt()
      k = particle::k // collected in reverse, restored below
      sOpt()
    }
    Alt( k.reverse:_* )
  }

  /** Parses a content particle.
   *
   *  Grammar:
   *  {{{
   *  particle ::=  '(' S? regexp
   *             |  name [ '+' | '*' | '?' ]
   *  }}}
   */
  def particle = token match {
    case LPAREN => nextToken(); sOpt(); regexp
    case NAME   => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a)
    case _      => scala.sys.error("expected '(' or Name, got:"+token2string(token))
  }

  /** Parses a single element name without any suffix. Grammar: `atom ::= name` */
  def atom = token match {
    case NAME   => val a = Letter(ElemName(value)); nextToken(); a
    case _      => scala.sys.error("expected Name, got:"+token2string(token))
  }
}