summaryrefslogtreecommitdiff
path: root/src/library/scala/xml/dtd/ContentModelParser.scala
blob: d9912ee0f86e6ef9aa61d734ac21e11b970a6bd4 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2002-2009, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://www.scala-lang.org/           **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id$


package scala.xml.dtd


/** Parser for regexps (content models in DTD element declarations).
 *
 *  A hand-written recursive-descent parser driven by the one-token lookahead
 *  (`token`, `value`, `nextToken`) inherited from `Scanner`.  Every syntax
 *  error is reported through `error(...)`.
 *
 *  The mixed-content branch is more permissive than a strict DTD grammar:
 *  the alternatives following `#PCDATA` are read with `choiceRest`, which
 *  accepts arbitrary particles (nested groups, suffixes), not just names.
 */
object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA
  import ContentModel._

  /** Parses the argument as a content specification.
   *
   *  @param s the text of the content specification, e.g. `"(a,b)*"`
   *  @return  the content model produced by `contentspec`
   */
  def parse(s: String): ContentModel = { initScanner(s); contentspec }

  /** Checks that the current token is `tok` and advances to the next token.
   *
   *  Reports an error if the current token differs from `tok`.  Hitting the
   *  end of input where a `*` was expected gets a dedicated message, because
   *  forgetting the trailing `*` of a mixed content model is a common mistake.
   *
   *  @param tok the token code that must be the current token
   */
  def accept(tok: Int) = {
    if (token != tok) {
      if ((tok == STAR) && (token == END))                  // common mistake
        error("in DTDs, \n"+
              "mixed content models must be like (#PCDATA|Name|Name|...)*")
      else
        error("expected "+token2string(tok)+
              ", got unexpected token:"+token2string(token))
    }
    nextToken
  }

  /** Applies an optional occurrence suffix to `s`:  s [ '+' | '*' | '?' ]
   *
   *  If the current token is a suffix it is consumed and `s` is wrapped:
   *  `*` gives `Star(s)`, `+` gives `Sequ(s, Star(s))` and `?` gives
   *  `Alt(Eps, s)`.  Any other token is left untouched and `s` is returned.
   *
   *  @param s the expression the suffix applies to
   *  @return  `s`, possibly wrapped according to the suffix
   */
  def maybeSuffix(s: RegExp): RegExp = token match {
    case STAR => nextToken; Star(s)
    case PLUS => nextToken; Sequ(s, Star(s))
    case OPT  => nextToken; Alt(Eps, s)
    case _    => s
  }

  /** Parses a complete content specification.
   *
   *  {{{
   *  contentspec ::= 'EMPTY' | 'ANY'
   *                | '(' S? '#PCDATA' S? ')'
   *                | '(' S? '#PCDATA' (S? '|' S? cp)+ S? ')' '*'
   *                | '(' S? regexp
   *  }}}
   *
   *  @return `ANY`, `EMPTY`, `PCDATA`, a `MIXED` model or an `ELEMENTS` model
   */
  def contentspec: ContentModel = token match {

    case NAME => value match {
      case "ANY"   => ANY
      case "EMPTY" => EMPTY
      case _       => error("expected ANY, EMPTY or '(' instead of " + value )
    }

    case LPAREN =>
      nextToken
      sOpt
      if (token != TOKEN_PCDATA)
        ELEMENTS(regexp)
      else {
        nextToken
        // Whitespace may separate '#PCDATA' from the ')' or '|' that follows
        // it; skip it so that "(#PCDATA )" and "(#PCDATA | a)*" are accepted.
        sOpt
        token match {
          case RPAREN =>
            // the closing ')' is left as the current token
            PCDATA
          case CHOICE =>
            // Eps stands in for #PCDATA as the first alternative
            val res = MIXED(choiceRest(Eps))
            sOpt
            accept( RPAREN )
            accept( STAR )
            res
          case _ =>
            error("unexpected token:" + token2string(token) )
        }
      }

    case _ =>
      error("unexpected token:" + token2string(token) )
  }

  /** Skips one optional whitespace token:  sopt ::= S? */
  def sOpt = if( token == S ) nextToken

  // Earlier implementation of the mixed-content rule, disabled and kept for
  // reference only; `contentspec` handles mixed content inline.
  //
  //                      (' S? mixed ::= '#PCDATA' S? ')'
  //                                    | '#PCDATA' (S? '|' S? atom)* S? ')*'
  /*
  def mixed = {
    accept( TOKEN_PCDATA );
    sOpt;
    if( token == RPAREN )
      PCDATA_
    else {
      val t = choiceRest( PCDATA_ );
      if( !isMixed( t ) )
        error("mixed content models must be like (#PCDATA.|.|.|.)*");
      accept( RPAREN );
      // lax: (workaround for buggy Java XML parser in JDK1.4.2)
      if( token == STAR ) accept( STAR );
      // strict:
      // accept( STAR );
      Star( t )
    }
  }
*/

  /** Parses the remainder of a parenthesised group whose '(' S? has already
   *  been consumed, including the closing ')' and an optional suffix.
   *
   *  {{{
   *  '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ]
   *  }}}
   *
   *  A token other than ')', '|' or ',' after the first particle is reported
   *  through `error`, like every other syntax error in this parser.
   *
   *  @return the expression denoted by the group
   */
  def regexp: RegExp = {
    val p = particle
    sOpt
    maybeSuffix( token match {
      case RPAREN  => nextToken; p
      case CHOICE  => val q = choiceRest( p ); accept( RPAREN ); q
      case COMMA   => val q = seqRest( p );    accept( RPAREN ); q
      case _       =>
        error("expected ')', '|' or ',', got unexpected token:" + token2string(token))
    })
  }

  /** Collects `first` followed by every particle introduced by the separator
   *  token `sep`:  (sep S? cp S?)*
   *
   *  @param sep   token code of the separator (`COMMA` or `CHOICE`)
   *  @param first the particle that was parsed before the first separator
   *  @return      all particles in source order, starting with `first`
   */
  private def particlesSeparatedBy(sep: Int, first: RegExp): List[RegExp] = {
    var acc = List(first)          // built in reverse, flipped on return
    while (token == sep) {
      nextToken
      sOpt
      acc = particle :: acc
      sOpt
    }
    acc.reverse
  }

  /** Parses the tail of a sequence:  seqRest ::= (',' S? cp S?)+
   *
   *  @param p the first particle of the sequence, already parsed
   *  @return  the sequence of `p` and all following particles
   */
  def seqRest(p: RegExp) =
    Sequ( particlesSeparatedBy(COMMA, p):_* )

  /** Parses the tail of a choice:  choiceRest ::= ('|' S? cp S?)+
   *
   *  @param p the first alternative, already parsed
   *  @return  the alternation of `p` and all following particles
   */
  def choiceRest( p:RegExp ) =
    Alt( particlesSeparatedBy(CHOICE, p):_* )

  /** Parses a single content particle.
   *
   *  {{{
   *  particle ::=  '(' S? regexp
   *             |  name [ '+' | '*' | '?' ]
   *  }}}
   *
   *  @return the nested group, or the named element with its suffix applied
   */
  def particle: RegExp = token match {
    case LPAREN => nextToken; sOpt; regexp
    case NAME   => val a = Letter(ElemName(value)); nextToken; maybeSuffix(a)
    case _      => error("expected '(' or Name, got:"+token2string(token))
  }

  /** Parses a bare element name without suffix:  atom ::= name
   *
   *  Only referenced by the disabled `mixed` rule in this file.
   *
   *  @return the letter for the element name that was read
   */
  def atom = token match {
    case NAME   => val a = Letter(ElemName(value)); nextToken; a
    case _      => error("expected Name, got:"+token2string(token))
  }
}