summaryrefslogtreecommitdiff
path: root/src/library/scala/xml/dtd/ContentModel.scala
blob: abc71f55bd7ed6a5062430e9e26b9d752954b723 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */



package scala.xml
package dtd

import scala.util.regexp.WordExp
import scala.util.automata._
import scala.xml.Utility.sbToString
import PartialFunction._

/** Regular-expression language over element names, plus helpers for
 *  inspecting and printing [[ContentModel]] values.
 *
 *  Labels of the language are [[ElemName]]s and the expression type is the
 *  inherited `RegExp`.
 */
object ContentModel extends WordExp {
  type _labelT = ElemName
  type _regexpT = RegExp

  /** Translator whose `lang` is pinned to this very object. */
  object Translator extends WordBerrySethi {
    override val lang: ContentModel.this.type = ContentModel.this
  }

  /** Label wrapping the name of an element. */
  case class ElemName(name: String) extends Label {
    override def toString(): String = """ElemName("%s")""" format name
  }

  /** `true` exactly when `cm` is a [[MIXED]] content model. */
  def isMixed(cm: ContentModel): Boolean = cond(cm) { case _: MIXED => true }

  /** `true` when `cm` is [[PCDATA]] or a [[MIXED]] content model. */
  def containsText(cm: ContentModel): Boolean = (cm == PCDATA) || isMixed(cm)

  /** Parses `s` by delegating to `ContentModelParser.parse`. */
  def parse(s: String): ContentModel = ContentModelParser.parse(s)

  /** Collects the names of all `Letter(ElemName(_))` leaves reachable from `r`.
   *
   *  @param r the expression to inspect
   *  @return  the set of element names occurring in `r`
   */
  def getLabels(r: RegExp): Set[String] = {
    def traverse(r: RegExp): Set[String] = r match { // !!! check for match translation problem
      case Letter(ElemName(name)) => Set(name)
      // Eps carries no label. It is a node that buildString below also
      // handles; without this case any expression containing it raised a
      // MatchError here.
      case Eps                    => Set.empty[String]
      case Star(  x @ _  )        => traverse( x ) // bug if x@_*
      case Sequ( xs @ _* )        => xs.flatMap(traverse).toSet
      case Alt(  xs @ _* )        => xs.flatMap(traverse).toSet
    }

    traverse(r)
  }

  /** Renders `r` to a fresh string using the `StringBuilder` overload. */
  def buildString(r: RegExp): String = sbToString(buildString(r, _))

  /** Appends every expression of `rs` to `sb`, separated by `sep`.
   *
   *  precond: rs.length >= 1 (`rs.head` is taken unconditionally)
   */
  private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char): Unit = {
    buildString(rs.head, sb)
    for (z <- rs.tail) {
      sb append sep
      buildString(z, sb)
    }
  }

  /** Appends the textual form of the content model `c` to `sb`.
   *
   *  @return `sb`, for chaining
   */
  def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match {
    case ANY                    => sb append "ANY"
    case EMPTY                  => sb append "EMPTY"
    case PCDATA                 => sb append "(#PCDATA)"
    case ELEMENTS(_) | MIXED(_) => c buildString sb
  }

  /** Appends the textual form of the expression `r` to `sb`.
   *
   *  `Eps` contributes no characters, so an alternation or sequence that
   *  contains it is printed with an empty branch. Nodes other than the ones
   *  listed in the match are not handled.
   *
   *  @return `sb`, for chaining
   */
  def buildString(r: RegExp, sb: StringBuilder): StringBuilder =
    r match {  // !!! check for match translation problem
      case Eps =>
        sb
      case Sequ(rs @ _*) =>
        sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' )
      case Alt(rs @ _*) =>
        sb.append( '(' ); buildString(rs, sb, '|');  sb.append( ')' )
      case Star(r: RegExp) =>
        sb.append( '(' ); buildString(r, sb); sb.append( ")*" )
      case Letter(ElemName(name)) =>
        sb.append(name)
    }

}

/** Root of the content-model hierarchy.
 *
 *  Subclasses supply `buildString`; `toString` is derived from it.
 */
sealed abstract class ContentModel {
  /** Appends the textual form of this content model to `sb` and returns a builder. */
  def buildString(sb: StringBuilder): StringBuilder

  /** Textual form of this content model, obtained by running `buildString` through `sbToString`. */
  override def toString(): String = sbToString { out => buildString(out); () }
}

/** Content model printed as `(#PCDATA)`. */
case object PCDATA extends ContentModel {
  override def buildString(sb: StringBuilder): StringBuilder =
    sb append "(#PCDATA)"
}
/** Content model printed as `EMPTY`. */
case object EMPTY extends ContentModel {
  override def buildString(sb: StringBuilder): StringBuilder =
    sb append "EMPTY"
}
/** Content model printed as `ANY`. */
case object ANY extends ContentModel {
  override def buildString(sb: StringBuilder): StringBuilder =
    sb append "ANY"
}
/** Content model described by a regular expression `r`, with a lazily
 *  computed deterministic automaton for that expression.
 */
sealed abstract class DFAContentModel extends ContentModel {
  import ContentModel.{ ElemName, Translator }

  /** The regular expression describing this content model. */
  def r: ContentModel.RegExp

  /** Automaton obtained by translating `r` with `Translator.automatonFrom(r, 1)`
   *  and determinizing the result through `SubsetConstruction`. Computed on
   *  first access.
   */
  lazy val dfa: DetWordAutom[ElemName] =
    new SubsetConstruction(Translator.automatonFrom(r, 1)).determinize
}

/** Mixed content model, printed as `(#PCDATA|...)*`.
 *
 *  `r` is expected to be an alternation; its first branch is not printed,
 *  `#PCDATA` is emitted in its place (presumably the first branch is a
 *  placeholder for the text part -- confirm against the parser).
 */
case class MIXED(r: ContentModel.RegExp) extends DFAContentModel {
  import ContentModel.{ Alt, RegExp }

  /** Appends `(#PCDATA|` followed by the remaining branches of `r` and `)*`.
   *
   *  @param sb builder to append to
   *  @return   `sb`, for chaining
   *  @throws MatchError if `r` is not an alternation
   */
  override def buildString(sb: StringBuilder): StringBuilder = {
    // Decide what to print after "#PCDATA|" before touching `sb`, so a
    // failure here leaves the builder unchanged.
    val remainder: RegExp = r match {
      // Two branches: once the first is dropped only one is left. `Alt(...)`
      // refuses fewer than two branches, so re-wrapping would throw; render
      // the single branch as-is instead.
      case Alt(_, only)  => only
      // Three or more branches: same output as before.
      case Alt(rs @ _*)  => Alt(rs drop 1: _*)
    }

    sb append "(#PCDATA|"
    ContentModel.buildString(remainder, sb)
    sb append ")*"
  }
}

/** Element-only content model; printed as the textual form of its expression `r`. */
case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel {
  /** Appends the rendering of `r` produced by `ContentModel.buildString` to `sb`. */
  override def buildString(sb: StringBuilder): StringBuilder = {
    val expr: ContentModel.RegExp = r
    ContentModel.buildString(expr, sb)
  }
}