summaryrefslogtreecommitdiff
path: root/src/library/scala/util/automata/WordBerrySethi.scala
blob: 235a74dd7a52060deef243898234337c8a5af742 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2011, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala.util.automata

import scala.collection.{ immutable, mutable }
import scala.util.regexp.WordExp

/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]]
  * celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov'').
  *
  *  @author Burak Emir
  *  @version 1.0
  */
@deprecated("This class will be removed", "2.10.0")
abstract class WordBerrySethi extends BaseBerrySethi {
  override val lang: WordExp

  import lang.{ Alt, Eps, Letter, Meta, RegExp, Sequ, Star, _labelT }

  protected var labels: mutable.HashSet[_labelT]                   = _
  // don't let this fool you, only labelAt is a real, surjective mapping
  protected var labelAt: Map[Int, _labelT]                         = _ // new alphabet "gamma"
  protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta
  protected var defaultq: Array[List[Int]]                         = _ // default transitions
  protected var initials: Set[Int]                                 = _

  /** Computes `first(r)` where the word regexp `r`.
   *
   *  @param r the regular expression
   *  @return  the computed set `first(r)`
   */
  protected override def compFirst(r: RegExp): Set[Int] = r match {
    case x: Letter  => Set(x.pos)
    case _          => super.compFirst(r)
  }

  /** Computes `last(r)` where the word regexp `r`.
   *
   *  @param r the regular expression
   *  @return  the computed set `last(r)`
   */
  protected override def compLast(r: RegExp): Set[Int] = r match {
    case x: Letter  => Set(x.pos)
    case _          => super.compLast(r)
  }

  /** Returns the first set of an expression, setting the follow set along
   *  the way.
   *
   *  @param r    the regular expression
   *  @return     the computed set
   */
  protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
      case x: Letter  => follow(x.pos) = fol1 ; Set(x.pos)
      case Eps        => emptySet
      case _          => super.compFollow1(fol1, r)
    }

  /** Returns "Sethi-length" of a pattern, creating the set of position
   *  along the way
   */

  /** Called at the leaves of the regexp */
  protected def seenLabel(r: RegExp, i: Int, label: _labelT) {
    labelAt = labelAt.updated(i, label)
    this.labels += label
  }

  // overridden in BindingBerrySethi
  protected def seenLabel(r: RegExp, label: _labelT): Int = {
    pos += 1
    seenLabel(r, pos, label)
    pos
  }

  // todo: replace global variable pos with acc
  override def traverse(r: RegExp): Unit = r match {
    case a @ Letter(label) => a.pos = seenLabel(r, label)
    case Eps               => // ignore
    case _                 => super.traverse(r)
  }


  protected def makeTransition(src: Int, dest: Int, label: _labelT) {
    val q = deltaq(src)
    q.update(label, dest :: q.getOrElse(label, Nil))
  }

  protected def initialize(subexpr: Seq[RegExp]): Unit = {
    this.labelAt = immutable.Map()
    this.follow = mutable.HashMap()
    this.labels = mutable.HashSet()
    this.pos = 0

    // determine "Sethi-length" of the regexp
    subexpr foreach traverse

    this.initials = Set(0)
  }

  protected def initializeAutom() {
    finals   = immutable.Map.empty[Int, Int]                    // final states
    deltaq   = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta
    defaultq = new Array[List[Int]](pos)                        // default transitions

    for (j <- 0 until pos) {
      deltaq(j) = mutable.HashMap[_labelT, List[Int]]()
      defaultq(j) = Nil
    }
  }

  protected def collectTransitions(): Unit =                // make transitions
    for (j <- 0 until pos ; fol = follow(j) ; k <- fol) {
      if (pos == k) finals = finals.updated(j, finalTag)
      else makeTransition(j, k, labelAt(k))
    }

  def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = {
    this.finalTag = finalTag

    pat match {
      case x: Sequ =>
        // (1,2) compute follow + first
        initialize(x.rs)
        pos += 1
        compFollow(x.rs)  // this used to be assigned to var globalFirst and then never used.

        // (3) make automaton from follow sets
        initializeAutom()
        collectTransitions()

        if (x.isNullable) // initial state is final
          finals = finals.updated(0, finalTag)

        val delta1      = immutable.Map(deltaq.zipWithIndex map (_.swap): _*)
        val finalsArr   = (0 until pos map (k => finals.getOrElse(k, 0))).toArray  // 0 == not final
        val initialsArr = initials.toArray

        val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] =
          (0 until pos map { x =>
            mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*)
          }).toArray

        val defaultArr  = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray

        new NondetWordAutom[_labelT] {
          val nstates  = pos
          val labels   = WordBerrySethi.this.labels.toList
          val initials = initialsArr
          val finals   = finalsArr
          val delta    = deltaArr
          val default  = defaultArr
        }
      case z =>
       automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag)
    }
  }
}