1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
|
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2003-2011, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala.util.automata
import scala.collection.{ immutable, mutable }
import scala.util.regexp.WordExp
/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]]
* celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov'').
*
* @author Burak Emir
* @version 1.0
*/
@deprecated("This class will be removed", "2.10.0")
abstract class WordBerrySethi extends BaseBerrySethi {
override val lang: WordExp
import lang.{ Alt, Eps, Letter, Meta, RegExp, Sequ, Star, _labelT }
protected var labels: mutable.HashSet[_labelT] = _
// don't let this fool you, only labelAt is a real, surjective mapping
protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma"
protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta
protected var defaultq: Array[List[Int]] = _ // default transitions
protected var initials: Set[Int] = _
/** Computes `first(r)` where the word regexp `r`.
*
* @param r the regular expression
* @return the computed set `first(r)`
*/
protected override def compFirst(r: RegExp): Set[Int] = r match {
case x: Letter => Set(x.pos)
case _ => super.compFirst(r)
}
/** Computes `last(r)` where the word regexp `r`.
*
* @param r the regular expression
* @return the computed set `last(r)`
*/
protected override def compLast(r: RegExp): Set[Int] = r match {
case x: Letter => Set(x.pos)
case _ => super.compLast(r)
}
/** Returns the first set of an expression, setting the follow set along
* the way.
*
* @param r the regular expression
* @return the computed set
*/
protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
case x: Letter => follow(x.pos) = fol1 ; Set(x.pos)
case Eps => emptySet
case _ => super.compFollow1(fol1, r)
}
/** Returns "Sethi-length" of a pattern, creating the set of position
* along the way
*/
/** Called at the leaves of the regexp */
protected def seenLabel(r: RegExp, i: Int, label: _labelT) {
labelAt = labelAt.updated(i, label)
this.labels += label
}
// overridden in BindingBerrySethi
protected def seenLabel(r: RegExp, label: _labelT): Int = {
pos += 1
seenLabel(r, pos, label)
pos
}
// todo: replace global variable pos with acc
override def traverse(r: RegExp): Unit = r match {
case a @ Letter(label) => a.pos = seenLabel(r, label)
case Eps => // ignore
case _ => super.traverse(r)
}
protected def makeTransition(src: Int, dest: Int, label: _labelT) {
val q = deltaq(src)
q.update(label, dest :: q.getOrElse(label, Nil))
}
protected def initialize(subexpr: Seq[RegExp]): Unit = {
this.labelAt = immutable.Map()
this.follow = mutable.HashMap()
this.labels = mutable.HashSet()
this.pos = 0
// determine "Sethi-length" of the regexp
subexpr foreach traverse
this.initials = Set(0)
}
protected def initializeAutom() {
finals = immutable.Map.empty[Int, Int] // final states
deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta
defaultq = new Array[List[Int]](pos) // default transitions
for (j <- 0 until pos) {
deltaq(j) = mutable.HashMap[_labelT, List[Int]]()
defaultq(j) = Nil
}
}
protected def collectTransitions(): Unit = // make transitions
for (j <- 0 until pos ; fol = follow(j) ; k <- fol) {
if (pos == k) finals = finals.updated(j, finalTag)
else makeTransition(j, k, labelAt(k))
}
def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = {
this.finalTag = finalTag
pat match {
case x: Sequ =>
// (1,2) compute follow + first
initialize(x.rs)
pos += 1
compFollow(x.rs) // this used to be assigned to var globalFirst and then never used.
// (3) make automaton from follow sets
initializeAutom()
collectTransitions()
if (x.isNullable) // initial state is final
finals = finals.updated(0, finalTag)
val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*)
val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final
val initialsArr = initials.toArray
val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] =
(0 until pos map { x =>
mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*)
}).toArray
val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray
new NondetWordAutom[_labelT] {
val nstates = pos
val labels = WordBerrySethi.this.labels.toList
val finals = finalsArr
val delta = deltaArr
val default = defaultArr
}
case z =>
automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag)
}
}
}
|