summaryrefslogtreecommitdiff
path: root/src/library/scala/util/automata/BaseBerrySethi.scala
blob: 522e3dc903452a3b55b474bfa0c4628a245f9c93 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2006, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |                                         **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

// $Id$


package scala.util.automata


import scala.util.regexp.Base

import scala.collection.mutable
import scala.collection.immutable
import compat.Platform

/** this turns a regexp over A into a NondetWorkAutom over A using the
 *  celebrated position automata construction (also called Berry-Sethi or
 *  Glushkov)
 */
abstract class BaseBerrySethi {

  val lang: Base
  import lang.{Alt,Eps,Meta,RegExp,Sequ,Star}

  protected var pos = 0

  protected var globalFirst: immutable.Set[Int] = _

  // results which hold all info for the NondetWordAutomaton
  protected var follow: mutable.HashMap[Int, immutable.Set[Int]] = _

  protected var finalTag: Int = _

  protected var finals: immutable.TreeMap[int,int] = _      // final states

  // constants --------------------------

  final val emptySet:immutable.Set[Int] = new immutable.TreeSet[Int]()

  /** computes first( r ) for the word regexp r */
  protected def compFirst(r: RegExp): immutable.Set[Int] = r match {
    case x:Alt =>
      var tmp = emptySet
      val it = x.rs.elements                       // union
      while (it.hasNext) { tmp = tmp incl compFirst(it.next) }
      tmp
    case Eps => emptySet
    //case x:Letter => emptySet + posMap(x);  // singleton set
    case x:Meta => compFirst(x.r)
    case x:Sequ =>
      var tmp = emptySet;
      val it = x.rs.elements;                       // union
      while (it.hasNext) {
	val z = it.next
	tmp = tmp incl compFirst(z)
	if (!z.isNullable)
          return tmp
      }
      tmp
    case Star(t) => compFirst(t)
    case _ =>
      error("unexpected pattern " + Platform.getClass(r))
  }

  /** computes last( r ) for the regexp r */
  protected def compLast(r: RegExp): immutable.Set[Int] = r match {
    case x:Alt =>
      var tmp = emptySet
      val it = x.rs.elements                      // union
      while (it.hasNext) { tmp = tmp incl compFirst(it.next) }
      tmp
    case Eps => emptySet
    //case x:Letter => emptySet + posMap(x) // singleton set
    case x:Meta => compLast(x.r)
    case x:Sequ =>
      var tmp = emptySet
      val it = x.rs.elements.toList.reverse.elements       // union
      while (it.hasNext) {
        val z = it.next
        tmp = tmp incl compLast(z)
        if (!z.isNullable)
          return tmp
      }
      tmp
    case Star(t)  => compLast(t)
    case _        =>
      error("unexpected pattern " + Platform.getClass(r))
  }

  /** Starts from the right-to-left
   *  precondition: pos is final
   *               pats are successor patterns of a Sequence node
   *
   *  @param r ...
   *  @return  ...
   */
  protected def compFollow(r: Seq[RegExp]): immutable.Set[Int] = {
    //Console.println("compFollow( "+r.toList)
    var first = emptySet
    var fol = emptySet
    if (r.length > 0) {//non-empty expr

      val it = r.elements.toList.reverse.elements

      fol = fol + pos // don't modify pos !
      while (it.hasNext) {
        val p = it.next
        //Console.println("  p now = "+p)
        first = compFollow1(fol, p)
        //Console.println("  first = "+first)
        //Console.println("  fol before = "+fol)
        fol =
          if (p.isNullable) fol incl first
          else first
        //Console.println("  fol after = "+fol)
      }
    }
    this.follow.update(0, fol /*first*/)
    //Console.println("follow(0) = "+fol)
    fol
  }

  /** returns the first set of an expression, setting the follow set along
   *  the way.
   *
   *  @param fol1 ...
   *  @param r    ...
   *  @return     ...
   */
  protected def compFollow1(fol1: immutable.Set[Int], r: RegExp): immutable.Set[Int] = {
    var fol = fol1
    //System.out.println("compFollow1("+fol+","+r+")")
    r match {

      case x:Alt =>
        var first = emptySet
        val it = x.rs.elements.toList.reverse.elements
        while (it.hasNext)
          first = first incl compFollow1(fol, it.next);
        first

      /*
      case x:Letter =>
        val i = posMap( x );
        this.follow.update( i, fol );
        emptySet + i;
      */
      case x:Meta =>
        compFollow1(fol1, x.r)

      case x:Star =>
        fol = fol incl compFirst(x.r)
        compFollow1(fol, x.r)

      case x:Sequ =>
        var first = emptySet
        val it = x.rs.elements.toList.reverse.elements
        while (it.hasNext) {
          val p = it.next
          first = compFollow1(fol, p)
          fol =
            if (p.isNullable) fol incl first
            else first
        }
        first

      case _ =>
        error("unexpected pattern: " + Platform.getClass(r))
    }
  }

  /** returns "Sethi-length" of a pattern, creating the set of position
   *  along the way.
   *
   *  @param r ...
   */
  // todo: replace global variable pos with acc
  protected def traverse(r: RegExp): Unit = r match {
    // (is tree automaton stuff, more than Berry-Sethi)
    case x:Alt =>
      val it = x.rs.elements
      while (it.hasNext) traverse(it.next)
    case x:Sequ =>
      val it = x.rs.elements
      while (it.hasNext) traverse(it.next)
    case x:Meta =>
      traverse(x.r)
    case Star(t) =>
      traverse(t)
    case _ =>
      error("unexp pattern " + Platform.getClass(r))
  }

}