summaryrefslogtreecommitdiff
path: root/src/library/scala/util/automata/BaseBerrySethi.scala
blob: d8d260c478ee8b706fc3e39924b5d223b4e39d12 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2011, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala.util.automata

import scala.util.regexp.{ Base }
import scala.collection.{ mutable, immutable }

// todo: replace global variable pos with acc

/** This class turns a regular expression over `A` into a
  * [[scala.util.automata.NondetWordAutom]] over `A` using the celebrated
  * position automata construction (also called ''Berry-Sethi'' or ''Glushkov'').
  */
@deprecated("This class will be removed", "2.10.0")
abstract class BaseBerrySethi {
  val lang: Base
  import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star }

  protected var pos = 0

  // results which hold all info for the NondetWordAutomaton
  protected var follow: mutable.HashMap[Int, Set[Int]] = _

  protected var finalTag: Int = _

  protected var finals: immutable.Map[Int, Int] = _     // final states

  // constants --------------------------

  final val emptySet: Set[Int] = Set()

  private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match {
    case x: Alt   => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _)
    case Eps      => emptySet
    case x: Meta  => compFunction(x.r)
    case x: Sequ  =>
      val (l1, l2) = x.rs span (_.isNullable)
      ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _)
    case Star(t)  => compFunction(t)
    case _        => throw new IllegalArgumentException("unexpected pattern " + r.getClass)
  }

  /** Computes `first(r)` for the word regexp `r`. */
  protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst)

  /** Computes `last(r)` for the regexp `r`. */
  protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast)

  /** Starts from the right-to-left
   *  precondition: pos is final
   *               pats are successor patterns of a Sequence node
   */
  protected def compFollow(rs: Seq[RegExp]): Set[Int] = {
    follow(0) =
      if (rs.isEmpty) emptySet
      else rs.foldRight(Set(pos))((p, fol) => {
        val first = compFollow1(fol, p)

        if (p.isNullable) fol ++ first
        else first
      })

    follow(0)
  }

  /** Returns the first set of an expression, setting the follow set along the way.
   */
  protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
    case x: Alt     => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*)
    case x: Meta    => compFollow1(fol1, x.r)
    case x: Star    => compFollow1(fol1 ++ compFirst(x.r), x.r)
    case x: Sequ    =>
      x.rs.foldRight(fol1) { (p, fol) =>
        val first = compFollow1(fol, p)

        if (p.isNullable) fol ++ first
        else first
      }
    case _          => throw new IllegalArgumentException("unexpected pattern: " + r.getClass)
  }

  /** Returns the "Sethi-length" of a pattern, creating the set of position along the way.
   */
  protected def traverse(r: RegExp): Unit = r match {
    // (is tree automaton stuff, more than Berry-Sethi)
    case x: Alt  => x.rs foreach traverse
    case x: Sequ => x.rs foreach traverse
    case x: Meta => traverse(x.r)
    case Star(t) => traverse(t)
    case _       => throw new IllegalArgumentException("unexp pattern " + r.getClass)
  }
}