Merge pull request #3937 from som-snytt/issue/8787-doc-backport

[backport] SI-8787 Backport Regex doc
author: Grzegorz Kossakowski <grzegorz.kossakowski@gmail.com> 2014-09-02 11:27:00 +0200
committer: Grzegorz Kossakowski <grzegorz.kossakowski@gmail.com> 2014-09-02 11:27:00 +0200
commit: d2a5555585a857138844e71943dcf86b89e79b81 (patch)
tree: 172c3d37c3b05277e52e5f7ab3890e4fc9171aa7
parent: 1d3613b5e2e82479464d90cc3401ccebb09bee89 (diff)
parent: 887622759d302dbe8cb40cca7debe6d2bfffcaee (diff)
download: scala-d2a5555585a857138844e71943dcf86b89e79b81.tar.gz
scala-d2a5555585a857138844e71943dcf86b89e79b81.tar.bz2
scala-d2a5555585a857138844e71943dcf86b89e79b81.zip
1 files changed, 291 insertions, 186 deletions
diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala
index 716d746552..8d82e08d7f 100644
--- a/src/library/scala/util/matching/Regex.scala
+++ b/src/library/scala/util/matching/Regex.scala
@@ -1,12 +1,11 @@
 /*                     __                                               *\
 **     ________ ___   / /  ___     Scala API                            **
-**    / __/ __// _ | / /  / _ |    (c) 2007-2013, LAMP/EPFL             **
+**    / __/ __// _ | / /  / _ |    (c) 2007-2014, LAMP/EPFL             **
 **  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
 ** /____/\___/_/ |_/____/_/ | |                                         **
 **                          |/                                          **
 \*                                                                      */
 
-
 /**
  * This package is concerned with regular expression (regex) matching against strings,
  * with the main goal of pulling out information from those matches, or replacing
@@ -33,97 +32,132 @@ package scala.util.matching
 import scala.collection.AbstractIterator
 import java.util.regex.{ Pattern, Matcher }
 
-/** This class provides methods for creating and using regular expressions.
- *  It is based on the regular expressions of the JDK since 1.4.
+/** A regular expression is used to determine whether a string matches a pattern
+ *  and, if it does, to extract or transform the parts that match.
  *
- *  Its main goal is to extract strings that match a pattern, or the subgroups
- *  that make it up. For that reason, it is usually used with for comprehensions
- *  and matching (see methods for examples).
+ *  This class delegates to the [[java.util.regex]] package of the Java Platform.
+ *  See the documentation for [[java.util.regex.Pattern]] for details about
+ *  the regular expression syntax for pattern strings.
  *
- *  A Regex is created from a [[java.lang.String]] representation of the
- *  regular expression pattern^1^. That pattern is compiled
- *  during construction, so frequently used patterns should be declared outside
- *  loops if performance is of concern. Possibly, they might be declared on a
- *  companion object, so that they need only to be initialized once.
+ *  An instance of `Regex` represents a compiled regular expression pattern.
+ *  Since compilation is expensive, frequently used `Regex`es should be constructed
+ *  once, outside of loops and perhaps in a companion object.
  *
- *  The canonical way of creating regex patterns is by using the method `r`, provided
- *  on [[java.lang.String]] through an implicit conversion into
- *  [[scala.collection.immutable.WrappedString]]. Using triple quotes to write these
- *  strings avoids having to quote the backslash character (`\`).
+ *  The canonical way to create a `Regex` is by using the method `r`, provided
+ *  implicitly for strings:
  *
- *  Using the constructor directly, on the other hand, makes
- *  it possible to declare names for subgroups in the pattern.
+ *  {{{
+ *  val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
+ *  }}}
  *
- *  For example, both declarations below generate the same regex, but the second
- *  one associate names with the subgroups.
+ *  Since escapes are not processed in multi-line string literals, using triple quotes
+ *  avoids having to escape the backslash character, so that `"\\d"` can be written `"""\d"""`.
+ *
+ *  To extract the capturing groups when a `Regex` is matched, use it as
+ *  an extractor in a pattern match:
  *
  *  {{{
- *  val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
- *  val dateP2 = new scala.util.matching.Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day")
+ *  "2004-01-20" match {
+ *    case date(year, month, day) => s"$year was a good year for PLs."
+ *  }
  *  }}}
  *
- *  There are two ways of using a `Regex` to find a pattern: calling methods on
- *  Regex, such as `findFirstIn` or `findAllIn`, or using it as an extractor in a
- *  pattern match.
+ *  To check only whether the `Regex` matches, ignoring any groups,
+ *  use a sequence wildcard:
  *
- *  Note, however, that when Regex is used as an extractor in a pattern match, it
- *  only succeeds if the whole text can be matched. For this reason, one usually
- *  calls a method to find the matching substrings, and then use it as an extractor
- *  to break match into subgroups.
+ *  {{{
+ *  "2004-01-20" match {
+ *    case date(_*) => "It's a date!"
+ *  }
+ *  }}}
  *
- *  As an example, the above patterns can be used like this:
+ *  That works because a `Regex` extractor produces a sequence of strings.
+ *  Extracting only the year from a date could also be expressed with
+ *  a sequence wildcard:
  *
  *  {{{
- *  val dateP1(year, month, day) = "2011-07-15"
+ *  "2004-01-20" match {
+ *    case date(year, _*) => s"$year was a good year for PLs."
+ *  }
+ *  }}}
  *
- *  // val dateP1(year, month, day) = "Date 2011-07-15" // throws an exception at runtime
+ *  In a pattern match, `Regex` normally matches the entire input.
+ *  However, an unanchored `Regex` finds the pattern anywhere
+ *  in the input.
  *
- *  val copyright: String = dateP1 findFirstIn "Date of this document: 2011-07-15" match {
- *    case Some(dateP1(year, month, day)) => "Copyright "+year
- *    case None                           => "No copyright"
+ *  {{{
+ *  val embeddedDate = date.unanchored
+ *  "Date: 2004-01-20 17:25:18 GMT (10 years, 28 weeks, 5 days, 17 hours and 51 minutes ago)" match {
+ *    case embeddedDate("2004", "01", "20") => "A Scala is born."
  *  }
+ *  }}}
  *
- *  val copyright: Option[String] = for {
- *    dateP1(year, month, day) <- dateP1 findFirstIn "Last modified 2011-07-15"
- *  } yield year
-
- *  def getYears(text: String): Iterator[String] = for (dateP1(year, _, _) <- dateP1 findAllIn text) yield year
- *  def getFirstDay(text: String): Option[String] = for (m <- dateP2 findFirstMatchIn text) yield m group "day"
+ *  To find or replace matches of the pattern, use the various find and replace methods.
+ *  There is a flavor of each method that produces matched strings and
+ *  another that produces `Match` objects.
+ *
+ *  For example, pattern matching with an unanchored `Regex`, as in the previous example,
+ *  is the same as using `findFirstMatchIn`, except that the findFirst methods return an `Option`
+ *  (`Some` for a match, or `None` for no match):
+ *
+ *  {{{
+ *  val dates = "Important dates in history: 2004-01-20, 1958-09-05, 2010-10-06, 2011-07-15"
+ *  val firstDate = date findFirstIn dates getOrElse "No date found."
+ *  val firstYear = for (m <- date findFirstMatchIn dates) yield m group 1
  *  }}}
  *
- *  Regex does not provide a method that returns a [[scala.Boolean]]. One can
- *  use [[java.lang.String]] `matches` method, or, if `Regex` is preferred,
- *  either ignore the return value or test the `Option` for emptyness. For example:
+ *  To find all matches:
  *
  *  {{{
- *  def hasDate(text: String): Boolean = (dateP1 findFirstIn text).nonEmpty
- *  def printLinesWithDates(lines: Traversable[String]) {
- *    lines foreach { line =>
- *      dateP1 findFirstIn line foreach { _ => println(line) }
- *    }
- *  }
+ *  val allYears = for (m <- date findAllMatchIn dates) yield m group 1
+ *  }}}
+ *
+ *  But `findAllIn` returns a special iterator of strings that can be queried for the `MatchData`
+ *  of the last match:
+ *
+ *  {{{
+ *  val mi = date findAllIn dates
+ *  val oldies = mi filter (_ => (mi group 1).toInt < 1960) map (s => s"$s: An oldie but goodie.")
+ *  }}}
+ *
+ *  Note that `findAllIn` finds matches that don't overlap. (See [[findAllIn]] for more examples.)
+ *
+ *  {{{
+ *  val num = """(\d+)""".r
+ *  val all = (num findAllIn "123").toList  // List("123"), not List("123", "23", "3")
+ *  }}}
+ *
+ *  Text replacement can be performed unconditionally or as a function of the current match:
+ *
+ *  {{{
+ *  val redacted    = date replaceAllIn (dates, "XXXX-XX-XX")
+ *  val yearsOnly   = date replaceAllIn (dates, m => m group 1)
+ *  val months      = (0 to 11) map { i => val c = java.util.Calendar.getInstance; c.set(2014, i, 1); f"$c%tb" }
+ *  val reformatted = date replaceAllIn (dates, _ match { case Groups(y,m,d) => f"${months(m.toInt - 1)} $d, $y" })
+ *  }}}
+ *
+ *  The `Groups` extractor is used to extract groups from a `Match` without reapplying the `Regex`.
+ *  In the expression for `reformatted`, each `date` match is computed once. But it is possible to apply a
+ *  `Regex` to a `Match` resulting from a different pattern:
+ *
+ *  {{{
+ *  val docSpree = """2011(?:-\d{2}){2}""".r
+ *  val docView  = date replaceAllIn (dates, _ match {
+ *    case docSpree() => "Historic doc spree!"
+ *    case _          => "Something else happened"
+ *  })
  *  }}}
  *
- *  There are also methods that can be used to replace the patterns
- *  on a text. The substitutions can be simple replacements, or more
- *  complex functions. For example:
+ *  If group names are supplied to the `Regex` constructor, they can be used this way:
  *
  *  {{{
- *  val months = Map( 1 -> "Jan", 2 -> "Feb", 3 -> "Mar",
- *                    4 -> "Apr", 5 -> "May", 6 -> "Jun",
- *                    7 -> "Jul", 8 -> "Aug", 9 -> "Sep",
- *                    10 -> "Oct", 11 -> "Nov", 12 -> "Dec")
- *
- *  import scala.util.matching.Regex.Match
- *  def reformatDate(text: String) = dateP2 replaceAllIn ( text, (m: Match) =>
- *    "%s %s, %s" format (months(m group "month" toInt), m group "day", m group "year")
- *  )
+ *  val namedDate  = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day")
+ *  val namedYears = for (m <- namedDate findAllMatchIn dates) yield m group "year"
  *  }}}
  *
- *  You can use special pattern syntax constructs like `(?idmsux-idmsux)`¹ to switch
- *  various regex compilation options like `CASE_INSENSITIVE` or `UNICODE_CASE`.
+ *  This constructor does not support options as flags, which must be
+ *  supplied as inline flags in the pattern string: `(?idmsux-idmsux)`.
  *
- *  @note ¹ A detailed description is available in [[java.util.regex.Pattern]].
  *  @see [[java.util.regex.Pattern]]
  *
  *  @author  Thibaud Hottelier
@@ -139,9 +173,8 @@ import java.util.regex.{ Pattern, Matcher }
  *  interpreted as a reference to a group in the matched pattern, with numbers
  *  1 through 9 corresponding to the first nine groups, and 0 standing for the
  *  whole match. Any other character is an error. The backslash (`\`) character
- *  will be interpreted as an escape character, and can be used to escape the
- *  dollar sign. One can use [[scala.util.matching.Regex]]'s `quoteReplacement`
- *  to automatically escape these characters.
+ *  will be interpreted as an escape character and can be used to escape the
+ *  dollar sign. Use `Regex.quoteReplacement` to escape these characters.
  */
 @SerialVersionUID(-2094783597747625537L)
 class Regex(regex: String, groupNames: String*) extends Serializable {
@@ -152,30 +185,60 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
   /** The compiled pattern */
   val pattern = Pattern.compile(regex)
 
-  /** Tries to match target (whole match) and returns the matching subgroups.
-   *  if the pattern has no subgroups, then it returns an empty list on a
-   *  successful match.
+  /** Tries to match either a [[java.lang.CharSequence]] or the matched
+   *  input of a previous `Match`.
    *
-   *  Note, however, that if some subgroup has not been matched, a `null` will
-   *  be returned for that subgroup.
+   *  If the match of a `CharSequence` succeeds, the result is a list of the capturing
+   *  groups (with a `null` element if a group did not match any input).
+   *  If the pattern specifies no groups, then the result will be an empty list
+   *  on a successful match.
+   *
+   *  This method attempts to match the entire input by default; to find the next
+   *  matching subsequence, use an unanchored `Regex`.
    *
    *  For example:
    *
    *  {{{
    *  val p1 = "ab*c".r
-   *  val p2 = "a(b*)c".r
-   *
    *  val p1Matches = "abbbc" match {
-   *    case p1() => true
+   *    case p1() => true               // no groups
    *    case _    => false
    *  }
-   *
+   *  val p2 = "a(b*)c".r
+   *  val p2Matches = "abbbc" match {
+   *    case p2(_*) => true             // any groups
+   *    case _      => false
+   *  }
    *  val numberOfB = "abbbc" match {
-   *    case p2(b) => Some(b.length)
+   *    case p2(b) => Some(b.length)    // one group
    *    case _     => None
    *  }
+   *  val p3 = "b*".r.unanchored
+   *  val p3Matches = "abbbc" match {
+   *    case p3() => true               // find the b's
+   *    case _    => false
+   *  }
+   *  val p4 = "a(b*)(c+)".r
+   *  val p4Matches = "abbbcc" match {
+   *    case p4(_*) => true             // multiple groups
+   *    case _      => false
+   *  }
+   *  val allGroups = "abbbcc" match {
+   *    case p4(all @ _*) => all mkString "/" // "bbb/cc"
+   *    case _            => ""
+   *  }
+   *  val cGroup = "abbbcc" match {
+   *    case p4(_, c) => c
+   *    case _        => ""
+   *  }
    *  }}}
    *
+   *  When matching a [[scala.util.matching.Regex.Match]],
+   *  a previously failed match results in None.
+   *
+   *  Otherwise, this `Regex` is applied to the previously matched input,
+   *  and the result of that match is used.
+   *
    *  @param target The string to match
    *  @return       The matches
    */
@@ -184,102 +247,127 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
       val m = pattern matcher s
       if (runMatcher(m)) Some((1 to m.groupCount).toList map m.group)
       else None
-    case m: Match        => unapplySeq(m.matched)
-    case _               => None
+    case m: Match => unapplySeq(m.matched)
+    case _ => None
   }
+
+  //  @see UnanchoredRegex
   protected def runMatcher(m: Matcher) = m.matches()
 
-  /** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
+  /** Return all non-overlapping matches of this `Regex` in the given character 
+   *  sequence as a [[scala.util.matching.Regex.MatchIterator]],
    *  which is a special [[scala.collection.Iterator]] that returns the
-   *  matched strings, but can also be converted into a normal iterator
-   *  that returns objects of type [[scala.util.matching.Regex.Match]]
-   *  that can be queried for data such as the text that precedes the
-   *  match, subgroups, etc.
+   *  matched strings but can also be queried for more data about the last match,
+   *  such as capturing groups and start position.
+   * 
+   *  A `MatchIterator` can also be converted into an iterator
+   *  that returns objects of type [[scala.util.matching.Regex.Match]],
+   *  such as is normally returned by `findAllMatchIn`.
+   * 
+   *  Where potential matches overlap, the first possible match is returned,
+   *  followed by the next match that follows the input consumed by the
+   *  first match:
+   *
+   *  {{{
+   *  val hat  = "hat[^a]+".r
+   *  val hathaway = "hathatthattthatttt"
+   *  val hats = (hat findAllIn hathaway).toList                     // List(hath, hattth)
+   *  val pos  = (hat findAllMatchIn hathaway map (_.start)).toList  // List(0, 7)
+   *  }}}
+   *
+   *  To return overlapping matches, it is possible to formulate a regular expression
+   *  with lookahead (`?=`) that does not consume the overlapping region.
+   *
+   *  {{{
+   *  val madhatter = "(h)(?=(at[^a]+))".r
+   *  val madhats   = (madhatter findAllMatchIn hathaway map {
+   *    case Regex.Groups(x, y) => s"$x$y"
+   *  }).toList                                       // List(hath, hatth, hattth, hatttt)
+   *  }}}
+   *
+   *  Attempting to retrieve match information before performing the first match
+   *  or after exhausting the iterator results in [[java.lang.IllegalStateException]].
+   *  See [[scala.util.matching.Regex.MatchIterator]] for details.
    *
    *  @param source The text to match against.
-   *  @return       A [[scala.util.matching.Regex.MatchIterator]] of all matches.
+   *  @return       A [[scala.util.matching.Regex.MatchIterator]] of matched substrings.
    *  @example      {{{for (words <- """\w+""".r findAllIn "A simple example.") yield words}}}
    */
-  def findAllIn(source: java.lang.CharSequence) = new Regex.MatchIterator(source, this, groupNames)
+  def findAllIn(source: CharSequence) = new Regex.MatchIterator(source, this, groupNames)
 
-
-  /** Return all matches of this regexp in given character sequence as a
+  /** Return all non-overlapping matches of this `Regex` in the given character sequence as a
    *  [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]].
    *
    *  @param source The text to match against.
    *  @return       A [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]] for all matches.
    *  @example      {{{for (words <- """\w+""".r findAllMatchIn "A simple example.") yield words.start}}}
    */
-  def findAllMatchIn(source: java.lang.CharSequence): Iterator[Match] = {
+  def findAllMatchIn(source: CharSequence): Iterator[Match] = {
     val matchIterator = findAllIn(source)
     new Iterator[Match] {
       def hasNext = matchIterator.hasNext
       def next: Match = {
-        matchIterator.next;
+        matchIterator.next()
         new Match(matchIterator.source, matchIterator.matcher, matchIterator.groupNames).force
       }
     }
   }
 
-  /** Return optionally first matching string of this regexp in given character sequence,
-   *  or None if it does not exist.
+  /** Return an optional first matching string of this `Regex` in the given character sequence,
+   *  or None if there is no match.
    *
    *  @param source The text to match against.
    *  @return       An [[scala.Option]] of the first matching string in the text.
    *  @example      {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}}
    */
-  def findFirstIn(source: java.lang.CharSequence): Option[String] = {
+  def findFirstIn(source: CharSequence): Option[String] = {
     val m = pattern.matcher(source)
     if (m.find) Some(m.group) else None
   }
 
-  /** Return optionally first match of this regexp in given character sequence,
+  /** Return an optional first match of this `Regex` in the given character sequence,
    *  or None if it does not exist.
    *
-   *  The main difference between this method and `findFirstIn` is that the (optional) return
-   *  type for this is [[scala.util.matching.Regex.Match]], through which more
-   *  data can be obtained about the match, such as the strings that precede and follow it,
-   *  or subgroups.
+   *  If the match is successful, the [[scala.util.matching.Regex.Match]] can be queried for
+   *  more data.
    *
    *  @param source The text to match against.
    *  @return       A [[scala.Option]] of [[scala.util.matching.Regex.Match]] of the first matching string in the text.
    *  @example      {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}}
    */
-  def findFirstMatchIn(source: java.lang.CharSequence): Option[Match] = {
+  def findFirstMatchIn(source: CharSequence): Option[Match] = {
     val m = pattern.matcher(source)
     if (m.find) Some(new Match(source, m, groupNames)) else None
   }
 
-  /** Return optionally match of this regexp at the beginning of the
-   *  given character sequence, or None if regexp matches no prefix
+  /** Return an optional match of this `Regex` at the beginning of the
+   *  given character sequence, or None if it matches no prefix
    *  of the character sequence.
    *
-   *  The main difference from this method to `findFirstIn` is that this
-   *  method will not return any matches that do not begin at the start
-   *  of the text being matched against.
+   *  Unlike `findFirstIn`, this method will only return a match at
+   *  the beginning of the input.
    *
    *  @param source The text to match against.
    *  @return       A [[scala.Option]] of the matched prefix.
-   *  @example      {{{"""[a-z]""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}}
+   *  @example      {{{"""\p{Lower}""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}}
    */
-  def findPrefixOf(source: java.lang.CharSequence): Option[String] = {
+  def findPrefixOf(source: CharSequence): Option[String] = {
     val m = pattern.matcher(source)
     if (m.lookingAt) Some(m.group) else None
   }
 
-  /** Return optionally match of this regexp at the beginning of the
-   *  given character sequence, or None if regexp matches no prefix
+  /** Return an optional match of this `Regex` at the beginning of the
+   *  given character sequence, or None if it matches no prefix
    *  of the character sequence.
    *
-   *  The main difference from this method to `findFirstMatchIn` is that
-   *  this method will not return any matches that do not begin at the
-   *  start of the text being matched against.
+   *  Unlike `findFirstMatchIn`, this method will only return a match at
+   *  the beginning of the input.
    *
    *  @param source The text to match against.
    *  @return       A [[scala.Option]] of the [[scala.util.matching.Regex.Match]] of the matched string.
    *  @example      {{{"""\w+""".r findPrefixMatchOf "A simple example." map (_.after) // returns Some(" simple example.")}}}
    */
-  def findPrefixMatchOf(source: java.lang.CharSequence): Option[Match] = {
+  def findPrefixMatchOf(source: CharSequence): Option[Match] = {
     val m = pattern.matcher(source)
     if (m.lookingAt) Some(new Match(source, m, groupNames)) else None
   }
@@ -293,7 +381,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    *  @return            The resulting string
    *  @example           {{{"""\d+""".r replaceAllIn ("July 15", "<NUMBER>") // returns "July <NUMBER>"}}}
    */
-  def replaceAllIn(target: java.lang.CharSequence, replacement: String): String = {
+  def replaceAllIn(target: CharSequence, replacement: String): String = {
     val m = pattern.matcher(target)
     m.replaceAll(replacement)
   }
@@ -307,7 +395,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    * import scala.util.matching.Regex
    * val datePattern = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day")
    * val text = "From 2011-07-15 to 2011-07-17"
-   * val repl = datePattern replaceAllIn (text, m => m.group("month")+"/"+m.group("day"))
+   * val repl = datePattern replaceAllIn (text, m => s"${m group "month"}/${m group "day"}")
    * }}}
    *
    * $replacementString
@@ -316,7 +404,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    * @param replacer    The function which maps a match to another string.
    * @return            The target string after replacements.
    */
-  def replaceAllIn(target: java.lang.CharSequence, replacer: Match => String): String = {
+  def replaceAllIn(target: CharSequence, replacer: Match => String): String = {
     val it = new Regex.MatchIterator(target, this, groupNames).replacementData
     it foreach (md => it replace replacer(md))
     it.replaced
@@ -330,10 +418,10 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    * {{{
    * import scala.util.matching.Regex._
    *
-   * val map = Map("x" -> "a var", "y" -> """some $ and \ signs""")
+   * val vars = Map("x" -> "a var", "y" -> """some $ and \ signs""")
    * val text = "A text with variables %x, %y and %z."
    * val varPattern = """%(\w+)""".r
-   * val mapper = (m: Match) => map get (m group 1) map (quoteReplacement(_))
+   * val mapper = (m: Match) => vars get (m group 1) map (quoteReplacement(_))
    * val repl = varPattern replaceSomeIn (text, mapper)
    * }}}
    *
@@ -343,7 +431,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    * @param replacer    The function which optionally maps a match to another string.
    * @return            The target string after replacements.
    */
-  def replaceSomeIn(target: java.lang.CharSequence, replacer: Match => Option[String]): String = {
+  def replaceSomeIn(target: CharSequence, replacer: Match => Option[String]): String = {
     val it = new Regex.MatchIterator(target, this, groupNames).replacementData
     for (matchdata <- it ; replacement <- replacer(matchdata))
       it replace replacement
@@ -359,7 +447,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    *  @param replacement The string that will replace the match
    *  @return            The resulting string
    */
-  def replaceFirstIn(target: java.lang.CharSequence, replacement: String): String = {
+  def replaceFirstIn(target: CharSequence, replacement: String): String = {
     val m = pattern.matcher(target)
     m.replaceFirst(replacement)
   }
@@ -370,21 +458,29 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
    *  @return        The array of strings computed by splitting the
    *                 input around matches of this regexp
    */
-  def split(toSplit: java.lang.CharSequence): Array[String] =
+  def split(toSplit: CharSequence): Array[String] =
     pattern.split(toSplit)
 
   /** Create a new Regex with the same pattern, but no requirement that
-   *  the entire String matches in extractor patterns.  For instance, the strings
-   *  shown below lead to successful matches, where they would not otherwise.
+   *  the entire String matches in extractor patterns.
+   *
+   *  Normally, matching on `date` behaves as though the pattern were
+   *  enclosed in anchors, `"^pattern$"`.
+   *
+   *  The unanchored `Regex` behaves as though those anchors were removed.
+   *
+   *  Note that this method does not actually strip any boundary matchers (such as `^` or `$`) from the pattern.
+   *
+   *  Calling `anchored` returns the original `Regex`.
    *
    *  {{{
-   *  val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
+   *  val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
    *
-   *  val dateP1(year, month, day) = "Date 2011-07-15"
+   *  val date(year, month, day) = "Date 2011-07-15"                       // OK
    *
    *  val copyright: String = "Date of this document: 2011-07-15" match {
-   *    case dateP1(year, month, day) => "Copyright "+year
-   *    case _                        => "No copyright"
+   *    case date(year, month, day) => s"Copyright $year"                  // OK
+   *    case _                      => "No copyright"
    *  }
    *  }}}
    *
@@ -397,93 +493,96 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
   override def toString = regex
 }
 
+/** A [[Regex]] that finds the first match when used in a pattern match.
+ *
+ *  @see [[Regex#unanchored]]
+ */
 trait UnanchoredRegex extends Regex {
   override protected def runMatcher(m: Matcher) = m.find()
   override def unanchored = this
 }
 
 /** This object defines inner classes that describe
- *  regex matches and helper objects. The class hierarchy
- *  is as follows:
- *
- *  {{{
- *            MatchData
- *            /      \
- *   MatchIterator  Match
- *  }}}
- *
+ *  regex matches and helper objects.
  */
 object Regex {
 
-  /** This class provides methods to access
-   *  the details of a match.
-   */
+  /** This class provides methods to access the details of a match. */
   trait MatchData {
 
-    /** The source from where the match originated */
-    val source: java.lang.CharSequence
+    /** The source from which the match originated */
+    val source: CharSequence
 
-    /** The names of the groups, or some empty sequence if one defined */
+    /** The names of the groups, or an empty sequence if none defined */
     val groupNames: Seq[String]
 
-    /** The number of subgroups in the pattern (not all of these need to match!) */
+    /** The number of capturing groups in the pattern.
+     *  (For a given successful match, some of those groups may not have matched any input.)
+     */
     def groupCount: Int
 
     /** The index of the first matched character, or -1 if nothing was matched */
     def start: Int
 
     /** The index of the first matched character in group `i`,
-     *  or -1 if nothing was matched for that group */
+     *  or -1 if nothing was matched for that group.
+     */
     def start(i: Int): Int
 
-    /** The index of the last matched character, or -1 if nothing was matched */
+    /** The index following the last matched character, or -1 if nothing was matched. */
     def end: Int
 
     /** The index following the last matched character in group `i`,
-     *  or -1 if nothing was matched for that group */
+     *  or -1 if nothing was matched for that group.
+     */
     def end(i: Int): Int
 
-    /** The matched string, or `null` if nothing was matched */
+    /** The matched string, or `null` if nothing was matched. */
     def matched: String =
       if (start >= 0) source.subSequence(start, end).toString
       else null
 
     /** The matched string in group `i`,
-     *  or `null` if nothing was matched */
+     *  or `null` if nothing was matched.
+     */
     def group(i: Int): String =
       if (start(i) >= 0) source.subSequence(start(i), end(i)).toString
       else null
 
-    /** All matched subgroups, i.e. not including group(0) */
+    /** All capturing groups, i.e., not including group(0). */
     def subgroups: List[String] = (1 to groupCount).toList map group
 
     /** The char sequence before first character of match,
-     *  or `null` if nothing was matched */
-    def before: java.lang.CharSequence =
+     *  or `null` if nothing was matched.
+     */
+    def before: CharSequence =
       if (start >= 0) source.subSequence(0, start)
       else null
 
     /** The char sequence before first character of match in group `i`,
-     *  or `null` if nothing was matched for that group  */
-    def before(i: Int): java.lang.CharSequence =
+     *  or `null` if nothing was matched for that group.
+     */
+    def before(i: Int): CharSequence =
       if (start(i) >= 0) source.subSequence(0, start(i))
       else null
 
     /** Returns char sequence after last character of match,
-     *  or `null` if nothing was matched */
-    def after: java.lang.CharSequence =
+     *  or `null` if nothing was matched.
+     */
+    def after: CharSequence =
       if (end >= 0) source.subSequence(end, source.length)
       else null
 
     /** The char sequence after last character of match in group `i`,
-     *  or `null` if nothing was matched for that group  */
-    def after(i: Int): java.lang.CharSequence =
+     *  or `null` if nothing was matched for that group.
+     */
+    def after(i: Int): CharSequence =
       if (end(i) >= 0) source.subSequence(end(i), source.length)
       else null
 
     private lazy val nameToIndex: Map[String, Int] = Map[String, Int]() ++ ("" :: groupNames.toList).zipWithIndex
 
-    /** Returns the group with given name
+    /** Returns the group with given name.
      *
      *  @param id The group name
      *  @return   The requested group
@@ -494,24 +593,22 @@ object Regex {
       case Some(index) => group(index)
     }
 
-    /** The matched string; equivalent to `matched.toString` */
+    /** The matched string; equivalent to `matched.toString`. */
     override def toString = matched
-
   }
 
-  /** Provides information about a succesful match.
-   */
-  class Match(val source: java.lang.CharSequence,
+  /** Provides information about a successful match. */
+  class Match(val source: CharSequence,
               matcher: Matcher,
               val groupNames: Seq[String]) extends MatchData {
 
-    /** The index of the first matched character */
+    /** The index of the first matched character. */
     val start = matcher.start
 
-    /** The index following the last matched character */
+    /** The index following the last matched character. */
     val end = matcher.end
 
-    /** The number of subgroups */
+    /** The number of subgroups. */
     def groupCount = matcher.groupCount
 
     private lazy val starts: Array[Int] =
@@ -519,19 +616,19 @@ object Regex {
     private lazy val ends: Array[Int] =
       ((0 to groupCount) map matcher.end).toArray
 
-    /** The index of the first matched character in group `i` */
+    /** The index of the first matched character in group `i`. */
     def start(i: Int) = starts(i)
 
-    /** The index following the last matched character in group `i` */
+    /** The index following the last matched character in group `i`. */
     def end(i: Int) = ends(i)
 
     /** The match itself with matcher-dependent lazy vals forced,
-     *  so that match is valid even once matcher is advanced
+     *  so that match is valid even once matcher is advanced.
      */
     def force: this.type = { starts; ends; this }
   }
 
-  /** An extractor object for Matches, yielding the matched string
+  /** An extractor object for Matches, yielding the matched string.
    *
    *  This can be used to help writing replacer functions when you
    *  are not interested in match data. For example:
@@ -546,24 +643,32 @@ object Regex {
     def unapply(m: Match): Some[String] = Some(m.matched)
   }
 
-  /** An extractor object that yields the groups in the match. Using an extractor
-   *  rather than the original regex avoids recomputing the match.
+  /** An extractor object that yields the groups in the match. Using this extractor
+   *  rather than the original `Regex` ensures that the match is not recomputed.
    *
    *  {{{
    *  import scala.util.matching.Regex.Groups
    *
-   *  val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
+   *  val date = """(\d\d\d\d)-(\d\d)-(\d\d)""".r
    *  val text = "The doc spree happened on 2011-07-15."
-   *  val day = datePattern replaceAllIn(text, _ match { case Groups(year, month, day) => month+"/"+day })
+   *  val day = date replaceAllIn(text, _ match { case Groups(_, month, day) => s"$month/$day" })
    *  }}}
    */
   object Groups {
     def unapplySeq(m: Match): Option[Seq[String]] = if (m.groupCount > 0) Some(1 to m.groupCount map m.group) else None
   }
 
-  /** A class to step through a sequence of regex matches
+  /** A class to step through a sequence of regex matches.
+   *
+   *  All methods inherited from [[scala.util.matching.Regex.MatchData]] will throw
+   *  a [[java.lang.IllegalStateException]] until the matcher is initialized. The
+   *  matcher can be initialized by calling `hasNext` or `next()` or causing these
+   *  methods to be called, such as by invoking `toString` or iterating through
+   *  the iterator's elements.
+   *
+   *  @see [[java.util.regex.Matcher]]
    */
-  class MatchIterator(val source: java.lang.CharSequence, val regex: Regex, val groupNames: Seq[String])
+  class MatchIterator(val source: CharSequence, val regex: Regex, val groupNames: Seq[String])
   extends AbstractIterator[String] with Iterator[String] with MatchData { self =>
 
     protected[Regex] val matcher = regex.pattern.matcher(source)
@@ -575,7 +680,7 @@ object Regex {
       nextSeen
     }
 
-    /** The next matched substring of `source` */
+    /** The next matched substring of `source`. */
     def next(): String = {
       if (!hasNext) throw new NoSuchElementException
       nextSeen = false
@@ -584,32 +689,32 @@ object Regex {
 
     override def toString = super[AbstractIterator].toString
 
-    /** The index of the first matched character */
+    /** The index of the first matched character. */
     def start: Int = matcher.start
 
-    /** The index of the first matched character in group `i` */
+    /** The index of the first matched character in group `i`. */
     def start(i: Int): Int = matcher.start(i)
 
-    /** The index of the last matched character */
+    /** The index following the last matched character. */
     def end: Int = matcher.end
 
-    /** The index following the last matched character in group `i` */
+    /** The index following the last matched character in group `i`. */
     def end(i: Int): Int = matcher.end(i)
 
-    /** The number of subgroups */
+    /** The number of subgroups. */
     def groupCount = matcher.groupCount
 
-    /** Convert to an iterator that yields MatchData elements instead of Strings */
+    /** Convert to an iterator that yields MatchData elements instead of Strings. */
     def matchData: Iterator[Match] = new AbstractIterator[Match] {
       def hasNext = self.hasNext
-      def next = { self.next; new Match(source, matcher, groupNames).force }
+      def next = { self.next(); new Match(source, matcher, groupNames).force }
     }
 
-    /** Convert to an iterator that yields MatchData elements instead of Strings and has replacement support */
+    /** Convert to an iterator that yields MatchData elements instead of Strings and has replacement support. */
     private[matching] def replacementData = new AbstractIterator[Match] with Replacement {
       def matcher = self.matcher
       def hasNext = self.hasNext
-      def next = { self.next; new Match(source, matcher, groupNames).force }
+      def next = { self.next(); new Match(source, matcher, groupNames).force }
     }
   }
author	Grzegorz Kossakowski <grzegorz.kossakowski@gmail.com>	2014-09-02 11:27:00 +0200
committer	Grzegorz Kossakowski <grzegorz.kossakowski@gmail.com>	2014-09-02 11:27:00 +0200
commit	d2a5555585a857138844e71943dcf86b89e79b81 (patch)
tree	172c3d37c3b05277e52e5f7ab3890e4fc9171aa7
parent	1d3613b5e2e82479464d90cc3401ccebb09bee89 (diff)
parent	887622759d302dbe8cb40cca7debe6d2bfffcaee (diff)
download	scala-d2a5555585a857138844e71943dcf86b89e79b81.tar.gz scala-d2a5555585a857138844e71943dcf86b89e79b81.tar.bz2 scala-d2a5555585a857138844e71943dcf86b89e79b81.zip