From e3dec9f006ac2631281fb936c4ca206daa8fda5d Mon Sep 17 00:00:00 2001 From: "Daniel C. Sobral" Date: Wed, 25 Jan 2012 13:30:40 -0200 Subject: Regex improvements This adds findAllMatchIn to Regex to mirror other similar methods. It also overloads StringLike's "r", adding a version that accepts group names. It includes test cases for both methods. Closes SI-2460. --- .../scala/collection/immutable/StringLike.scala | 15 ++++++++++++-- src/library/scala/util/matching/Regex.scala | 23 ++++++++++++++++++++-- 2 files changed, 34 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/library/scala/collection/immutable/StringLike.scala b/src/library/scala/collection/immutable/StringLike.scala index f9697565de..fc4e7bf0a8 100644 --- a/src/library/scala/collection/immutable/StringLike.scala +++ b/src/library/scala/collection/immutable/StringLike.scala @@ -207,9 +207,20 @@ self => /** You can follow a string with `.r`, turning it into a `Regex`. E.g. * - * """A\w*""".r is the regular expression for identifiers starting with `A`. + * `"""A\w*""".r` is the regular expression for identifiers starting with `A`. */ - def r: Regex = new Regex(toString) + def r: Regex = r() + + /** You can follow a string with `.r(g1, ... , gn)`, turning it into a `Regex`, + * with group names g1 through gn. + * + * `"""(\d\d)-(\d\d)-(\d\d\d\d)""".r("month", "day", "year")` matches dates + * and provides its subcomponents through groups named "month", "day" and + * "year". + * + * @param groupNames The names of the groups in the pattern, in the order they appear. 
+ */ + def r(groupNames: String*): Regex = new Regex(toString, groupNames: _*) def toBoolean: Boolean = parseBoolean(toString) def toByte: Byte = java.lang.Byte.parseByte(toString) diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala index ca97515e23..2debd247b8 100644 --- a/src/library/scala/util/matching/Regex.scala +++ b/src/library/scala/util/matching/Regex.scala @@ -180,7 +180,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable { None } - /** Return all matches of this regexp in given character sequence as a [[scala.util.mathcing.Regex.MatchIterator]], + /** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]], * which is a special [[scala.collection.Iterator]] that returns the * matched strings, but can also be converted into a normal iterator * that returns objects of type [[scala.util.matching.Regex.Match]] @@ -193,6 +193,25 @@ class Regex(regex: String, groupNames: String*) extends Serializable { */ def findAllIn(source: java.lang.CharSequence) = new Regex.MatchIterator(source, this, groupNames) + + /** Return all matches of this regexp in given character sequence as a + * [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]]. + * + * @param source The text to match against. + * @return A [[scala.collection.Iterator]] of [[scala.util.matching.Regex.Match]] for all matches. + * @example {{{for (words <- """\w+""".r findAllMatchIn "A simple example.") yield words.start}}} + */ + def findAllMatchIn(source: java.lang.CharSequence): Iterator[Match] = { + val matchIterator = findAllIn(source) + new Iterator[Match] { + def hasNext = matchIterator.hasNext + def next: Match = { + matchIterator.next; + new Match(matchIterator.source, matchIterator.matcher, matchIterator.groupNames).force + } + } + } + /** Return optionally first matching string of this regexp in given character sequence, * or None if it does not exist. 
* @@ -505,7 +524,7 @@ object Regex { class MatchIterator(val source: java.lang.CharSequence, val regex: Regex, val groupNames: Seq[String]) extends AbstractIterator[String] with Iterator[String] with MatchData { self => - protected val matcher = regex.pattern.matcher(source) + protected[Regex] val matcher = regex.pattern.matcher(source) private var nextSeen = false /** Is there another match? */ -- cgit v1.2.3