diff options
author | Paul Phillips <paulp@improving.org> | 2011-07-16 05:51:44 +0000 |
---|---|---|
committer | Paul Phillips <paulp@improving.org> | 2011-07-16 05:51:44 +0000 |
commit | 90725a50c4b90efeb747dc1b27159288c816d74e (patch) | |
tree | 23814e958c3fd4ec4992c095c0188af4f126bbfe /src/library/scala/util/matching/Regex.scala | |
parent | 29e501db0b70abb6e6791b03c7af6fc11cadfc20 (diff) | |
download | scala-90725a50c4b90efeb747dc1b27159288c816d74e.tar.gz scala-90725a50c4b90efeb747dc1b27159288c816d74e.tar.bz2 scala-90725a50c4b90efeb747dc1b27159288c816d74e.zip |
Documentation and examples for Regex.
Contributed by Daniel C. Sobral
Diffstat (limited to 'src/library/scala/util/matching/Regex.scala')
-rw-r--r-- | src/library/scala/util/matching/Regex.scala | 161 |
1 files changed, 154 insertions, 7 deletions
diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala index 7de4587724..481a4e14a8 100644 --- a/src/library/scala/util/matching/Regex.scala +++ b/src/library/scala/util/matching/Regex.scala @@ -15,6 +15,41 @@ import java.util.regex.{ Pattern, Matcher } /** This class provides methods for creating and using regular expressions. * It is based on the regular expressions of the JDK since 1.4. * + * Its main goal is to extract strings that match a pattern, or the subgroups + * that make it up. For that reason, it is usually used with for comprehensions + * and matching (see methods for examples). + * + * Because regex patterns make extensive use of the backslash character (`\`), + * it is usually defined with triple quotes so that backslash characters won't + * need to be quoted. Also, an implicit conversion is available through + * [[scala.Predef]] that makes converting a [[java.lang.String]] into a Regex + * as easy as calling the method `r` on it. For example: + * + * {{{ + * val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r + * }}} + * + * Regex provides methods to find and replace patterns, but also provides + * extractors for pattern subgroups. Note, however, that extractors require + * that the whole text be matched, or they fail. 
+ * + * For example, the subgroups in the pattern above can be obtained in the following ways: + * + * {{{ + * val datePattern(year, month, day) = "2011-07-15" + * + * // val datePattern(year, month, day) = "Date 2011-07-15" // throws an exception at runtime + * + * val copyright: String = datePattern findFirstIn "Date of this document: 2011-07-15" match { + * case Some(datePattern(year, month, day)) => "Copyright "+year + * case None => "No copyright" + * } + * + * val copyright: Option[String] = for { + * datePattern(year, month, day) <- datePattern findFirstIn "Last modified 2011-07-15" + * } yield year + * }}} + * * You can use special pattern syntax constructs like `(?idmsux-idmsux)`¹ to switch * various regex compilation options like `CASE_INSENSITIVE` or `UNICODE_CASE`. * @@ -37,7 +72,29 @@ class Regex(regex: String, groupNames: String*) extends Serializable { /** The compiled pattern */ val pattern = Pattern.compile(regex) - /** Tries to match target (whole match) and returns the matches. + /** Tries to match target (whole match) and returns the matching subgroups. + * If the pattern has no subgroups, then it returns an empty list on a + * successful match. + * + * Note, however, that if some subgroup has not been matched, a `null` will + * be returned for that subgroup. 
+ * + * For example: + * + * {{{ + * val p1 = "ab*c".r + * val p2 = "a(b*)c".r + * + * val p1Matches = "abbbc" match { + * case p1() => true + * case _ => false + * } + * + * val numberOfB = "abbbc" match { + * case p2(b) => Some(b.length) + * case _ => None + * } + * }}} * * @param target The string to match * @return The matches @@ -53,12 +110,25 @@ class Regex(regex: String, groupNames: String*) extends Serializable { None } - /** Return all matches of this regexp in given character sequence as an iterator + /** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]], + * which is a special [[scala.collection.Iterator]] that returns the + * matched strings, but can also be converted into a normal iterator + * that returns objects of type [[scala.util.matching.Regex.Match]] + * that can be queried for data such as the text that precedes the + * match, subgroups, etc. + * + * @param source The text to match against. + * @return A [[scala.util.matching.Regex.MatchIterator]] of all matches. + * @example {{{for (words <- """\w+""".r findAllIn "A simple example.") yield words}}} */ def findAllIn(source: java.lang.CharSequence) = new Regex.MatchIterator(source, this, groupNames) /** Return optionally first matching string of this regexp in given character sequence, - * None if it does not exist. + * or None if it does not exist. + * + * @param source The text to match against. + * @return An [[scala.Option]] of the first matching string in the text. + * @example {{{"""\w+""".r findFirstIn "A simple example." foreach println // prints "A"}}} */ def findFirstIn(source: java.lang.CharSequence): Option[String] = { val m = pattern.matcher(source) @@ -66,7 +136,16 @@ class Regex(regex: String, groupNames: String*) extends Serializable { } /** Return optionally first match of this regexp in given character sequence, - * None if it does not exist. + * or None if it does not exist. 
+ * + * The main difference between this method and `findFirstIn` is that the (optional) return + * type for this is [[scala.util.matching.Regex.Match]], through which more + * data can be obtained about the match, such as the strings that precede and follow it, + * or subgroups. + * + * @param source The text to match against. + * @return A [[scala.Option]] of [[scala.util.matching.Regex.Match]] of the first matching string in the text. + * @example {{{("""[a-z]""".r findFirstMatchIn "A simple example.") map (_.start) // returns Some(2), the index of the first match in the text}}} */ def findFirstMatchIn(source: java.lang.CharSequence): Option[Match] = { val m = pattern.matcher(source) @@ -76,6 +155,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable { /** Return optionally match of this regexp at the beginning of the * given character sequence, or None if regexp matches no prefix * of the character sequence. + * + * The main difference from this method to `findFirstIn` is that this + * method will not return any matches that do not begin at the start + * of the text being matched against. + * + * @param source The text to match against. + * @return A [[scala.Option]] of the matched prefix. + * @example {{{"""[a-z]""".r findPrefixOf "A simple example." // returns None, since the text does not begin with a lowercase letter}}} */ def findPrefixOf(source: java.lang.CharSequence): Option[String] = { val m = pattern.matcher(source) @@ -85,6 +172,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable { /** Return optionally match of this regexp at the beginning of the * given character sequence, or None if regexp matches no prefix * of the character sequence. + * + * The main difference from this method to `findFirstMatchIn` is that + * this method will not return any matches that do not begin at the + * start of the text being matched against. + * + * @param source The text to match against. 
+ * @return A [[scala.Option]] of the [[scala.util.matching.Regex.Match]] of the matched string. + * @example {{{"""\w+""".r findPrefixMatchOf "A simple example." map (_.after) // returns Some(" simple example.")}}} */ def findPrefixMatchOf(source: java.lang.CharSequence): Option[Match] = { val m = pattern.matcher(source) @@ -96,6 +191,7 @@ class Regex(regex: String, groupNames: String*) extends Serializable { * @param target The string to match * @param replacement The string that will replace each match * @return The resulting string + * @example {{{"""\d+""".r replaceAllIn ("July 15", "<NUMBER>") // returns "July <NUMBER>"}}} */ def replaceAllIn(target: java.lang.CharSequence, replacement: String): String = { val m = pattern.matcher(target) @@ -103,7 +199,16 @@ class Regex(regex: String, groupNames: String*) extends Serializable { } /** - * Replaces all matches using a replacer function. + * Replaces all matches using a replacer function. The replacer function takes a + * [[scala.util.matching.Regex.Match]] so that extra information can be obtained + * from the match. For example: + * + * {{{ + * import scala.util.matching.Regex + * val datePattern = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day") + * val text = "From 2011-07-15 to 2011-07-17" + * val repl = datePattern replaceAllIn (text, m => m.group("month")+"/"+m.group("day")) + * }}} * * @param target The string to match. * @param replacer The function which maps a match to another string. @@ -115,6 +220,25 @@ class Regex(regex: String, groupNames: String*) extends Serializable { it.replaced } + /** + * Replaces some of the matches using a replacer function that returns an [[scala.Option]]. + * The replacer function takes a [[scala.util.matching.Regex.Match]] so that extra + * information can be obtained from the match. 
For example: + * + * {{{ + * import scala.util.matching.Regex._ + * + * val map = Map("x" -> "a var", "y" -> "another var") + * val text = "A text with variables %x, %y and %z." + * val varPattern = """%(\w+)""".r + * val mapper = (m: Match) => map get (m group 1) + * val repl = varPattern replaceSomeIn (text, mapper) + * }}} + * + * @param target The string to match. + * @param replacer The function which optionally maps a match to another string. + * @return The target string after replacements. + */ def replaceSomeIn(target: java.lang.CharSequence, replacer: Match => Option[String]): String = { val it = new Regex.MatchIterator(target, this, groupNames).replacementData for (matchdata <- it ; replacement <- replacer(matchdata)) @@ -274,12 +398,35 @@ object Regex { def force: this.type = { starts; ends; this } } - /** An extractor object for Matches, yielding the matched string */ + /** An extractor object for Matches, yielding the matched string + * + * This can be used to help writing replacer functions when you + * are not interested in match data. For example: + * + * {{{ + * import scala.util.matching.Regex.Match + * """\w+""".r replaceAllIn ("A simple example.", _ match { case Match(s) => s.toUpperCase }) + * }}} + * + */ object Match { def unapply(m: Match): Some[String] = Some(m.matched) } - /** An extractor object that yields groups in the match. */ + /** An extractor object that yields groups in the match. The main + * advantage of using this extractor instead of using the original + * regex is that this avoids rematching the string. + * + * For example: + * + * {{{ + * import scala.util.matching.Regex.Groups + * + * val datePattern = """(\d\d\d\d)-(\d\d)-(\d\d)""".r + * val text = "The doc spree happened on 2011-07-15." 
+ * val day = datePattern replaceAllIn(text, _ match { case Groups(year, month, day) => month+"/"+day }) + * }}} + */ object Groups { def unapplySeq(m: Match): Option[Seq[String]] = if (m.groupCount > 0) Some(1 to m.groupCount map m.group) else None } |