From 1475df9bedc03417708f20d94b5e3db5c80f3036 Mon Sep 17 00:00:00 2001 From: Paul Phillips Date: Wed, 2 May 2012 14:02:04 -0700 Subject: Unanchored regex extractors. This patch is really by Lanny Ripple, but I reworked it because I didn't want to put any more methods onto String. Instead, there is a method on Regex which removes the anchoring quality. """\d\d""".r.unanchored --- src/library/scala/util/matching/Regex.scala | 39 +++++++++++++++++++----- test/files/run/si5045.check | 6 ++++ test/files/run/si5045.scala | 46 +++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 test/files/run/si5045.check create mode 100644 test/files/run/si5045.scala diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala index a83619cf01..3655a0a019 100644 --- a/src/library/scala/util/matching/Regex.scala +++ b/src/library/scala/util/matching/Regex.scala @@ -145,6 +145,7 @@ import java.util.regex.{ Pattern, Matcher } */ @SerialVersionUID(-2094783597747625537L) class Regex(regex: String, groupNames: String*) extends Serializable { + outer => import Regex._ @@ -179,15 +180,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable { * @return The matches */ def unapplySeq(target: Any): Option[List[String]] = target match { - case s: java.lang.CharSequence => - val m = pattern.matcher(s) - if (m.matches) Some((1 to m.groupCount).toList map m.group) + case s: CharSequence => + val m = pattern matcher s + if (runMatcher(m)) Some((1 to m.groupCount).toList map m.group) else None - case Match(s) => - unapplySeq(s) - case _ => - None + case m: Match => unapplySeq(m.matched) + case _ => None } + protected def runMatcher(m: Matcher) = m.matches() /** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]], * which is a special [[scala.collection.Iterator]] that returns the @@ -373,10 +373,35 @@ class Regex(regex: String, groupNames: 
String*) extends Serializable { def split(toSplit: java.lang.CharSequence): Array[String] = pattern.split(toSplit) + /** Create a new Regex with the same pattern, but no requirement that + * the entire String matches in extractor patterns. For instance, the strings + * shown below lead to successful matches, where they would not otherwise. + * + * {{{ + * val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored + * + * val dateP1(year, month, day) = "Date 2011-07-15" + * + * val copyright: String = "Date of this document: 2011-07-15" match { + * case dateP1(year, month, day) => "Copyright "+year + * case _ => "No copyright" + * } + * }}} + * + * @return The new unanchored regex + */ + def unanchored: UnanchoredRegex = new Regex(regex, groupNames: _*) with UnanchoredRegex { override def anchored = outer } + def anchored: Regex = this + /** The string defining the regular expression */ override def toString = regex } +trait UnanchoredRegex extends Regex { + override protected def runMatcher(m: Matcher) = m.find() + override def unanchored = this +} + /** This object defines inner classes that describe * regex matches and helper objects. 
The class hierarchy * is as follows: diff --git a/test/files/run/si5045.check b/test/files/run/si5045.check new file mode 100644 index 0000000000..7e9c1961b7 --- /dev/null +++ b/test/files/run/si5045.check @@ -0,0 +1,6 @@ + extract an exact match 2011-07-15 2011-07-15 + extract from middle of string 2011-07-15 2011-07-15 + extract from middle of string (P2) 2011-07-15 2011-07-15 + extract from middle of string (P3) 2011-07-15 2011-07-15 + copyright example has date Copyright 2011 Copyright 2011 + copyright example missing date No copyright No copyright diff --git a/test/files/run/si5045.scala b/test/files/run/si5045.scala new file mode 100644 index 0000000000..e198b101f3 --- /dev/null +++ b/test/files/run/si5045.scala @@ -0,0 +1,46 @@ +object Test extends App { + + import scala.util.matching.{ Regex, UnanchoredRegex } + + val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored + val dateP2 = """(\d\d\d\d)-(\d\d)-(\d\d)""" r ("year", "month", "day") unanchored + val dateP3 = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day") with UnanchoredRegex + + val yearStr = "2011" + val dateStr = List(yearStr,"07","15").mkString("-") + + def test(msg: String)(strs: Seq[String]): Unit = println("%40s %s".format(msg, strs mkString " ")) + + test("extract an exact match") { + val dateP1(y,m,d) = dateStr + Seq(List(y,m,d).mkString("-"), dateStr) + } + + test("extract from middle of string") { + val dateP1(y,m,d) = "Tested on "+dateStr+"." + Seq(List(y,m,d).mkString("-"), dateStr) + } + + test("extract from middle of string (P2)") { + val dateP2(y,m,d) = "Tested on "+dateStr+"." + Seq(List(y,m,d).mkString("-"), dateStr) + } + + test("extract from middle of string (P3)") { + val dateP3(y,m,d) = "Tested on "+dateStr+"." 
+ Seq(List(y,m,d).mkString("-"), dateStr) + } + + def copyright(in: String): String = in match { + case dateP1(year, month, day) => "Copyright "+year + case _ => "No copyright" + } + + test("copyright example has date") { + Seq(copyright("Date of this document: "+dateStr), "Copyright "+yearStr) + } + + test("copyright example missing date") { + Seq(copyright("Date of this document: unknown"), "No copyright") + } +} -- cgit v1.2.3