summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaul Phillips <paulp@improving.org>2012-05-02 14:02:04 -0700
committerPaul Phillips <paulp@improving.org>2012-05-02 15:20:25 -0700
commit1475df9bedc03417708f20d94b5e3db5c80f3036 (patch)
tree6dc7e6fee60f17ce379766def8ec5b9f1918b3d5
parent4b8c54cd9e52dbacc239d05c8149d7f249bbebab (diff)
downloadscala-1475df9bedc03417708f20d94b5e3db5c80f3036.tar.gz
scala-1475df9bedc03417708f20d94b5e3db5c80f3036.tar.bz2
scala-1475df9bedc03417708f20d94b5e3db5c80f3036.zip
Unanchored regex extractors.
This patch is really by Lanny Ripple <lanny@spotinfluence.com>, but I reworked it because I didn't want to put any more methods onto String. Instead, there is a method on Regex which removes the anchoring quality. For example: """\d\d""".r.unanchored
-rw-r--r--src/library/scala/util/matching/Regex.scala39
-rw-r--r--test/files/run/si5045.check6
-rw-r--r--test/files/run/si5045.scala46
3 files changed, 84 insertions, 7 deletions
diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala
index a83619cf01..3655a0a019 100644
--- a/src/library/scala/util/matching/Regex.scala
+++ b/src/library/scala/util/matching/Regex.scala
@@ -145,6 +145,7 @@ import java.util.regex.{ Pattern, Matcher }
*/
@SerialVersionUID(-2094783597747625537L)
class Regex(regex: String, groupNames: String*) extends Serializable {
+ outer =>
import Regex._
@@ -179,15 +180,14 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
* @return The matches
*/
def unapplySeq(target: Any): Option[List[String]] = target match {
- case s: java.lang.CharSequence =>
- val m = pattern.matcher(s)
- if (m.matches) Some((1 to m.groupCount).toList map m.group)
+ case s: CharSequence =>
+ val m = pattern matcher s
+ if (runMatcher(m)) Some((1 to m.groupCount).toList map m.group)
else None
- case Match(s) =>
- unapplySeq(s)
- case _ =>
- None
+ case m: Match => unapplySeq(m.matched)
+ case _ => None
}
+ protected def runMatcher(m: Matcher) = m.matches()
/** Return all matches of this regexp in given character sequence as a [[scala.util.matching.Regex.MatchIterator]],
* which is a special [[scala.collection.Iterator]] that returns the
@@ -373,10 +373,35 @@ class Regex(regex: String, groupNames: String*) extends Serializable {
def split(toSplit: java.lang.CharSequence): Array[String] =
pattern.split(toSplit)
+ /** Create a new Regex with the same pattern, but no requirement that
+ * the entire String matches in extractor patterns. For instance, the strings
+ * shown below lead to successful matches, where they would not otherwise.
+ *
+ * {{{
+ * val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
+ *
+ * val dateP1(year, month, day) = "Date 2011-07-15"
+ *
+ * val copyright: String = "Date of this document: 2011-07-15" match {
+ * case dateP1(year, month, day) => "Copyright "+year
+ * case _ => "No copyright"
+ * }
+ * }}}
+ *
+ * @return The new unanchored regex
+ */
+ def unanchored: UnanchoredRegex = new Regex(regex, groupNames: _*) with UnanchoredRegex { override def anchored = outer }
+ def anchored: Regex = this
+
/** The string defining the regular expression */
override def toString = regex
}
+trait UnanchoredRegex extends Regex {
+ override protected def runMatcher(m: Matcher) = m.find()
+ override def unanchored = this
+}
+
/** This object defines inner classes that describe
* regex matches and helper objects. The class hierarchy
* is as follows:
diff --git a/test/files/run/si5045.check b/test/files/run/si5045.check
new file mode 100644
index 0000000000..7e9c1961b7
--- /dev/null
+++ b/test/files/run/si5045.check
@@ -0,0 +1,6 @@
+ extract an exact match 2011-07-15 2011-07-15
+ extract from middle of string 2011-07-15 2011-07-15
+ extract from middle of string (P2) 2011-07-15 2011-07-15
+ extract from middle of string (P3) 2011-07-15 2011-07-15
+ copyright example has date Copyright 2011 Copyright 2011
+ copyright example missing date No copyright No copyright
diff --git a/test/files/run/si5045.scala b/test/files/run/si5045.scala
new file mode 100644
index 0000000000..e198b101f3
--- /dev/null
+++ b/test/files/run/si5045.scala
@@ -0,0 +1,46 @@
+object Test extends App {
+
+ import scala.util.matching.{ Regex, UnanchoredRegex }
+
+ val dateP1 = """(\d\d\d\d)-(\d\d)-(\d\d)""".r.unanchored
+ val dateP2 = """(\d\d\d\d)-(\d\d)-(\d\d)""" r ("year", "month", "day") unanchored
+ val dateP3 = new Regex("""(\d\d\d\d)-(\d\d)-(\d\d)""", "year", "month", "day") with UnanchoredRegex
+
+ val yearStr = "2011"
+ val dateStr = List(yearStr,"07","15").mkString("-")
+
+ def test(msg: String)(strs: Seq[String]): Unit = println("%40s %s".format(msg, strs mkString " "))
+
+ test("extract an exact match") {
+ val dateP1(y,m,d) = dateStr
+ Seq(List(y,m,d).mkString("-"), dateStr)
+ }
+
+ test("extract from middle of string") {
+ val dateP1(y,m,d) = "Tested on "+dateStr+"."
+ Seq(List(y,m,d).mkString("-"), dateStr)
+ }
+
+ test("extract from middle of string (P2)") {
+ val dateP2(y,m,d) = "Tested on "+dateStr+"."
+ Seq(List(y,m,d).mkString("-"), dateStr)
+ }
+
+ test("extract from middle of string (P3)") {
+ val dateP3(y,m,d) = "Tested on "+dateStr+"." // the P3 case must exercise dateP3 (built via `with UnanchoredRegex`), not dateP2 again
+ Seq(List(y,m,d).mkString("-"), dateStr)
+ }
+
+ def copyright(in: String): String = in match {
+ case dateP1(year, month, day) => "Copyright "+year
+ case _ => "No copyright"
+ }
+
+ test("copyright example has date") {
+ Seq(copyright("Date of this document: "+dateStr), "Copyright "+yearStr)
+ }
+
+ test("copyright example missing date") {
+ Seq(copyright("Date of this document: unknown"), "No copyright")
+ }
+}