From fec54d6bb437280e74e4ec0286e42ae757b9b691 Mon Sep 17 00:00:00 2001 From: som-snytt Date: Wed, 11 May 2016 04:58:27 -0700 Subject: SI-9666: Use inline group names in Regex (#4990) Delegate `Match group name` to the underlying `matcher`. If that fails, try explicit group names as a fall back. No attempt is made to correlate inline and explicit names. In the following case, either name is accepted: ``` new Regex("a(?b*)c", "Bee") ``` But if names are reversed, the error is undetected: ``` new Regex("a(?b*)(?c)", "Bar", "Bee") ``` Throw IllegalArg on bad group name to be consistent with Java. --- src/library/scala/util/matching/Regex.scala | 29 +++++++++--- test/files/run/reify_printf.scala | 1 - test/junit/scala/util/matching/RegexTest.scala | 64 ++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/library/scala/util/matching/Regex.scala b/src/library/scala/util/matching/Regex.scala index 6d3d015b1a..bd55fb5d04 100644 --- a/src/library/scala/util/matching/Regex.scala +++ b/src/library/scala/util/matching/Regex.scala @@ -182,6 +182,9 @@ class Regex private[matching](val pattern: Pattern, groupNames: String*) extends * val namedYears = for (m <- namedDate findAllMatchIn dates) yield m group "year" * }}} * + * Group names supplied to the constructor are preferred to inline group names + * when retrieving matched groups by name. Not all platforms support inline names. + * * This constructor does not support options as flags, which must be * supplied as inline flags in the pattern string: `(?idmsux-idmsux)`. * @@ -578,6 +581,9 @@ object Regex { */ trait MatchData { + /** Basically, wraps a platform Matcher. */ + protected def matcher: Matcher + /** The source from which the match originated */ val source: CharSequence @@ -650,16 +656,25 @@ object Regex { private lazy val nameToIndex: Map[String, Int] = Map[String, Int]() ++ ("" :: groupNames.toList).zipWithIndex - /** Returns the group with given name. + /** Returns the group with the given name. + * + * Uses explicit group names when supplied; otherwise, + * queries the underlying implementation for inline named groups. + * Not all platforms support inline group names. * * @param id The group name * @return The requested group - * @throws NoSuchElementException if the requested group name is not defined + * @throws IllegalArgumentException if the requested group name is not defined */ - def group(id: String): String = nameToIndex.get(id) match { - case None => throw new NoSuchElementException("group name "+id+" not defined") - case Some(index) => group(index) - } + def group(id: String): String = ( + if (groupNames.isEmpty) + matcher group id + else + nameToIndex.get(id) match { + case Some(index) => group(index) + case None => matcher group id + } + ) /** The matched string; equivalent to `matched.toString`. */ override def toString = matched @@ -667,7 +682,7 @@ object Regex { /** Provides information about a successful match. */ class Match(val source: CharSequence, - private[matching] val matcher: Matcher, + protected[matching] val matcher: Matcher, val groupNames: Seq[String]) extends MatchData { /** The index of the first matched character. */ diff --git a/test/files/run/reify_printf.scala b/test/files/run/reify_printf.scala index c4ade79837..099a353e89 100644 --- a/test/files/run/reify_printf.scala +++ b/test/files/run/reify_printf.scala @@ -6,7 +6,6 @@ import scala.tools.reflect.ToolBox import scala.reflect.api._ import scala.reflect.api.Trees import scala.reflect.internal.Types -import scala.util.matching.Regex object Test extends App { //val output = new ByteArrayOutputStream() diff --git a/test/junit/scala/util/matching/RegexTest.scala b/test/junit/scala/util/matching/RegexTest.scala index 5b13397d6a..06d0445e1c 100644 --- a/test/junit/scala/util/matching/RegexTest.scala +++ b/test/junit/scala/util/matching/RegexTest.scala @@ -6,6 +6,8 @@ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 +import scala.tools.testing.AssertUtil._ + @RunWith(classOf[JUnit4]) class RegexTest { @Test def t8022CharSequence(): Unit = { @@ -44,4 +46,66 @@ class RegexTest { } assertEquals(List((1,2),(3,4),(5,6)), z) } + + @Test def `SI-9666: use inline group names`(): Unit = { + val r = new Regex("a(?b*)c") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + assertEquals("abbbc", ms.next()) + assertEquals("bbb", ms group "Bee") + assertTrue(ms.hasNext) + assertEquals("abc", ms.next()) + assertEquals("b", ms group "Bee") + assertFalse(ms.hasNext) + } + + @Test def `SI-9666: use explicit group names`(): Unit = { + val r = new Regex("a(b*)c", "Bee") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + assertEquals("abbbc", ms.next()) + assertEquals("bbb", ms group "Bee") + assertTrue(ms.hasNext) + assertEquals("abc", ms.next()) + assertEquals("b", ms group "Bee") + assertFalse(ms.hasNext) + } + + @Test def `SI-9666: fall back to explicit group names`(): Unit = { + val r = new Regex("a(?b*)c", "Bee") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + assertEquals("abbbc", ms.next()) + assertEquals("bbb", ms group "Bee") + assertEquals("bbb", ms group "Bar") + assertTrue(ms.hasNext) + assertEquals("abc", ms.next()) + assertEquals("b", ms group "Bee") + assertEquals("b", ms group "Bar") + assertFalse(ms.hasNext) + } + + //type NoGroup = NoSuchElementException + type NoGroup = IllegalArgumentException + + @Test def `SI-9666: throw on bad name`(): Unit = { + assertThrows[NoGroup] { + val r = new Regex("a(?b*)c") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + ms group "Bee" + } + assertThrows[NoGroup] { + val r = new Regex("a(?b*)c", "Bar") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + ms group "Bee" + } + assertThrows[NoGroup] { + val r = new Regex("a(b*)c", "Bar") + val ms = r findAllIn "stuff abbbc more abc and so on" + assertTrue(ms.hasNext) + ms group "Bee" + } + } } -- cgit v1.2.3