summary | refs | log | tree | commit | diff
path: root/javalib/src/main/scala/java/util/regex
diff options
context:
space:
mode:
Diffstat (limited to 'javalib/src/main/scala/java/util/regex')
-rw-r--r-- javalib/src/main/scala/java/util/regex/MatchResult.scala | 13
-rw-r--r-- javalib/src/main/scala/java/util/regex/Matcher.scala | 274
-rw-r--r-- javalib/src/main/scala/java/util/regex/Pattern.scala | 154
3 files changed, 441 insertions, 0 deletions
diff --git a/javalib/src/main/scala/java/util/regex/MatchResult.scala b/javalib/src/main/scala/java/util/regex/MatchResult.scala
new file mode 100644
index 0000000..f321c60
--- /dev/null
+++ b/javalib/src/main/scala/java/util/regex/MatchResult.scala
@@ -0,0 +1,13 @@
+package java.util.regex
+
+/** Read-only view of the outcome of a match operation.
+ *
+ *  The parameterless accessors describe the whole match; the overloads taking
+ *  a group index describe a single capturing group.
+ */
+trait MatchResult {
+  /** Number of capturing groups available in this result. */
+  def groupCount(): Int
+
+  // Whole match
+
+  /** Text of the whole match. */
+  def group(): String
+  /** Index at which the whole match starts. */
+  def start(): Int
+  /** Index just past the end of the whole match. */
+  def end(): Int
+
+  // Individual capturing groups
+
+  /** Text captured by the given group. */
+  def group(group: Int): String
+  /** Index at which the given group starts. */
+  def start(group: Int): Int
+  /** Index just past the end of the given group. */
+  def end(group: Int): Int
+}
diff --git a/javalib/src/main/scala/java/util/regex/Matcher.scala b/javalib/src/main/scala/java/util/regex/Matcher.scala
new file mode 100644
index 0000000..331f56b
--- /dev/null
+++ b/javalib/src/main/scala/java/util/regex/Matcher.scala
@@ -0,0 +1,274 @@
+package java.util.regex
+
+import scala.language.implicitConversions
+
+import scala.annotation.switch
+
+import scala.scalajs.js
+
+final class Matcher private[regex] (
+ private var pattern0: Pattern, private var input0: CharSequence,
+ private var regionStart0: Int, private var regionEnd0: Int)
+ extends AnyRef with MatchResult {
+
+ import Matcher._
+
+ def pattern(): Pattern = pattern0 // pattern currently in use (replaced by usePattern)
+
+ // Configuration (updated manually, by reset(input) and usePattern)
+ private var regexp = new js.RegExp(pattern0.jspattern, pattern0.jsflags) // JS regexp built from the pattern's JS source and flags
+ private var inputstr = input0.subSequence(regionStart0, regionEnd0).toString // the text that is actually searched
+
+ // Match result (written by find(), cleared by reset() and usePattern)
+ private var lastMatch: js.RegExp.ExecResult = null // outcome of the last exec; null when there is no match
+ private var lastMatchIsValid = false // true once find() has attempted a match since the last reset
+ private var canStillFind = true // turns false after a failed exec, so later find() calls return false
+
+ // Append state (updated by replacement methods)
+ private var appendPos: Int = 0 // index in inputstr up to which text has already been appended
+
+ // Lookup methods
+
+ /** Resets this matcher, searches once, and reports whether the match found
+  *  spans the whole of `inputstr`. When the match is only partial, the
+  *  matcher is reset again so that no match result stays available.
+  */
+ def matches(): Boolean = {
+   reset()
+   find()
+   // TODO this check is wrong with non-greedy patterns
+   // Further, it might be wrong to just use ^$ delimiters for two reasons:
+   // - They might already be there
+   // - They might not behave as expected when newline characters are present
+   val keepResult =
+     (lastMatch eq null) || (start() == 0 && end() == inputstr.length)
+   if (!keepResult)
+     reset()
+   lastMatch ne null
+ }
+
+ /** Resets this matcher, searches once, and reports whether the match found
+  *  begins at index 0 of `inputstr`. A match starting later is discarded by
+  *  resetting the matcher again.
+  */
+ def lookingAt(): Boolean = {
+   reset()
+   find()
+   val keepResult = (lastMatch eq null) || start() == 0
+   if (!keepResult)
+     reset()
+   lastMatch ne null
+ }
+
+ /** Executes the regexp once more against `inputstr` and records the outcome
+  *  in `lastMatch`. After one failed attempt every further call returns false
+  *  until the matcher is reset.
+  */
+ def find(): Boolean = {
+   if (!canStillFind) {
+     false
+   } else {
+     lastMatchIsValid = true
+     lastMatch = regexp.exec(inputstr)
+     if (lastMatch eq null)
+       canStillFind = false
+     else if (lastMatch(0).get.isEmpty)
+       regexp.lastIndex += 1 // step past an empty match so the next call advances
+     lastMatch ne null
+   }
+ }
+
+ /** Resets this matcher, then searches with the regexp's `lastIndex` set to
+  *  `start`.
+  */
+ def find(start: Int): Boolean = {
+   val self = reset()
+   self.regexp.lastIndex = start
+   self.find()
+ }
+
+ // Replace methods
+
+ /** Appends to `sb` the input between the previous append position and the
+  *  current match, followed by `replacement` with its references expanded.
+  *
+  *  In `replacement`, `$n` inserts capturing group `n` and a backslash makes
+  *  the following character literal. As in `java.util.regex.Matcher`, a group
+  *  number takes the first digit unconditionally and further digits only
+  *  while they still name an existing group; a group that did not take part
+  *  in the match contributes nothing.
+  *
+  *  `sb` is only written to once the whole replacement has been expanded.
+  *
+  *  @throws IllegalStateException if there is no current match
+  *  @throws IllegalArgumentException if `$` is not followed by a digit
+  *  @throws IndexOutOfBoundsException if the referenced group does not exist
+  */
+ def appendReplacement(sb: StringBuffer, replacement: String): Matcher = {
+   val matchStart = start() // fails with IllegalStateException without a match
+   val maxGroup = groupCount()
+
+   @inline def isDigit(c: Char) = c >= '0' && c <= '9'
+
+   // Expand into a scratch buffer so that `sb` stays untouched on failure
+   val expanded = new StringBuffer
+   val len = replacement.length
+   var i = 0
+   while (i < len) {
+     replacement.charAt(i) match {
+       case '$' =>
+         i += 1
+         if (i >= len || !isDigit(replacement.charAt(i)))
+           throw new IllegalArgumentException("Illegal group reference")
+         // The first digit always belongs to the group number
+         var group = replacement.charAt(i) - '0'
+         i += 1
+         // Further digits are consumed only while they still name a group
+         var done = false
+         while (!done && i < len && isDigit(replacement.charAt(i))) {
+           val candidate = group * 10 + (replacement.charAt(i) - '0')
+           if (candidate > maxGroup) {
+             done = true
+           } else {
+             group = candidate
+             i += 1
+           }
+         }
+         if (group > maxGroup)
+           throw new IndexOutOfBoundsException("No group " + group)
+         // A group that did not participate is null and must not print "null"
+         val value = this.group(group)
+         if (value ne null)
+           expanded.append(value)
+
+       case '\\' =>
+         i += 1
+         if (i < len)
+           expanded.append(replacement.charAt(i))
+         i += 1
+
+       case c =>
+         expanded.append(c)
+         i += 1
+     }
+   }
+
+   sb.append(inputstr.substring(appendPos, matchStart))
+   sb.append(expanded.toString)
+   appendPos = end()
+   this
+ }
+
+ /** Appends the part of `inputstr` after the last append position to `sb`
+  *  and moves the append position to the end of the input.
+  */
+ def appendTail(sb: StringBuffer): StringBuffer = {
+   val tail = inputstr.substring(appendPos)
+   sb.append(tail)
+   appendPos = inputstr.length
+   sb
+ }
+
+ /** Resets this matcher and returns `inputstr` with its first match replaced
+  *  by the expanded `replacement`, or `inputstr` itself when nothing matches.
+  */
+ def replaceFirst(replacement: String): String = {
+   reset()
+
+   if (!find()) {
+     inputstr
+   } else {
+     val out = new StringBuffer
+     appendReplacement(out, replacement)
+     appendTail(out)
+     out.toString
+   }
+ }
+
+ /** Resets this matcher and returns `inputstr` with every match replaced by
+  *  the expanded `replacement`.
+  */
+ def replaceAll(replacement: String): String = {
+   reset()
+
+   val out = new StringBuffer
+   while (find())
+     appendReplacement(out, replacement)
+   appendTail(out)
+   out.toString
+ }
+
+ // Reset methods
+
+ /** Discards the current match and restarts searching and appending from the
+  *  beginning of `inputstr`. Returns this matcher.
+  */
+ def reset(): Matcher = {
+   // forget the previous match
+   lastMatch = null
+   lastMatchIsValid = false
+   // restart the search and the append position
+   regexp.lastIndex = 0
+   canStillFind = true
+   appendPos = 0
+   this
+ }
+
+ /** Replaces the input by `input`, makes the region cover all of it, and
+  *  resets this matcher.
+  */
+ def reset(input: CharSequence): Matcher = {
+   val newEnd = input.length()
+   regionStart0 = 0
+   regionEnd0 = newEnd
+   input0 = input
+   inputstr = input.toString
+   reset()
+ }
+
+ /** Switches this matcher to `pattern`, keeping the current search position
+  *  (`lastIndex`) but dropping the last match result.
+  *
+  *  @throws IllegalArgumentException if `pattern` is null
+  */
+ def usePattern(pattern: Pattern): Matcher = {
+   // Reject null before touching any state, so a bad call cannot leave the
+   // matcher with a null pattern0
+   if (pattern eq null)
+     throw new IllegalArgumentException("Pattern cannot be null")
+
+   val prevLastIndex = regexp.lastIndex
+   pattern0 = pattern
+   regexp = new js.RegExp(pattern.jspattern, pattern.jsflags)
+   regexp.lastIndex = prevLastIndex
+   lastMatch = null
+   this
+ }
+
+ // Query state methods - implementation of MatchResult
+
+ /** The current match result, or an IllegalStateException when there is none. */
+ private def ensureLastMatch: js.RegExp.ExecResult =
+   if (lastMatch != null) lastMatch
+   else throw new IllegalStateException("No match available")
+
+ /** Number of capturing groups in the current match (group 0 excluded). */
+ def groupCount(): Int = {
+   val m = ensureLastMatch
+   m.length - 1
+ }
+
+ /** Text of the whole current match. */
+ def group(): String = ensureLastMatch(0).get
+ /** Index in `inputstr` at which the current match starts. */
+ def start(): Int = ensureLastMatch.index
+ /** Index in `inputstr` just past the current match. */
+ def end(): Int = start() + group().length
+
+ /** Start index of the given group, or -1 when the group has no value.
+  *  Group 0 is the whole match.
+  */
+ def start(group: Int): Int = group match {
+   case 0 =>
+     start()
+   case _ =>
+     val m = ensureLastMatch
+     // not provided by JS RegExp, so we make up something that at least
+     // will have some sound behavior from scala.util.matching.Regex
+     m(group).fold(-1)(text => inputstr.indexOf(text, m.index))
+ }
+
+ /** End index of the given group, or -1 when `start(group)` is -1. */
+ def end(group: Int): Int = start(group) match {
+   case -1 => -1
+   case from => from + this.group(group).length
+ }
+
+ /** Text captured by the given group, or null when the group has no value. */
+ def group(group: Int): String = {
+   val m = ensureLastMatch
+   m(group).orNull
+ }
+
+ // Seal the state
+
+ /** Captures the current input string and match in a separate MatchResult. */
+ def toMatchResult(): MatchResult = {
+   val snapshot = new SealedResult(inputstr, lastMatch)
+   snapshot
+ }
+
+ // Other query state methods
+
+ /** True when a search has been attempted since the last reset and it either
+  *  failed or produced a match ending at the end of `inputstr`.
+  */
+ def hitEnd(): Boolean = {
+   if (!lastMatchIsValid) false
+   else (lastMatch == null) || end() == inputstr.length
+ }
+
+ //def requireEnd(): Boolean // I don't understand the spec
+
+ // Stub methods for region management
+
+ /** Start index (inclusive) of this matcher's region within the input. */
+ def regionStart(): Int = regionStart0
+ /** End index (exclusive) of this matcher's region within the input. */
+ def regionEnd(): Int = regionEnd0
+
+ /** Restricts this matcher to the part of the input between `start`
+  *  (inclusive) and `end` (exclusive), resets it, and returns this matcher.
+  *
+  *  The region is applied to this instance rather than to a fresh matcher, so
+  *  callers that ignore the return value are affected as well, as specified
+  *  for `java.util.regex.Matcher.region`.
+  *
+  *  @throws IndexOutOfBoundsException if the bounds do not lie within the
+  *          input or `start` is greater than `end`
+  */
+ def region(start: Int, end: Int): Matcher = {
+   val inputLength = input0.length()
+   if (start < 0 || start > inputLength)
+     throw new IndexOutOfBoundsException("start")
+   if (end < 0 || end > inputLength)
+     throw new IndexOutOfBoundsException("end")
+   if (start > end)
+     throw new IndexOutOfBoundsException("start > end")
+
+   regionStart0 = start
+   regionEnd0 = end
+   inputstr = input0.subSequence(start, end).toString
+   reset()
+ }
+
+ /** Always false: transparent bounds cannot be enabled in this class. */
+ def hasTransparentBounds(): Boolean = false
+ //def useTransparentBounds(b: Boolean): Matcher
+
+ /** Always true: anchoring bounds cannot be disabled in this class. */
+ def hasAnchoringBounds(): Boolean = true
+ //def useAnchoringBounds(b: Boolean): Matcher
+}
+
+object Matcher {
+ /** Returns `s` with a backslash inserted before every backslash and every
+  *  dollar sign; all other characters are copied unchanged.
+  */
+ def quoteReplacement(s: String): String = {
+   val out = new StringBuilder
+   for (c <- s) {
+     if (c == '\\' || c == '$')
+       out.append('\\')
+     out.append(c)
+   }
+   out.toString
+ }
+
+ /** MatchResult backed by a fixed input string and JS exec result. */
+ private final class SealedResult(inputstr: String,
+     lastMatch: js.RegExp.ExecResult) extends MatchResult {
+
+   /** The wrapped match, or an IllegalStateException when it is null. */
+   private def ensureLastMatch: js.RegExp.ExecResult =
+     if (lastMatch != null) lastMatch
+     else throw new IllegalStateException("No match available")
+
+   def groupCount(): Int = {
+     val m = ensureLastMatch
+     m.length - 1
+   }
+
+   // Whole match
+
+   def group(): String = ensureLastMatch(0).get
+   def start(): Int = ensureLastMatch.index
+   def end(): Int = start() + group().length
+
+   // Individual groups
+
+   def group(group: Int): String = {
+     val m = ensureLastMatch
+     m(group).orNull
+   }
+
+   def start(group: Int): Int = group match {
+     case 0 =>
+       start()
+     case _ =>
+       val m = ensureLastMatch
+       // not provided by JS RegExp, so we make up something that at least
+       // will have some sound behavior from scala.util.matching.Regex
+       m(group).fold(-1)(text => inputstr.indexOf(text, m.index))
+   }
+
+   def end(group: Int): Int = start(group) match {
+     case -1 => -1
+     case from => from + this.group(group).length
+   }
+ }
+}
diff --git a/javalib/src/main/scala/java/util/regex/Pattern.scala b/javalib/src/main/scala/java/util/regex/Pattern.scala
new file mode 100644
index 0000000..fda103f
--- /dev/null
+++ b/javalib/src/main/scala/java/util/regex/Pattern.scala
@@ -0,0 +1,154 @@
+package java.util.regex
+
+import scala.annotation.switch
+
+import scala.scalajs.js
+
+final class Pattern private (pattern0: String, flags0: Int)
+ extends Serializable {
+
+ import Pattern._
+
+ def pattern(): String = pattern0 // the pattern source exactly as passed to the constructor
+ def flags(): Int = flags1 // flags0, adjusted when a leading in-pattern flag specifier was parsed
+
+ /** JS regexp source and effective flags derived from the constructor
+  *  arguments: a LITERAL pattern is quoted; otherwise the split hack, then the
+  *  flag hack are tried, falling back to the arguments unchanged.
+  */
+ private[regex] val (jspattern, flags1) = {
+   val isLiteral = (flags0 & LITERAL) != 0
+   if (isLiteral) {
+     (quote(pattern0), flags0)
+   } else {
+     trySplitHack(pattern0, flags0)
+       .orElse(tryFlagHack(pattern0, flags0))
+       .getOrElse((pattern0, flags0))
+   }
+ }
+
+ /** JS regexp flag string: always "g", plus "i" for CASE_INSENSITIVE and "m"
+  *  for MULTILINE.
+  */
+ private[regex] val jsflags = {
+   val caseFlag = if ((flags & CASE_INSENSITIVE) != 0) "i" else ""
+   val multilineFlag = if ((flags & MULTILINE) != 0) "m" else ""
+   "g" + caseFlag + multilineFlag
+ }
+
+ /** The pattern source, same as `pattern()`. */
+ override def toString(): String = pattern()
+
+ /** Creates a matcher for this pattern over the whole of `input`. */
+ def matcher(input: CharSequence): Matcher = {
+   val wholeLength = input.length()
+   new Matcher(this, input, 0, wholeLength)
+ }
+
+ /** Splits `input` around matches of this pattern, using limit 0. */
+ def split(input: CharSequence): Array[String] = split(input, limit = 0)
+
+ /** Splits `input` around matches of this pattern.
+  *
+  *  A positive `limit` caps the number of resulting pieces; any other value
+  *  means no cap. With `limit == 0`, trailing empty pieces are dropped, but
+  *  at least one piece is always kept.
+  */
+ def split(input: CharSequence, limit: Int): Array[String] = {
+   val maxParts = if (limit > 0) limit else Int.MaxValue
+
+   val text = input.toString
+   val m = this.matcher(text)
+   val parts = js.Array[String]()
+   var cursor = 0
+
+   // Cut a piece before each match until the cap leaves room for one more
+   while ((parts.length < maxParts - 1) && m.find()) {
+     parts.push(text.substring(cursor, m.start))
+     cursor = m.end
+   }
+   parts.push(text.substring(cursor))
+
+   // Remove a leading empty element iff the first match was zero-length
+   // and there is no other place the regex matches
+   val onlyLeadingEmptyMatch =
+     cursor == 0 && parts.length == 2 && (maxParts > 2 || !m.find())
+
+   if (onlyLeadingEmptyMatch) {
+     Array(text)
+   } else {
+     var keep = parts.length
+     if (limit == 0) {
+       while (keep > 1 && parts(keep - 1).isEmpty)
+         keep -= 1
+     }
+
+     val out = new Array[String](keep)
+     parts.copyToArray(out)
+     out
+   }
+ }
+}
+
+object Pattern {
+ final val UNIX_LINES = 0x01 // in-pattern flag character 'd'
+ final val CASE_INSENSITIVE = 0x02 // in-pattern flag character 'i'; becomes the JS "i" flag
+ final val COMMENTS = 0x04 // in-pattern flag character 'x'
+ final val MULTILINE = 0x08 // in-pattern flag character 'm'; becomes the JS "m" flag
+ final val LITERAL = 0x10 // the pattern text is escaped with quote() instead of interpreted
+ final val DOTALL = 0x20 // in-pattern flag character 's'
+ final val UNICODE_CASE = 0x40 // in-pattern flag character 'u'
+ final val CANON_EQ = 0x80
+ final val UNICODE_CHARACTER_CLASS = 0x100 // in-pattern flag character 'U'
+
+ /** Creates a pattern from `regex` with the given flag bits. */
+ def compile(regex: String, flags: Int): Pattern = new Pattern(regex, flags)
+
+ /** Creates a pattern from `regex` with no flags set. */
+ def compile(regex: String): Pattern = compile(regex, 0)
+
+ /** Compiles `regex` without flags and runs `matches()` on `input`. */
+ def matches(regex: String, input: CharSequence): Boolean = {
+   val matcher = compile(regex).matcher(input)
+   matcher.matches()
+ }
+
+ /** Returns `s` with a backslash inserted before each of the characters
+  *  `\ . ( ) [ ] { } | ? * + ^ $`; all other characters are copied unchanged.
+  */
+ def quote(s: String): String = {
+   val special = "\\.()[]{}|?*+^$"
+   val out = new StringBuilder
+   for (c <- s) {
+     if (special.indexOf(c) >= 0)
+       out.append('\\')
+     out.append(c)
+   }
+   out.toString
+ }
+
+ /** Hack supporting StringLike.split(): when `pat` consists solely of
+  *  \Q<char>\E, yields the quoted <char> together with the unchanged flags;
+  *  otherwise yields None.
+  */
+ @inline
+ private def trySplitHack(pat: String, flags: Int) =
+   splitHackPat.exec(pat) match {
+     case null => None
+     case m => Some((quote(m(1).get), flags))
+   }
+
+ /** When `pat` starts with a flag specifier such as (?i) or (?U-i), yields
+  *  the pattern without that prefix together with `flags0` after switching on
+  *  the flags before the dash and switching off those after it; otherwise
+  *  yields None.
+  */
+ @inline
+ private def tryFlagHack(pat: String, flags0: Int) =
+   flagHackPat.exec(pat) match {
+     case null =>
+       None
+     case m =>
+       val stripped = pat.substring(m(0).get.length) // cut off the flag specifiers
+       val enabled = m(1).fold(flags0) { on =>
+         on.foldLeft(flags0)((acc, c) => acc | charToFlag(c))
+       }
+       val effective = m(2).fold(enabled) { off =>
+         off.foldLeft(enabled)((acc, c) => acc & ~charToFlag(c))
+       }
+       Some((stripped, effective))
+   }
+
+ /** Flag constant for an in-pattern flag character; any other character is
+  *  reported through `sys.error`.
+  */
+ private def charToFlag(c: Char) =
+   if (c == 'i') CASE_INSENSITIVE
+   else if (c == 'd') UNIX_LINES
+   else if (c == 'm') MULTILINE
+   else if (c == 's') DOTALL
+   else if (c == 'u') UNICODE_CASE
+   else if (c == 'x') COMMENTS
+   else if (c == 'U') UNICODE_CHARACTER_CLASS
+   else sys.error("bad in-pattern flag")
+
+ /** Matches a pattern that consists solely of \Q<char>\E, where <char> is a single character (line feed and carriage return included); group 1 is <char>. Supports StringLike.split. */
+ private val splitHackPat = new js.RegExp("^\\\\Q(.|\\n|\\r)\\\\E$")
+
+ /** Matches a flag specifier at the start of a regex, e.g. (?u), (?-i), (?U-i). Group 1 holds the flag characters before the dash, group 2 (optional) those after it. */
+ private val flagHackPat =
+ new js.RegExp("^\\(\\?([idmsuxU]*)(?:-([idmsuxU]*))?\\)")
+}