diff options
Diffstat (limited to 'javalib/src/main/scala/java/util/regex')
-rw-r--r-- | javalib/src/main/scala/java/util/regex/MatchResult.scala | 13 | ||||
-rw-r--r-- | javalib/src/main/scala/java/util/regex/Matcher.scala | 274 | ||||
-rw-r--r-- | javalib/src/main/scala/java/util/regex/Pattern.scala | 154 |
3 files changed, 441 insertions, 0 deletions
package java.util.regex

import scala.language.implicitConversions

import scala.annotation.switch

import scala.scalajs.js

// ---------------------------------------------------------------------------
// javalib/src/main/scala/java/util/regex/MatchResult.scala
// ---------------------------------------------------------------------------

/** Read-only view of the result of a match operation.
 *
 *  The parameterless accessors describe the whole match; the overloads taking
 *  a `group` index describe a single capturing group.
 */
trait MatchResult {
  def groupCount(): Int

  def start(): Int
  def end(): Int
  def group(): String

  def start(group: Int): Int
  def end(group: Int): Int
  def group(group: Int): String
}

// ---------------------------------------------------------------------------
// javalib/src/main/scala/java/util/regex/Matcher.scala
// ---------------------------------------------------------------------------

/** Matcher backed by a JavaScript `RegExp`.
 *
 *  All matching is performed against `inputstr`, i.e. the part of the input
 *  between `regionStart0` and `regionEnd0`. Indices returned by `start`/`end`
 *  are therefore offsets into `inputstr`.
 */
final class Matcher private[regex] (
    private var pattern0: Pattern, private var input0: CharSequence,
    private var regionStart0: Int, private var regionEnd0: Int)
    extends AnyRef with MatchResult {

  import Matcher._

  /** The pattern currently used by this matcher. */
  def pattern(): Pattern = pattern0

  // Configuration (updated manually)
  private var regexp = new js.RegExp(pattern0.jspattern, pattern0.jsflags)
  private var inputstr = input0.subSequence(regionStart0, regionEnd0).toString

  // Match result (updated by successful matches)
  private var lastMatch: js.RegExp.ExecResult = null
  private var lastMatchIsValid = false
  private var canStillFind = true

  // Append state (updated by replacement methods)
  private var appendPos: Int = 0

  // Lookup methods

  /** Resets the matcher, runs one `find()` and keeps the match only if it
   *  spans the whole of `inputstr`.
   *
   *  @return `true` iff a match covering all of `inputstr` was kept
   */
  def matches(): Boolean = {
    reset()
    find()
    // TODO this check is wrong with non-greedy patterns
    // Further, it might be wrong to just use ^$ delimiters for two reasons:
    // - They might already be there
    // - They might not behave as expected when newline characters are present
    if ((lastMatch ne null) && (start() != 0 || end() != inputstr.length))
      reset()
    lastMatch ne null
  }

  /** Resets the matcher, runs one `find()` and keeps the match only if it
   *  starts at index 0 of `inputstr`.
   *
   *  @return `true` iff a match starting at index 0 was kept
   */
  def lookingAt(): Boolean = {
    reset()
    find()
    if ((lastMatch ne null) && (start() != 0))
      reset()
    lastMatch ne null
  }

  /** Runs the regexp once more from its current `lastIndex`.
   *
   *  Once an attempt fails, subsequent calls return `false` without executing
   *  the regexp again until the matcher is reset.
   *
   *  @return `true` iff a match was found
   */
  def find(): Boolean = if (canStillFind) {
    lastMatchIsValid = true
    lastMatch = regexp.exec(inputstr)
    if (lastMatch ne null) {
      // Step past an empty match so that the next call makes progress.
      if (lastMatch(0).get.isEmpty)
        regexp.lastIndex += 1
    } else {
      canStillFind = false
    }
    lastMatch ne null
  } else false

  /** Resets the matcher and searches starting at index `start` of `inputstr`.
   *
   *  @param start value assigned to the regexp's `lastIndex` before searching
   *  @return `true` iff a match was found
   */
  def find(start: Int): Boolean = {
    reset()
    regexp.lastIndex = start
    find()
  }

  // Replace methods

  /** Appends to `sb` the text between the previous append position and the
   *  start of the current match, followed by the expanded `replacement`.
   *
   *  In `replacement`, `$` followed by decimal digits is replaced by the text
   *  of that group, and a backslash makes the next character literal (a
   *  trailing lone backslash is dropped). A referenced group for which
   *  `group(n)` returns `null` contributes nothing.
   *
   *  @param sb          buffer receiving the output
   *  @param replacement replacement template
   *  @return this matcher
   *  @throws IllegalStateException    if there is no current match
   *  @throws IllegalArgumentException if `$` is not followed by a digit
   */
  def appendReplacement(sb: StringBuffer, replacement: String): Matcher = {
    sb.append(inputstr.substring(appendPos, start()))

    @inline def isDigit(c: Char) = c >= '0' && c <= '9'

    val len = replacement.length
    var i = 0
    while (i < len) {
      replacement.charAt(i) match {
        case '$' =>
          i += 1
          val j = i
          while (i < len && isDigit(replacement.charAt(i)))
            i += 1
          // Fail with a meaningful message instead of parseInt("") blowing up.
          if (i == j) {
            throw new IllegalArgumentException(
                "Illegal group reference: group index is missing")
          }
          val group = Integer.parseInt(replacement.substring(j, i))
          // group(n) is null for a group that did not match; appending it
          // directly would insert the text "null" into the output.
          val captured = this.group(group)
          if (captured != null)
            sb.append(captured)

        case '\\' =>
          i += 1
          if (i < len)
            sb.append(replacement.charAt(i))
          i += 1

        case c =>
          sb.append(c)
          i += 1
      }
    }

    appendPos = end()
    this
  }

  /** Appends the part of `inputstr` after the last append position to `sb`.
   *
   *  @return `sb`
   */
  def appendTail(sb: StringBuffer): StringBuffer = {
    sb.append(inputstr.substring(appendPos))
    appendPos = inputstr.length
    sb
  }

  /** Resets the matcher and replaces the first match, if any, using the
   *  template syntax of `appendReplacement`.
   *
   *  @return the resulting string, or `inputstr` itself when nothing matches
   */
  def replaceFirst(replacement: String): String = {
    reset()

    if (find()) {
      val sb = new StringBuffer
      appendReplacement(sb, replacement)
      appendTail(sb)
      sb.toString
    } else {
      inputstr
    }
  }

  /** Resets the matcher and replaces every match using the template syntax
   *  of `appendReplacement`.
   */
  def replaceAll(replacement: String): String = {
    reset()

    val sb = new StringBuffer
    while (find()) {
      appendReplacement(sb, replacement)
    }
    appendTail(sb)

    sb.toString
  }

  // Reset methods

  /** Discards the current match, rewinds the search and append positions to
   *  0, and re-enables `find()`.
   *
   *  @return this matcher
   */
  def reset(): Matcher = {
    regexp.lastIndex = 0
    lastMatch = null
    lastMatchIsValid = false
    canStillFind = true
    appendPos = 0
    this
  }

  /** Replaces the input (the region becomes the whole of `input`) and resets.
   *
   *  @return this matcher
   */
  def reset(input: CharSequence): Matcher = {
    regionStart0 = 0
    regionEnd0 = input.length()
    input0 = input
    inputstr = input0.toString
    reset()
  }

  /** Switches to another pattern, keeping the current search position but
   *  discarding the current match.
   *
   *  @return this matcher
   */
  def usePattern(pattern: Pattern): Matcher = {
    val prevLastIndex = regexp.lastIndex
    pattern0 = pattern
    regexp = new js.RegExp(pattern.jspattern, pattern.jsflags)
    regexp.lastIndex = prevLastIndex
    lastMatch = null
    this
  }

  // Query state methods - implementation of MatchResult

  /** Returns the current match or throws if there is none. */
  private def ensureLastMatch: js.RegExp.ExecResult = {
    if (lastMatch == null)
      throw new IllegalStateException("No match available")
    lastMatch
  }

  def groupCount(): Int = ensureLastMatch.length-1

  def start(): Int = ensureLastMatch.index
  def end(): Int = start() + group().length
  def group(): String = ensureLastMatch(0).get

  /** Start index of `group`, or -1 if that group has no value. */
  def start(group: Int): Int = {
    if (group == 0) start()
    else {
      val last = ensureLastMatch
      // not provided by JS RegExp, so we make up something that at least
      // will have some sound behavior from scala.util.matching.Regex
      last(group).fold(-1) {
        groupStr => inputstr.indexOf(groupStr, last.index)
      }
    }
  }

  /** End index of `group`, or -1 if `start(group)` is -1. */
  def end(group: Int): Int = {
    val s = start(group)
    if (s == -1) -1
    else s + this.group(group).length
  }

  /** Text of `group`, or `null` if that group has no value. */
  def group(group: Int): String = ensureLastMatch(group).orNull

  // Seal the state

  /** Snapshot of the current match state (which may be "no match"). */
  def toMatchResult(): MatchResult = new SealedResult(inputstr, lastMatch)

  // Other query state methods

  /** `true` iff the last attempt since the latest reset either failed or
   *  produced a match ending at the end of `inputstr`.
   */
  def hitEnd(): Boolean =
    lastMatchIsValid && (lastMatch == null || end() == inputstr.length)

  //def requireEnd(): Boolean // I don't understand the spec

  // Stub methods for region management

  def regionStart(): Int = regionStart0
  def regionEnd(): Int = regionEnd0

  /** Stub: returns a NEW matcher over `[start, end)` of the original input;
   *  this matcher is left unchanged.
   */
  def region(start: Int, end: Int): Matcher =
    new Matcher(pattern0, input0, start, end)

  def hasTransparentBounds(): Boolean = false
  //def useTransparentBounds(b: Boolean): Matcher

  def hasAnchoringBounds(): Boolean = true
  //def useAnchoringBounds(b: Boolean): Matcher
}

object Matcher {
  /** Escapes every `\` and `$` in `s` with a backslash so that the result is
   *  taken literally by `appendReplacement`.
   */
  def quoteReplacement(s: String): String = {
    var result = ""
    var i = 0
    while (i < s.length) {
      val c = s.charAt(i)
      // Both arms yield a String so the match is not widened to Any.
      result += ((c: @switch) match {
        case '\\' | '$' => "\\"+c
        case _ => c.toString
      })
      i += 1
    }
    result
  }

  /** Immutable snapshot of a matcher's state, created by `toMatchResult()`.
   *
   *  @param inputstr  the string that was matched against
   *  @param lastMatch the captured exec result, or `null` if there was none
   */
  private final class SealedResult(inputstr: String,
      lastMatch: js.RegExp.ExecResult) extends MatchResult {

    def groupCount(): Int = ensureLastMatch.length-1

    def start(): Int = ensureLastMatch.index
    def end(): Int = start() + group().length
    def group(): String = ensureLastMatch(0).get

    def start(group: Int): Int = {
      if (group == 0) start()
      else {
        val last = ensureLastMatch

        // not provided by JS RegExp, so we make up something that at least
        // will have some sound behavior from scala.util.matching.Regex
        last(group).fold(-1) {
          groupStr => inputstr.indexOf(groupStr, last.index)
        }
      }
    }

    def end(group: Int): Int = {
      val s = start(group)
      if (s == -1) -1
      else s + this.group(group).length
    }

    def group(group: Int): String = ensureLastMatch(group).orNull

    /** Returns the captured match or throws if there is none. */
    private def ensureLastMatch: js.RegExp.ExecResult = {
      if (lastMatch == null)
        throw new IllegalStateException("No match available")
      lastMatch
    }
  }
}

// ---------------------------------------------------------------------------
// javalib/src/main/scala/java/util/regex/Pattern.scala
// ---------------------------------------------------------------------------

/** Compiled pattern, translated to a JavaScript regexp source and flags.
 *
 *  @param pattern0 the pattern string as given to `compile`
 *  @param flags0   the flags as given to `compile`
 */
final class Pattern private (pattern0: String, flags0: Int)
    extends Serializable {

  import Pattern._

  def pattern(): String = pattern0

  /** Effective flags: the compile flags adjusted by any leading in-pattern
   *  flag specifier recognized by `tryFlagHack`.
   */
  def flags(): Int = flags1

  // JS regexp source and effective flags. LITERAL patterns are quoted as a
  // whole; otherwise the two rewriting hacks are tried in order before
  // falling back to the pattern unchanged.
  private[regex] val (jspattern, flags1) = {
    if ((flags0 & LITERAL) != 0) (quote(pattern0), flags0)
    else {
      trySplitHack(pattern0, flags0)
        .orElse(tryFlagHack(pattern0, flags0))
        .getOrElse((pattern0, flags0))
    }
  }

  // JS flag string: always global; only CASE_INSENSITIVE and MULTILINE are
  // translated, the other flag bits are not reflected here.
  private[regex] val jsflags = {
    var f = "g"
    if ((flags & CASE_INSENSITIVE) != 0)
      f += "i"
    if ((flags & MULTILINE) != 0)
      f += "m"
    f
  }

  override def toString(): String = pattern0

  /** Creates a matcher over the whole of `input`. */
  def matcher(input: CharSequence): Matcher =
    new Matcher(this, input, 0, input.length)

  def split(input: CharSequence): Array[String] =
    split(input, 0)

  /** Splits `input` around matches of this pattern.
   *
   *  @param limit if positive, the maximum number of elements returned; if
   *               zero, trailing empty strings are removed; if zero or
   *               negative, the number of elements is not capped
   */
  def split(input: CharSequence, limit: Int): Array[String] = {
    val lim = if (limit > 0) limit else Int.MaxValue

    val result = js.Array[String]()
    val inputStr = input.toString
    val matcher = this.matcher(inputStr)
    var prevEnd = 0

    // Actually split original string
    while ((result.length < lim-1) && matcher.find()) {
      result.push(inputStr.substring(prevEnd, matcher.start()))
      prevEnd = matcher.end()
    }
    result.push(inputStr.substring(prevEnd))

    // Remove a leading empty element iff the first match was zero-length
    // and there is no other place the regex matches
    if (prevEnd == 0 && result.length == 2 && (lim > 2 || !matcher.find())) {
      Array(inputStr)
    } else {
      var len = result.length
      if (limit == 0) {
        while (len > 1 && result(len-1).isEmpty)
          len -= 1
      }

      val actualResult = new Array[String](len)
      result.copyToArray(actualResult)
      actualResult
    }
  }
}

object Pattern {
  final val UNIX_LINES = 0x01
  final val CASE_INSENSITIVE = 0x02
  final val COMMENTS = 0x04
  final val MULTILINE = 0x08
  final val LITERAL = 0x10
  final val DOTALL = 0x20
  final val UNICODE_CASE = 0x40
  final val CANON_EQ = 0x80
  final val UNICODE_CHARACTER_CLASS = 0x100

  def compile(regex: String, flags: Int): Pattern =
    new Pattern(regex, flags)

  def compile(regex: String): Pattern =
    new Pattern(regex, 0)

  /** Compiles `regex` and tests it against the whole of `input`. */
  def matches(regex: String, input: CharSequence): Boolean =
    compile(regex).matcher(input).matches()

  /** Backslash-escapes the regexp metacharacters listed below in `s`. */
  def quote(s: String): String = {
    var result = ""
    var i = 0
    while (i < s.length) {
      val c = s.charAt(i)
      // Both arms yield a String so the match is not widened to Any.
      result += ((c: @switch) match {
        case '\\' | '.' | '(' | ')' | '[' | ']' | '{' | '}' | '|'
          | '?' | '*' | '+' | '^' | '$' => "\\"+c
        case _ => c.toString
      })
      i += 1
    }
    result
  }

  /** This is a hack to support StringLike.split().
   *  It replaces occurrences of \Q<char>\E by quoted(<char>)
   */
  @inline
  private def trySplitHack(pat: String, flags: Int) = {
    val m = splitHackPat.exec(pat)
    if (m != null)
      Some((quote(m(1).get), flags))
    else
      None
  }

  /** Recognizes a leading flag specifier such as `(?i)` or `(?U-i)`, strips
   *  it from the pattern, and folds it into the flags: characters before `-`
   *  set the corresponding flag, characters after `-` clear it.
   */
  @inline
  private def tryFlagHack(pat: String, flags0: Int) = {
    val m = flagHackPat.exec(pat)
    if (m != null) {
      val newPat = pat.substring(m(0).get.length) // cut off the flag specifiers
      val flags1 = m(1).fold(flags0) { chars =>
        chars.foldLeft(flags0) { (f, c) => f | charToFlag(c) }
      }
      val flags2 = m(2).fold(flags1) { chars =>
        chars.foldLeft(flags1) { (f, c) => f & ~charToFlag(c) }
      }
      Some((newPat, flags2))
    } else
      None
  }

  /** Maps an in-pattern flag character to its flag constant. */
  private def charToFlag(c: Char) = (c: @switch) match {
    case 'i' => CASE_INSENSITIVE
    case 'd' => UNIX_LINES
    case 'm' => MULTILINE
    case 's' => DOTALL
    case 'u' => UNICODE_CASE
    case 'x' => COMMENTS
    case 'U' => UNICODE_CHARACTER_CLASS
    case _   => sys.error("bad in-pattern flag")
  }

  /** matches \Q<char>\E to support StringLike.split */
  private val splitHackPat = new js.RegExp("^\\\\Q(.|\\n|\\r)\\\\E$")

  /** regex to match flag specifiers in regex. E.g. (?u), (?-i), (?U-i) */
  private val flagHackPat =
    new js.RegExp("^\\(\\?([idmsuxU]*)(?:-([idmsuxU]*))?\\)")
}