diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/library/scala/xml/parsing/TokenTests.scala | 129 |
1 files changed, 39 insertions, 90 deletions
diff --git a/src/library/scala/xml/parsing/TokenTests.scala b/src/library/scala/xml/parsing/TokenTests.scala index 608a715d15..69788b5c22 100644 --- a/src/library/scala/xml/parsing/TokenTests.scala +++ b/src/library/scala/xml/parsing/TokenTests.scala @@ -10,7 +10,7 @@ package scala.xml -package parsing; +package parsing /** @@ -19,33 +19,33 @@ package parsing; trait TokenTests { /** (#x20 | #x9 | #xD | #xA) */ - final def isSpace( ch:Char ): Boolean = ch match { + final def isSpace(ch: Char): Boolean = ch match { case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true - case _ => false; + case _ => false } - /** (#x20 | #x9 | #xD | #xA)+ */ - final def isSpace(cs: Seq[Char]): Boolean = { - val it = cs.iterator; - it.hasNext && it.forall { isSpace }; - } + final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) + + /** These are 99% sure to be redundant but refactoring on the safe side. */ + def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') /** NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' * | CombiningChar | Extender * * see [4] and Appendix B of XML 1.0 specification */ - def isNameChar(ch: Char) = isNameStart(ch) || (ch match { - case '.' | '-' | ':' => true; - case _ => java.lang.Character.getType( ch ).toByte match { - case java.lang.Character.COMBINING_SPACING_MARK => true; // Mc - case java.lang.Character.ENCLOSING_MARK => true; // Me - case java.lang.Character.NON_SPACING_MARK => true; // Mn - case java.lang.Character.MODIFIER_LETTER => true; // Lm - case java.lang.Character.DECIMAL_DIGIT_NUMBER => true; // Nd - case _ => false; - } - }); + def isNameChar(ch: Char) = { + import java.lang.Character._ + // The constants represent groups Mc, Me, Mn, Lm, and Nd. + + isNameStart(ch) || (getType(ch).toByte match { + case COMBINING_SPACING_MARK | + ENCLOSING_MARK | NON_SPACING_MARK | + MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true + case _ => ".-:" contains ch + }) + } /** NameStart ::= ( Letter | '_' ) * where Letter means in one of the Unicode general @@ -54,48 +54,27 @@ trait TokenTests { * We do not allow a name to start with ':'. * see [3] and Appendix B of XML 1.0 specification */ - def isNameStart(ch: Char) = - java.lang.Character.getType(ch).asInstanceOf[Byte] match { - case java.lang.Character.LOWERCASE_LETTER => true; - case java.lang.Character.UPPERCASE_LETTER => true; - case java.lang.Character.OTHER_LETTER => true; - case java.lang.Character.TITLECASE_LETTER => true; - case java.lang.Character.LETTER_NUMBER => true; - case _ => ch match { - case '_' => true - case _ => false; - } + def isNameStart(ch: Char) = { + import java.lang.Character._ + + getType(ch).toByte match { + case LOWERCASE_LETTER | + UPPERCASE_LETTER | OTHER_LETTER | + TITLECASE_LETTER | LETTER_NUMBER => true + case _ => ch == '_' } + } /** Name ::= ( Letter | '_' ) (NameChar)* * * see [5] of XML 1.0 specification */ - def isName(s: String): Boolean = { - if( s.length() > 0 ) { - val y = s.iterator; - if (isNameStart(y.next)) { - while (y.hasNext && isNameChar(y.next)) {}; - !y.hasNext - } else false; - } else false; - } + def isName(s: String) = + s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar) - def isPubIDChar(ch: Char): Boolean = { - //Console.println("char: '" + ch + "'"); - ch match { - case '\u0020' | '\u000D' | '\u000A' => true; - case _ if - (('0' <= ch && ch <= '9') || ('a' <= ch && ch <= 'z') || - ('A' <= ch && ch <= 'Z')) => true; - case '-' | '\''| '(' | ')' | '+' | ',' | '.' | - '/' | ':' | '=' | '?' | ';' | '!' | '*' | - '#' | '@' | '$' | '_' | '%' => true - case _ => - //Console.println("false: '" + ch + "'"); - false; - } - } + def isPubIDChar(ch: Char): Boolean = + isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') || + ("""-\()+,./:=?;!*#@$_%""" contains ch) /** * Returns true if the encoding name is a valid IANA encoding. @@ -105,43 +84,13 @@ trait TokenTests { * * @param ianaEncoding The IANA encoding name. */ - def isValidIANAEncoding(ianaEncoding: Seq[Char]): Boolean = { - val it = ianaEncoding.iterator; - if (!it.hasNext) - return false; - - var c = it.next; - if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { - while (it.hasNext) { - c = it.next; - if ((c < 'A' || c > 'Z') && (c < 'a' || c > 'z') && - (c < '0' || c > '9') && c != '.' && c != '_' && - c != '-') { - return false; - } - } - return true; - } else - return false; - } // isValidIANAEncoding(String): Boolean - - def checkSysID( s:String ): Boolean = { - s.indexOf('"'.asInstanceOf[Int]) == -1 || s.indexOf('\''.asInstanceOf[Int]) == -1 - } + def isValidIANAEncoding(ianaEncoding: Seq[Char]) = { + def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c) - def checkPubID(s: String): Boolean = { - //Console.println("checkPubID of \""+s+"\""); - if (s.length() > 0) { - val y = s.iterator; - var c = ' '; - while (y.hasNext && isPubIDChar(c)) { - //Console.println(c); - c = y.next - }; - !y.hasNext - } - else - true + ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) && + (ianaEncoding.tail forall charOK) } + def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c)) + def checkPubID(s: String) = s forall isPubIDChar } |