From b4e3becbf348bad95f48da5a94f4ddb4272b2891 Mon Sep 17 00:00:00 2001
From: Som Snytt
Date: Wed, 24 Dec 2014 17:59:49 -0800
Subject: [backport] SI-9060 Backpatch fifth-edition names

Because the compiler and library share some code in this
version, compiler must exclude xml tags that look like
Scala operators, such as `<:`.

This is an upstream port of:
scala-xml/commit/968f7bd94e934c781c19e25847ab09ac98cfbaf6
---
 .../scala/tools/nsc/ast/parser/Scanners.scala  |  4 +-
 src/library/scala/xml/parsing/TokenTests.scala | 56 ++++++++--------------
 test/files/jvm/unittest_xml.scala              |  4 +-
 3 files changed, 25 insertions(+), 39 deletions(-)

diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 8d295a28d0..3ee7941995 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -367,7 +367,9 @@ trait Scanners extends ScannersCommon {
           val last = if (charOffset >= 2) buf(charOffset - 2) else ' '
           nextChar()
           last match {
-            case ' ' | '\t' | '\n' | '{' | '(' | '>' if isNameStart(ch) || ch == '!' || ch == '?' =>
+            // exclude valid xml names that happen to be Scala operator chars
+            case ' ' | '\t' | '\n' | '{' | '(' | '>' if (isNameStart(ch) && ch != ':' && !isSpecial(ch))
+                || ch == '!' || ch == '?' =>
               token = XMLSTART
             case _ =>
               // Console.println("found '<', but last is '"+in.last+"'"); // DEBUG
diff --git a/src/library/scala/xml/parsing/TokenTests.scala b/src/library/scala/xml/parsing/TokenTests.scala
index c9cafaeea1..ce7accbd98 100644
--- a/src/library/scala/xml/parsing/TokenTests.scala
+++ b/src/library/scala/xml/parsing/TokenTests.scala
@@ -30,43 +30,25 @@ trait TokenTests {
   def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
   def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9')
 
-  /** {{{
-   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
-   *             | CombiningChar | Extender
-   *  }}}
-   *  See [4] and Appendix B of XML 1.0 specification.
-   */
-  def isNameChar(ch: Char) = {
-    import java.lang.Character._
-    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
-
-    isNameStart(ch) || (getType(ch).toByte match {
-      case COMBINING_SPACING_MARK |
-              ENCLOSING_MARK | NON_SPACING_MARK |
-              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
-      case _ => ".-:" contains ch
-    })
-  }
-
-  /** {{{
-   *  NameStart ::= ( Letter | '_' )
-   *  }}}
-   *  where Letter means in one of the Unicode general
-   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
-   *
-   *  We do not allow a name to start with `:`.
-   *  See [3] and Appendix B of XML 1.0 specification
-   */
-  def isNameStart(ch: Char) = {
-    import java.lang.Character._
-
-    getType(ch).toByte match {
-      case LOWERCASE_LETTER |
-              UPPERCASE_LETTER | OTHER_LETTER |
-              TITLECASE_LETTER | LETTER_NUMBER => true
-      case _ => ch == '_'
-    }
-  }
+  def isNameChar(c: Char): Boolean = (
+    isNameStart(c) ||
+      (c >= '0' && c <= '9') ||
+      c == '-' ||
+      c == '.' ||
+      c == 0xB7 ||
+      (c >= 0x300 && c <= 0x36F) ||
+      (c >= 0x203F && c <= 0x2040)
+  )
+  def isNameStart(c: Char): Boolean = (
+    if (c < 0x00C0) isAlpha(c) || c == ':' || c == '_'
+    else if (c < 0x0300) c != 0xD7 && c != 0xF7
+    else if (c < 0x2000) c >= 0x370 && c != 0x37E
+    else if (c < 0x3001) c == 0x200C || c == 0x200D || (0x2070 to 0x218F contains c) ||
+      (0x2C00 to 0x2FEF contains c)
+    else if (c < 0xD800) true
+    else if (c < 0x10000) (0xF900 to 0xFDCF contains c) || (0xFDF0 to 0xFFFD contains c)
+    else false // codepoint < 0xF0000
+  )
 
   /** {{{
    *  Name ::= ( Letter | '_' ) (NameChar)*
diff --git a/test/files/jvm/unittest_xml.scala b/test/files/jvm/unittest_xml.scala
index 106334e625..9ffd459fde 100644
--- a/test/files/jvm/unittest_xml.scala
+++ b/test/files/jvm/unittest_xml.scala
@@ -62,7 +62,9 @@ object Test {
   object UtilityTest {
     def run() {
       assert(Utility.isNameStart('b'))
-      assert(!Utility.isNameStart(':'))
+
+      // no longer: this was a convenience for the implementation, not to spec.
+      //assert(!Utility.isNameStart(':'))
 
       val x = 
-- 
cgit v1.2.3