src/library/scala/xml/parsing/TokenTests.scala


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100

/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2011, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala.xml
package parsing

/**
 * Helper functions for parsing XML fragments
 */
trait TokenTests {

  /** {{{
   *  (#x20 | #x9 | #xD | #xA)
   *  }}} */
  final def isSpace(ch: Char): Boolean = ch match {
    case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true
    case _                                         => false
  }
  /** {{{
   *  (#x20 | #x9 | #xD | #xA)+
   *  }}} */
  final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace)

  /** These are 99% sure to be redundant but refactoring on the safe side. */
  def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
  def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9')

  /** {{{
   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
   *             | CombiningChar | Extender
   *  }}}
   *  See [4] and Appendix B of XML 1.0 specification.
  */
  def isNameChar(ch: Char) = {
    import java.lang.Character._
    // The constants represent groups Mc, Me, Mn, Lm, and Nd.

    isNameStart(ch) || (getType(ch).toByte match {
      case COMBINING_SPACING_MARK |
              ENCLOSING_MARK | NON_SPACING_MARK |
              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
      case _                                         => ".-:" contains ch
    })
  }

  /** {{{
   *  NameStart ::= ( Letter | '_' )
   *  }}}
   *  where Letter means in one of the Unicode general
   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
   *
   *  We do not allow a name to start with `:`.
   *  See [3] and Appendix B of XML 1.0 specification
   */
  def isNameStart(ch: Char) = {
    import java.lang.Character._

    getType(ch).toByte match {
      case LOWERCASE_LETTER |
              UPPERCASE_LETTER | OTHER_LETTER |
              TITLECASE_LETTER | LETTER_NUMBER => true
      case _                                   => ch == '_'
    }
  }

  /** {{{
   *  Name ::= ( Letter | '_' ) (NameChar)*
   *  }}}
   *  See [5] of XML 1.0 specification.
   */
  def isName(s: String) =
    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)

  def isPubIDChar(ch: Char): Boolean =
    isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') ||
    ("""-\()+,./:=?;!*#@$_%""" contains ch)

  /**
   * Returns `true` if the encoding name is a valid IANA encoding.
   * This method does not verify that there is a decoder available
   * for this encoding, only that the characters are valid for an
   * IANA encoding name.
   *
   * @param ianaEncoding The IANA encoding name.
   */
  def isValidIANAEncoding(ianaEncoding: Seq[Char]) = {
    def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c)

    ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) &&
    (ianaEncoding.tail forall charOK)
  }

  def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c))
  def checkPubID(s: String) = s forall isPubIDChar
}