Remove dependency on xml in ast.parser

Copied the following files from scala.xml to ast.parser.xml:
- MarkupParsers differs from scala.xml.MarkupParsers only in the first 17 lines
- Utility.scala was refactored and reduced (also includes TokenTests)
author: Adriaan Moors <adriaan.moors@typesafe.com> 2013-06-20 10:55:58 -0700
committer: Adriaan Moors <adriaan.moors@typesafe.com> 2013-06-20 15:46:26 -0700
commit: 4e9b33ab24bb3bf922c37a05a79af364b7b32b84 (patch)
tree: 3176a0696b42591c920d1611348304bd99a357e1
parent: ef1264b2ff134771a2df4ee30e9a509fb7a78c49 (diff)
download: scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.gz
scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.bz2
scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.zip
5 files changed, 441 insertions, 14 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
index 70b9bf3168..d3f495f280 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
@@ -10,7 +10,7 @@ import scala.collection.mutable
 import mutable.{ Buffer, ArrayBuffer, ListBuffer }
 import scala.util.control.ControlThrowable
 import scala.tools.nsc.util.CharArrayReader
-import scala.xml.parsing.MarkupParserCommon
+import scala.tools.nsc.ast.parser.xml.{MarkupParserCommon, Utility}
 import scala.reflect.internal.Chars.{ SU, LF }
 
 // XXX/Note: many/most of the functions in here are almost direct cut and pastes
@@ -41,7 +41,7 @@ trait MarkupParsers {
   import global._
 
   class MarkupParser(parser: SourceFileParser, final val preserveWS: Boolean) extends MarkupParserCommon {
-
+    import Utility.{ isNameStart, isSpace }
     import Tokens.{ LBRACE, RBRACE }
 
     type PositionType = Position
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 82a3144304..a8162a01bf 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -12,7 +12,7 @@ import Tokens._
 import scala.annotation.{ switch, tailrec }
 import scala.collection.{ mutable, immutable }
 import mutable.{ ListBuffer, ArrayBuffer }
-import scala.xml.Utility.{ isNameStart }
+import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
 import scala.language.postfixOps
 
 /** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
diff --git a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
index f326212d5b..1abc0c860c 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
@@ -7,8 +7,6 @@ package scala.tools.nsc
 package ast.parser
 
 import scala.collection.{ mutable, immutable }
-import scala.xml.{ EntityRef, Text }
-import scala.xml.XML.{ xmlns }
 import symtab.Flags.MUTABLE
 import scala.reflect.internal.util.StringOps.splitWhere
 
@@ -143,14 +141,12 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
     (buf map convertToTextPat).toList
 
   def parseAttribute(pos: Position, s: String): Tree = {
-    val ts = scala.xml.Utility.parseAttributeValue(s) map {
-      case Text(s)      => text(pos, s)
-      case EntityRef(s) => entityRef(pos, s)
-    }
-    ts.length match {
-      case 0 => gen.mkNil
-      case 1 => ts.head
-      case _ => makeXMLseq(pos, ts.toList)
+    import xml.Utility.parseAttributeValue
+
+    parseAttributeValue(s, text(pos, _), entityRef(pos, _)) match {
+      case Nil      => gen.mkNil
+      case t :: Nil => t
+      case ts       => makeXMLseq(pos, ts.toList)
     }
   }
 
@@ -198,7 +194,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
 
     /* Extract all the namespaces from the attribute map. */
     val namespaces: List[Tree] =
-      for (z <- attrMap.keys.toList ; if z startsWith xmlns) yield {
+      for (z <- attrMap.keys.toList ; if z startsWith "xmlns") yield {
         val ns = splitPrefix(z) match {
           case (Some(_), rest)  => rest
           case _                => null
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
new file mode 100644
index 0000000000..f6cfb64ed8
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
@@ -0,0 +1,255 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala.tools.nsc.ast.parser.xml
+
+/** This is not a public trait - it contains common code shared
+ *  between the library level XML parser and the compiler's.
+ *  All members should be accessed through those.
+ */
+private[scala] trait MarkupParserCommon {
+  import Utility._
+  import scala.reflect.internal.Chars.SU
+
+  protected def unreachable = scala.sys.error("Cannot be reached.")
+
+  // type HandleType       // MarkupHandler, SymbolicXMLBuilder
+  type InputType        // Source, CharArrayReader
+  type PositionType     // Int, Position
+  type ElementType      // NodeSeq, Tree
+  type NamespaceType    // NamespaceBinding, Any
+  type AttributesType   // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+  def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+  def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+  /** parse a start or empty tag.
+   *  [40] STag         ::= '<' Name { S Attribute } [S]
+   *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+   */
+  protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+    val name = xName
+    xSpaceOpt()
+
+    (name, mkAttributes(name, pscope))
+  }
+
+  /** '<?' ProcInstr ::= Name [S ({Char} - ({Char} '?>' {Char}))] '?>'
+   *
+   * see [16]
+   */
+  def xProcInstr: ElementType = {
+    val n = xName
+    xSpaceOpt()
+    xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>")
+  }
+
+  /** attribute value, terminated by either `'` or `"`. value may not contain `<`.
+   *  @param endCh either `'` or `"`
+   */
+  def xAttributeValue(endCh: Char): String = {
+    val buf = new StringBuilder
+    while (ch != endCh) {
+      // well-formedness constraint
+      if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+      else if (ch == SU) truncatedError("")
+      else buf append ch_returning_nextch
+    }
+    ch_returning_nextch
+    // @todo: normalize attribute value
+    buf.toString
+  }
+
+  def xAttributeValue(): String = {
+    val str = xAttributeValue(ch_returning_nextch)
+    // well-formedness constraint
+    normalizeAttributeValue(str)
+  }
+
+  private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+    val buf = new StringBuilder
+    while (it.hasNext) it.next() match {
+      case `end`  => return buf.toString
+      case ch     => buf append ch
+    }
+    scala.sys.error("Expected '%s'".format(end))
+  }
+
+  /** [42]  '<' xmlEndTag ::=  '<' '/' Name S? '>'
+   */
+  def xEndTag(startName: String) {
+    xToken('/')
+    if (xName != startName)
+      errorNoEnd(startName)
+
+    xSpaceOpt()
+    xToken('>')
+  }
+
+  /** actually, Name ::= (Letter | '_' | ':') (NameChar)*  but starting with ':' cannot happen
+   *  Name ::= (Letter | '_') (NameChar)*
+   *
+   *  see  [5] of XML 1.0 specification
+   *
+   *  pre-condition:  ch != ':' // assured by definition of XMLSTART token
+   *  post-condition: name neither starts nor ends with ':'
+   */
+  def xName: String = {
+    if (ch == SU)
+      truncatedError("")
+    else if (!isNameStart(ch))
+      return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+    val buf = new StringBuilder
+
+    do buf append ch_returning_nextch
+    while (isNameChar(ch))
+
+    if (buf.last == ':') {
+      reportSyntaxError( "name cannot end in ':'" )
+      buf.toString dropRight 1
+    }
+    else buf.toString
+  }
+
+  private def attr_unescape(s: String) = s match {
+    case "lt"     => "<"
+    case "gt"     => ">"
+    case "amp"    => "&"
+    case "apos"   => "'"
+    case "quot"   => "\""
+    case "quote"  => "\""
+    case _        => "&" + s + ";"
+  }
+
+  /** Normalizes an attribute value: each whitespace character becomes a space, and
+   *  character references and the entities known to `attr_unescape` are expanded.  See spec 3.3.3.
+   */
+  private def normalizeAttributeValue(attval: String): String = {
+    val buf = new StringBuilder
+    val it = attval.iterator.buffered
+
+    while (it.hasNext) buf append (it.next() match {
+      case ' ' | '\t' | '\n' | '\r'            => " "
+      case '&' if it.hasNext && it.head == '#' => it.next() ; xCharRef(it)  // hasNext first: `head` fails on a trailing '&'
+      case '&'                                 => attr_unescape(takeUntilChar(it, ';'))
+      case c                                   => c
+    })
+
+    buf.toString
+  }
+
+  /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   *
+   * see [66]
+   */
+  def xCharRef(ch: () => Char, nextch: () => Unit): String =
+    Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+  def xCharRef(it: Iterator[Char]): String = {
+    var c = it.next()
+    Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
+  }
+
+  def xCharRef: String = xCharRef(() => ch, () => nextch())
+
+  /** Create a lookahead reader which does not influence the input */
+  def lookahead(): BufferedIterator[Char]
+
+  /** The library and compiler parsers had the interesting distinction of
+   *  different behavior for nextch (a function for which there are a total
+   *  of two plausible behaviors, so we know the design space was fully
+   *  explored.) One of them returned the value of nextch before the increment
+   *  and one of them the new value.  So to unify code we have to at least
+   *  temporarily abstract over the nextchs.
+   */
+  def ch: Char
+  def nextch(): Unit
+  protected def ch_returning_nextch: Char
+  def eof: Boolean
+
+  // def handle: HandleType
+  var tmppos: PositionType
+
+  def xHandleError(that: Char, msg: String): Unit
+  def reportSyntaxError(str: String): Unit
+  def reportSyntaxError(pos: Int, str: String): Unit
+
+  def truncatedError(msg: String): Nothing
+  def errorNoEnd(tag: String): Nothing
+
+  protected def errorAndResult[T](msg: String, x: T): T = {
+    reportSyntaxError(msg)
+    x
+  }
+
+  def xToken(that: Char) {
+    if (ch == that) nextch()
+    else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+  }
+  def xToken(that: Seq[Char]) { that foreach xToken }
+
+  /** scan [S] '=' [S]*/
+  def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() }
+
+  /** skip optional space S? */
+  def xSpaceOpt() = while (isSpace(ch) && !eof) nextch()
+
+  /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
+  def xSpace() =
+    if (isSpace(ch)) { nextch(); xSpaceOpt() }
+    else xHandleError(ch, "whitespace expected")
+
+  /** Apply a function and return the passed value */
+  def returning[T](x: T)(f: T => Unit): T = { f(x); x }
+
+  /** Execute body with a variable saved and restored after execution */
+  def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+    val saved = getter
+    try body
+    finally setter(saved)
+  }
+
+  /** Take characters from input stream until given String "until"
+   *  is seen.  Once seen, the accumulated characters are passed
+   *  along with the current Position to the supplied handler function.
+   */
+  protected def xTakeUntil[T](
+    handler: (PositionType, String) => T,
+    positioner: () => PositionType,
+    until: String): T =
+  {
+    val sb = new StringBuilder
+    val head = until.head
+    val rest = until.tail
+
+    while (true) {
+      if (ch == head && peek(rest))
+        return handler(positioner(), sb.toString)
+      else if (ch == SU)
+        truncatedError("")  // throws TruncatedXMLControl in compiler
+
+      sb append ch
+      nextch()
+    }
+    unreachable
+  }
+
+  /** Create a non-destructive lookahead reader and see if the head
+   *  of the input would match the given String.  If yes, return true
+   *  and drop the entire String from input; if no, return false
+   *  and leave input unchanged.
+   */
+  private def peek(lookingFor: String): Boolean =
+    (lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
+      // drop the chars from the real reader (all lookahead + orig)
+      (0 to lookingFor.length) foreach (_ => nextch())
+      true
+    }
+}
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
new file mode 100755
index 0000000000..39e4831af2
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
@@ -0,0 +1,176 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala.tools.nsc.ast.parser.xml
+
+import scala.collection.mutable
+
+
+/**
+ * The `Utility` object provides the helpers needed to parse XML literals:
+ * attribute-value and character-reference parsing, and XML character-class tests.
+ *
+ * @author Burak Emir
+ */
+object Utility {
+  import scala.reflect.internal.Chars.SU
+
+  private val unescMap = Map(
+    "lt"    -> '<',
+    "gt"    -> '>',
+    "amp"   -> '&',
+    "quot"  -> '"',
+    "apos"  -> '\''
+  )
+
+  /**
+   * Appends the character denoted by the predefined entity `ref` to `s`:
+   * `amp` becomes `&amp;`, `lt` becomes `&lt;` etc.
+   *
+   * @return    `s`, or `null` if `ref` was not a predefined entity.
+   */
+  private final def unescape(ref: String, s: StringBuilder): StringBuilder =
+    ((unescMap get ref) map (s append _)).orNull
+
+  def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = {
+    val sb  = new StringBuilder
+    var rfb: StringBuilder = null
+    val nb = new mutable.ListBuffer[T]()
+
+    val it = value.iterator
+    while (it.hasNext) {
+      var c = it.next()
+      // entity! flush buffer into text node
+      if (c == '&') {
+        c = it.next()
+        if (c == '#') {
+          c = it.next()
+          val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)})
+          sb.append(theChar)
+        }
+        else {
+          if (rfb eq null) rfb = new StringBuilder()
+          rfb append c
+          c = it.next()
+          while (c != ';') {
+            rfb.append(c)
+            c = it.next()
+          }
+          val ref = rfb.toString()
+          rfb.clear()
+          unescape(ref,sb) match {
+            case null =>
+              if (!sb.isEmpty) {  // flush buffer
+                nb += text(sb.toString())
+                sb.clear()
+              }
+              nb += entityRef(ref) // add entityref
+            case _ =>
+          }
+        }
+      }
+      else sb append c
+    }
+
+    if(!sb.isEmpty) // flush buffer
+      nb += text(sb.toString())
+
+    nb.toList
+  }
+
+  /** Parses the digits of a character reference; `ch()` yields the first char after `&amp;#`.
+   *  {{{
+   *    CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
+   *              | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   *  }}}
+   *  See [66]. Stops at `;` without consuming it. A value that is not a Unicode code point
+   *  is reported through `reportSyntaxError` and yields "" (if that callback returns). */
+  def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+    val hex  = (ch() == 'x') && { nextch(); true }
+    val base = if (hex) 16 else 10
+    // Saturate just past the last code point so that over-long digit runs cannot overflow Int.
+    def add(n: Int, d: Int) = math.min(n * base + d, Character.MAX_CODE_POINT + 1)
+    var i = 0
+    while (ch() != ';') {
+      ch() match {
+        case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+          i = add(i, ch().asDigit)
+        case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+          if (hex) i = add(i, ch().asDigit)
+          else reportSyntaxError("hex char not allowed in decimal char ref\n" +
+                                 "Did you mean to write &#x ?")
+        case SU =>
+          reportTruncatedError("")
+        case _ =>
+          reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+      }
+      nextch()
+    }
+    if (Character.isValidCodePoint(i)) new String(Array(i), 0, 1)  // the String constructor throws on invalid code points
+    else { reportSyntaxError("character reference does not denote a valid Unicode code point"); "" }
+  }
+
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)
+   *  }}} */
+  final def isSpace(ch: Char): Boolean = ch match {
+    case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true
+    case _                                         => false
+  }
+  /** {{{
+   *  (#x20 | #x9 | #xD | #xA)+
+   *  }}} */
+  final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace)
+
+  /** {{{
+   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+   *             | CombiningChar | Extender
+   *  }}}
+   *  See [4] and Appendix B of XML 1.0 specification.
+  */
+  def isNameChar(ch: Char) = {
+    import java.lang.Character._
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+
+    isNameStart(ch) || (getType(ch).toByte match {
+      case COMBINING_SPACING_MARK |
+              ENCLOSING_MARK | NON_SPACING_MARK |
+              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _                                         => ".-:" contains ch
+    })
+  }
+
+  /** {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+   */
+  def isNameStart(ch: Char) = {
+    import java.lang.Character._
+
+    getType(ch).toByte match {
+      case LOWERCASE_LETTER |
+              UPPERCASE_LETTER | OTHER_LETTER |
+              TITLECASE_LETTER | LETTER_NUMBER => true
+      case _                                   => ch == '_'
+    }
+  }
+
+  /** {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String) =
+    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)
+
+}
author	Adriaan Moors <adriaan.moors@typesafe.com>	2013-06-20 10:55:58 -0700
committer	Adriaan Moors <adriaan.moors@typesafe.com>	2013-06-20 15:46:26 -0700
commit	4e9b33ab24bb3bf922c37a05a79af364b7b32b84 (patch)
tree	3176a0696b42591c920d1611348304bd99a357e1
parent	ef1264b2ff134771a2df4ee30e9a509fb7a78c49 (diff)
download	scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.gz scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.bz2 scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.zip