summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdriaan Moors <adriaan.moors@typesafe.com>2013-06-20 10:55:58 -0700
committerAdriaan Moors <adriaan.moors@typesafe.com>2013-06-20 15:46:26 -0700
commit4e9b33ab24bb3bf922c37a05a79af364b7b32b84 (patch)
tree3176a0696b42591c920d1611348304bd99a357e1
parentef1264b2ff134771a2df4ee30e9a509fb7a78c49 (diff)
downloadscala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.gz
scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.tar.bz2
scala-4e9b33ab24bb3bf922c37a05a79af364b7b32b84.zip
Remove dependency on xml in ast.parser
Copied the following files from scala.xml to ast.parser.xml:
- MarkupParsers differs from scala.xml.MarkupParsers only in the first 17 lines
- Utility.scala was refactored and reduced (also includes TokenTests)
-rwxr-xr-xsrc/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala4
-rw-r--r--src/compiler/scala/tools/nsc/ast/parser/Scanners.scala2
-rwxr-xr-xsrc/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala18
-rw-r--r--src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala255
-rwxr-xr-xsrc/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala176
5 files changed, 441 insertions, 14 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
index 70b9bf3168..d3f495f280 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala
@@ -10,7 +10,7 @@ import scala.collection.mutable
import mutable.{ Buffer, ArrayBuffer, ListBuffer }
import scala.util.control.ControlThrowable
import scala.tools.nsc.util.CharArrayReader
-import scala.xml.parsing.MarkupParserCommon
+import scala.tools.nsc.ast.parser.xml.{MarkupParserCommon, Utility}
import scala.reflect.internal.Chars.{ SU, LF }
// XXX/Note: many/most of the functions in here are almost direct cut and pastes
@@ -41,7 +41,7 @@ trait MarkupParsers {
import global._
class MarkupParser(parser: SourceFileParser, final val preserveWS: Boolean) extends MarkupParserCommon {
-
+ import Utility.{ isNameStart, isSpace }
import Tokens.{ LBRACE, RBRACE }
type PositionType = Position
diff --git a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
index 82a3144304..a8162a01bf 100644
--- a/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/Scanners.scala
@@ -12,7 +12,7 @@ import Tokens._
import scala.annotation.{ switch, tailrec }
import scala.collection.{ mutable, immutable }
import mutable.{ ListBuffer, ArrayBuffer }
-import scala.xml.Utility.{ isNameStart }
+import scala.tools.nsc.ast.parser.xml.Utility.isNameStart
import scala.language.postfixOps
/** See Parsers.scala / ParsersCommon for some explanation of ScannersCommon.
diff --git a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
index f326212d5b..1abc0c860c 100755
--- a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
+++ b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala
@@ -7,8 +7,6 @@ package scala.tools.nsc
package ast.parser
import scala.collection.{ mutable, immutable }
-import scala.xml.{ EntityRef, Text }
-import scala.xml.XML.{ xmlns }
import symtab.Flags.MUTABLE
import scala.reflect.internal.util.StringOps.splitWhere
@@ -143,14 +141,12 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
(buf map convertToTextPat).toList
def parseAttribute(pos: Position, s: String): Tree = {
- val ts = scala.xml.Utility.parseAttributeValue(s) map {
- case Text(s) => text(pos, s)
- case EntityRef(s) => entityRef(pos, s)
- }
- ts.length match {
- case 0 => gen.mkNil
- case 1 => ts.head
- case _ => makeXMLseq(pos, ts.toList)
+ import xml.Utility.parseAttributeValue
+
+ parseAttributeValue(s, text(pos, _), entityRef(pos, _)) match {
+ case Nil => gen.mkNil
+ case t :: Nil => t
+ case ts => makeXMLseq(pos, ts.toList)
}
}
@@ -198,7 +194,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) {
/* Extract all the namespaces from the attribute map. */
val namespaces: List[Tree] =
- for (z <- attrMap.keys.toList ; if z startsWith xmlns) yield {
+ for (z <- attrMap.keys.toList ; if z startsWith "xmlns") yield {
val ns = splitPrefix(z) match {
case (Some(_), rest) => rest
case _ => null
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
new file mode 100644
index 0000000000..f6cfb64ed8
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/MarkupParserCommon.scala
@@ -0,0 +1,255 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala.tools.nsc.ast.parser.xml
+
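+/** This is not a public trait - it is the compiler's own copy of the common
+ * markup-parsing code that used to be shared with the library level XML
+ * parser (scala.xml), kept here so that ast.parser does not depend on xml.
+ * All members should be accessed through the parser that mixes it in.
+ */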
+private[scala] trait MarkupParserCommon {
+ import Utility._
+ import scala.reflect.internal.Chars.SU
+
+ protected def unreachable = scala.sys.error("Cannot be reached.")
+
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ type InputType // Source, CharArrayReader
+ type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+ /** parse a start or empty tag.
+ * [40] STag ::= '<' Name { S Attribute } [S]
+ * [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+ */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+ val name = xName
+ xSpaceOpt()
+
+ (name, mkAttributes(name, pscope))
+ }
+
+  /** Parses a processing instruction; the grammar below suggests the input
+   *  is expected to be positioned just after the opening `<?`.
+   *  {{{
+   *  '<?' ProcInstr ::= Name [S ({Char} - ({Char} '?>' {Char}))] '?>'
+   *  }}}
+   *  see [16] of the XML 1.0 specification
+   *
+   *  @return whatever `mkProcInstr` builds from the position supplied by
+   *          `tmppos`, the target name and the text found before `?>`
+   */
+  def xProcInstr: ElementType = {
+    val target = xName
+    xSpaceOpt()
+    xTakeUntil((pos: PositionType, data: String) => mkProcInstr(pos, target, data), () => tmppos, "?>")
+  }
+
+  /** Reads an attribute value up to the closing delimiter, which is consumed
+   *  but not included in the result. The value may not contain `<`.
+   *
+   *  @param endCh either `'` or `"`
+   *  @return the raw value, or the empty string (after an error has been
+   *          reported) if a `<` is met before the delimiter
+   */
+  def xAttributeValue(endCh: Char): String = {
+    val sb = new StringBuilder
+    while (ch != endCh) {
+      // well-formedness constraint
+      if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+      // truncatedError never returns, so falling through is safe
+      if (ch == SU) truncatedError("")
+      sb append ch_returning_nextch
+    }
+    // step past the closing delimiter
+    ch_returning_nextch
+    // @todo: normalize attribute value
+    sb.toString
+  }
+
+  /** Reads a delimited attribute value and normalizes it. The current
+   *  character is taken to be the delimiter that also ends the value.
+   */
+  def xAttributeValue(): String = {
+    val delimiter = ch_returning_nextch
+    val raw = xAttributeValue(delimiter)
+    normalizeAttributeValue(raw)
+  }
+
+  /** Collects characters from `it` up to, but not including, the first
+   *  occurrence of `end`. The `end` character itself is consumed.
+   *  Fails with `scala.sys.error` if `it` runs out before `end` is seen.
+   */
+  private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+    val sb = new StringBuilder
+    var found = false
+    while (!found && it.hasNext) {
+      val c = it.next()
+      if (c == end) found = true
+      else sb append c
+    }
+    if (found) sb.toString
+    else scala.sys.error("Expected '%s'".format(end))
+  }
+
+  /** Parses the rest of an end tag and checks that it closes `startName`;
+   *  a different name is handed to `errorNoEnd`.
+   *  {{{
+   *  [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+   *  }}}
+   */
+  def xEndTag(startName: String): Unit = {
+    xToken('/')
+    val closingName = xName
+    if (closingName != startName) errorNoEnd(startName)
+
+    xSpaceOpt()
+    xToken('>')
+  }
+
+  /** Parses an XML name.
+   *  {{{
+   *  Name ::= (Letter | '_') (NameChar)*
+   *  }}}
+   *  Strictly, [5] of the XML 1.0 specification also lets a name start with
+   *  ':', but that cannot happen here.
+   *
+   *  pre-condition: ch != ':' // assured by definition of XMLSTART token
+   *
+   *  @return the name; if it ends in ':' an error is reported and a single
+   *          trailing ':' is dropped. The empty string (after an error
+   *          report) if the current character cannot start a name.
+   */
+  def xName: String =
+    if (ch == SU) truncatedError("")
+    else if (!isNameStart(ch))
+      errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+    else {
+      val sb = new StringBuilder
+      // the first character is already known to be a valid name start
+      sb append ch_returning_nextch
+      while (isNameChar(ch)) sb append ch_returning_nextch
+
+      val name = sb.toString
+      if (name endsWith ":") {
+        reportSyntaxError( "name cannot end in ':'" )
+        name dropRight 1
+      }
+      else name
+    }
+
+  /** Expands the name of a predefined entity to its replacement text.
+   *  Besides the five XML names, `quote` is accepted as a synonym of `quot`.
+   *  Any other name is put back into reference form, `&name;`.
+   */
+  private def attr_unescape(s: String) = s match {
+    case "lt"             => "<"
+    case "gt"             => ">"
+    case "amp"            => "&"
+    case "apos"           => "'"
+    case "quot" | "quote" => "\""
+    case other            => "&" + other + ";"
+  }
+
+  /** Normalizes an attribute value (see section 3.3.3 of the XML 1.0 spec):
+   *  every whitespace character becomes a single space, character references
+   *  are replaced by the referenced character, and entity names known to
+   *  `attr_unescape` are expanded. Any other entity reference is left in
+   *  place as `&name;`.
+   *
+   *  A `&` that is the very last character is treated like any other
+   *  unterminated entity reference (the "Expected ';'" error raised by
+   *  `takeUntilChar`) instead of failing on the empty lookahead.
+   */
+  private def normalizeAttributeValue(attval: String): String = {
+    val buf = new StringBuilder
+    val it = attval.iterator.buffered
+
+    while (it.hasNext) buf append (it.next() match {
+      case ' ' | '\t' | '\n' | '\r' => " "
+      // `head` throws on an exhausted iterator, so check hasNext first
+      case '&' if it.hasNext && it.head == '#' =>
+        it.next() // skip the '#'
+        xCharRef(it)
+      case '&' => attr_unescape(takeUntilChar(it, ';'))
+      case c => c
+    })
+
+    buf.toString
+  }
+
+  /** Parses a character reference by delegating to `Utility.parseCharRef`,
+   *  routing problems to `reportSyntaxError` and `truncatedError`.
+   *  {{{
+   *  CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+   *            | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+   *  }}}
+   *  see [66]
+   *
+   *  @param ch     yields the current character
+   *  @param nextch advances to the next character
+   */
+  def xCharRef(ch: () => Char, nextch: () => Unit): String =
+    Utility.parseCharRef(ch, nextch, msg => reportSyntaxError(msg), msg => truncatedError(msg))
+
+  /** Parses a character reference from `it`; the first character taken from
+   *  `it` is used as the first character of the reference body.
+   */
+  def xCharRef(it: Iterator[Char]): String = {
+    var current = it.next()
+    Utility.parseCharRef(() => current, () => { current = it.next() }, msg => reportSyntaxError(msg), msg => truncatedError(msg))
+  }
+
+  /** Parses a character reference from this parser's own input. */
+  def xCharRef: String = xCharRef(() => ch, () => nextch())
+
+ /** Create a lookahead reader which does not influence the input */
+ def lookahead(): BufferedIterator[Char]
+
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
+ def ch: Char
+ def nextch(): Unit
+ protected def ch_returning_nextch: Char
+ def eof: Boolean
+
+ // def handle: HandleType
+ var tmppos: PositionType
+
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+
+ def truncatedError(msg: String): Nothing
+ def errorNoEnd(tag: String): Nothing
+
+ protected def errorAndResult[T](msg: String, x: T): T = {
+ reportSyntaxError(msg)
+ x
+ }
+
+  /** Consumes the character `that`; if the current character is something
+   *  else, the mismatch is passed to `xHandleError` and nothing is consumed.
+   */
+  def xToken(that: Char): Unit =
+    if (ch != that) xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+    else nextch()
+
+  /** Consumes each character of `that`, in order, via `xToken(Char)`. */
+  def xToken(that: Seq[Char]): Unit = that foreach (c => xToken(c))
+
+ /** scan [S] '=' [S]*/
+ def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() }
+
+ /** skip optional space S? */
+ def xSpaceOpt() = while (isSpace(ch) && !eof) nextch()
+
+  /** Scans mandatory whitespace; if the current character is not a space the
+   *  problem is passed to `xHandleError`.
+   *  {{{
+   *  [3] S ::= (#x20 | #x9 | #xD | #xA)+
+   *  }}}
+   */
+  def xSpace() =
+    if (!isSpace(ch)) xHandleError(ch, "whitespace expected")
+    else { nextch(); xSpaceOpt() }
+
+  /** Runs the side-effecting function `f` on `x`, then yields `x` itself. */
+  def returning[T](x: T)(f: T => Unit): T = {
+    f(x)
+    x
+  }
+
+  /** Evaluates `body` and afterwards passes the value `getter` had on entry
+   *  to `setter`, even if `body` throws.
+   *
+   *  @param getter the value to remember (evaluated once, before `body`)
+   *  @param setter receives the remembered value once `body` has finished
+   *  @return the result of `body`
+   */
+  def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+    val remembered = getter
+    try {
+      body
+    } finally {
+      setter(remembered)
+    }
+  }
+
+  /** Accumulates input characters until the string `until` is seen. The
+   *  terminator is consumed (by `peek`) but is not part of the accumulated
+   *  text. The accumulated text and the position supplied by `positioner`
+   *  are then passed to `handler`, whose result is returned.
+   *
+   *  Reaching `SU` first ends in `truncatedError`
+   *  (throws TruncatedXMLControl in compiler).
+   *
+   *  @param until the terminator; must be non-empty, since its head is taken
+   */
+  protected def xTakeUntil[T](
+    handler: (PositionType, String) => T,
+    positioner: () => PositionType,
+    until: String): T =
+  {
+    val sb = new StringBuilder
+    val first = until.head
+    val remainder = until.tail
+
+    // self-recursive only in tail position, so this compiles to a loop
+    def loop(): T =
+      if (ch == first && peek(remainder)) handler(positioner(), sb.toString)
+      else {
+        if (ch == SU) truncatedError("")
+        sb append ch
+        nextch()
+        loop()
+      }
+
+    loop()
+  }
+
+  /** Uses a non-destructive lookahead reader to check whether the upcoming
+   *  input matches `lookingFor`. On a match, returns true after dropping
+   *  `lookingFor.length + 1` characters from the real input: the matched
+   *  lookahead plus the current character, which the caller has already
+   *  compared against the head of its terminator. Otherwise returns false
+   *  and leaves the input unchanged.
+   */
+  private def peek(lookingFor: String): Boolean = {
+    val matched = lookahead() take lookingFor.length sameElements lookingFor.iterator
+    if (matched) {
+      // drop the chars from the real reader (all lookahead + orig)
+      var remaining = lookingFor.length + 1
+      while (remaining > 0) {
+        nextch()
+        remaining -= 1
+      }
+    }
+    matched
+  }
+}
diff --git a/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
new file mode 100755
index 0000000000..39e4831af2
--- /dev/null
+++ b/src/compiler/scala/tools/nsc/ast/parser/xml/Utility.scala
@@ -0,0 +1,176 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala.tools.nsc.ast.parser.xml
+
+import scala.collection.mutable
+
+
+/**
+ * The `Utility` object provides the small set of XML helper functions
+ * used by the compiler's parser: attribute value and character reference
+ * parsing, plus the character tests for XML whitespace and names.
+ *
+ * @author Burak Emir
+ */
+object Utility {
+ import scala.reflect.internal.Chars.SU
+
+  /** The predefined XML entities, keyed by reference name. */
+  private val unescMap = Map(
+    "lt"   -> '<',
+    "gt"   -> '>',
+    "amp"  -> '&',
+    "quot" -> '"',
+    "apos" -> '\''
+  )
+
+  /**
+   * Appends the character denoted by the predefined entity `ref` to `s`
+   * (`amp` yields `&amp;`, `lt` yields `&lt;` etc.).
+   *
+   * @return `s` after the append, or `'''null'''` if `ref` was not a
+   *         predefined entity (in which case `s` is left untouched).
+   */
+  private final def unescape(ref: String, s: StringBuilder): StringBuilder =
+    unescMap get ref match {
+      case Some(c) => s append c
+      case None    => null
+    }
+
+  /** Splits an attribute value into runs of text and unresolved entity
+   *  references.
+   *
+   *  Character references and the entities known to `unescape` are decoded
+   *  into the surrounding text. Any other `&name;` reference closes the
+   *  current text run (if non-empty) and is emitted through `entityRef`.
+   *
+   *  A problem reported by `parseCharRef` is thrown as a `RuntimeException`.
+   *  The iterator is advanced without `hasNext` checks inside a reference,
+   *  so a reference cut short by the end of `value` fails in `it.next()`.
+   *
+   *  @param value     the raw attribute value
+   *  @param text      builds a result element from a run of text
+   *  @param entityRef builds a result element from an entity name
+   *  @return the elements in source order; `Nil` for an empty `value`
+   */
+  def parseAttributeValue[T](value: String, text: String => T, entityRef: String => T): List[T] = {
+    val pending = new StringBuilder
+    val out = new mutable.ListBuffer[T]()
+    val chars = value.iterator
+
+    // turn the text collected so far into an element, if there is any
+    def flushText(): Unit =
+      if (!pending.isEmpty) {
+        out += text(pending.toString())
+        pending.clear()
+      }
+
+    while (chars.hasNext) {
+      var c = chars.next()
+      if (c != '&') pending append c
+      else {
+        c = chars.next()
+        if (c == '#') {
+          c = chars.next()
+          val theChar = parseCharRef(() => c, () => { c = chars.next() }, s => throw new RuntimeException(s), s => throw new RuntimeException(s))
+          pending.append(theChar)
+        }
+        else {
+          // the first character is taken unconditionally, as before
+          val name = new StringBuilder
+          name append c
+          c = chars.next()
+          while (c != ';') {
+            name append c
+            c = chars.next()
+          }
+          val ref = name.toString()
+          // a predefined entity is appended to `pending` by unescape itself
+          if (unescape(ref, pending) eq null) {
+            flushText()
+            out += entityRef(ref)
+          }
+        }
+      }
+    }
+
+    flushText()
+    out.toList
+  }
+
+ /**
+ * Parses the body of a character reference and returns the referenced
+ * character as a string. An optional leading `x` selects hexadecimal.
+ * Digits are read until `ch()` yields `;`; that `;` is not consumed.
+ * {{{
+ * CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
+ * | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ * }}}
+ * See [66]
+ *
+ * @param ch yields the current input character
+ * @param nextch advances the input by one character
+ * @param reportSyntaxError called for a character that is not allowed;
+ * if it returns, that character is skipped
+ * @param reportTruncatedError called when `SU` is met before `;`
+ * @return a string holding the code point accumulated from the digits
+ */
+ def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+ // the leading 'x' is consumed only when it is present
+ val hex = (ch() == 'x') && { nextch(); true }
+ val base = if (hex) 16 else 10
+ var i = 0
+ while (ch() != ';') {
+ ch() match {
+ case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ i = i * base + ch().asDigit
+ case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
+ | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+ if (! hex)
+ reportSyntaxError("hex char not allowed in decimal char ref\n" +
+ "Did you mean to write &#x ?")
+ else
+ i = i * base + ch().asDigit
+ case SU =>
+ reportTruncatedError("")
+ case _ =>
+ reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+ }
+ nextch()
+ }
+ // String(codePoints, offset, count): `i` is treated as a Unicode code point
+ new String(Array(i), 0, 1)
+ }
+
+  /** True if `ch` is one of the four XML whitespace characters.
+   *  {{{
+   *  (#x20 | #x9 | #xD | #xA)
+   *  }}} */
+  final def isSpace(ch: Char): Boolean =
+    ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
+
+  /** True if `cs` is non-empty and consists only of XML whitespace.
+   *  {{{
+   *  (#x20 | #x9 | #xD | #xA)+
+   *  }}} */
+  final def isSpace(cs: Seq[Char]): Boolean =
+    !cs.isEmpty && cs.forall(c => isSpace(c))
+
+  /** True if `ch` may appear inside an XML name.
+   *  {{{
+   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+   *             | CombiningChar | Extender
+   *  }}}
+   *  See [4] and Appendix B of XML 1.0 specification.
+   */
+  def isNameChar(ch: Char) = {
+    import java.lang.Character._
+
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+    def isMarkModifierOrDigit = getType(ch).toByte match {
+      case COMBINING_SPACING_MARK | ENCLOSING_MARK | NON_SPACING_MARK |
+           MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _ => false
+    }
+
+    isNameStart(ch) || isMarkModifierOrDigit || (".-:" contains ch)
+  }
+
+  /** True if `ch` may start an XML name.
+   *  {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+   */
+  def isNameStart(ch: Char) = {
+    import java.lang.Character._
+
+    ch == '_' || (getType(ch).toByte match {
+      case LOWERCASE_LETTER | UPPERCASE_LETTER | OTHER_LETTER |
+           TITLECASE_LETTER | LETTER_NUMBER => true
+      case _ => false
+    })
+  }
+
+  /** True if `s` is a non-empty string that forms an XML name.
+   *  {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String) =
+    s.headOption.exists(c => isNameStart(c)) && s.drop(1).forall(c => isNameChar(c))
+
+}