summaryrefslogtreecommitdiff
path: root/src/xml
diff options
context:
space:
mode:
Diffstat (limited to 'src/xml')
-rw-r--r--src/xml/scala/xml/Atom.scala47
-rw-r--r--src/xml/scala/xml/Attribute.scala101
-rw-r--r--src/xml/scala/xml/Comment.scala31
-rw-r--r--src/xml/scala/xml/Document.scala92
-rwxr-xr-xsrc/xml/scala/xml/Elem.scala135
-rw-r--r--src/xml/scala/xml/EntityRef.scala40
-rw-r--r--src/xml/scala/xml/Equality.scala107
-rw-r--r--src/xml/scala/xml/Group.scala42
-rw-r--r--src/xml/scala/xml/MalformedAttributeException.scala15
-rw-r--r--src/xml/scala/xml/MetaData.scala217
-rw-r--r--src/xml/scala/xml/NamespaceBinding.scala83
-rwxr-xr-xsrc/xml/scala/xml/Node.scala198
-rw-r--r--src/xml/scala/xml/NodeBuffer.scala47
-rw-r--r--src/xml/scala/xml/NodeSeq.scala157
-rw-r--r--src/xml/scala/xml/Null.scala62
-rw-r--r--src/xml/scala/xml/PCData.scala44
-rw-r--r--src/xml/scala/xml/PrefixedAttribute.scala61
-rwxr-xr-xsrc/xml/scala/xml/PrettyPrinter.scala263
-rw-r--r--src/xml/scala/xml/ProcInstr.scala39
-rw-r--r--src/xml/scala/xml/QNode.scala20
-rw-r--r--src/xml/scala/xml/SpecialNode.scala33
-rw-r--r--src/xml/scala/xml/Text.scala39
-rw-r--r--src/xml/scala/xml/TextBuffer.scala46
-rw-r--r--src/xml/scala/xml/TopScope.scala31
-rw-r--r--src/xml/scala/xml/TypeSymbol.scala15
-rw-r--r--src/xml/scala/xml/Unparsed.scala36
-rw-r--r--src/xml/scala/xml/UnprefixedAttribute.scala61
-rwxr-xr-xsrc/xml/scala/xml/Utility.scala410
-rwxr-xr-xsrc/xml/scala/xml/XML.scala109
-rw-r--r--src/xml/scala/xml/Xhtml.scala97
-rw-r--r--src/xml/scala/xml/dtd/ContentModel.scala118
-rw-r--r--src/xml/scala/xml/dtd/ContentModelParser.scala129
-rw-r--r--src/xml/scala/xml/dtd/DTD.scala35
-rw-r--r--src/xml/scala/xml/dtd/Decl.scala157
-rw-r--r--src/xml/scala/xml/dtd/DocType.scala39
-rw-r--r--src/xml/scala/xml/dtd/ElementValidator.scala132
-rw-r--r--src/xml/scala/xml/dtd/ExternalID.scala86
-rw-r--r--src/xml/scala/xml/dtd/Scanner.scala79
-rw-r--r--src/xml/scala/xml/dtd/Tokens.scala45
-rw-r--r--src/xml/scala/xml/dtd/ValidationException.scala44
-rw-r--r--src/xml/scala/xml/dtd/impl/Base.scala67
-rw-r--r--src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala98
-rw-r--r--src/xml/scala/xml/dtd/impl/DetWordAutom.scala50
-rw-r--r--src/xml/scala/xml/dtd/impl/Inclusion.scala70
-rw-r--r--src/xml/scala/xml/dtd/impl/NondetWordAutom.scala60
-rw-r--r--src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala37
-rw-r--r--src/xml/scala/xml/dtd/impl/SubsetConstruction.scala108
-rw-r--r--src/xml/scala/xml/dtd/impl/SyntaxError.scala21
-rw-r--r--src/xml/scala/xml/dtd/impl/WordBerrySethi.scala162
-rw-r--r--src/xml/scala/xml/dtd/impl/WordExp.scala59
-rwxr-xr-xsrc/xml/scala/xml/factory/Binder.scala61
-rw-r--r--src/xml/scala/xml/factory/LoggedNodeFactory.scala90
-rw-r--r--src/xml/scala/xml/factory/NodeFactory.scala61
-rw-r--r--src/xml/scala/xml/factory/XMLLoader.scala61
-rw-r--r--src/xml/scala/xml/include/CircularIncludeException.scala25
-rw-r--r--src/xml/scala/xml/include/UnavailableResourceException.scala20
-rw-r--r--src/xml/scala/xml/include/XIncludeException.scala58
-rw-r--r--src/xml/scala/xml/include/sax/EncodingHeuristics.scala98
-rw-r--r--src/xml/scala/xml/include/sax/XIncludeFilter.scala373
-rw-r--r--src/xml/scala/xml/include/sax/XIncluder.scala187
-rw-r--r--src/xml/scala/xml/package.scala19
-rwxr-xr-xsrc/xml/scala/xml/parsing/ConstructingHandler.scala34
-rw-r--r--src/xml/scala/xml/parsing/ConstructingParser.scala55
-rwxr-xr-xsrc/xml/scala/xml/parsing/DefaultMarkupHandler.scala30
-rw-r--r--src/xml/scala/xml/parsing/ExternalSources.scala38
-rw-r--r--src/xml/scala/xml/parsing/FactoryAdapter.scala187
-rw-r--r--src/xml/scala/xml/parsing/FatalError.scala17
-rwxr-xr-xsrc/xml/scala/xml/parsing/MarkupHandler.scala127
-rwxr-xr-xsrc/xml/scala/xml/parsing/MarkupParser.scala938
-rw-r--r--src/xml/scala/xml/parsing/MarkupParserCommon.scala260
-rw-r--r--src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala37
-rw-r--r--src/xml/scala/xml/parsing/TokenTests.scala101
-rw-r--r--src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala104
-rw-r--r--src/xml/scala/xml/parsing/XhtmlEntities.scala54
-rw-r--r--src/xml/scala/xml/parsing/XhtmlParser.scala31
-rw-r--r--src/xml/scala/xml/persistent/CachedFileStorage.scala129
-rw-r--r--src/xml/scala/xml/persistent/Index.scala17
-rw-r--r--src/xml/scala/xml/persistent/SetStorage.scala42
-rw-r--r--src/xml/scala/xml/pull/XMLEvent.scala60
-rwxr-xr-xsrc/xml/scala/xml/pull/XMLEventReader.scala157
-rw-r--r--src/xml/scala/xml/pull/package.scala42
-rw-r--r--src/xml/scala/xml/transform/BasicTransformer.scala60
-rw-r--r--src/xml/scala/xml/transform/RewriteRule.scala28
-rw-r--r--src/xml/scala/xml/transform/RuleTransformer.scala16
84 files changed, 7891 insertions, 0 deletions
diff --git a/src/xml/scala/xml/Atom.scala b/src/xml/scala/xml/Atom.scala
new file mode 100644
index 0000000000..33e58ba7e7
--- /dev/null
+++ b/src/xml/scala/xml/Atom.scala
@@ -0,0 +1,47 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** The class `Atom` provides an XML node for text (`PCDATA`).
+ * It is used in both non-bound and bound XML representations.
+ *
+ * @author Burak Emir
+ * @param data the text contained in this node, may not be `'''null'''`.
+ */
+class Atom[+A](val data: A) extends SpecialNode with Serializable {
+ if (data == null)
+ throw new IllegalArgumentException("cannot construct "+getClass.getSimpleName+" with null")
+
+ override protected def basisForHashCode: Seq[Any] = Seq(data)
+
+ override def strict_==(other: Equality) = other match {
+ case x: Atom[_] => data == x.data
+ case _ => false
+ }
+
+ override def canEqual(other: Any) = other match {
+ case _: Atom[_] => true
+ case _ => false
+ }
+
+ final override def doCollectNamespaces = false
+ final override def doTransform = false
+
+ def label = "#PCDATA"
+
+ /** Returns text, with some characters escaped according to the XML
+ * specification.
+ */
+ def buildString(sb: StringBuilder): StringBuilder =
+ Utility.escape(data.toString, sb)
+
+ override def text: String = data.toString
+
+}
diff --git a/src/xml/scala/xml/Attribute.scala b/src/xml/scala/xml/Attribute.scala
new file mode 100644
index 0000000000..e4b2b69fc6
--- /dev/null
+++ b/src/xml/scala/xml/Attribute.scala
@@ -0,0 +1,101 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** This singleton object contains the `apply` and `unapply` methods for
+ * convenient construction and deconstruction.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object Attribute {
+ def unapply(x: Attribute) = x match {
+ case PrefixedAttribute(_, key, value, next) => Some((key, value, next))
+ case UnprefixedAttribute(key, value, next) => Some((key, value, next))
+ case _ => None
+ }
+
+ /** Convenience functions which choose Un/Prefixedness appropriately */
+ def apply(key: String, value: Seq[Node], next: MetaData): Attribute =
+ new UnprefixedAttribute(key, value, next)
+
+ def apply(pre: String, key: String, value: String, next: MetaData): Attribute =
+ if (pre == null || pre == "") new UnprefixedAttribute(key, value, next)
+ else new PrefixedAttribute(pre, key, value, next)
+
+ def apply(pre: String, key: String, value: Seq[Node], next: MetaData): Attribute =
+ if (pre == null || pre == "") new UnprefixedAttribute(key, value, next)
+ else new PrefixedAttribute(pre, key, value, next)
+
+ /** Like the `String`-prefix overload, with an optional prefix: `None`, `Some(null)` and `Some("")` all yield an unprefixed attribute. */
+ def apply(pre: Option[String], key: String, value: Seq[Node], next: MetaData): Attribute = pre match {
+   case None    => new UnprefixedAttribute(key, value, next)
+   case Some(p) => apply(p, key, value, next) // delegate so null/"" prefixes are handled consistently
+ }
+}
+
+/** The `Attribute` trait defines the interface shared by both
+ * [[scala.xml.PrefixedAttribute]] and [[scala.xml.UnprefixedAttribute]].
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+abstract trait Attribute extends MetaData {
+ def pre: String // will be null if unprefixed
+ val key: String
+ val value: Seq[Node]
+ val next: MetaData
+
+ def apply(key: String): Seq[Node]
+ def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node]
+ def copy(next: MetaData): Attribute
+
+ def remove(key: String) =
+ if (!isPrefixed && this.key == key) next
+ else copy(next remove key)
+
+ def remove(namespace: String, scope: NamespaceBinding, key: String) =
+ if (this.key == key && (scope getURI pre) == namespace) next
+ else copy(next.remove(namespace, scope, key))
+
+ def isPrefixed: Boolean = pre != null
+
+ def getNamespace(owner: Node): String
+
+ def wellformed(scope: NamespaceBinding): Boolean = {
+ val arg = if (isPrefixed) scope getURI pre else null
+ (next(arg, scope, key) == null) && (next wellformed scope)
+ }
+
+ /** Returns an iterator on attributes */
+ override def iterator: Iterator[MetaData] = {
+ if (value == null) next.iterator
+ else Iterator.single(this) ++ next.iterator
+ }
+
+ override def size: Int = {
+ if (value == null) next.size
+ else 1 + next.size
+ }
+
+ /** Appends string representation of only this attribute to stringbuffer.
+ */
+ protected def toString1(sb: StringBuilder): Unit =
+   if (value != null) {
+     // A null value marks an attribute that is skipped entirely (cf. `iterator` and `size`).
+     if (isPrefixed)
+       sb append pre append ':'
+     sb append key append '='
+     // Serialize the value nodes into a scratch buffer so the result can be quoted as a whole.
+     val serialized = new StringBuilder()
+     Utility.sequenceToXML(value, TopScope, serialized, stripComments = true)
+     Utility.appendQuoted(serialized.toString, sb)
+   }
+}
diff --git a/src/xml/scala/xml/Comment.scala b/src/xml/scala/xml/Comment.scala
new file mode 100644
index 0000000000..b8dccdcb16
--- /dev/null
+++ b/src/xml/scala/xml/Comment.scala
@@ -0,0 +1,31 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** The class `Comment` implements an XML node for comments.
+ *
+ * @author Burak Emir
+ * @param commentText the text contained in this node; may not contain "--" or end with "-"
+ */
+case class Comment(commentText: String) extends SpecialNode {
+  def label = "#REM"
+  override def text = ""
+  final override def doCollectNamespaces = false
+  final override def doTransform = false
+
+  if (commentText contains "--")
+    throw new IllegalArgumentException("text contains \"--\"")
+  if (commentText endsWith "-") // would serialize as "--->", which is not well-formed XML
+    throw new IllegalArgumentException("text ends with \"-\"")
+
+  /** Appends `<!--`, the comment text and `-->` to this string buffer. */
+  override def buildString(sb: StringBuilder) =
+    sb append "<!--" append commentText append "-->"
+}
diff --git a/src/xml/scala/xml/Document.scala b/src/xml/scala/xml/Document.scala
new file mode 100644
index 0000000000..9a725014fc
--- /dev/null
+++ b/src/xml/scala/xml/Document.scala
@@ -0,0 +1,92 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** A document information item (according to InfoSet spec). The comments
+ * are copied from the Infoset spec, only augmented with some information
+ * on the Scala types for definitions that might have no value.
+ * Also plays the role of an `XMLEvent` for pull parsing.
+ *
+ * @author Burak Emir
+ * @version 1.0, 26/04/2005
+ */
+@SerialVersionUID(-2289320563321795109L)
+class Document extends NodeSeq with pull.XMLEvent with Serializable {
+
+ /** An ordered list of child information items, in document
+ * order. The list contains exactly one element information item. The
+ * list also contains one processing instruction information item for
+ * each processing instruction outside the document element, and one
+ * comment information item for each comment outside the document
+ * element. Processing instructions and comments within the DTD are
+ * excluded. If there is a document type declaration, the list also
+ * contains a document type declaration information item.
+ */
+ var children: Seq[Node] = _
+
+ /** The element information item corresponding to the document element. */
+ var docElem: Node = _
+
+ /** The dtd that comes with the document, if any */
+ var dtd: scala.xml.dtd.DTD = _
+
+ /** An unordered set of notation information items, one for each notation
+ * declared in the DTD. If any notation is multiply declared, this property
+ * has no value.
+ */
+ def notations: Seq[scala.xml.dtd.NotationDecl] =
+ dtd.notations
+
+ /** An unordered set of unparsed entity information items, one for each
+ * unparsed entity declared in the DTD.
+ */
+ def unparsedEntities: Seq[scala.xml.dtd.EntityDecl] =
+ dtd.unparsedEntities
+
+ /** The base URI of the document entity. */
+ var baseURI: String = _
+
+ /** The name of the character encoding scheme in which the document entity
+ * is expressed.
+ */
+ var encoding: Option[String] = _
+
+ /** An indication of the standalone status of the document, either
+ * true or false. This property is derived from the optional standalone
+ * document declaration in the XML declaration at the beginning of the
+ * document entity, and has no value (`None`) if there is no
+ * standalone document declaration.
+ */
+ var standAlone: Option[Boolean] = _
+
+ /** A string representing the XML version of the document. This
+ * property is derived from the XML declaration optionally present at
+ * the beginning of the document entity, and has no value (`None`)
+ * if there is no XML declaration.
+ */
+ var version: Option[String] = _
+
+ /** This property is not strictly speaking part of the infoset of
+ * the document. Rather it is an indication of whether the processor
+ * has read the complete DTD. Its value is a boolean. If it is false,
+ * then certain properties (indicated in their descriptions below) may
+ * be unknown. If it is true, those properties are never unknown.
+ */
+ var allDeclarationsProcessed = false
+
+ // methods for NodeSeq
+
+ def theSeq: Seq[Node] = this.docElem
+
+ override def canEqual(other: Any) = other match {
+ case _: Document => true
+ case _ => false
+ }
+}
diff --git a/src/xml/scala/xml/Elem.scala b/src/xml/scala/xml/Elem.scala
new file mode 100755
index 0000000000..484cf98744
--- /dev/null
+++ b/src/xml/scala/xml/Elem.scala
@@ -0,0 +1,135 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** This singleton object contains the `apply` and `unapplySeq` methods for
+ * convenient construction and deconstruction. It is possible to deconstruct
+ * any `Node` instance (that is not a `SpecialNode` or a `Group`) using the
+ * syntax `case Elem(prefix, label, attribs, scope, child @ _*) => ...`
+ *
+ * Copyright 2008 Google Inc. All Rights Reserved.
+ * @author Burak Emir <bqe@google.com>
+ */
+object Elem {
+ /** Build an Elem, setting its minimizeEmpty property to `true` if it has no children. Note that this
+ * default may not be exactly what you want, as some XML dialects don't permit some elements to be minimized.
+ *
+ * @deprecated This factory method is retained for backward compatibility; please use the other one, with which you
+ * can specify your own preference for minimizeEmpty.
+ */
+ @deprecated("Use the other apply method in this object", "2.10.0")
+ def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*): Elem =
+ apply(prefix, label, attributes, scope, child.isEmpty, child: _*)
+
+ def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, minimizeEmpty: Boolean, child: Node*): Elem =
+ new Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*)
+
+ def unapplySeq(n: Node) = n match {
+ case _: SpecialNode | _: Group => None
+ case _ => Some((n.prefix, n.label, n.attributes, n.scope, n.child))
+ }
+
+ import scala.sys.process._
+ /** Implicitly convert a [[scala.xml.Elem]] into a
+ * [[scala.sys.process.ProcessBuilder]]. This is done by obtaining the text
+ * elements of the element, trimming spaces, and then converting the result
+ * from string to a process. Importantly, tags are completely ignored, so
+ * they cannot be used to separate parameters.
+ */
+ @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0")
+ implicit def xmlToProcess(command: scala.xml.Elem): ProcessBuilder = Process(command.text.trim)
+
+ @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0")
+ implicit def processXml(p: Process.type) = new {
+ /** Creates a [[scala.sys.process.ProcessBuilder]] from a Scala XML Element.
+ * This can be used as a way to template strings.
+ *
+ * @example {{{
+ * apply(<x> {dxPath.absolutePath} --dex --output={classesDexPath.absolutePath} {classesMinJarPath.absolutePath}</x>)
+ * }}}
+ */
+ def apply(command: Elem): ProcessBuilder = Process(command.text.trim)
+ }
+}
+
+
+/** The case class `Elem` extends the `Node` class,
+ * providing an immutable data object representing an XML element.
+ *
+ * @param prefix namespace prefix (may be null, but not the empty string)
+ * @param label the element name
+ * @param attributes1 the attribute map
+ * @param scope the scope containing the namespace bindings
+ * @param minimizeEmpty `true` if this element should be serialized as minimized (i.e. "&lt;el/&gt;") when
+ * empty; `false` if it should be written out in long form.
+ * @param child the children of this node
+ *
+ * Copyright 2008 Google Inc. All Rights Reserved.
+ * @author Burak Emir <bqe@google.com>
+ */
+class Elem(
+ override val prefix: String,
+ val label: String,
+ attributes1: MetaData,
+ override val scope: NamespaceBinding,
+ val minimizeEmpty: Boolean,
+ val child: Node*)
+extends Node with Serializable
+{
+ @deprecated("This constructor is retained for backward compatibility. Please use the primary constructor, which lets you specify your own preference for `minimizeEmpty`.", "2.10.0")
+ def this(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*) = {
+ this(prefix, label, attributes, scope, child.isEmpty, child: _*)
+ }
+
+ final override def doCollectNamespaces = true
+ final override def doTransform = true
+
+ // Validate the arguments first: MetaData.normalize looks up prefixed attributes in
+ // `scope`, so a null scope would otherwise surface as a bare NullPointerException.
+ if (prefix == "")
+   throw new IllegalArgumentException("prefix of zero length, use null instead")
+ if (scope == null)
+   throw new IllegalArgumentException("scope is null, use scala.xml.TopScope for empty scope")
+ override val attributes = MetaData.normalize(attributes1, scope)
+
+ //@todo: copy the children,
+ // setting namespace scope if necessary
+ // cleaning adjacent text nodes if necessary
+
+ override protected def basisForHashCode: Seq[Any] =
+ prefix :: label :: attributes :: child.toList
+
+ /** Returns a new element with updated attributes, resolving namespace uris
+ * from this element's scope. See MetaData.update for details.
+ *
+ * @param updates MetaData with new and updated attributes
+ * @return a new element with updated attributes
+ */
+ final def %(updates: MetaData): Elem =
+ copy(attributes = MetaData.update(attributes, scope, updates))
+
+ /** Returns a copy of this element with any supplied arguments replacing
+ * this element's value for that field.
+ *
+ * @return a new element with the supplied fields replaced
+ */
+ def copy(
+ prefix: String = this.prefix,
+ label: String = this.label,
+ attributes: MetaData = this.attributes,
+ scope: NamespaceBinding = this.scope,
+ minimizeEmpty: Boolean = this.minimizeEmpty,
+ child: Seq[Node] = this.child.toSeq
+ ): Elem = Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*)
+
+ /** Returns concatenation of `text(n)` for each child `n`.
+ */
+ override def text = (child map (_.text)).mkString
+}
diff --git a/src/xml/scala/xml/EntityRef.scala b/src/xml/scala/xml/EntityRef.scala
new file mode 100644
index 0000000000..7a58831075
--- /dev/null
+++ b/src/xml/scala/xml/EntityRef.scala
@@ -0,0 +1,40 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** The class `EntityRef` implements an XML node for entity references.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ * @param entityName the name of the entity reference, for example `amp`.
+ */
+case class EntityRef(entityName: String) extends SpecialNode {
+ final override def doCollectNamespaces = false
+ final override def doTransform = false
+ def label = "#ENTITY"
+
+ override def text = entityName match {
+ case "lt" => "<"
+ case "gt" => ">"
+ case "amp" => "&"
+ case "apos" => "'"
+ case "quot" => "\""
+ case _ => Utility.sbToString(buildString)
+ }
+
+ /** Appends `"&amp;entityName;"` (ampersand, entity name, semicolon) to this string buffer.
+ *
+ * @param sb the string buffer.
+ * @return the modified string buffer `sb`.
+ */
+ override def buildString(sb: StringBuilder) =
+ sb.append("&").append(entityName).append(";")
+
+}
diff --git a/src/xml/scala/xml/Equality.scala b/src/xml/scala/xml/Equality.scala
new file mode 100644
index 0000000000..021d185812
--- /dev/null
+++ b/src/xml/scala/xml/Equality.scala
@@ -0,0 +1,107 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** In an attempt to contain the damage being inflicted on consistency by the
+ * ad hoc `equals` methods spread around `xml`, the logic is centralized and
+ * all the `xml` classes go through the `xml.Equality trait`. There are two
+ * forms of `xml` comparison.
+ *
+ * 1. `'''def''' strict_==(other: scala.xml.Equality)`
+ *
+ * This one tries to honor the little things like symmetry and hashCode
+ * contracts. The `equals` method routes all comparisons through this.
+ *
+ * 1. `xml_==(other: Any)`
+ *
+ * This one picks up where `strict_==` leaves off. It might declare any two
+ * things equal.
+ *
+ * As things stood, the logic not only made a mockery of the collections
+ * equals contract, but also laid waste to that of case classes.
+ *
+ * Among the obstacles to sanity are/were:
+ *
+ * Node extends NodeSeq extends Seq[Node]
+ * MetaData extends Iterable[MetaData]
+ * The hacky "Group" xml node which throws exceptions
+ * with wild abandon, so don't get too close
+ * Rampant asymmetry and impossible hashCodes
+ * Most classes claiming to be equal to "String" if
+ * some specific stringification of it was the same.
+ * String was never going to return the favor.
+ */
+
+object Equality {
+ def asRef(x: Any): AnyRef = x.asInstanceOf[AnyRef]
+
+ /** Note - these functions assume strict equality has already failed.
+ */
+ def compareBlithely(x1: AnyRef, x2: String): Boolean = x1 match {
+ case x: Atom[_] => x.data == x2
+ case x: NodeSeq => x.text == x2
+ case _ => false
+ }
+ def compareBlithely(x1: AnyRef, x2: Node): Boolean = x1 match {
+ case x: NodeSeq if x.length == 1 => x2 == x(0)
+ case _ => false
+ }
+ /** Loose comparison of two arbitrary references: if either is null they are equal only when
+  *  both are; otherwise the `String` or `Node` overload is chosen by the runtime type of `x2`. */
+ def compareBlithely(x1: AnyRef, x2: AnyRef): Boolean =
+   if (x1 == null || x2 == null) x1 eq x2
+   else x2 match {
+     case s: String => compareBlithely(x1, s)
+     case n: Node   => compareBlithely(x1, n)
+     // Any other right-hand side is never blithely equal.
+     case _         => false
+   }
+}
+import Equality._
+
+trait Equality extends scala.Equals {
+ protected def basisForHashCode: Seq[Any]
+
+ def strict_==(other: Equality): Boolean
+ def strict_!=(other: Equality) = !strict_==(other)
+
+ /** We insist we're only equal to other `xml.Equality` implementors,
+ * which heads off a lot of inconsistency up front.
+ */
+ override def canEqual(other: Any): Boolean = other match {
+ case x: Equality => true
+ case _ => false
+ }
+
+ /** It'd be nice to make these final, but there are probably
+ * people out there subclassing the XML types, especially when
+ * it comes to equals. However WE at least can pretend they
+ * are final since clearly individual classes cannot be trusted
+ * to maintain a semblance of order.
+ */
+ override def hashCode() = basisForHashCode.##
+ override def equals(other: Any) = doComparison(other, blithe = false)
+ final def xml_==(other: Any) = doComparison(other, blithe = true)
+ final def xml_!=(other: Any) = !xml_==(other)
+
+ /** The "blithe" parameter expresses the caller's unconcerned attitude
+ * regarding the usual constraints on equals. The method is thereby
+ * given carte blanche to declare any two things equal.
+ */
+ private def doComparison(other: Any, blithe: Boolean) = {
+ val strictlyEqual = other match {
+ case x: AnyRef if this eq x => true
+ case x: Equality => (x canEqual this) && (this strict_== x)
+ case _ => false
+ }
+
+ strictlyEqual || (blithe && compareBlithely(this, asRef(other)))
+ }
+}
diff --git a/src/xml/scala/xml/Group.scala b/src/xml/scala/xml/Group.scala
new file mode 100644
index 0000000000..e3af615008
--- /dev/null
+++ b/src/xml/scala/xml/Group.scala
@@ -0,0 +1,42 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** A hack to group XML nodes in one node for output.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+final case class Group(nodes: Seq[Node]) extends Node {
+ override def theSeq = nodes
+
+ override def canEqual(other: Any) = other match {
+ case x: Group => true
+ case _ => false
+ }
+
+ override def strict_==(other: Equality) = other match {
+ case Group(xs) => nodes sameElements xs
+ case _ => false
+ }
+
+ override protected def basisForHashCode = nodes
+
+ /** Since Group is very much a hack it throws an exception if you
+ * try to do anything with it.
+ */
+ private def fail(msg: String) = throw new UnsupportedOperationException("class Group does not support method '%s'" format msg)
+
+ def label = fail("label")
+ override def attributes = fail("attributes")
+ override def namespace = fail("namespace")
+ override def child = fail("child")
+ def buildString(sb: StringBuilder) = fail("toString(StringBuilder)")
+}
diff --git a/src/xml/scala/xml/MalformedAttributeException.scala b/src/xml/scala/xml/MalformedAttributeException.scala
new file mode 100644
index 0000000000..d499ad3e10
--- /dev/null
+++ b/src/xml/scala/xml/MalformedAttributeException.scala
@@ -0,0 +1,15 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+
+
+case class MalformedAttributeException(msg: String) extends RuntimeException(msg)
diff --git a/src/xml/scala/xml/MetaData.scala b/src/xml/scala/xml/MetaData.scala
new file mode 100644
index 0000000000..8b5ea187cb
--- /dev/null
+++ b/src/xml/scala/xml/MetaData.scala
@@ -0,0 +1,217 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import Utility.sbToString
+import scala.annotation.tailrec
+import scala.collection.{ AbstractIterable, Iterator }
+
+/**
+ * Copyright 2008 Google Inc. All Rights Reserved.
+ * @author Burak Emir <bqe@google.com>
+ */
+object MetaData {
+ /**
+ * appends all attributes from new_tail to attribs, without attempting to
+ * detect or remove duplicates. The method guarantees that all attributes
+ * from attribs come before the attributes in new_tail, but does not
+ * guarantee to preserve the relative order of attribs.
+ *
+ * Duplicates can be removed with `normalize`.
+ */
+ @tailrec // temporarily marked final so it will compile under -Xexperimental
+ final def concatenate(attribs: MetaData, new_tail: MetaData): MetaData =
+ if (attribs eq Null) new_tail
+ else concatenate(attribs.next, attribs copy new_tail)
+
+ /**
+ * returns normalized MetaData, with all duplicates removed and namespace prefixes resolved to
+ * namespace URIs via the given scope.
+ */
+ def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = {
+ def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData = {
+ lazy val key = getUniversalKey(md, scope)
+ if (md eq Null) normalized_attribs
+ else if ((md.value eq null) || set(key)) iterate(md.next, normalized_attribs, set)
+ else md copy iterate(md.next, normalized_attribs, set + key)
+ }
+ iterate(attribs, Null, Set())
+ }
+
+ /**
+ * returns key if md is unprefixed, namespace URI + key if md is prefixed
+ */
+ def getUniversalKey(attrib: MetaData, scope: NamespaceBinding) = attrib match {
+ case prefixed: PrefixedAttribute => scope.getURI(prefixed.pre) + prefixed.key
+ case unprefixed: UnprefixedAttribute => unprefixed.key
+ }
+
+ /**
+ * returns MetaData with attributes updated from given MetaData
+ */
+ def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData =
+ normalize(concatenate(updates, attribs), scope)
+
+}
+
+/** This class represents an attribute and at the same time a linked list of
+ * attributes. Every instance of this class is either
+ * - an instance of `UnprefixedAttribute key,value` or
+ * - an instance of `PrefixedAttribute namespace_prefix,key,value` or
+ * - `Null`, the empty attribute list.
+ *
+ * Namespace URIs are obtained by using the namespace scope of the element
+ * owning this attribute (see `getNamespace`).
+ *
+ * Copyright 2008 Google Inc. All Rights Reserved.
+ * @author Burak Emir <bqe@google.com>
+ */
+abstract class MetaData
+extends AbstractIterable[MetaData]
+ with Iterable[MetaData]
+ with Equality
+ with Serializable {
+
+ /** Updates this MetaData with the MetaData given as argument. All attributes that occur in updates
+ * are part of the resulting MetaData. If an attribute occurs in both this instance and
+ * updates, only the one in updates is part of the result (avoiding duplicates). For prefixed
+ * attributes, namespaces are resolved using the given scope, which defaults to TopScope.
+ *
+ * @param updates MetaData with new and updated attributes
+ * @return a new MetaData instance that contains old, new and updated attributes
+ */
+ def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData =
+ MetaData.update(this, scope, updates)
+
+ /**
+ * Gets value of unqualified (unprefixed) attribute with given key, null if not found
+ *
+ * @param key
+ * @return value as Seq[Node] if key is found, null otherwise
+ */
+ def apply(key: String): Seq[Node]
+
+ /** convenience method, same as `apply(namespace, owner.scope, key)`.
+ *
+ * @param namespace_uri namespace uri of key
+ * @param owner the element owning this attribute list
+ * @param key the attribute key
+ */
+ final def apply(namespace_uri: String, owner: Node, key: String): Seq[Node] =
+ apply(namespace_uri, owner.scope, key)
+
+ /**
+ * Gets value of prefixed attribute with given key and namespace, null if not found
+ *
+ * @param namespace_uri namespace uri of key
+ * @param scp a namespace scp (usually of the element owning this attribute list)
+ * @param k to be looked for
+ * @return value as Seq[Node] if key is found, null otherwise
+ */
+ def apply(namespace_uri: String, scp: NamespaceBinding, k: String): Seq[Node]
+
+ /** returns a copy of this MetaData item with next field set to argument.
+ */
+ def copy(next: MetaData): MetaData
+
+ /** if owner is the element of this metadata item, returns namespace */
+ def getNamespace(owner: Node): String
+
+ def hasNext = (Null != next)
+
+ def length: Int = length(0)
+
+ def length(i: Int): Int = next.length(i + 1)
+
+ def isPrefixed: Boolean
+
+ override def canEqual(other: Any) = other match {
+ case _: MetaData => true
+ case _ => false
+ }
+ override def strict_==(other: Equality) = other match {
+ case m: MetaData => this.asAttrMap == m.asAttrMap
+ case _ => false
+ }
+ protected def basisForHashCode: Seq[Any] = List(this.asAttrMap)
+
+ /** filters this sequence of meta data */
+ override def filter(f: MetaData => Boolean): MetaData =
+ if (f(this)) copy(next filter f)
+ else next filter f
+
+ /** returns key of this MetaData item */
+ def key: String
+
+ /** returns value of this MetaData item */
+ def value: Seq[Node]
+
+ /** Returns a String containing "prefix:key" if the first key is
+ * prefixed, and "key" otherwise.
+ */
+ def prefixedKey = this match {
+ case x: Attribute if x.isPrefixed => x.pre + ":" + key
+ case _ => key
+ }
+
+ /** Returns a Map containing the attributes stored as key/value pairs.
+ */
+ def asAttrMap: Map[String, String] =
+ (iterator map (x => (x.prefixedKey, x.value.text))).toMap
+
+ /** returns Null or the next MetaData item */
+ def next: MetaData
+
+ /**
+ * Gets value of unqualified (unprefixed) attribute with given key, None if not found
+ *
+ * @param key
+ * @return value in Some(Seq[Node]) if key is found, None otherwise
+ */
+ final def get(key: String): Option[Seq[Node]] = Option(apply(key))
+
+ /** same as get(uri, owner.scope, key) */
+ final def get(uri: String, owner: Node, key: String): Option[Seq[Node]] =
+ get(uri, owner.scope, key)
+
+ /** gets value of qualified (prefixed) attribute with given key.
+ *
+ * @param uri namespace of key
+ * @param scope a namespace scp (usually of the element owning this attribute list)
+ * @param key to be looked for
+ * @return value as Some[Seq[Node]] if key is found, None otherwise
+ */
+ final def get(uri: String, scope: NamespaceBinding, key: String): Option[Seq[Node]] =
+ Option(apply(uri, scope, key))
+
+ protected def toString1(): String = sbToString(toString1)
+
+ // appends string representations of single attribute to StringBuilder
+ protected def toString1(sb: StringBuilder): Unit
+
+ override def toString(): String = sbToString(buildString)
+
+ def buildString(sb: StringBuilder): StringBuilder = {
+ sb append ' '
+ toString1(sb)
+ next buildString sb
+ }
+
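+ /** Checks well-formedness relative to `scope`. `Attribute.wellformed` requires that no later
+ * entry in the list has the same namespace and key, and that the rest of the list is well-formed. */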
+ def wellformed(scope: NamespaceBinding): Boolean
+
+ def remove(key: String): MetaData
+
+ def remove(namespace: String, scope: NamespaceBinding, key: String): MetaData
+
+ final def remove(namespace: String, owner: Node, key: String): MetaData =
+ remove(namespace, owner.scope, key)
+}
diff --git a/src/xml/scala/xml/NamespaceBinding.scala b/src/xml/scala/xml/NamespaceBinding.scala
new file mode 100644
index 0000000000..b320466976
--- /dev/null
+++ b/src/xml/scala/xml/NamespaceBinding.scala
@@ -0,0 +1,83 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import Utility.sbToString
+
+/** The class `NamespaceBinding` represents namespace bindings
+ * and scopes. The binding for the default namespace is treated as a null
+ * prefix. The absent namespace is represented with the null uri. Neither
+ * prefix nor uri may be empty; only a zero-length prefix is rejected by the constructor.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@SerialVersionUID(0 - 2518644165573446725L)
+case class NamespaceBinding(prefix: String, uri: String, parent: NamespaceBinding) extends AnyRef with Equality
+{
+ if (prefix == "")
+ throw new IllegalArgumentException("zero length prefix not allowed")
+
+ def getURI(_prefix: String): String =
+ if (prefix == _prefix) uri else parent getURI _prefix
+
+ /** Returns some prefix that is mapped to the URI.
+ *
+ * @param _uri the input URI
+ * @return the prefix that is mapped to the input URI, or null
+ * if no prefix is mapped to the URI.
+ */
+ def getPrefix(_uri: String): String =
+ if (_uri == uri) prefix else parent getPrefix _uri
+
+ override def toString(): String = sbToString(buildString(_, TopScope))
+
+ private def shadowRedefined(stop: NamespaceBinding): NamespaceBinding = {
+ def prefixList(x: NamespaceBinding): List[String] =
+ if ((x == null) || (x eq stop)) Nil
+ else x.prefix :: prefixList(x.parent)
+ def fromPrefixList(l: List[String]): NamespaceBinding = l match {
+ case Nil => stop
+ case x :: xs => new NamespaceBinding(x, this.getURI(x), fromPrefixList(xs))
+ }
+ val ps0 = prefixList(this).reverse
+ val ps = ps0.distinct
+ if (ps.size == ps0.size) this
+ else fromPrefixList(ps)
+ }
+
+ override def canEqual(other: Any) = other match {
+ case _: NamespaceBinding => true
+ case _ => false
+ }
+
+ override def strict_==(other: Equality) = other match {
+ case x: NamespaceBinding => (prefix == x.prefix) && (uri == x.uri) && (parent == x.parent)
+ case _ => false
+ }
+
+ def basisForHashCode: Seq[Any] = List(prefix, uri, parent)
+
+ def buildString(stop: NamespaceBinding): String = sbToString(buildString(_, stop))
+
+ def buildString(sb: StringBuilder, stop: NamespaceBinding) {
+ shadowRedefined(stop).doBuildString(sb, stop)
+ }
+
+ /** Appends one ` xmlns[:prefix]="uri"` declaration for this binding and then
+ *  for each ancestor binding, walking the `parent` chain.
+ *
+ *  The walk ends at `stop`, at `TopScope`, or when a binding has no parent.
+ *
+ *  @param sb   the builder that receives the declarations
+ *  @param stop the binding at which to stop (exclusive); may be null
+ */
+ private def doBuildString(sb: StringBuilder, stop: NamespaceBinding): Unit = {
+   // `this` can never be null, so a null test on it cannot end the walk.
+   // TopScope has a null parent, so stop there explicitly rather than
+   // emitting xmlns="" and dereferencing null.
+   if ((this eq stop) || (this eq TopScope)) return
+
+   val s = " xmlns%s=\"%s\"".format(
+     (if (prefix != null) ":" + prefix else ""),
+     (if (uri != null) uri else "")
+   )
+   sb append s
+   // a hand-built chain may end in a null parent instead of TopScope
+   if (parent ne null) parent.doBuildString(sb, stop)
+ }
+}
diff --git a/src/xml/scala/xml/Node.scala b/src/xml/scala/xml/Node.scala
new file mode 100755
index 0000000000..e121284252
--- /dev/null
+++ b/src/xml/scala/xml/Node.scala
@@ -0,0 +1,198 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** This singleton object contains the `unapplySeq` method for
+ * convenient deconstruction.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object Node {
+ /** the constant empty attribute sequence */
+ final def NoAttributes: MetaData = Null
+
+ /** the empty namespace */
+ val EmptyNamespace = ""
+
+ def unapplySeq(n: Node) = Some((n.label, n.attributes, n.child))
+}
+
+/**
+ * An abstract class representing XML with nodes of a labelled tree.
+ * This class contains an implementation of a subset of XPath for navigation.
+ *
+ * @author Burak Emir and others
+ * @version 1.1
+ */
+abstract class Node extends NodeSeq {
+
+ /** prefix of this node */
+ def prefix: String = null
+
+ /** label of this node. I.e. "foo" for &lt;foo/&gt;) */
+ def label: String
+
+ /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5
+ */
+ def isAtom = this.isInstanceOf[Atom[_]]
+
+ /** The logic formerly found in typeTag$, as best I could infer it. */
+ def doCollectNamespaces = true // if (tag >= 0) DO collect namespaces
+ def doTransform = true // if (tag < 0) DO NOT transform
+
+ /**
+ * method returning the namespace bindings of this node. by default, this
+ * is TopScope, which means there are no namespace bindings except the
+ * predefined one for "xml".
+ */
+ def scope: NamespaceBinding = TopScope
+
+ /**
+ * convenience, same as `getNamespace(this.prefix)`
+ */
+ def namespace = getNamespace(this.prefix)
+
+ /**
+ * Convenience method, same as `scope.getURI(pre)` but additionally
+ * checks if scope is `'''null'''`.
+ *
+ * @param pre the prefix whose namespace name we would like to obtain
+ * @return the namespace if `scope != null` and prefix was
+ * found, else `null`
+ */
+ def getNamespace(pre: String): String = if (scope eq null) null else scope.getURI(pre)
+
+ /**
+ * Convenience method, looks up an unprefixed attribute in attributes of this node.
+ * Same as `attributes.get(key)`.
+ *
+ * @param key of queried attribute.
+ * @return the result of `attributes.get(key)`: the value wrapped in `Some`
+ * if an unprefixed attribute with the given key exists, otherwise `None`.
+ */
+ final def attribute(key: String): Option[Seq[Node]] = attributes.get(key)
+
+ /**
+ * Convenience method, looks up a prefixed attribute in attributes of this node.
+ * Same as `attributes.get(uri, this, key)`.
+ *
+ * @param uri namespace of queried attribute (may not be null).
+ * @param key of queried attribute.
+ * @return the result of `attributes.get(uri, this, key)`: the value wrapped in
+ * `Some` if a matching prefixed attribute exists, otherwise `None`.
+ */
+ final def attribute(uri: String, key: String): Option[Seq[Node]] =
+ attributes.get(uri, this, key)
+
+ /**
+ * Returns attribute meaning all attributes of this node, prefixed and
+ * unprefixed, in no particular order. In class `Node`, this
+ * defaults to `Null` (the empty attribute list).
+ *
+ * @return all attributes of this node
+ */
+ def attributes: MetaData = Null
+
+ /**
+ * Returns child axis i.e. all children of this node.
+ *
+ * @return all children of this node
+ */
+ def child: Seq[Node]
+
+ /** Children which do not stringify to "" (needed for equality)
+ */
+ def nonEmptyChildren: Seq[Node] = child filterNot (_.toString == "")
+
+ /**
+ * Descendant axis (all descendants of this node, not including node itself)
+ * includes all text nodes, element nodes, comments and processing instructions.
+ */
+ def descendant: List[Node] =
+ child.toList.flatMap { x => x::x.descendant }
+
+ /**
+ * Descendant axis (all descendants of this node, including this node)
+ * includes all text nodes, element nodes, comments and processing instructions.
+ */
+ def descendant_or_self: List[Node] = this :: descendant
+
+ override def canEqual(other: Any) = other match {
+ case x: Group => false
+ case x: Node => true
+ case _ => false
+ }
+
+ override protected def basisForHashCode: Seq[Any] =
+ prefix :: label :: attributes :: nonEmptyChildren.toList
+
+ override def strict_==(other: Equality) = other match {
+ case _: Group => false
+ case x: Node =>
+ (prefix == x.prefix) &&
+ (label == x.label) &&
+ (attributes == x.attributes) &&
+ // (scope == x.scope) // note - original code didn't compare scopes so I left it as is.
+ (nonEmptyChildren sameElements x.nonEmptyChildren)
+ case _ =>
+ false
+ }
+
+ // implementations of NodeSeq methods
+
+ /**
+ * returns a sequence consisting of only this node
+ */
+ def theSeq: Seq[Node] = this :: Nil
+
+ /**
+ * String representation of this node
+ *
+ * @param stripComments if true, strips comment nodes from result
+ */
+ def buildString(stripComments: Boolean): String =
+ Utility.serialize(this, stripComments = stripComments).toString
+
+ /**
+ * Same as `toString('''false''')`.
+ */
+ override def toString(): String = buildString(stripComments = false)
+
+ /**
+ * Appends qualified name of this node to `StringBuilder`.
+ */
+ def nameToString(sb: StringBuilder): StringBuilder = {
+ if (null != prefix) {
+ sb append prefix
+ sb append ':'
+ }
+ sb append label
+ }
+
+ /**
+ * Returns a type symbol (e.g. DTD, XSD), default `'''null'''`.
+ */
+ def xmlType(): TypeSymbol = null
+
+ /**
+ * Returns a text representation of this node. Note that this is not equivalent to
+ * the XPath node-test called text(), it is rather an implementation of the
+ * XPath function string()
+ * Martin to Burak: to do: if you make this method abstract, the compiler will now
+ * complain if there's no implementation in a subclass. Is this what we want? Note that
+ * this would break doc/DocGenator and doc/ModelToXML, with an error message like:
+ * {{{
+ * doc\DocGenerator.scala:1219: error: object creation impossible, since there is a deferred declaration of method text in class Node of type => String which is not implemented in a subclass
+ * new SpecialNode {
+ * ^
+ * }}} */
+ override def text: String = super.text
+}
diff --git a/src/xml/scala/xml/NodeBuffer.scala b/src/xml/scala/xml/NodeBuffer.scala
new file mode 100644
index 0000000000..ae7c7b2bf8
--- /dev/null
+++ b/src/xml/scala/xml/NodeBuffer.scala
@@ -0,0 +1,47 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/**
+ * This class acts as a Buffer for nodes. If it is used as a sequence of
+ * nodes `Seq[Node]`, it must be ensured that no updates occur after that
+ * point, because `scala.xml.Node` is assumed to be immutable.
+ *
+ * Despite this being a sequence, don't use it as key in a hashtable.
+ * Calling the hashcode function will result in a runtime error.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+class NodeBuffer extends scala.collection.mutable.ArrayBuffer[Node] {
+
+ /**
+ * Append given object to this buffer, returns reference on this
+ * `NodeBuffer` for convenience. Some rules apply:
+ * - If argument `o` is `'''null'''`, the unit value or `Text("")`, it is ignored.
+ * - If it is an `Iterator`, its elements are added one by one through `&+`;
+ * an `Iterable` or an `Array` is added through its iterator.
+ * - If `o` is a node, it is added as it is (this case is tested before `Iterable`).
+ * - If it is anything else, it gets wrapped in an [[scala.xml.Atom]].
+ *
+ * @param o converts to an xml node and adds to this node buffer
+ * @return this nodebuffer
+ */
+ def &+(o: Any): NodeBuffer = {
+ o match {
+ case null | _: Unit | Text("") => // ignore
+ case it: Iterator[_] => it foreach &+
+ case n: Node => super.+=(n)
+ case ns: Iterable[_] => this &+ ns.iterator
+ case ns: Array[_] => this &+ ns.iterator
+ case d => super.+=(new Atom(d))
+ }
+ this
+ }
+}
diff --git a/src/xml/scala/xml/NodeSeq.scala b/src/xml/scala/xml/NodeSeq.scala
new file mode 100644
index 0000000000..b8022472fb
--- /dev/null
+++ b/src/xml/scala/xml/NodeSeq.scala
@@ -0,0 +1,157 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import scala.collection.{ mutable, immutable, generic, SeqLike, AbstractSeq }
+import mutable.{ Builder, ListBuffer }
+import generic.{ CanBuildFrom }
+import scala.language.implicitConversions
+
+/** This object ...
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object NodeSeq {
+ final val Empty = fromSeq(Nil)
+ def fromSeq(s: Seq[Node]): NodeSeq = new NodeSeq {
+ def theSeq = s
+ }
+ type Coll = NodeSeq
+ implicit def canBuildFrom: CanBuildFrom[Coll, Node, NodeSeq] =
+ new CanBuildFrom[Coll, Node, NodeSeq] {
+ def apply(from: Coll) = newBuilder
+ def apply() = newBuilder
+ }
+ def newBuilder: Builder[Node, NodeSeq] = new ListBuffer[Node] mapResult fromSeq
+ implicit def seqToNodeSeq(s: Seq[Node]): NodeSeq = fromSeq(s)
+}
+
+/** This class implements a wrapper around `Seq[Node]` that adds XPath
+ * and comprehension methods.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+abstract class NodeSeq extends AbstractSeq[Node] with immutable.Seq[Node] with SeqLike[Node, NodeSeq] with Equality {
+ import NodeSeq.seqToNodeSeq // import view magic for NodeSeq wrappers
+
+ /** Creates a list buffer as builder for this class */
+ override protected[this] def newBuilder = NodeSeq.newBuilder
+
+ def theSeq: Seq[Node]
+ def length = theSeq.length
+ override def iterator = theSeq.iterator
+
+ def apply(i: Int): Node = theSeq(i)
+ def apply(f: Node => Boolean): NodeSeq = filter(f)
+
+ /** Compares this sequence with `that` element by element using `xml_!=`.
+ *
+ *  @return true when both have the same length and no pair of elements at
+ *          the same position differs under `xml_!=`
+ */
+ def xml_sameElements[A](that: Iterable[A]): Boolean = {
+   val mine = this.iterator
+   val theirs = that.iterator
+   var matching = true
+   // stop advancing as soon as one pair differs or either side runs out
+   while (matching && mine.hasNext && theirs.hasNext)
+     matching = !(mine.next xml_!= theirs.next)
+
+   matching && !mine.hasNext && !theirs.hasNext
+ }
+
+ protected def basisForHashCode: Seq[Any] = theSeq
+
+ override def canEqual(other: Any) = other match {
+ case _: NodeSeq => true
+ case _ => false
+ }
+
+ override def strict_==(other: Equality) = other match {
+ case x: NodeSeq => (length == x.length) && (theSeq sameElements x.theSeq)
+ case _ => false
+ }
+
+ /** Projection function, which returns elements of `this` sequence based
+ * on the string `that`. Use:
+ * - `this \ "foo"` to get a list of all elements that are labelled with `"foo"`;
+ * - `\ "_"` to get a list of all elements (wildcard);
+ * - `ns \ "@foo"` to get the unprefixed attribute `"foo"`;
+ * - `ns \ "@{uri}foo"` to get the prefixed attribute `"pre:foo"` whose
+ * prefix `"pre"` is resolved to the namespace `"uri"`.
+ *
+ * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute
+ * values are wrapped in a [[scala.xml.Group]].
+ *
+ * There is no support for searching a prefixed attribute by its literal prefix.
+ *
+ * The document order is preserved.
+ */
+ def \(that: String): NodeSeq = {
+ def fail = throw new IllegalArgumentException(that)
+ def atResult = {
+ lazy val y = this(0)
+ val attr =
+ if (that.length == 1) fail
+ else if (that(1) == '{') {
+ val i = that indexOf '}'
+ if (i == -1) fail
+ val (uri, key) = (that.substring(2,i), that.substring(i+1, that.length()))
+ if (uri == "" || key == "") fail
+ else y.attribute(uri, key)
+ }
+ else y.attribute(that drop 1)
+
+ attr match {
+ case Some(x) => Group(x)
+ case _ => NodeSeq.Empty
+ }
+ }
+
+ def makeSeq(cond: (Node) => Boolean) =
+ NodeSeq fromSeq (this flatMap (_.child) filter cond)
+
+ that match {
+ case "" => fail
+ case "_" => makeSeq(!_.isAtom)
+ case _ if (that(0) == '@' && this.length == 1) => atResult
+ case _ => makeSeq(_.label == that)
+ }
+ }
+
+ /** Projection function, which returns elements of `this` sequence and of
+ * all its subsequences, based on the string `that`. Use:
+ * - `this \\ "foo"` to get a list of all elements that are labelled with `"foo"`;
+ * - `\\ "_"` to get a list of all elements (wildcard);
+ * - `ns \\ "@foo"` to get the unprefixed attribute `"foo"`;
+ * - `ns \\ "@{uri}foo"` to get each prefixed attribute `"pre:foo"` whose
+ * prefix `"pre"` is resolved to the namespace `"uri"`.
+ *
+ * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute
+ * values are wrapped in a [[scala.xml.Group]].
+ *
+ * There is no support for searching a prefixed attribute by its literal prefix.
+ *
+ * The document order is preserved.
+ *
+ * @param that the label, wildcard or attribute selector to project on
+ * @return the matching nodes among this sequence and all its descendants
+ * @throws IllegalArgumentException if `that` is the empty string
+ */
+ def \\ (that: String): NodeSeq = {
+   def fail = throw new IllegalArgumentException(that)
+   def filt(cond: (Node) => Boolean) = this flatMap (_.descendant_or_self) filter cond
+   that match {
+     // reject "" up front, as the single-step projection does; otherwise
+     // `that(0)` below fails with StringIndexOutOfBoundsException
+     case "" => fail
+     case "_" => filt(!_.isAtom)
+     case _ if that(0) == '@' => filt(!_.isAtom) flatMap (_ \ that)
+     case _ => filt(x => !x.isAtom && x.label == that)
+   }
+ }
+
+ /** Convenience method which returns string text of the named attribute. Use:
+ * - `that \@ "foo"` to get the string text of attribute `"foo"`;
+ */
+ def \@(attributeName: String): String = (this \ ("@" + attributeName)).text
+
+ override def toString(): String = theSeq.mkString
+
+ def text: String = (this map (_.text)).mkString
+}
diff --git a/src/xml/scala/xml/Null.scala b/src/xml/scala/xml/Null.scala
new file mode 100644
index 0000000000..f763c023c4
--- /dev/null
+++ b/src/xml/scala/xml/Null.scala
@@ -0,0 +1,62 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import Utility.isNameStart
+import scala.collection.Iterator
+
+/** Essentially, every method in here is a dummy, returning Zero[T].
+ * It provides a backstop for the unusual collection defined by MetaData,
+ * sort of a linked list of tails.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+case object Null extends MetaData {
+ override def iterator = Iterator.empty
+ override def size = 0
+ override def append(m: MetaData, scope: NamespaceBinding = TopScope): MetaData = m
+ override def filter(f: MetaData => Boolean): MetaData = this
+
+ def copy(next: MetaData) = next
+ def getNamespace(owner: Node) = null
+
+ override def hasNext = false
+ def next = null
+ def key = null
+ def value = null
+ def isPrefixed = false
+
+ override def length = 0
+ override def length(i: Int) = i
+
+ override def strict_==(other: Equality) = other match {
+ case x: MetaData => x.length == 0
+ case _ => false
+ }
+ override protected def basisForHashCode: Seq[Any] = Nil
+
+ def apply(namespace: String, scope: NamespaceBinding, key: String) = null
+ /** Looks up an unprefixed attribute in the empty attribute list.
+ *
+ *  @param key the attribute name to look up
+ *  @return always null, since this list holds no attributes
+ *  @throws IllegalArgumentException if `key` is empty or does not begin
+ *          with a name-start character
+ */
+ def apply(key: String) =
+   // test for emptiness first: `key.head` on "" throws NoSuchElementException
+   if (key.nonEmpty && isNameStart(key.head)) null
+   else throw new IllegalArgumentException("not a valid attribute name '"+key+"', so can never match !")
+
+ protected def toString1(sb: StringBuilder) = ()
+ override protected def toString1(): String = ""
+
+ override def toString(): String = ""
+
+ override def buildString(sb: StringBuilder): StringBuilder = sb
+
+ override def wellformed(scope: NamespaceBinding) = true
+
+ def remove(key: String) = this
+ def remove(namespace: String, scope: NamespaceBinding, key: String) = this
+}
diff --git a/src/xml/scala/xml/PCData.scala b/src/xml/scala/xml/PCData.scala
new file mode 100644
index 0000000000..31eea2b6d7
--- /dev/null
+++ b/src/xml/scala/xml/PCData.scala
@@ -0,0 +1,44 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+ /** This class (which is not used by all XML parsers, but always used by the
+ * XHTML one) represents parseable character data, which appeared as CDATA
+ * sections in the input and is to be preserved as CDATA section in the output.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+ class PCData(data: String) extends Atom[String](data) {
+
+   /** Appends the data wrapped in a CDATA section. The text is written
+    * verbatim (no entity escaping), except that any `]]>` inside the data
+    * is split across two adjacent CDATA sections, because a literal `]]>`
+    * would end the section early and produce malformed XML.
+    *
+    * @param sb the input string buffer associated to some XML element
+    * @return the input string buffer with the formatted CDATA section
+    */
+   override def buildString(sb: StringBuilder): StringBuilder = {
+     // "]]>" becomes "]]" + "]]><![CDATA[" + ">", which parses back to "]]>"
+     val safe = data.replace("]]>", "]]]]><![CDATA[>")
+     sb append "<![CDATA[%s]]>".format(safe)
+   }
+ }
+
+/** This singleton object contains the `apply`and `unapply` methods for
+ * convenient construction and deconstruction.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object PCData {
+ def apply(data: String) = new PCData(data)
+ def unapply(other: Any): Option[String] = other match {
+ case x: PCData => Some(x.data)
+ case _ => None
+ }
+}
+
diff --git a/src/xml/scala/xml/PrefixedAttribute.scala b/src/xml/scala/xml/PrefixedAttribute.scala
new file mode 100644
index 0000000000..4ab79c8677
--- /dev/null
+++ b/src/xml/scala/xml/PrefixedAttribute.scala
@@ -0,0 +1,61 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+/** prefixed attributes always have a non-null namespace.
+ *
+ * @param pre
+ * @param key
+ * @param value the attribute value
+ * @param next1
+ */
+class PrefixedAttribute(
+ val pre: String,
+ val key: String,
+ val value: Seq[Node],
+ val next1: MetaData)
+extends Attribute
+{
+ val next = if (value ne null) next1 else next1.remove(key)
+
+ /** same as this(pre, key, Text(value), next), or no attribute if value is null */
+ def this(pre: String, key: String, value: String, next: MetaData) =
+ this(pre, key, if (value ne null) Text(value) else null: NodeSeq, next)
+
+ /** same as this(pre, key, value.get, next), or no attribute if value is None */
+ def this(pre: String, key: String, value: Option[Seq[Node]], next: MetaData) =
+ this(pre, key, value.orNull, next)
+
+ /** Returns a copy of this prefixed attribute with the given
+ * next field.
+ */
+ def copy(next: MetaData) =
+ new PrefixedAttribute(pre, key, value, next)
+
+ def getNamespace(owner: Node) =
+ owner.getNamespace(pre)
+
+ /** forwards the call to next (because the caller looks for an unprefixed attribute) */
+ def apply(key: String): Seq[Node] = next(key)
+
+ /** gets attribute value of qualified (prefixed) attribute with given key
+ */
+ def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = {
+ if (key == this.key && scope.getURI(pre) == namespace)
+ value
+ else
+ next(namespace, scope, key)
+ }
+}
+
+object PrefixedAttribute {
+ def unapply(x: PrefixedAttribute) = Some((x.pre, x.key, x.value, x.next))
+}
diff --git a/src/xml/scala/xml/PrettyPrinter.scala b/src/xml/scala/xml/PrettyPrinter.scala
new file mode 100755
index 0000000000..9e01905357
--- /dev/null
+++ b/src/xml/scala/xml/PrettyPrinter.scala
@@ -0,0 +1,263 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import Utility.sbToString
+
+/** Class for pretty printing. After instantiating, you can use the
+ * format() and formatNode() methods to convert XML to a formatted
+ * string. The class can be reused to pretty print any number of
+ * XML nodes.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ *
+ * @param width the width to fit the output into
+ * @param step indentation
+ */
+class PrettyPrinter(width: Int, step: Int) {
+
+ class BrokenException() extends java.lang.Exception
+
+ class Item
+ case object Break extends Item {
+ override def toString() = "\\"
+ }
+ case class Box(col: Int, s: String) extends Item
+ case class Para(s: String) extends Item
+
+ protected var items: List[Item] = Nil
+
+ protected var cur = 0
+
+ protected def reset() = {
+ cur = 0
+ items = Nil
+ }
+
+ /** Try to cut at whitespace.
+ *
+ * If `s` fits into the remaining width (`width - cur`) it is returned as a
+ * single `Box`. Otherwise the positions of the spaces that lie before the
+ * remaining width are collected and tried from the rightmost one backwards,
+ * recursively cutting the remainder of the string.
+ *
+ * NOTE(review): `res` is assigned inside the loop but never returned; after
+ * the early return every path ends in `BrokenException` - confirm whether
+ * this is intended.
+ *
+ * @param s the string to cut
+ * @param ind the indentation recorded in the produced boxes (also written to `cur`)
+ * @return a one-element list holding `Box(ind, s)` when `s` fits
+ * @throws BrokenException in every other case
+ */
+ protected def cut(s: String, ind: Int): List[Item] = {
+ val tmp = width - cur
+ if (s.length <= tmp)
+ return List(Box(ind, s))
+ var i = s indexOf ' '
+ if (i > tmp || i == -1) throw new BrokenException() // cannot break
+
+ var last: List[Int] = Nil
+ while (i != -1 && i < tmp) {
+ last = i::last
+ i = s.indexOf(' ', i+1)
+ }
+ var res: List[Item] = Nil
+ while (Nil != last) try {
+ val b = Box(ind, s.substring(0, last.head))
+ cur = ind
+ res = b :: Break :: cut(s.substring(last.head, s.length), ind)
+ // backtrack
+ last = last.tail
+ } catch {
+ case _:BrokenException => last = last.tail
+ }
+ throw new BrokenException()
+ }
+
+ /** Try to make indented box, if possible, else para.
+ *
+ * When `cur + s.length > width` the string is stored as a single `Box` and
+ * `cur` advances by its length. Otherwise the string is handed to `cut`, and
+ * if `cut` throws `BrokenException` it is stored through `makePara`.
+ *
+ * @param ind the indentation for the produced items
+ * @param s the string to place
+ */
+ protected def makeBox(ind: Int, s: String) =
+ if (cur + s.length > width) { // NOTE(review): taken when s exceeds the width, not when it "fits in this line" - confirm the intended comparison
+ items ::= Box(ind, s)
+ cur += s.length
+ }
+ else try cut(s, ind) foreach (items ::= _) // break it up
+ catch { case _: BrokenException => makePara(ind, s) } // give up, para
+
+ // dont respect indent in para, but afterwards
+ protected def makePara(ind: Int, s: String) = {
+ items = Break::Para(s)::Break::items
+ cur = ind
+ }
+
+ // respect indent
+ protected def makeBreak() = { // using wrapping here...
+ items = Break :: items
+ cur = 0
+ }
+
+ protected def leafTag(n: Node) = {
+ def mkLeaf(sb: StringBuilder) {
+ sb append '<'
+ n nameToString sb
+ n.attributes buildString sb
+ sb append "/>"
+ }
+ sbToString(mkLeaf)
+ }
+
+ protected def startTag(n: Node, pscope: NamespaceBinding): (String, Int) = {
+ var i = 0
+ def mkStart(sb: StringBuilder) {
+ sb append '<'
+ n nameToString sb
+ i = sb.length + 1
+ n.attributes buildString sb
+ n.scope.buildString(sb, pscope)
+ sb append '>'
+ }
+ (sbToString(mkStart), i)
+ }
+
+ protected def endTag(n: Node) = {
+ def mkEnd(sb: StringBuilder) {
+ sb append "</"
+ n nameToString sb
+ sb append '>'
+ }
+ sbToString(mkEnd)
+ }
+
+ protected def childrenAreLeaves(n: Node): Boolean = {
+ def isLeaf(l: Node) = l match {
+ case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => true
+ case _ => false
+ }
+ n.child forall isLeaf
+ }
+
+ protected def fits(test: String) =
+ test.length < width - cur
+
+ /** True when the node carries the attribute `XML.space` in namespace
+ *  `XML.namespace` and its string form equals `XML.preserve`.
+ */
+ private def doPreserve(node: Node) =
+   node.attribute(XML.namespace, XML.space) exists (_.toString == XML.preserve)
+
+ protected def traverse(node: Node, pscope: NamespaceBinding, ind: Int): Unit = node match {
+
+ case Text(s) if s.trim() == "" =>
+ ;
+ case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr =>
+ makeBox( ind, node.toString().trim() )
+ case g @ Group(xs) =>
+ traverse(xs.iterator, pscope, ind)
+ case _ =>
+ val test = {
+ val sb = new StringBuilder()
+ Utility.serialize(node, pscope, sb, stripComments = false)
+ if (doPreserve(node)) sb.toString
+ else TextBuffer.fromString(sb.toString).toText(0).data
+ }
+ if (childrenAreLeaves(node) && fits(test)) {
+ makeBox(ind, test)
+ } else {
+ val (stg, len2) = startTag(node, pscope)
+ val etg = endTag(node)
+ if (stg.length < width - cur) { // start tag fits
+ makeBox(ind, stg)
+ makeBreak()
+ traverse(node.child.iterator, node.scope, ind + step)
+ makeBox(ind, etg)
+ } else if (len2 < width - cur) {
+ // <start label + attrs + tag + content + end tag
+ makeBox(ind, stg.substring(0, len2))
+ makeBreak() // todo: break the rest in pieces
+ /*{ //@todo
+ val sq:Seq[String] = stg.split(" ");
+ val it = sq.iterator;
+ it.next;
+ for (c <- it) {
+ makeBox(ind+len2-2, c)
+ makeBreak()
+ }
+ }*/
+ makeBox(ind, stg.substring(len2, stg.length))
+ makeBreak()
+ traverse(node.child.iterator, node.scope, ind + step)
+ makeBox(cur, etg)
+ makeBreak()
+ } else { // give up
+ makeBox(ind, test)
+ makeBreak()
+ }
+ }
+ }
+
+ protected def traverse(it: Iterator[Node], scope: NamespaceBinding, ind: Int ): Unit =
+ for (c <- it) {
+ traverse(c, scope, ind)
+ makeBreak()
+ }
+
+ /** Appends a formatted string containing well-formed XML with
+ * given namespace to prefix mapping to the given string buffer.
+ *
+ * @param n the node to be serialized
+ * @param sb the stringbuffer to append to
+ */
+ def format(n: Node, sb: StringBuilder) { // entry point
+ format(n, null, sb)
+ }
+
+ def format(n: Node, pscope: NamespaceBinding, sb: StringBuilder) { // entry point
+ var lastwasbreak = false
+ reset()
+ traverse(n, pscope, 0)
+ var cur = 0
+ for (b <- items.reverse) b match {
+ case Break =>
+ if (!lastwasbreak) sb.append('\n') // on windows: \r\n ?
+ lastwasbreak = true
+ cur = 0
+// while (cur < last) {
+// sb append ' '
+// cur += 1
+// }
+
+ case Box(i, s) =>
+ lastwasbreak = false
+ while (cur < i) {
+ sb append ' '
+ cur += 1
+ }
+ sb.append(s)
+ case Para( s ) =>
+ lastwasbreak = false
+ sb append s
+ }
+ }
+
+ // public convenience methods
+
+ /** Returns a formatted string containing well-formed XML with
+ * given namespace to prefix mapping.
+ *
+ * @param n the node to be serialized
+ * @param pscope the namespace to prefix mapping
+ * @return the formatted string
+ */
+ def format(n: Node, pscope: NamespaceBinding = null): String =
+ sbToString(format(n, pscope, _))
+
+ /** Returns a formatted string containing well-formed XML.
+ *
+ * @param nodes the sequence of nodes to be serialized
+ * @param pscope the namespace to prefix mapping
+ */
+ def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding = null): String =
+ sbToString(formatNodes(nodes, pscope, _))
+
+ /** Appends a formatted string containing well-formed XML with
+ * the given namespace to prefix mapping to the given stringbuffer.
+ *
+ * @param nodes the nodes to be serialized
+ * @param pscope the namespace to prefix mapping
+ * @param sb the string buffer to which to append to
+ */
+ def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding, sb: StringBuilder): Unit =
+ nodes foreach (n => sb append format(n, pscope))
+}
diff --git a/src/xml/scala/xml/ProcInstr.scala b/src/xml/scala/xml/ProcInstr.scala
new file mode 100644
index 0000000000..189c1c6878
--- /dev/null
+++ b/src/xml/scala/xml/ProcInstr.scala
@@ -0,0 +1,39 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+/** an XML node for processing instructions (PI)
+ *
+ * @author Burak Emir
+ * @param target target name of this PI
+ * @param proctext text contained in this node, may not contain "?>"
+ */
+case class ProcInstr(target: String, proctext: String) extends SpecialNode
+{
+ if (!Utility.isName(target))
+ throw new IllegalArgumentException(target+" must be an XML Name")
+ if (proctext contains "?>")
+ throw new IllegalArgumentException(proctext+" may not contain \"?>\"")
+ if (target.toLowerCase == "xml")
+ throw new IllegalArgumentException(target+" is reserved")
+
+ final override def doCollectNamespaces = false
+ final override def doTransform = false
+
+ final def label = "#PI"
+ override def text = ""
+
+ /** Appends `<?target proctext?>` to this stringbuffer; the proctext and the
+ * space before it are left out when proctext is empty.
+ */
+ override def buildString(sb: StringBuilder) =
+ sb append "<?%s%s?>".format(target, (if (proctext == "") "" else " " + proctext))
+}
diff --git a/src/xml/scala/xml/QNode.scala b/src/xml/scala/xml/QNode.scala
new file mode 100644
index 0000000000..f9e3f1854b
--- /dev/null
+++ b/src/xml/scala/xml/QNode.scala
@@ -0,0 +1,20 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+ /** This object provides an extractor method to match a qualified node with
+ * its namespace URI
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+ object QNode {
+   /** Deconstructs a node into (namespace URI, label, attributes, children).
+    *
+    * The URI is resolved through `Node.getNamespace`, which yields null for
+    * a node whose scope is null instead of dereferencing the null scope.
+    */
+   def unapplySeq(n: Node) = Some((n.getNamespace(n.prefix), n.label, n.attributes, n.child))
+ }
diff --git a/src/xml/scala/xml/SpecialNode.scala b/src/xml/scala/xml/SpecialNode.scala
new file mode 100644
index 0000000000..5fef8ef66c
--- /dev/null
+++ b/src/xml/scala/xml/SpecialNode.scala
@@ -0,0 +1,33 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+ /** `SpecialNode` is a special XML node which represents either text
+ * `(PCDATA)`, a comment, a `PI`, or an entity ref.
+ *
+ * `SpecialNode`s also play the role of [[scala.xml.pull.XMLEvent]]s for
+ * pull-parsing.
+ *
+ * A special node has no attributes, no namespace and no children; these
+ * members are final here.
+ *
+ * @author Burak Emir
+ */
+ abstract class SpecialNode extends Node with pull.XMLEvent {
+
+ /** always empty */
+ final override def attributes = Null
+
+ /** always `null` (note: not `Node.EmptyNamespace`, which is the empty string) */
+ final override def namespace = null
+
+ /** always empty */
+ final def child = Nil
+
+ /** Append string representation to the given string buffer argument.
+ *
+ * @param sb the string buffer to append to
+ * @return a string buffer holding the appended representation
+ */
+ def buildString(sb: StringBuilder): StringBuilder
+ }
diff --git a/src/xml/scala/xml/Text.scala b/src/xml/scala/xml/Text.scala
new file mode 100644
index 0000000000..debea0c025
--- /dev/null
+++ b/src/xml/scala/xml/Text.scala
@@ -0,0 +1,39 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** The class `Text` implements an XML node for text (PCDATA).
+ * It is used in both non-bound and bound XML representations.
+ *
+ * @author Burak Emir
+ * @param data the text contained in this node, may not be null.
+ */
+class Text(data: String) extends Atom[String](data) {
+
+ /** Appends the text to `sb` through `Utility.escape`, which escapes some
+ * characters according to the XML specification, and returns the builder.
+ */
+ override def buildString(sb: StringBuilder): StringBuilder =
+ Utility.escape(data, sb)
+}
+
+/** This singleton object contains the `apply` and `unapply` methods for
+ *  convenient construction and deconstruction.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+object Text {
+  def apply(data: String) = new Text(data)
+
+  def unapply(other: Any): Option[String] =
+    // null and every value that is not a Text fall outside the partial function
+    Option(other) collect { case t: Text => t.data }
+}
diff --git a/src/xml/scala/xml/TextBuffer.scala b/src/xml/scala/xml/TextBuffer.scala
new file mode 100644
index 0000000000..514b1701af
--- /dev/null
+++ b/src/xml/scala/xml/TextBuffer.scala
@@ -0,0 +1,46 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+import Utility.isSpace
+
+object TextBuffer {
+  def fromString(str: String): TextBuffer = (new TextBuffer).append(str)
+}
+
+/** The class `TextBuffer` is for creating text nodes without surplus
+ * whitespace. All occurrences of one or more whitespace in strings
+ * appended with the `append` method will be replaced by a single space
+ * character, and leading and trailing space will be removed completely.
+ */
+class TextBuffer
+{
+  val sb = new StringBuilder()
+
+  /** Appends `cs` to the buffer; a run of whitespace is stored as one space.
+   */
+  def append(cs: Seq[Char]): this.type = {
+    for (ch <- cs) {
+      if (isSpace(ch)) { if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' }
+      else sb append ch
+    }
+    this
+  }
+
+  /** Returns an empty sequence if text is only whitespace.
+   *
+   *  @return the buffered text with leading and trailing whitespace trimmed.
+   */
+  def toText: Seq[Text] = {
+    val trimmed = sb.toString.trim
+    if (trimmed.isEmpty) Nil else Seq(Text(trimmed))
+  }
+}
diff --git a/src/xml/scala/xml/TopScope.scala b/src/xml/scala/xml/TopScope.scala
new file mode 100644
index 0000000000..474fbbbdb5
--- /dev/null
+++ b/src/xml/scala/xml/TopScope.scala
@@ -0,0 +1,31 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+/** top level namespace scope. only contains the predefined binding
+ * for the &quot;xml&quot; prefix which is bound to
+ * &quot;http://www.w3.org/XML/1998/namespace&quot;
+ */
+object TopScope extends NamespaceBinding(null, null, null) {
+
+  import XML.{ xml, namespace }
+
+  override def getURI(prefix1: String): String =
+    if (prefix1 != xml) null else namespace
+
+  override def getPrefix(uri1: String): String =
+    if (uri1 != namespace) null else xml
+
+  override def toString() = ""
+  // the top scope contributes nothing to printed namespace declarations
+  override def buildString(stop: NamespaceBinding) = ""
+  override def buildString(sb: StringBuilder, ignore: NamespaceBinding) = {}
+}
diff --git a/src/xml/scala/xml/TypeSymbol.scala b/src/xml/scala/xml/TypeSymbol.scala
new file mode 100644
index 0000000000..fb371ee340
--- /dev/null
+++ b/src/xml/scala/xml/TypeSymbol.scala
@@ -0,0 +1,15 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+
+/** Abstract base class that declares no members of its own. */
+abstract class TypeSymbol
diff --git a/src/xml/scala/xml/Unparsed.scala b/src/xml/scala/xml/Unparsed.scala
new file mode 100644
index 0000000000..bc190eb724
--- /dev/null
+++ b/src/xml/scala/xml/Unparsed.scala
@@ -0,0 +1,36 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** An XML node for unparsed content. It will be output verbatim, all bets
+ * are off regarding wellformedness etc.
+ *
+ * @author Burak Emir
+ * @param data content in this node, may not be null.
+ */
+class Unparsed(data: String) extends Atom[String](data) {
+
+ /** Appends the data to `sb` exactly as it is, with no escaping, and
+ * returns the builder.
+ */
+ override def buildString(sb: StringBuilder): StringBuilder =
+ sb append data
+}
+
+/** This singleton object contains the `apply` and `unapply` methods for
+ * convenient construction and deconstruction.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object Unparsed {
+ def apply(data: String) = new Unparsed(data)
+ def unapply(x: Unparsed) = Some(x.data)
+}
diff --git a/src/xml/scala/xml/UnprefixedAttribute.scala b/src/xml/scala/xml/UnprefixedAttribute.scala
new file mode 100644
index 0000000000..6fa827da5f
--- /dev/null
+++ b/src/xml/scala/xml/UnprefixedAttribute.scala
@@ -0,0 +1,61 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+/** Unprefixed attributes have the null namespace, and no prefix field
+ *
+ * @author Burak Emir
+ */
+class UnprefixedAttribute(
+ val key: String,
+ val value: Seq[Node],
+ next1: MetaData)
+extends Attribute
+{
+ final val pre = null // unprefixed: there is never a prefix
+ val next = if (value ne null) next1 else next1.remove(key) // with a null value, `next` is `next1` without `key`
+
+ /** same as this(key, Text(value), next), or no attribute if value is null */
+ def this(key: String, value: String, next: MetaData) =
+ this(key, if (value ne null) Text(value) else null: NodeSeq, next)
+
+ /** same as this(key, value.get, next), or no attribute if value is None */
+ def this(key: String, value: Option[Seq[Node]], next: MetaData) =
+ this(key, value.orNull, next)
+
+ /** returns a copy of this unprefixed attribute with the given next field */
+ def copy(next: MetaData) = new UnprefixedAttribute(key, value, next)
+
+ final def getNamespace(owner: Node): String = null
+
+ /**
+ * Gets value of unqualified (unprefixed) attribute with given key, null if not found
+ *
+ * @param key the attribute key to look up; a different key is passed on to `next`
+ * @return value as Seq[Node] if key is found, null otherwise
+ */
+ def apply(key: String): Seq[Node] =
+ if (key == this.key) value else next(key)
+
+ /**
+ * Forwards the call to next (because caller looks for prefixed attribute).
+ *
+ * @param namespace the namespace URI, passed on to `next` unchanged
+ * @param scope the namespace bindings, passed on to `next` unchanged
+ * @param key the attribute key, passed on to `next` unchanged
+ * @return whatever `next` returns for the same three arguments
+ */
+ def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] =
+ next(namespace, scope, key)
+}
+object UnprefixedAttribute {
+ def unapply(x: UnprefixedAttribute) = Some((x.key, x.value, x.next))
+}
diff --git a/src/xml/scala/xml/Utility.scala b/src/xml/scala/xml/Utility.scala
new file mode 100755
index 0000000000..9134476401
--- /dev/null
+++ b/src/xml/scala/xml/Utility.scala
@@ -0,0 +1,410 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import scala.collection.mutable
+import parsing.XhtmlEntities
+import scala.language.implicitConversions
+
+/**
+ * The `Utility` object provides utility functions for processing instances
+ * of bound and not bound XML classes, as well as escaping text nodes.
+ *
+ * @author Burak Emir
+ */
+object Utility extends AnyRef with parsing.TokenTests {
+ final val SU = '\u001A'
+
+ // [Martin] This looks dubious. We don't convert StringBuilders to
+ // Strings anywhere else, why do it here?
+ implicit def implicitSbToString(sb: StringBuilder) = sb.toString()
+
+  // Helper for the oft-repeated sequence of creating a StringBuilder,
+  // handing it to `f` to be filled, and then grabbing its String.
+  private [xml] def sbToString(f: (StringBuilder) => Unit): String = {
+    val buf = new StringBuilder
+    f(buf)
+    buf.toString
+  }
+  private[xml] def isAtomAndNotText(x: Node) = x match { case _: Text => false; case other => other.isAtom }
+
+  /** Trims an element. Call this method only when you know that the node
+   *  is an element (and not a text node), so that it cannot be trimmed
+   *  away; under that assumption the result is a `Node` rather than a
+   *  `Seq[Node]`. If you don't know, call `trimProper` and account for the
+   *  fact that you may get back an empty sequence of nodes.
+   *
+   *  Precondition: node is an element; any other node fails the match.
+   */
+  def trim(x: Node): Node = x match {
+    case Elem(pfx, label, attribs, scope, kids @ _*) =>
+      Elem(pfx, label, attribs, scope, kids.flatMap(trimProper): _*)
+  }
+
+  /** Trims a child of an element. `Attribute` values and `Atom` nodes that
+   *  are not `Text` nodes are unaffected.
+   */
+  def trimProper(x:Node): Seq[Node] = x match {
+    case Elem(pfx, label, attribs, scope, kids @ _*) =>
+      Elem(pfx, label, attribs, scope, kids.flatMap(trimProper): _*)
+    case Text(content) =>
+      (new TextBuffer).append(content).toText
+    case other =>
+      other
+  }
+
+ /** returns a sorted attribute list: entries are split around the head's key into strictly smaller and strictly greater keys, each part sorted recursively */
+ def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else {
+ val key = md.key
+ val smaller = sort(md.filter { m => m.key < key })
+ val greater = sort(md.filter { m => m.key > key })
+ smaller.foldRight (md copy greater) ((x, xs) => x copy xs)
+ }
+
+  /** Returns the node with its attribute list sorted alphabetically
+   *  (prefixes are ignored); child nodes are sorted the same way. */
+  def sort(n:Node): Node = n match {
+    case Elem(pfx, label, attribs, scope, kids @ _*) =>
+      Elem(pfx, label, sort(attribs), scope, kids.map(sort(_)): _*)
+    case other => other
+  }
+
+  /**
+   * Returns `text` with the characters &lt; &gt; &amp; and &quot; escaped.
+   */
+  final def escape(text: String): String = sbToString(buf => escape(text, buf))
+
+ object Escapes {
+ /** For reasons unclear escape and unescape are a long ways from
+ * being logical inverses. */
+ val pairs = Map( // entity name -> character
+ "lt" -> '<',
+ "gt" -> '>',
+ "amp" -> '&',
+ "quot" -> '"'
+ // enigmatic comment explaining why this isn't escaped --
+ // is valid xhtml but not html, and IE doesn't know it, says jweb
+ // "apos" -> '\''
+ )
+ val escMap = pairs map { case (s, c) => c-> ("&%s;" format s) } // character -> "&name;"
+ val unescMap = pairs ++ Map("apos" -> '\'') // entity name -> character, with "apos" added
+ }
+ import Escapes.{ escMap, unescMap }
+
+  /**
+   * Appends `text` to `s` with &lt; &gt; &amp; and &quot; escaped, and returns `s`.
+   */
+  final def escape(text: String, s: StringBuilder): StringBuilder = {
+    // Implemented per XML spec:
+    // http://www.w3.org/International/questions/qa-controls
+    // imperative code 3x-4x faster than current implementation
+    // dpp (David Pollak) 2010/02/03
+    val end = text.length
+    var i = 0
+    while (i < end) {
+      val ch = text.charAt(i)
+      ch match {
+        case '&' => s append "&amp;"
+        case '<' => s append "&lt;"
+        case '>' => s append "&gt;"
+        case '"' => s append "&quot;"
+        // tab, line feed and carriage return are the control characters kept
+        case '\t' | '\n' | '\r' => s append ch
+        // any other character below the space character is dropped
+        case _ => if (ch >= ' ') s append ch
+      }
+      i += 1
+    }
+    s
+  }
+
+  /**
+   * Appends unescaped string to `s`, `amp` becomes `&amp;`,
+   * `lt` becomes `&lt;` etc..
+   *
+   * @return    `'''null'''` if `ref` was not a predefined entity.
+   */
+  final def unescape(ref: String, s: StringBuilder): StringBuilder =
+    unescMap get ref match { case Some(ch) => s append ch; case None => null }
+
+  /** Returns a set of all namespaces used in a sequence of nodes
+   *  and all their descendants, including the empty namespaces. */
+  def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] = {
+    val found = new mutable.HashSet[String]
+    nodes foreach { collectNamespaces(_, found) }; found
+  }
+
+  /**
+   * Adds all namespaces in node to set.
+   */
+  def collectNamespaces(n: Node, set: mutable.Set[String]) {
+    if (n.doCollectNamespaces) {
+      set += n.namespace
+      // only prefixed attributes contribute a namespace
+      n.attributes foreach {
+        case attr: PrefixedAttribute => set += attr.getNamespace(n)
+        case _ =>
+      }
+      // then descend into the children
+      n.child foreach { collectNamespaces(_, set) }
+    }
+  }
+
+ // def toXML(
+ // x: Node,
+ // pscope: NamespaceBinding = TopScope,
+ // sb: StringBuilder = new StringBuilder,
+ // stripComments: Boolean = false,
+ // decodeEntities: Boolean = true,
+ // preserveWhitespace: Boolean = false,
+ // minimizeTags: Boolean = false): String =
+ // {
+ // toXMLsb(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+ // sb.toString()
+ // }
+
+  /**
+   * Serialize the provided Node to the provided StringBuilder.
+   * <p/>
+   * Note that calling this source-compatible method will result in the same old, arguably almost universally unwanted,
+   * behaviour: the boolean `minimizeTags` maps to `MinimizeMode.Always` when true and to `MinimizeMode.Never` otherwise.
+   */
+  @deprecated("Please use `serialize` instead and specify a `minimizeTags` parameter", "2.10.0")
+  def toXML(
+    x: Node,
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: Boolean = false): StringBuilder = {
+    val mode = if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never
+    serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, mode)
+  }
+
+  /**
+   * Serialize an XML Node to a StringBuilder and return that builder.
+   *
+   * This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate
+   * combination of named/default arguments and overloading. Comments are left out when `stripComments`
+   * is true, and all options are handed down unchanged to the nodes of a `Group` and to child nodes.
+   * @todo use a Writer instead
+   */
+  def serialize(
+    x: Node,
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder =
+  {
+    x match {
+      case c: Comment => if (stripComments) sb else c buildString sb // handled in full here so a stripped comment never reaches the SpecialNode case
+      case s: SpecialNode => s buildString sb
+      case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) ; sb
+      case el: Elem =>
+        // print tag with namespace declarations
+        sb.append('<')
+        el.nameToString(sb)
+        if (el.attributes ne null) el.attributes.buildString(sb)
+        el.scope.buildString(sb, pscope)
+        if (el.child.isEmpty &&
+              (minimizeTags == MinimizeMode.Always ||
+              (minimizeTags == MinimizeMode.Default && el.minimizeEmpty)))
+        {
+          // no children, so use short form: <xyz .../>
+          sb.append("/>")
+        } else {
+          // children, so use long form: <xyz ...>...</xyz>
+          sb.append('>')
+          sequenceToXML(el.child, el.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+          sb.append("</")
+          el.nameToString(sb)
+          sb.append('>')
+        }
+      case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName)
+    }
+  }
+
+  def sequenceToXML(
+    children: Seq[Node],
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit =
+  {
+    // serializes one child with exactly the options this call received
+    def emit(node: Node): Unit =
+      serialize(node, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+
+    if (children.nonEmpty) {
+      // a sequence made only of non-text atoms is printed with single spaces in between
+      if (children forall isAtomAndNotText) { // add space
+        emit(children.head)
+        children.tail foreach { node => sb.append(' '); emit(node) }
+      }
+      else children foreach emit
+    }
+  }
+
+  /**
+   * Returns the part of a qualified name before its first ':', if any.
+   */
+  final def prefix(name: String): Option[String] = {
+    val colon = name indexOf ':'
+    if (colon == -1) None else Some(name.substring(0, colon))
+  }
+
+ /**
+ * Returns a hashcode for the given constituents of a node: an ordered hash over label, attribute hash, scope hash and children, seeded with `pre.##`.
+ */
+ def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) =
+ scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##)
+
+ def appendQuoted(s: String): String = sbToString(appendQuoted(s, _))
+
+  /**
+   * Appends &quot;s&quot; if string `s` does not contain &quot;,
+   * &apos;s&apos; otherwise.
+   */
+  def appendQuoted(s: String, sb: StringBuilder) = {
+    val quote = if (s.indexOf('"') >= 0) '\'' else '"'
+    sb append quote append s append quote
+  }
+
+  /**
+   * Appends &quot;s&quot; and escapes any &quot; in s with \&quot;
+   */
+  def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = {
+    sb.append('"')
+    s foreach { ch =>
+      if (ch == '"') sb.append('\\')
+      sb.append(ch)
+    }
+    sb.append('"')
+  }
+
+  // null when `index` is past the end; "" when no name starts at `index`
+  def getName(s: String, index: Int): String =
+    if (index >= s.length) null
+    else {
+      val rest = s drop index
+      val startsName = rest.headOption exists isNameStart
+      if (startsName) rest takeWhile isNameChar else ""
+    }
+
+ /**
+ * Returns `'''null'''` if the value is a correct attribute value,
+ * error message if it isn't.
+ */
+ def checkAttributeValue(value: String): String = {
+ var i = 0
+ while (i < value.length) {
+ value.charAt(i) match {
+ case '<' =>
+ return "< not allowed in attribute value"
+ case '&' =>
+ val n = getName(value, i+1) // the name that follows the ampersand
+ if (n eq null) // null: the ampersand is the last character of the value
+ return "malformed entity reference in attribute value ["+value+"]"
+ i = i + n.length + 1 // move to the character right after the name
+ if (i >= value.length || value.charAt(i) != ';') // the name must be closed by ';'
+ return "malformed entity reference in attribute value ["+value+"]"
+ case _ =>
+ }
+ i = i + 1
+ }
+ null
+ }
+
+ def parseAttributeValue(value: String): Seq[Node] = {
+ val sb = new StringBuilder // plain text collected since the last flush
+ var rfb: StringBuilder = null // buffer for entity names, created on first use
+ val nb = new NodeBuffer() // nodes produced so far
+
+ val it = value.iterator
+ while (it.hasNext) {
+ var c = it.next()
+ // entity! flush buffer into text node
+ if (c == '&') {
+ c = it.next() // NOTE(review): no hasNext check from here on; a value cut off inside a reference presumably fails in next() - confirm
+ if (c == '#') {
+ c = it.next()
+ val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) // both error callbacks throw
+ sb.append(theChar)
+ }
+ else {
+ if (rfb eq null) rfb = new StringBuilder()
+ rfb append c
+ c = it.next()
+ while (c != ';') { // read the entity name up to the closing ';'
+ rfb.append(c)
+ c = it.next()
+ }
+ val ref = rfb.toString()
+ rfb.clear()
+ unescape(ref,sb) match { // appends the character to sb when `ref` is a predefined entity
+ case null => // not predefined: keep the reference as an EntityRef node
+ if (sb.length > 0) { // flush buffer
+ nb += Text(sb.toString())
+ sb.clear()
+ }
+ nb += EntityRef(ref) // add entityref
+ case _ =>
+ }
+ }
+ }
+ else sb append c
+ }
+ if (sb.length > 0) { // flush buffer
+ val x = Text(sb.toString())
+ if (nb.length == 0)
+ return x // nothing but text: the single Text node is the result
+ else
+ nb += x
+ }
+ nb
+ }
+
+ /**
+ * {{{
+ * CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
+ * | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ * }}}
+ * See [66]
+ */
+ def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = {
+ val hex = (ch() == 'x') && { nextch(); true } // a leading 'x' selects base 16 and is consumed
+ val base = if (hex) 16 else 10
+ var i = 0 // the code point accumulated so far
+ while (ch() != ';') {
+ ch() match {
+ case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' =>
+ i = i * base + ch().asDigit
+ case 'a' | 'b' | 'c' | 'd' | 'e' | 'f'
+ | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' =>
+ if (! hex)
+ reportSyntaxError("hex char not allowed in decimal char ref\n" +
+ "Did you mean to write &#x ?")
+ else
+ i = i * base + ch().asDigit
+ case SU =>
+ reportTruncatedError("")
+ case _ =>
+ reportSyntaxError("character '" + ch() + "' not allowed in char ref\n")
+ }
+ nextch() // runs after every character, also after a reported error
+ }
+ new String(Array(i), 0, 1) // the string made of the single code point `i`
+ }
+}
diff --git a/src/xml/scala/xml/XML.scala b/src/xml/scala/xml/XML.scala
new file mode 100755
index 0000000000..020264e509
--- /dev/null
+++ b/src/xml/scala/xml/XML.scala
@@ -0,0 +1,109 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+import parsing.NoBindingFactoryAdapter
+import factory.XMLLoader
+import java.io.{ File, FileDescriptor, FileInputStream, FileOutputStream }
+import java.io.{ InputStream, Reader, StringReader, Writer }
+import java.nio.channels.Channels
+import scala.util.control.Exception.ultimately
+
+object Source { // factory methods that wrap the various kinds of input in an `InputSource`
+ def fromFile(file: File) = new InputSource(new FileInputStream(file))
+ def fromFile(fd: FileDescriptor) = new InputSource(new FileInputStream(fd))
+ def fromFile(name: String) = new InputSource(new FileInputStream(name))
+
+ def fromInputStream(is: InputStream) = new InputSource(is)
+ def fromReader(reader: Reader) = new InputSource(reader)
+ def fromSysId(sysID: String) = new InputSource(sysID)
+ def fromString(string: String) = fromReader(new StringReader(string)) // reads from the given text itself
+}
+
+/**
+ * Governs how empty elements (i.e. those without child nodes) should be serialized.
+ */
+object MinimizeMode extends Enumeration {
+ /** Minimize empty tags if they were originally empty when parsed, or if they were constructed
+ * with [[scala.xml.Elem]]`#minimizeEmpty` == true
+ */
+ val Default = Value
+
+ /** Always minimize empty tags. Note that this may be problematic for XHTML, in which
+ * case [[scala.xml.Xhtml]]`#toXhtml` should be used instead.
+ */
+ val Always = Value
+
+ /** Never minimize empty tags.
+ */
+ val Never = Value
+}
+
+/** The object `XML` provides constants, and functions to load
+ * and save XML elements. Use this when data binding is not desired, i.e.
+ * when XML is handled using `Symbol` nodes.
+ *
+ * @author Burak Emir
+ * @version 1.0, 25/04/2005
+ */
+object XML extends XMLLoader[Elem] {
+ val xml = "xml"
+ val xmlns = "xmlns"
+ val namespace = "http://www.w3.org/XML/1998/namespace"
+ val preserve = "preserve"
+ val space = "space"
+ val lang = "lang"
+ val encoding = "ISO-8859-1"
+
+ /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */
+ def withSAXParser(p: SAXParser): XMLLoader[Elem] =
+ new XMLLoader[Elem] { override val parser: SAXParser = p }
+
+  /** Saves a node to a file with given filename using given encoding
+   *  optionally with xmldecl and doctype declaration. The writer opened
+   *  on the file is closed again whether or not writing succeeds.
+   *  @param filename the filename
+   *  @param node     the xml node we want to write
+   *  @param enc      encoding to use
+   *  @param xmlDecl  if true, write xml declaration
+   *  @param doctype  if not null, write doctype declaration
+   */
+  final def save(
+    filename: String,
+    node: Node,
+    enc: String = encoding,
+    xmlDecl: Boolean = false,
+    doctype: dtd.DocType = null
+    ): Unit =
+  {
+    val out = new FileOutputStream(filename)
+    val writer = Channels.newWriter(out.getChannel(), enc)
+
+    try write(writer, node, enc, xmlDecl, doctype)
+    // close even when write throws
+    finally writer.close()
+  }
+
+  /** Writes the given node using writer, optionally with xml decl and doctype; closing the writer is left to the caller.
+   *
+   *  @param w            the writer
+   *  @param node         the xml node we want to write
+   *  @param enc          the string to be used in `xmlDecl`
+   *  @param xmlDecl      if true, write xml declaration
+   *  @param doctype      if not null, write doctype declaration
+   *  @param minimizeTags how empty elements are written, see `MinimizeMode`
+   */
+  final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default) {
+    /* TODO: optimize by giving writer parameter to toXML*/
+    if (xmlDecl) w.write("<?xml version='1.0' encoding='" + enc + "'?>\n")
+    Option(doctype) foreach { dt => w.write(dt.toString() + "\n") }
+    w.write(Utility.serialize(node, minimizeTags = minimizeTags).toString)
+  }
+}
diff --git a/src/xml/scala/xml/Xhtml.scala b/src/xml/scala/xml/Xhtml.scala
new file mode 100644
index 0000000000..6a12c1a89a
--- /dev/null
+++ b/src/xml/scala/xml/Xhtml.scala
@@ -0,0 +1,97 @@
+
+package scala
+package xml
+
+import parsing.XhtmlEntities
+import Utility.{ sbToString, isAtomAndNotText }
+
+/* (c) David Pollak 2007 WorldWide Conferencing, LLC */
+
+object Xhtml
+{
+  /**
+   * Convenience function: same as the full `toXhtml` with only `x` and
+   * `sb` given, returning the text collected in a fresh buffer.
+   * @param node      the node
+   */
+  def toXhtml(node: Node): String = sbToString(buf => toXhtml(x = node, sb = buf))
+
+  /**
+   * Convenience function: passes the sequence to `sequenceToXML` with
+   * default options and returns the text collected in a fresh buffer.
+   *
+   * @param nodeSeq   the node sequence
+   */
+  def toXhtml(nodeSeq: NodeSeq): String = sbToString(buf => sequenceToXML(nodeSeq: Seq[Node], sb = buf))
+
+ /** Elements which we believe are safe to minimize if minimizeTags is true.
+ * See http://www.w3.org/TR/xhtml1/guidelines.html#C_3
+ */
+ private val minimizableElements =
+ List("base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col")
+ /** Appends the XHTML form of `x` to `sb`, following the given options. */
+ def toXhtml(
+ x: Node,
+ pscope: NamespaceBinding = TopScope,
+ sb: StringBuilder = new StringBuilder,
+ stripComments: Boolean = false,
+ decodeEntities: Boolean = false,
+ preserveWhitespace: Boolean = false,
+ minimizeTags: Boolean = true): Unit =
+ {
+ def decode(er: EntityRef) = XhtmlEntities.entMap.get(er.entityName) match {
+ case Some(chr) if chr.toInt >= 128 => sb.append(chr) // known entity with character code 128 or above: emit the character itself
+ case _ => er.buildString(sb) // anything else is printed as an entity reference
+ }
+ def shortForm = // true when the element may be written as <name />
+ minimizeTags &&
+ (x.child == null || x.child.length == 0) &&
+ (minimizableElements contains x.label)
+
+ x match {
+ case c: Comment => if (!stripComments) c buildString sb
+ case er: EntityRef if decodeEntities => decode(er)
+ case x: SpecialNode => x buildString sb
+ case g: Group =>
+ g.nodes foreach { toXhtml(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
+
+ case _ =>
+ sb.append('<') // every other node is printed as an element
+ x.nameToString(sb)
+ if (x.attributes ne null) x.attributes.buildString(sb)
+ x.scope.buildString(sb, pscope)
+
+ if (shortForm) sb.append(" />")
+ else {
+ sb.append('>')
+ sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+ sb.append("</")
+ x.nameToString(sb)
+ sb.append('>')
+ }
+ }
+ }
+
+  /**
+   * Amounts to calling toXhtml(node, ...) with the given parameters on each node.
+   */
+  def sequenceToXML(
+    children: Seq[Node],
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = false,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: Boolean = true): Unit =
+  {
+    if (children.nonEmpty) {
+      // spaces are interleaved only when every child is a non-text atom
+      val doSpaces = children forall isAtomAndNotText
+      children.init foreach { c =>
+        toXhtml(c, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+        if (doSpaces) sb append ' '
+      }
+      toXhtml(children.last, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+    }
+  }
+}
diff --git a/src/xml/scala/xml/dtd/ContentModel.scala b/src/xml/scala/xml/dtd/ContentModel.scala
new file mode 100644
index 0000000000..4007985dce
--- /dev/null
+++ b/src/xml/scala/xml/dtd/ContentModel.scala
@@ -0,0 +1,118 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package dtd
+
+import scala.xml.dtd.impl._
+import scala.xml.Utility.sbToString
+import PartialFunction._
+
+object ContentModel extends WordExp {
+ type _labelT = ElemName
+ type _regexpT = RegExp
+
+ object Translator extends WordBerrySethi {
+ override val lang: ContentModel.this.type = ContentModel.this
+ }
+
+ case class ElemName(name: String) extends Label {
+ override def toString() = """ElemName("%s")""" format name
+ }
+
+ def isMixed(cm: ContentModel) = cond(cm) { case _: MIXED => true }
+ def containsText(cm: ContentModel) = (cm == PCDATA) || isMixed(cm)
+ def parse(s: String): ContentModel = ContentModelParser.parse(s)
+
+  def getLabels(r: RegExp): Set[String] = { // names of all elements that occur in `r`
+    def traverse(r: RegExp): Set[String] = r match { // !!! check for match translation problem
+      case Letter(ElemName(name)) => Set(name)
+      case Star(  x @ _  ) => traverse( x ) // bug if x@_*
+      case Sequ( xs @ _* ) => Set(xs flatMap traverse: _*)
+      case Alt(  xs @ _* ) => Set(xs flatMap traverse: _*)
+      case Eps => Set.empty // the parser builds `Alt(Eps, s)` for `s?`, so Eps does turn up here
+    }
+    traverse(r)
+  }
+
+ def buildString(r: RegExp): String = sbToString(buildString(r, _))
+
+  /* precond: rs.length >= 1; the expressions are printed with `sep` in between */
+  private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char) {
+    buildString(rs.head, sb)
+    rs.tail foreach { next =>
+      sb append sep
+      buildString(next, sb)
+    }
+  }
+
+ def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match {
+ case ANY => sb append "ANY"
+ case EMPTY => sb append "EMPTY"
+ case PCDATA => sb append "(#PCDATA)"
+ case ELEMENTS(_) | MIXED(_) => c buildString sb
+ }
+
+ def buildString(r: RegExp, sb: StringBuilder): StringBuilder =
+ r match { // !!! check for match translation problem
+ case Eps =>
+ sb // the empty expression prints nothing
+ case Sequ(rs @ _*) =>
+ sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' ) // sequence: (a,b,...)
+ case Alt(rs @ _*) =>
+ sb.append( '(' ); buildString(rs, sb, '|'); sb.append( ')' ) // choice: (a|b|...)
+ case Star(r: RegExp) =>
+ sb.append( '(' ); buildString(r, sb); sb.append( ")*" ) // repetition: (a)*
+ case Letter(ElemName(name)) =>
+ sb.append(name) // a single element name
+ }
+
+}
+
+sealed abstract class ContentModel
+{
+ override def toString(): String = sbToString(buildString)
+ def buildString(sb: StringBuilder): StringBuilder
+}
+
+case object PCDATA extends ContentModel {
+ override def buildString(sb: StringBuilder): StringBuilder = sb.append("(#PCDATA)")
+}
+case object EMPTY extends ContentModel {
+ override def buildString(sb: StringBuilder): StringBuilder = sb.append("EMPTY")
+}
+case object ANY extends ContentModel {
+ override def buildString(sb: StringBuilder): StringBuilder = sb.append("ANY")
+}
+sealed abstract class DFAContentModel extends ContentModel {
+ import ContentModel.{ ElemName, Translator }
+ def r: ContentModel.RegExp // the regular expression this content model is built from
+
+ lazy val dfa: DetWordAutom[ElemName] = { // computed on first access: automaton from `r`, then determinized
+ val nfa = Translator.automatonFrom(r, 1)
+ new SubsetConstruction(nfa).determinize
+ }
+}
+
+case class MIXED(r: ContentModel.RegExp) extends DFAContentModel {
+ import ContentModel.{ Alt, RegExp }
+
+ override def buildString(sb: StringBuilder): StringBuilder = {
+ val newAlt = r match { case Alt(rs @ _*) => Alt(rs drop 1: _*) } // drops the first alternative (presumably the parser's leading Eps - confirm); an `r` that is not an Alt fails the match
+
+ sb append "(#PCDATA|"
+ ContentModel.buildString(newAlt: RegExp, sb)
+ sb append ")*"
+ }
+}
+
+case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel {
+ override def buildString(sb: StringBuilder): StringBuilder =
+ ContentModel.buildString(r, sb)
+}
diff --git a/src/xml/scala/xml/dtd/ContentModelParser.scala b/src/xml/scala/xml/dtd/ContentModelParser.scala
new file mode 100644
index 0000000000..71b391c422
--- /dev/null
+++ b/src/xml/scala/xml/dtd/ContentModelParser.scala
@@ -0,0 +1,129 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package dtd
+
+/** Parser for regexps (content models in DTD element declarations).
+ *
+ *  A recursive-descent parser over the tokens produced by `Scanner`.
+ *  Errors are reported by `scala.sys.error`.
+ */
+object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA
+  import ContentModel._
+
+  /** Parses the content-model text `s` into a `ContentModel`. */
+  def parse(s: String): ContentModel = { initScanner(s); contentspec }
+
+  /** Checks that the current token is `tok` and advances to the next one.
+   *  Fails through `scala.sys.error` when another token is found.
+   */
+  def accept(tok: Int) = {
+    if (token != tok) {
+      if ((tok == STAR) && (token == END)) // common mistake
+        scala.sys.error("in DTDs, \n"+
+          "mixed content models must be like (#PCDATA|Name|Name|...)*")
+      else
+        scala.sys.error("expected "+token2string(tok)+
+          ", got unexpected token:"+token2string(token))
+    }
+    nextToken()
+  }
+
+  // s [ '+' | '*' | '?' ]
+  /** Wraps `s` according to an optional occurrence suffix:
+   *  `*` gives `Star(s)`, `+` gives `Sequ(s, Star(s))`, `?` gives `Alt(Eps, s)`.
+   */
+  def maybeSuffix(s: RegExp) = token match {
+    case STAR => nextToken(); Star(s)
+    case PLUS => nextToken(); Sequ(s, Star(s))
+    case OPT  => nextToken(); Alt(Eps, s)
+    case _    => s
+  }
+
+  // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp)
+
+  def contentspec: ContentModel = token match {
+
+    case NAME => value match {
+      case "ANY"   => ANY
+      case "EMPTY" => EMPTY
+      case _       => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value )
+    }
+    case LPAREN =>
+
+      nextToken()
+      sOpt()
+      if (token != TOKEN_PCDATA)
+        ELEMENTS(regexp)
+      else {
+        nextToken()
+        // the grammar allows whitespace between '#PCDATA' and ')' or '|'
+        sOpt()
+        token match {
+          case RPAREN =>
+            PCDATA
+          case CHOICE =>
+            val res = MIXED(choiceRest(Eps))
+            sOpt()
+            accept( RPAREN )
+            accept( STAR )
+            res
+          case _ =>
+            scala.sys.error("unexpected token:" + token2string(token) )
+        }
+      }
+
+    case _ =>
+      scala.sys.error("unexpected token:" + token2string(token) )
+  }
+
+  // sopt ::= S?
+  def sOpt() = if( token == S ) nextToken()
+
+  // (' S? mixed ::= '#PCDATA' S? ')'
+  //              | '#PCDATA' (S? '|' S? atom)* S? ')*'
+
+  // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ]
+  def regexp: RegExp = {
+    val p = particle
+    sOpt()
+    maybeSuffix(token match {
+      case RPAREN => nextToken(); p
+      case CHOICE => val q = choiceRest( p ); accept( RPAREN ); q
+      case COMMA  => val q = seqRest( p ); accept( RPAREN ); q
+      // any other token used to surface as an uninformative MatchError
+      case _      =>
+        scala.sys.error("expected ')', '|' or ',', got unexpected token:" + token2string(token))
+    })
+  }
+
+  // seqRest ::= (',' S? cp S?)+
+  /** Parses the remaining `,`-separated particles; `p` is the first member. */
+  def seqRest(p: RegExp) = {
+    var k = List(p) // collected in reverse order
+    while( token == COMMA ) {
+      nextToken()
+      sOpt()
+      k = particle::k
+      sOpt()
+    }
+    Sequ( k.reverse:_* )
+  }
+
+  // choiceRest ::= ('|' S? cp S?)+
+  /** Parses the remaining `|`-separated particles; `p` is the first branch. */
+  def choiceRest( p:RegExp ) = {
+    var k = List( p ) // collected in reverse order
+    while( token == CHOICE ) {
+      nextToken()
+      sOpt()
+      k = particle::k
+      sOpt()
+    }
+    Alt( k.reverse:_* )
+  }
+
+  // particle ::= '(' S? regexp
+  //            | name [ '+' | '*' | '?' ]
+  def particle = token match {
+    case LPAREN => nextToken(); sOpt(); regexp
+    case NAME   => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a)
+    case _      => scala.sys.error("expected '(' or Name, got:"+token2string(token))
+  }
+
+  // atom ::= name
+  def atom = token match {
+    case NAME => val a = Letter(ElemName(value)); nextToken(); a
+    case _    => scala.sys.error("expected Name, got:"+token2string(token))
+  }
+}
diff --git a/src/xml/scala/xml/dtd/DTD.scala b/src/xml/scala/xml/dtd/DTD.scala
new file mode 100644
index 0000000000..16a824fe2c
--- /dev/null
+++ b/src/xml/scala/xml/dtd/DTD.scala
@@ -0,0 +1,35 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package dtd
+
+import scala.collection.mutable
+
+/** A document type declaration.
+ *
+ *  Holds the external identifier, the list of declarations and mutable
+ *  lookup tables for element, attribute-list and entity declarations.
+ *
+ *  @author Burak Emir
+ */
+abstract class DTD {
+  var externalID: ExternalID = null
+  var decls: List[Decl] = Nil
+  def notations: Seq[NotationDecl] = Nil
+  def unparsedEntities: Seq[EntityDecl] = Nil
+
+  var elem: mutable.Map[String, ElemDecl] = new mutable.HashMap[String, ElemDecl]()
+  var attr: mutable.Map[String, AttListDecl] = new mutable.HashMap[String, AttListDecl]()
+  var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]()
+
+  /** `DTD [`, a newline, the external id (nothing when it is `null`),
+   *  each declaration followed by a newline, then `]`.
+   */
+  override def toString() = {
+    val idText   = if (externalID == null) "" else externalID.toString
+    val declText = decls.mkString("", "\n", "\n")
+    "DTD [\n" + idText + declText + "]"
+  }
+}
diff --git a/src/xml/scala/xml/dtd/Decl.scala b/src/xml/scala/xml/dtd/Decl.scala
new file mode 100644
index 0000000000..8bf859c460
--- /dev/null
+++ b/src/xml/scala/xml/dtd/Decl.scala
@@ -0,0 +1,157 @@
+/* __ *\
+ ** ________ ___ / / ___ Scala API **
+ ** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+ ** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+ ** /____/\___/_/ |_/____/_/ | | **
+ ** |/ **
+ \* */
+
+package scala
+package xml
+package dtd
+
+import Utility.sbToString
+
+/** Common supertype of the declarations defined in this file. */
+sealed abstract class Decl
+
+/** A declaration that can write itself into a `StringBuilder`. */
+sealed abstract class MarkupDecl extends Decl {
+ /** Writes this declaration into `sb`; implementations return a `StringBuilder`. */
+ def buildString(sb: StringBuilder): StringBuilder
+}
+
+/** An element declaration.
+ *
+ *  @param name         the declared element name
+ *  @param contentModel the content model of the element
+ */
+case class ElemDecl(name: String, contentModel: ContentModel)
+extends MarkupDecl {
+  /** Appends `<!ELEMENT name model>` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("<!ELEMENT ").append(name).append(' ')
+    ContentModel.buildString(contentModel, sb)
+    sb.append('>')
+  }
+}
+
+/** An attribute-list declaration for the element `name`.
+ *
+ *  @param name  the element the attributes belong to
+ *  @param attrs the declared attributes
+ */
+case class AttListDecl(name: String, attrs: List[AttrDecl])
+extends MarkupDecl {
+  /** Appends `<!ATTLIST name`, a newline, the attribute declarations
+   *  separated by newlines, and the closing `>`.
+   */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("<!ATTLIST ").append(name).append('\n')
+    sb.append(attrs.mkString("", "\n", ">"))
+  }
+}
+
+/** An attribute declaration. At this point the type `tpe` is a plain
+ *  string; future versions might provide a way to access attribute types
+ *  more directly.
+ *
+ *  @param name    the attribute name
+ *  @param tpe     the attribute type, as text
+ *  @param default the default declaration
+ */
+case class AttrDecl(name: String, tpe: String, default: DefaultDecl) {
+
+  /** Appends two spaces, then `name tpe ` and the default declaration to `sb`. */
+  def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("  ").append(name).append(' ').append(tpe).append(' ')
+    default.buildString(sb)
+  }
+
+  /** The text produced by `buildString`. */
+  override def toString(): String = sbToString(sb => buildString(sb))
+}
+
+/** An entity declaration; common supertype of the parsed, parameter and
+ * unparsed entity declarations below.
+ */
+sealed abstract class EntityDecl extends MarkupDecl
+
+/** A parsed general entity declaration.
+ *
+ *  @param name   the entity name
+ *  @param entdef the entity definition
+ */
+case class ParsedEntityDecl(name: String, entdef: EntityDef) extends EntityDecl {
+  /** Appends `<!ENTITY name definition>` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("<!ENTITY ").append(name).append(' ')
+    entdef.buildString(sb).append('>')
+  }
+}
+
+/** A parameter entity declaration.
+ *
+ *  @param name   the entity name
+ *  @param entdef the entity definition
+ */
+case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl {
+  /** Appends `<!ENTITY % name definition>` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("<!ENTITY % ").append(name).append(' ')
+    entdef.buildString(sb).append('>')
+  }
+}
+
+/** An unparsed entity declaration.
+ *
+ *  @param name     the entity name
+ *  @param extID    the external identifier of the entity
+ *  @param notation the notation name written after `NDATA`
+ */
+case class UnparsedEntityDecl(name: String, extID: ExternalID, notation: String) extends EntityDecl {
+  /** Appends `<!ENTITY name extID NDATA notation>` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb.append("<!ENTITY ").append(name).append(' ')
+    extID.buildString(sb).append(" NDATA ").append(notation).append('>')
+  }
+}
+/** A notation declaration.
+ *
+ *  @param name  the notation name
+ *  @param extID the external identifier of the notation
+ */
+case class NotationDecl(name: String, extID: ExternalID) extends MarkupDecl {
+  /** Appends `<!NOTATION name extID>` to `sb`, including the closing `>`
+   *  that terminates the declaration.
+   */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    sb append "<!NOTATION " append name append ' '
+    extID buildString sb append '>'
+  }
+}
+
+/** The definition part of an entity declaration (see `IntDef` and `ExtDef`). */
+sealed abstract class EntityDef {
+ /** Writes this definition into `sb`; implementations return a `StringBuilder`. */
+ def buildString(sb: StringBuilder): StringBuilder
+}
+
+/** An internal entity definition: the literal replacement text `value`.
+ *
+ *  The value is validated on construction: every `%` must start a
+ *  parameter-entity reference of the form `%Name;`.
+ *
+ *  @param value the entity value
+ *  @throws IllegalArgumentException if a `%` is not followed by a name and a `;`
+ */
+case class IntDef(value: String) extends EntityDef {
+  private def validateValue(): Unit = {
+    var tmp = value
+    var ix = tmp indexOf '%'
+    while (ix != -1) {
+      val iz = tmp.indexOf(';', ix)
+      // no terminating ';' at all, or an empty reference "%;"
+      if (iz == -1 || iz == ix + 1)
+        throw new IllegalArgumentException("no % allowed in entity value, except for parameter-entity-references")
+      else {
+        // the referenced name sits between '%' and ';', both excluded
+        val n = tmp.substring(ix + 1, iz)
+
+        if (!Utility.isName(n))
+          throw new IllegalArgumentException("internal entity def: \""+n+"\" must be an XML Name")
+
+        // continue scanning after this reference
+        tmp = tmp.substring(iz+1, tmp.length)
+        ix = tmp indexOf '%'
+      }
+    }
+  }
+  validateValue()
+
+  /** Appends the quoted value to `sb` via `Utility.appendQuoted`. */
+  override def buildString(sb: StringBuilder): StringBuilder =
+    Utility.appendQuoted(value, sb)
+
+}
+
+/** An external entity definition, given by its external identifier. */
+case class ExtDef(extID: ExternalID) extends EntityDef {
+  /** Appends the textual form of `extID` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    extID.buildString(sb)
+  }
+}
+
+
+/** A parameter-entity reference, printed as `%ent;`.
+ *
+ *  @param ent the referenced entity name
+ *  @throws IllegalArgumentException if `ent` is rejected by `Utility.isName`
+ */
+case class PEReference(ent: String) extends MarkupDecl {
+  if (!Utility.isName(ent)) {
+    throw new IllegalArgumentException("ent must be an XML Name")
+  }
+
+  /** Appends `%`, the entity name and `;` to `sb`. */
+  override def buildString(sb: StringBuilder): StringBuilder =
+    sb.append('%').append(ent).append(';')
+}
+
+
+// default declarations for attributes
+
+/** The default declaration of an attribute (see `REQUIRED`, `IMPLIED`, `DEFAULT`). */
+sealed abstract class DefaultDecl {
+ // re-declared abstract, so every subclass has to define its own toString
+ override def toString(): String
+ /** Writes this default declaration into `sb`; implementations return a `StringBuilder`. */
+ def buildString(sb: StringBuilder): StringBuilder
+}
+
+/** The `#REQUIRED` default declaration. */
+case object REQUIRED extends DefaultDecl {
+  /** Appends `#REQUIRED` to `sb`. */
+  override def buildString(sb: StringBuilder) = sb.append("#REQUIRED")
+
+  override def toString(): String = "#REQUIRED"
+}
+
+/** The `#IMPLIED` default declaration. */
+case object IMPLIED extends DefaultDecl {
+  /** Appends `#IMPLIED` to `sb`. */
+  override def buildString(sb: StringBuilder) = sb.append("#IMPLIED")
+
+  override def toString(): String = "#IMPLIED"
+}
+
+/** A default attribute value, optionally marked `#FIXED`.
+ *
+ *  @param fixed    whether the value is written with a leading `#FIXED `
+ *  @param attValue the default value
+ */
+case class DEFAULT(fixed: Boolean, attValue: String) extends DefaultDecl {
+
+  /** Appends `#FIXED ` when `fixed` is set, then the value as written by
+   *  `Utility.appendEscapedQuoted`.
+   */
+  override def buildString(sb: StringBuilder): StringBuilder = {
+    if (fixed) {
+      sb.append("#FIXED ")
+    }
+    Utility.appendEscapedQuoted(attValue, sb)
+  }
+
+  /** The text produced by `buildString`. */
+  override def toString(): String = sbToString(sb => buildString(sb))
+}
diff --git a/src/xml/scala/xml/dtd/DocType.scala b/src/xml/scala/xml/dtd/DocType.scala
new file mode 100644
index 0000000000..849d560cc9
--- /dev/null
+++ b/src/xml/scala/xml/dtd/DocType.scala
@@ -0,0 +1,39 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package dtd
+
+/** An XML node for document type declaration.
+ *
+ *  @author Burak Emir
+ *
+ *  @param  name      name of this DOCTYPE
+ *  @param  extID     NoExternalID or the external ID of this doctype
+ *  @param  intSubset sequence of internal subset declarations
+ *  @throws IllegalArgumentException if `name` is rejected by `Utility.isName`
+ */
+case class DocType(name: String, extID: ExternalID, intSubset: Seq[dtd.Decl]) {
+  if (!Utility.isName(name)) {
+    throw new IllegalArgumentException(name+" must be an XML Name")
+  }
+
+  /** Returns `<!DOCTYPE name extID` followed by the internal subset
+   *  between `[` and `]` (omitted when the subset is empty) and `>`.
+   */
+  final override def toString() = {
+    val subsetText =
+      if (intSubset.nonEmpty) intSubset.mkString("[", "", "]")
+      else ""
+
+    "<!DOCTYPE " + name + " " + extID.toString() + subsetText + ">"
+  }
+}
+
+/** Companion providing a shorthand constructor. */
+object DocType {
+  /** Creates a doctype with no external id, nor internal subset declarations. */
+  def apply(name: String): DocType =
+    new DocType(name, NoExternalID, Nil)
+}
diff --git a/src/xml/scala/xml/dtd/ElementValidator.scala b/src/xml/scala/xml/dtd/ElementValidator.scala
new file mode 100644
index 0000000000..4830769a7d
--- /dev/null
+++ b/src/xml/scala/xml/dtd/ElementValidator.scala
@@ -0,0 +1,132 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package dtd
+
+import PartialFunction._
+import scala.collection.mutable
+
+import ContentModel.ElemName
+import MakeValidationException._ // @todo other exceptions
+
+import impl._
+
+/** Validates the children and/or the attributes of an element.
+ *
+ *  Validation problems are recorded as exceptions in an internal list and
+ *  are not thrown, with one exception visible in `check(nodes)`: under an
+ *  `ELEMENTS` model, a child for which the automaton has no transition
+ *  makes the check throw a `ValidationException`.
+ */
+class ElementValidator() extends Function1[Node,Boolean] {
+
+  // problems recorded so far, most recent first
+  private var exc: List[ValidationException] = Nil
+
+  protected var contentModel: ContentModel = _
+  protected var dfa: DetWordAutom[ElemName] = _
+  protected var adecls: List[AttrDecl] = _
+
+  /** set content model, enabling element validation */
+  def setContentModel(cm: ContentModel) = {
+    contentModel = cm
+    // only ELEMENTS models get an automaton here; every other model clears it
+    dfa = cm match {
+      case ELEMENTS(r) =>
+        new SubsetConstruction(ContentModel.Translator.automatonFrom(r, 1)).determinize
+      case _ =>
+        null
+    }
+  }
+
+  def getContentModel = contentModel
+
+  /** set meta data, enabling attribute validation */
+  def setMetaData(adecls: List[AttrDecl]) { this.adecls = adecls }
+
+  /** Returns the element names of the nodes relevant for validation.
+   *
+   *  All-whitespace atoms are always dropped; other special nodes are
+   *  dropped when `skipPCDATA` is set; remaining nodes are kept only when
+   *  their namespace is `null`.
+   */
+  def getIterable(nodes: Seq[Node], skipPCDATA: Boolean): Iterable[ElemName] = {
+    def isAllWhitespace(a: Atom[_]) = a.data match {
+      case s: String => s.trim == ""
+      case _         => false
+    }
+
+    def isRelevant(node: Node): Boolean = node match {
+      case special: SpecialNode =>
+        special match {
+          case a: Atom[_] if isAllWhitespace(a) => false // always skip all-whitespace nodes
+          case _                                => !skipPCDATA
+        }
+      case other =>
+        other.namespace eq null
+    }
+
+    nodes filter isRelevant map (node => ElemName(node.label))
+  }
+
+  /** check attributes, return true if md corresponds to attribute declarations in adecls.
+   */
+  def check(md: MetaData): Boolean = {
+    val errorsBefore = exc.length
+    // indices (into adecls) of the declarations matched by some attribute
+    val seen = new mutable.BitSet(adecls.length)
+
+    md foreach { attr =>
+      val wanted = attr.key
+      val declIndex = adecls indexWhere {
+        case AttrDecl(`wanted`, _, _) => true
+        case _                        => false
+      }
+
+      if (declIndex < 0)
+        exc ::= fromUndefinedAttribute(attr.key)
+      else {
+        seen += declIndex
+        adecls(declIndex) match {
+          case AttrDecl(_, _, DEFAULT(true, fixedValue)) =>
+            val actual = attr.value.toString
+            if (actual != fixedValue)
+              exc ::= fromFixedAttribute(attr.key, fixedValue, actual)
+          case _ =>
+        }
+      }
+    }
+
+    // every REQUIRED declaration that no attribute matched is an error
+    for ((decl, j) <- adecls.zipWithIndex) decl match {
+      case AttrDecl(key, tpe, REQUIRED) if !seen(j) => exc ::= fromMissingAttribute(key, tpe)
+      case _                                        =>
+    }
+
+    exc.length == errorsBefore //- true if no new exception
+  }
+
+  /** check children, return true if conform to content model
+   *  @note    contentModel != null
+   */
+  def check(nodes: Seq[Node]): Boolean = contentModel match {
+    case ANY    => true
+    case EMPTY  => getIterable(nodes, skipPCDATA = false).isEmpty
+    case PCDATA => getIterable(nodes, skipPCDATA = true).isEmpty
+    case MIXED(ContentModel.Alt(branches @ _*)) => // @todo
+      val errorsBefore = exc.length
+      def isDeclared(label: String): Boolean =
+        branches exists {
+          case ContentModel.Letter(ElemName(`label`)) => true
+          case _                                      => false
+        }
+
+      getIterable(nodes, skipPCDATA = true) foreach { elem =>
+        val label = elem.name
+        if (!isDeclared(label))
+          exc ::= MakeValidationException.fromUndefinedElement(label)
+      }
+      exc.length == errorsBefore // - true if no new exception
+
+    case _: ELEMENTS =>
+      // run the automaton over the children, starting in state 0
+      val endState = getIterable(nodes, skipPCDATA = false).foldLeft(0) { (state, elem) =>
+        dfa.delta(state).get(elem) match {
+          case Some(target) => target
+          case None         => throw ValidationException("element %s not allowed here" format elem)
+        }
+      }
+      dfa isFinal endState
+    case _ => false
+  }
+
+  /** applies various validations - accumulates error messages in exc
+   *  @todo fail on first error, ignore other errors (rearranging conditions)
+   */
+  def apply(n: Node): Boolean = {
+    //- ? check children (skipped while no content model is set)
+    def childrenOk = (contentModel == null) || check(n.child)
+    //- ? check attributes (skipped while no declarations are set)
+    def attributesOk = (adecls == null) || check(n.attributes)
+
+    childrenOk && attributesOk
+  }
+}
diff --git a/src/xml/scala/xml/dtd/ExternalID.scala b/src/xml/scala/xml/dtd/ExternalID.scala
new file mode 100644
index 0000000000..880633d860
--- /dev/null
+++ b/src/xml/scala/xml/dtd/ExternalID.scala
@@ -0,0 +1,86 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package dtd
+
+/** An external identifier - either a `PublicID` or a `SystemID`.
+ *
+ *  @author Burak Emir
+ */
+sealed abstract class ExternalID extends parsing.TokenTests {
+  /** Wraps `s` in double quotes, or in single quotes when `s` itself
+   *  contains a double quote.
+   */
+  def quoted(s: String) = {
+    val delimiter = if (s contains '"') '\'' else '"'
+    delimiter.toString + s + delimiter
+  }
+
+  // public != null: PUBLIC " " publicLiteral " " [systemLiteral]
+  // public == null: SYSTEM " " systemLiteral
+  override def toString(): String =
+    if (publicId != null) {
+      val publicPart = "PUBLIC " + quoted(publicId)
+      if (systemId != null) publicPart + " " + quoted(systemId)
+      else publicPart
+    }
+    else "SYSTEM " + quoted(systemId)
+
+  /** Appends the `toString` form of this identifier to `sb`. */
+  def buildString(sb: StringBuilder): StringBuilder = {
+    val text = this.toString()
+    sb.append(text)
+  }
+
+  def systemId: String
+  def publicId: String
+}
+
+/** A system identifier; its `publicId` is always `null`.
+ *
+ *  @author Burak Emir
+ *  @param  systemId the system identifier literal
+ *  @throws IllegalArgumentException if `systemId` is rejected by `checkSysID`
+ */
+case class SystemID(systemId: String) extends ExternalID {
+  val publicId = null
+
+  if (!checkSysID(systemId)) {
+    throw new IllegalArgumentException("can't use both \" and ' in systemId")
+  }
+}
+
+
+/** A public identifier (see http://www.w3.org/QA/2002/04/valid-dtd-list.html).
+ *
+ *  @author Burak Emir
+ *  @param  publicId the public identifier literal
+ *  @param  systemId (can be null for notation pubIDs) the system identifier literal
+ *  @throws IllegalArgumentException if `publicId` is rejected by `checkPubID`,
+ *          or a non-null `systemId` is rejected by `checkSysID`
+ */
+case class PublicID(publicId: String, systemId: String) extends ExternalID {
+  if (!checkPubID(publicId)) {
+    throw new IllegalArgumentException("publicId must consist of PubidChars")
+  }
+
+  // a missing (null) system literal is accepted without further checks
+  if (systemId != null) {
+    if (!checkSysID(systemId))
+      throw new IllegalArgumentException("can't use both \" and ' in systemId")
+  }
+
+  /** the constant "#PI" */
+  def label = "#PI"
+
+  /** always empty */
+  def attribute = Node.NoAttributes
+
+  /** always empty */
+  def child = Nil
+}
+
+/** A marker used when a `DocType` contains no external id.
+ *
+ *  Both identifiers are `null` and the textual form is the empty string.
+ *
+ *  @author Michael Bayne
+ */
+object NoExternalID extends ExternalID {
+  val publicId = null
+  val systemId = null
+
+  /** Always the empty string. */
+  override def toString = {
+    ""
+  }
+}
diff --git a/src/xml/scala/xml/dtd/Scanner.scala b/src/xml/scala/xml/dtd/Scanner.scala
new file mode 100644
index 0000000000..5f9d1ccaed
--- /dev/null
+++ b/src/xml/scala/xml/dtd/Scanner.scala
@@ -0,0 +1,79 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package dtd
+
+/** Scanner for regexps (content models in DTD element declarations).
+ *
+ *  The kind of the current token is kept in `token`; for `NAME` tokens the
+ *  scanned text is kept in `value`.
+ *  todo: cleanup
+ */
+class Scanner extends Tokens with parsing.TokenTests {
+
+  /** Character the scanner reads once the input is exhausted. */
+  final val ENDCH = '\u0000'
+
+  var token: Int = END
+  var value: String = _
+
+  private var it: Iterator[Char] = null
+  private var c: Char = 'z'
+
+  /** initializes the scanner on input s */
+  final def initScanner(s: String) {
+    value = ""
+    it = s.iterator
+    token = 1+END // anything but END, otherwise nextToken() would not scan
+    next()
+    nextToken()
+  }
+
+  /** scans the next token; once `END` has been reached the token stays `END` */
+  final def nextToken() {
+    if (token != END) token = readToken
+  }
+
+  // todo: see XML specification... probably isLetter,isDigit is fine
+  final def isIdentChar = ( ('a' <= c && c <= 'z')
+                         || ('A' <= c && c <= 'Z'))
+
+  /** Advances to the next input character, or to `ENDCH` at end of input. */
+  final def next() = if (it.hasNext) c = it.next() else c = ENDCH
+
+  /** Consumes the character `d`, failing if the current character differs. */
+  final def acc(d: Char) {
+    if (c == d) next() else scala.sys.error("expected '"+d+"' found '"+c+"' !")
+  }
+
+  /** Consumes the characters of `ds` in order. */
+  final def accS(ds: Seq[Char]) { ds foreach acc }
+
+  /** Reads one token starting at the current character and returns its kind. */
+  final def readToken: Int =
+    if (isSpace(c)) {
+      // Advance through next(), not it.next(): trailing whitespace must end
+      // at ENDCH instead of running the iterator past its last element.
+      while (isSpace(c)) next()
+      S
+    } else c match {
+      case '('   => next(); LPAREN
+      case ')'   => next(); RPAREN
+      case ','   => next(); COMMA
+      case '*'   => next(); STAR
+      case '+'   => next(); PLUS
+      case '?'   => next(); OPT
+      case '|'   => next(); CHOICE
+      case '#'   => next(); accS( "PCDATA" ); TOKEN_PCDATA
+      case ENDCH => END
+      case _     =>
+        if (isNameStart(c)) name // NAME
+        else scala.sys.error("unexpected character:" + c)
+    }
+
+  /** Reads a name into `value` and returns `NAME`. */
+  final def name = {
+    val sb = new StringBuilder()
+    do { sb.append(c); next() } while (isNameChar(c))
+    value = sb.toString()
+    NAME
+  }
+
+}
diff --git a/src/xml/scala/xml/dtd/Tokens.scala b/src/xml/scala/xml/dtd/Tokens.scala
new file mode 100644
index 0000000000..07e888e77a
--- /dev/null
+++ b/src/xml/scala/xml/dtd/Tokens.scala
@@ -0,0 +1,45 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package dtd
+
+
+/** Token kinds produced by the content-model scanner, with their
+ *  printable forms.
+ */
+class Tokens {
+
+  // Tokens
+
+  final val TOKEN_PCDATA = 0
+  final val NAME         = 1
+  final val LPAREN       = 3
+  final val RPAREN       = 4
+  final val COMMA        = 5
+  final val STAR         = 6
+  final val PLUS         = 7
+  final val OPT          = 8
+  final val CHOICE       = 9
+  final val END          = 10
+  final val S            = 13
+
+  /** Printable form of the token kind `i`, for use in error messages.
+   *
+   *  Values that are not one of the token constants yield a placeholder
+   *  naming the number, so building an error message cannot itself fail
+   *  with a `MatchError`.
+   */
+  final def token2string(i: Int): String = i match {
+    case TOKEN_PCDATA => "#PCDATA"
+    case NAME         => "NAME"
+    case LPAREN       => "("
+    case RPAREN       => ")"
+    case COMMA        => ","
+    case STAR         => "*"
+    case PLUS         => "+"
+    case OPT          => "?"
+    case CHOICE       => "|"
+    case END          => "END"
+    case S            => " "
+    case _            => "<unknown token " + i + ">"
+  }
+}
diff --git a/src/xml/scala/xml/dtd/ValidationException.scala b/src/xml/scala/xml/dtd/ValidationException.scala
new file mode 100644
index 0000000000..1bfae55286
--- /dev/null
+++ b/src/xml/scala/xml/dtd/ValidationException.scala
@@ -0,0 +1,44 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package dtd
+
+
+/** Exception describing a validation problem; `e` is passed on as the exception message. */
+case class ValidationException(e: String) extends Exception(e)
+
+/** Factory methods building `ValidationException`s with uniform messages.
+ *
+ *  @author Burak Emir
+ */
+object MakeValidationException {
+  /** A FIXED attribute `k` was given `actual` instead of `value`. */
+  def fromFixedAttribute(k: String, value: String, actual: String) =
+    ValidationException("value of attribute " + k + " FIXED to \""+
+                        value+"\", but document tries \""+actual+"\"")
+
+  def fromNonEmptyElement() =
+    new ValidationException("element should be *empty*")
+
+  def fromUndefinedElement(label: String) =
+    new ValidationException("element \""+ label +"\" not allowed here")
+
+  def fromUndefinedAttribute(key: String) =
+    new ValidationException("attribute " + key +" not allowed here")
+
+  /** Several REQUIRED attributes are missing.
+   *
+   *  The quoted keys are separated from the message by a space and from
+   *  each other by `, `; previously they were glued to the message and to
+   *  one another with no separator.
+   */
+  def fromMissingAttribute(allKeys: Set[String]) = {
+    val sb = new StringBuilder("missing value for REQUIRED attribute")
+    if (allKeys.size > 1) sb.append('s')
+    if (allKeys.nonEmpty)
+      sb.append(allKeys.toSeq.map(k => "'%s'".format(k)).mkString(" ", ", ", ""))
+    new ValidationException(sb.toString())
+  }
+
+  /** The REQUIRED attribute `key` of type `tpe` is missing. */
+  def fromMissingAttribute(key: String, tpe: String) =
+    new ValidationException("missing value for REQUIRED attribute %s of type %s".format(key, tpe))
+}
diff --git a/src/xml/scala/xml/dtd/impl/Base.scala b/src/xml/scala/xml/dtd/impl/Base.scala
new file mode 100644
index 0000000000..91ff03a93a
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/Base.scala
@@ -0,0 +1,67 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml.dtd.impl
+
+/** Basic regular expressions: alternatives, sequences, iteration, the
+ *  empty expression and a wrapper for attaching meta information.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class Base {
+  type _regexpT <: RegExp
+
+  /** Root of the expression types; each one states whether it is nullable. */
+  abstract class RegExp {
+    val isNullable: Boolean
+  }
+
+  object Alt {
+    /** `Alt( R,R,R* )`: rejects fewer than two branches with a `SyntaxError`. */
+    def apply(rs: _regexpT*) = {
+      if (rs.size < 2)
+        throw new SyntaxError("need at least 2 branches in Alt")
+      new Alt(rs: _*)
+    }
+    // Can't enforce that statically without changing the interface
+    // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*)
+    def unapplySeq(x: Alt) = Some(x.rs)
+  }
+
+  /** Alternative; nullable as soon as one branch is. */
+  class Alt private (val rs: _regexpT*) extends RegExp {
+    final val isNullable = rs exists (branch => branch.isNullable)
+  }
+
+  object Sequ {
+    /** Sequ( R,R* ); the empty sequence is represented by `Eps`. */
+    def apply(rs: _regexpT*) =
+      if (rs.nonEmpty) new Sequ(rs: _*)
+      else Eps
+    def unapplySeq(x: Sequ) = Some(x.rs)
+  }
+
+  /** Sequence; nullable only when every member is. */
+  class Sequ private (val rs: _regexpT*) extends RegExp {
+    final val isNullable = rs forall (member => member.isNullable)
+  }
+
+  /** Iteration of `r`; always nullable. */
+  case class Star(r: _regexpT) extends RegExp {
+    final lazy val isNullable = true
+  }
+
+  // The empty Sequ.
+  case object Eps extends RegExp {
+    final lazy val isNullable = true
+    override def toString() = "Eps"
+  }
+
+  /** this class can be used to add meta information to regexps. */
+  class Meta(r1: _regexpT) extends RegExp {
+    final val isNullable = r1.isNullable
+    def r = r1
+  }
+}
diff --git a/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala
new file mode 100644
index 0000000000..f30309b037
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala
@@ -0,0 +1,98 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml.dtd.impl
+
+import scala.collection.{ mutable, immutable }
+
+// todo: replace global variable pos with acc
+
+/** This class turns a regular expression over `A` into a
+ *  [[scala.xml.dtd.impl.NondetWordAutom]] over `A` using the celebrated
+ *  position automata construction (also called ''Berry-Sethi'' or ''Glushkov'').
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class BaseBerrySethi {
+  val lang: Base
+  import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star }
+
+  protected var pos = 0
+
+  // results which hold all info for the NondetWordAutomaton
+  protected var follow: mutable.HashMap[Int, Set[Int]] = _
+
+  protected var finalTag: Int = _
+
+  protected var finals: immutable.Map[Int, Int] = _ // final states
+
+  // constants --------------------------
+
+  final val emptySet: Set[Int] = Set()
+
+  /** Shared traversal behind `compFirst` and `compLast`.
+   *
+   *  @param r            the expression to analyse
+   *  @param compFunction the function applied to sub-expressions
+   *                      (`compFirst` or `compLast`, so overrides take part)
+   *  @param fromRight    scan the members of a sequence from its end
+   *                      (needed for last-sets) instead of from its start
+   */
+  private def doComp(r: RegExp, compFunction: RegExp => Set[Int], fromRight: Boolean): Set[Int] = r match {
+    // every branch contributes; use the requested function, not always compFirst
+    case x: Alt   => (x.rs map compFunction).foldLeft(emptySet)(_ ++ _)
+    case Eps      => emptySet
+    case x: Meta  => compFunction(x.r)
+    case x: Sequ  =>
+      // take the nullable run at the relevant end plus the first member
+      // after it that is not nullable
+      val members  = if (fromRight) x.rs.reverse else x.rs
+      val (l1, l2) = members span (_.isNullable)
+      ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _)
+    case Star(t)  => compFunction(t)
+    case _        => throw new IllegalArgumentException("unexpected pattern " + r.getClass)
+  }
+
+  /** Computes `first(r)` for the word regexp `r`. */
+  protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst, fromRight = false)
+
+  /** Computes `last(r)` for the regexp `r`. */
+  protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast, fromRight = true)
+
+  /** Starts from the right-to-left
+   *  precondition: pos is final
+   *               pats are successor patterns of a Sequence node
+   */
+  protected def compFollow(rs: Seq[RegExp]): Set[Int] = {
+    follow(0) =
+      if (rs.isEmpty) emptySet
+      else rs.foldRight(Set(pos))((p, fol) => {
+        val first = compFollow1(fol, p)
+
+        if (p.isNullable) fol ++ first
+        else first
+      })
+
+    follow(0)
+  }
+
+  /** Returns the first set of an expression, setting the follow set along the way.
+   */
+  protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
+    case x: Alt     => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*)
+    case x: Meta    => compFollow1(fol1, x.r)
+    case x: Star    => compFollow1(fol1 ++ compFirst(x.r), x.r)
+    case x: Sequ    =>
+      x.rs.foldRight(fol1) { (p, fol) =>
+        val first = compFollow1(fol, p)
+
+        if (p.isNullable) fol ++ first
+        else first
+      }
+    case _          => throw new IllegalArgumentException("unexpected pattern: " + r.getClass)
+  }
+
+  /** Visits every sub-expression of a pattern. This base version only
+   *  recurses and returns nothing; any other expression kind is rejected
+   *  with an `IllegalArgumentException`.
+   */
+  protected def traverse(r: RegExp): Unit = r match {
+    // (is tree automaton stuff, more than Berry-Sethi)
+    case x: Alt  => x.rs foreach traverse
+    case x: Sequ => x.rs foreach traverse
+    case x: Meta => traverse(x.r)
+    case Star(t) => traverse(t)
+    case _       => throw new IllegalArgumentException("unexp pattern " + r.getClass)
+  }
+}
diff --git a/src/xml/scala/xml/dtd/impl/DetWordAutom.scala b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala
new file mode 100644
index 0000000000..6f8ba4de72
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala
@@ -0,0 +1,50 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml.dtd.impl
+
+import scala.collection.{ mutable, immutable }
+
+/** A deterministic automaton. States are integers, where
+ *  0 is always the only initial state. Transitions are represented
+ *  in the delta function. A default transition is one that
+ *  is taken when no other transition can be taken.
+ *  All states are reachable. Accepting states are those for which
+ *  the entry in `finals` is non-zero.
+ *
+ *  @author Burak Emir
+ *  @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class DetWordAutom[T <: AnyRef] {
+  val nstates: Int
+  val finals: Array[Int]
+  val delta: Array[mutable.Map[T, Int]]
+  val default: Array[Int]
+
+  /** True when the `finals` entry of state `q` is non-zero. */
+  def isFinal(q: Int) = finals(q) != 0
+
+  /** True when `q` has no labelled transition and its default leads back to `q`. */
+  def isSink(q: Int) = delta(q).isEmpty && default(q) == q
+
+  /** Successor of `q` under `label`, falling back to the default transition. */
+  def next(q: Int, label: T) = delta(q).getOrElse(label, default(q))
+
+  /** Lists the state count, the `finals` entries keyed by state, and for
+   *  each state its transitions and (where present) its default target.
+   */
+  override def toString() = {
+    val finalsByState = finals.zipWithIndex.map(_.swap).toMap
+
+    val sb = new StringBuilder("[DetWordAutom  nstates=")
+    sb.append(nstates).append(" finals=").append(finalsByState.toString()).append(" delta=\n")
+
+    (0 until nstates) foreach { q =>
+      sb.append("%d->%s\n".format(q, delta(q)))
+      if (q < default.length)
+        sb.append("_>%s\n".format(default(q)))
+    }
+    sb.toString
+  }
+}
diff --git a/src/xml/scala/xml/dtd/impl/Inclusion.scala b/src/xml/scala/xml/dtd/impl/Inclusion.scala
new file mode 100644
index 0000000000..07b6afaeba
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/Inclusion.scala
@@ -0,0 +1,70 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml.dtd.impl
+
+
+/** A fast test of language inclusion between minimal automata.
+ * inspired by the ''AMoRE automata library''.
+ *
+ * The test walks over pairs of states (one from each automaton), starting
+ * at the pair of initial states and following every label in `labels`.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] trait Inclusion[A <: AnyRef] {
+
+ /** The labels tried from every visited pair of states. */
+ val labels: Seq[A]
+
+ /** Returns true if `dfa1` is included in `dfa2`.
+ *
+ * The result becomes false as soon as some visited successor pair has a
+ * final state on the `dfa1` side and a non-final state on the `dfa2` side.
+ * NOTE(review): only successor pairs are tested this way; the initial pair
+ * (0, 0) itself is never tested for finality - confirm this is intended.
+ */
+ def inclusion(dfa1: DetWordAutom[A], dfa2: DetWordAutom[A]) = {
+
+ // A pair (q1, q2) is packed into one positive Int; 0 is never a valid code.
+ def encode(q1: Int, q2: Int) = 1 + q1 + q2 * dfa1.nstates
+ def decode2(c: Int) = (c-1) / (dfa1.nstates) //integer division
+ def decode1(c: Int) = (c-1) % (dfa1.nstates)
+
+ var q1 = 0 //dfa1.initstate; // == 0
+ var q2 = 0 //dfa2.initstate; // == 0
+
+ // `mark` doubles as visited-set and work list, indexed by pair code:
+ // 0 = pair not seen yet, `max` = pair is the current end of the list,
+ // any other value = code of the pair queued right after this one.
+ val max = 1 + dfa1.nstates * dfa2.nstates
+ val mark = new Array[Int](max)
+
+ var result = true
+ var current = encode(q1, q2)
+ var last = current
+ mark(last) = max // mark (q1,q2)
+ // current == 0 is the stop signal set below once the list is exhausted
+ while (current != 0 && result) {
+ //Console.println("current = [["+q1+" "+q2+"]] = "+current);
+ for (letter <- labels) {
+ val r1 = dfa1.next(q1,letter)
+ val r2 = dfa2.next(q2,letter)
+ if (dfa1.isFinal(r1) && !dfa2.isFinal(r2))
+ result = false
+ val test = encode(r1, r2)
+ //Console.println("test = [["+r1+" "+r2+"]] = "+test);
+ // unseen pair: append it to the end of the work list
+ if (mark(test) == 0) {
+ mark(last) = test
+ mark(test) = max
+ last = test
+ }
+ }
+ // move on to the pair queued after the current one, if any
+ val ncurrent = mark(current)
+ if( ncurrent != max ) {
+ q1 = decode1(ncurrent)
+ q2 = decode2(ncurrent)
+ current = ncurrent
+ } else {
+ current = 0
+ }
+ }
+ result
+ }
+}
diff --git a/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala
new file mode 100644
index 0000000000..0bb19a7e3e
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala
@@ -0,0 +1,60 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml.dtd.impl
+
+import scala.collection.{ immutable, mutable }
+
+/** A nondeterministic automaton. States are the integers `0 until nstates`,
+ *  where 0 is always the only initial state. `delta` holds the labelled
+ *  transitions of every state; `default` holds the transitions that are
+ *  taken when the label has no entry in `delta`. A state is accepting
+ *  when its entry in `finals` is positive.
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class NondetWordAutom[T <: AnyRef] {
+  val nstates: Int
+  val labels: Seq[T]
+  val finals: Array[Int] // 0 means not final
+  val delta: Array[mutable.Map[T, immutable.BitSet]]
+  val default: Array[immutable.BitSet]
+
+  /** @return true if the state is final */
+  final def isFinal(state: Int) = finals(state) > 0
+
+  /** @return tag of final state */
+  final def finalTag(state: Int) = finals(state)
+
+  /** @return true if the set of states contains at least one final state */
+  final def containsFinal(Q: immutable.BitSet): Boolean = Q.exists(q => isFinal(q))
+
+  /** @return true if there are no accepting states */
+  final def isEmpty = !((0 until nstates) exists isFinal)
+
+  /** @return the states reached from state `q` on label `a`, falling back to the default transition */
+  def next(q: Int, a: T): immutable.BitSet = delta(q).getOrElse(a, default(q))
+
+  /** @return the states reached from any state of `Q` on label `a` */
+  def next(Q: immutable.BitSet, a: T): immutable.BitSet = unionOver(Q, q => next(q, a))
+
+  /** @return the states reached from any state of `Q` by a default transition */
+  def nextDefault(Q: immutable.BitSet): immutable.BitSet = unionOver(Q, q => default(q))
+
+  // union of the successor sets that `succ` yields for the members of Q
+  private def unionOver(Q: immutable.BitSet, succ: Int => immutable.BitSet): immutable.BitSet =
+    Q.foldLeft(immutable.BitSet.empty)((acc, q) => acc ++ succ(q))
+
+  private def finalStates = (0 until nstates).filter(q => isFinal(q))
+
+  override def toString = {
+    val finalString = finalStates.map(j => j -> finals(j)).toMap.toString
+    val deltaString =
+      (for (i <- 0 until nstates) yield " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString
+
+    "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString)
+  }
+}
diff --git a/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala
new file mode 100644
index 0000000000..1720604132
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala
@@ -0,0 +1,37 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml.dtd.impl
+
+/** Pointed regular hedge expressions, a useful subclass of regular hedge expressions.
+ *
+ * Declares three expression forms as subclasses of `RegExp`: `Node`,
+ * `TopIter` and `Point`. `Node` and `Point` are never nullable; a `TopIter`
+ * is nullable only when both of its operands are.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class PointedHedgeExp extends Base {
+
+ type _regexpT <: RegExp // expression type used for the operands below; left abstract here
+ type _labelT // label type carried by Node; left abstract here
+
+ case class Node(label: _labelT, r: _regexpT) extends RegExp { // an expression `r` under a label
+ final val isNullable = false
+ }
+
+ case class TopIter(r1: _regexpT, r2: _regexpT) extends RegExp {
+ final val isNullable = r1.isNullable && r2.isNullable //? (question mark is the original author's; NOTE(review): confirm the conjunction is intended)
+ }
+
+ case object Point extends RegExp { // the "point" that gives these expressions their name
+ final val isNullable = false
+ }
+
+}
diff --git a/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala
new file mode 100644
index 0000000000..632ca1eb18
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala
@@ -0,0 +1,108 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml.dtd.impl
+
+import scala.collection.{ mutable, immutable }
+
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] class SubsetConstruction[T <: AnyRef](val nfa: NondetWordAutom[T]) { // builds a DetWordAutom from `nfa`; every state of the result stands for a set (BitSet) of nfa states
+ import nfa.labels
+
+ def selectTag(Q: immutable.BitSet, finals: Array[Int]) = // smallest positive tag among the states of Q; `min` throws if Q has no state with a positive tag
+ (Q map finals filter (_ > 0)).min
+
+ def determinize: DetWordAutom[T] = { // returns the deterministic automaton, with states numbered in the order they are taken off the work stack
+ // for assigning numbers to bitsets
+ var indexMap = scala.collection.Map[immutable.BitSet, Int]()
+ var invIndexMap = scala.collection.Map[Int, immutable.BitSet]() // filled in below but not read again in this method
+ var ix = 0
+
+ // we compute the dfa with states = bitsets
+ val q0 = immutable.BitSet(0) // the set { 0 }
+ val sink = immutable.BitSet.empty // the set { }
+
+ var states = Set(q0, sink) // initial set of sets
+ val delta = new mutable.HashMap[immutable.BitSet, mutable.HashMap[T, immutable.BitSet]]
+ var deftrans = mutable.Map(q0 -> sink, sink -> sink) // initial transitions
+ var finals: mutable.Map[immutable.BitSet, Int] = mutable.Map()
+ val rest = new mutable.Stack[immutable.BitSet] // sets that still need their transitions computed
+
+ rest.push(sink, q0) // q0 is pushed last, so it is popped first and receives index 0
+
+ def addFinal(q: immutable.BitSet) { // records q as final, with its tag, when it contains a final nfa state
+ if (nfa containsFinal q)
+ finals = finals.updated(q, selectTag(q, nfa.finals))
+ }
+ def add(Q: immutable.BitSet) { // registers a set seen for the first time and schedules it
+ if (!states(Q)) {
+ states += Q
+ rest push Q
+ addFinal(Q)
+ }
+ }
+
+ addFinal(q0) // initial state may also be a final state
+
+ while (!rest.isEmpty) {
+ val P = rest.pop()
+ // assign a number to this bitset
+ indexMap = indexMap.updated(P, ix)
+ invIndexMap = invIndexMap.updated(ix, P)
+ ix += 1
+
+ // make transition map
+ val Pdelta = new mutable.HashMap[T, immutable.BitSet]
+ delta.update(P, Pdelta)
+
+ labels foreach { label =>
+ val Q = nfa.next(P, label)
+ Pdelta.update(label, Q)
+ add(Q)
+ }
+
+ // collect default transitions
+ val Pdef = nfa nextDefault P
+ deftrans = deftrans.updated(P, Pdef)
+ add(Pdef)
+ }
+
+ // create DetWordAutom, using indices instead of sets
+ val nstatesR = states.size
+ val deltaR = new Array[mutable.Map[T, Int]](nstatesR)
+ val defaultR = new Array[Int](nstatesR)
+ val finalsR = new Array[Int](nstatesR) // entries stay 0 for states that are not final
+
+ for (Q <- states) {
+ val q = indexMap(Q)
+ val trans = delta(Q)
+ val transDef = deftrans(Q)
+ val qDef = indexMap(transDef)
+ val ntrans = new mutable.HashMap[T, Int]()
+
+ for ((label, value) <- trans) {
+ val p = indexMap(value)
+ if (p != qDef) // a transition that equals the default one is left out of the explicit map
+ ntrans.update(label, p)
+ }
+
+ deltaR(q) = ntrans
+ defaultR(q) = qDef
+ }
+
+ finals foreach { case (k,v) => finalsR(indexMap(k)) = v }
+
+ new DetWordAutom [T] {
+ val nstates = nstatesR
+ val delta = deltaR
+ val default = defaultR
+ val finals = finalsR
+ }
+ }
+}
diff --git a/src/xml/scala/xml/dtd/impl/SyntaxError.scala b/src/xml/scala/xml/dtd/impl/SyntaxError.scala
new file mode 100644
index 0000000000..a5b8a5aba0
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/SyntaxError.scala
@@ -0,0 +1,21 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml.dtd.impl
+
+/** This runtime exception is thrown if an attempt to instantiate a
+ * syntactically incorrect expression is detected.
+ *
+ * @param e the message passed on to `RuntimeException`
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] class SyntaxError(e: String) extends RuntimeException(e)
diff --git a/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala
new file mode 100644
index 0000000000..9bf3fa518b
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala
@@ -0,0 +1,162 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml.dtd.impl
+
+import scala.collection.{ immutable, mutable }
+
+/** This class turns a regular expression into a [[scala.xml.dtd.impl.NondetWordAutom]]
+ * using the celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov'').
+ *
+ * The entry point is `automatonFrom`; the other members hold the state
+ * that is built up while one expression is processed.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class WordBerrySethi extends BaseBerrySethi {
+ override val lang: WordExp
+
+ import lang.{ Alt, Eps, Letter, RegExp, Sequ, Star, _labelT }
+
+ protected var labels: mutable.HashSet[_labelT] = _ // every label met by seenLabel
+ // don't let this fool you, only labelAt is a real, surjective mapping
+ protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma"
+ protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta
+ protected var defaultq: Array[List[Int]] = _ // default transitions
+ protected var initials: Set[Int] = _ // set to Set(0) by initialize
+
+ /** Computes `first(r)` for the word regexp `r`.
+ *
+ * @param r the regular expression
+ * @return the computed set `first(r)`
+ */
+ protected override def compFirst(r: RegExp): Set[Int] = r match {
+ case x: Letter => Set(x.pos)
+ case _ => super.compFirst(r)
+ }
+
+ /** Computes `last(r)` for the word regexp `r`.
+ *
+ * @param r the regular expression
+ * @return the computed set `last(r)`
+ */
+ protected override def compLast(r: RegExp): Set[Int] = r match {
+ case x: Letter => Set(x.pos)
+ case _ => super.compLast(r)
+ }
+
+ /** Returns the first set of an expression, setting the follow set along
+ * the way.
+ *
+ * @param fol1 the set stored as the follow set of a letter's position
+ * @param r the regular expression
+ * @return the computed set
+ */
+ protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match {
+ case x: Letter => follow(x.pos) = fol1 ; Set(x.pos)
+ case Eps => emptySet
+ case _ => super.compFollow1(fol1, r)
+ }
+
+ /** Returns "Sethi-length" of a pattern, creating the set of positions
+ * along the way
+ * (NOTE(review): this comment is not attached to any definition; presumably it once described `traverse`)
+ */
+
+ /** Called at the leaves of the regexp: records `label` as the label of
+ * position `i` and adds it to `labels`.
+ */
+ protected def seenLabel(r: RegExp, i: Int, label: _labelT) {
+ labelAt = labelAt.updated(i, label)
+ this.labels += label
+ }
+
+ // overridden in BindingBerrySethi; advances pos, registers the label at that position and returns the position
+ protected def seenLabel(r: RegExp, label: _labelT): Int = {
+ pos += 1
+ seenLabel(r, pos, label)
+ pos
+ }
+
+ // todo: replace global variable pos with acc
+ override def traverse(r: RegExp): Unit = r match {
+ case a @ Letter(label) => a.pos = seenLabel(r, label) // number this letter
+ case Eps => // ignore
+ case _ => super.traverse(r)
+ }
+
+
+ protected def makeTransition(src: Int, dest: Int, label: _labelT) {
+ val q = deltaq(src)
+ q.update(label, dest :: q.getOrElse(label, Nil)) // prepend dest to the targets already recorded for (src, label)
+ }
+
+ protected def initialize(subexpr: Seq[RegExp]): Unit = {
+ this.labelAt = immutable.Map()
+ this.follow = mutable.HashMap()
+ this.labels = mutable.HashSet()
+ this.pos = 0
+
+ // determine "Sethi-length" of the regexp
+ subexpr foreach traverse
+
+ this.initials = Set(0)
+ }
+
+ protected def initializeAutom() {
+ finals = immutable.Map.empty[Int, Int] // final states
+ deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta
+ defaultq = new Array[List[Int]](pos) // default transitions
+
+ for (j <- 0 until pos) { // start every state with no transitions
+ deltaq(j) = mutable.HashMap[_labelT, List[Int]]()
+ defaultq(j) = Nil
+ }
+ }
+
+ protected def collectTransitions(): Unit = // make transitions
+ for (j <- 0 until pos ; fol = follow(j) ; k <- fol) {
+ if (pos == k) finals = finals.updated(j, finalTag) // a follow entry equal to pos makes j a final state
+ else makeTransition(j, k, labelAt(k))
+ }
+
+ def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = { // builds the automaton for pat; its final states carry finalTag
+ this.finalTag = finalTag
+
+ pat match {
+ case x: Sequ =>
+ // (1,2) compute follow + first
+ initialize(x.rs)
+ pos += 1 // one position past the last numbered letter; collectTransitions compares follow entries against it
+ compFollow(x.rs) // this used to be assigned to var globalFirst and then never used.
+
+ // (3) make automaton from follow sets
+ initializeAutom()
+ collectTransitions()
+
+ if (x.isNullable) // initial state is final
+ finals = finals.updated(0, finalTag)
+
+ val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*) // state number -> its transition map
+ val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final
+
+ val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] =
+ (0 until pos map { x => // note: this x is a state number and shadows the Sequ x above
+ mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*)
+ }).toArray
+
+ val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray
+
+ new NondetWordAutom[_labelT] {
+ val nstates = pos
+ val labels = WordBerrySethi.this.labels.toList
+ val finals = finalsArr
+ val delta = deltaArr
+ val default = defaultArr
+ }
+ case z => // anything else is wrapped in a Sequ and handled by the case above
+ automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag)
+ }
+ }
+}
diff --git a/src/xml/scala/xml/dtd/impl/WordExp.scala b/src/xml/scala/xml/dtd/impl/WordExp.scala
new file mode 100644
index 0000000000..a4bb54c1ea
--- /dev/null
+++ b/src/xml/scala/xml/dtd/impl/WordExp.scala
@@ -0,0 +1,59 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml.dtd.impl
+
+/**
+ * The class `WordExp` provides regular word expressions.
+ *
+ * Users have to instantiate type member `_regexpT <: RegExp`
+ * (from class `Base`) and a type member `_labelT <: Label`.
+ *
+ * Here is a short example (these classes are `private[dtd]`, so it only
+ * compiles inside the `scala.xml.dtd` package):
+ * {{{
+ * import scala.xml.dtd.impl._
+ * object MyLang extends WordExp {
+ * type _regexpT = RegExp
+ * type _labelT = MyChar
+ *
+ * case class MyChar(c:Char) extends Label
+ * }
+ * import MyLang._
+ * // (a* | b)*
+ * val rex = Star(Alt(Star(Letter(MyChar('a'))),Letter(MyChar('b'))))
+ * object MyBerriSethi extends WordBerrySethi {
+ * override val lang = MyLang
+ * }
+ * val nfa = MyBerriSethi.automatonFrom(Sequ(rex), 1)
+ * }}}
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+@deprecated("This class will be removed", "2.10.0")
+private[dtd] abstract class WordExp extends Base {
+
+ abstract class Label // base class that every label type has to extend
+
+ type _regexpT <: RegExp
+ type _labelT <: Label
+
+ case class Letter(a: _labelT) extends RegExp { // an expression made of a single label
+ final lazy val isNullable = false
+ var pos = -1 // mutable position number; starts at -1
+ }
+
+ case class Wildcard() extends RegExp { // an expression without a label of its own
+ final lazy val isNullable = false
+ var pos = -1 // mutable position number; starts at -1
+ }
+}
diff --git a/src/xml/scala/xml/factory/Binder.scala b/src/xml/scala/xml/factory/Binder.scala
new file mode 100755
index 0000000000..947f99e6a4
--- /dev/null
+++ b/src/xml/scala/xml/factory/Binder.scala
@@ -0,0 +1,61 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package factory
+
+import parsing.ValidatingMarkupHandler
+
+/** Rebuilds a tree through the markup handler callbacks: `traverse` walks a
+ *  node, hands each part of it to the matching callback and collects the
+ *  nodes the callbacks return in `result`.
+ *
+ *  `elemStart`, `elem`, `elemEnd` and `rootLabel` are not defined here; they
+ *  are inherited (presumably from `ValidatingMarkupHandler`).
+ *
+ * @author Burak Emir
+ */
+abstract class Binder(val preserveWS: Boolean) extends ValidatingMarkupHandler {
+
+  /** The nodes built so far for the element that is currently being traversed. */
+  var result: NodeBuffer = new NodeBuffer()
+
+  /** Syntax errors are silently ignored. */
+  def reportSyntaxError(pos:Int, str:String) = {}
+
+  /** Builds a processing instruction; `pos` is not used. */
+  final def procInstr(pos: Int, target: String, txt: String) =
+    ProcInstr(target, txt)
+
+  /** Builds a comment; `pos` is not used. */
+  final def comment(pos: Int, txt: String) =
+    Comment(txt)
+
+  /** Builds an entity reference; `pos` is not used. */
+  final def entityRef(pos: Int, n: String) =
+    EntityRef(n)
+
+  /** Builds a text node; `pos` is not used. */
+  final def text(pos: Int, txt: String) =
+    Text(txt)
+
+  /** Appends the rebuilt counterpart of `n` to `result`.
+   *
+   *  Only processing instructions, comments, text, entity references and
+   *  elements are handled; any other node raises a `MatchError`.
+   */
+  final def traverse(n:Node): Unit = n match {
+    case x:ProcInstr =>
+      // `text` of a processing instruction is always "", its content is `proctext`
+      result &+ procInstr(0, x.target, x.proctext)
+    case x:Comment =>
+      // likewise `text` of a comment is always "", its content is `commentText`
+      result &+ comment(0, x.commentText)
+    case x:Text =>
+      result &+ text(0, x.data)
+    case x:EntityRef =>
+      result &+ entityRef(0, x.entityName)
+    case x:Elem =>
+      elemStart(0, x.prefix, x.label, x.attributes, x.scope)
+      // collect the children in a fresh buffer, then go back to the enclosing one
+      val old = result
+      result = new NodeBuffer()
+      for (m <- x.child) traverse(m)
+      result = old &+ elem(0, x.prefix, x.label, x.attributes, x.scope, x.minimizeEmpty, NodeSeq.fromSeq(result)).toList
+      elemEnd(0, x.prefix, x.label)
+  }
+
+  /** Traverses `n` with its label as root label and returns the first node of `result`. */
+  final def validate(n: Node): Node = {
+    this.rootLabel = n.label
+    traverse(n)
+    result(0)
+  }
+}
diff --git a/src/xml/scala/xml/factory/LoggedNodeFactory.scala b/src/xml/scala/xml/factory/LoggedNodeFactory.scala
new file mode 100644
index 0000000000..bc074bfc83
--- /dev/null
+++ b/src/xml/scala/xml/factory/LoggedNodeFactory.scala
@@ -0,0 +1,90 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package factory
+
+/** A node factory that reports what it is doing through `log`.
+ *  To watch a document being loaded, mix it in like this:
+ *  {{{
+ *  object testLogged extends App {
+ *    val x = new scala.xml.parsing.NoBindingFactoryAdapter
+ *          with scala.xml.factory.LoggedNodeFactory[scala.xml.Elem] {
+ *      override def log(s: String) = println(s)
+ *    }
+ *
+ *    Console.println("Start")
+ *    val doc = x.load(new java.net.URL("http://example.com/file.xml"))
+ *    Console.println("End")
+ *    Console.println(doc)
+ *  }
+ *  }}}
+ *
+ *  @author Burak Emir
+ *  @version 1.0
+ */
+@deprecated("This trait will be removed.", "2.11")
+trait LoggedNodeFactory[A <: Node] extends NodeFactory[A] {
+  // switches selecting which factory calls are logged
+  val logNode = true
+  val logText = false
+  val logComment = false
+  val logProcInstr = false
+
+  final val NONE = 0
+  final val CACHE = 1
+  final val FULL = 2
+  /** 0 = no logging, 1 = cache hits, 2 = detail (nothing is logged for level 2 here) */
+  val logCompressLevel = 1
+
+  // methods of NodeFactory
+
+  /** Logs the request and whether the cache already has an entry for its hash, then delegates. */
+  override def makeNode(pre: String, label: String, attrSeq: MetaData,
+                        scope: NamespaceBinding, children: Seq[Node]): A = {
+    if (logNode) {
+      log("[makeNode for "+label+"]")
+    }
+
+    val hash = Utility.hashCode(pre, label, attrSeq.##, scope.##, children)
+    val cached = cache.contains(hash)
+    if (cached && (logCompressLevel >= CACHE)) {
+      log("[cache hit !]")
+    }
+
+    super.makeNode(pre, label, attrSeq, scope, children)
+  }
+
+  /** Logs the text when `logText` is set, then delegates. */
+  override def makeText(s: String) = {
+    if (logText) {
+      log("[makeText:\""+s+"\"]")
+    }
+    super.makeText(s)
+  }
+
+  /** Logs the comment when `logComment` is set, then delegates. */
+  override def makeComment(s: String): Seq[Comment] = {
+    if (logComment) {
+      log("[makeComment:\""+s+"\"]")
+    }
+    super.makeComment(s)
+  }
+
+  /** Logs target and text when `logProcInstr` is set, then delegates. */
+  override def makeProcInstr(t: String, s: String): Seq[ProcInstr] = {
+    if (logProcInstr) {
+      log("[makeProcInstr:\""+t+" "+ s+"\"]")
+    }
+    super.makeProcInstr(t, s)
+  }
+
+  /** Receives every log message; does nothing unless overridden. */
+  @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11")
+  def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/factory/NodeFactory.scala b/src/xml/scala/xml/factory/NodeFactory.scala
new file mode 100644
index 0000000000..94801bb554
--- /dev/null
+++ b/src/xml/scala/xml/factory/NodeFactory.scala
@@ -0,0 +1,61 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package factory
+
+import parsing.{ FactoryAdapter, NoBindingFactoryAdapter }
+import java.io.{ InputStream, Reader, StringReader, File, FileDescriptor, FileInputStream }
+
+trait NodeFactory[A <: Node] {
+  val ignoreComments = false
+  val ignoreProcInstr = false
+
+  /* default behaviour is to use hash-consing: nodes built so far, grouped by hash */
+  val cache = new scala.collection.mutable.HashMap[Int, List[A]]
+
+  /** Builds a new node; supplied by the concrete factory. */
+  protected def create(pre: String, name: String, attrs: MetaData, scope: NamespaceBinding, children:Seq[Node]): A
+
+  /** Creates a node and stores it under `hash`, in front of the nodes in `old`. */
+  protected def construct(hash: Int, old:List[A], pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children:Seq[Node]): A = {
+    val fresh = create(pre, name, attrSeq, scope, children)
+    cache(hash) = fresh :: old
+    fresh
+  }
+
+  /** True if both sequences hold the very same objects (compared with `eq`) in the same order;
+   *  the shorter sequence is padded with null. */
+  def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean =
+    !(ch1.view.zipAll(ch2.view, null, null) exists { case (x, y) => x ne y })
+
+  /** True if `n` has the given prefix, label and attributes and the very same children.
+   *  The scope is not compared. */
+  def nodeEquals(n: Node, pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children: Seq[Node]) =
+    n.prefix == pre && n.label == name && n.attributes == attrSeq && eqElements(n.child, children)
+
+  /** Returns a cached node equal to the requested one (see `nodeEquals`), creating and caching it if there is none. */
+  def makeNode(pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): A = {
+    val hash = Utility.hashCode( pre, name, attrSeq.##, scope.##, children)
+    // nodes already built with this hash, if any
+    val bucket = cache.getOrElse(hash, Nil)
+    val existing = bucket.find(nodeEquals(_, pre, name, attrSeq, scope, children))
+    existing getOrElse construct(hash, bucket, pre, name, attrSeq, scope, children)
+  }
+
+  def makeText(s: String) = Text(s)
+
+  /** An empty sequence when comments are ignored, otherwise the one comment. */
+  def makeComment(s: String): Seq[Comment] = {
+    if (ignoreComments) Nil
+    else List(Comment(s))
+  }
+
+  /** An empty sequence when processing instructions are ignored, otherwise the one instruction. */
+  def makeProcInstr(t: String, s: String): Seq[ProcInstr] = {
+    if (ignoreProcInstr) Nil
+    else List(ProcInstr(t, s))
+  }
+}
diff --git a/src/xml/scala/xml/factory/XMLLoader.scala b/src/xml/scala/xml/factory/XMLLoader.scala
new file mode 100644
index 0000000000..b69f187039
--- /dev/null
+++ b/src/xml/scala/xml/factory/XMLLoader.scala
@@ -0,0 +1,61 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package factory
+
+import javax.xml.parsers.SAXParserFactory
+import parsing.{ FactoryAdapter, NoBindingFactoryAdapter }
+import java.io.{ InputStream, Reader, File, FileDescriptor }
+import java.net.URL
+
+/** A collection of XML loading methods. Each of them turns its argument
+ *  into an `InputSource` and parses it with the parser returned by `parser`.
+ */
+trait XMLLoader[T <: Node]
+{
+  import scala.xml.Source._
+
+  /** The handler receiving the parse events; a new `NoBindingFactoryAdapter` on every call. */
+  def adapter: FactoryAdapter = new NoBindingFactoryAdapter()
+
+  /* Override this to use a different SAXParser. */
+  def parser: SAXParser = {
+    val factory = SAXParserFactory.newInstance()
+    factory.setNamespaceAware(false)
+    factory.newSAXParser()
+  }
+
+  /** Loads XML from the given InputSource, using the supplied parser.
+   *  The methods available in scala.xml.XML use the XML parser in the JDK.
+   */
+  def loadXML(source: InputSource, parser: SAXParser): T = {
+    val handler = adapter
+
+    // TopScope is on the scope stack for the duration of the parse
+    handler.scopeStack.push(TopScope)
+    parser.parse(source, handler)
+    handler.scopeStack.pop()
+
+    handler.rootElem.asInstanceOf[T]
+  }
+
+  /** Loads XML from the given file. */
+  def loadFile(file: File): T = loadXML(fromFile(file), parser)
+  /** Loads XML from the given file descriptor. */
+  def loadFile(fd: FileDescriptor): T = loadXML(fromFile(fd), parser)
+  /** Loads XML from the file with the given name. */
+  def loadFile(name: String): T = loadXML(fromFile(name), parser)
+
+  /** Loads XML from the given InputStream. */
+  def load(is: InputStream): T = loadXML(fromInputStream(is), parser)
+  /** Loads XML from the given Reader. */
+  def load(reader: Reader): T = loadXML(fromReader(reader), parser)
+  /** Loads XML from the given system identifier. */
+  def load(sysID: String): T = loadXML(fromSysId(sysID), parser)
+  /** Loads XML from the given InputSource. */
+  def load(source: InputSource): T = loadXML(source, parser)
+  /** Loads XML from the stream opened on the given URL. */
+  def load(url: URL): T = loadXML(fromInputStream(url.openStream()), parser)
+
+  /** Loads XML from the given String. */
+  def loadString(string: String): T = loadXML(fromString(string), parser)
+}
diff --git a/src/xml/scala/xml/include/CircularIncludeException.scala b/src/xml/scala/xml/include/CircularIncludeException.scala
new file mode 100644
index 0000000000..351f403008
--- /dev/null
+++ b/src/xml/scala/xml/include/CircularIncludeException.scala
@@ -0,0 +1,25 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include
+
+/**
+ * A `CircularIncludeException` is thrown when an included document attempts
+ * to include itself or one of its ancestor documents.
+ *
+ * @param message the detail message; it is handed to `XIncludeException`
+ *                so that `getMessage` returns it
+ */
+class CircularIncludeException(message: String) extends XIncludeException(message) {
+
+  /**
+   * Constructs a `CircularIncludeException` with `'''null'''`
+   * as its error detail message.
+   */
+  def this() = this(null)
+
+}
diff --git a/src/xml/scala/xml/include/UnavailableResourceException.scala b/src/xml/scala/xml/include/UnavailableResourceException.scala
new file mode 100644
index 0000000000..47b176e0f3
--- /dev/null
+++ b/src/xml/scala/xml/include/UnavailableResourceException.scala
@@ -0,0 +1,20 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include
+
+/**
+ * Thrown when an included document cannot be found or loaded.
+ *
+ * @param message the detail message, handed to `XIncludeException`
+ */
+class UnavailableResourceException(message: String) extends XIncludeException(message) {
+
+  /** Creates the exception with `'''null'''` as its detail message. */
+  def this() = this(null)
+
+}
diff --git a/src/xml/scala/xml/include/XIncludeException.scala b/src/xml/scala/xml/include/XIncludeException.scala
new file mode 100644
index 0000000000..11e1644d83
--- /dev/null
+++ b/src/xml/scala/xml/include/XIncludeException.scala
@@ -0,0 +1,58 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include
+
+/**
+ * `XIncludeException` is the generic superclass for all checked exceptions
+ * that may be thrown as a result of a violation of XInclude's rules.
+ *
+ * The detail message given here can later be retrieved with
+ * `getMessage`, inherited from `java.lang.Throwable`.
+ *
+ * @param message the detail message.
+ */
+class XIncludeException(message: String) extends Exception(message) {
+
+  /** Creates the exception with `'''null'''` as its error detail message. */
+  def this() = this(null)
+
+  // the exception this one stands in for; null until setRootCause is called
+  private var rootCause: Throwable = null
+
+  /**
+   * Stores the original exception. When an `IOException`,
+   * `MalformedURLException` or other generic exception is thrown while an
+   * XML document is processed for XIncludes, it is customarily replaced by
+   * some form of `XIncludeException`.
+   *
+   * @param nestedException the underlying exception which
+   *                        caused the XIncludeException to be thrown
+   */
+  def setRootCause(nestedException: Throwable): Unit = {
+    rootCause = nestedException
+  }
+
+  /**
+   * Retrieves the exception stored with `setRootCause`.
+   *
+   * @return the underlying exception which caused the `XIncludeException`
+   *         to be thrown, or null if none has been stored
+   */
+  def getRootCause(): Throwable = rootCause
+
+}
diff --git a/src/xml/scala/xml/include/sax/EncodingHeuristics.scala b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala
new file mode 100644
index 0000000000..57ab5ed91c
--- /dev/null
+++ b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala
@@ -0,0 +1,98 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include.sax
+
+import java.io.InputStream
+import scala.util.matching.Regex
+
+/** `EncodingHeuristics` reads from a stream
+ *  (which should be buffered) and attempts to guess
+ *  what the encoding of the text in the stream is.
+ *  If it fails to determine the type of the encoding,
+ *  it returns the default UTF-8.
+ *
+ *  @author Burak Emir
+ *  @author Paul Phillips
+ */
+object EncodingHeuristics
+{
+  /** The encoding names that `readEncodingFromStream` can answer by itself. */
+  object EncodingNames {
+    // UCS-4 isn't yet implemented in java releases anyway...
+    val bigUCS4 = "UCS-4"
+    val littleUCS4 = "UCS-4"
+    val unusualUCS4 = "UCS-4"
+    val bigUTF16 = "UTF-16BE"
+    val littleUTF16 = "UTF-16LE"
+    val utf8 = "UTF-8"
+    val default = utf8
+  }
+  import EncodingNames._
+
+  /** This utility method attempts to determine the XML character encoding
+   *  by examining the input stream, as specified at
+   *  [[http://www.w3.org/TR/xml/#sec-guessing w3]].
+   *
+   *  The stream is marked before anything is read and reset before the
+   *  method returns normally.
+   *
+   *  @param in `InputStream` to read from.
+   *  @throws IOException if the stream cannot be reset
+   *  @return the name of the encoding.
+   */
+  def readEncodingFromStream(in: InputStream): String = {
+    val bytesToRead = 1024 // enough to read most XML encoding declarations
+
+    // This may fail if there are a lot of space characters before the end
+    // of the encoding declaration
+    in mark bytesToRead
+    val bytes = (in.read, in.read, in.read, in.read)
+
+    // Looks for encoding="..." in the bytes following the first four.
+    def readASCIIEncoding: String = {
+      val data = new Array[Byte](bytesToRead - 4)
+      // read answers -1 when the stream ends right after the first four bytes;
+      // treat that as "nothing more to look at" instead of passing -1 on to String
+      val length = math.max(in.read(data, 0, data.length), 0)
+
+      // Use Latin-1 (ISO-8859-1) because all byte sequences are legal.
+      val declaration = new String(data, 0, length, "ISO-8859-1")
+      val regexp = """(?m).*?encoding\s*=\s*["'](.+?)['"]""".r
+      (regexp findFirstMatchIn declaration) match {
+        case None => default
+        case Some(md) => md.subgroups(0)
+      }
+    }
+
+    // first look for byte order mark
+    val fromByteOrderMark: Option[String] = bytes match {
+      case (0x00, 0x00, 0xFE, 0xFF) => Some(bigUCS4)
+      case (0xFF, 0xFE, 0x00, 0x00) => Some(littleUCS4)
+      case (0x00, 0x00, 0xFF, 0xFE) => Some(unusualUCS4)
+      case (0xFE, 0xFF, 0x00, 0x00) => Some(unusualUCS4)
+      case (0xFE, 0xFF, _ , _ ) => Some(bigUTF16)
+      case (0xFF, 0xFE, _ , _ ) => Some(littleUTF16)
+      case (0xEF, 0xBB, 0xBF, _ ) => Some(utf8)
+      case _ => None
+    }
+
+    // no byte order mark present; first character must be '<' or whitespace
+    val encoding = fromByteOrderMark getOrElse (bytes match {
+      case (0x00, 0x00, 0x00, '<' ) => bigUCS4
+      case ('<' , 0x00, 0x00, 0x00) => littleUCS4
+      case (0x00, 0x00, '<' , 0x00) => unusualUCS4
+      case (0x00, '<' , 0x00, 0x00) => unusualUCS4
+      case (0x00, '<' , 0x00, '?' ) => bigUTF16 // XXX must read encoding
+      case ('<' , 0x00, '?' , 0x00) => littleUTF16 // XXX must read encoding
+      case ('<' , '?' , 'x' , 'm' ) => readASCIIEncoding
+      case (0x4C, 0x6F, 0xA7, 0x94) => utf8 // XXX EBCDIC
+      case _ => utf8 // no XML or text declaration present
+    })
+
+    in.reset()
+    encoding
+  }
+}
diff --git a/src/xml/scala/xml/include/sax/XIncludeFilter.scala b/src/xml/scala/xml/include/sax/XIncludeFilter.scala
new file mode 100644
index 0000000000..3fa3beefb0
--- /dev/null
+++ b/src/xml/scala/xml/include/sax/XIncludeFilter.scala
@@ -0,0 +1,373 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include.sax
+
+import scala.xml.include._
+
+import org.xml.sax.{ Attributes, XMLReader, Locator }
+import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl }
+
+import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException }
+import java.util.Stack
+import java.net.{ URL, MalformedURLException }
+
+/** This is a SAX filter which resolves all XInclude include elements before
+ * passing them on to the client application. Currently this class has the
+ * following known deviation from the XInclude specification:
+ *
+ * 1. XPointer is not supported.
+ *
+ * Furthermore, I would definitely use a new instance of this class for each
+ * document you want to process. I doubt it can be used successfully on
+ * multiple documents. Furthermore, I can virtually guarantee that this
+ * class is not thread safe. You have been warned.
+ *
+ * Since this class is not designed to be subclassed, and since I have not
+ * yet considered how that might affect the methods herein or what other
+ * protected methods might be needed to support subclasses, I have declared
+ * this class final. I may remove this restriction later, though the use-case
+ * for subclassing is weak. This class is designed to have its functionality
+ * extended via a horizontal chain of filters, not a vertical hierarchy of
+ * sub and superclasses.
+ *
+ * To use this class:
+ *
+ * - Construct an `XIncludeFilter` object with a known base URL
+ * - Pass the `XMLReader` object from which the raw document will be read to
+ * the `setParent()` method of this object.
+ * - Pass your own `ContentHandler` object to the `setContentHandler()`
+ * method of this object. This is the object which will receive events
+ * from the parsed and included document.
+ * - Optional: if you wish to receive comments, set your own `LexicalHandler`
+ * object as the value of this object's
+ * `http://xml.org/sax/properties/lexical-handler` property.
+ * Also make sure your `LexicalHandler` asks this object for the status of
+ * each comment using `insideIncludeElement` before doing anything with the
+ * comment.
+ * - Pass the URL of the document to read to this object's `parse()` method
+ *
+ * e.g.
+ * {{{
+ * val includer = new XIncludeFilter(base)
+ * includer setParent parser
+ * includer setContentHandler new SAXXIncluder(System.out)
+ * includer parse args(i)
+ * }}}
+ * translated from Elliotte Rusty Harold's Java source.
+ *
+ * @author Burak Emir
+ */
+class XIncludeFilter extends XMLFilterImpl {
+
+ final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude"
+
+ private val bases = new Stack[URL]()
+ private val locators = new Stack[Locator]()
+
+/* private EntityResolver resolver;
+
+ public XIncludeFilter() {
+ this(null);
+ }
+
+ public XIncludeFilter(EntityResolver resolver) {
+ this.resolver = resolver;
+ } */
+
+
+ // what if this isn't called????
+ // do I need to check this in startDocument() and push something
+ // there????
+ /** Records the locator of the document being read and pushes that
+  *  document's system id, parsed as a URL, as the current base.
+  *
+  *  @throws UnsupportedOperationException if the system id is not a valid URL
+  */
+ override def setDocumentLocator(locator: Locator): Unit = {
+   locators.push(locator)
+   val systemId = locator.getSystemId()
+   val documentBase =
+     try new URL(systemId)
+     catch {
+       case _: MalformedURLException =>
+         throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + systemId)
+     }
+   bases.push(documentBase)
+   super.setDocumentLocator(locator)
+ }
+
+
+ // necessary to throw away contents of non-empty XInclude elements
+ private var level = 0
+
+ /** Tells whether this reader is currently inside a non-empty include
+ * element. (This is '''not''' the same as being inside the node set which
+ * replaces the include element.)
+ * This is primarily needed for comments inside include elements: the
+ * actual `LexicalHandler` must call it to decide whether a comment is
+ * passed on or not.
+ *
+ * @return `true` if and only if the include nesting counter `level` is
+ * non-zero
+ */
+ def insideIncludeElement(): Boolean = level != 0
+
+ /** Handles the start of an element.
+  *
+  *  Outside an include element (`level == 0`) the base URL for the element is
+  *  pushed onto `bases`: the `xml:base` attribute resolved against the parent
+  *  base when present, otherwise the parent base itself. An `xi:include`
+  *  element is then replaced by the referenced document; any other element is
+  *  forwarded, with an `xml:base` attribute added to the root of an included
+  *  document. Inside an include element the event is dropped.
+  *
+  *  @throws SAXException if `xml:base` is malformed, `href` is missing, or
+  *                       `parse` is neither `"xml"` nor `"text"`
+  */
+ override def startElement(uri: String, localName: String, qName: String, atts1: Attributes): Unit = {
+   if (level == 0) { // We're not inside an xi:include element
+
+     // Adjust bases stack by pushing either the new
+     // value of xml:base or the base of the parent
+     val base = atts1.getValue(NamespaceSupport.XMLNS, "base")
+     val parentBase = bases.peek()
+     val currentBase =
+       if (base == null) parentBase
+       else
+         try new URL(parentBase, base)
+         catch {
+           case e: MalformedURLException =>
+             // Report the offending xml:base value rather than the parent base.
+             throw new SAXException("Malformed base URL: " + base, e)
+         }
+     bases push currentBase
+
+     if (uri == XINCLUDE_NAMESPACE && localName == "include") {
+       // include external document
+       val href = atts1.getValue("href")
+       if (href == null)
+         throw new SAXException("Missing href attribute")
+
+       // the parse attribute defaults to "xml"
+       val parse = Option(atts1 getValue "parse") getOrElse "xml"
+       parse match {
+         case "text" => includeTextDocument(href, atts1 getValue "encoding")
+         case "xml"  => includeXMLDocument(href)
+         // Need to check this also in DOM and JDOM????
+         case other  =>
+           throw new SAXException(
+             "Illegal value for parse attribute: " + other)
+       }
+       level += 1
+     }
+     else {
+       val atts =
+         if (atRoot) {
+           // add xml:base attribute if necessary
+           val attsImpl = new AttributesImpl(atts1)
+           attsImpl.addAttribute(NamespaceSupport.XMLNS, "base",
+             "xml:base", "CDATA", currentBase.toExternalForm())
+           atRoot = false
+           attsImpl
+         }
+         else atts1
+       super.startElement(uri, localName, qName, atts)
+     }
+   }
+ }
+
+ /** Handles the end of an element: the end of an `xi:include` element only
+  *  decrements `level`; any other element, when outside an include element,
+  *  pops its base URL and is forwarded.
+  */
+ // NOTE(review): startElement also pushes a base for the include element
+ // itself, but the include branch below does not pop it -- confirm whether
+ // that is intended.
+ override def endElement(uri: String, localName: String, qName: String): Unit = {
+   val closesInclude = uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")
+   if (closesInclude)
+     level -= 1
+   else if (level == 0) {
+     bases.pop()
+     super.endElement(uri, localName, qName)
+   }
+ }
+
+ // number of documents (the source document plus included ones) currently open
+ private var depth = 0
+
+ /** Resets `level`; only the outermost document's start is forwarded. */
+ override def startDocument(): Unit = {
+   level = 0
+   val outermost = depth == 0
+   if (outermost) super.startDocument()
+   depth += 1
+ }
+
+ /** Discards the locator and base of the finished document; only the
+  *  outermost document's end is forwarded.
+  */
+ override def endDocument(): Unit = {
+   locators.pop()
+   bases.pop() // pop the URL for the document itself
+   depth -= 1
+   val outermost = depth == 0
+   if (outermost) super.endDocument()
+ }
+
+ // The events below are forwarded only while outside an include element
+ // (level == 0); otherwise they are dropped.
+
+ // how do prefix mappings move across documents????
+ override def startPrefixMapping(prefix: String, uri: String): Unit =
+   if (level == 0) super.startPrefixMapping(prefix, uri)
+
+ override def endPrefixMapping(prefix: String): Unit =
+   if (level == 0) super.endPrefixMapping(prefix)
+
+ override def characters(ch: Array[Char], start: Int, length: Int): Unit =
+   if (level == 0) super.characters(ch, start, length)
+
+ override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int): Unit =
+   if (level == 0) super.ignorableWhitespace(ch, start, length)
+
+ override def processingInstruction(target: String, data: String): Unit =
+   if (level == 0) super.processingInstruction(target, data)
+
+ override def skippedEntity(name: String): Unit =
+   if (level == 0) super.skippedEntity(name)
+
+ /** Convenience method for error messages: describes the position of the
+  *  most recently pushed locator. When no locator is available the ids are
+  *  empty and line/column are -1.
+  */
+ private def getLocation(): String = {
+   // peek() on an empty java.util.Stack throws EmptyStackException, which
+   // would hide the error this text is being built for.
+   val locator = if (locators.isEmpty) null else locators.peek()
+   val (publicID, systemID, line, column) =
+     if (locator == null) ("", "", -1, -1)
+     else (locator.getPublicId(), locator.getSystemId(),
+           locator.getLineNumber(), locator.getColumnNumber())
+
+   (" in document included from " + publicID
+     + " at " + systemID
+     + " at line " + line + ", column " + column)
+ }
+
+ /** Reads the document at the given URL (resolved against the current base)
+  *  and reports its content through `characters()`. Used for includes with
+  *  `parse="text"`.
+  *
+  *  @param url       URL of the document that will be read
+  *  @param encoding1 encoding of the document, e.g. UTF-8 or ISO-8859-1;
+  *                   `null` or blank means UTF-8
+  *  @throws SAXException if the URL cannot be resolved, the document cannot
+  *                       be read, or the encoding is not supported
+  */
+ private def includeTextDocument(url: String, encoding1: String): Unit = {
+   // kept mutable: the catch clause below reports the encoding finally used
+   var encoding = encoding1
+   if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8"
+
+   val source: URL =
+     try new URL(bases.peek(), url)
+     catch {
+       case e: MalformedURLException =>
+         val ex = new UnavailableResourceException("Unresolvable URL " + url
+           + getLocation())
+         ex.setRootCause(e)
+         throw new SAXException("Unresolvable URL " + url + getLocation(), ex)
+     }
+
+   try {
+     val uc = source.openConnection()
+     val in = new BufferedInputStream(uc.getInputStream())
+     // Always release the connection's stream, also when decoding or a
+     // downstream handler fails.
+     try {
+       // NOTE(review): getContentEncoding() reports the Content-Encoding
+       // header, yet its value is used as a charset name here -- confirm.
+       val encodingFromHeader = uc.getContentEncoding()
+       if (encodingFromHeader != null)
+         encoding = encodingFromHeader
+       else {
+         // What if file does not have a MIME type but name ends in .xml????
+         // MIME types are case-insensitive
+         // Java may be picking this up from file URL
+         val rawType = uc.getContentType()
+         if (rawType != null) {
+           val contentType = rawType.toLowerCase()
+           val isXml =
+             contentType.equals("text/xml") ||
+             contentType.equals("application/xml") ||
+             (contentType.startsWith("text/") && contentType.endsWith("+xml")) ||
+             (contentType.startsWith("application/") && contentType.endsWith("+xml"))
+           if (isXml)
+             encoding = EncodingHeuristics.readEncodingFromStream(in)
+         }
+       }
+       val reader = new InputStreamReader(in, encoding)
+       val c = new Array[Char](1024)
+       var charsRead = reader.read(c, 0, 1024)
+       while (charsRead != -1) {
+         if (charsRead > 0) this.characters(c, 0, charsRead)
+         charsRead = reader.read(c, 0, 1024)
+       }
+     }
+     finally in.close()
+   }
+   catch {
+     case e: UnsupportedEncodingException =>
+       throw new SAXException("Unsupported encoding: "
+         + encoding + getLocation(), e)
+     case e: IOException =>
+       throw new SAXException("Document not found: "
+         + source.toExternalForm() + getLocation(), e)
+   }
+ }
+
+ private var atRoot = false
+
+ /** Parses the document at the given URL (resolved against the current base)
+  *  with a fresh SAX parser that reports back to this filter. Used for
+  *  includes with `parse="xml"`. If no SAX parser can be created, a message
+  *  is printed to standard error and nothing is included.
+  *
+  *  @param url URL of the document that will be read
+  *  @throws SAXException if the URL cannot be resolved, the document is
+  *                       already being included (circular reference), or
+  *                       it cannot be read
+  */
+ private def includeXMLDocument(url: String): Unit = {
+   val source =
+     try new URL(bases.peek(), url)
+     catch {
+       case e: MalformedURLException =>
+         val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation())
+         ex setRootCause e
+         throw new SAXException("Unresolvable URL " + url + getLocation(), ex)
+     }
+
+   // Default SAX parser first, then Xerces by class name.
+   def newParser(): Option[XMLReader] =
+     try Some(XMLReaderFactory.createXMLReader())
+     catch {
+       case _: SAXException =>
+         try Some(XMLReaderFactory.createXMLReader(XercesClassName))
+         catch { case _: SAXException => None }
+     }
+
+   try {
+     newParser() match {
+       case None =>
+         System.err.println("Could not find an XML parser")
+
+       case Some(parser) =>
+         parser.setContentHandler(this)
+         val resolver = this.getEntityResolver()
+         if (resolver != null)
+           parser.setEntityResolver(resolver)
+
+         // save old level and base
+         val previousLevel = level
+         this.level = 0
+         if (bases contains source)
+           throw new SAXException(
+             "Circular XInclude Reference",
+             new CircularIncludeException("Circular XInclude Reference to " + source + getLocation())
+           )
+
+         bases.push(source)
+         atRoot = true
+         parser.parse(source.toExternalForm())
+
+         // restore old level and base
+         this.level = previousLevel
+         bases.pop()
+     }
+   }
+   catch {
+     case e: IOException =>
+       throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e)
+   }
+ }
+}
diff --git a/src/xml/scala/xml/include/sax/XIncluder.scala b/src/xml/scala/xml/include/sax/XIncluder.scala
new file mode 100644
index 0000000000..1939fa1875
--- /dev/null
+++ b/src/xml/scala/xml/include/sax/XIncluder.scala
@@ -0,0 +1,187 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package include.sax
+
+import scala.collection.mutable
+import org.xml.sax.{ ContentHandler, XMLReader, Locator, Attributes }
+import org.xml.sax.ext.LexicalHandler
+import java.io.{ File, OutputStream, OutputStreamWriter, Writer, IOException }
+
+/** XIncluder is a SAX `ContentHandler` that writes its XML document onto
+ * an output stream after resolving all `xinclude:include` elements.
+ *
+ * Based on Elliotte Rusty Harold's SAXXIncluder.
+ */
+class XIncluder(outs: OutputStream, encoding: String) extends ContentHandler with LexicalHandler {
+
+ var out = new OutputStreamWriter(outs, encoding)
+
+ def setDocumentLocator(locator: Locator) {}
+
+ /** Writes the XML declaration, naming the output encoding. */
+ def startDocument(): Unit =
+   try out.write("<?xml version='1.0' encoding='" + encoding + "'?>\r\n")
+   catch {
+     case e: IOException => throw new SAXException("Write failed", e)
+   }
+
+ /** Flushes the writer once the document is complete. */
+ def endDocument(): Unit =
+   try out.flush()
+   catch {
+     case e: IOException => throw new SAXException("Flush failed", e)
+   }
+
+ def startPrefixMapping(prefix: String , uri: String) {}
+
+ def endPrefixMapping(prefix: String) {}
+
+ /** Writes a start tag with all of its attributes.
+  *
+  *  @throws SAXException if writing to the output fails
+  */
+ def startElement(namespaceURI: String, localName: String, qualifiedName: String, atts: Attributes): Unit = {
+   try {
+     out.write("<" + qualifiedName)
+     var i = 0
+     while (i < atts.getLength()) {
+       out.write(" ")
+       out.write(atts.getQName(i))
+       // Utility.escape turns '"' into &quot; but leaves apostrophes alone,
+       // so the value must be delimited by double quotes to stay well-formed.
+       out.write("=\"")
+       // @todo Need to use character references if the encoding
+       // can't support the character
+       out.write(scala.xml.Utility.escape(atts.getValue(i)))
+       out.write("\"")
+       i += 1
+     }
+     out.write(">")
+   }
+   catch {
+     case e: IOException =>
+       throw new SAXException("Write failed", e)
+   }
+ }
+
+ /** Writes the end tag for `qualifiedName`. */
+ def endElement(namespaceURI: String, localName: String, qualifiedName: String): Unit =
+   try out.write("</" + qualifiedName + ">")
+   catch {
+     case e: IOException => throw new SAXException("Write failed", e)
+   }
+
+ // need to escape characters that are not in the given
+ // encoding using character references????
+ /** Writes `length` characters of `ch` starting at `start`, replacing
+  *  `&`, `<` and `>` by entity references.
+  */
+ def characters(ch: Array[Char], start: Int, length: Int): Unit =
+   try {
+     for (offset <- 0 until length) ch(start + offset) match {
+       case '&' => out.write("&amp;")
+       case '<' => out.write("&lt;")
+       // Escaping '>' is normally not necessary, but it is required
+       // if the text contains ]]> (the end CDATA section delimiter).
+       case '>' => out.write("&gt;")
+       case c   => out.write(c.toInt)
+     }
+   }
+   catch {
+     case e: IOException => throw new SAXException("Write failed", e)
+   }
+
+ /** Ignorable whitespace is written like any other character data. */
+ def ignorableWhitespace(ch: Array[Char], start: Int, length: Int): Unit =
+   this.characters(ch, start, length)
+
+ // do I need to escape text in PI????
+ /** Writes a processing instruction as `<?target data?>`. */
+ def processingInstruction(target: String, data: String): Unit =
+   try out.write("<?" + target + " " + data + "?>")
+   catch {
+     case e: IOException => throw new SAXException("Write failed", e)
+   }
+
+ /** Writes a skipped entity back out as the reference `&name;`. */
+ def skippedEntity(name: String): Unit =
+   try out.write("&" + name + ";")
+   catch {
+     case e: IOException => throw new SAXException("Write failed", e)
+   }
+
+ // LexicalHandler methods
+ private var inDTD: Boolean = false
+ private val entities = new mutable.Stack[String]()
+
+ /** Marks the start of the DTD and, while no entity is open (i.e. for the
+  *  source document), writes a DOCTYPE declaration.
+  */
+ def startDTD(name: String, publicID: String, systemID: String): Unit = {
+   inDTD = true
+   // if this is the source document, output a DOCTYPE declaration
+   if (entities.isEmpty) {
+     val id =
+       if (publicID != null) " PUBLIC \"" + publicID + "\" \"" + systemID + '"'
+       else if (systemID != null) " SYSTEM \"" + systemID + '"'
+       else ""
+     try out.write("<!DOCTYPE " + name + id + ">\r\n")
+     catch {
+       case e: IOException =>
+         throw new SAXException("Error while writing DOCTYPE", e)
+     }
+   }
+ }
+ /** Marks the end of the DTD. Without resetting `inDTD` here, every comment
+  *  following a DOCTYPE declaration would be suppressed by `comment`.
+  */
+ def endDTD(): Unit = { inDTD = false }
+
+ /** Remembers that entity `name` has been entered. */
+ def startEntity(name: String): Unit = {
+   entities.push(name)
+ }
+
+ /** Forgets the most recently entered entity. */
+ def endEntity(name: String): Unit = {
+   entities.pop()
+ }
+
+ def startCDATA() {}
+ def endCDATA() {}
+
+ // Just need this reference so we can ask if a comment is
+ // inside an include element or not
+ private var filter: XIncludeFilter = null
+
+ def setFilter(filter: XIncludeFilter) {
+ this.filter = filter
+ }
+
+ /** Writes a comment unless it occurs inside the DTD or inside an include
+  *  element (as reported by the filter registered through `setFilter`).
+  *
+  *  @throws SAXException if writing to the output fails
+  */
+ def comment(ch: Array[Char], start: Int, length: Int): Unit = {
+   // `filter` stays null until setFilter is called; without a filter there
+   // is nothing to ask, so the comment counts as outside any include element.
+   val insideInclude = filter != null && filter.insideIncludeElement()
+   if (!inDTD && !insideInclude) {
+     try {
+       out.write("<!--")
+       out.write(ch, start, length)
+       out.write("-->")
+     }
+     catch {
+       case e: IOException =>
+         throw new SAXException("Write failed", e)
+     }
+   }
+ }
+}
diff --git a/src/xml/scala/xml/package.scala b/src/xml/scala/xml/package.scala
new file mode 100644
index 0000000000..4001cc5ffb
--- /dev/null
+++ b/src/xml/scala/xml/package.scala
@@ -0,0 +1,19 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+
+/** Package-level constant and short aliases for the SAX/JAXP types used
+ * throughout `scala.xml`.
+ */
+package object xml {
+ // Fully qualified class name of the Xerces SAX parser.
+ val XercesClassName = "org.apache.xerces.parsers.SAXParser"
+
+ // Aliases so that code in this package can name these Java types without
+ // importing them.
+ type SAXException = org.xml.sax.SAXException
+ type SAXParseException = org.xml.sax.SAXParseException
+ type EntityResolver = org.xml.sax.EntityResolver
+ type InputSource = org.xml.sax.InputSource
+ type SAXParser = javax.xml.parsers.SAXParser
+}
diff --git a/src/xml/scala/xml/parsing/ConstructingHandler.scala b/src/xml/scala/xml/parsing/ConstructingHandler.scala
new file mode 100755
index 0000000000..ba416e4301
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ConstructingHandler.scala
@@ -0,0 +1,34 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+/** Implementation of MarkupHandler that constructs nodes.
+ *
+ * Each callback ignores the source position `pos` and returns the
+ * corresponding `scala.xml` node built from the remaining arguments.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+abstract class ConstructingHandler extends MarkupHandler
+{
+ // left abstract; supplied by the concrete handler/parser
+ val preserveWS: Boolean
+
+ // builds an Elem from prefix, label, attributes, scope and children
+ def elem(pos: Int, pre: String, label: String, attrs: MetaData,
+ pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq =
+ Elem(pre, label, attrs, pscope, empty, nodes:_*)
+
+ // builds a ProcInstr from target and text
+ def procInstr(pos: Int, target: String, txt: String) =
+ ProcInstr(target, txt)
+
+ // the remaining callbacks wrap their text in the matching node type
+ def comment(pos: Int, txt: String) = Comment(txt)
+ def entityRef(pos: Int, n: String) = EntityRef(n)
+ def text(pos: Int, txt: String) = Text(txt)
+}
diff --git a/src/xml/scala/xml/parsing/ConstructingParser.scala b/src/xml/scala/xml/parsing/ConstructingParser.scala
new file mode 100644
index 0000000000..3caeddabf4
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ConstructingParser.scala
@@ -0,0 +1,55 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+import java.io.File
+import scala.io.Source
+
+/** Factory methods returning parsers that are already initialized. */
+object ConstructingParser {
+  /** Creates an initialized parser reading from the given source. */
+  def fromSource(inp: Source, preserveWS: Boolean) =
+    new ConstructingParser(inp, preserveWS).initialize
+
+  /** Creates an initialized parser reading from the given file. */
+  def fromFile(inp: File, preserveWS: Boolean) =
+    fromSource(Source.fromFile(inp), preserveWS)
+}
+
+/** An XML parser that parses XML and invokes the callback methods of a
+ * MarkupHandler. A parser created directly with `new` has to be initialized
+ * before use; the factory methods of the companion object call `initialize`
+ * for you.
+ *
+ * {{{
+ * object parseFromURL {
+ * def main(args: Array[String]) {
+ * val url = args(0)
+ * val src = scala.io.Source.fromURL(url)
+ * val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false) // fromSource initializes automatically
+ * val doc = cpa.document()
+ *
+ * // let's see what it is
+ * val ppr = new scala.xml.PrettyPrinter(80, 5)
+ * val ele = doc.docElem
+ * println("finished parsing")
+ * val out = ppr.format(ele)
+ * println(out)
+ * }
+ * }
+ * }}} */
+class ConstructingParser(val input: Source, val preserveWS: Boolean)
+extends ConstructingHandler
+with ExternalSources
+with MarkupParser {
+
+ // default impl. of Logged: log messages are discarded
+ override def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala
new file mode 100755
index 0000000000..6ec7474843
--- /dev/null
+++ b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala
@@ -0,0 +1,30 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+
+/** Default implementation of markup handler: every node-producing callback
+ * ignores its arguments and returns `NodeSeq.Empty`.
+ */
+abstract class DefaultMarkupHandler extends MarkupHandler {
+
+ // element callback: yields no nodes
+ def elem(pos: Int, pre: String, label: String, attrs: MetaData,
+ scope:NamespaceBinding, empty: Boolean, args: NodeSeq) = NodeSeq.Empty
+
+ // processing instruction callback: yields no nodes
+ def procInstr(pos: Int, target: String, txt: String) = NodeSeq.Empty
+
+ // comment callback: yields no nodes
+ def comment(pos: Int, comment: String ): NodeSeq = NodeSeq.Empty
+
+ // entity reference callback: yields no nodes
+ def entityRef(pos: Int, n: String) = NodeSeq.Empty
+
+ // text callback: yields no nodes
+ def text(pos: Int, txt:String) = NodeSeq.Empty
+
+}
diff --git a/src/xml/scala/xml/parsing/ExternalSources.scala b/src/xml/scala/xml/parsing/ExternalSources.scala
new file mode 100644
index 0000000000..bb939bca95
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ExternalSources.scala
@@ -0,0 +1,38 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+import java.net.URL
+import java.io.File.separator
+
+import scala.io.Source
+
+/**
+ * @author Burak Emir
+ * @version 1.0
+ */
+trait ExternalSources {
+  self: ExternalSources with MarkupParser with MarkupHandler =>
+
+  /** Opens the external entity identified by `systemId`.
+   *
+   *  An `http:` or `https:` system id is fetched as a URL. Anything else is
+   *  treated as a file name relative to the directory of the current input,
+   *  which is taken from the input's description.
+   */
+  def externalSource(systemId: String): Source =
+    if ((systemId startsWith "http:") || (systemId startsWith "https:"))
+      Source fromURL new URL(systemId)
+    else {
+      // The description of a file-backed Source is "file:" followed by the
+      // path of the file itself, so the scheme is stripped first and the
+      // file name is cut off afterwards; otherwise the system id would be
+      // appended to the document's own file name.
+      val descr = input.descr
+      val path = if (descr startsWith "file:") descr drop 5 else descr
+      val directory = path take ((path lastIndexOf separator) + 1)
+
+      Source.fromFile(directory + systemId)
+    }
+}
diff --git a/src/xml/scala/xml/parsing/FactoryAdapter.scala b/src/xml/scala/xml/parsing/FactoryAdapter.scala
new file mode 100644
index 0000000000..2154bdf5ba
--- /dev/null
+++ b/src/xml/scala/xml/parsing/FactoryAdapter.scala
@@ -0,0 +1,187 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream }
+import scala.collection.{ mutable, Iterator }
+import org.xml.sax.Attributes
+import org.xml.sax.helpers.DefaultHandler
+
+// can be mixed into FactoryAdapter if desired
+trait ConsoleErrorHandler extends DefaultHandler {
+  // ignore warning, crimson warns even for entity resolution!
+  override def warning(ex: SAXParseException): Unit = ()
+  override def error(ex: SAXParseException): Unit = printError("Error", ex)
+  override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex)
+
+  /** Prints `[errtype]:line:column: message` for `ex` on the error stream. */
+  protected def printError(errtype: String, ex: SAXParseException): Unit = {
+    val report = "[%s]:%d:%d: %s".format(
+      errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage)
+    Console.withOut(Console.err) {
+      Console.println(report)
+      Console.flush()
+    }
+  }
+}
+
+/** SAX adapter class, for use with Java SAX parser. Keeps track of
+ * namespace bindings, without relying on namespace handling of the
+ * underlying SAX parser.
+ */
+abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
+ var rootElem: Node = null
+
+ val buffer = new StringBuilder()
+ val attribStack = new mutable.Stack[MetaData]
+ val hStack = new mutable.Stack[Node] // [ element ] contains siblings
+ val tagStack = new mutable.Stack[String]
+ var scopeStack = new mutable.Stack[NamespaceBinding]
+
+ var curTag : String = null
+ var capture: Boolean = false
+
+ // abstract methods
+
+ /** Tests if an XML element contains text.
+ * @return true if element named `localName` contains text.
+ */
+ def nodeContainsText(localName: String): Boolean // abstract
+
+ /** creates a new non-text (tree) node.
+ * @param elemName
+ * @param attribs
+ * @param chIter
+ * @return a new XML element.
+ */
+ def createNode(pre: String, elemName: String, attribs: MetaData,
+ scope: NamespaceBinding, chIter: List[Node]): Node // abstract
+
+ /** creates a Text node.
+ * @param text
+ * @return a new Text node.
+ */
+ def createText(text: String): Text // abstract
+
+ /** creates a new processing instruction node.
+ */
+ def createProcInstr(target: String, data: String): Seq[ProcInstr]
+
+ //
+ // ContentHandler methods
+ //
+
+ val normalizeWhitespace = false
+
+ /** Collects character data into `buffer` while `capture` is set.
+  *
+  *  @param ch     array holding the characters
+  *  @param offset index of the first character to use
+  *  @param length number of characters to use
+  */
+ override def characters(ch: Array[Char], offset: Int, length: Int): Unit =
+   if (capture) {
+     // compliant: report every character
+     if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
+     // normalizing whitespace is not compliant, but useful
+     else {
+       var rest = ch.slice(offset, offset + length).iterator
+       while (rest.hasNext) {
+         val c = rest.next()
+         if (c.isWhitespace) {
+           // a run of whitespace collapses into a single blank
+           buffer append ' '
+           rest = rest dropWhile (_.isWhitespace)
+         }
+         else buffer append c
+       }
+     }
+   }
+
+ /** Splits a qualified name at its first colon into (prefix, local name);
+  *  the prefix is `null` when there is no colon.
+  */
+ private def splitName(s: String) = s.indexOf(':') match {
+   case colon if colon < 0 => (null, s)
+   case colon              => (s take colon, s drop (colon + 1))
+ }
+
+ /* ContentHandler methods */
+
+ /** Start element: flushes pending text, records the tag, and collects the
+  *  element's attributes and namespace declarations onto the stacks.
+  */
+ override def startElement(uri: String, _localName: String, qname: String, attributes: Attributes): Unit = {
+   captureText()
+   tagStack push curTag
+   curTag = qname
+
+   capture = nodeContainsText(splitName(qname)._2)
+
+   // null marks where this element's children start on hStack
+   hStack push null
+   var attrs: MetaData = Null
+   var binding: NamespaceBinding =
+     if (scopeStack.isEmpty) TopScope else scopeStack.top
+
+   var i = 0
+   while (i < attributes.getLength()) {
+     val attrName = attributes getQName i
+     val value = attributes getValue i
+     val (pre, key) = splitName(attrName)
+
+     val declaresNamespace = pre == "xmlns" || (pre == null && attrName == "xmlns")
+     if (declaresNamespace) {
+       // a plain `xmlns` attribute binds the null prefix
+       val prefix = if (pre == null) null else key
+       val nsUri = if (value == "") null else value
+       binding = new NamespaceBinding(prefix, nsUri, binding)
+     }
+     else
+       attrs = Attribute(Option(pre), key, Text(value), attrs)
+     i += 1
+   }
+
+   scopeStack push binding
+   attribStack push attrs
+ }
+
+
+ /** Pushes the buffered text as a text node when capturing and the buffer is
+  *  non-empty; the buffer is emptied in any case.
+  */
+ def captureText(): Unit = {
+   val hasText = capture && buffer.length > 0
+   if (hasText)
+     hStack push createText(buffer.toString)
+
+   buffer.clear()
+ }
+
+ /** End element: flushes pending text, builds the node from the children
+  *  collected since the matching start tag and pushes it as a child of the
+  *  enclosing element.
+  *
+  *  @param uri        not used
+  *  @param _localName not used; the local name is derived from `qname`
+  *  @param qname      qualified name of the element being closed
+  */
+ override def endElement(uri: String, _localName: String, qname: String): Unit = {
+   captureText()
+   val metaData = attribStack.pop()
+
+   // children were pushed in document order, so popping yields them
+   // reversed; the null pushed by startElement ends the run
+   val v = (Iterator continually hStack.pop() takeWhile (_ != null)).toList.reverse
+   val (pre, localName) = splitName(qname)
+   val scp = scopeStack.pop()
+
+   // create element
+   rootElem = createNode(pre, localName, metaData, scp, v)
+   hStack push rootElem
+   curTag = tagStack.pop()
+   // startElement queries nodeContainsText with the local name, so strip
+   // the prefix of the enclosing tag here as well (curTag is null at root level)
+   capture = curTag != null && nodeContainsText(splitName(curTag)._2)
+ }
+
+ /** Processing instruction: pushes the node(s) created for it as children
+  *  of the current element.
+  */
+ override def processingInstruction(target: String, data: String): Unit = {
+   // Flush text seen so far first; otherwise it would be pushed only at the
+   // next tag and end up after this processing instruction.
+   captureText()
+   hStack pushAll createProcInstr(target, data)
+ }
+}
diff --git a/src/xml/scala/xml/parsing/FatalError.scala b/src/xml/scala/xml/parsing/FatalError.scala
new file mode 100644
index 0000000000..ab3cb2a74d
--- /dev/null
+++ b/src/xml/scala/xml/parsing/FatalError.scala
@@ -0,0 +1,17 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+/** !!! This is poorly named, but I guess it's in the API.
+ */
+case class FatalError(msg: String) extends java.lang.RuntimeException(msg)
diff --git a/src/xml/scala/xml/parsing/MarkupHandler.scala b/src/xml/scala/xml/parsing/MarkupHandler.scala
new file mode 100755
index 0000000000..1ebffb9c90
--- /dev/null
+++ b/src/xml/scala/xml/parsing/MarkupHandler.scala
@@ -0,0 +1,127 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+import scala.collection.mutable
+import scala.io.Source
+import scala.xml.dtd._
+
+/** class that handles markup - provides callback methods to MarkupParser.
+ * the default is nonvalidating behaviour
+ *
+ * @author Burak Emir
+ * @version 1.0
+ *
+ * @todo can we ignore more entity declarations (i.e. those with extIDs)?
+ * @todo expanding entity references
+ */
+abstract class MarkupHandler {
+
+ /** returns true if this markup handler is validating */
+ val isValidating: Boolean = false
+
+ var decls: List[Decl] = Nil
+ var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]()
+
+ /** Returns the first element declaration in `decls` whose name equals
+  *  `Label`, or `null` if there is none.
+  */
+ def lookupElemDecl(Label: String): ElemDecl =
+   decls.collectFirst { case z @ ElemDecl(Label, _) => z }.orNull
+
+ /** Returns a source over the replacement text of the named entity.
+  *  Entities without an internal definition, and unknown entities, yield an
+  *  XML comment naming the entity.
+  */
+ def replacementText(entityName: String): Source = {
+   val replacement = ent.get(entityName) match {
+     case Some(ParsedEntityDecl(_, IntDef(value)))    => value
+     case Some(ParameterEntityDecl(_, IntDef(value))) => " %s " format value
+     case Some(_)                                     => "<!-- %s; -->" format entityName
+     case None                                        => "<!-- unknown entity %s; -->" format entityName
+   }
+   Source.fromString(replacement)
+ }
+
+ def endDTD(n: String): Unit = ()
+
+ /** callback method invoked by MarkupParser after start-tag of element.
+ *
+ * @param pos the position in the sourcefile
+ * @param pre the prefix
+ * @param label the local name
+ * @param attrs the attributes (metadata)
+ */
+ def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit = ()
+
+ /** callback method invoked by MarkupParser after end-tag of element.
+ *
+ * @param pos the position in the source file
+ * @param pre the prefix
+ * @param label the local name
+ */
+ def elemEnd(pos: Int, pre: String, label: String): Unit = ()
+
+ /** callback method invoked by MarkupParser after parsing an element,
+ * between the elemStart and elemEnd callbacks
+ *
+ * @param pos the position in the source file
+ * @param pre the prefix
+ * @param label the local name
+ * @param attrs the attributes (metadata)
+ * @param empty `true` if the element was previously empty; `false` otherwise.
+ * @param args the children of this element
+ */
+ def elem(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, empty: Boolean, args: NodeSeq): NodeSeq
+
+ /** callback method invoked by MarkupParser after parsing PI.
+ */
+ def procInstr(pos: Int, target: String, txt: String): NodeSeq
+
+ /** callback method invoked by MarkupParser after parsing comment.
+ */
+ def comment(pos: Int, comment: String): NodeSeq
+
+ /** callback method invoked by MarkupParser after parsing entity ref.
+ * @todo expanding entity references
+ */
+ def entityRef(pos: Int, n: String): NodeSeq
+
+ /** callback method invoked by MarkupParser after parsing text.
+ */
+ def text(pos: Int, txt: String): NodeSeq
+
+ // DTD handler methods
+
+ def elemDecl(n: String, cmstr: String): Unit = ()
+
+ def attListDecl(name: String, attList: List[AttrDecl]): Unit = ()
+
+ /** Builds an entity declaration with `f` and records it in `decls` and
+  *  `ent`, unless it has an external definition and this handler is
+  *  non-validating.
+  */
+ private def someEntityDecl(name: String, edef: EntityDef, f: (String, EntityDef) => EntityDecl): Unit = {
+   // ignore external definitions when not validating (cf REC-xml 4.4.1)
+   val skip = edef match {
+     case _: ExtDef => !isValidating
+     case _         => false
+   }
+   if (!skip) {
+     val decl = f(name, edef)
+     decls = decl :: decls
+     ent.update(name, decl)
+   }
+ }
+
+ def parameterEntityDecl(name: String, edef: EntityDef): Unit =
+ someEntityDecl(name, edef, ParameterEntityDecl.apply _)
+
+ def parsedEntityDecl(name: String, edef: EntityDef): Unit =
+ someEntityDecl(name, edef, ParsedEntityDecl.apply _)
+
+ def peReference(name: String) { decls ::= PEReference(name) }
+ def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = ()
+ def notationDecl(notat: String, extID: ExternalID): Unit = ()
+ def reportSyntaxError(pos: Int, str: String): Unit
+
+ @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11")
+ def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/parsing/MarkupParser.scala b/src/xml/scala/xml/parsing/MarkupParser.scala
new file mode 100755
index 0000000000..3bbd136b67
--- /dev/null
+++ b/src/xml/scala/xml/parsing/MarkupParser.scala
@@ -0,0 +1,938 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+import scala.io.Source
+import scala.xml.dtd._
+import Utility.Escapes.{ pairs => unescape }
+
+/**
+ * An XML parser.
+ *
+ * Parses XML 1.0, invokes callback methods of a `MarkupHandler` and returns
+ * whatever the markup handler returns. Use `ConstructingParser` if you just
+ * want to parse XML to construct instances of `scala.xml.Node`.
+ *
+ * While XML elements are returned, DTD declarations - if handled - are
+ * collected using side-effects.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+trait MarkupParser extends MarkupParserCommon with TokenTests
+{
+ self: MarkupParser with MarkupHandler =>
+
+ type PositionType = Int
+ type InputType = Source
+ type ElementType = NodeSeq
+ type AttributesType = (MetaData, NamespaceBinding)
+ type NamespaceType = NamespaceBinding
+
+ /** Aborts parsing of truncated input by throwing a `FatalError`. */
+ def truncatedError(msg: String): Nothing = {
+   throw FatalError(msg)
+ }
+ /** Aborts parsing because the closing tag of `tag` is missing. */
+ def errorNoEnd(tag: String): Nothing = {
+   throw FatalError("expected closing tag of " + tag)
+ }
+
+ /** Token mismatch: forwards the message to `reportSyntaxError`. */
+ def xHandleError(that: Char, msg: String): Unit = {
+   reportSyntaxError(msg)
+ }
+
+ val input: Source
+
+ /** if true, does not remove surplus whitespace */
+ val preserveWS: Boolean
+
+ def externalSource(systemLiteral: String): Source
+
+ //
+ // variables, values
+ //
+
+ protected var curInput: Source = input
+
+ // See ticket #3720 for motivations.
+ /** A `Source` wrapper that supports non-destructive lookahead.
+ *
+ * Characters pulled from `underlying` by a lookahead iterator are stored in
+ * `queue`; the regular iterator `iter` drains that queue before it reads
+ * from `underlying` again, so characters seen during lookahead are not lost.
+ */
+ private class WithLookAhead(underlying: Source) extends Source {
+ // characters already read from `underlying` but not yet consumed via `iter`
+ private val queue = scala.collection.mutable.Queue[Char]()
+ /** Returns an iterator that first replays the queued characters and then
+ * reads on from `underlying`, queueing every character it reads.
+ */
+ def lookahead(): BufferedIterator[Char] = {
+ val iter = queue.iterator ++ new Iterator[Char] {
+ def hasNext = underlying.hasNext
+ def next() = { val x = underlying.next(); queue += x; x }
+ }
+ iter.buffered
+ }
+ // the consuming iterator: queued characters first, then the underlying source
+ val iter = new Iterator[Char] {
+ def hasNext = underlying.hasNext || !queue.isEmpty
+ def next() = if (!queue.isEmpty) queue.dequeue() else underlying.next()
+ }
+ }
+
+ /** Creates a lookahead iterator over the current input, wrapping
+  *  `curInput` in a `WithLookAhead` first if it is not one already.
+  */
+ def lookahead(): BufferedIterator[Char] = {
+   val wrapped = curInput match {
+     case already: WithLookAhead => already
+     case plain =>
+       val fresh = new WithLookAhead(plain)
+       curInput = fresh
+       fresh
+   }
+   wrapped.lookahead()
+ }
+
+
+ /** the handler of the markup, returns this */
+ private val handle: MarkupHandler = this
+
+ /** stack of inputs */
+ var inpStack: List[Source] = Nil
+
+ /** holds the position in the source file */
+ var pos: Int = _
+
+ /* used when reading external subset */
+ var extIndex = -1
+
+ /** holds temporary values of pos */
+ var tmppos: Int = _
+
+ /** holds the next character */
+ var nextChNeeded: Boolean = false
+ var reachedEof: Boolean = false
+ var lastChRead: Char = _
+ /** Returns the current character.
+ *
+ * The read is lazy: a character is only fetched from `curInput` when
+ * `nextChNeeded` is set (see `nextch()`); otherwise the cached `lastChRead`
+ * is returned. When `curInput` is exhausted, the enclosing input is restored
+ * with `pop()` if there is one; otherwise `reachedEof` is set and the
+ * character with code 0 is returned.
+ */
+ def ch: Char = {
+ if (nextChNeeded) {
+ if (curInput.hasNext) {
+ lastChRead = curInput.next()
+ pos = curInput.pos
+ } else {
+ val ilen = inpStack.length
+ //Console.println(" ilen = "+ilen+ " extIndex = "+extIndex);
+ // do not pop past the stack depth recorded when the external subset was entered
+ if ((ilen != extIndex) && (ilen > 0)) {
+ /* for external source, inpStack == Nil ! need notify of eof! */
+ pop()
+ } else {
+ reachedEof = true
+ lastChRead = 0.asInstanceOf[Char]
+ }
+ }
+ nextChNeeded = false
+ }
+ lastChRead
+ }
+
+ /** character buffer, for names */
+ protected val cbuf = new StringBuilder()
+
+ var dtd: DTD = null
+
+ protected var doc: Document = null
+
+ def eof: Boolean = { ch; reachedEof }
+
+ //
+ // methods
+ //
+
+ /** Parses the remainder of an XML or text declaration and returns its
+  *  pseudo-attributes. Namespace declarations are reported as an error.
+  *  {{{
+  *  <? prolog ::= xml S ... ?>
+  *  }}} */
+ def xmlProcInstr(): MetaData = {
+   xToken("xml")
+   xSpace()
+   val attrsAndScope = xAttributes(TopScope)
+   if (attrsAndScope._2 != TopScope)
+     reportSyntaxError("no xmlns definitions here, please.")
+   xToken("?>")
+   attrsAndScope._1
+ }
+
+ /** Factored out common code of `prolog()` and `textDecl()`.
+ *
+ * Reads the pseudo-attributes via `xmlProcInstr()` and checks `version`,
+ * `encoding` and, for a prolog only, `standalone`. Invalid values and
+ * unexpected extra attributes are reported through `reportSyntaxError`.
+ *
+ * @param isProlog true for the XML declaration of a document, false for a
+ * text declaration (which has no `standalone`)
+ * @return (version, encoding, standalone); each is `None` if absent or rejected
+ */
+ private def prologOrTextDecl(isProlog: Boolean): (Option[String], Option[String], Option[Boolean]) = {
+ var info_ver: Option[String] = None
+ var info_enc: Option[String] = None
+ var info_stdl: Option[Boolean] = None
+
+ val m = xmlProcInstr()
+ // number of pseudo-attributes that were recognised and accepted
+ var n = 0
+
+ if (isProlog)
+ xSpaceOpt()
+
+ m("version") match {
+ case null =>
+ case Text("1.0") => info_ver = Some("1.0"); n += 1
+ case _ => reportSyntaxError("cannot deal with versions != 1.0")
+ }
+
+ m("encoding") match {
+ case null =>
+ case Text(enc) =>
+ if (!isValidIANAEncoding(enc))
+ reportSyntaxError("\"" + enc + "\" is not a valid encoding")
+ else {
+ info_enc = Some(enc)
+ n += 1
+ }
+ }
+
+ if (isProlog) {
+ m("standalone") match {
+ case null =>
+ case Text("yes") => info_stdl = Some(true); n += 1
+ case Text("no") => info_stdl = Some(false); n += 1
+ case _ => reportSyntaxError("either 'yes' or 'no' expected")
+ }
+ }
+
+ // any attribute that was not accepted above is an error
+ if (m.length - n != 0) {
+ val s = if (isProlog) "SDDecl? " else ""
+ reportSyntaxError("VersionInfo EncodingDecl? %sor '?>' expected!" format s)
+ }
+
+ (info_ver, info_enc, info_stdl)
+ }
+
+ /** Parses an XML declaration: (version, encoding, standalone).
+  *  {{{
+  *  <? prolog ::= xml S?
+  *  // this is a bit more lenient than necessary...
+  *  }}} */
+ def prolog(): (Option[String], Option[String], Option[Boolean]) = {
+   prologOrTextDecl(isProlog = true)
+ }
+
+ /** Parses a text declaration: like a prolog, but without standalone. */
+ def textDecl(): (Option[String], Option[String]) = {
+   val (version, encoding, _) = prologOrTextDecl(isProlog = false)
+   (version, encoding)
+ }
+
+ /** Parses a whole document and returns it, or `null` if the input does
+  *  not start with `<`.
+  *
+  *  The optional XML declaration is recorded on the resulting `Document`;
+  *  a DTD, if present, is handled as a side effect of content parsing.
+  *  Anything other than exactly one element at top level (besides comments
+  *  and processing instructions) is reported as a syntax error.
+  *  {{{
+  *  [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
+  *  [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+  *  [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
+  *  [25] Eq ::= S? '=' S?
+  *  [26] VersionNum ::= '1.0'
+  *  [27] Misc ::= Comment | PI | S
+  *  }}} */
+ def document(): Document = {
+   doc = new Document()
+   this.dtd = null
+
+   if ('<' != ch) {
+     reportSyntaxError("< expected")
+     return null
+   }
+
+   nextch() // is prolog ?
+   val children: NodeSeq =
+     if ('?' == ch) {
+       nextch()
+       val (version, encoding, standAlone) = prolog()
+       doc.version = version
+       doc.encoding = encoding
+       doc.standAlone = standAlone
+
+       content(TopScope) // DTD handled as side effect
+     } else {
+       val ts = new NodeBuffer()
+       content1(TopScope, ts) // DTD handled as side effect
+       ts &+ content(TopScope)
+       NodeSeq.fromSeq(ts)
+     }
+
+   var elemCount = 0
+   var theNode: Node = null
+   for (c <- children) c match {
+     case _: ProcInstr =>
+     case _: Comment =>
+     case _: EntityRef => // todo: fix entities, shouldn't be "special"
+       reportSyntaxError("no entity references allowed here")
+     case s: SpecialNode =>
+       if (s.toString.trim().length > 0) //non-empty text nodes not allowed
+         elemCount += 2
+     case m: Node =>
+       elemCount += 1
+       theNode = m
+   }
+   // the error goes through the regular reporting channel only; a library
+   // parser must not dump the parsed nodes to standard output
+   if (1 != elemCount)
+     reportSyntaxError("document must contain exactly one element")
+
+   doc.children = children
+   doc.docElem = theNode
+   doc
+ }
+
+ /** Appends one character to the shared name buffer `cbuf`. */
+ protected def putChar(c: Char) = cbuf.append(c)
+
+ /** Primes the parser. The current code needs one manual `nextch` call
+  *  after construction; this method formalizes that suboptimal reality
+  *  and returns the parser itself.
+  */
+ def initialize: this.type = { nextch(); this }
+
+ /** Returns the current character, then advances past it. */
+ protected def ch_returning_nextch: Char = {
+   val current = ch
+   nextch()
+   current
+ }
+
+ /** Parses the attributes of a tag if one starts here; otherwise returns
+  *  no attributes together with the unchanged parent scope.
+  */
+ def mkAttributes(name: String, pscope: NamespaceBinding): AttributesType =
+   if (!isNameStart(ch)) (Null, pscope)
+   else xAttributes(pscope)
+
+ /** Delegates creation of a processing instruction to the handler. */
+ def mkProcInstr(position: Int, name: String, text: String): ElementType = {
+   handle.procInstr(position, name, text)
+ }
+
+ /** Advances the input: tells `ch` to fetch a new character the next time
+  *  it is called.
+  */
+ def nextch(): Unit = {
+   ch // a read that is still pending must happen before the next one is requested
+   nextChNeeded = true
+ }
+
+ /** Parses the attributes of a tag, splitting them into namespace
+  *  declarations (`xmlns`, `xmlns:p`), which extend the scope, and ordinary
+  *  attributes, which are chained into the returned metadata.
+  *  {{{
+  *  [41] Attributes ::= { S Name Eq AttValue }
+  *  }}}
+  *
+  *  @param pscope the namespace scope of the parent
+  *  @return the attributes together with the resulting scope
+  */
+ def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = {
+   var bindings: NamespaceBinding = pscope
+   var attrs: MetaData = Null
+   while (isNameStart(ch)) {
+     val qname = xName
+     xEQ() // side effect
+     val value = xAttributeValue()
+
+     Utility.prefix(qname) match {
+       case Some("xmlns") =>
+         bindings = new NamespaceBinding(qname.substring(6 /*xmlns:*/), value, bindings)
+
+       case Some(prefix) =>
+         val key = qname.substring(prefix.length + 1)
+         attrs = new PrefixedAttribute(prefix, key, Text(value), attrs)
+
+       case _ if qname == "xmlns" =>
+         bindings = new NamespaceBinding(null, value, bindings)
+
+       case _ =>
+         attrs = new UnprefixedAttribute(qname, Text(value), attrs)
+     }
+
+     // whitespace is required unless the tag (or declaration) ends here
+     val atTagEnd = ch == '/' || ch == '>' || ch == '?'
+     if (!atTagEnd)
+       xSpace()
+   }
+
+   if (!attrs.wellformed(bindings))
+     reportSyntaxError( "double attribute")
+
+   (attrs, bindings)
+ }
+
+ /** Reads a quoted entity value. The current character is taken as the
+  *  opening quote; everything up to the matching quote (or end of input)
+  *  is returned and the closing quote is skipped.
+  *  {{{
+  *  EntityValue ::= `'` { _ } `'`
+  *                | `"` { _ } `"`
+  *  }}}
+  */
+ def xEntityValue(): String = {
+   val quote = ch
+   nextch()
+   while (!(ch == quote || eof)) {
+     putChar(ch)
+     nextch()
+   }
+   nextch()
+   val value = cbuf.toString()
+   cbuf.setLength(0)
+   value
+ }
+
+ /** Parses a CDATA section. The handler's `text` callback is invoked with
+  *  the content, and the content is returned as a `PCData` node.
+  *  {{{
+  *  '<! CharData ::= [CDATA[ ( {char} - {char}"]]>"{char} ) ']]>'
+  *
+  *  see [15]
+  *  }}} */
+ def xCharData: NodeSeq = {
+   xToken("[CDATA[")
+   val build: (Int, String) => NodeSeq = (at, data) => {
+     handle.text(at, data)
+     PCData(data)
+   }
+   xTakeUntil(build, () => pos, "]]>")
+ }
+
+ /** Parses a comment (the leading `<!` has already been consumed) and
+  *  passes its text to the handler's `comment` callback.
+  *
+  *  Input that ends before the closing `-->` raises `truncatedError`
+  *  instead of looping forever on the end-of-input character.
+  *  {{{
+  *  Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
+  *
+  *  see [15]
+  *  }}} */
+ def xComment: NodeSeq = {
+   val sb: StringBuilder = new StringBuilder()
+   xToken("--")
+   while (!eof) {
+     // a '-' is appended speculatively and removed again if it starts "--"
+     if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) {
+       sb.length = sb.length - 1
+       nextch()
+       xToken('>')
+       return handle.comment(pos, sb.toString())
+     } else sb.append(ch)
+     nextch()
+   }
+   truncatedError("broken comment")
+ }
+
+ /* todo: move this into the NodeBuilder class */
+ /** Appends text to `ts` through the handler: verbatim when `preserveWS`
+  *  is set, otherwise as the pieces produced by `TextBuffer`.
+  */
+ def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = {
+   if (preserveWS) ts &+ handle.text(pos, txt)
+   else TextBuffer.fromString(txt).toText foreach { piece =>
+     ts &+ handle.text(pos, piece.text)
+   }
+ }
+
+ /** Parses one markup item whose leading `<` has already been consumed:
+  *  a CDATA section, the document type declaration, a comment, a
+  *  processing instruction or an element. Resulting nodes are added to `ts`.
+  *  {{{
+  *  '<' content1 ::= ...
+  *  }}} */
+ def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit = {
+   ch match {
+     case '!' =>
+       nextch()
+       ch match {
+         case '[' => ts &+ xCharData // CDATA
+         case 'D' => parseDTD()      // doctypedecl, parse DTD // @todo REMOVE HACK
+         case _   => ts &+ xComment  // comment
+       }
+     case '?' => // PI
+       nextch()
+       ts &+ xProcInstr
+     case _ =>
+       ts &+ element1(pscope) // child
+   }
+ }
+
+ /** Parses element content until an end tag (`</`) or the end of input is
+ * reached and returns the nodes that were collected.
+ *
+ * Markup is delegated to `content1`, character references and the
+ * predefined entities are turned into nodes directly, and any other entity
+ * reference switches the input to the entity's replacement text via `push`.
+ * {{{
+ * content1 ::= '<' content1 | '&' charref ...
+ * }}} */
+ def content(pscope: NamespaceBinding): NodeSeq = {
+ val ts = new NodeBuffer
+ var exit = eof
+ // todo: optimize seq repr.
+ def done = new NodeSeq { val theSeq = ts.toList }
+
+ while (!exit) {
+ // remember where this item started, for the handler callbacks
+ tmppos = pos
+ exit = eof
+
+ if (eof)
+ return done
+
+ ch match {
+ case '<' => // another tag
+ nextch(); ch match {
+ case '/' => exit = true // end tag
+ case _ => content1(pscope, ts)
+ }
+
+ // postcond: xEmbeddedBlock == false!
+ case '&' => // EntityRef or CharRef
+ nextch(); ch match {
+ case '#' => // CharacterRef
+ nextch()
+ val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch()))
+ xToken(';')
+ ts &+ theChar
+ case _ => // EntityRef
+ val n = xName
+ xToken(';')
+
+ if (unescape contains n) {
+ // the handler is notified, but its result is not used here
+ handle.entityRef(tmppos, n)
+ ts &+ unescape(n)
+ } else push(n)
+ }
+ case _ => // text content
+ appendText(tmppos, ts, xText)
+ }
+ }
+ done
+ } // content(NamespaceBinding)
+
+ /** Parses an external identifier. The current character must be `S` or
+  *  `P`; callers check this before calling.
+  *  {{{
+  *  externalID ::= SYSTEM S syslit
+  *                 PUBLIC S pubid S syslit
+  *  }}} */
+ def externalID(): ExternalID = ch match {
+   case 'S' =>
+     nextch(); xToken("YSTEM")
+     xSpace()
+     new SystemID(systemLiteral())
+   case 'P' =>
+     nextch(); xToken("UBLIC")
+     xSpace()
+     val publicLiteral = pubidLiteral()
+     xSpace()
+     new PublicID(publicLiteral, systemLiteral())
+ }
+
+
+ /** Parses a document type declaration and assigns it to the instance
+  *  variable `dtd` (and to `doc.dtd` when a document is being built).
+  *
+  *  The external subset is only read when the handler is validating. The
+  *  declarations collected by the handler become the DTD's `decls`, and
+  *  `handle.endDTD` is called with the document type name at the end.
+  *  {{{
+  *  <! parseDTD ::= DOCTYPE name ... >
+  *  }}} */
+ def parseDTD(): Unit = { // dirty but fast
+   var extID: ExternalID = null
+   if (this.dtd ne null)
+     reportSyntaxError("unexpected character (DOCTYPE already defined")
+   xToken("DOCTYPE")
+   xSpace()
+   val n = xName
+   // whitespace after the name is optional in the grammar; requiring it made
+   // the well-formed `<!DOCTYPE name>` report a syntax error
+   xSpaceOpt()
+   //external ID
+   if ('S' == ch || 'P' == ch) {
+     extID = externalID()
+     xSpaceOpt()
+   }
+
+   /* parse external subset of DTD
+    */
+   if ((null != extID) && isValidating) {
+     pushExternal(extID.systemId)
+     // remember the stack depth so `ch` does not pop past the external source
+     extIndex = inpStack.length
+
+     extSubset()
+     pop()
+     extIndex = -1
+   }
+
+   if ('[' == ch) { // internal subset
+     nextch()
+     /* TODO */
+     intSubset()
+     // TODO: do the DTD parsing?? ?!?!?!?!!
+     xToken(']')
+     xSpaceOpt()
+   }
+   xToken('>')
+   this.dtd = new DTD {
+     /*override var*/ externalID = extID
+     /*override val */decls = handle.decls.reverse
+   }
+   //this.dtd.initializeEntities();
+   if (doc ne null)
+     doc.dtd = this.dtd
+
+   handle.endDTD(n)
+ }
+
+ /** Parses an element, including its opening `<`. */
+ def element(pscope: NamespaceBinding): NodeSeq = { xToken('<'); element1(pscope) }
+
+ /** Parses an element whose opening `<` has already been consumed.
+ *
+ * Handler callbacks are issued in this order: `elemStart` once the start
+ * (or empty-element) tag has been read, then `elem` with the parsed
+ * children, then `elemEnd`. The result of `elem` is returned.
+ * {{{
+ * '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag
+ * | xmlTag1 '/' '>'
+ * }}} */
+ def element1(pscope: NamespaceBinding): NodeSeq = {
+ val pos = this.pos
+ val (qname, (aMap, scope)) = xTag(pscope)
+ // split the qualified name into prefix (null if absent) and local part
+ val (pre, local) = Utility.prefix(qname) match {
+ case Some(p) => (p, qname drop p.length+1)
+ case _ => (null, qname)
+ }
+ val ts = {
+ if (ch == '/') { // empty element
+ xToken("/>")
+ handle.elemStart(pos, pre, local, aMap, scope)
+ NodeSeq.Empty
+ }
+ else { // element with content
+ xToken('>')
+ handle.elemStart(pos, pre, local, aMap, scope)
+ val tmp = content(scope)
+ xEndTag(qname)
+ tmp
+ }
+ }
+ // the `empty` argument is computed by comparing the children with NodeSeq.Empty
+ val res = handle.elem(pos, pre, local, aMap, scope, ts == NodeSeq.Empty, ts)
+ handle.elemEnd(pos, pre, local)
+ res
+ }
+
+ /** Reads character data up to the next `<`, `&` or the end of input.
+  *  The current character is always consumed, so at least one character
+  *  is returned.
+  *
+  *  precondition: `xEmbeddedBlock == false` (we are not in a scala block)
+  */
+ private def xText: String = {
+   do {
+     putChar(ch)
+     nextch()
+   } while (!(eof || ( ch == '<' ) || ( ch == '&' )))
+   val collected = cbuf.toString
+   cbuf.setLength(0)
+   collected
+ }
+
+ /** Reads a quoted system literal. The current character is taken as the
+  *  delimiter (an error is reported if it is not a quote); the text up to
+  *  the next occurrence of that delimiter, or the end of input, is returned.
+  *  {{{
+  *  SystemLiteral ::= `'` { _ } `'`
+  *                  | `"` { _ } `"`
+  *  }}} */
+ def systemLiteral(): String = {
+   val quote = ch
+   val startsWithQuote = quote == '\'' || quote == '"'
+   if (!startsWithQuote)
+     reportSyntaxError("quote ' or \" expected")
+   nextch()
+   while (!(ch == quote || eof)) {
+     putChar(ch)
+     nextch()
+   }
+   nextch()
+   val literal = cbuf.toString()
+   cbuf.setLength(0)
+   literal
+ }
+
+ /** Reads a quoted public identifier literal. Every character is kept,
+  *  but characters rejected by `isPubIDChar` are reported as syntax errors.
+  *  {{{
+  *  [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+  *  }}} */
+ def pubidLiteral(): String = {
+   val quote = ch
+   val startsWithQuote = quote == '\'' || quote == '"'
+   if (!startsWithQuote)
+     reportSyntaxError("quote ' or \" expected")
+   nextch()
+   while (!(ch == quote || eof)) {
+     putChar(ch)
+     if (!isPubIDChar(ch))
+       reportSyntaxError("char '"+ch+"' is not allowed in public id")
+     nextch()
+   }
+   nextch()
+   val literal = cbuf.toString
+   cbuf.setLength(0)
+   literal
+ }
+
+ //
+ // dtd parsing
+ //
+
+ /** Parses an external DTD subset: an optional text declaration followed
+  *  by markup declarations until the end of input.
+  */
+ def extSubset(): Unit = {
+   if (ch == '<') {
+     nextch()
+     if (ch == '?') {
+       nextch()
+       textDecl() // the declared version and encoding are not used
+     } else
+       markupDecl1()
+   }
+   while (!eof) markupDecl()
+ }
+
+ /** Parses one markup declaration whose leading `<` has already been
+  *  consumed: a processing instruction, comment, ELEMENT / ENTITY /
+  *  ATTLIST / NOTATION declaration, or a conditional section.
+  *
+  *  Processing instructions and comments are parsed and ignored. Anything
+  *  else is reported as an error and skipped up to the next `>`. All
+  *  scanning loops stop at the end of input.
+  */
+ def markupDecl1() = {
+   // body of an INCLUDE section: parse the declarations up to the first ']'
+   def doInclude() = {
+     xToken('['); while (']' != ch && !eof) markupDecl(); nextch() // ']'
+   }
+   // body of an IGNORE section: skip everything up to the first ']'
+   def doIgnore() = {
+     xToken('['); while (']' != ch && !eof) nextch(); nextch() // ']'
+   }
+   if ('?' == ch) {
+     nextch()
+     xProcInstr // simply ignore processing instructions!
+   } else {
+     xToken('!')
+     ch match {
+       case '-' =>
+         xComment // ignore comments
+
+       case 'E' =>
+         nextch()
+         if ('L' == ch) {
+           nextch()
+           elementDecl()
+         } else
+           entityDecl()
+
+       case 'A' =>
+         nextch()
+         attrDecl()
+
+       case 'N' =>
+         nextch()
+         notationDecl()
+
+       case '[' if inpStack.length >= extIndex =>
+         nextch()
+         xSpaceOpt()
+         ch match {
+           case '%' =>
+             nextch()
+             val ent = xName
+             xToken(';')
+             xSpaceOpt()
+
+             push(ent)
+             xSpaceOpt()
+             val stmt = xName
+             xSpaceOpt()
+
+             stmt match {
+               // parameter entity
+               case "INCLUDE" => doInclude()
+               case "IGNORE" => doIgnore()
+             }
+           case 'I' =>
+             nextch()
+             ch match {
+               case 'G' =>
+                 nextch()
+                 xToken("NORE")
+                 xSpaceOpt()
+                 doIgnore()
+               case 'N' =>
+                 nextch()
+                 // 'I' and 'N' are consumed already, so only "CLUDE" is left
+                 xToken("CLUDE")
+                 xSpaceOpt()
+                 doInclude()
+             }
+         }
+         xToken(']')
+         xToken('>')
+
+       case _ =>
+         curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl")
+         while (ch != '>' && !eof)
+           nextch()
+     }
+   }
+ }
+
+ /** Parses one item of a DTD subset: a parameter-entity reference, a
+  *  markup declaration, or whitespace. Any other character is reported
+  *  as an error and skipped.
+  */
+ def markupDecl(): Unit = ch match {
+   case '%' => // parameter entity reference
+     nextch()
+     val entityName = xName
+     xToken(';')
+     if (isValidating)
+       push(entityName) // v: parse replacementText
+     else
+       handle.peReference(entityName) // n-v: just create PE-reference
+
+   case '<' =>
+     nextch()
+     markupDecl1()
+   case c if isSpace(c) =>
+     xSpace()
+   case _ =>
+     reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.toInt)
+     nextch()
+ }
+
+ /** Parses the internal DTD subset up to, but not including, the closing
+  *  `]`. Leading whitespace is optional, and parsing stops at the end of
+  *  input instead of reporting errors forever.
+  *
+  *  "rec-xml/#ExtSubset" pe references may not occur within markup declarations
+  */
+ def intSubset(): Unit = {
+   //Console.println("(DEBUG) intSubset()")
+   xSpaceOpt()
+   while (']' != ch && !eof)
+     markupDecl()
+ }
+
+ /** Parses an element declaration (`<!EL` has been consumed) and passes
+  *  the element name and the unparsed content model to `handle.elemDecl`.
+  *  Scanning for the closing `>` stops at the end of input.
+  *  {{{
+  *  <! element := ELEMENT
+  *  }}} */
+ def elementDecl(): Unit = {
+   xToken("EMENT")
+   xSpace()
+   val n = xName
+   xSpace()
+   while ('>' != ch && !eof) {
+     //Console.println("["+ch+"]")
+     putChar(ch)
+     nextch()
+   }
+   //Console.println("END["+ch+"]")
+   nextch()
+   val cmstr = cbuf.toString()
+   cbuf.length = 0
+   handle.elemDecl(n, cmstr)
+ }
+
+ /** Parses an attribute-list declaration (`<!A` has been consumed) and
+  *  passes the name and the declared attributes, in source order, to
+  *  `handle.attListDecl`. The attribute type is kept as a string with
+  *  whitespace removed. Both scanning loops stop at the end of input.
+  *  {{{
+  *  <! attlist := ATTLIST
+  *  }}} */
+ def attrDecl() = {
+   xToken("TTLIST")
+   xSpace()
+   val n = xName
+   xSpace()
+   var attList: List[AttrDecl] = Nil
+
+   // later: find the elemDecl for n
+   while ('>' != ch && !eof) {
+     val aname = xName
+     xSpace()
+     // could be enumeration (foo,bar) parse this later :-/
+     while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch && !eof) {
+       if (!isSpace(ch))
+         cbuf.append(ch)
+       nextch()
+     }
+     val atpe = cbuf.toString
+     cbuf.length = 0
+
+     val defdecl: DefaultDecl = ch match {
+       case '\'' | '"' =>
+         DEFAULT(fixed = false, xAttributeValue())
+
+       case '#' =>
+         nextch()
+         xName match {
+           case "FIXED" => xSpace() ; DEFAULT(fixed = true, xAttributeValue())
+           case "IMPLIED" => IMPLIED
+           case "REQUIRED" => REQUIRED
+         }
+       case _ =>
+         null
+     }
+     xSpaceOpt()
+
+     attList ::= AttrDecl(aname, atpe, defdecl)
+     cbuf.length = 0
+   }
+   nextch()
+   handle.attListDecl(n, attList.reverse)
+ }
+
+ /** Parses an entity declaration (`<!E` has already been consumed) and
+ * reports it to the handler as a parameter, parsed or unparsed entity.
+ * {{{
+ * <! entity := ENTITY
+ * }}} */
+ def entityDecl() = {
+ var isParameterEntity = false
+ xToken("NTITY")
+ xSpace()
+ // a '%' before the name marks a parameter entity
+ if ('%' == ch) {
+ nextch()
+ isParameterEntity = true
+ xSpace()
+ }
+ val n = xName
+ xSpace()
+ ch match {
+ // external entity: SYSTEM / PUBLIC identifier
+ case 'S' | 'P' => //sy
+ val extID = externalID()
+ if (isParameterEntity) {
+ xSpaceOpt()
+ xToken('>')
+ handle.parameterEntityDecl(n, ExtDef(extID))
+ } else { // notation?
+ xSpace()
+ if ('>' != ch) {
+ // NDATA makes this an unparsed entity
+ xToken("NDATA")
+ xSpace()
+ val notat = xName
+ xSpaceOpt()
+ xToken('>')
+ handle.unparsedEntityDecl(n, extID, notat)
+ } else {
+ nextch()
+ handle.parsedEntityDecl(n, ExtDef(extID))
+ }
+ }
+
+ // internal entity: quoted replacement text
+ case '"' | '\'' =>
+ val av = xEntityValue()
+ xSpaceOpt()
+ xToken('>')
+ if (isParameterEntity)
+ handle.parameterEntityDecl(n, IntDef(av))
+ else
+ handle.parsedEntityDecl(n, IntDef(av))
+ }
+ // NOTE(review): the match above has no default case, so any other character
+ // here would raise a MatchError - confirm whether that is intended
+ {}
+ } // entityDecl
+
+ /** Parses a notation declaration (`<!N` has already been consumed) and
+ * reports the notation name and its external identifier to the handler.
+ * {{{
+ * 'N' notationDecl ::= "OTATION"
+ * }}} */
+ def notationDecl() {
+ xToken("OTATION")
+ xSpace()
+ val notat = xName
+ xSpace()
+ val extID = if (ch == 'S') {
+ externalID()
+ }
+ else if (ch == 'P') {
+ /* PublicID (without system, only used in NOTATION) */
+ nextch()
+ xToken("UBLIC")
+ xSpace()
+ val pubID = pubidLiteral()
+ xSpaceOpt()
+ // the system literal is optional here; null stands for "absent"
+ val sysID = if (ch != '>')
+ systemLiteral()
+ else
+ null
+ new PublicID(pubID, sysID)
+ } else {
+ // neither SYSTEM nor PUBLIC: report the error, then abort with an exception
+ reportSyntaxError("PUBLIC or SYSTEM expected")
+ scala.sys.error("died parsing notationdecl")
+ }
+ xSpaceOpt()
+ xToken('>')
+ handle.notationDecl(notat, extID)
+ }
+
+ /** Reports a syntax error at `pos` through the current input source. */
+ def reportSyntaxError(pos: Int, str: String): Unit = curInput.reportError(pos, str)
+ /** Reports a syntax error at the current position. */
+ def reportSyntaxError(str: String): Unit = reportSyntaxError(pos, str)
+ /** Reports a validation error; handled exactly like a syntax error. */
+ def reportValidationError(pos: Int, str: String): Unit = reportSyntaxError(pos, str)
+
+ /** Switches the input to the replacement text of the named entity. The
+  *  current input is saved on `inpStack` unless it is already at its end.
+  */
+ def push(entityName: String): Unit = {
+   if (!eof)
+     inpStack ::= curInput
+
+   // can't push before getting next character if needed
+   ch
+
+   curInput = replacementText(entityName)
+   nextch()
+ }
+
+ /** Switches the input to the external source named by `systemId`. The
+  *  current input is saved on `inpStack` unless it is already at its end.
+  */
+ def pushExternal(systemId: String): Unit = {
+   if (!eof)
+     inpStack ::= curInput
+
+   // can't push before getting next character if needed
+   ch
+
+   curInput = externalSource(systemId)
+   nextch()
+ }
+
+ /** Restores the input saved most recently on `inpStack` and resumes at the
+ * character and position that source was left at.
+ *
+ * NOTE(review): uses `inpStack.head`, so it presumably must only be called
+ * while the stack is non-empty - confirm against the callers.
+ */
+ def pop() {
+ curInput = inpStack.head
+ inpStack = inpStack.tail
+ // continue with the restored source's current character; no fresh read is needed
+ lastChRead = curInput.ch
+ nextChNeeded = false
+ pos = curInput.pos
+ reachedEof = false // must be false, because of places where entity refs occur
+ }
+}
diff --git a/src/xml/scala/xml/parsing/MarkupParserCommon.scala b/src/xml/scala/xml/parsing/MarkupParserCommon.scala
new file mode 100644
index 0000000000..57c1651558
--- /dev/null
+++ b/src/xml/scala/xml/parsing/MarkupParserCommon.scala
@@ -0,0 +1,260 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+import scala.io.Source
+import scala.annotation.switch
+import Utility.Escapes.{ pairs => unescape }
+
+import Utility.SU
+
+/** This is not a public trait - it contains common code shared
+ * between the library level XML parser and the compiler's.
+ * All members should be accessed through those.
+ */
+private[scala] trait MarkupParserCommon extends TokenTests {
+ protected def unreachable = scala.sys.error("Cannot be reached.")
+
+ // type HandleType // MarkupHandler, SymbolicXMLBuilder
+ type InputType // Source, CharArrayReader
+ type PositionType // Int, Position
+ type ElementType // NodeSeq, Tree
+ type NamespaceType // NamespaceBinding, Any
+ type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree]
+
+ def mkAttributes(name: String, pscope: NamespaceType): AttributesType
+ def mkProcInstr(position: PositionType, name: String, text: String): ElementType
+
+ /** Parses the name and attributes of a start or empty-element tag.
+  *  [40] STag         ::= '<' Name { S Attribute } [S]
+  *  [44] EmptyElemTag ::= '<' Name { S Attribute } [S]
+  *
+  *  @return the tag name paired with its attributes
+  */
+ protected def xTag(pscope: NamespaceType): (String, AttributesType) = {
+   val tagName = xName
+   xSpaceOpt()
+   val attributes = mkAttributes(tagName, pscope)
+   (tagName, attributes)
+ }
+
+ /** Parses a processing instruction after its opening `<?`: the target
+  *  name, optional whitespace, then all text up to the closing `?>`.
+  *
+  *  '<?' ProcInstr ::= Name [S ({Char} - ({Char}'>?' {Char})]'?>'
+  *
+  *  see [15]
+  */
+ def xProcInstr: ElementType = {
+   val target = xName
+   xSpaceOpt()
+   xTakeUntil((at: PositionType, body: String) => mkProcInstr(at, target, body), () => tmppos, "?>")
+ }
+
+ /** Reads an attribute value up to the closing quote `endCh`, which is
+  *  consumed but not returned. The value may not contain `<`: in that case
+  *  an error is reported and the empty string is returned.
+  *
+  *  Input that ends before the closing quote raises `truncatedError`. Both
+  *  `SU` and `eof` are checked, because the two parsers sharing this code
+  *  signal the end of input differently.
+  *
+  *  @param endCh either `'` or `"`
+  */
+ def xAttributeValue(endCh: Char): String = {
+   val buf = new StringBuilder
+   while (ch != endCh) {
+     // well-formedness constraint
+     if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "")
+     else if (ch == SU || eof) truncatedError("")
+     else buf append ch_returning_nextch
+   }
+   ch_returning_nextch
+   // @todo: normalize attribute value
+   buf.toString
+ }
+
+ /** Reads a quoted attribute value, using the current character as the
+  *  quote, and returns it normalized (well-formedness constraint).
+  */
+ def xAttributeValue(): String =
+   normalizeAttributeValue(xAttributeValue(ch_returning_nextch))
+
+ /** Consumes characters from `it` up to and including the first `end`
+  *  and returns those before it; fails if `end` never occurs.
+  */
+ private def takeUntilChar(it: Iterator[Char], end: Char): String = {
+   val collected = new StringBuilder
+   while (it.hasNext) {
+     val c = it.next()
+     if (c == end) return collected.toString
+     collected append c
+   }
+   scala.sys.error("Expected '%s'".format(end))
+ }
+
+ /** Parses an end tag after its `<`, failing via `errorNoEnd` if its name
+  *  differs from `startName`.
+  *
+  *  [42] '<' xmlEndTag ::= '<' '/' Name S? '>'
+  */
+ def xEndTag(startName: String): Unit = {
+   xToken('/')
+   val closingName = xName
+   if (closingName != startName)
+     errorNoEnd(startName)
+
+   xSpaceOpt()
+   xToken('>')
+ }
+
+ /** Parses a name. If the current character cannot start a name, an error
+  *  is reported and the empty string is returned. A trailing `:` is
+  *  reported and dropped.
+  *
+  *  actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen
+  *  Name ::= (Letter | '_') (NameChar)*
+  *
+  *  see [5] of XML 1.0 specification
+  *
+  *  pre-condition: ch != ':' // assured by definition of XMLSTART token
+  *  post-condition: name does neither start, nor end in ':'
+  */
+ def xName: String = {
+   if (ch == SU)
+     truncatedError("")
+   else if (!isNameStart(ch))
+     return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "")
+
+   val chars = new StringBuilder
+   do chars append ch_returning_nextch
+   while (isNameChar(ch))
+
+   val name = chars.toString
+   if (name.last != ':') name
+   else {
+     reportSyntaxError( "name cannot end in ':'" )
+     name dropRight 1
+   }
+ }
+
+ /** Maps the name of a predefined entity (plus the alias `quote`) to its
+  *  character; any other name is rendered back as `&name;`.
+  */
+ private def attr_unescape(s: String) =
+   if (s == "lt") "<"
+   else if (s == "gt") ">"
+   else if (s == "amp") "&"
+   else if (s == "apos") "'"
+   else if (s == "quot" || s == "quote") "\""
+   else "&" + s + ";"
+
+ /** Normalizes an attribute value: each whitespace character becomes a
+ * single space, character references (`&#...;`) are replaced through
+ * `xCharRef`, and entity references (`&name;`) through `attr_unescape`.
+ * see spec 3.3.3
+ */
+ private def normalizeAttributeValue(attval: String): String = {
+ val buf = new StringBuilder
+ val it = attval.iterator.buffered
+
+ while (it.hasNext) buf append (it.next() match {
+ case ' ' | '\t' | '\n' | '\r' => " "
+ // NOTE(review): `it.head` is read without checking `it.hasNext`; a value
+ // ending in '&' presumably fails here - confirm
+ case '&' if it.head == '#' => it.next() ; xCharRef(it)
+ case '&' => attr_unescape(takeUntilChar(it, ';'))
+ case c => c
+ })
+
+ buf.toString
+ }
+
+ /** Parses a character reference by delegating to `Utility.parseCharRef`,
+ * reading characters through the supplied `ch` / `nextch` functions.
+ *
+ * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";"
+ * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
+ *
+ * see [66]
+ */
+ def xCharRef(ch: () => Char, nextch: () => Unit): String =
+ Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _)
+
+ /** Same as above, reading the characters from an iterator. */
+ def xCharRef(it: Iterator[Char]): String = {
+ // holds the iterator's current character for the two closures below
+ var c = it.next()
+ Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _)
+ }
+
+ /** Same as above, reading from the parser's own input. */
+ def xCharRef: String = xCharRef(() => ch, () => nextch())
+
+ /** Create a lookahead reader which does not influence the input */
+ def lookahead(): BufferedIterator[Char]
+
+ /** The library and compiler parsers had the interesting distinction of
+ * different behavior for nextch (a function for which there are a total
+ * of two plausible behaviors, so we know the design space was fully
+ * explored.) One of them returned the value of nextch before the increment
+ * and one of them the new value. So to unify code we have to at least
+ * temporarily abstract over the nextchs.
+ */
+ def ch: Char
+ def nextch(): Unit
+ protected def ch_returning_nextch: Char
+ def eof: Boolean
+
+ // def handle: HandleType
+ var tmppos: PositionType
+
+ def xHandleError(that: Char, msg: String): Unit
+ def reportSyntaxError(str: String): Unit
+ def reportSyntaxError(pos: Int, str: String): Unit
+
+ def truncatedError(msg: String): Nothing
+ def errorNoEnd(tag: String): Nothing
+
+ /** Reports `msg` as a syntax error and returns the fallback value `x`. */
+ protected def errorAndResult[T](msg: String, x: T): T = { reportSyntaxError(msg); x }
+
+ /** Consumes the expected character, or reports an error (without
+  *  consuming anything) if the current character differs.
+  */
+ def xToken(that: Char): Unit =
+   if (ch != that) xHandleError(that, "'%s' expected instead of '%s'".format(that, ch))
+   else nextch()
+ /** Consumes the expected characters one after the other. */
+ def xToken(that: Seq[Char]): Unit = for (c <- that) xToken(c)
+
+ /** scan [S] '=' [S] */
+ def xEQ(): Unit = {
+   xSpaceOpt()
+   xToken('=')
+   xSpaceOpt()
+ }
+
+ /** skip optional space S? */
+ def xSpaceOpt(): Unit = {
+   while (isSpace(ch) && !eof) nextch()
+ }
+
+ /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ ; missing whitespace is reported as an error */
+ def xSpace(): Unit =
+   if (!isSpace(ch)) xHandleError(ch, "whitespace expected")
+   else { nextch(); xSpaceOpt() }
+
+ /** Runs `f` on `x` for its side effect, then hands `x` back. */
+ def returning[T](x: T)(f: T => Unit): T = {
+   f(x)
+   x
+ }
+
+ /** Runs `body` and afterwards passes the value `getter` had on entry to
+  *  `setter`, even if `body` throws.
+  */
+ def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = {
+   val before = getter
+   try {
+     body
+   } finally {
+     setter(before)
+   }
+ }
+
+ /** Takes characters from the input until the string `until` is seen. The
+  *  terminator is consumed, and the accumulated characters are passed
+  *  along with the current position to the supplied handler function.
+  *
+  *  Input that ends before the terminator raises `truncatedError`. Both
+  *  `SU` and `eof` are checked, because the two parsers sharing this code
+  *  signal the end of input differently.
+  *
+  *  @param handler    builds the result from the position and the text
+  *  @param positioner supplies the position passed to `handler`
+  *  @param until      the terminating string; must not be empty
+  */
+ protected def xTakeUntil[T](
+   handler: (PositionType, String) => T,
+   positioner: () => PositionType,
+   until: String): T =
+ {
+   val sb = new StringBuilder
+   val head = until.head
+   val rest = until.tail
+
+   while (true) {
+     if (ch == head && peek(rest))
+       return handler(positioner(), sb.toString)
+     else if (ch == SU || eof)
+       truncatedError("") // throws TruncatedXMLControl in compiler
+
+     sb append ch
+     nextch()
+   }
+   unreachable
+ }
+
+ /** Create a non-destructive lookahead reader and see if the head
+ * of the input would match the given String. If yes, return true
+ * and drop the entire String from input; if no, return false
+ * and leave input unchanged.
+ *
+ * The lookahead starts after the current character `ch`, which the
+ * caller (`xTakeUntil`) has already matched itself.
+ */
+ private def peek(lookingFor: String): Boolean =
+ (lookahead() take lookingFor.length sameElements lookingFor.iterator) && {
+ // drop the chars from the real reader (all lookahead + orig)
+ // `0 to length` is inclusive on purpose: one step for the current character
+ // plus one for each character of `lookingFor`
+ (0 to lookingFor.length) foreach (_ => nextch())
+ true
+ }
+}
diff --git a/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala
new file mode 100644
index 0000000000..56ac185f47
--- /dev/null
+++ b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala
@@ -0,0 +1,37 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package parsing
+
+import factory.NodeFactory
+
+/** nobinding adaptor providing callbacks to parser to create elements.
+*
+* NOTE(review): this class was documented as implementing hash-consing, but
+* both factory methods below construct a new `Elem` on every call. Any
+* sharing of nodes would have to come from the inherited traits - confirm.
+*/
+class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem]
+{
+ /** True. Every XML node may contain text that the application needs */
+ def nodeContainsText(label: String) = true
+
+ /** From NodeFactory. Constructs an instance of scala.xml.Elem */
+ protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem =
+ Elem(pre, label, attrs, scope, children: _*)
+
+ /** From FactoryAdapter. Creates a node by calling the `Elem` factory directly. */
+ def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem =
+ Elem(pre, label, attrs, scope, children: _*)
+
+ /** Creates a text node. */
+ def createText(text: String) = Text(text)
+
+ /** Creates a processing instruction. */
+ def createProcInstr(target: String, data: String) = makeProcInstr(target, data)
+}
diff --git a/src/xml/scala/xml/parsing/TokenTests.scala b/src/xml/scala/xml/parsing/TokenTests.scala
new file mode 100644
index 0000000000..8dd9cdfaa3
--- /dev/null
+++ b/src/xml/scala/xml/parsing/TokenTests.scala
@@ -0,0 +1,101 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+/**
+ * Helper functions for parsing XML fragments: character and string
+ * classification according to the productions of the XML 1.0 specification.
+ */
+trait TokenTests {
+
+  /** Tests for a single whitespace character.
+   *  {{{
+   *  (#x20 | #x9 | #xD | #xA)
+   *  }}} */
+  final def isSpace(ch: Char): Boolean = ch match {
+    case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true
+    case _ => false
+  }
+  /** Tests for a non-empty sequence of whitespace characters.
+   *  {{{
+   *  (#x20 | #x9 | #xD | #xA)+
+   *  }}} */
+  final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace)
+
+  /** These are 99% sure to be redundant but refactoring on the safe side. */
+  def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
+  def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9')
+
+  /** {{{
+   *  NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
+   *             | CombiningChar | Extender
+   *  }}}
+   *  See [4] and Appendix B of XML 1.0 specification.
+   */
+  def isNameChar(ch: Char) = {
+    import java.lang.Character._
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+
+    isNameStart(ch) || (getType(ch).toByte match {
+      case COMBINING_SPACING_MARK |
+        ENCLOSING_MARK | NON_SPACING_MARK |
+        MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _ => ".-:" contains ch
+    })
+  }
+
+  /** {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+   */
+  def isNameStart(ch: Char) = {
+    import java.lang.Character._
+
+    getType(ch).toByte match {
+      case LOWERCASE_LETTER |
+        UPPERCASE_LETTER | OTHER_LETTER |
+        TITLECASE_LETTER | LETTER_NUMBER => true
+      case _ => ch == '_'
+    }
+  }
+
+  /** {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String) =
+    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)
+
+  /** Tests whether a character may occur in a public identifier.
+   *  {{{
+   *  [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+   *  }}}
+   *  The punctuation set includes the apostrophe and excludes the backslash,
+   *  as the production requires.
+   */
+  def isPubIDChar(ch: Char): Boolean =
+    isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') ||
+      ("-'()+,./:=?;!*#@$_%" contains ch)
+
+  /**
+   * Returns `true` if the encoding name is a valid IANA encoding.
+   * This method does not verify that there is a decoder available
+   * for this encoding, only that the characters are valid for an
+   * IANA encoding name.
+   *
+   * @param ianaEncoding The IANA encoding name.
+   */
+  def isValidIANAEncoding(ianaEncoding: Seq[Char]) = {
+    def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c)
+
+    ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) &&
+      (ianaEncoding.tail forall charOK)
+  }
+
+  /** A system identifier is acceptable if it can be quoted, i.e. if it
+   *  lacks at least one of the two quote characters.
+   */
+  def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c))
+  /** A public identifier is acceptable if all its characters are PubidChars. */
+  def checkPubID(s: String) = s forall isPubIDChar
+}
diff --git a/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala
new file mode 100644
index 0000000000..1b20901249
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala
@@ -0,0 +1,104 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package parsing
+
+import scala.xml.dtd._
+
+abstract class ValidatingMarkupHandler extends MarkupHandler {
+ // Checks every element start against the content model declared for its enclosing element.
+ var rootLabel:String = _ // set by endDTD; the root element's label is compared against it
+ var qStack: List[Int] = Nil // qCurrent values pushed by elemStart and popped by elemEnd
+ var qCurrent: Int = -1 // current automaton state; -1 while no element is open (next element is treated as root)
+
+ var declStack: List[ElemDecl] = Nil // declCurrent values pushed by elemStart and popped by elemEnd
+ var declCurrent: ElemDecl = null // result of lookupElemDecl for the most recently opened element
+
+ final override val isValidating = true
+
+ override def endDTD(n:String) = {
+ rootLabel = n
+ }
+ override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) {
+ // Takes the transition for `label` out of state qCurrent, or reports a validation error if there is none.
+ def advanceDFA(dm:DFAContentModel) = {
+ val trans = dm.dfa.delta(qCurrent)
+ log("advanceDFA(dm): " + dm)
+ log("advanceDFA(trans): " + trans)
+ trans.get(ContentModel.ElemName(label)) match {
+ case Some(qNew) => qCurrent = qNew
+ case _ => reportValidationError(pos, "DTD says, wrong element, expected one of "+trans.keys)
+ }
+ }
+ // advance in current automaton
+ log("[qCurrent = "+qCurrent+" visiting "+label+"]")
+
+ if (qCurrent == -1) { // root
+ log(" checking root")
+ if (label != rootLabel)
+ reportValidationError(pos, "this element should be "+rootLabel)
+ } else {
+ log(" checking node")
+ declCurrent.contentModel match {
+ case ANY => // nothing to check: no error is reported for any child element
+ case EMPTY =>
+ reportValidationError(pos, "DTD says, no elems, no text allowed here")
+ case PCDATA =>
+ reportValidationError(pos, "DTD says, no elements allowed here")
+ case m @ MIXED(r) =>
+ advanceDFA(m)
+ case e @ ELEMENTS(r) =>
+ advanceDFA(e)
+ }
+ }
+ // push state, decl
+ qStack = qCurrent :: qStack
+ declStack = declCurrent :: declStack
+
+ declCurrent = lookupElemDecl(label)
+ qCurrent = 0 // children of `label` are checked starting from state 0
+ log(" done now")
+ }
+
+ override def elemEnd(pos: Int, pre: String, label: String) { // pops the state and decl pushed by elemStart
+ log(" elemEnd")
+ qCurrent = qStack.head
+ qStack = qStack.tail
+ declCurrent = declStack.head
+ declStack = declStack.tail
+ log(" qCurrent now" + qCurrent)
+ log(" declCurrent now" + declCurrent)
+ }
+ // Each of the handlers below records its declaration by prepending it to `decls`.
+ final override def elemDecl(name: String, cmstr: String) {
+ decls = ElemDecl(name, ContentModel.parse(cmstr)) :: decls
+ }
+
+ final override def attListDecl(name: String, attList: List[AttrDecl]) {
+ decls = AttListDecl(name, attList) :: decls
+ }
+
+ final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String) {
+ decls = UnparsedEntityDecl(name, extID, notat) :: decls
+ }
+
+ final override def notationDecl(notat: String, extID: ExternalID) {
+ decls = NotationDecl(notat, extID) :: decls
+ }
+
+ final override def peReference(name: String) {
+ decls = PEReference(name) :: decls
+ }
+
+ /** Reports the validation error `str` for position `pos`; left abstract for subclasses. */
+ def reportValidationError(pos: Int, str: String): Unit
+}
diff --git a/src/xml/scala/xml/parsing/XhtmlEntities.scala b/src/xml/scala/xml/parsing/XhtmlEntities.scala
new file mode 100644
index 0000000000..3683af202c
--- /dev/null
+++ b/src/xml/scala/xml/parsing/XhtmlEntities.scala
@@ -0,0 +1,54 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+import scala.xml.dtd.{ IntDef, ParsedEntityDecl }
+
+/**
+ * @author (c) David Pollak 2007 WorldWide Conferencing, LLC.
+ *
+ */
+object XhtmlEntities {
+  val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165),
+    ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482),
+    ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184),
+    ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247),
+    ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200),
+    ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209),
+    ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219),
+    ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228),
+    ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237),
+    ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246),
+    ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338),
+    ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",8204), ("zwj",8205), ("lrm",8206), // zwnj is U+200C = 8204 (204 is Igrave)
+    ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), ("bdquo",8222),
+    ("dagger",8224), ("Dagger",8225), ("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242),
+    ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593),
+    ("rarr",8594), ("darr",8595), ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660), // darr is U+2193 = 8595
+    ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719),
+    ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744),
+    ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804),
+    ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901),
+    ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827),
+    ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919),
+    ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929),
+    ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947),
+    ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957),
+    ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967),
+    ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982))
+  /** Entity name -> the character it stands for, built from [[entList]]. */
+  val entMap: Map[String, Char] = Map.empty[String, Char] ++ entList.map { case (name, value) => (name, value.toChar)}
+  /** The same entities as (name, internal entity declaration) pairs, one per [[entList]] entry. */
+  val entities = entList.
+    map { case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))}
+  /** Returns [[entities]]. */
+  def apply() = entities
+}
diff --git a/src/xml/scala/xml/parsing/XhtmlParser.scala b/src/xml/scala/xml/parsing/XhtmlParser.scala
new file mode 100644
index 0000000000..6ce5bec8d0
--- /dev/null
+++ b/src/xml/scala/xml/parsing/XhtmlParser.scala
@@ -0,0 +1,31 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package parsing
+
+import scala.io.Source
+
+/** An XML Parser that preserves `CDATA` blocks and knows about
+ * [[scala.xml.parsing.XhtmlEntities]].
+ *
+ * @author (c) David Pollak, 2007 WorldWide Conferencing, LLC.
+ */
+class XhtmlParser(val input: Source) extends ConstructingHandler with MarkupParser with ExternalSources {
+  val preserveWS = true            // whitespace is kept
+  ent ++= XhtmlEntities.entities   // add the XHTML named entities to `ent`
+}
+
+/** Convenience method that instantiates, initializes and runs an `XhtmlParser`.
+ *
+ * @author Burak Emir
+ */
+object XhtmlParser {
+  def apply(source: Source): NodeSeq = { val parser = new XhtmlParser(source); parser.initialize.document() }
+}
diff --git a/src/xml/scala/xml/persistent/CachedFileStorage.scala b/src/xml/scala/xml/persistent/CachedFileStorage.scala
new file mode 100644
index 0000000000..a1489ef3f4
--- /dev/null
+++ b/src/xml/scala/xml/persistent/CachedFileStorage.scala
@@ -0,0 +1,129 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package persistent
+
+import java.io.{ File, FileOutputStream }
+import java.nio.ByteBuffer
+import java.nio.channels.Channels
+import java.lang.Thread
+
+import scala.collection.Iterator
+
+/** Mutable storage of immutable xml trees. Everything is kept in memory,
+ * with a thread periodically checking for changes and writing to file.
+ *
+ * To ensure atomicity, two files are used, `filename1` and `filename1+'$'`.
+ * The implementation switches between the two, deleting the older one
+ * after a complete dump of the database has been written.
+ *
+ * @author Burak Emir
+ */
+abstract class CachedFileStorage(private val file1: File) extends Thread {
+
+ private val file2 = new File(file1.getParent, file1.getName+"$") // same directory as file1, name suffixed with "$"
+
+ /** Either equals `file1` or `file2`, references the next file in which
+ * updates will be stored.
+ */
+ private var theFile: File = null
+
+ private def switch() = { theFile = if (theFile == file1) file2 else file1; } // alternate between file1 and file2
+
+ /** true if this storage has been modified since it was last saved */
+ protected var dirty = false
+
+ /** period between modification checks, in milliseconds */
+ protected val interval = 1000
+
+ /** finds and loads the storage file. subclasses should call this method
+ * prior to any other, but only once, to obtain the initial sequence of nodes.
+ */
+ protected def initialNodes: Iterator[Node] = (file1.exists, file2.exists) match {
+ case (false,false) => // neither file exists: start empty, first save goes to file1
+ theFile = file1
+ Iterator.empty
+ case (true, true ) if (file1.lastModified < file2.lastModified) => // both exist and file2 is newer
+ theFile = file2
+ load
+ case (true, _ ) => // file1 exists and file2 is either missing or not newer
+ theFile = file1
+ load
+ case _ => // only file2 exists
+ theFile = file2
+ load
+ }
+
+ /** returns an iterator over the nodes in this storage */
+ def nodes: Iterator[Node]
+
+ /** adds a node, setting this.dirty to true as a side effect */
+ def += (e: Node): Unit
+
+ /** removes a tree, setting this.dirty to true as a side effect */
+ def -= (e: Node): Unit
+
+ /* loads and parses XML from theFile, then switches theFile so the next save goes to the other file */
+ private def load: Iterator[Node] = {
+ import scala.io.Source
+ import scala.xml.parsing.ConstructingParser
+ log("[load]\nloading "+theFile)
+ val src = Source.fromFile(theFile)
+ log("parsing "+theFile)
+ val res = ConstructingParser.fromSource(src,preserveWS = false).document.docElem(0)
+ switch()
+ log("[load done]")
+ res.child.iterator // the stored nodes are the children of the document element
+ }
+
+ /** if dirty, rewrites theFile with all nodes wrapped in a `nodes` element, then switches files */
+ private def save() = if (this.dirty) {
+ log("[save]\ndeleting "+theFile)
+ theFile.delete()
+ log("creating new "+theFile)
+ theFile.createNewFile()
+ val fos = new FileOutputStream(theFile)
+ val c = fos.getChannel()
+
+ // @todo: optimize
+ val storageNode = <nodes>{ nodes.toList }</nodes>
+ val w = Channels.newWriter(c, "utf-8")
+ XML.write(w, storageNode, "utf-8", xmlDecl = true, doctype = null)
+
+ log("writing to "+theFile)
+
+ w.close
+ c.close
+ fos.close
+ dirty = false
+ switch() // the next save targets the other file
+ log("[save done]")
+ }
+
+ /** Run method of the thread. remember to use `start()` to start a thread,
+ * not `run`. */
+ override def run = {
+ log("[run]\nstarting storage thread, checking every "+interval+" ms")
+ while (true) { // never terminates on its own: sleep, then save if dirty
+ Thread.sleep( this.interval.toLong )
+ save()
+ }
+ }
+
+ /** Force writing of contents to the file, even if there has not been any
+ * update. */
+ def flush() = {
+ this.dirty = true
+ save()
+ }
+
+ @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11")
+ def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/persistent/Index.scala b/src/xml/scala/xml/persistent/Index.scala
new file mode 100644
index 0000000000..9ee45e7086
--- /dev/null
+++ b/src/xml/scala/xml/persistent/Index.scala
@@ -0,0 +1,17 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package persistent
+
+/** an Index returns some unique key that is part of a node
+ */
+abstract class Index[A] extends (Node => A)
diff --git a/src/xml/scala/xml/persistent/SetStorage.scala b/src/xml/scala/xml/persistent/SetStorage.scala
new file mode 100644
index 0000000000..8db56a2e71
--- /dev/null
+++ b/src/xml/scala/xml/persistent/SetStorage.scala
@@ -0,0 +1,42 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+package persistent
+
+import scala.collection.mutable
+import java.io.File
+
+/** A persistent store with set semantics. This class allows to add and remove
+ * trees, but never contains two structurally equal trees.
+ *
+ * @author Burak Emir
+ */
+class SetStorage(file: File) extends CachedFileStorage(file) {
+
+  private val theSet = mutable.HashSet[Node]()
+
+  // initialize: load whatever the storage files already contain
+
+  {
+    val it = super.initialNodes
+    dirty = it.hasNext // existing content marks the storage dirty, so the next save writes it out
+    theSet ++= it
+  }
+
+  /* forwarding methods to hashset; every access to theSet holds this object's lock */
+
+  def += (e: Node): Unit = synchronized { this.dirty = true; theSet += e }
+
+  def -= (e: Node): Unit = synchronized { this.dirty = true; theSet -= e }
+  // Returns a snapshot taken under the lock: a live HashSet iterator would be consumed after the
+  // lock is released (e.g. by the saver thread) while += / -= may be mutating the set.
+  def nodes: Iterator[Node] = synchronized { theSet.toList.iterator }
+}
diff --git a/src/xml/scala/xml/pull/XMLEvent.scala b/src/xml/scala/xml/pull/XMLEvent.scala
new file mode 100644
index 0000000000..3beb3648e7
--- /dev/null
+++ b/src/xml/scala/xml/pull/XMLEvent.scala
@@ -0,0 +1,60 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package pull
+
+/** An XML event for pull parsing. All events received during
+ * parsing will be one of the subclasses of this trait.
+ */
+trait XMLEvent
+
+/** An element's start tag was encountered.
+ * @param pre prefix, if any, on the element. This is the `xs` in `<xs:string>foo</xs:string>`.
+ * @param label the name of the element, not including the prefix
+ * @param attrs any attributes on the element
+ * @param scope the [[NamespaceBinding]] supplied with the start tag
+ */
+case class EvElemStart(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) extends XMLEvent
+
+/**
+ * An element's end tag was encountered.
+ * @param pre prefix, if any, on the element. This is the `xs` in `<xs:string>foo</xs:string>`.
+ * @param label the name of the element, not including the prefix
+ */
+case class EvElemEnd(pre: String, label: String) extends XMLEvent
+
+/**
+ * A text node was encountered.
+ * @param text the text that was found
+ */
+case class EvText(text: String) extends XMLEvent
+
+/** An entity reference was encountered.
+ * @param entity the name of the entity, e.g. `gt` when encountering the entity `&gt;`
+ */
+case class EvEntityRef(entity: String) extends XMLEvent
+
+/**
+ * A processing instruction was encountered.
+ * @param target the "PITarget" of the processing instruction. For the instruction `<?foo bar="baz"?>`, the target would
+ * be `foo`
+ * @param text the remainder of the instruction. For the instruction `<?foo bar="baz"?>`, the text would
+ * be `bar="baz"`
+ * @see [[http://www.w3.org/TR/REC-xml/#sec-pi]]
+ */
+case class EvProcInstr(target: String, text: String) extends XMLEvent
+
+/**
+ * A comment was encountered.
+ * @param text the text of the comment
+ */
+case class EvComment(text: String) extends XMLEvent
diff --git a/src/xml/scala/xml/pull/XMLEventReader.scala b/src/xml/scala/xml/pull/XMLEventReader.scala
new file mode 100755
index 0000000000..76e51e17fd
--- /dev/null
+++ b/src/xml/scala/xml/pull/XMLEventReader.scala
@@ -0,0 +1,157 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package pull
+
+import scala.io.Source
+import java.lang.Thread
+import java.util.concurrent.LinkedBlockingQueue
+import java.nio.channels.ClosedChannelException
+import scala.xml.parsing.{ ExternalSources, MarkupHandler, MarkupParser }
+
+/**
+ * Main entry point into creating an event-based XML parser. Treating this
+ * as a [[scala.collection.Iterator]] will provide access to the generated events.
+ * @param src A [[scala.io.Source]] for XML data to parse
+ *
+ * @author Burak Emir
+ * @author Paul Phillips
+ */
+class XMLEventReader(src: Source)
+extends scala.collection.AbstractIterator[XMLEvent]
+ with ProducerConsumerIterator[XMLEvent] {
+
+ // We implement a pull parser as an iterator, but since we may be operating on
+ // a stream (e.g. XML over a network) there may be arbitrarily long periods when
+ // the queue is empty. Fortunately the ProducerConsumerIterator is ideally
+ // suited to this task, possibly because it was written for use by this class.
+
+ // to override as necessary
+ val preserveWS = true
+
+ override val MaxQueueSize = 1000
+ protected case object POISON extends XMLEvent // marker event enqueued to signal the end of the stream
+ val EndOfStream = POISON
+
+ // thread machinery: the parser runs on its own thread, which is started during construction
+ private[this] val parser = new Parser(src)
+ private[this] val parserThread = new Thread(parser, "XMLEventReader")
+ parserThread.start
+ // enqueueing the poison object is the reliable way to cause the
+ // iterator to terminate; hasNext will return false once it sees it.
+ // Calling interrupt() on the parserThread is the only way we can get
+ // it to stop producing tokens since it's lost deep in document() -
+ // we cross our fingers the interrupt() gets to its target, but if it
+ // fails for whatever reason the iterator correctness is not impacted,
+ // only performance (because it will finish the entire XML document,
+ // or at least as much as it can fit in the queue.)
+ def stop() = {
+ produce(POISON)
+ parserThread.interrupt()
+ }
+
+ private class Parser(val input: Source) extends MarkupHandler with MarkupParser with ExternalSources with Runnable {
+ val preserveWS = XMLEventReader.this.preserveWS
+ // track level for elem memory usage optimization
+ private var level = 0
+
+ // this is Parser's way to add to the queue - the odd return type
+ // is to conform to MarkupHandler's interface
+ def setEvent(es: XMLEvent*): NodeSeq = {
+ es foreach produce
+ NodeSeq.Empty
+ }
+
+ override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) {
+ level += 1
+ setEvent(EvElemStart(pre, label, attrs, scope))
+ }
+ override def elemEnd(pos: Int, pre: String, label: String) {
+ setEvent(EvElemEnd(pre, label))
+ level -= 1
+ }
+
+ // elem exists only to satisfy MarkupHandler's API; it emits no event.
+ // As a memory usage optimization it returns a single <ignore/> the first time it is called
+ // at level 1 (to satisfy MarkupParser.document()) and NodeSeq.Empty on every other call.
+ private var ignoreWritten = false
+ final def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq =
+ if (level == 1 && !ignoreWritten) {ignoreWritten = true; <ignore/> } else NodeSeq.Empty
+
+ def procInstr(pos: Int, target: String, txt: String) = setEvent(EvProcInstr(target, txt))
+ def comment(pos: Int, txt: String) = setEvent(EvComment(txt))
+ def entityRef(pos: Int, n: String) = setEvent(EvEntityRef(n))
+ def text(pos: Int, txt:String) = setEvent(EvText(txt))
+
+ override def run() {
+ curInput = input
+ interruptibly { this.initialize.document() }
+ setEvent(POISON) // reached after a normal finish or an interrupt swallowed by interruptibly
+ }
+ }
+}
+
+// An iterator designed for one or more producers to generate
+// elements, and a single consumer to iterate. Iteration will continue
+// until the consumer takes the EndOfStream marker from the queue, after
+// which point further calls to produce() are ignored.
+//
+// Since hasNext may block indefinitely if nobody is producing,
+// there is also an available() method which will return true if
+// the next call to hasNext is guaranteed not to block.
+//
+// This is not thread-safe for multiple consumers!
+trait ProducerConsumerIterator[T >: Null] extends Iterator[T] {
+  // abstract - iterator-specific distinguished object for marking eos
+  val EndOfStream: T
+
+  // defaults to unbounded - override to positive Int if desired
+  val MaxQueueSize = -1
+  // Runs body; InterruptedException (interrupt flag re-asserted) or ClosedChannelException yields None.
+  def interruptibly[A](body: => A): Option[A] = try Some(body) catch {
+    case _: InterruptedException => Thread.currentThread.interrupt(); None
+    case _: ClosedChannelException => None
+  }
+
+  private[this] lazy val queue =
+    if (MaxQueueSize < 0) new LinkedBlockingQueue[T]()
+    else new LinkedBlockingQueue[T](MaxQueueSize)
+  private[this] var buffer: T = _ // null = nothing buffered; EndOfStream = iteration is over
+  private def fillBuffer() = { // blocks on the queue; an interrupted take counts as EndOfStream
+    buffer = interruptibly(queue.take) getOrElse EndOfStream
+    isElement(buffer)
+  }
+  private def isElement(x: T) = x != null && x != EndOfStream
+  private def eos() = buffer == EndOfStream
+
+  // public producer interface - this is the only method producers call, so
+  // LinkedBlockingQueue's synchronization is all we need.
+  def produce(x: T): Unit = if (!eos()) interruptibly(queue put x)
+
+  // consumer/iterator interface - we need not synchronize access to buffer
+  // because we required there to be only one consumer.
+  def hasNext = !eos() && (buffer != null || fillBuffer())
+
+  def next() = {
+    if (eos()) throw new NoSuchElementException("ProducerConsumerIterator")
+    // The blocking refill may itself hit EndOfStream: report exhaustion as Iterator.next() should, not via assert.
+    if (buffer == null && !fillBuffer()) throw new NoSuchElementException("ProducerConsumerIterator")
+    drainBuffer()
+  }
+
+  def available() = isElement(buffer) || isElement(queue.peek)
+
+  private def drainBuffer() = {
+    assert(!eos())
+    val res = buffer
+    buffer = null
+    res
+  }
+}
diff --git a/src/xml/scala/xml/pull/package.scala b/src/xml/scala/xml/pull/package.scala
new file mode 100644
index 0000000000..0e3019446b
--- /dev/null
+++ b/src/xml/scala/xml/pull/package.scala
@@ -0,0 +1,42 @@
+package scala
+package xml
+
+/**
+ * Classes needed to view an XML document as a series of events. The document
+ * is parsed by an [[scala.xml.pull.XMLEventReader]] instance. You can treat it as
+ * an [[scala.collection.Iterator]] to retrieve the events, which are all
+ * subclasses of [[scala.xml.pull.XMLEvent]].
+ *
+ * {{{
+ * scala> val source = Source.fromString("""<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+ * <?instruction custom value="customvalue"?>
+ * <!DOCTYPE foo [
+ * <!ENTITY bar "BAR">
+ * ]><foo>Hello<!-- this is a comment --><bar>&bar;</bar><bar>&gt;</bar></foo>""")
+ *
+ * source: scala.io.Source = non-empty iterator
+ *
+ * scala> val reader = new XMLEventReader(source)
+ * reader: scala.xml.pull.XMLEventReader = non-empty iterator
+ *
+ * scala> reader.foreach{ println(_) }
+ * EvProcInstr(instruction,custom value="customvalue")
+ * EvText(
+ * )
+ * EvElemStart(null,foo,,)
+ * EvText(Hello)
+ * EvComment( this is a comment )
+ * EvElemStart(null,bar,,)
+ * EvText(BAR)
+ * EvElemEnd(null,bar)
+ * EvElemStart(null,bar,,)
+ * EvEntityRef(gt)
+ * EvElemEnd(null,bar)
+ * EvElemEnd(null,foo)
+ * EvText(
+ *
+ * )
+ *
+ * }}}
+ */
+package object pull
diff --git a/src/xml/scala/xml/transform/BasicTransformer.scala b/src/xml/scala/xml/transform/BasicTransformer.scala
new file mode 100644
index 0000000000..c98339fd67
--- /dev/null
+++ b/src/xml/scala/xml/transform/BasicTransformer.scala
@@ -0,0 +1,60 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package transform
+
+/** A class for XML transformations.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+abstract class BasicTransformer extends Function1[Node,Node]
+{
+  protected def unchanged(n: Node, ns: Seq[Node]) =
+    ns.length == 1 && (ns.head == n)
+
+  /** Call transform(Node) for each node produced by `it`, appending the
+   *  results to `nb`.
+   */
+  def transform(it: Iterator[Node], nb: NodeBuffer): Seq[Node] =
+    it.foldLeft(nb)(_ ++= transform(_)).toSeq
+
+  /** Call transform(Node) once on each node in ns; yield ns itself if nothing
+   *  changes, otherwise a new sequence of concatenated results.
+   */
+  def transform(ns: Seq[Node]): Seq[Node] = {
+    // Transform each node exactly once: re-running transform per node at every level compounds with tree depth.
+    val changed = ns flatMap transform
+    if (changed.length != ns.length || changed.zip(ns).exists { case (a, b) => a != b }) changed
+    else ns
+  }
+  /** Transforms the children of `n` (if `n.doTransform`), rebuilding `n` only when a child changed. */
+  def transform(n: Node): Seq[Node] = {
+    if (n.doTransform) n match {
+      case Group(xs) => Group(transform(xs)) // un-group the hack Group tag
+      case _ =>
+        val ch = n.child
+        val nch = transform(ch)
+        // transform(Seq) hands back the very same sequence when nothing changed, so identity suffices here
+        if (ch eq nch) n
+        else Elem(n.prefix, n.label, n.attributes, n.scope, nch: _*)
+    }
+    else n
+  }
+  /** Transforms the root `n`; throws UnsupportedOperationException if the result has more than one node. */
+  def apply(n: Node): Node = {
+    val seq = transform(n)
+    if (seq.length > 1)
+      throw new UnsupportedOperationException("transform must return single node for root")
+    else seq.head
+  }
+}
diff --git a/src/xml/scala/xml/transform/RewriteRule.scala b/src/xml/scala/xml/transform/RewriteRule.scala
new file mode 100644
index 0000000000..1399ee538d
--- /dev/null
+++ b/src/xml/scala/xml/transform/RewriteRule.scala
@@ -0,0 +1,28 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+
+package scala
+package xml
+package transform
+
+/** A RewriteRule, when applied to a term, yields either
+ * the result of rewriting the term or the term itself if the rule
+ * is not applied.
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+abstract class RewriteRule extends BasicTransformer {
+  /** Label for this rule, taken from `toString` when the rule is constructed. */
+  val name = toString
+  override def transform(ns: Seq[Node]): Seq[Node] = super.transform(ns) // sequence form: defer to BasicTransformer
+  override def transform(n: Node): Seq[Node] = n // single-node form: identity unless a subclass overrides it
+}
+
diff --git a/src/xml/scala/xml/transform/RuleTransformer.scala b/src/xml/scala/xml/transform/RuleTransformer.scala
new file mode 100644
index 0000000000..3a222ba759
--- /dev/null
+++ b/src/xml/scala/xml/transform/RuleTransformer.scala
@@ -0,0 +1,16 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+package transform
+
+class RuleTransformer(rules: RewriteRule*) extends BasicTransformer {
+  // Run the inherited traversal first, then thread its result through every rule in declaration order.
+  override def transform(n: Node): Seq[Node] = rules.foldLeft(super.transform(n))((acc, r) => r.transform(acc))
+}