summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBurak Emir <emir@epfl.ch>2006-07-14 17:56:53 +0000
committerBurak Emir <emir@epfl.ch>2006-07-14 17:56:53 +0000
commitbb6dc39a5d5c1c2c4ddfb3425be462e0d658c851 (patch)
tree5bef1a84b60120854608ac1a785e2785174af537 /src
parent90f48c4fbea8ead08e0b1198cbf761b399cfb631 (diff)
downloadscala-bb6dc39a5d5c1c2c4ddfb3425be462e0d658c851.tar.gz
scala-bb6dc39a5d5c1c2c4ddfb3425be462e0d658c851.tar.bz2
scala-bb6dc39a5d5c1c2c4ddfb3425be462e0d658c851.zip
added event (pull) parsing
Diffstat (limited to 'src')
-rw-r--r--src/library/scala/xml/Document.scala3
-rw-r--r--src/library/scala/xml/NodeSeq.scala115
-rw-r--r--src/library/scala/xml/SpecialNode.scala4
-rw-r--r--src/library/scala/xml/parsing/MarkupHandler.scala3
-rw-r--r--src/library/scala/xml/parsing/MarkupParser.scala32
-rw-r--r--src/library/scala/xml/pull/XMLEvent.scala21
-rw-r--r--src/library/scala/xml/pull/XMLEventReader.scala128
7 files changed, 250 insertions, 56 deletions
diff --git a/src/library/scala/xml/Document.scala b/src/library/scala/xml/Document.scala
index f1c0fc95cc..ec0b5e2a3c 100644
--- a/src/library/scala/xml/Document.scala
+++ b/src/library/scala/xml/Document.scala
@@ -15,8 +15,9 @@ package scala.xml;
/** A document information item (according to InfoSet spec). The comments
* are copied from the Infoset spec, only augmented with some information
* on the Scala types for definitions that might have no value.
+ * A Document also plays the role of an XMLEvent for pull parsing.
*/
-class Document extends NodeSeq {
+class Document extends NodeSeq with pull.XMLEvent {
/** An ordered list of child information items, in document
* order. The list contains exactly one element information item. The
diff --git a/src/library/scala/xml/NodeSeq.scala b/src/library/scala/xml/NodeSeq.scala
index 16cb6be7b3..a5eeed8251 100644
--- a/src/library/scala/xml/NodeSeq.scala
+++ b/src/library/scala/xml/NodeSeq.scala
@@ -43,25 +43,43 @@ abstract class NodeSeq extends Seq[Node] {
* of all elements of this sequence that are labelled with "foo".
* Use \ "_" as a wildcard. The document order is preserved.
*/
- def \(that: String):NodeSeq = {
- var res: NodeSeq = NodeSeq.Empty;
- that match {
- case "_" =>
- res = for( val x <- this; val y <- x.child; y.typeTag$ != -1) yield { y }
-
- case _ if (that.charAt(0) == '@') && (this.length == 1) =>
- val k = that.substring(1);
- val y = this(0);
- val v = y.attribute(k);
- if( v != null ) {
- res = NodeSeq.fromSeq(Seq.single(Text(v)));
+ def \(that: String): NodeSeq = that match {
+ case "_" =>
+ var zs:List[Node] = List[Node]()
+ val it = this.elements
+ while(it.hasNext) {
+ val x = it.next
+ val jt = x.child.elements
+ while(jt.hasNext) {
+ val y = jt.next
+ if(y.typeTag$ != -1)
+ zs = y::zs
}
-
- case _ =>
- res = for( val x <- this; val y <- x.child: NodeSeq; y.label == that )
- yield { y }
- }
- res
+ }
+ NodeSeq.fromSeq(zs.reverse)
+
+ case _ if (that.charAt(0) == '@') && (this.length == 1) =>
+ val k = that.substring(1);
+ val y = this(0);
+ val v = y.attribute(k);
+ if( v != null ) {
+ NodeSeq.fromSeq(Seq.single(Text(v):Node));
+ } else
+ NodeSeq.Empty
+
+ case _ =>
+ var zs:List[Node] = Nil
+ val it = this.elements
+ while(it.hasNext) {
+ val x = it.next
+ val jt = x.child.elements
+ while(jt.hasNext) {
+ val y = jt.next
+ if(y.label == that)
+ zs = y::zs
+ }
+ }
+ NodeSeq.fromSeq(zs.reverse)
}
/** projection function. Similar to XPath, use this \\ 'foo to get a list
@@ -70,22 +88,51 @@ abstract class NodeSeq extends Seq[Node] {
*/
def \\ ( that:String ): NodeSeq = that match {
- case "_" => for( val x <- this;
- val y <- x.descendant_or_self: NodeSeq;
- y.typeTag$ != -1 )
- yield { y }
- case _ if that.charAt(0) == '@' =>
- val attrib = that.substring(1);
- (for(val x <- this;
- val y <- x.descendant_or_self: NodeSeq;
- y.typeTag$ != -1;
- val z <- y \ that)
- yield { z }):NodeSeq
- case _ => for( val x <- this;
- val y <- x.descendant_or_self: NodeSeq;
- y.typeTag$ != -1;
- y.label == that)
- yield { y }
+ case "_" =>
+ var zs:List[Node] = List[Node]()
+ val it = this.elements
+ while(it.hasNext) {
+ val x = it.next
+ val jt = x.descendant_or_self.elements
+ while(jt.hasNext) {
+ val y = jt.next
+ if(y.typeTag$ != -1)
+ zs = y::zs
+ }
+ }
+ zs.reverse
+
+ case _ if that.charAt(0) == '@' =>
+ var zs: List[Node] = Nil
+ val it = this.elements
+ while(it.hasNext) {
+ val x = it.next
+ val jt = x.descendant_or_self.elements
+ while(jt.hasNext) {
+ val y = jt.next
+ if(y.typeTag$ != -1) {
+ val kt = (y \ that).elements
+ while(kt.hasNext) {
+ zs = (kt.next)::zs
+ }
+ }
+ }
+ }
+ zs.reverse
+
+ case _ =>
+ var zs:List[Node] = List[Node]()
+ val it = this.elements
+ while(it.hasNext) {
+ val x = it.next
+ val jt = x.descendant_or_self.elements
+ while(jt.hasNext) {
+ val y = jt.next
+ if(y.typeTag$ != -1 && y.label == that)
+ zs = y::zs
+ }
+ }
+ zs.reverse
}
override def toString():String = theSeq.elements.foldLeft ("") {
diff --git a/src/library/scala/xml/SpecialNode.scala b/src/library/scala/xml/SpecialNode.scala
index b0cd95a2f4..68fafc5def 100644
--- a/src/library/scala/xml/SpecialNode.scala
+++ b/src/library/scala/xml/SpecialNode.scala
@@ -15,10 +15,10 @@ import scala.runtime.compat.StringBuilder
/** &lt;code&gt;SpecialNode&lt;/code&gt; is a special XML node which
* represents either text (PCDATA), a comment, a PI, or an entity ref.
- *
+ * SpecialNodes also play the role of XMLEvents for pull-parsing.
* @author Burak Emir
*/
-abstract class SpecialNode extends Node {
+abstract class SpecialNode extends Node with pull.XMLEvent {
/** always empty */
final override def attributes = Null
diff --git a/src/library/scala/xml/parsing/MarkupHandler.scala b/src/library/scala/xml/parsing/MarkupHandler.scala
index d4c00a25b6..ab7c3799a2 100644
--- a/src/library/scala/xml/parsing/MarkupHandler.scala
+++ b/src/library/scala/xml/parsing/MarkupHandler.scala
@@ -76,7 +76,8 @@ abstract class MarkupHandler extends AnyRef with Logged {
* @param pre the prefix
* @param label the local name
* @param attrs the attributes (metadata)
- */ def elemEnd(pos: int, pre: String, label: String): Unit = {}
+ */
+ def elemEnd(pos: int, pre: String, label: String): Unit = {}
/** callback method invoked by MarkupParser after parsing an elementm,
* between the elemStart and elemEnd callbacks
diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala
index c8f3e02507..ceecd6f79e 100644
--- a/src/library/scala/xml/parsing/MarkupParser.scala
+++ b/src/library/scala/xml/parsing/MarkupParser.scala
@@ -65,6 +65,8 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
var dtd: DTD = null;
+ protected var doc: Document = null;
+
var eof: Boolean = false;
//
@@ -172,10 +174,10 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
def document(): Document = {
//Console.println("(DEBUG) document");
+ doc = new Document();
this.dtd = null;
- var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] =
- Tuple3(None, None, None);
+ var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] = Tuple3(None, None, None);
if ('<' != ch) {
reportSyntaxError("< expected");
return null;
@@ -186,11 +188,17 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
if ('?' == ch) {
//Console.println("[MarkupParser::document] starts with xml declaration");
nextch;
- info_prolog = prolog();
+ info_prolog = prolog()
+ doc.version = info_prolog._1
+ doc.encoding = info_prolog._2
+ doc.standAlone = info_prolog._3
+
children = content(TopScope); // DTD handled as side effect
} else {
//Console.println("[MarkupParser::document] does not start with xml declaration");
+ //
+
val ts = new NodeBuffer();
content1(TopScope, ts); // DTD handled as side effect
ts &+ content(TopScope);
@@ -216,13 +224,8 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
Console.println(children.toList);
}
- val doc = new Document();
doc.children = children;
doc.docElem = theNode;
- doc.version = info_prolog._1;
- doc.encoding = info_prolog._2;
- doc.standAlone = info_prolog._3;
- doc.dtd = this.dtd;
return doc
}
@@ -245,7 +248,6 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
} else {
eof = true;
ch = 0.asInstanceOf[Char];
- //throw new Exception("this is the end")
}
}
}
@@ -269,15 +271,6 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
xToken(it.next);
}
- /** checks whether next character starts a Scala block, if yes, skip it.
- * @return true if next character starts a scala block
- def xCheckEmbeddedBlock:Boolean = {
- xEmbeddedBlock =
- enableEmbeddedExpressions && (ch == '{') && { nextch; ch != '{' };
- return xEmbeddedBlock;
- }
- */
-
/** parse attribute and create namespace scope, metadata
* [41] Attributes ::= { S Name Eq AttValue }
*/
@@ -651,6 +644,9 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit
/*override val */decls = handle.decls.reverse;
}
//this.dtd.initializeEntities();
+ if(doc!=null)
+ doc.dtd = this.dtd
+
handle.endDTD(n);
}
diff --git a/src/library/scala/xml/pull/XMLEvent.scala b/src/library/scala/xml/pull/XMLEvent.scala
new file mode 100644
index 0000000000..50e657489d
--- /dev/null
+++ b/src/library/scala/xml/pull/XMLEvent.scala
@@ -0,0 +1,21 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2006, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+// $Id$
+
+
+package scala.xml.pull
+
+/** Common supertype of the events delivered during pull parsing.
+ */
+trait XMLEvent {
+}
+
+case class ElemStart(pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) extends XMLEvent
+
+case class ElemEnd(pre: String, label: String) extends XMLEvent
diff --git a/src/library/scala/xml/pull/XMLEventReader.scala b/src/library/scala/xml/pull/XMLEventReader.scala
new file mode 100644
index 0000000000..a8888a8b86
--- /dev/null
+++ b/src/library/scala/xml/pull/XMLEventReader.scala
@@ -0,0 +1,128 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2006, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+// $Id$
+
+
+package scala.xml.pull
+
+import scala.io.Source
+import scala.xml.parsing.{MarkupParser, MarkupHandler,ExternalSources}
+
+/** A pull parser that presents an XML document as a series of events.
+ * Please note that this API might change. Here is how to use this class:
+ *
+<pre>
+import scala.xml._
+import scala.xml.pull._
+import scala.io.Source
+
+object reader {
+ val src = Source.fromString("<hello><world/></hello>")
+ val er = new XMLEventReader().initialize(src)
+
+ def main(args:Array[String]): unit = {
+ Console.println(er.next)
+ Console.println(er.next)
+ }
+}
+</pre>
+ */
+class XMLEventReader extends Iterator[XMLEvent] {
+
+ var src:Source = null
+ def getSource = this.src
+ def initialize(src:Source): this.type = {
+ this.src = src
+ this.parserThread = new Thread(new Parser())
+ this.parserThread.start()
+ this
+ }
+
+ // -- this part of the class handles communication with the parser thread
+ var xmlEvent: XMLEvent = null
+ var continue: Boolean = true
+
+ def myresume = synchronized {
+ while(continue) {
+ wait()
+ }
+ continue = true;
+ notifyAll
+ }
+ def getAndClearEvent: XMLEvent = synchronized {
+ while(xmlEvent == null) {
+ wait()
+ }
+ val r = xmlEvent
+ xmlEvent = null
+ r
+ }
+ def setEvent(e:XMLEvent) = {
+ xmlEvent = e;
+ }
+
+ def doNotify() = synchronized {
+ XMLEventReader.this.continue = false;
+ notifyAll()
+ while(!XMLEventReader.this.continue) wait();
+ NodeSeq.Empty
+ }
+
+ // iterator methods
+
+ def next: XMLEvent = {
+ myresume;
+ val r = getAndClearEvent
+ r
+ }
+
+ def hasNext = true
+
+ var parserThread: Thread = null
+
+ class Parser extends MarkupHandler with MarkupParser with ExternalSources with Runnable {
+
+ val preserveWS = true
+ val input = XMLEventReader.this.getSource
+
+ override def elemStart(pos:int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding):Unit = {
+ setEvent(ElemStart(pre,label,attrs,scope)); doNotify
+ }
+
+ override def elemEnd(pos: int, pre: String, label: String): Unit = {
+ setEvent(ElemEnd(pre,label)); doNotify
+ }
+
+ final def elem(pos: int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, nodes: NodeSeq): NodeSeq =
+ NodeSeq.Empty
+
+ def procInstr(pos: Int, target: String, txt: String ) = {
+ setEvent(ElemStart(null,"comm",null,null)); doNotify
+ }
+
+ def comment(pos: Int, txt: String ) = {
+ setEvent(ElemStart(null,"comm",null,null)); doNotify
+ }
+
+ def entityRef(pos: Int, n: String) = {
+ setEvent(ElemStart(null,"eref",null,null)); doNotify
+ }
+
+ def text(pos: Int, txt:String) = {
+ setEvent(ElemStart(null,"tex",null,null)); doNotify
+ }
+
+ override def run(): unit = {
+ curInput = input
+ this.nextch
+ doNotify()
+ this.document()
+ }
+ }
+}