From bb6dc39a5d5c1c2c4ddfb3425be462e0d658c851 Mon Sep 17 00:00:00 2001 From: Burak Emir Date: Fri, 14 Jul 2006 17:56:53 +0000 Subject: added event (pull) parsing --- src/library/scala/xml/Document.scala | 3 +- src/library/scala/xml/NodeSeq.scala | 115 +++++++++++++------ src/library/scala/xml/SpecialNode.scala | 4 +- src/library/scala/xml/parsing/MarkupHandler.scala | 3 +- src/library/scala/xml/parsing/MarkupParser.scala | 32 +++--- src/library/scala/xml/pull/XMLEvent.scala | 21 ++++ src/library/scala/xml/pull/XMLEventReader.scala | 128 ++++++++++++++++++++++ 7 files changed, 250 insertions(+), 56 deletions(-) create mode 100644 src/library/scala/xml/pull/XMLEvent.scala create mode 100644 src/library/scala/xml/pull/XMLEventReader.scala (limited to 'src/library') diff --git a/src/library/scala/xml/Document.scala b/src/library/scala/xml/Document.scala index f1c0fc95cc..ec0b5e2a3c 100644 --- a/src/library/scala/xml/Document.scala +++ b/src/library/scala/xml/Document.scala @@ -15,8 +15,9 @@ package scala.xml; /** A document information item (according to InfoSet spec). The comments * are copied from the Infoset spec, only augmented with some information * on the Scala types for definitions that might have no value. + * also plays the role of an XMLEvent for pull parsing */ -class Document extends NodeSeq { +class Document extends NodeSeq with pull.XMLEvent { /** An ordered list of child information items, in document * order. The list contains exactly one element information item. The diff --git a/src/library/scala/xml/NodeSeq.scala b/src/library/scala/xml/NodeSeq.scala index 16cb6be7b3..a5eeed8251 100644 --- a/src/library/scala/xml/NodeSeq.scala +++ b/src/library/scala/xml/NodeSeq.scala @@ -43,25 +43,43 @@ abstract class NodeSeq extends Seq[Node] { * of all elements of this sequence that are labelled with "foo". * Use \ "_" as a wildcard. The document order is preserved. 
*/ - def \(that: String):NodeSeq = { - var res: NodeSeq = NodeSeq.Empty; - that match { - case "_" => - res = for( val x <- this; val y <- x.child; y.typeTag$ != -1) yield { y } - - case _ if (that.charAt(0) == '@') && (this.length == 1) => - val k = that.substring(1); - val y = this(0); - val v = y.attribute(k); - if( v != null ) { - res = NodeSeq.fromSeq(Seq.single(Text(v))); + def \(that: String): NodeSeq = that match { + case "_" => + var zs:List[Node] = List[Node]() + val it = this.elements + while(it.hasNext) { + val x = it.next + val jt = x.child.elements + while(jt.hasNext) { + val y = jt.next + if(y.typeTag$ != -1) + zs = y::zs } - - case _ => - res = for( val x <- this; val y <- x.child: NodeSeq; y.label == that ) - yield { y } - } - res + } + NodeSeq.fromSeq(zs.reverse) + + case _ if (that.charAt(0) == '@') && (this.length == 1) => + val k = that.substring(1); + val y = this(0); + val v = y.attribute(k); + if( v != null ) { + NodeSeq.fromSeq(Seq.single(Text(v):Node)); + } else + NodeSeq.Empty + + case _ => + var zs:List[Node] = Nil + val it = this.elements + while(it.hasNext) { + val x = it.next + val jt = x.child.elements + while(jt.hasNext) { + val y = jt.next + if(y.label == that) + zs = y::zs + } + } + NodeSeq.fromSeq(zs.reverse) } /** projection function. 
Similar to XPath, use this \\ 'foo to get a list @@ -70,22 +88,51 @@ abstract class NodeSeq extends Seq[Node] { */ def \\ ( that:String ): NodeSeq = that match { - case "_" => for( val x <- this; - val y <- x.descendant_or_self: NodeSeq; - y.typeTag$ != -1 ) - yield { y } - case _ if that.charAt(0) == '@' => - val attrib = that.substring(1); - (for(val x <- this; - val y <- x.descendant_or_self: NodeSeq; - y.typeTag$ != -1; - val z <- y \ that) - yield { z }):NodeSeq - case _ => for( val x <- this; - val y <- x.descendant_or_self: NodeSeq; - y.typeTag$ != -1; - y.label == that) - yield { y } + case "_" => + var zs:List[Node] = List[Node]() + val it = this.elements + while(it.hasNext) { + val x = it.next + val jt = x.descendant_or_self.elements + while(jt.hasNext) { + val y = jt.next + if(y.typeTag$ != -1) + zs = y::zs + } + } + zs.reverse + + case _ if that.charAt(0) == '@' => + var zs: List[Node] = Nil + val it = this.elements + while(it.hasNext) { + val x = it.next + val jt = x.descendant_or_self.elements + while(jt.hasNext) { + val y = jt.next + if(y.typeTag$ != -1) { + val kt = (y \ that).elements + while(kt.hasNext) { + zs = (kt.next)::zs + } + } + } + } + zs.reverse + + case _ => + var zs:List[Node] = List[Node]() + val it = this.elements + while(it.hasNext) { + val x = it.next + val jt = x.descendant_or_self.elements + while(jt.hasNext) { + val y = jt.next + if(y.typeTag$ != -1 && y.label == that) + zs = y::zs + } + } + zs.reverse } override def toString():String = theSeq.elements.foldLeft ("") { diff --git a/src/library/scala/xml/SpecialNode.scala b/src/library/scala/xml/SpecialNode.scala index b0cd95a2f4..68fafc5def 100644 --- a/src/library/scala/xml/SpecialNode.scala +++ b/src/library/scala/xml/SpecialNode.scala @@ -15,10 +15,10 @@ import scala.runtime.compat.StringBuilder /** <code>SpecialNode</code> is a special XML node which * represents either text (PCDATA), a comment, a PI, or an entity ref. 
- * + * SpecialNodes also play the role of XMLEvents for pull-parsing. * @author Burak Emir */ -abstract class SpecialNode extends Node { +abstract class SpecialNode extends Node with pull.XMLEvent { /** always empty */ final override def attributes = Null diff --git a/src/library/scala/xml/parsing/MarkupHandler.scala b/src/library/scala/xml/parsing/MarkupHandler.scala index d4c00a25b6..ab7c3799a2 100644 --- a/src/library/scala/xml/parsing/MarkupHandler.scala +++ b/src/library/scala/xml/parsing/MarkupHandler.scala @@ -76,7 +76,8 @@ abstract class MarkupHandler extends AnyRef with Logged { * @param pre the prefix * @param label the local name * @param attrs the attributes (metadata) - */ def elemEnd(pos: int, pre: String, label: String): Unit = {} + */ + def elemEnd(pos: int, pre: String, label: String): Unit = {} /** callback method invoked by MarkupParser after parsing an elementm, * between the elemStart and elemEnd callbacks diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala index c8f3e02507..ceecd6f79e 100644 --- a/src/library/scala/xml/parsing/MarkupParser.scala +++ b/src/library/scala/xml/parsing/MarkupParser.scala @@ -65,6 +65,8 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit var dtd: DTD = null; + protected var doc: Document = null; + var eof: Boolean = false; // @@ -172,10 +174,10 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit def document(): Document = { //Console.println("(DEBUG) document"); + doc = new Document(); this.dtd = null; - var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] = - Tuple3(None, None, None); + var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] = Tuple3(None, None, None); if ('<' != ch) { reportSyntaxError("< expected"); return null; @@ -186,11 +188,17 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit if ('?' 
== ch) { //Console.println("[MarkupParser::document] starts with xml declaration"); nextch; - info_prolog = prolog(); + info_prolog = prolog() + doc.version = info_prolog._1 + doc.encoding = info_prolog._2 + doc.standAlone = info_prolog._3 + children = content(TopScope); // DTD handled as side effect } else { //Console.println("[MarkupParser::document] does not start with xml declaration"); + // + val ts = new NodeBuffer(); content1(TopScope, ts); // DTD handled as side effect ts &+ content(TopScope); @@ -216,13 +224,8 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit Console.println(children.toList); } - val doc = new Document(); doc.children = children; doc.docElem = theNode; - doc.version = info_prolog._1; - doc.encoding = info_prolog._2; - doc.standAlone = info_prolog._3; - doc.dtd = this.dtd; return doc } @@ -245,7 +248,6 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit } else { eof = true; ch = 0.asInstanceOf[Char]; - //throw new Exception("this is the end") } } } @@ -269,15 +271,6 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit xToken(it.next); } - /** checks whether next character starts a Scala block, if yes, skip it. 
- * @return true if next character starts a scala block - def xCheckEmbeddedBlock:Boolean = { - xEmbeddedBlock = - enableEmbeddedExpressions && (ch == '{') && { nextch; ch != '{' }; - return xEmbeddedBlock; - } - */ - /** parse attribute and create namespace scope, metadata * [41] Attributes ::= { S Name Eq AttValue } */ @@ -651,6 +644,9 @@ trait MarkupParser requires (MarkupParser with MarkupHandler) extends AnyRef wit /*override val */decls = handle.decls.reverse; } //this.dtd.initializeEntities(); + if(doc!=null) + doc.dtd = this.dtd + handle.endDTD(n); } diff --git a/src/library/scala/xml/pull/XMLEvent.scala b/src/library/scala/xml/pull/XMLEvent.scala new file mode 100644 index 0000000000..50e657489d --- /dev/null +++ b/src/library/scala/xml/pull/XMLEvent.scala @@ -0,0 +1,21 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2006, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id$ + + +package scala.xml.pull + +/** represents an XMLEvent for pull parsing + */ +trait XMLEvent { +} + +case class ElemStart(pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) extends XMLEvent + +case class ElemEnd(pre: String, label: String) extends XMLEvent diff --git a/src/library/scala/xml/pull/XMLEventReader.scala b/src/library/scala/xml/pull/XMLEventReader.scala new file mode 100644 index 0000000000..a8888a8b86 --- /dev/null +++ b/src/library/scala/xml/pull/XMLEventReader.scala @@ -0,0 +1,128 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2006, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id$ + + +package scala.xml.pull + +import scala.io.Source +import scala.xml.parsing.{MarkupParser, MarkupHandler,ExternalSources} + +/** a pull parser that offers to view an XML document as a series of events. + * Please note that this API might change. 
Here's how to use this class + * +<pre>
+import scala.xml._
+import scala.xml.pull._
+import scala.io.Source
+
+object reader {
+  val src = Source.fromString("<hello><world/></hello>")
+  val er = new XMLEventReader().initialize(src)
+
+  def main(args:Array[String]): unit = {
+    Console.println(er.next)
+    Console.println(er.next)
+  }
+}
+</pre>
+ */ +class XMLEventReader extends Iterator[XMLEvent] { + + var src:Source = null + def getSource = this.src + def initialize(src:Source): this.type = { + this.src = src + this.parserThread = new Thread(new Parser()) + this.parserThread.start() + this + } + + // -- this part of the class is for communication with the thread + var xmlEvent: XMLEvent = null + var continue: Boolean = true + + def myresume = synchronized { + while(continue) { + wait() + } + continue = true; + notifyAll + } + def getAndClearEvent: XMLEvent = synchronized { + while(xmlEvent == null) { + wait() + } + val r = xmlEvent + xmlEvent = null + r + } + def setEvent(e:XMLEvent) = { + xmlEvent = e; + } + + def doNotify() = synchronized { + XMLEventReader.this.continue = false; + notifyAll() + while(!XMLEventReader.this.continue) wait(); + NodeSeq.Empty + } + + // iterator methods + + def next: XMLEvent = { + myresume; + val r = getAndClearEvent + r + } + + def hasNext = true + + var parserThread: Thread = null + + class Parser extends MarkupHandler with MarkupParser with ExternalSources with Runnable { + + val preserveWS = true + val input = XMLEventReader.this.getSource + + override def elemStart(pos:int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding):Unit = { + setEvent(ElemStart(pre,label,attrs,scope)); doNotify + } + + override def elemEnd(pos: int, pre: String, label: String): Unit = { + setEvent(ElemEnd(pre,label)); doNotify + } + + final def elem(pos: int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, nodes: NodeSeq): NodeSeq = + NodeSeq.Empty + + def procInstr(pos: Int, target: String, txt: String ) = { + setEvent(ElemStart(null,"comm",null,null)); doNotify + } + + def comment(pos: Int, txt: String ) = { + setEvent(ElemStart(null,"comm",null,null)); doNotify + } + + def entityRef(pos: Int, n: String) = { + setEvent(ElemStart(null,"eref",null,null)); doNotify + } + + def text(pos: Int, txt:String) = { + setEvent(ElemStart(null,"tex",null,null)); 
doNotify + } + + override def run(): unit = { + curInput = input + this.nextch + doNotify() + this.document() + } + } +} -- cgit v1.2.3