From 7ddd0a60210de7aef99fcdfbab75693f48b78ac9 Mon Sep 17 00:00:00 2001 From: buraq Date: Fri, 23 Jul 2004 13:03:20 +0000 Subject: parsing lib xmlxmlxmlxml --- sources/scala/xml/parsing/AttribValue.scala | 8 ++- .../scala/xml/parsing/ConstructingHandler.scala | 17 +++--- sources/scala/xml/parsing/ConstructingParser.scala | 22 +++++++ sources/scala/xml/parsing/MarkupHandler.scala | 68 ++++++++++++++++++++-- sources/scala/xml/parsing/MarkupParser.scala | 67 ++++++++++++++------- 5 files changed, 147 insertions(+), 35 deletions(-) create mode 100644 sources/scala/xml/parsing/ConstructingParser.scala (limited to 'sources') diff --git a/sources/scala/xml/parsing/AttribValue.scala b/sources/scala/xml/parsing/AttribValue.scala index fbb17241e6..7dc25f2238 100644 --- a/sources/scala/xml/parsing/AttribValue.scala +++ b/sources/scala/xml/parsing/AttribValue.scala @@ -1,6 +1,8 @@ package scala.xml.parsing ; /** a container for attribute values */ -trait AttribValue[A] { - def value: A; -}; +trait AttribValue; + +case class NamespaceDecl(uri: String) extends AttribValue; +case class CDataValue(value: String) extends AttribValue; +case class CustomValue[A](value:A) extends AttribValue; diff --git a/sources/scala/xml/parsing/ConstructingHandler.scala b/sources/scala/xml/parsing/ConstructingHandler.scala index 33b9480241..cf9bf8f787 100644 --- a/sources/scala/xml/parsing/ConstructingHandler.scala +++ b/sources/scala/xml/parsing/ConstructingHandler.scala @@ -3,21 +3,24 @@ package scala.xml.parsing; import scala.collection.immutable.Map ; import scala.collection.mutable ; -/** @todo: make ConstructingMarkupHandler */ -abstract class ConstructingHandler extends MarkupHandler[Node,String] { +/** */ +class ConstructingHandler extends MarkupHandler[Node,String] { - //def attributeCDataValue(pos: int, str:String): AttribValue[String]; - //def attributeEmbedded(pos: int, x:MarkupType): AttribValue[String]; + def attributeCDataValue(pos: int, str:String) = CDataValue(str); - def element(pos: int, label: String, attrMap1: mutable.Map[String,AttribValue[String]], args: mutable.Buffer[Node]) = { + def attributeNamespaceDecl(pos: int, uri: String) = NamespaceDecl(uri); + + def element(pos: int, uri: String, label: String, attrMap1: mutable.Map[String,AttribValue], args: mutable.Buffer[Node]) = { var attrs = new Array[Attribute](attrMap1.size); { var i = 0; val it = attrMap1.elements; while( it.hasNext ) { - val Pair(ke:String, va: AttribValue[String]) = it.next; - attrs( i ) = Attribute("",ke,va.value); + val Pair(ke:String, va: AttribValue) = it.next; + va match { + case CDataValue(str) => attrs( i ) = Attribute("",ke,str); + } i = i + 1; } } diff --git a/sources/scala/xml/parsing/ConstructingParser.scala b/sources/scala/xml/parsing/ConstructingParser.scala new file mode 100644 index 0000000000..cfa92a89f8 --- /dev/null +++ b/sources/scala/xml/parsing/ConstructingParser.scala @@ -0,0 +1,22 @@ +package scala.xml.parsing ; + + +/** an xml parser. parses XML and invokes callback methods of a MarkupHandler + */ +abstract class ConstructingParser extends MarkupParser[Node, String] { + + val handle = new ConstructingHandler(); + + //val enableEmbeddedExpressions: Boolean; + + /** report a syntax error */ + def reportSyntaxError(str: String): Unit = throw FatalError(str); + + /** this method assign the next character to ch and advances in input */ + def nextch: Unit; + + /** this method should assign the first character of the input to ch */ + def init: Unit; + + +} diff --git a/sources/scala/xml/parsing/MarkupHandler.scala b/sources/scala/xml/parsing/MarkupHandler.scala index aacc05ac93..608ae54578 100644 --- a/sources/scala/xml/parsing/MarkupHandler.scala +++ b/sources/scala/xml/parsing/MarkupHandler.scala @@ -1,14 +1,60 @@ package scala.xml.parsing; -import scala.collection.immutable.Map ; +import scala.collection.immutable ; import scala.collection.mutable ; +import scala.collection.Map ; /** @todo: make ConstructingMarkupHandler */ abstract class MarkupHandler[MarkupType, AVType] { - def attributeCDataValue(pos: int, str:String): AttribValue[AVType]; - def attributeEmbedded(pos: int, x:MarkupType): AttribValue[AVType]; - def element(pos: int, label: String, attrMap1: mutable.Map[String,AttribValue[AVType]], args: mutable.Buffer[MarkupType]): MarkupType; + /** a stack of prefix namespace mappings */ + protected val prefixStack = + new mutable.Stack[immutable.Map[String,String]](); + + /** mapping from prefixes to namespaces */ + var namespace: immutable.Map[String,String] = + new immutable.TreeMap[String,String]; + + /** returns prefix of the qualified name if any */ + final def namespacePrefix(name: String): Option[String] = { + val i = name.indexOf(':'); + if( i != -1 ) Some( name.substring(0, i) ) else None + } + + /** removes xmlns attributes from attr as a side effect, and returns a prefix + * map resulting from them + */ + final def namespaceDecl(aMap: mutable.Map[String, AttribValue]): Map[String, String] = { + val setNS = new mutable.HashMap[String, String]; + /* DEBUG */ + val attrIt = aMap.keys; + while( attrIt.hasNext ) { + val z = attrIt.next; + if( z.startsWith("xmlns") ) { + val uri = aMap( z ) match { + case NamespaceDecl(uri1) => uri1; + case _ => throw FatalError("bad namespace declaration"); + } + val i = z.indexOf(':'); + if( i == -1 ) + setNS.update("", uri ); + else { + val zz = z.substring( i+1, z.length() ); + setNS.update( zz, uri ); + } + aMap -= z; + } + } + setNS; + } + + def attributeCDataValue(pos: int, str:String): AttribValue; + def attributeNamespaceDecl(pos: int, uri: String): AttribValue; + + /** be careful to copy everything from attrMap1, as it will change + * @param attrMap1 the attribute map. + */ + def element(pos: int, uri: String, label: String, attrMap1: mutable.Map[String,AttribValue], args: mutable.Buffer[MarkupType]): MarkupType; def charData(pos: Int, txt: String ): MarkupType; def procInstr(pos: Int, target: String, txt: String): MarkupType; @@ -17,4 +63,18 @@ abstract class MarkupHandler[MarkupType, AVType] { def text(pos: Int, txt:String): MarkupType; + + def internal_startPrefixMapping(pref: Map[String, String]) = { + if( !pref.isEmpty ) { + this.prefixStack.push( this.namespace ); + this.namespace incl pref; + } + } + + def internal_endPrefixMapping(pref: Map[String, String]): Unit = { + if( !pref.isEmpty ) { + this.namespace = prefixStack.pop; + } + } + } diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala index 4bf7a81b2f..6e2c0bfbbe 100644 --- a/sources/scala/xml/parsing/MarkupParser.scala +++ b/sources/scala/xml/parsing/MarkupParser.scala @@ -12,6 +12,10 @@ package scala.xml.parsing; import scala.collection.{ mutable, Map }; import scala.collection.immutable.ListMap; +/** an xml parser. parses XML, invokes callback methods of a MarkupHandler + * and returns whatever the markup handler returns. Use ConstructingParser + * if you just want to parse XML to construct instances of scala.xml.Node. + */ abstract class MarkupParser[MarkupType, AVType] { /** the handler of the markup */ @@ -35,8 +39,15 @@ abstract class MarkupParser[MarkupType, AVType] { /** append Unicode character to name buffer*/ protected def putChar(c: Char) = cbuf.append(c); + protected var aMap: mutable.Map[String,AttribValue] = + new mutable.HashMap[String,AttribValue]; + + final val noChildren = new mutable.ListBuffer[MarkupType]; + //var xEmbeddedBlock = false; + var defaultURI: String = ""; + val lookupURI: mutable.Map[String,String] = new mutable.HashMap[String,String](); /** this method assign the next character to ch and advances in input */ def nextch: Unit; @@ -73,13 +84,13 @@ abstract class MarkupParser[MarkupType, AVType] { * | `{` scalablock `}` */ def xAttributes = { - var aMap = new mutable.HashMap[String, AttribValue[AVType]]; - while (xml.Parsing.isNameStart(ch)) { + aMap.clear; + while( xml.Parsing.isNameStart( ch )) { val key = xName; xEQ; val delim = ch; val pos1 = pos; - val value: AttribValue[AVType] = ch match { + val value:AttribValue = ch match { case '"' | '\'' => nextch; val tmp = xAttributeValue(delim); @@ -125,15 +136,13 @@ abstract class MarkupParser[MarkupType, AVType] { * [40] STag ::= '<' Name { S Attribute } [S] * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] */ - def xTag: Pair[String, mutable.Map[String, AttribValue[AVType]]] = { - val elemName = xName; + protected def xTag: String = { + val elemqName = xName; xSpaceOpt; - val aMap = if (xml.Parsing.isNameStart(ch)) { + if(xml.Parsing.isNameStart( ch )) { xAttributes; - } else { - new mutable.HashMap[String, AttribValue[AVType]](); } - Tuple2(elemName, aMap) + elemqName; } /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' @@ -292,20 +301,37 @@ abstract class MarkupParser[MarkupType, AVType] { /** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag * | xmlTag1 '/' '>' */ -def element: MarkupType = { + def element: MarkupType = { + var pref: Map[String, String] = _; var pos1 = pos; - val Tuple2(qname, attrMap) = xTag; - if (ch == '/') { // empty element - xToken('/'); - xToken('>'); - handle.element(pos1, qname, attrMap, new mutable.ListBuffer[MarkupType]); + val qname = xTag; + val ts: mutable.Buffer[MarkupType] = { + if(ch == '/') { // empty element + xToken('/'); + xToken('>'); + pref = handle.namespaceDecl( aMap ); + handle.internal_startPrefixMapping( pref ); + noChildren; + } else { // element with content + xToken('>'); + pref = handle.namespaceDecl( aMap ); + handle.internal_startPrefixMapping( pref ); + val tmp = content; + xEndTag( qname ); + tmp; + } } - else { // handle content - xToken('>'); - val ts = content; - xEndTag(qname); - handle.element(pos1, qname, attrMap, ts) + var name = qname; + val uri = handle.namespacePrefix(qname).match { + case Some(pref) => + name = name.substring(pref.length()+1, name.length()); + handle.namespace( pref ); + case _ => + handle.namespace(""); } + val res = handle.element(pos1, uri, name, aMap, ts ); + handle.internal_endPrefixMapping( pref ); + res } //def xEmbeddedExpr: MarkupType; @@ -330,7 +356,6 @@ def element: MarkupType = { } } - /** scan [S] '=' [S]*/ def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt } -- cgit v1.2.3