From 05ac4be4a37e498a88f35ad6b4e9eb3023c41210 Mon Sep 17 00:00:00 2001 From: buraq Date: Wed, 4 May 2005 17:37:37 +0000 Subject: XML library, DTD validation (changes in parser,... XML library, DTD validation (changes in parser, additions to scala.xml.dtd) --- config/list/library.lst | 5 +- sources/scala/xml/Atom.scala | 38 +++++++ sources/scala/xml/Molecule.scala | 38 +++++++ sources/scala/xml/Node.scala | 3 + sources/scala/xml/PrettyPrinter.scala | 4 +- sources/scala/xml/Text.scala | 15 +-- sources/scala/xml/TextBuffer.scala | 2 +- sources/scala/xml/TypeSymbol.scala | 1 + sources/scala/xml/Utility.scala | 23 ++-- sources/scala/xml/dtd/ContentModel.scala | 99 ++++++++++++----- sources/scala/xml/dtd/DTD.scala | 31 +++++- sources/scala/xml/dtd/Decl.scala | 144 ++++++++++++++++++------- sources/scala/xml/dtd/DtdTypeSymbol.scala | 1 + sources/scala/xml/dtd/ExternalID.scala | 43 ++++++-- sources/scala/xml/dtd/Parser.scala | 51 +++++---- sources/scala/xml/dtd/Tokens.scala | 2 - sources/scala/xml/parsing/FactoryAdapter.scala | 2 +- sources/scala/xml/parsing/MarkupParser.scala | 87 +++++++++++---- sources/scala/xml/parsing/TokenTests.scala | 27 +++-- 19 files changed, 462 insertions(+), 154 deletions(-) create mode 100644 sources/scala/xml/Atom.scala create mode 100644 sources/scala/xml/Molecule.scala create mode 100644 sources/scala/xml/TypeSymbol.scala create mode 100644 sources/scala/xml/dtd/DtdTypeSymbol.scala diff --git a/config/list/library.lst b/config/list/library.lst index 1e86a6ef70..670e328265 100644 --- a/config/list/library.lst +++ b/config/list/library.lst @@ -214,6 +214,7 @@ util/regexp/WordExp.scala util/logging/Logged.scala util/logging/ConsoleLogger.scala +xml/Atom.scala xml/Comment.scala xml/Document.scala xml/Elem.scala @@ -233,6 +234,7 @@ xml/SpecialNode.scala xml/Text.scala xml/TextBuffer.scala xml/TopScope.scala +xml/TypeSymbol.scala xml/UnprefixedAttribute.scala xml/Utility.scala xml/XML.scala @@ -245,7 +247,8 @@ xml/dtd/ExternalID.scala xml/dtd/Parser.scala xml/dtd/Scanner.scala xml/dtd/Tokens.scala -xml/dtd/Validation.scala +xml/dtd/DtdTypeSymbol.scala +#xml/dtd/Validation.scala xml/dtd/ValidationException.scala xml/factory/NodeFactory.scala diff --git a/sources/scala/xml/Atom.scala b/sources/scala/xml/Atom.scala new file mode 100644 index 0000000000..1b44c6b104 --- /dev/null +++ b/sources/scala/xml/Atom.scala @@ -0,0 +1,38 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2004, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +** $Id$ +\* */ + +package scala.xml; + +/** an XML node for text (PCDATA). Used in both non-bound and bound XML + * representations + * @author Burak Emir + * @param text the text contained in this node, may not be null. + */ +class Atom[+A]( val data: A ) extends SpecialNode { + + final override def typeTag$:Int = -1; + + /** the constant "#PCDATA" + */ + def label = "#PCDATA"; + + override def equals(x:Any) = x match { + case s:Atom[A] => data == s.data ; + case _ => false; + } + + /** hashcode for this Text */ + override def hashCode() = + data.hashCode(); + + /** returns text, with some characters escaped according to XML spec */ + def toString(sb:StringBuffer) = + Utility.escape( data.toString(), sb ); + +} diff --git a/sources/scala/xml/Molecule.scala b/sources/scala/xml/Molecule.scala new file mode 100644 index 0000000000..b1c965eadc --- /dev/null +++ b/sources/scala/xml/Molecule.scala @@ -0,0 +1,38 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2004, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +** $Id$ +\* */ + +package scala.xml; + +/** an XML node for text (PCDATA). Used in both non-bound and bound XML + * representations + * @author Burak Emir + * @param text the text contained in this node, may not be null. + */ +class Molecule[+A]( val list: List[A] ) extends SpecialNode { + + final override def typeTag$:Int = -1; + + /** the constant "#PCDATA" + */ + def label = "#PCDATA"; + + final override def equals(x:Any) = x match { + case s:Molecule[A] => list == s.list ; + case _ => false; + } + + /** hashcode for this Text */ + override def hashCode() = + list.hashCode(); + + /** returns text, with some characters escaped according to XML spec */ + def toString(sb:StringBuffer) = + sb.append(list.mkString(""," ","")) + +} diff --git a/sources/scala/xml/Node.scala b/sources/scala/xml/Node.scala index 58158eca9f..d9ee68e03c 100644 --- a/sources/scala/xml/Node.scala +++ b/sources/scala/xml/Node.scala @@ -111,4 +111,7 @@ abstract class Node extends NodeSeq { sb.append(label); } + /** returns a type symbol (e.g. DTD, XSD), default null */ + def xmlType(): TypeSymbol = null; + } diff --git a/sources/scala/xml/PrettyPrinter.scala b/sources/scala/xml/PrettyPrinter.scala index aa5181326f..0e48042b48 100644 --- a/sources/scala/xml/PrettyPrinter.scala +++ b/sources/scala/xml/PrettyPrinter.scala @@ -126,7 +126,7 @@ class PrettyPrinter( width:Int, step:Int ) { val it = n.child.elements; while( it.hasNext ) it.next match { - case _:Text[Any] | _:Comment | _:EntityRef | _:ProcInstr => + case _:Atom[Any] | _: Molecule[Any] | _:Comment | _:EntityRef | _:ProcInstr => case _:Node => return true; } return false @@ -135,7 +135,7 @@ class PrettyPrinter( width:Int, step:Int ) { protected def traverse( node:Node, pscope: NamespaceBinding, ind:int ):Unit = { node match { - case _:Text[Any] | _:Comment | _:EntityRef | _:ProcInstr => + case _:Atom[Any] | _:Molecule[Any] | _:Comment | _:EntityRef | _:ProcInstr => makeBox( ind, node.toString() ); case _:Node => diff --git a/sources/scala/xml/Text.scala b/sources/scala/xml/Text.scala index b784ae5b58..5dc569b81c 100644 --- a/sources/scala/xml/Text.scala +++ b/sources/scala/xml/Text.scala @@ -14,7 +14,7 @@ package scala.xml; * @author Burak Emir * @param text the text contained in this node, may not be null. */ -case class Text[+A]( data: A ) extends SpecialNode { +case class Text( _data: String ) extends Atom[String](_data) { if(null == data) throw new java.lang.NullPointerException("tried to construct Text with null"); @@ -23,23 +23,14 @@ case class Text[+A]( data: A ) extends SpecialNode { */ def text = data.toString(); - final override def typeTag$:Int = -1; - - /** the constant "#PCDATA" - */ - def label = "#PCDATA"; - final override def equals(x:Any) = x match { case s:String => s.equals( data.toString() ); - case s:Text[A] => data == s.data ; + case s:Text => data == s.data ; case _ => false; } - /** hashcode for this Text */ - override def hashCode() = data.hashCode(); - /** returns text, with some characters escaped according to XML spec */ - def toString(sb:StringBuffer) = + override def toString(sb:StringBuffer) = Utility.escape( data.toString(), sb ); } diff --git a/sources/scala/xml/TextBuffer.scala b/sources/scala/xml/TextBuffer.scala index ab286ed318..448fd861d5 100644 --- a/sources/scala/xml/TextBuffer.scala +++ b/sources/scala/xml/TextBuffer.scala @@ -39,7 +39,7 @@ class TextBuffer { } /** returns an empty sequence if text is only whitespace */ - def toText:Seq[Text[String]] = { + def toText:Seq[Text] = { var len = sb.length(); /* invariant */ if( len == 0 ) return Nil; diff --git a/sources/scala/xml/TypeSymbol.scala b/sources/scala/xml/TypeSymbol.scala new file mode 100644 index 0000000000..57b46dc98f --- /dev/null +++ b/sources/scala/xml/TypeSymbol.scala @@ -0,0 +1 @@ +package scala.xml; trait TypeSymbol {} diff --git a/sources/scala/xml/Utility.scala b/sources/scala/xml/Utility.scala index a14f7cf8c0..afff9c8317 100644 --- a/sources/scala/xml/Utility.scala +++ b/sources/scala/xml/Utility.scala @@ -18,7 +18,7 @@ import scala.collection.mutable; */ object Utility with parsing.TokenTests { - def view(s: String): Text[String] = Text(s); + def view(s: String): Text = Text(s); /* escapes the characters < > & and " from string */ final def escape(text: String): String = @@ -153,16 +153,25 @@ object Utility with parsing.TokenTests { } */ - def systemLiteralToString(s: String) = { - val sb = new StringBuffer("SYSTEM "); - appendQuoted(s, sb); + def systemLiteralToString(s: String): String = { + val sb = new StringBuffer(); + systemLiteralToString(sb, s); sb.toString(); + } + + def systemLiteralToString(sb: StringBuffer, s: String): StringBuffer = { + sb.append("SYSTEM "); + appendQuoted(s, sb); } - def publicLiteralToString(s: String) = { - val sb = new StringBuffer("PUBLIC "); - sb.append('"').append(s).append('"'); + def publicLiteralToString(s: String): String = { + val sb = new StringBuffer(); + systemLiteralToString(sb, s); sb.toString(); + } + + def publicLiteralToString(sb: StringBuffer, s: String): StringBuffer = { + sb.append("PUBLIC \"").append(s).append('"') } /** diff --git a/sources/scala/xml/dtd/ContentModel.scala b/sources/scala/xml/dtd/ContentModel.scala index a0d8bcf059..42885c3e07 100644 --- a/sources/scala/xml/dtd/ContentModel.scala +++ b/sources/scala/xml/dtd/ContentModel.scala @@ -9,18 +9,9 @@ object ContentModel extends scala.util.regexp.WordExp { override def toString() = "ElemName(\""+name+"\")"; } - case object PCDATA_ extends RegExp { - final val isNullable = false; - override def toString() = "PCDATA_"; - } - - case object ANY_ extends RegExp { - final val isNullable = true; - override def toString() = "ANY_"; - } - - def parse(s: String): RegExp = Parser.parse( s ); + def parse(s: String): ContentModel = Parser.parse( s ); + /* def isMixed(alt: Alt): Boolean = { val it = alt.rs.elements; it.next == PCDATA_ && { @@ -28,6 +19,7 @@ object ContentModel extends scala.util.regexp.WordExp { !it.hasNext } } + */ def getLabels(r: RegExp): scala.collection.Set[String] = { val s = new scala.collection.mutable.HashSet[String](); @@ -54,37 +46,86 @@ object ContentModel extends scala.util.regexp.WordExp { sb.toString(); } - /* precond: rs.length > 1 */ - private def toString(rs: Seq[RegExp], sb: StringBuffer):Unit = { + /* precond: rs.length >= 1 */ + private def toString(rs: Seq[RegExp], sb: StringBuffer, sep: Char): Unit = { val it = rs.elements; - sb.append('('); toString(it.next, sb); for(val z <- it) { - sb.append( ',' ); + sb.append( sep ); toString( z, sb ); } - sb.append( ')' ); } - def toString(r: RegExp, sb:StringBuffer):Unit = { + def toString(c: ContentModel, sb: StringBuffer): StringBuffer = c.match { + + case ANY => + sb.append("ANY"); + + case EMPTY => + sb.append("EMPTY"); + + case PCDATA => + sb.append("(#PCDATA)"); + + case ELEMENTS( r ) => + toString(r, sb) + + case MIXED( r ) => + sb.append("(#PCDATA"); toString(r, sb); sb.append( ')' ) + + } + + def toString(r: RegExp, sb:StringBuffer): StringBuffer = { r match { - case PCDATA_ => sb.append("PCDATA_"); - case ANY_ => sb.append("ANY_"); - case Eps => sb.append("Eps"); + case Eps => + sb + case Sequ(rs @ _*) => - sb.append("Sequ"); - toString(rs, sb); + sb.append( '(' ); toString(rs, sb, ','); sb.append( ')' ); + case Alt(rs @ _*) => - sb.append("Alt"); - toString(rs, sb); + sb.append( '(' ); toString(rs, sb, '|'); sb.append( ')' ); + case Star(r: RegExp) => - sb.append("Star("); - toString(r, sb); - sb.append(')'); + sb.append( '(' ); toString(r, sb); sb.append( ")*" ); + case Letter(ElemName(name)) => - sb.append("Letter(ElemName(\""); sb.append(name); - sb.append("\"))"); + } } } + +sealed abstract class ContentModel { + override def toString(): String = { + val sb = new StringBuffer(); + toString(sb); + sb.toString(); + } + + def toString(sb:StringBuffer): StringBuffer; +} + +case object PCDATA extends ContentModel { + def toString(sb:StringBuffer): StringBuffer = sb.append("(#PCDATA)"); +} +case object EMPTY extends ContentModel { + def toString(sb:StringBuffer): StringBuffer = sb.append("EMPTY"); +} +case object ANY extends ContentModel { + def toString(sb:StringBuffer): StringBuffer = sb.append("ANY"); +} + +case class MIXED(r:ContentModel.RegExp) extends ContentModel { + def toString(sb:StringBuffer): StringBuffer = { + sb.append("(#PCDATA|"); + ContentModel.toString(r, sb); + sb.append(")*"); + } +} + +case class ELEMENTS(r:ContentModel.RegExp) extends ContentModel { + def toString(sb:StringBuffer): StringBuffer = + ContentModel.toString(r, sb); + +} diff --git a/sources/scala/xml/dtd/DTD.scala b/sources/scala/xml/dtd/DTD.scala index 4aef465f45..20a329714f 100644 --- a/sources/scala/xml/dtd/DTD.scala +++ b/sources/scala/xml/dtd/DTD.scala @@ -1,5 +1,7 @@ package scala.xml.dtd; +import scala.collection.mutable.{ HashMap, Map } + /** a document type declaration */ abstract class DTD { @@ -9,15 +11,38 @@ abstract class DTD { def unparsedEntities: Seq[EntityDecl] = Nil; - var decls: List[MarkupDecl] = Nil; + var elem: Map[String, ElemDecl] = + new HashMap[String, ElemDecl](); + + var attr: Map[String, AttListDecl] = + new HashMap[String, AttListDecl](); + + + var decls: List[Decl] = Nil; //def getElemDecl(elem:String): ElemDecl; //def getAttribDecl(elem: String, attr: String): AttrDecl; override def toString() = { - val s = super.toString(); - "[DTD "+s.substring(s.indexOf('@'), s.length())+"]"; + val sb = new StringBuffer(); + sb.append("DTD [\n"); + if(null != externalID) + sb.append(externalID.toString()).append('\n'); + for(val d <- decls) + sb.append(d.toString()).append('\n'); + sb.append("]").toString() } + /** creates fresh type symbols from declarations */ + def createTypeSymbols(): Unit = { + elem.clear; + /* + for(val d <- decl) + d.match { + case ElemDecl(name, contentModel) => + elementType.update(name, new ElementType(name, contentModel) + } + */ + } } diff --git a/sources/scala/xml/dtd/Decl.scala b/sources/scala/xml/dtd/Decl.scala index 538e5101f4..2b662e2c99 100644 --- a/sources/scala/xml/dtd/Decl.scala +++ b/sources/scala/xml/dtd/Decl.scala @@ -10,55 +10,105 @@ package scala.xml.dtd ; -import scala.collection.Map ; - abstract class Decl ; -abstract class MarkupDecl extends Decl ; +abstract class MarkupDecl extends Decl { + + final override def toString(): String = { + toString(new StringBuffer()).toString(); + } + + def toString(sb: StringBuffer): StringBuffer; + +} /** an element declaration */ -case class ElemDecl(name: String, contentModel: ContentModel.RegExp, attList: AttListDecl) extends MarkupDecl { +case class ElemDecl(name: String, contentModel: ContentModel) extends MarkupDecl with DtdTypeSymbol { //def mixed = ; // to do - def setAttList(nAttList:AttListDecl) = - ElemDecl(name, contentModel, nAttList); + def toString(sb: StringBuffer): StringBuffer = { + sb + .append("'); + } + } // ElemDecl -case class AttListDecl(name: String, attrs:List[AttrDecl]) extends MarkupDecl; +case class AttListDecl(name: String, attrs:List[AttrDecl]) extends MarkupDecl with DtdTypeSymbol { + + def toString(sb: StringBuffer): StringBuffer = { + sb + .append("")); + } +} /** an attribute declaration. at this point, the tpe is a string. Future * versions might provide a way to access the attribute types more * directly. */ case class AttrDecl( name:String, tpe:String, default:DefaultDecl ) { - final override def toString() = { - val sb = new StringBuffer("AttrDecl("); - sb.append('"'); - sb.append( name ); - sb.append('"'); - sb.append(','); - sb.append('"'); - sb.append( tpe ); - sb.append('"'); - sb.append(','); - sb.append(default.toString()); - sb.append(')'); - sb.toString(); + + final override def toString(): String = + toString(new StringBuffer()).toString(); + + final def toString(sb: StringBuffer): StringBuffer = { + sb.append(" ").append( name ).append(' ').append( tpe ).append(' '); + default.toString(sb) } + } -class EntityDecl extends MarkupDecl; /** an entity declaration */ +abstract class EntityDecl extends MarkupDecl; -case class ParsedEntityDecl( name:String, entdef:EntityDef ) - extends EntityDecl; +/** a parsed general entity declaration */ +case class ParsedEntityDecl( name:String, entdef:EntityDef ) extends EntityDecl { -case class ParameterEntityDecl(name: String, entdef: EntityDef) - extends EntityDecl; + final def toString(sb: StringBuffer): StringBuffer = { + sb.append("'); + } +} + +/** a parameter entity declaration */ +case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { -class EntityDef; + final def toString(sb: StringBuffer): StringBuffer = { + sb.append("'); + } +} + +/** an unparsed entity declaration */ +case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl { + final def toString(sb: StringBuffer): StringBuffer = { + sb.append("'); + } +} +/** a notation declaration */ +case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl { + final def toString(sb: StringBuffer): StringBuffer = { + sb.append("" **/ -class ExternalID ; +abstract class ExternalID { + + /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */ + override def toString(): String; + + /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */ + def toString(sb: StringBuffer): StringBuffer; + +} /** a system identifier * @@ -24,14 +32,18 @@ class ExternalID ; * @param systemLiteral the system identifier literal **/ -case class SystemID( systemLiteral:String ) extends ExternalID { +case class SystemID( systemLiteral:String ) extends ExternalID with parsing.TokenTests{ - if( !Utility.checkSysID( systemLiteral ) ) + if( !checkSysID( systemLiteral ) ) throw new IllegalArgumentException( "can't use both \" and ' in systemLiteral" ); + /** returns " SYSTEM "+systemLiteral */ final override def toString() = Utility.systemLiteralToString( systemLiteral ); + + final def toString(sb: StringBuffer): StringBuffer = + Utility.systemLiteralToString( sb, systemLiteral ); } @@ -39,15 +51,18 @@ case class SystemID( systemLiteral:String ) extends ExternalID { * * @author Burak Emir * @param publicLiteral the public identifier literal - * @param systemLiteral the system identifier literal + * @param systemLiteral (can be null for notation pubIDs) the system identifier literal **/ -case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID { +case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID with parsing.TokenTests{ + //Console.println("constructing PublicID \""+publicLiteral+"\" "+systemLiteral); - if( !Utility.checkPubID( publicLiteral )) + //Console.println("util returns "+checkPubID( publicLiteral )); + + if( !checkPubID( publicLiteral )) throw new IllegalArgumentException( "publicLiteral must consist of PubidChars" ); - if( !Utility.checkSysID( systemLiteral ) ) + if( systemLiteral!= null && !checkSysID( systemLiteral ) ) throw new IllegalArgumentException( "can't use both \" and ' in systemLiteral" ); @@ -62,8 +77,16 @@ case class PublicID( publicLiteral:String, systemLiteral:String ) extends Extern final def child = Nil; /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */ - final override def toString() = - Utility.publicLiteralToString( publicLiteral ) - + Utility.systemLiteralToString( systemLiteral ); + final override def toString(): String = { + toString(new StringBuffer()).toString(); + } + /** appends "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral to argument */ + final def toString(sb: StringBuffer): StringBuffer = { + Utility.publicLiteralToString( sb, publicLiteral ).append(' '); + if(systemLiteral!=null) + Utility.systemLiteralToString( sb, systemLiteral ); + else + sb + } } diff --git a/sources/scala/xml/dtd/Parser.scala b/sources/scala/xml/dtd/Parser.scala index 410e82ce06..75ad2def5b 100644 --- a/sources/scala/xml/dtd/Parser.scala +++ b/sources/scala/xml/dtd/Parser.scala @@ -6,7 +6,7 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA import ContentModel._ ; /** parses the argument to a regexp */ - def parse(s:String):RegExp = { initScanner( s ); contentspec } + def parse(s:String): ContentModel = { initScanner( s ); contentspec } // zzz parser methods zzz def accept( tok:int ) = { @@ -29,32 +29,47 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA case _ => s } - // contentspec ::= EMPTY|ANY|mixed|regexp - def contentspec:RegExp = token match { - case NAME => - if( value.equals( "ANY" ) ) - ANY_ - else if( value.equals( "EMPTY" ) ) - Eps - else - error("unexpected name:" + value ); + // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp) + + def contentspec: ContentModel = token match { + case NAME => value.match { + case "ANY" => ANY + case "EMPTY" => EMPTY + case _ => error("expected ANY, EMPTY or '(' instead of " + value ); + } case LPAREN => + nextToken; sOpt; - if( token == TOKEN_PCDATA ) - mixed; - else - regexp; - case _ => error("unexpected token:" + token2string(token) ); - } - + if( token != TOKEN_PCDATA ) + ELEMENTS(regexp); + else { + nextToken; + token match { + case RPAREN => + PCDATA + case CHOICE => + val res = MIXED(choiceRest(Eps)); + sOpt; + accept( RPAREN ); + accept( STAR ); + res + case _ => + error("unexpected token:" + token2string(token) ); + } + } + + case _ => + error("unexpected token:" + token2string(token) ); + } // sopt ::= S? def sOpt = if( token == S ) nextToken; // (' S? mixed ::= '#PCDATA' S? ')' // | '#PCDATA' (S? '|' S? atom)* S? ')*' + /* def mixed = { accept( TOKEN_PCDATA ); sOpt; @@ -72,7 +87,7 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA Star( t ) } } - +*/ // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ] def regexp:RegExp = { //Console.println("regexp, token = "+token2string(token)); diff --git a/sources/scala/xml/dtd/Tokens.scala b/sources/scala/xml/dtd/Tokens.scala index 458a1b37e4..589e3e4f13 100644 --- a/sources/scala/xml/dtd/Tokens.scala +++ b/sources/scala/xml/dtd/Tokens.scala @@ -6,7 +6,6 @@ class Tokens { final val TOKEN_PCDATA = 0; final val NAME = 1; - final val EMPTY = 2; final val LPAREN = 3; final val RPAREN = 4; final val COMMA = 5; @@ -20,7 +19,6 @@ class Tokens { final def token2string( i:int ):String = i.match { case 0 => "#PCDATA"; case 1 => "NAME"; - case 2 => "EMPTY"; case 3 => "("; case 4 => ")"; case 5 => ","; diff --git a/sources/scala/xml/parsing/FactoryAdapter.scala b/sources/scala/xml/parsing/FactoryAdapter.scala index d634a400d5..e23032ca70 100644 --- a/sources/scala/xml/parsing/FactoryAdapter.scala +++ b/sources/scala/xml/parsing/FactoryAdapter.scala @@ -63,7 +63,7 @@ abstract class FactoryAdapter extends DefaultHandler() { * @param text * @return a new Text node. */ - def createText( text:String ):Text[String]; // abstract + def createText( text:String ):Text; // abstract // // ContentHandler methods diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala index b21b323f68..3096e5879d 100644 --- a/sources/scala/xml/parsing/MarkupParser.scala +++ b/sources/scala/xml/parsing/MarkupParser.scala @@ -52,6 +52,8 @@ abstract class MarkupParser with TokenTests { */ def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = { + //Console.println("(DEBUG) prolog"); + var info_ver: Option[String] = None; var info_enc: Option[String] = None; var info_stdl: Option[Boolean] = None; @@ -113,6 +115,9 @@ abstract class MarkupParser with TokenTests { */ def document(): Document = { + + //Console.println("(DEBUG) document"); + this.dtd = null; var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] = Tuple3(None,None,None); @@ -145,6 +150,7 @@ abstract class MarkupParser with TokenTests { reportSyntaxError("document must contain exactly one element"); Console.println(children.toList); } + val doc = new Document(); doc.children = children; doc.docElem = theNode; @@ -439,24 +445,29 @@ abstract class MarkupParser with TokenTests { case 'S' => nextch; xToken("YSTEM"); + xSpace; val sysID = systemLiteral(); new SystemID(sysID); case 'P' => nextch; xToken("UBLIC"); + xSpace; val pubID = pubidLiteral(); xSpace; val sysID = systemLiteral(); new PublicID(pubID, sysID); } + + /** parses document type declaration and assigns it to instance variable * dtd. * * <! parseDTD ::= DOCTYPE name ... > */ def parseDTD(): Unit = { // dirty but fast + //Console.println("(DEBUG) parseDTD"); var extID: ExternalID = null; if(this.dtd != null) - reportSyntaxError("unexpected character"); + reportSyntaxError("unexpected character (DOCTYPE already defined"); xToken("DOCTYPE"); xSpace; val n = xName; @@ -469,8 +480,10 @@ abstract class MarkupParser with TokenTests { if('[' == ch) { // internal subset nextch; /* TODO */ - while(']' != ch) - nextch; + //Console.println("hello"); + intSubset(); + //while(']' != ch) + // nextch; // TODO: do the DTD parsing?? ?!?!?!?!! xToken(']'); xSpaceOpt; @@ -478,6 +491,7 @@ abstract class MarkupParser with TokenTests { xToken('>'); this.dtd = new DTD { override var externalID = extID; + override val decls = MarkupParser.this.decls.reverse; } } @@ -631,6 +645,7 @@ abstract class MarkupParser with TokenTests { nextch; while (ch != endch) { putChar(ch); + //Console.println("hello '"+ch+"'"+isPubIDChar(ch)); if(!isPubIDChar(ch)) reportSyntaxError("char '"+ch+"' is not allowed in public id"); nextch; @@ -646,13 +661,15 @@ abstract class MarkupParser with TokenTests { // def intSubset(): Unit = { + //Console.println("(DEBUG) intSubset()"); xSpace; - while(']' != ch) + while(']' != ch) { ch match { case '%' => nextch; decls = PEReference(xName) :: decls; - xToken(';') + xToken(';'); + xSpace; //peReference case '<' => nextch; @@ -682,9 +699,12 @@ abstract class MarkupParser with TokenTests { notationDecl(); } } - case _ => - reportSyntaxError("unexpected character"); + xSpace; + case _ => + reportSyntaxError("unexpected character '"+ch+"'"); + nextch; } + } } /** <! element := ELEMENT @@ -702,33 +722,40 @@ abstract class MarkupParser with TokenTests { val cmstr = cbuf.toString(); cbuf.setLength( 0 ); val cm = ContentModel.parse(cmstr); - decls = ElemDecl(n, cm, null)::decls; + decls = ElemDecl(n, cm)::decls; } - /** <! element := ELEMENT + /** <! attlist := ATTLIST */ def attrDecl() = { xToken("TTLIST"); xSpace; val n = xName; + xSpace; var attList: List[AttrDecl] = Nil; // later: find the elemDecl for n while('>' != ch) { - Console.println(""); val aname = xName; + //Console.println("attribute name: "+aname); var defdecl: DefaultDecl = null; xSpace; + // could be enumeration (foo,bar) parse this later :-/ while('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { if(!isSpace(ch)) cbuf.append(ch); nextch; } + val atpe = cbuf.toString(); + cbuf.setLength(0); + //Console.println("attr type: "+atpe); ch match { case '\'' | '"' => val defValue = xAttributeValue(); // default value defdecl = DEFAULT(false, defValue); - case '#' => xName.match { + case '#' => + nextch; + xName.match { case "FIXED" => xSpace; val defValue = xAttributeValue(); // default value @@ -742,7 +769,7 @@ abstract class MarkupParser with TokenTests { } xSpaceOpt; - attList = AttrDecl(xName, cbuf.toString(), defdecl) :: attList; + attList = AttrDecl(aname, atpe, defdecl) :: attList; cbuf.setLength(0); } nextch; @@ -752,23 +779,26 @@ abstract class MarkupParser with TokenTests { /** <! element := ELEMENT */ def entityDecl() = { + //Console.println("entityDecl()"); var isParameterEntity = false; var entdef: EntityDef = null; xToken("NTITY"); xSpace; if('%' == ch) { + nextch; isParameterEntity = true; xSpace; } val n = xName; xSpace; - + //Console.println("hello"); val res = ch match { case 'S' | 'P' => //sy val extID = externalID(); if(isParameterEntity) { - + xSpaceOpt; + xToken('>'); ParameterEntityDecl(n, ExtDef(extID)) } else { // notation? @@ -778,21 +808,27 @@ abstract class MarkupParser with TokenTests { xToken("NDATA"); xSpace; val notat = xName; - xSpace; + xSpaceOpt; + xToken('>'); UnparsedEntityDecl(n, extID, notat); } else - + nextch; ParsedEntityDecl(n, ExtDef(extID)); } case '"' | '\'' => + //Console.println("hello 2"); val av = xAttributeValue(); + xSpaceOpt; + xToken('>'); + //Console.println("hello 3"); if(isParameterEntity) ParameterEntityDecl(n, IntDef(av)) else ParsedEntityDecl(n, IntDef(av)); } + //Console.println("res = "+res); decls = res :: decls; } // entityDecl @@ -803,8 +839,23 @@ abstract class MarkupParser with TokenTests { xSpace; val notat = xName; xSpace; - val extID = externalID(); - xSpace; + val extID = if(ch == 'S') { + externalID(); + } else if(ch == 'P') { + /** PublicID (without system, only used in NOTATION) */ + nextch; + xToken("UBLIC"); + xSpace; + val pubID = pubidLiteral(); + xSpaceOpt; + val sysID = if(ch != '>') + systemLiteral() + else + null; + new PublicID(pubID, sysID); + } else + error("PUBLIC or SYSTEM expected"); + xSpaceOpt; xToken('>'); decls = NotationDecl(notat, extID) :: decls; } diff --git a/sources/scala/xml/parsing/TokenTests.scala b/sources/scala/xml/parsing/TokenTests.scala index c148d484f3..d06660303d 100644 --- a/sources/scala/xml/parsing/TokenTests.scala +++ b/sources/scala/xml/parsing/TokenTests.scala @@ -76,13 +76,17 @@ trait TokenTests { } else false; } - def isPubIDChar( c:Char ) = c match { - case '\u0020' | '\u000D' | '\u000A' => true; - case _ if - ('0' < c && c < '9')||('a' < c && c < 'z')||('A' < c && c < 'Z') => true; - case '-' | '\''| '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | - '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%' => true - case _ => false; + def isPubIDChar( c:Char ) = { + //Console.println("char: '"+c+"'"); + c match { + case '\u0020' | '\u000D' | '\u000A' => true; + case _ if + ('0' <= c && c <= '9')||('a' <= c && c <= 'z')||('A' <= c && c <= 'Z') => true; + case '-' | '\''| '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' | + '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%' => true + case _ => //Console.println("false: '"+c+"'"); + false; + } } /** @@ -117,11 +121,16 @@ trait TokenTests { s.indexOf('"') == -1 || s.indexOf('\'') == -1 } - def checkPubID( s:String ):boolean = { + def checkPubID( s:String ): Boolean = { + //Console.println("checkPubID of \""+s+"\""); if( s.length() > 0 ) { val z:Seq[Char] = s; val y = z.elements; - while( y.hasNext && isPubIDChar( y.next ) ){}; + var c = ' '; + while( y.hasNext && isPubIDChar( c ) ){ + //Console.println(c); + c = y.next + }; !y.hasNext } else true } -- cgit v1.2.3