summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author buraq <buraq@epfl.ch> 2005-05-25 11:40:22 +0000
committer buraq <buraq@epfl.ch> 2005-05-25 11:40:22 +0000
commit 8bdf158f08081c4d5c99b1247e204c8766565ebd (patch)
tree 146a8c1b2fdddb0095c9e38dcdf408c0cd51762d
parent 1af5b9aeedd8bbcc897077bd837b8b0827cf94f8 (diff)
download scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.tar.gz
scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.tar.bz2
scala-8bdf158f08081c4d5c99b1247e204c8766565ebd.zip
improved handling of DTDs
-rw-r--r-- sources/scala/io/Source.scala 23
-rw-r--r-- sources/scala/xml/dtd/DTD.scala 26
-rw-r--r-- sources/scala/xml/dtd/ExternalID.scala 30
-rw-r--r-- sources/scala/xml/parsing/ConstructingParser.scala 39
-rw-r--r-- sources/scala/xml/parsing/MarkupHandler.scala 44
-rw-r--r-- sources/scala/xml/parsing/MarkupParser.scala 452
-rw-r--r-- sources/scala/xml/parsing/ValidatingMarkupHandler.scala 11
7 files changed, 463 insertions, 162 deletions
diff --git a/sources/scala/io/Source.scala b/sources/scala/io/Source.scala
index 3926655d27..65a2bcbff7 100644
--- a/sources/scala/io/Source.scala
+++ b/sources/scala/io/Source.scala
@@ -58,6 +58,11 @@ object Source {
def fromFile(name: String, enc: String): Source =
fromFile( new File( name ), enc);
+ /** creates Source from file with given file: URI
+ */
+ def fromFile(uri: java.net.URI): Source =
+ fromFile(new File(uri));
+
/** creates Source from file, using default character encoding, setting its
* description to filename.
*/
@@ -66,12 +71,7 @@ object Source {
val is = new FileInputStream( file );
is.read( arr );
val s = fromBytes(arr);
- s.descr = new StringBuffer()
- .append( file.getAbsolutePath() )
- .append( File.pathSeparator )
- .append( file.getName() )
- .toString();
- s
+ return setFileDescriptor(file,s);
}
/** creates Source from file, using given character encoding, setting its
@@ -83,8 +83,17 @@ object Source {
is.read( arr );
val s = fromBytes(arr, enc);
s.descr = file.getName();
+ return setFileDescriptor(file,s);
+ }
+
+ def setFileDescriptor(file: File, s: Source): Source = {
+ s.descr = new StringBuffer()
+ .append( "file:" )
+ .append( file.getAbsolutePath() )
+ .toString();
s
}
+
}
/** an iterable representation of source files.
@@ -113,7 +122,7 @@ abstract class Source extends Iterator[Char] {
*/
var ch: Char = _;
- /** description of this source */
+ /** description of this source, default empty */
var descr: String = "";
var nerrors = 0;
diff --git a/sources/scala/xml/dtd/DTD.scala b/sources/scala/xml/dtd/DTD.scala
index 2b13209c39..9cf55f7077 100644
--- a/sources/scala/xml/dtd/DTD.scala
+++ b/sources/scala/xml/dtd/DTD.scala
@@ -12,14 +12,11 @@ abstract class DTD {
def unparsedEntities: Seq[EntityDecl] = Nil;
- var elem: Map[String, ElemDecl] =
- new HashMap[String, ElemDecl]();
+ var elem: Map[String, ElemDecl] = new HashMap[String, ElemDecl]();
- var attr: Map[String, AttListDecl] =
- new HashMap[String, AttListDecl]();
+ var attr: Map[String, AttListDecl] = new HashMap[String, AttListDecl]();
- var ent: Map[String, EntityDecl] =
- new HashMap[String, EntityDecl]();
+ var ent: Map[String, EntityDecl] = new HashMap[String, EntityDecl]();
var decls: List[Decl] = Nil;
@@ -37,24 +34,15 @@ abstract class DTD {
sb.append("]").toString()
}
+ /*
def initializeEntities() = {
for(val x <- decls) x match {
- case y @ ParsedEntityDecl(name, _) => ent.update(name, y);
+ case y @ ParsedEntityDecl(name, _) => ent.update(name, y);
case y @ UnparsedEntityDecl(name, _, _) => ent.update(name, y);
- case y @ ParameterEntityDecl(name, _) => ent.update(name, y);
+ case y @ ParameterEntityDecl(name, _) => ent.update(name, y);
case _ =>
}
}
-
- def replacementText( entityName: String ): Source = {
- ent.get(entityName) match {
- case Some(ParsedEntityDecl(_, IntDef(value))) =>
- Source.fromString(value);
- case Some(_) =>
- Source.fromString("<!-- "+entityName+"; -->");
- case None =>
- Source.fromString("<!-- unknown entity "+entityName+"; -->")
- }
- }
+ */
}
diff --git a/sources/scala/xml/dtd/ExternalID.scala b/sources/scala/xml/dtd/ExternalID.scala
index 5263bea0d8..965d330a99 100644
--- a/sources/scala/xml/dtd/ExternalID.scala
+++ b/sources/scala/xml/dtd/ExternalID.scala
@@ -24,6 +24,8 @@ abstract class ExternalID {
/** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */
def toString(sb: StringBuffer): StringBuffer;
+ def systemId: String;
+
}
/** a system identifier
@@ -32,18 +34,18 @@ abstract class ExternalID {
* @param systemLiteral the system identifier literal
**/
-case class SystemID( systemLiteral:String ) extends ExternalID with parsing.TokenTests{
+case class SystemID( systemId:String ) extends ExternalID with parsing.TokenTests{
- if( !checkSysID( systemLiteral ) )
+ if( !checkSysID( systemId ) )
throw new IllegalArgumentException(
"can't use both \" and ' in systemLiteral"
);
/** returns " SYSTEM "+systemLiteral */
final override def toString() =
- Utility.systemLiteralToString( systemLiteral );
+ Utility.systemLiteralToString( systemId );
final def toString(sb: StringBuffer): StringBuffer =
- Utility.systemLiteralToString( sb, systemLiteral );
+ Utility.systemLiteralToString( sb, systemId );
}
@@ -53,18 +55,18 @@ case class SystemID( systemLiteral:String ) extends ExternalID with parsing.Toke
* @param publicLiteral the public identifier literal
* @param systemLiteral (can be null for notation pubIDs) the system identifier literal
**/
-case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID with parsing.TokenTests{
+case class PublicID( publicId:String, systemId:String ) extends ExternalID with parsing.TokenTests{
//Console.println("constructing PublicID \""+publicLiteral+"\" "+systemLiteral);
//Console.println("util returns "+checkPubID( publicLiteral ));
- if( !checkPubID( publicLiteral ))
+ if( !checkPubID( publicId ))
throw new IllegalArgumentException(
- "publicLiteral must consist of PubidChars"
+ "publicId must consist of PubidChars"
);
- if( systemLiteral!= null && !checkSysID( systemLiteral ) )
+ if( systemId != null && !checkSysID( systemId ) )
throw new IllegalArgumentException(
- "can't use both \" and ' in systemLiteral"
+ "can't use both \" and ' in systemId"
);
/** the constant "#PI" */
@@ -76,16 +78,16 @@ case class PublicID( publicLiteral:String, systemLiteral:String ) extends Extern
/** always empty */
final def child = Nil;
- /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */
+ /** returns "PUBLIC "+publicId+" SYSTEM "+systemId */
final override def toString(): String = {
toString(new StringBuffer()).toString();
}
- /** appends "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral to argument */
+ /** appends "PUBLIC "+publicId+" SYSTEM "+systemId to argument */
final def toString(sb: StringBuffer): StringBuffer = {
- Utility.publicLiteralToString( sb, publicLiteral ).append(' ');
- if(systemLiteral!=null)
- Utility.systemLiteralToString( sb, systemLiteral );
+ Utility.publicLiteralToString( sb, publicId ).append(' ');
+ if(systemId!=null)
+ Utility.systemLiteralToString( sb, systemId );
else
sb
}
diff --git a/sources/scala/xml/parsing/ConstructingParser.scala b/sources/scala/xml/parsing/ConstructingParser.scala
index c525a08c54..7cf3bd6226 100644
--- a/sources/scala/xml/parsing/ConstructingParser.scala
+++ b/sources/scala/xml/parsing/ConstructingParser.scala
@@ -9,7 +9,34 @@
package scala.xml.parsing ;
+import scala.io.Source;
+
object ConstructingParser {
+
+
+ def fromFile(inp: java.io.File, preserveWS: Boolean) = {
+ /* DEBUG
+ val src = Source.fromFile(inp);
+ while(src.hasNext) {
+ Console.print(src.next);
+ if(!src.hasNext) {
+ Console.print("last character!");
+ Console.print(src.ch);
+ }
+ }
+ */
+ val p = new ConstructingParser(Source.fromFile(inp), preserveWS);
+ /*
+ {
+ override def externalSource(systemLiteral: String): Source = {
+ Source.fromFile(new java.io.File(inp.getParent(), systemLiteral));
+ }
+ }
+ */
+ p.nextch;
+ p
+ }
+
def fromSource(inp: scala.io.Source, preserveWS: Boolean) = {
val p = new ConstructingParser(inp, preserveWS);
p.nextch;
@@ -19,12 +46,22 @@ object ConstructingParser {
/** an xml parser. parses XML and invokes callback methods of a MarkupHandler
*/
-class ConstructingParser(inp: scala.io.Source, presWS:Boolean)
+class ConstructingParser(inp: Source, presWS:Boolean)
extends ConstructingHandler
with MarkupParser {
+ override val isValidating = true;
val preserveWS = presWS;
val input = inp;
val handle = this;
+ override def externalSource(systemLiteral: String): Source = {
+ var fileStr = inp.descr;
+ if(inp.descr.startsWith("file:")) {
+ fileStr = inp.descr.substring(5, inp.descr.length());
+ }
+ fileStr = fileStr.substring(0,fileStr.lastIndexOf(java.io.File.separator)+1);
+
+ Source.fromFile(fileStr + systemLiteral);
+ }
}
diff --git a/sources/scala/xml/parsing/MarkupHandler.scala b/sources/scala/xml/parsing/MarkupHandler.scala
index 60b7027ee3..63fb090cc3 100644
--- a/sources/scala/xml/parsing/MarkupHandler.scala
+++ b/sources/scala/xml/parsing/MarkupHandler.scala
@@ -9,15 +9,22 @@
package scala.xml.parsing;
+import scala.io.Source;
+import scala.collection.mutable.{ HashMap, Map }
import scala.xml.dtd._ ;
+import scala.util.logging._;
+
/** class that handles markup - provides callback methods to MarkupParser.
* the default is nonvalidating behaviour
*
* @todo can we ignore more entity declarations (i.e. those with extIDs)?
* @todo expanding entity references
*/
-abstract class MarkupHandler {
+abstract class MarkupHandler with Logged with ConsoleLogger {
+
+ // impl. of Logged
+ //def log(msg:String) = {}
/** returns true is this markup handler is validing */
val isValidating: Boolean = false;
@@ -27,6 +34,21 @@ abstract class MarkupHandler {
var decls: List[scala.xml.dtd.Decl] = Nil;
+ var ent: Map[String, EntityDecl] = new HashMap[String, EntityDecl]();
+
+ def replacementText( entityName: String ): Source = {
+ ent.get(entityName) match {
+ case Some(ParsedEntityDecl(_, IntDef(value))) =>
+ Source.fromString(value);
+ case Some(ParameterEntityDecl(_, IntDef(value))) =>
+ Source.fromString(" "+value+" ");
+ case Some(_) =>
+ Source.fromString("<!-- "+entityName+"; -->");
+ case None =>
+ Source.fromString("<!-- unknown entity "+entityName+"; -->")
+ }
+ }
+
/** callback method invoked by MarkupParser after parsing an element.
*
* @param pos the position in the sourcefile
@@ -60,18 +82,26 @@ abstract class MarkupHandler {
def attListDecl(name: String, attList: List[AttrDecl]): Unit = {}
- def parameterEntityDecl(name: String, edef: EntityDef): Unit = edef match {
- case _:ExtDef if !isValidating =>
- ; // ignore (cf REC-xml 4.4.1)
- case _ =>
- decls = ParameterEntityDecl(name, edef) :: decls;
+ def parameterEntityDecl(name: String, edef: EntityDef): Unit = {
+ //log("parameterEntityDecl("+name+","+edef+")");
+ edef match {
+ case _:ExtDef if !isValidating =>
+ ; // ignore (cf REC-xml 4.4.1)
+ case _ =>
+ val y = ParameterEntityDecl(name, edef);
+ decls = y :: decls;
+ ent.update(name, y);
+ //log("ent.get(..) = "+ent.get(name));
+ }
}
def parsedEntityDecl(name: String, edef: EntityDef): Unit = edef match {
case _:ExtDef if !isValidating =>
; // ignore (cf REC-xml 4.8 and 4.4.1)
case _ =>
- decls = ParsedEntityDecl(name, edef) :: decls;
+ val y = ParsedEntityDecl(name, edef);
+ decls = y :: decls;
+ ent.update(name, y)
}
def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit =
diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala
index 5c77d39bee..53b9e9e6a4 100644
--- a/sources/scala/xml/parsing/MarkupParser.scala
+++ b/sources/scala/xml/parsing/MarkupParser.scala
@@ -19,11 +19,16 @@ import scala.xml.dtd._ ;
* and returns whatever the markup handler returns. Use
* <code>ConstructingParser</code> if you just want to parse XML to
* construct instances of <code>scala.xml.Node</code>.
+ *
+ * While XML elements are returned, DTD declarations - if handled - are
+ * collected using side-effects.
*/
abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef with TokenTests {
val input: Source;
+ def externalSource(systemLiteral: String): Source;
+
//
// variables, values
//
@@ -39,6 +44,12 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
/** holds the position in the source file */
var pos: Int = _;
+ /* true if reading external sources */
+ var isReadingExternal = false;;
+
+ /* true if reading external subset */
+ var inExtSubSet = false;
+
/** holds temporary values of pos */
var tmppos: Int = _;
@@ -56,6 +67,19 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
// methods
//
+ /** &lt;? prolog ::= xml S ... ?&gt;
+ */
+ def xmlProcInstr(): MetaData = {
+ xToken("xml");
+ xSpace;
+ val Pair(md,scp) = xAttributes(TopScope);
+ if(scp != TopScope)
+ reportSyntaxError("no xmlns definitions here, please.");
+ xToken('?');
+ xToken('>');
+ md
+ }
+
/** &lt;? prolog ::= xml S
*/
def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {
@@ -66,51 +90,75 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
var info_enc: Option[String] = None;
var info_stdl: Option[Boolean] = None;
- xToken("xml");
- xSpace;
- val Pair(md,scp) = xAttributes(TopScope);
- xToken('?');
- xToken('>');
+ var m = xmlProcInstr();
+
xSpace;
- if(TopScope == scp) {
- var m = md;
-
- if (!m.isPrefixed && m.key == "version") {
- if (m.value == "1.0") {
- info_ver = Some("1.0");
- m = m.next;
- } else {
- reportSyntaxError("cannot deal with versions != 1.0");
- }
- } else
- reportSyntaxError("VersionInfo expected!");
-
- if (!m.isPrefixed && m.key == "encoding") {
- val enc = m.value;
- if (!isValidIANAEncoding(enc))
- reportSyntaxError("\"" + enc + "\" is not a valid encoding");
- info_enc = Some(enc);
- m = m.next
+
+ if (!m.isPrefixed && m.key == "version") {
+ if (m.value == "1.0") {
+ info_ver = Some("1.0");
+ m = m.next;
+ } else {
+ reportSyntaxError("cannot deal with versions != 1.0");
}
+ } else
+ reportSyntaxError("VersionInfo expected!");
+
+ if (!m.isPrefixed && m.key == "encoding") {
+ val enc = m.value;
+ if (!isValidIANAEncoding(enc))
+ reportSyntaxError("\"" + enc + "\" is not a valid encoding");
+ info_enc = Some(enc);
+ m = m.next
+ }
- if (!m.isPrefixed && m.key == "standalone") {
- m.value match {
- case "yes" =>
- info_stdl = Some(true);
- case "no" =>
- info_stdl = Some(false);
- case _ =>
- reportSyntaxError("either 'yes' or 'no' expected");
- }
- m = m.next
+ if (!m.isPrefixed && m.key == "standalone") {
+ m.value match {
+ case "yes" =>
+ info_stdl = Some(true);
+ case "no" =>
+ info_stdl = Some(false);
+ case _ =>
+ reportSyntaxError("either 'yes' or 'no' expected");
}
+ m = m.next
+ }
+
+ if (m != Null)
+ reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!");
+ Tuple3(info_ver,info_enc,info_stdl)
+ }
+
+ /** prolog, but without standalone */
+ def textDecl(): Tuple2[Option[String],Option[String]] = {
+
+ var info_ver: Option[String] = None;
+ var info_enc: Option[String] = None;
+
+ var m = xmlProcInstr();
- if (m != Null)
- reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!");
+ if (!m.isPrefixed && m.key == "version") {
+ if (m.value == "1.0") {
+ info_ver = Some("1.0");
+ m = m.next;
+ } else {
+ reportSyntaxError("cannot deal with versions != 1.0");
+ }
} else
- reportSyntaxError("no xmlns definitions here, please");
+ reportSyntaxError("VersionInfo expected!");
+
+ if (m != Null && !m.isPrefixed && m.key == "encoding") {
+ val enc = m.value;
+ if (!isValidIANAEncoding(enc))
+ reportSyntaxError("\"" + enc + "\" is not a valid encoding");
+ info_enc = Some(enc);
+ m = m.next
+ }
- Tuple3(info_ver,info_enc,info_stdl)
+ if (m != Null)
+ reportSyntaxError("VersionInfo EncodingDecl? SDDecl? or '?>' expected!");
+
+ Tuple2(info_ver, info_enc);
}
/**
@@ -152,7 +200,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
elemCount = elemCount + 2;
case m:Node =>
elemCount = elemCount + 1;
- theNode = m;
+ theNode = m;
}
if (1 != elemCount) {
reportSyntaxError("document must contain exactly one element");
@@ -177,13 +225,21 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
/** this method assign the next character to ch and advances in input */
def nextch: Unit = {
if (curInput.hasNext) {
- ch = input.next;
- pos = input.pos;
+ ch = curInput.next;
+ pos = curInput.pos;
+ } else {
+ //Console.println("nextch, curInput.hasNext == false ") ;
+ //Console.println("nextch, isReadingExternal == "+isReadingExternal);
+ //Console.println("nextch, Nil != inpStack == "+(Nil!=inpStack));
+ if ((!isReadingExternal) && (Nil != inpStack)) {
+ /** for external source, we like to be notified of eof! */
+ pop();
+ } else {
+ eof = true;
+ ch = 0.asInstanceOf[Char];
+ //throw new Exception("this is the end")
+ }
}
- else if (Nil != inpStack)
- pop();
- else
- eof = true;
}
//final val enableEmbeddedExpressions: Boolean = false;
@@ -193,8 +249,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
def xToken(that: Char): Unit = {
if (ch == that)
nextch;
- else
+ else {
reportSyntaxError("'" + that + "' expected instead of '" + ch + "'");
+ error("FATAL");
+ }
}
def xToken(that: Seq[Char]): Unit = {
@@ -274,6 +332,24 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
}
+ /** entity value, terminated by either ' or ". value may not contain &lt;.
+ * AttValue ::= `'` { _ } `'`
+ * | `"` { _ } `"`
+ */
+ def xEntityValue(): String = {
+ val endch = ch;
+ nextch;
+ while (ch != endch) {
+ putChar(ch);
+ nextch;
+ }
+ nextch;
+ val str = cbuf.toString();
+ cbuf.setLength(0);
+ str
+ }
+
+
/** parse a start or empty tag.
* [40] STag ::= '&lt;' Name { S Attribute } [S]
* [44] EmptyElemTag ::= '&lt;' Name { S Attribute } [S]
@@ -382,6 +458,26 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
}
}
+ /** '<' content1 ::= ... */
+ def content1(pscope: NamespaceBinding, ts: NodeBuffer): Unit = {
+ ch match {
+ case '!' =>
+ nextch;
+ if ('[' == ch) // CDATA
+ ts + xCharData;
+ else if ('D' == ch) // doctypedecl, parse DTD
+ parseDTD();
+ else // comment
+ ts + xComment;
+ case '?' => // PI
+ nextch;
+ ts + xProcInstr;
+ case _ =>
+ ts + element1(pscope); // child
+ }
+ }
+
+ /** content1 ::= '&lt;' content1 | '&amp;' charref ... */
def content(pscope: NamespaceBinding): NodeSeq = {
var ts = new NodeBuffer;
var exit = eof;
@@ -399,24 +495,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
nextch;
//Console.println("after ch = '"+ch+"' line="+scala.io.Position.line(pos)+" pos="+pos);
- ch match {
- case '/' =>
- exit = true; // end tag
- case '!' =>
- nextch;
- if ('[' == ch) // CDATA
- ts + xCharData;
- else if ('D' == ch) // doctypedecl, parse DTD
- parseDTD();
- else // comment
- ts + xComment;
- case '?' => // PI
- nextch;
- ts + xProcInstr;
- case _ =>
- ts + element1(pscope); // child
- }
-
+ if('/' ==ch)
+ exit = true; // end tag
+ else
+ content1(pscope, ts)
//case '{' =>
/* if( xCheckEmbeddedBlock ) {
ts.appendAll(xEmbeddedExpr);
@@ -496,6 +578,44 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
extID = externalID();
xSpace;
}
+
+ /* parse external subset of DTD
+ */
+
+ if(null != extID) {
+ val extSubsetSrc = externalSource( extID.systemId );
+
+ isReadingExternal = true;
+ inExtSubSet = true;
+ /*
+ .indexOf(':') != -1) { // assume URI
+ Source.fromFile(new java.net.URI(extID.systemLiteral));
+ } else {
+ Source.fromFile(extID.systemLiteral);
+ }
+ */
+ //Console.println("I'll print it now");
+ val old = curInput;
+ tmppos = curInput.pos;
+ val oldch = ch;
+ curInput = extSubsetSrc;
+ pos = 0;
+ nextch;
+ extSubset();
+
+ isReadingExternal = false;
+ inExtSubSet = false;
+
+ curInput = old;
+ pos = curInput.pos;
+ ch = curInput.ch;
+ eof = false;
+ //while(extSubsetSrc.hasNext)
+ //Console.print(extSubsetSrc.next);
+
+ //Console.println("returned from external, current ch = "+ch )
+ }
+
if ('[' == ch) { // internal subset
nextch;
/* TODO */
@@ -512,7 +632,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
override var externalID = extID;
override val decls = handle.decls.reverse;
}
- this.dtd.initializeEntities();
+ //this.dtd.initializeEntities();
}
def element(pscope: NamespaceBinding): NodeSeq = {
@@ -556,10 +676,10 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
*/
def xName: String = {
if (isNameStart(ch)) {
- do {
+ while (isNameChar(ch)) {
putChar(ch);
nextch;
- } while (isNameChar(ch));
+ }
val n = cbuf.toString().intern();
cbuf.setLength(0);
n
@@ -574,7 +694,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
def xEQ = { xSpaceOpt; xToken('='); xSpaceOpt }
/** skip optional space S? */
- def xSpaceOpt = while (isSpace(ch)) { nextch; };
+ def xSpaceOpt = while (isSpace(ch) && !eof) { nextch; };
/** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
def xSpace = {
@@ -681,53 +801,157 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
// dtd parsing
//
- def intSubset(): Unit = {
- //Console.println("(DEBUG) intSubset()");
- xSpace;
- while (']' != ch) {
+ def extSubset(): Unit = {
+ var textdecl:Tuple2[Option[String],Option[String]] = null;
+ if(ch=='<') {
+ nextch;
+ if(ch=='?') {
+ nextch;
+ textdecl = textDecl()
+ } else
+ markupDecl1();
+ }
+ while(curInput.hasNext) {
+ markupDecl();
+ }
+ }
+
+ def markupDecl1() = {
+ def doInclude() = {
+ xToken('['); while(']' != ch) markupDecl(); nextch // ']'
+ }
+ def doIgnore() = {
+ xToken('['); while(']' != ch) nextch; nextch; // ']'
+ }
+ if('?' == ch) {
+ nextch;
+ xProcInstr; // simply ignore processing instructions!
+ } else {
+ xToken('!');
ch match {
- case '%' =>
+ case '-' =>
+ xComment ; // ignore comments
+
+ case 'E' =>
nextch;
- handle.peReference(xName);
- xToken(';');
- xSpace;
- //peReference
- case '<' =>
- nextch;
+ if ('L' == ch) {
+ nextch;
+ elementDecl()
+ } else
+ entityDecl();
- if('?' == ch)
- xProcInstr; // simply ignore processing instructions!
- else {
- xToken('!');
- ch match {
- case '-' =>
- xComment ; // ignore comments
+ case 'A' =>
+ nextch;
+ attrDecl();
- case 'E' =>
- nextch;
- if ('L' == ch) {
- nextch;
- elementDecl()
- } else
- entityDecl();
+ case 'N' =>
+ nextch;
+ notationDecl();
- case 'A' =>
+ case '[' if inExtSubSet =>
+ nextch;
+ xSpaceOpt;
+ ch match {
+ case '%' =>
nextch;
- attrDecl();
-
- case 'N' =>
+ val ent = xName;
+ xToken(';');
+ xSpaceOpt;
+ /*
+ Console.println("hello, pushing!");
+ {
+ val test = replacementText(ent);
+ while(test.hasNext)
+ Console.print(test.next);
+ } */
+ push(ent);
+ xSpaceOpt;
+ //Console.println("hello, getting name");
+ val stmt = xName;
+ //Console.println("hello, got name");
+ xSpaceOpt;
+ //Console.println("how can we be eof = "+eof);
+
+ // eof = true because not external?!
+ if(!eof)
+ error("expected only INCLUDE or IGNORE");
+
+ pop();
+
+
+ //Console.println("hello, popped");
+ stmt.match {
+ // parameter entity
+ case "INCLUDE" =>
+ doInclude();
+ case "IGNORE" =>
+ doIgnore()
+ }
+ case 'I' =>
nextch;
- notationDecl();
+ ch.match {
+ case 'G' =>
+ nextch;
+ xToken("NORE");
+ xSpaceOpt;
+ doIgnore()
+ case 'N' =>
+ nextch;
+ xToken("NCLUDE");
+ doInclude()
+ }
}
- }
- xSpace;
- case _ =>
- reportSyntaxError("unexpected character '"+ch+"'");
- nextch;
+ xToken(']');
+ xToken('>');
+
+ case _ =>
+ curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl");
+ while(ch!='>')
+ nextch;
+
}
}
}
+ def markupDecl(): Unit = ch match {
+ /** parameter entity reference
+ * n-v: just create PE-reference
+ * v: "parse replacementText into NodeBuffer ?"
+ */
+ case '%' =>
+ nextch;
+ val ent = xName;
+ xToken(';');
+ if(!isValidating)
+ handle.peReference(ent);
+ else {
+ //Console.println("pushed entity "+ent);
+ push(ent);
+ }
+ //peReference
+ case '<' =>
+ nextch;
+ markupDecl1();
+
+ case _ if isSpace(ch) =>
+ xSpace;
+ case _ =>
+ //Console.println("still think am reading external: "+isReadingExternal);
+ reportSyntaxError("markupdecl: unexpected character '"+ch+"'");
+ nextch;
+ }
+
+ /** "rec-xml/#ExtSubset" pe references may not occur within markup
+ declarations
+ */
+ def intSubset(): Unit = {
+ //Console.println("(DEBUG) intSubset()");
+ xSpace;
+ while (']' != ch) {
+ markupDecl()
+ }
+ }
+
/** &lt;! element := ELEMENT
*/
def elementDecl(): Unit = {
@@ -837,7 +1061,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
}
case '"' | '\'' =>
- val av = xAttributeValue();
+ val av = xEntityValue();
xSpaceOpt;
xToken('>');
if (isParameterEntity)
@@ -887,15 +1111,31 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi
}
def push(entityName:String) = {
+ //Console.println("BEFORE PUSHING "+ch);
+ //Console.println("BEFORE PUSHING "+pos);
+ //Console.println("PUSHING "+entityName);
inpStack = curInput :: inpStack;
- curInput = this.dtd.replacementText(entityName);
+ curInput = replacementText(entityName);
nextch;
}
+ /*
+ def push(src:Source) = {
+ curInput = src;
+ nextch;
+ }
+ */
def pop() = {
+ //Console.println("POPPING");
curInput = inpStack.head;
inpStack = inpStack.tail;
- nextch;
+ ch = curInput.ch;
+ pos = curInput.pos;
+ eof = !curInput.hasNext;
+ //Console.println("returned (popped), current ch = "+ch )
+ //Console.println("POPPING ch now "+ch);
+ //Console.println("POPPING ch now "+pos);
+ //nextch;
}
}
diff --git a/sources/scala/xml/parsing/ValidatingMarkupHandler.scala b/sources/scala/xml/parsing/ValidatingMarkupHandler.scala
index a0ee7f4479..3de932cd1b 100644
--- a/sources/scala/xml/parsing/ValidatingMarkupHandler.scala
+++ b/sources/scala/xml/parsing/ValidatingMarkupHandler.scala
@@ -12,14 +12,9 @@ abstract class ValidatingMarkupHandler extends MarkupHandler {
final override def attListDecl(name: String, attList: List[AttrDecl]): Unit =
decls = AttListDecl( name, attList) :: decls;
- final override def parameterEntityDecl(name: String, edef: EntityDef): Unit =
- decls = ParameterEntityDecl( name, edef) :: decls;
-
- final override def parsedEntityDecl(name: String, edef: EntityDef): Unit =
- decls = ParsedEntityDecl( name, edef) :: decls;
-
- final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit =
- decls = UnparsedEntityDecl( name, extID, notat) :: decls;
+ final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = {
+ decls = UnparsedEntityDecl( name, extID, notat) :: decls;
+ }
final override def notationDecl(notat: String, extID: ExternalID): Unit =
decls = NotationDecl( notat, extID) :: decls;