summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorburaq <buraq@epfl.ch>2005-05-04 17:37:37 +0000
committerburaq <buraq@epfl.ch>2005-05-04 17:37:37 +0000
commit05ac4be4a37e498a88f35ad6b4e9eb3023c41210 (patch)
tree94b8ad0831ac51531ac5d2c3698dd47731dc4c29
parentda0dcd118873c9ea86cd98ba1eadc74f8a1bb9e3 (diff)
downloadscala-05ac4be4a37e498a88f35ad6b4e9eb3023c41210.tar.gz
scala-05ac4be4a37e498a88f35ad6b4e9eb3023c41210.tar.bz2
scala-05ac4be4a37e498a88f35ad6b4e9eb3023c41210.zip
XML library, DTD validation (changes in parser,...
XML library, DTD validation (changes in parser, additions to scala.xml.dtd)
-rw-r--r--config/list/library.lst5
-rw-r--r--sources/scala/xml/Atom.scala38
-rw-r--r--sources/scala/xml/Molecule.scala38
-rw-r--r--sources/scala/xml/Node.scala3
-rw-r--r--sources/scala/xml/PrettyPrinter.scala4
-rw-r--r--sources/scala/xml/Text.scala15
-rw-r--r--sources/scala/xml/TextBuffer.scala2
-rw-r--r--sources/scala/xml/TypeSymbol.scala1
-rw-r--r--sources/scala/xml/Utility.scala23
-rw-r--r--sources/scala/xml/dtd/ContentModel.scala99
-rw-r--r--sources/scala/xml/dtd/DTD.scala31
-rw-r--r--sources/scala/xml/dtd/Decl.scala144
-rw-r--r--sources/scala/xml/dtd/DtdTypeSymbol.scala1
-rw-r--r--sources/scala/xml/dtd/ExternalID.scala43
-rw-r--r--sources/scala/xml/dtd/Parser.scala51
-rw-r--r--sources/scala/xml/dtd/Tokens.scala2
-rw-r--r--sources/scala/xml/parsing/FactoryAdapter.scala2
-rw-r--r--sources/scala/xml/parsing/MarkupParser.scala87
-rw-r--r--sources/scala/xml/parsing/TokenTests.scala27
19 files changed, 462 insertions, 154 deletions
diff --git a/config/list/library.lst b/config/list/library.lst
index 1e86a6ef70..670e328265 100644
--- a/config/list/library.lst
+++ b/config/list/library.lst
@@ -214,6 +214,7 @@ util/regexp/WordExp.scala
util/logging/Logged.scala
util/logging/ConsoleLogger.scala
+xml/Atom.scala
xml/Comment.scala
xml/Document.scala
xml/Elem.scala
@@ -233,6 +234,7 @@ xml/SpecialNode.scala
xml/Text.scala
xml/TextBuffer.scala
xml/TopScope.scala
+xml/TypeSymbol.scala
xml/UnprefixedAttribute.scala
xml/Utility.scala
xml/XML.scala
@@ -245,7 +247,8 @@ xml/dtd/ExternalID.scala
xml/dtd/Parser.scala
xml/dtd/Scanner.scala
xml/dtd/Tokens.scala
-xml/dtd/Validation.scala
+xml/dtd/DtdTypeSymbol.scala
+#xml/dtd/Validation.scala
xml/dtd/ValidationException.scala
xml/factory/NodeFactory.scala
diff --git a/sources/scala/xml/Atom.scala b/sources/scala/xml/Atom.scala
new file mode 100644
index 0000000000..1b44c6b104
--- /dev/null
+++ b/sources/scala/xml/Atom.scala
@@ -0,0 +1,38 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2004, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+** $Id$
+\* */
+
+package scala.xml;
+
+/** an XML node holding a single data value of type A, labelled "#PCDATA".
+ * Used in both non-bound and bound XML
+ * representations
+ * @author Burak Emir
+ * @param data the value contained in this node; hashCode and toString
+ * call methods on it, so it should not be null.
+ */
+class Atom[+A]( val data: A ) extends SpecialNode {
+
+ final override def typeTag$:Int = -1;
+
+ /** the constant "#PCDATA"
+ */
+ def label = "#PCDATA";
+
+ override def equals(x:Any) = x match {
+ case s:Atom[A] => data == s.data ;
+ case _ => false;
+ }
+
+ /** hashcode for this Atom, taken from its data value */
+ override def hashCode() =
+ data.hashCode();
+
+ /** appends data.toString() to sb, passed through Utility.escape */
+ def toString(sb:StringBuffer) =
+ Utility.escape( data.toString(), sb );
+
+}
diff --git a/sources/scala/xml/Molecule.scala b/sources/scala/xml/Molecule.scala
new file mode 100644
index 0000000000..b1c965eadc
--- /dev/null
+++ b/sources/scala/xml/Molecule.scala
@@ -0,0 +1,38 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2004, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+** $Id$
+\* */
+
+package scala.xml;
+
+/** an XML node holding a list of data values of type A, labelled "#PCDATA".
+ * Used in both non-bound and bound XML
+ * representations
+ * @author Burak Emir
+ * @param list the values contained in this node; hashCode and toString
+ * call methods on it, so it should not be null.
+ */
+class Molecule[+A]( val list: List[A] ) extends SpecialNode {
+
+ final override def typeTag$:Int = -1;
+
+ /** the constant "#PCDATA"
+ */
+ def label = "#PCDATA";
+
+ final override def equals(x:Any) = x match {
+ case s:Molecule[A] => list == s.list ;
+ case _ => false;
+ }
+
+ /** hashcode for this Molecule, taken from its list */
+ override def hashCode() =
+ list.hashCode();
+
+ /** appends the elements of list to sb, separated by single spaces; nothing is escaped here */
+ def toString(sb:StringBuffer) =
+ sb.append(list.mkString(""," ",""))
+
+}
diff --git a/sources/scala/xml/Node.scala b/sources/scala/xml/Node.scala
index 58158eca9f..d9ee68e03c 100644
--- a/sources/scala/xml/Node.scala
+++ b/sources/scala/xml/Node.scala
@@ -111,4 +111,7 @@ abstract class Node extends NodeSeq {
sb.append(label);
}
+ /** returns a type symbol (e.g. DTD, XSD), default null */
+ def xmlType(): TypeSymbol = null;
+
}
diff --git a/sources/scala/xml/PrettyPrinter.scala b/sources/scala/xml/PrettyPrinter.scala
index aa5181326f..0e48042b48 100644
--- a/sources/scala/xml/PrettyPrinter.scala
+++ b/sources/scala/xml/PrettyPrinter.scala
@@ -126,7 +126,7 @@ class PrettyPrinter( width:Int, step:Int ) {
val it = n.child.elements;
while( it.hasNext )
it.next match {
- case _:Text[Any] | _:Comment | _:EntityRef | _:ProcInstr =>
+ case _:Atom[Any] | _: Molecule[Any] | _:Comment | _:EntityRef | _:ProcInstr =>
case _:Node => return true;
}
return false
@@ -135,7 +135,7 @@ class PrettyPrinter( width:Int, step:Int ) {
protected def traverse( node:Node, pscope: NamespaceBinding, ind:int ):Unit = {
node match {
- case _:Text[Any] | _:Comment | _:EntityRef | _:ProcInstr =>
+ case _:Atom[Any] | _:Molecule[Any] | _:Comment | _:EntityRef | _:ProcInstr =>
makeBox( ind, node.toString() );
case _:Node =>
diff --git a/sources/scala/xml/Text.scala b/sources/scala/xml/Text.scala
index b784ae5b58..5dc569b81c 100644
--- a/sources/scala/xml/Text.scala
+++ b/sources/scala/xml/Text.scala
@@ -14,7 +14,7 @@ package scala.xml;
* @author Burak Emir
* @param text the text contained in this node, may not be null.
*/
-case class Text[+A]( data: A ) extends SpecialNode {
+case class Text( _data: String ) extends Atom[String](_data) {
if(null == data)
throw new java.lang.NullPointerException("tried to construct Text with null");
@@ -23,23 +23,14 @@ case class Text[+A]( data: A ) extends SpecialNode {
*/
def text = data.toString();
- final override def typeTag$:Int = -1;
-
- /** the constant "#PCDATA"
- */
- def label = "#PCDATA";
-
final override def equals(x:Any) = x match {
case s:String => s.equals( data.toString() );
- case s:Text[A] => data == s.data ;
+ case s:Text => data == s.data ;
case _ => false;
}
- /** hashcode for this Text */
- override def hashCode() = data.hashCode();
-
/** returns text, with some characters escaped according to XML spec */
- def toString(sb:StringBuffer) =
+ override def toString(sb:StringBuffer) =
Utility.escape( data.toString(), sb );
}
diff --git a/sources/scala/xml/TextBuffer.scala b/sources/scala/xml/TextBuffer.scala
index ab286ed318..448fd861d5 100644
--- a/sources/scala/xml/TextBuffer.scala
+++ b/sources/scala/xml/TextBuffer.scala
@@ -39,7 +39,7 @@ class TextBuffer {
}
/** returns an empty sequence if text is only whitespace */
- def toText:Seq[Text[String]] = {
+ def toText:Seq[Text] = {
var len = sb.length(); /* invariant */
if( len == 0 ) return Nil;
diff --git a/sources/scala/xml/TypeSymbol.scala b/sources/scala/xml/TypeSymbol.scala
new file mode 100644
index 0000000000..57b46dc98f
--- /dev/null
+++ b/sources/scala/xml/TypeSymbol.scala
@@ -0,0 +1 @@
+package scala.xml; trait TypeSymbol {}
diff --git a/sources/scala/xml/Utility.scala b/sources/scala/xml/Utility.scala
index a14f7cf8c0..afff9c8317 100644
--- a/sources/scala/xml/Utility.scala
+++ b/sources/scala/xml/Utility.scala
@@ -18,7 +18,7 @@ import scala.collection.mutable;
*/
object Utility with parsing.TokenTests {
- def view(s: String): Text[String] = Text(s);
+ def view(s: String): Text = Text(s);
/* escapes the characters &lt; &gt; &amp; and &quot; from string */
final def escape(text: String): String =
@@ -153,16 +153,25 @@ object Utility with parsing.TokenTests {
}
*/
- def systemLiteralToString(s: String) = {
- val sb = new StringBuffer("SYSTEM ");
- appendQuoted(s, sb);
+ def systemLiteralToString(s: String): String = {
+ val sb = new StringBuffer();
+ systemLiteralToString(sb, s);
sb.toString();
+ }
+
+ def systemLiteralToString(sb: StringBuffer, s: String): StringBuffer = {
+ sb.append("SYSTEM ");
+ appendQuoted(s, sb);
}
- def publicLiteralToString(s: String) = {
- val sb = new StringBuffer("PUBLIC ");
- sb.append('"').append(s).append('"');
+ /** returns the PUBLIC form of a public identifier literal as a string,
+ * i.e. whatever publicLiteralToString(sb, s) appends to an empty buffer.
+ * @param s the public identifier literal
+ * @return "PUBLIC " followed by s in double quotes
+ */
+ def publicLiteralToString(s: String): String = {
+ val sb = new StringBuffer();
+ // must be the PUBLIC variant; calling systemLiteralToString here
+ // would produce a string starting with "SYSTEM "
+ publicLiteralToString(sb, s);
sb.toString();
+ }
+
+ def publicLiteralToString(sb: StringBuffer, s: String): StringBuffer = {
+ sb.append("PUBLIC \"").append(s).append('"')
}
/**
diff --git a/sources/scala/xml/dtd/ContentModel.scala b/sources/scala/xml/dtd/ContentModel.scala
index a0d8bcf059..42885c3e07 100644
--- a/sources/scala/xml/dtd/ContentModel.scala
+++ b/sources/scala/xml/dtd/ContentModel.scala
@@ -9,18 +9,9 @@ object ContentModel extends scala.util.regexp.WordExp {
override def toString() = "ElemName(\""+name+"\")";
}
- case object PCDATA_ extends RegExp {
- final val isNullable = false;
- override def toString() = "PCDATA_";
- }
-
- case object ANY_ extends RegExp {
- final val isNullable = true;
- override def toString() = "ANY_";
- }
-
- def parse(s: String): RegExp = Parser.parse( s );
+ def parse(s: String): ContentModel = Parser.parse( s );
+ /*
def isMixed(alt: Alt): Boolean = {
val it = alt.rs.elements;
it.next == PCDATA_ && {
@@ -28,6 +19,7 @@ object ContentModel extends scala.util.regexp.WordExp {
!it.hasNext
}
}
+ */
def getLabels(r: RegExp): scala.collection.Set[String] = {
val s = new scala.collection.mutable.HashSet[String]();
@@ -54,37 +46,86 @@ object ContentModel extends scala.util.regexp.WordExp {
sb.toString();
}
- /* precond: rs.length > 1 */
- private def toString(rs: Seq[RegExp], sb: StringBuffer):Unit = {
+ /* precond: rs.length >= 1 */
+ private def toString(rs: Seq[RegExp], sb: StringBuffer, sep: Char): Unit = {
val it = rs.elements;
- sb.append('(');
toString(it.next, sb);
for(val z <- it) {
- sb.append( ',' );
+ sb.append( sep );
toString( z, sb );
}
- sb.append( ')' );
}
- def toString(r: RegExp, sb:StringBuffer):Unit = {
+ /** appends the DTD syntax of content model c to sb.
+ * Delegates to c.toString(sb) so that this method and the ContentModel
+ * subclasses cannot disagree. A hand-written match here is easy to get
+ * out of step: mixed content must be rendered as "(#PCDATA|" ... ")*",
+ * with the '|' separator and the trailing ")*" that the parser requires.
+ * @param c the content model to render
+ * @param sb the buffer to append to
+ * @return the buffer returned by c.toString(sb)
+ */
+ def toString(c: ContentModel, sb: StringBuffer): StringBuffer =
+ c.toString(sb);
+
+ def toString(r: RegExp, sb:StringBuffer): StringBuffer = {
r match {
- case PCDATA_ => sb.append("PCDATA_");
- case ANY_ => sb.append("ANY_");
- case Eps => sb.append("Eps");
+ case Eps =>
+ sb
+
case Sequ(rs @ _*) =>
- sb.append("Sequ");
- toString(rs, sb);
+ sb.append( '(' ); toString(rs, sb, ','); sb.append( ')' );
+
case Alt(rs @ _*) =>
- sb.append("Alt");
- toString(rs, sb);
+ sb.append( '(' ); toString(rs, sb, '|'); sb.append( ')' );
+
case Star(r: RegExp) =>
- sb.append("Star(");
- toString(r, sb);
- sb.append(')');
+ sb.append( '(' ); toString(r, sb); sb.append( ")*" );
+
case Letter(ElemName(name)) =>
- sb.append("Letter(ElemName(\"");
sb.append(name);
- sb.append("\"))");
+
}
}
}
+
+sealed abstract class ContentModel {
+ override def toString(): String = {
+ val sb = new StringBuffer();
+ toString(sb);
+ sb.toString();
+ }
+
+ def toString(sb:StringBuffer): StringBuffer;
+}
+
+case object PCDATA extends ContentModel {
+ def toString(sb:StringBuffer): StringBuffer = sb.append("(#PCDATA)");
+}
+case object EMPTY extends ContentModel {
+ def toString(sb:StringBuffer): StringBuffer = sb.append("EMPTY");
+}
+case object ANY extends ContentModel {
+ def toString(sb:StringBuffer): StringBuffer = sb.append("ANY");
+}
+
+case class MIXED(r:ContentModel.RegExp) extends ContentModel {
+ def toString(sb:StringBuffer): StringBuffer = {
+ sb.append("(#PCDATA|");
+ ContentModel.toString(r, sb);
+ sb.append(")*");
+ }
+}
+
+case class ELEMENTS(r:ContentModel.RegExp) extends ContentModel {
+ def toString(sb:StringBuffer): StringBuffer =
+ ContentModel.toString(r, sb);
+
+}
diff --git a/sources/scala/xml/dtd/DTD.scala b/sources/scala/xml/dtd/DTD.scala
index 4aef465f45..20a329714f 100644
--- a/sources/scala/xml/dtd/DTD.scala
+++ b/sources/scala/xml/dtd/DTD.scala
@@ -1,5 +1,7 @@
package scala.xml.dtd;
+import scala.collection.mutable.{ HashMap, Map }
+
/** a document type declaration */
abstract class DTD {
@@ -9,15 +11,38 @@ abstract class DTD {
def unparsedEntities: Seq[EntityDecl] = Nil;
- var decls: List[MarkupDecl] = Nil;
+ var elem: Map[String, ElemDecl] =
+ new HashMap[String, ElemDecl]();
+
+ var attr: Map[String, AttListDecl] =
+ new HashMap[String, AttListDecl]();
+
+
+ var decls: List[Decl] = Nil;
//def getElemDecl(elem:String): ElemDecl;
//def getAttribDecl(elem: String, attr: String): AttrDecl;
override def toString() = {
- val s = super.toString();
- "[DTD "+s.substring(s.indexOf('@'), s.length())+"]";
+ val sb = new StringBuffer();
+ sb.append("DTD [\n");
+ if(null != externalID)
+ sb.append(externalID.toString()).append('\n');
+ for(val d <- decls)
+ sb.append(d.toString()).append('\n');
+ sb.append("]").toString()
}
+ /** creates fresh type symbols from declarations */
+ def createTypeSymbols(): Unit = {
+ elem.clear;
+ /*
+ for(val d <- decl)
+ d.match {
+ case ElemDecl(name, contentModel) =>
+ elementType.update(name, new ElementType(name, contentModel)
+ }
+ */
+ }
}
diff --git a/sources/scala/xml/dtd/Decl.scala b/sources/scala/xml/dtd/Decl.scala
index 538e5101f4..2b662e2c99 100644
--- a/sources/scala/xml/dtd/Decl.scala
+++ b/sources/scala/xml/dtd/Decl.scala
@@ -10,55 +10,105 @@
package scala.xml.dtd ;
-import scala.collection.Map ;
-
abstract class Decl ;
-abstract class MarkupDecl extends Decl ;
+abstract class MarkupDecl extends Decl {
+
+ final override def toString(): String = {
+ toString(new StringBuffer()).toString();
+ }
+
+ def toString(sb: StringBuffer): StringBuffer;
+
+}
/** an element declaration
*/
-case class ElemDecl(name: String, contentModel: ContentModel.RegExp, attList: AttListDecl) extends MarkupDecl {
+case class ElemDecl(name: String, contentModel: ContentModel) extends MarkupDecl with DtdTypeSymbol {
//def mixed = ; // to do
- def setAttList(nAttList:AttListDecl) =
- ElemDecl(name, contentModel, nAttList);
+ def toString(sb: StringBuffer): StringBuffer = {
+ sb
+ .append("<!ELEMENT ")
+ .append(name)
+ .append(' ');
+
+ ContentModel.toString(contentModel, sb);
+ sb.append('>');
+ }
+
} // ElemDecl
-case class AttListDecl(name: String, attrs:List[AttrDecl]) extends MarkupDecl;
+case class AttListDecl(name: String, attrs:List[AttrDecl]) extends MarkupDecl with DtdTypeSymbol {
+
+ def toString(sb: StringBuffer): StringBuffer = {
+ sb
+ .append("<!ATTLIST ")
+ .append(name)
+ .append('\n')
+ .append(attrs.mkString("","\n",">"));
+ }
+}
/** an attribute declaration. at this point, the tpe is a string. Future
* versions might provide a way to access the attribute types more
* directly.
*/
case class AttrDecl( name:String, tpe:String, default:DefaultDecl ) {
- final override def toString() = {
- val sb = new StringBuffer("AttrDecl(");
- sb.append('"');
- sb.append( name );
- sb.append('"');
- sb.append(',');
- sb.append('"');
- sb.append( tpe );
- sb.append('"');
- sb.append(',');
- sb.append(default.toString());
- sb.append(')');
- sb.toString();
+
+ final override def toString(): String =
+ toString(new StringBuffer()).toString();
+
+ final def toString(sb: StringBuffer): StringBuffer = {
+ sb.append(" ").append( name ).append(' ').append( tpe ).append(' ');
+ default.toString(sb)
}
+
}
-class EntityDecl extends MarkupDecl;
/** an entity declaration */
+abstract class EntityDecl extends MarkupDecl;
-case class ParsedEntityDecl( name:String, entdef:EntityDef )
- extends EntityDecl;
+/** a parsed general entity declaration */
+case class ParsedEntityDecl( name:String, entdef:EntityDef ) extends EntityDecl {
-case class ParameterEntityDecl(name: String, entdef: EntityDef)
- extends EntityDecl;
+ final def toString(sb: StringBuffer): StringBuffer = {
+ sb.append("<!ENTITY ").append( name ).append(' ');
+ entdef.toString(sb).append('>');
+ }
+}
+
+/** a parameter entity declaration */
+case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl {
-class EntityDef;
+ final def toString(sb: StringBuffer): StringBuffer = {
+ sb.append("<!ENTITY % ").append( name ).append(' ');
+ entdef.toString(sb).append('>');
+ }
+}
+
+/** an unparsed entity declaration
+ * @param name the entity name
+ * @param extID the external identifier of the entity
+ * @param notation the notation name given after NDATA
+ */
+case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl {
+ /** appends "<!ENTITY "+name+" "+extID+" NDATA "+notation+">" to sb */
+ final def toString(sb: StringBuffer): StringBuffer = {
+ // the space separates the name from the external identifier, as in
+ // the other entity declarations
+ sb.append("<!ENTITY ").append( name ).append(' ');
+ extID.toString(sb).append(" NDATA ").append(notation).append('>');
+ }
+}
+/** a notation declaration
+ * @param name the notation name
+ * @param extID the external identifier of the notation
+ */
+case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl {
+ /** appends "<!NOTATION "+name+" "+extID+">" to sb */
+ final def toString(sb: StringBuffer): StringBuffer = {
+ sb.append("<!NOTATION ").append( name ).append(' ');
+ // close the declaration, like the other MarkupDecl subclasses do
+ extID.toString(sb).append('>');
+ }
+}
+
+abstract class EntityDef {
+ final override def toString(): String =
+ toString(new StringBuffer()).toString();
+
+ def toString(sb: StringBuffer): StringBuffer;
+}
case class IntDef(value:String) extends EntityDef {
private def validateValue(): Unit = {
@@ -80,41 +130,53 @@ case class IntDef(value:String) extends EntityDef {
}
}
validateValue();
+
+ final def toString(sb: StringBuffer): StringBuffer =
+ Utility.appendQuoted(value, sb);
+
+}
+
+case class ExtDef(extID:ExternalID) extends EntityDef {
+ final def toString(sb: StringBuffer): StringBuffer =
+ extID.toString(sb);
}
-case class ExtDef(extID:ExternalID) extends EntityDef;
-/** an entity declaration */
-case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl;
-/** a notation declaration */
-case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl;
/** a parsed entity reference */
case class PEReference(ent:String) extends MarkupDecl {
if( !Utility.isName( ent ))
throw new IllegalArgumentException("ent must be an XML Name");
- final override def toString() = "%"+ent+";"
+ final def toString(sb: StringBuffer): StringBuffer =
+ sb.append('%').append(ent).append(';');
}
// default declarations for attributes
-class DefaultDecl ;
+abstract class DefaultDecl {
+ override def toString(): String;
+ def toString(sb: StringBuffer): StringBuffer;
+}
case object REQUIRED extends DefaultDecl {
- final override def toString() = "REQUIRED";
+ final override def toString(): String = "#REQUIRED";
+ final def toString(sb:StringBuffer) = sb.append("#REQUIRED");
}
+
case object IMPLIED extends DefaultDecl {
- final override def toString() = "IMPLIED";
+ final override def toString(): String = "#IMPLIED";
+ final def toString(sb:StringBuffer) = sb.append("#IMPLIED");
}
+
case class DEFAULT(fixed:boolean, attValue:String) extends DefaultDecl {
- final override def toString() = {
- val sb = new StringBuffer("DEFAULT(");
- sb.append( fixed );
- sb.append(',');
+ final override def toString(): String =
+ toString(new StringBuffer()).toString();
+
+ final def toString(sb:StringBuffer): StringBuffer = {
+ if(fixed)
+ sb.append("#FIXED ");
Utility.appendEscapedQuoted( attValue, sb );
- sb.append(')');
- sb.toString()
}
}
diff --git a/sources/scala/xml/dtd/DtdTypeSymbol.scala b/sources/scala/xml/dtd/DtdTypeSymbol.scala
new file mode 100644
index 0000000000..633e11e646
--- /dev/null
+++ b/sources/scala/xml/dtd/DtdTypeSymbol.scala
@@ -0,0 +1 @@
+package scala.xml.dtd; trait DtdTypeSymbol {}
diff --git a/sources/scala/xml/dtd/ExternalID.scala b/sources/scala/xml/dtd/ExternalID.scala
index ea0f7fbc6c..5263bea0d8 100644
--- a/sources/scala/xml/dtd/ExternalID.scala
+++ b/sources/scala/xml/dtd/ExternalID.scala
@@ -16,7 +16,15 @@ package scala.xml.dtd;
* @param text text contained in this node, may not contain "?>"
**/
-class ExternalID ;
+abstract class ExternalID {
+
+ /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */
+ override def toString(): String;
+
+ /** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */
+ def toString(sb: StringBuffer): StringBuffer;
+
+}
/** a system identifier
*
@@ -24,14 +32,18 @@ class ExternalID ;
* @param systemLiteral the system identifier literal
**/
-case class SystemID( systemLiteral:String ) extends ExternalID {
+case class SystemID( systemLiteral:String ) extends ExternalID with parsing.TokenTests{
- if( !Utility.checkSysID( systemLiteral ) )
+ if( !checkSysID( systemLiteral ) )
throw new IllegalArgumentException(
"can't use both \" and ' in systemLiteral"
);
+ /** returns " SYSTEM "+systemLiteral */
final override def toString() =
Utility.systemLiteralToString( systemLiteral );
+
+ final def toString(sb: StringBuffer): StringBuffer =
+ Utility.systemLiteralToString( sb, systemLiteral );
}
@@ -39,15 +51,18 @@ case class SystemID( systemLiteral:String ) extends ExternalID {
*
* @author Burak Emir
* @param publicLiteral the public identifier literal
- * @param systemLiteral the system identifier literal
+ * @param systemLiteral (can be null for notation pubIDs) the system identifier literal
**/
-case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID {
+case class PublicID( publicLiteral:String, systemLiteral:String ) extends ExternalID with parsing.TokenTests{
+ //Console.println("constructing PublicID \""+publicLiteral+"\" "+systemLiteral);
- if( !Utility.checkPubID( publicLiteral ))
+ //Console.println("util returns "+checkPubID( publicLiteral ));
+
+ if( !checkPubID( publicLiteral ))
throw new IllegalArgumentException(
"publicLiteral must consist of PubidChars"
);
- if( !Utility.checkSysID( systemLiteral ) )
+ if( systemLiteral!= null && !checkSysID( systemLiteral ) )
throw new IllegalArgumentException(
"can't use both \" and ' in systemLiteral"
);
@@ -62,8 +77,16 @@ case class PublicID( publicLiteral:String, systemLiteral:String ) extends Extern
final def child = Nil;
/** returns "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral */
- final override def toString() =
- Utility.publicLiteralToString( publicLiteral )
- + Utility.systemLiteralToString( systemLiteral );
+ final override def toString(): String = {
+ toString(new StringBuffer()).toString();
+ }
+ /** appends "PUBLIC "+publicLiteral+" SYSTEM "+systemLiteral to argument */
+ final def toString(sb: StringBuffer): StringBuffer = {
+ Utility.publicLiteralToString( sb, publicLiteral ).append(' ');
+ if(systemLiteral!=null)
+ Utility.systemLiteralToString( sb, systemLiteral );
+ else
+ sb
+ }
}
diff --git a/sources/scala/xml/dtd/Parser.scala b/sources/scala/xml/dtd/Parser.scala
index 410e82ce06..75ad2def5b 100644
--- a/sources/scala/xml/dtd/Parser.scala
+++ b/sources/scala/xml/dtd/Parser.scala
@@ -6,7 +6,7 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA
import ContentModel._ ;
/** parses the argument to a regexp */
- def parse(s:String):RegExp = { initScanner( s ); contentspec }
+ def parse(s:String): ContentModel = { initScanner( s ); contentspec }
// zzz parser methods zzz
def accept( tok:int ) = {
@@ -29,32 +29,47 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA
case _ => s
}
- // contentspec ::= EMPTY|ANY|mixed|regexp
- def contentspec:RegExp = token match {
- case NAME =>
- if( value.equals( "ANY" ) )
- ANY_
- else if( value.equals( "EMPTY" ) )
- Eps
- else
- error("unexpected name:" + value );
+ // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp)
+
+ def contentspec: ContentModel = token match {
+ case NAME => value.match {
+ case "ANY" => ANY
+ case "EMPTY" => EMPTY
+ case _ => error("expected ANY, EMPTY or '(' instead of " + value );
+ }
case LPAREN =>
+
nextToken;
sOpt;
- if( token == TOKEN_PCDATA )
- mixed;
- else
- regexp;
- case _ => error("unexpected token:" + token2string(token) );
- }
-
+ if( token != TOKEN_PCDATA )
+ ELEMENTS(regexp);
+ else {
+ nextToken;
+ token match {
+ case RPAREN =>
+ PCDATA
+ case CHOICE =>
+ val res = MIXED(choiceRest(Eps));
+ sOpt;
+ accept( RPAREN );
+ accept( STAR );
+ res
+ case _ =>
+ error("unexpected token:" + token2string(token) );
+ }
+ }
+
+ case _ =>
+ error("unexpected token:" + token2string(token) );
+ }
// sopt ::= S?
def sOpt = if( token == S ) nextToken;
// (' S? mixed ::= '#PCDATA' S? ')'
// | '#PCDATA' (S? '|' S? atom)* S? ')*'
+ /*
def mixed = {
accept( TOKEN_PCDATA );
sOpt;
@@ -72,7 +87,7 @@ object Parser with Scanner { // a bit too permissive concerning #PCDATA
Star( t )
}
}
-
+*/
// '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ]
def regexp:RegExp = {
//Console.println("regexp, token = "+token2string(token));
diff --git a/sources/scala/xml/dtd/Tokens.scala b/sources/scala/xml/dtd/Tokens.scala
index 458a1b37e4..589e3e4f13 100644
--- a/sources/scala/xml/dtd/Tokens.scala
+++ b/sources/scala/xml/dtd/Tokens.scala
@@ -6,7 +6,6 @@ class Tokens {
final val TOKEN_PCDATA = 0;
final val NAME = 1;
- final val EMPTY = 2;
final val LPAREN = 3;
final val RPAREN = 4;
final val COMMA = 5;
@@ -20,7 +19,6 @@ class Tokens {
final def token2string( i:int ):String = i.match {
case 0 => "#PCDATA";
case 1 => "NAME";
- case 2 => "EMPTY";
case 3 => "(";
case 4 => ")";
case 5 => ",";
diff --git a/sources/scala/xml/parsing/FactoryAdapter.scala b/sources/scala/xml/parsing/FactoryAdapter.scala
index d634a400d5..e23032ca70 100644
--- a/sources/scala/xml/parsing/FactoryAdapter.scala
+++ b/sources/scala/xml/parsing/FactoryAdapter.scala
@@ -63,7 +63,7 @@ abstract class FactoryAdapter extends DefaultHandler() {
* @param text
* @return a new Text node.
*/
- def createText( text:String ):Text[String]; // abstract
+ def createText( text:String ):Text; // abstract
//
// ContentHandler methods
diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala
index b21b323f68..3096e5879d 100644
--- a/sources/scala/xml/parsing/MarkupParser.scala
+++ b/sources/scala/xml/parsing/MarkupParser.scala
@@ -52,6 +52,8 @@ abstract class MarkupParser with TokenTests {
*/
def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {
+ //Console.println("(DEBUG) prolog");
+
var info_ver: Option[String] = None;
var info_enc: Option[String] = None;
var info_stdl: Option[Boolean] = None;
@@ -113,6 +115,9 @@ abstract class MarkupParser with TokenTests {
*/
def document(): Document = {
+
+ //Console.println("(DEBUG) document");
+
this.dtd = null;
var info_prolog: Tuple3[Option[String], Option[String], Option[Boolean]] =
Tuple3(None,None,None);
@@ -145,6 +150,7 @@ abstract class MarkupParser with TokenTests {
reportSyntaxError("document must contain exactly one element");
Console.println(children.toList);
}
+
val doc = new Document();
doc.children = children;
doc.docElem = theNode;
@@ -439,24 +445,29 @@ abstract class MarkupParser with TokenTests {
case 'S' =>
nextch;
xToken("YSTEM");
+ xSpace;
val sysID = systemLiteral();
new SystemID(sysID);
case 'P' =>
nextch; xToken("UBLIC");
+ xSpace;
val pubID = pubidLiteral();
xSpace;
val sysID = systemLiteral();
new PublicID(pubID, sysID);
}
+
+
/** parses document type declaration and assigns it to instance variable
* dtd.
*
* &lt;! parseDTD ::= DOCTYPE name ... >
*/
def parseDTD(): Unit = { // dirty but fast
+ //Console.println("(DEBUG) parseDTD");
var extID: ExternalID = null;
if(this.dtd != null)
- reportSyntaxError("unexpected character");
+ reportSyntaxError("unexpected character (DOCTYPE already defined");
xToken("DOCTYPE");
xSpace;
val n = xName;
@@ -469,8 +480,10 @@ abstract class MarkupParser with TokenTests {
if('[' == ch) { // internal subset
nextch;
/* TODO */
- while(']' != ch)
- nextch;
+ //Console.println("hello");
+ intSubset();
+ //while(']' != ch)
+ // nextch;
// TODO: do the DTD parsing?? ?!?!?!?!!
xToken(']');
xSpaceOpt;
@@ -478,6 +491,7 @@ abstract class MarkupParser with TokenTests {
xToken('>');
this.dtd = new DTD {
override var externalID = extID;
+ override val decls = MarkupParser.this.decls.reverse;
}
}
@@ -631,6 +645,7 @@ abstract class MarkupParser with TokenTests {
nextch;
while (ch != endch) {
putChar(ch);
+ //Console.println("hello '"+ch+"'"+isPubIDChar(ch));
if(!isPubIDChar(ch))
reportSyntaxError("char '"+ch+"' is not allowed in public id");
nextch;
@@ -646,13 +661,15 @@ abstract class MarkupParser with TokenTests {
//
def intSubset(): Unit = {
+ //Console.println("(DEBUG) intSubset()");
xSpace;
- while(']' != ch)
+ while(']' != ch) {
ch match {
case '%' =>
nextch;
decls = PEReference(xName) :: decls;
- xToken(';')
+ xToken(';');
+ xSpace;
//peReference
case '<' =>
nextch;
@@ -682,9 +699,12 @@ abstract class MarkupParser with TokenTests {
notationDecl();
}
}
- case _ =>
- reportSyntaxError("unexpected character");
+ xSpace;
+ case _ =>
+ reportSyntaxError("unexpected character '"+ch+"'");
+ nextch;
}
+ }
}
/** &lt;! element := ELEMENT
@@ -702,33 +722,40 @@ abstract class MarkupParser with TokenTests {
val cmstr = cbuf.toString();
cbuf.setLength( 0 );
val cm = ContentModel.parse(cmstr);
- decls = ElemDecl(n, cm, null)::decls;
+ decls = ElemDecl(n, cm)::decls;
}
- /** &lt;! element := ELEMENT
+ /** &lt;! attlist := ATTLIST
*/
def attrDecl() = {
xToken("TTLIST");
xSpace;
val n = xName;
+ xSpace;
var attList: List[AttrDecl] = Nil;
// later: find the elemDecl for n
while('>' != ch) {
- Console.println("");
val aname = xName;
+ //Console.println("attribute name: "+aname);
var defdecl: DefaultDecl = null;
xSpace;
+ // could be enumeration (foo,bar) parse this later :-/
while('"' != ch && '\'' != ch && '#' != ch && '<' != ch) {
if(!isSpace(ch))
cbuf.append(ch);
nextch;
}
+ val atpe = cbuf.toString();
+ cbuf.setLength(0);
+ //Console.println("attr type: "+atpe);
ch match {
case '\'' | '"' =>
val defValue = xAttributeValue(); // default value
defdecl = DEFAULT(false, defValue);
- case '#' => xName.match {
+ case '#' =>
+ nextch;
+ xName.match {
case "FIXED" =>
xSpace;
val defValue = xAttributeValue(); // default value
@@ -742,7 +769,7 @@ abstract class MarkupParser with TokenTests {
}
xSpaceOpt;
- attList = AttrDecl(xName, cbuf.toString(), defdecl) :: attList;
+ attList = AttrDecl(aname, atpe, defdecl) :: attList;
cbuf.setLength(0);
}
nextch;
@@ -752,23 +779,26 @@ abstract class MarkupParser with TokenTests {
/** &lt;! element := ELEMENT
*/
def entityDecl() = {
+ //Console.println("entityDecl()");
var isParameterEntity = false;
var entdef: EntityDef = null;
xToken("NTITY");
xSpace;
if('%' == ch) {
+ nextch;
isParameterEntity = true;
xSpace;
}
val n = xName;
xSpace;
-
+ //Console.println("hello");
val res = ch match {
case 'S' | 'P' => //sy
val extID = externalID();
if(isParameterEntity) {
-
+ xSpaceOpt;
+ xToken('>');
ParameterEntityDecl(n, ExtDef(extID))
} else { // notation?
@@ -778,21 +808,27 @@ abstract class MarkupParser with TokenTests {
xToken("NDATA");
xSpace;
val notat = xName;
- xSpace;
+ xSpaceOpt;
+ xToken('>');
UnparsedEntityDecl(n, extID, notat);
} else
-
+ nextch;
ParsedEntityDecl(n, ExtDef(extID));
}
case '"' | '\'' =>
+ //Console.println("hello 2");
val av = xAttributeValue();
+ xSpaceOpt;
+ xToken('>');
+ //Console.println("hello 3");
if(isParameterEntity)
ParameterEntityDecl(n, IntDef(av))
else
ParsedEntityDecl(n, IntDef(av));
}
+ //Console.println("res = "+res);
decls = res :: decls;
} // entityDecl
@@ -803,8 +839,23 @@ abstract class MarkupParser with TokenTests {
xSpace;
val notat = xName;
xSpace;
- val extID = externalID();
- xSpace;
+ val extID = if(ch == 'S') {
+ externalID();
+ } else if(ch == 'P') {
+ /** PublicID (without system, only used in NOTATION) */
+ nextch;
+ xToken("UBLIC");
+ xSpace;
+ val pubID = pubidLiteral();
+ xSpaceOpt;
+ val sysID = if(ch != '>')
+ systemLiteral()
+ else
+ null;
+ new PublicID(pubID, sysID);
+ } else
+ error("PUBLIC or SYSTEM expected");
+ xSpaceOpt;
xToken('>');
decls = NotationDecl(notat, extID) :: decls;
}
diff --git a/sources/scala/xml/parsing/TokenTests.scala b/sources/scala/xml/parsing/TokenTests.scala
index c148d484f3..d06660303d 100644
--- a/sources/scala/xml/parsing/TokenTests.scala
+++ b/sources/scala/xml/parsing/TokenTests.scala
@@ -76,13 +76,17 @@ trait TokenTests {
} else false;
}
- def isPubIDChar( c:Char ) = c match {
- case '\u0020' | '\u000D' | '\u000A' => true;
- case _ if
- ('0' < c && c < '9')||('a' < c && c < 'z')||('A' < c && c < 'Z') => true;
- case '-' | '\''| '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' |
- '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%' => true
- case _ => false;
+ def isPubIDChar( c:Char ) = {
+ //Console.println("char: '"+c+"'");
+ c match {
+ case '\u0020' | '\u000D' | '\u000A' => true;
+ case _ if
+ ('0' <= c && c <= '9')||('a' <= c && c <= 'z')||('A' <= c && c <= 'Z') => true;
+ case '-' | '\''| '(' | ')' | '+' | ',' | '.' | '/' | ':' | '=' |
+ '?' | ';' | '!' | '*' | '#' | '@' | '$' | '_' | '%' => true
+ case _ => //Console.println("false: '"+c+"'");
+ false;
+ }
}
/**
@@ -117,11 +121,16 @@ trait TokenTests {
s.indexOf('"') == -1 || s.indexOf('\'') == -1
}
- def checkPubID( s:String ):boolean = {
+ /** checks that every character of s satisfies isPubIDChar.
+ * The empty string is accepted.
+ * @param s the candidate public identifier literal
+ * @return true iff isPubIDChar holds for all characters of s
+ */
+ def checkPubID( s:String ): Boolean = {
+ //Console.println("checkPubID of \""+s+"\"");
if( s.length() > 0 ) {
val z:Seq[Char] = s;
val y = z.elements;
- while( y.hasNext && isPubIDChar( y.next ) ){};
+ // test each character as it is read, so that the final character is
+ // validated too; a loop that tests the previously read character
+ // stops before looking at the last one
+ while( y.hasNext ) {
+ if( !isPubIDChar( y.next ) )
+ return false;
+ };
!y.hasNext
} else true
}