diff options
-rw-r--r-- | sources/scala/xml/MetaData.scala | 9 | ||||
-rw-r--r-- | sources/scala/xml/Node.scala | 2 | ||||
-rw-r--r-- | sources/scala/xml/dtd/ElementValidator.scala | 182 | ||||
-rw-r--r-- | sources/scala/xml/dtd/ValidationException.scala | 5 | ||||
-rw-r--r-- | sources/scala/xml/parsing/MarkupParser.scala | 2 | ||||
-rw-r--r-- | test/files/jvm/xmlstuff.check | 11 | ||||
-rw-r--r-- | test/files/jvm/xmlstuff.scala | 37 |
7 files changed, 206 insertions, 42 deletions
diff --git a/sources/scala/xml/MetaData.scala b/sources/scala/xml/MetaData.scala index 5ddc0f3646..e8ce25e884 100644 --- a/sources/scala/xml/MetaData.scala +++ b/sources/scala/xml/MetaData.scala @@ -53,10 +53,13 @@ abstract class MetaData extends Iterable[MetaData] { } def elements = new Iterator[MetaData] { - var x = MetaData.this; - def hasNext = x.hasNext; + var x: MetaData = _; + def hasNext = null == x || x.hasNext; def next = { - x = x.next; + x = if(null == x) + MetaData.this; + else + x.next; x } } diff --git a/sources/scala/xml/Node.scala b/sources/scala/xml/Node.scala index d9ee68e03c..46beedd24c 100644 --- a/sources/scala/xml/Node.scala +++ b/sources/scala/xml/Node.scala @@ -34,7 +34,7 @@ abstract class Node extends NodeSeq { /** label of this node. I.e. "foo" for <foo/>) */ def label: String; - /** used internally. Text = -1 PI = -2 Comment = -3 EntityRef = -5 */ + /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5 */ def typeTag$: Int = 0; /** the namespace bindings */ diff --git a/sources/scala/xml/dtd/ElementValidator.scala b/sources/scala/xml/dtd/ElementValidator.scala index f301bebd87..483b132a86 100644 --- a/sources/scala/xml/dtd/ElementValidator.scala +++ b/sources/scala/xml/dtd/ElementValidator.scala @@ -3,50 +3,162 @@ package scala.xml.dtd; import ContentModel.ElemName ; import scala.util.automata._ ; +/** validate children and/or attributes of an element + * exceptions are created but not thrown. + */ class ElementValidator() extends Function1[Node,Boolean] { - protected var _dfa: DetWordAutom[ElemName] = _; + var exc: List[ValidationException] = Nil; + + protected var contentModel: ContentModel = _; + protected var dfa: DetWordAutom[ElemName] = _; + protected var adecls: List[AttrDecl] = _; - def setContentModel(cm:ContentModel) = cm match { - //case ANY => true ; - //case EMPTY => //@todo - //case PCDATA => - //case m@MIXED(r) => - case ELEMENTS( r ) => - val nfa = ContentModel.Translator.automatonFrom(r, 1); - _dfa = new SubsetConstruction(nfa).determinize; + /** set content model, enabling element validation */ + def setContentModel(cm:ContentModel) = { + contentModel = cm; cm match { + case ELEMENTS( r ) => + val nfa = ContentModel.Translator.automatonFrom(r, 1); + dfa = new SubsetConstruction(nfa).determinize; + case _ => + dfa = null; + } } - def getIterator(ns: Seq[Node]): Iterator[ElemName] = - ns . toList - . filter { x => x.namespace == null } - . map { x => ElemName(x.label) } - . elements; - /** @pre _dfa != null - */ - def runDFA(ns: Seq[Node]): Boolean = { - var q = 0; - val it = getIterator(ns); - //Console.println("it empty from the start? "+(!it.hasNext)); - while( it.hasNext ) { - val e = it.next; - // Console.println("next = "+e); - // Console.println(" got :"+ElemName(e)); - // Console.println("delta:" + _dfa.delta(q)); - - _dfa.delta(q).get(e).match { - case Some(p) => q = p; - case _ => throw ValidationException("element "+e+" not allowed here") + /** set meta data, enabling attribute validation */ + def setMetaData(adecls: List[AttrDecl]) = + this.adecls = adecls; + + def getIterator(nodes: Seq[Node], skipPCDATA: Boolean): Iterator[ElemName] = + nodes . toList + . filter { x => x match { + case y:SpecialNode => y match { + + case a:Atom[String] if a.data match { case t => t.trim().length == 0 } => + false; // always skip all-whitespace nodes + + case _ => + !skipPCDATA + + } + case _ => + x.namespace == null + }} + . map { x => ElemName(x.label) } + . elements; + + /** check attributes, return true if md corresponds to attribute declarations in adecls. + */ + def check(md: MetaData): Boolean = { + //Console.println("checking md = "+md); + //Console.println("adecls = "+adecls); + //@todo other exceptions + import MakeValidationException._; + val len: Int = exc.length; + var j = 0; + var ok = new scala.collection.mutable.BitSet(adecls.length); + def find(Key:String): AttrDecl = { + var attr: AttrDecl = _; + val jt = adecls.elements; while(j < adecls.length) { + jt.next match { + case a @ AttrDecl(Key, _, _) => attr = a; ok.set(j); j = adecls.length; + case _ => j = j + 1; + } } - //Console.println("q now " + q); + attr } - _dfa.isFinal(q) + val it = md.elements; while(it.hasNext) { + val attr = it.next; + //Console.println("attr:"+attr); + j = 0; + find(attr.key) match { + + case null => + //Console.println("exc"); + exc = fromUndefinedAttribute( attr.key ) :: exc; + + case AttrDecl(_, tpe, DEFAULT(true, fixedValue)) if(attr.value != fixedValue) => + exc = fromFixedAttribute( attr.key, fixedValue, attr.value) :: exc; + + case s => + //Console.println("s: "+s); + + } + } + //Console.println("so far:"+(exc.length == len)); + + val missing = ok.toSet( false ); + j = 0; var kt = adecls.elements; while(kt.hasNext) { + kt.next match { + case AttrDecl(key, tpe, REQUIRED) if !ok(j) => + exc = fromMissingAttribute( key, tpe ) :: exc; + j = j + 1; + case _ => + j = j + 1; + } + } + //Console.println("finish:"+(exc.length == len)); + (exc.length == len) //- true if no new exception } - def apply(n: Node): Boolean = { - var res = (null == _dfa) || runDFA(n.child); - // res = ... // @todo attributes - res + /** check children, return true if conform to content model + * @pre contentModel != null + */ + def check(nodes: Seq[Node]): Boolean = contentModel match { + + case ANY => true ; + + case EMPTY => !getIterator(nodes, false).hasNext + + case PCDATA => !getIterator(nodes, true).hasNext; + + case MIXED(ContentModel.Alt(branches @ _*)) => //@todo + val j = exc.length; + def find(Key: String): Boolean = { + var res = false; + val jt = branches.elements; + while(jt.hasNext && !res) + jt.next match { + case ContentModel.Letter(ElemName(Key)) => res = true; + case _ => + } + res + } + + var it = getIterator(nodes, true); while(it.hasNext) { + var label = it.next.name; + if(!find(label)) { + exc = MakeValidationException.fromUndefinedElement(label) :: exc; + } + } + + (exc.length == j) //- true if no new exception + + case _:ELEMENTS => + var q = 0; + val it = getIterator(nodes, false); + //Console.println("it empty from the start? "+(!it.hasNext)); + while( it.hasNext ) { + val e = it.next; + dfa.delta(q).get(e).match { + case Some(p) => q = p; + case _ => throw ValidationException("element "+e+" not allowed here") + } + //Console.println("q now " + q); + } + dfa.isFinal(q) //- true if arrived in final state } + /** applies various validations - accumulates error messages in exc + * @todo: fail on first error, ignore other errors (rearranging conditions) + */ + def apply(n: Node): Boolean = { + //- ? check children + var res = (null == contentModel) || check( n.child ); + + //- ? check attributes + res = ((null == adecls) || check( n.attributes )) && res; + + res + } } diff --git a/sources/scala/xml/dtd/ValidationException.scala b/sources/scala/xml/dtd/ValidationException.scala index 295c7ce929..065fd401a3 100644 --- a/sources/scala/xml/dtd/ValidationException.scala +++ b/sources/scala/xml/dtd/ValidationException.scala @@ -20,4 +20,9 @@ object MakeValidationException { { if( allKeys.size > 1 ) "s" else "" }+ allKeys ); } + + def fromMissingAttribute( key: String, tpe: String ) = { + new ValidationException("missing value for REQUIRED attribute "+key+" of type "+tpe); + } + } diff --git a/sources/scala/xml/parsing/MarkupParser.scala b/sources/scala/xml/parsing/MarkupParser.scala index 5a5d68aa89..7e6dc73184 100644 --- a/sources/scala/xml/parsing/MarkupParser.scala +++ b/sources/scala/xml/parsing/MarkupParser.scala @@ -937,7 +937,7 @@ abstract class MarkupParser: (MarkupParser with MarkupHandler) extends AnyRef wi case _ if isSpace(ch) => xSpace; case _ => - reportSyntaxError("markupdecl: unexpected character '"+ch+"'"); + reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.asInstanceOf[Int]); nextch; } diff --git a/test/files/jvm/xmlstuff.check b/test/files/jvm/xmlstuff.check index 9803618268..7e88bb5c35 100644 --- a/test/files/jvm/xmlstuff.check +++ b/test/files/jvm/xmlstuff.check @@ -26,5 +26,14 @@ namespaces passed ok passed ok passed ok -validation +validation - elements +passed ok +passed ok +passed ok +passed ok +validation - attributes +passed ok +passed ok +passed ok +passed ok passed ok diff --git a/test/files/jvm/xmlstuff.scala b/test/files/jvm/xmlstuff.scala index 82d58a59a3..8f3520c109 100644 --- a/test/files/jvm/xmlstuff.scala +++ b/test/files/jvm/xmlstuff.scala @@ -157,7 +157,7 @@ DEPRECATED, don't support namespaces in pattern match anymore case _ => false; }); // end tmp */ - Console.println("validation"); + Console.println("validation - elements"); val vtor = new scala.xml.dtd.ElementValidator(); { import scala.xml.dtd.ELEMENTS; @@ -167,6 +167,41 @@ DEPRECATED, don't support namespaces in pattern match anymore Sequ( Letter(ElemName("bar")), Star(Letter(ElemName("baz"))) ))); + + } + assertEquals( vtor( <foo><bar/><baz/><baz/></foo> ), true ); + { + import scala.xml.dtd.MIXED; + import scala.xml.dtd.ContentModel._; + + vtor.setContentModel( + MIXED( + Alt(Letter(ElemName("bar")), + Letter(ElemName("baz")), + Letter(ElemName("bal"))))); } + assertEquals( vtor( <foo><bar/><baz/><baz/></foo> ), true ); + assertEquals( vtor( <foo>ab<bar/>cd<baz/>ed<baz/>gh</foo> ), true ); + assertEquals( vtor( <foo> <ugha/> <bugha/> </foo> ), false ); + + Console.println("validation - attributes"); + vtor.setContentModel(null); + vtor.setMetaData(List()); + assertEquals( vtor( <foo bar="hello"/> ), false ); + + { + import scala.xml.dtd._ ; + vtor.setMetaData(List(AttrDecl("bar","CDATA",IMPLIED))); + } + assertEquals( vtor( <foo href="http://foo.com" bar="hello"/> ), false ); + assertEquals( vtor( <foo bar="hello"/> ), true ); + + { + import scala.xml.dtd._ ; + vtor.setMetaData(List(AttrDecl("bar","CDATA",REQUIRED))); + } + assertEquals( vtor( <foo href="http://foo.com" /> ), false ); + assertEquals( vtor( <foo bar="http://foo.com" /> ), true ); + } |