From 4340f799da2c7a7097eb327c353d0d96084197f8 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Thu, 20 Jun 2013 11:39:47 -0700 Subject: Spin off src/library/scala/xml to src/xml/scala/xml. Summary: - Remove the last vestiges of xml from Predef and Contexts. - Change build to compile scala.xml to scala-xml.jar. - Deploy scala-xml module to maven. - Update partest accordingly. Note: An older compiler cannot use the new standard library to compile projects that use XML. Thus, skipping locker will break the build until we use 2.11.0-M4 for STARR. In the future build process, where we drop locker, we would have to release a milestone that supports the old and the new approach to xml. As soon as we'd be using that new milestone for starr, we could drop support for the old approach. --- src/build/bnd/scala-xml.bnd | 5 + src/build/maven/maven-deploy.xml | 1 + src/build/maven/scala-xml-pom.xml | 59 ++ src/build/pack.xml | 5 + .../scala/tools/nsc/typechecker/Contexts.scala | 21 +- src/library/scala/Predef.scala | 9 - src/library/scala/xml/Atom.scala | 47 -- src/library/scala/xml/Attribute.scala | 101 --- src/library/scala/xml/Comment.scala | 31 - src/library/scala/xml/Document.scala | 92 -- src/library/scala/xml/Elem.scala | 135 --- src/library/scala/xml/EntityRef.scala | 40 - src/library/scala/xml/Equality.scala | 107 --- src/library/scala/xml/Group.scala | 42 - .../scala/xml/MalformedAttributeException.scala | 15 - src/library/scala/xml/MetaData.scala | 217 ----- src/library/scala/xml/NamespaceBinding.scala | 83 -- src/library/scala/xml/Node.scala | 198 ----- src/library/scala/xml/NodeBuffer.scala | 47 -- src/library/scala/xml/NodeSeq.scala | 157 ---- src/library/scala/xml/Null.scala | 62 -- src/library/scala/xml/PCData.scala | 44 - src/library/scala/xml/PrefixedAttribute.scala | 61 -- src/library/scala/xml/PrettyPrinter.scala | 263 ------ src/library/scala/xml/ProcInstr.scala | 39 - src/library/scala/xml/QNode.scala | 20 - src/library/scala/xml/SpecialNode.scala | 33 - 
src/library/scala/xml/Text.scala | 39 - src/library/scala/xml/TextBuffer.scala | 46 - src/library/scala/xml/TopScope.scala | 31 - src/library/scala/xml/TypeSymbol.scala | 15 - src/library/scala/xml/Unparsed.scala | 36 - src/library/scala/xml/UnprefixedAttribute.scala | 61 -- src/library/scala/xml/Utility.scala | 410 --------- src/library/scala/xml/XML.scala | 109 --- src/library/scala/xml/Xhtml.scala | 97 --- src/library/scala/xml/dtd/ContentModel.scala | 118 --- src/library/scala/xml/dtd/ContentModelParser.scala | 129 --- src/library/scala/xml/dtd/DTD.scala | 35 - src/library/scala/xml/dtd/Decl.scala | 157 ---- src/library/scala/xml/dtd/DocType.scala | 39 - src/library/scala/xml/dtd/ElementValidator.scala | 132 --- src/library/scala/xml/dtd/ExternalID.scala | 86 -- src/library/scala/xml/dtd/Scanner.scala | 79 -- src/library/scala/xml/dtd/Tokens.scala | 45 - .../scala/xml/dtd/ValidationException.scala | 44 - src/library/scala/xml/dtd/impl/Base.scala | 67 -- .../scala/xml/dtd/impl/BaseBerrySethi.scala | 98 --- src/library/scala/xml/dtd/impl/DetWordAutom.scala | 50 -- src/library/scala/xml/dtd/impl/Inclusion.scala | 70 -- .../scala/xml/dtd/impl/NondetWordAutom.scala | 60 -- .../scala/xml/dtd/impl/PointedHedgeExp.scala | 37 - .../scala/xml/dtd/impl/SubsetConstruction.scala | 108 --- src/library/scala/xml/dtd/impl/SyntaxError.scala | 21 - .../scala/xml/dtd/impl/WordBerrySethi.scala | 162 ---- src/library/scala/xml/dtd/impl/WordExp.scala | 59 -- src/library/scala/xml/factory/Binder.scala | 61 -- .../scala/xml/factory/LoggedNodeFactory.scala | 90 -- src/library/scala/xml/factory/NodeFactory.scala | 61 -- src/library/scala/xml/factory/XMLLoader.scala | 61 -- .../xml/include/CircularIncludeException.scala | 25 - .../xml/include/UnavailableResourceException.scala | 20 - .../scala/xml/include/XIncludeException.scala | 58 -- .../scala/xml/include/sax/EncodingHeuristics.scala | 98 --- .../scala/xml/include/sax/XIncludeFilter.scala | 373 -------- 
src/library/scala/xml/include/sax/XIncluder.scala | 187 ---- src/library/scala/xml/package.scala | 19 - .../scala/xml/parsing/ConstructingHandler.scala | 34 - .../scala/xml/parsing/ConstructingParser.scala | 55 -- .../scala/xml/parsing/DefaultMarkupHandler.scala | 30 - .../scala/xml/parsing/ExternalSources.scala | 38 - src/library/scala/xml/parsing/FactoryAdapter.scala | 187 ---- src/library/scala/xml/parsing/FatalError.scala | 17 - src/library/scala/xml/parsing/MarkupHandler.scala | 127 --- src/library/scala/xml/parsing/MarkupParser.scala | 938 --------------------- .../scala/xml/parsing/MarkupParserCommon.scala | 260 ------ .../xml/parsing/NoBindingFactoryAdapter.scala | 37 - src/library/scala/xml/parsing/TokenTests.scala | 101 --- .../xml/parsing/ValidatingMarkupHandler.scala | 104 --- src/library/scala/xml/parsing/XhtmlEntities.scala | 54 -- src/library/scala/xml/parsing/XhtmlParser.scala | 31 - .../scala/xml/persistent/CachedFileStorage.scala | 129 --- src/library/scala/xml/persistent/Index.scala | 17 - src/library/scala/xml/persistent/SetStorage.scala | 42 - src/library/scala/xml/pull/XMLEvent.scala | 60 -- src/library/scala/xml/pull/XMLEventReader.scala | 157 ---- src/library/scala/xml/pull/package.scala | 42 - .../scala/xml/transform/BasicTransformer.scala | 60 -- src/library/scala/xml/transform/RewriteRule.scala | 28 - .../scala/xml/transform/RuleTransformer.scala | 16 - .../scala/tools/partest/nest/FileManager.scala | 3 +- src/xml/scala/xml/Atom.scala | 47 ++ src/xml/scala/xml/Attribute.scala | 101 +++ src/xml/scala/xml/Comment.scala | 31 + src/xml/scala/xml/Document.scala | 92 ++ src/xml/scala/xml/Elem.scala | 135 +++ src/xml/scala/xml/EntityRef.scala | 40 + src/xml/scala/xml/Equality.scala | 107 +++ src/xml/scala/xml/Group.scala | 42 + .../scala/xml/MalformedAttributeException.scala | 15 + src/xml/scala/xml/MetaData.scala | 217 +++++ src/xml/scala/xml/NamespaceBinding.scala | 83 ++ src/xml/scala/xml/Node.scala | 198 +++++ 
src/xml/scala/xml/NodeBuffer.scala | 47 ++ src/xml/scala/xml/NodeSeq.scala | 157 ++++ src/xml/scala/xml/Null.scala | 62 ++ src/xml/scala/xml/PCData.scala | 44 + src/xml/scala/xml/PrefixedAttribute.scala | 61 ++ src/xml/scala/xml/PrettyPrinter.scala | 263 ++++++ src/xml/scala/xml/ProcInstr.scala | 39 + src/xml/scala/xml/QNode.scala | 20 + src/xml/scala/xml/SpecialNode.scala | 33 + src/xml/scala/xml/Text.scala | 39 + src/xml/scala/xml/TextBuffer.scala | 46 + src/xml/scala/xml/TopScope.scala | 31 + src/xml/scala/xml/TypeSymbol.scala | 15 + src/xml/scala/xml/Unparsed.scala | 36 + src/xml/scala/xml/UnprefixedAttribute.scala | 61 ++ src/xml/scala/xml/Utility.scala | 410 +++++++++ src/xml/scala/xml/XML.scala | 109 +++ src/xml/scala/xml/Xhtml.scala | 97 +++ src/xml/scala/xml/dtd/ContentModel.scala | 118 +++ src/xml/scala/xml/dtd/ContentModelParser.scala | 129 +++ src/xml/scala/xml/dtd/DTD.scala | 35 + src/xml/scala/xml/dtd/Decl.scala | 157 ++++ src/xml/scala/xml/dtd/DocType.scala | 39 + src/xml/scala/xml/dtd/ElementValidator.scala | 132 +++ src/xml/scala/xml/dtd/ExternalID.scala | 86 ++ src/xml/scala/xml/dtd/Scanner.scala | 79 ++ src/xml/scala/xml/dtd/Tokens.scala | 45 + src/xml/scala/xml/dtd/ValidationException.scala | 44 + src/xml/scala/xml/dtd/impl/Base.scala | 67 ++ src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala | 98 +++ src/xml/scala/xml/dtd/impl/DetWordAutom.scala | 50 ++ src/xml/scala/xml/dtd/impl/Inclusion.scala | 70 ++ src/xml/scala/xml/dtd/impl/NondetWordAutom.scala | 60 ++ src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala | 37 + .../scala/xml/dtd/impl/SubsetConstruction.scala | 108 +++ src/xml/scala/xml/dtd/impl/SyntaxError.scala | 21 + src/xml/scala/xml/dtd/impl/WordBerrySethi.scala | 162 ++++ src/xml/scala/xml/dtd/impl/WordExp.scala | 59 ++ src/xml/scala/xml/factory/Binder.scala | 61 ++ src/xml/scala/xml/factory/LoggedNodeFactory.scala | 90 ++ src/xml/scala/xml/factory/NodeFactory.scala | 61 ++ src/xml/scala/xml/factory/XMLLoader.scala | 61 ++ 
.../xml/include/CircularIncludeException.scala | 25 + .../xml/include/UnavailableResourceException.scala | 20 + src/xml/scala/xml/include/XIncludeException.scala | 58 ++ .../scala/xml/include/sax/EncodingHeuristics.scala | 98 +++ src/xml/scala/xml/include/sax/XIncludeFilter.scala | 373 ++++++++ src/xml/scala/xml/include/sax/XIncluder.scala | 187 ++++ src/xml/scala/xml/package.scala | 19 + .../scala/xml/parsing/ConstructingHandler.scala | 34 + src/xml/scala/xml/parsing/ConstructingParser.scala | 55 ++ .../scala/xml/parsing/DefaultMarkupHandler.scala | 30 + src/xml/scala/xml/parsing/ExternalSources.scala | 38 + src/xml/scala/xml/parsing/FactoryAdapter.scala | 187 ++++ src/xml/scala/xml/parsing/FatalError.scala | 17 + src/xml/scala/xml/parsing/MarkupHandler.scala | 127 +++ src/xml/scala/xml/parsing/MarkupParser.scala | 938 +++++++++++++++++++++ src/xml/scala/xml/parsing/MarkupParserCommon.scala | 260 ++++++ .../xml/parsing/NoBindingFactoryAdapter.scala | 37 + src/xml/scala/xml/parsing/TokenTests.scala | 101 +++ .../xml/parsing/ValidatingMarkupHandler.scala | 104 +++ src/xml/scala/xml/parsing/XhtmlEntities.scala | 54 ++ src/xml/scala/xml/parsing/XhtmlParser.scala | 31 + .../scala/xml/persistent/CachedFileStorage.scala | 129 +++ src/xml/scala/xml/persistent/Index.scala | 17 + src/xml/scala/xml/persistent/SetStorage.scala | 42 + src/xml/scala/xml/pull/XMLEvent.scala | 60 ++ src/xml/scala/xml/pull/XMLEventReader.scala | 157 ++++ src/xml/scala/xml/pull/package.scala | 42 + src/xml/scala/xml/transform/BasicTransformer.scala | 60 ++ src/xml/scala/xml/transform/RewriteRule.scala | 28 + src/xml/scala/xml/transform/RuleTransformer.scala | 16 + 175 files changed, 7969 insertions(+), 7916 deletions(-) create mode 100644 src/build/bnd/scala-xml.bnd create mode 100644 src/build/maven/scala-xml-pom.xml delete mode 100644 src/library/scala/xml/Atom.scala delete mode 100644 src/library/scala/xml/Attribute.scala delete mode 100644 src/library/scala/xml/Comment.scala delete mode 100644 
src/library/scala/xml/Document.scala delete mode 100755 src/library/scala/xml/Elem.scala delete mode 100644 src/library/scala/xml/EntityRef.scala delete mode 100644 src/library/scala/xml/Equality.scala delete mode 100644 src/library/scala/xml/Group.scala delete mode 100644 src/library/scala/xml/MalformedAttributeException.scala delete mode 100644 src/library/scala/xml/MetaData.scala delete mode 100644 src/library/scala/xml/NamespaceBinding.scala delete mode 100755 src/library/scala/xml/Node.scala delete mode 100644 src/library/scala/xml/NodeBuffer.scala delete mode 100644 src/library/scala/xml/NodeSeq.scala delete mode 100644 src/library/scala/xml/Null.scala delete mode 100644 src/library/scala/xml/PCData.scala delete mode 100644 src/library/scala/xml/PrefixedAttribute.scala delete mode 100755 src/library/scala/xml/PrettyPrinter.scala delete mode 100644 src/library/scala/xml/ProcInstr.scala delete mode 100644 src/library/scala/xml/QNode.scala delete mode 100644 src/library/scala/xml/SpecialNode.scala delete mode 100644 src/library/scala/xml/Text.scala delete mode 100644 src/library/scala/xml/TextBuffer.scala delete mode 100644 src/library/scala/xml/TopScope.scala delete mode 100644 src/library/scala/xml/TypeSymbol.scala delete mode 100644 src/library/scala/xml/Unparsed.scala delete mode 100644 src/library/scala/xml/UnprefixedAttribute.scala delete mode 100755 src/library/scala/xml/Utility.scala delete mode 100755 src/library/scala/xml/XML.scala delete mode 100644 src/library/scala/xml/Xhtml.scala delete mode 100644 src/library/scala/xml/dtd/ContentModel.scala delete mode 100644 src/library/scala/xml/dtd/ContentModelParser.scala delete mode 100644 src/library/scala/xml/dtd/DTD.scala delete mode 100644 src/library/scala/xml/dtd/Decl.scala delete mode 100644 src/library/scala/xml/dtd/DocType.scala delete mode 100644 src/library/scala/xml/dtd/ElementValidator.scala delete mode 100644 src/library/scala/xml/dtd/ExternalID.scala delete mode 100644 
src/library/scala/xml/dtd/Scanner.scala delete mode 100644 src/library/scala/xml/dtd/Tokens.scala delete mode 100644 src/library/scala/xml/dtd/ValidationException.scala delete mode 100644 src/library/scala/xml/dtd/impl/Base.scala delete mode 100644 src/library/scala/xml/dtd/impl/BaseBerrySethi.scala delete mode 100644 src/library/scala/xml/dtd/impl/DetWordAutom.scala delete mode 100644 src/library/scala/xml/dtd/impl/Inclusion.scala delete mode 100644 src/library/scala/xml/dtd/impl/NondetWordAutom.scala delete mode 100644 src/library/scala/xml/dtd/impl/PointedHedgeExp.scala delete mode 100644 src/library/scala/xml/dtd/impl/SubsetConstruction.scala delete mode 100644 src/library/scala/xml/dtd/impl/SyntaxError.scala delete mode 100644 src/library/scala/xml/dtd/impl/WordBerrySethi.scala delete mode 100644 src/library/scala/xml/dtd/impl/WordExp.scala delete mode 100755 src/library/scala/xml/factory/Binder.scala delete mode 100644 src/library/scala/xml/factory/LoggedNodeFactory.scala delete mode 100644 src/library/scala/xml/factory/NodeFactory.scala delete mode 100644 src/library/scala/xml/factory/XMLLoader.scala delete mode 100644 src/library/scala/xml/include/CircularIncludeException.scala delete mode 100644 src/library/scala/xml/include/UnavailableResourceException.scala delete mode 100644 src/library/scala/xml/include/XIncludeException.scala delete mode 100644 src/library/scala/xml/include/sax/EncodingHeuristics.scala delete mode 100644 src/library/scala/xml/include/sax/XIncludeFilter.scala delete mode 100644 src/library/scala/xml/include/sax/XIncluder.scala delete mode 100644 src/library/scala/xml/package.scala delete mode 100755 src/library/scala/xml/parsing/ConstructingHandler.scala delete mode 100644 src/library/scala/xml/parsing/ConstructingParser.scala delete mode 100755 src/library/scala/xml/parsing/DefaultMarkupHandler.scala delete mode 100644 src/library/scala/xml/parsing/ExternalSources.scala delete mode 100644 
src/library/scala/xml/parsing/FactoryAdapter.scala delete mode 100644 src/library/scala/xml/parsing/FatalError.scala delete mode 100755 src/library/scala/xml/parsing/MarkupHandler.scala delete mode 100755 src/library/scala/xml/parsing/MarkupParser.scala delete mode 100644 src/library/scala/xml/parsing/MarkupParserCommon.scala delete mode 100644 src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala delete mode 100644 src/library/scala/xml/parsing/TokenTests.scala delete mode 100644 src/library/scala/xml/parsing/ValidatingMarkupHandler.scala delete mode 100644 src/library/scala/xml/parsing/XhtmlEntities.scala delete mode 100644 src/library/scala/xml/parsing/XhtmlParser.scala delete mode 100644 src/library/scala/xml/persistent/CachedFileStorage.scala delete mode 100644 src/library/scala/xml/persistent/Index.scala delete mode 100644 src/library/scala/xml/persistent/SetStorage.scala delete mode 100644 src/library/scala/xml/pull/XMLEvent.scala delete mode 100755 src/library/scala/xml/pull/XMLEventReader.scala delete mode 100644 src/library/scala/xml/pull/package.scala delete mode 100644 src/library/scala/xml/transform/BasicTransformer.scala delete mode 100644 src/library/scala/xml/transform/RewriteRule.scala delete mode 100644 src/library/scala/xml/transform/RuleTransformer.scala create mode 100644 src/xml/scala/xml/Atom.scala create mode 100644 src/xml/scala/xml/Attribute.scala create mode 100644 src/xml/scala/xml/Comment.scala create mode 100644 src/xml/scala/xml/Document.scala create mode 100755 src/xml/scala/xml/Elem.scala create mode 100644 src/xml/scala/xml/EntityRef.scala create mode 100644 src/xml/scala/xml/Equality.scala create mode 100644 src/xml/scala/xml/Group.scala create mode 100644 src/xml/scala/xml/MalformedAttributeException.scala create mode 100644 src/xml/scala/xml/MetaData.scala create mode 100644 src/xml/scala/xml/NamespaceBinding.scala create mode 100755 src/xml/scala/xml/Node.scala create mode 100644 src/xml/scala/xml/NodeBuffer.scala create 
mode 100644 src/xml/scala/xml/NodeSeq.scala create mode 100644 src/xml/scala/xml/Null.scala create mode 100644 src/xml/scala/xml/PCData.scala create mode 100644 src/xml/scala/xml/PrefixedAttribute.scala create mode 100755 src/xml/scala/xml/PrettyPrinter.scala create mode 100644 src/xml/scala/xml/ProcInstr.scala create mode 100644 src/xml/scala/xml/QNode.scala create mode 100644 src/xml/scala/xml/SpecialNode.scala create mode 100644 src/xml/scala/xml/Text.scala create mode 100644 src/xml/scala/xml/TextBuffer.scala create mode 100644 src/xml/scala/xml/TopScope.scala create mode 100644 src/xml/scala/xml/TypeSymbol.scala create mode 100644 src/xml/scala/xml/Unparsed.scala create mode 100644 src/xml/scala/xml/UnprefixedAttribute.scala create mode 100755 src/xml/scala/xml/Utility.scala create mode 100755 src/xml/scala/xml/XML.scala create mode 100644 src/xml/scala/xml/Xhtml.scala create mode 100644 src/xml/scala/xml/dtd/ContentModel.scala create mode 100644 src/xml/scala/xml/dtd/ContentModelParser.scala create mode 100644 src/xml/scala/xml/dtd/DTD.scala create mode 100644 src/xml/scala/xml/dtd/Decl.scala create mode 100644 src/xml/scala/xml/dtd/DocType.scala create mode 100644 src/xml/scala/xml/dtd/ElementValidator.scala create mode 100644 src/xml/scala/xml/dtd/ExternalID.scala create mode 100644 src/xml/scala/xml/dtd/Scanner.scala create mode 100644 src/xml/scala/xml/dtd/Tokens.scala create mode 100644 src/xml/scala/xml/dtd/ValidationException.scala create mode 100644 src/xml/scala/xml/dtd/impl/Base.scala create mode 100644 src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala create mode 100644 src/xml/scala/xml/dtd/impl/DetWordAutom.scala create mode 100644 src/xml/scala/xml/dtd/impl/Inclusion.scala create mode 100644 src/xml/scala/xml/dtd/impl/NondetWordAutom.scala create mode 100644 src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala create mode 100644 src/xml/scala/xml/dtd/impl/SubsetConstruction.scala create mode 100644 src/xml/scala/xml/dtd/impl/SyntaxError.scala create 
mode 100644 src/xml/scala/xml/dtd/impl/WordBerrySethi.scala create mode 100644 src/xml/scala/xml/dtd/impl/WordExp.scala create mode 100755 src/xml/scala/xml/factory/Binder.scala create mode 100644 src/xml/scala/xml/factory/LoggedNodeFactory.scala create mode 100644 src/xml/scala/xml/factory/NodeFactory.scala create mode 100644 src/xml/scala/xml/factory/XMLLoader.scala create mode 100644 src/xml/scala/xml/include/CircularIncludeException.scala create mode 100644 src/xml/scala/xml/include/UnavailableResourceException.scala create mode 100644 src/xml/scala/xml/include/XIncludeException.scala create mode 100644 src/xml/scala/xml/include/sax/EncodingHeuristics.scala create mode 100644 src/xml/scala/xml/include/sax/XIncludeFilter.scala create mode 100644 src/xml/scala/xml/include/sax/XIncluder.scala create mode 100644 src/xml/scala/xml/package.scala create mode 100755 src/xml/scala/xml/parsing/ConstructingHandler.scala create mode 100644 src/xml/scala/xml/parsing/ConstructingParser.scala create mode 100755 src/xml/scala/xml/parsing/DefaultMarkupHandler.scala create mode 100644 src/xml/scala/xml/parsing/ExternalSources.scala create mode 100644 src/xml/scala/xml/parsing/FactoryAdapter.scala create mode 100644 src/xml/scala/xml/parsing/FatalError.scala create mode 100755 src/xml/scala/xml/parsing/MarkupHandler.scala create mode 100755 src/xml/scala/xml/parsing/MarkupParser.scala create mode 100644 src/xml/scala/xml/parsing/MarkupParserCommon.scala create mode 100644 src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala create mode 100644 src/xml/scala/xml/parsing/TokenTests.scala create mode 100644 src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala create mode 100644 src/xml/scala/xml/parsing/XhtmlEntities.scala create mode 100644 src/xml/scala/xml/parsing/XhtmlParser.scala create mode 100644 src/xml/scala/xml/persistent/CachedFileStorage.scala create mode 100644 src/xml/scala/xml/persistent/Index.scala create mode 100644 src/xml/scala/xml/persistent/SetStorage.scala 
create mode 100644 src/xml/scala/xml/pull/XMLEvent.scala create mode 100755 src/xml/scala/xml/pull/XMLEventReader.scala create mode 100644 src/xml/scala/xml/pull/package.scala create mode 100644 src/xml/scala/xml/transform/BasicTransformer.scala create mode 100644 src/xml/scala/xml/transform/RewriteRule.scala create mode 100644 src/xml/scala/xml/transform/RuleTransformer.scala (limited to 'src') diff --git a/src/build/bnd/scala-xml.bnd b/src/build/bnd/scala-xml.bnd new file mode 100644 index 0000000000..6203c57dfe --- /dev/null +++ b/src/build/bnd/scala-xml.bnd @@ -0,0 +1,5 @@ +Bundle-Name: Scala XML Library +Bundle-SymbolicName: org.scala-lang.scala-xml +ver: @VERSION@ +Bundle-Version: ${ver} +Export-Package: *;version=${ver} diff --git a/src/build/maven/maven-deploy.xml b/src/build/maven/maven-deploy.xml index bf82346b80..a51562103c 100644 --- a/src/build/maven/maven-deploy.xml +++ b/src/build/maven/maven-deploy.xml @@ -108,6 +108,7 @@ + diff --git a/src/build/maven/scala-xml-pom.xml b/src/build/maven/scala-xml-pom.xml new file mode 100644 index 0000000000..629872c2e2 --- /dev/null +++ b/src/build/maven/scala-xml-pom.xml @@ -0,0 +1,59 @@ + + 4.0.0 + org.scala-lang + scala-xml + jar + @VERSION@ + Scala XML + XML Library for the Scala Programming Language + http://www.scala-lang.org/ + 2002 + + LAMP/EPFL + http://lamp.epfl.ch/ + + + + BSD-like + http://www.scala-lang.org/downloads/license.html + + repo + + + + scm:git:git://github.com/scala/scala.git + https://github.com/scala/scala.git + + + JIRA + https://issues.scala-lang.org/ + + + http://www.scala-lang.org/api/@VERSION@/ + + + + + + scala-tools.org + @RELEASE_REPOSITORY@ + + + scala-tools.org + @SNAPSHOT_REPOSITORY@ + false + + + + + lamp + EPFL LAMP + + + Typesafe + Typesafe, Inc. 
+ + + diff --git a/src/build/pack.xml b/src/build/pack.xml index f8eb3c67c5..44198adb1e 100644 --- a/src/build/pack.xml +++ b/src/build/pack.xml @@ -153,6 +153,7 @@ MAIN DISTRIBUTION PACKAGING + @@ -203,6 +204,10 @@ MAIN DISTRIBUTION PACKAGING basedir="${build-docs.dir}/library"> + + + diff --git a/src/compiler/scala/tools/nsc/typechecker/Contexts.scala b/src/compiler/scala/tools/nsc/typechecker/Contexts.scala index 1f4ff7cc2d..1f8f13ae02 100644 --- a/src/compiler/scala/tools/nsc/typechecker/Contexts.scala +++ b/src/compiler/scala/tools/nsc/typechecker/Contexts.scala @@ -99,22 +99,13 @@ trait Contexts { self: Analyzer => // there must be a scala.xml package when xml literals were parsed in this unit if (unit.hasXml && ScalaXmlPackage == NoSymbol) - unit.error(unit.firstXmlPos, "XML literals may only be used if the package scala.xml is present in the compilation classpath.") - - // TODO: remove the def below and drop `|| predefDefinesDollarScope` in the condition for `contextWithXML` - // as soon as 2.11.0-M4 is released and used as STARR (and $scope is no longer defined in Predef) - // Until then, to allow compiling quick with pre-2.11.0-M4 STARR, - // which relied on Predef defining `val $scope`, we've left it in place. - // Since the new scheme also imports $scope (as an alias for scala.xml.TopScope), - // we must check whether it is still there and not import the alias to avoid ambiguity. - // (All of this is only necessary to compile the full quick stage with STARR. - // if using locker, Predef.$scope is no longer needed.) 
- def predefDefinesDollarScope = definitions.getMemberIfDefined(PredefModule, nme.dollarScope) != NoSymbol - - // hack for the old xml library (detected by looking for scala.xml.TopScope, which needs to be in scope as $scope) - // import scala.xml.{TopScope => $scope} + unit.error(unit.firstXmlPos, "To compile XML syntax, the scala.xml package must be on the classpath.\nPlease see https://github.com/scala/scala/wiki/Scala-2.11#xml.") + + // scala-xml needs `scala.xml.TopScope` to be in scope globally as `$scope` + // We detect `scala-xml` by looking for `scala.xml.TopScope` and + // inject the equivalent of `import scala.xml.{TopScope => $scope}` val contextWithXML = - if (!unit.hasXml || ScalaXmlTopScope == NoSymbol || predefDefinesDollarScope) rootImportsContext + if (!unit.hasXml || ScalaXmlTopScope == NoSymbol) rootImportsContext else rootImportsContext.make(gen.mkImport(ScalaXmlPackage, nme.TopScope, nme.dollarScope)) val c = contextWithXML.make(tree, unit = unit) diff --git a/src/library/scala/Predef.scala b/src/library/scala/Predef.scala index a188602543..3b588e261f 100644 --- a/src/library/scala/Predef.scala +++ b/src/library/scala/Predef.scala @@ -134,15 +134,6 @@ object Predef extends LowPriorityImplicits with DeprecatedPredef { @inline def implicitly[T](implicit e: T) = e // for summoning implicit values from the nether world -- TODO: when dependent method types are on by default, give this result type `e.type`, so that inliner has better chance of knowing which method to inline in calls like `implicitly[MatchingStrategy[Option]].zero` @inline def locally[T](x: T): T = x // to communicate intent and avoid unmoored statements - // TODO: remove `val $scope = ...` as soon as 2.11.0-M4 is released and used as STARR - // As it has a '$' in its name, we don't have to deprecate first. - // The compiler now aliases `scala.xml.TopScope` to `$scope` (unless Predef.$scope is still there). 
- // This definition left in place for older compilers and to compile quick with pre-2.11.0-M4 STARR. - // In principle we don't need it to compile library/reflect/compiler (there's no xml left there), - // so a new locker can be built without this definition, and locker can build quick - // (partest, scaladoc still require xml). - val $scope = scala.xml.TopScope - // errors and asserts ------------------------------------------------- // !!! Remove this when possible - ideally for 2.11. diff --git a/src/library/scala/xml/Atom.scala b/src/library/scala/xml/Atom.scala deleted file mode 100644 index 33e58ba7e7..0000000000 --- a/src/library/scala/xml/Atom.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Atom` provides an XML node for text (`PCDATA`). - * It is used in both non-bound and bound XML representations. - * - * @author Burak Emir - * @param data the text contained in this node, may not be `'''null'''`. - */ -class Atom[+A](val data: A) extends SpecialNode with Serializable { - if (data == null) - throw new IllegalArgumentException("cannot construct "+getClass.getSimpleName+" with null") - - override protected def basisForHashCode: Seq[Any] = Seq(data) - - override def strict_==(other: Equality) = other match { - case x: Atom[_] => data == x.data - case _ => false - } - - override def canEqual(other: Any) = other match { - case _: Atom[_] => true - case _ => false - } - - final override def doCollectNamespaces = false - final override def doTransform = false - - def label = "#PCDATA" - - /** Returns text, with some characters escaped according to the XML - * specification. 
- */ - def buildString(sb: StringBuilder): StringBuilder = - Utility.escape(data.toString, sb) - - override def text: String = data.toString - -} diff --git a/src/library/scala/xml/Attribute.scala b/src/library/scala/xml/Attribute.scala deleted file mode 100644 index e4b2b69fc6..0000000000 --- a/src/library/scala/xml/Attribute.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `apply` and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Attribute { - def unapply(x: Attribute) = x match { - case PrefixedAttribute(_, key, value, next) => Some((key, value, next)) - case UnprefixedAttribute(key, value, next) => Some((key, value, next)) - case _ => None - } - - /** Convenience functions which choose Un/Prefixedness appropriately */ - def apply(key: String, value: Seq[Node], next: MetaData): Attribute = - new UnprefixedAttribute(key, value, next) - - def apply(pre: String, key: String, value: String, next: MetaData): Attribute = - if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) - else new PrefixedAttribute(pre, key, value, next) - - def apply(pre: String, key: String, value: Seq[Node], next: MetaData): Attribute = - if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) - else new PrefixedAttribute(pre, key, value, next) - - def apply(pre: Option[String], key: String, value: Seq[Node], next: MetaData): Attribute = - pre match { - case None => new UnprefixedAttribute(key, value, next) - case Some(p) => new PrefixedAttribute(p, key, value, next) - } -} - -/** The `Attribute` trait defines the interface shared by both - * [[scala.xml.PrefixedAttribute]] and 
[[scala.xml.UnprefixedAttribute]]. - * - * @author Burak Emir - * @version 1.0 - */ -abstract trait Attribute extends MetaData { - def pre: String // will be null if unprefixed - val key: String - val value: Seq[Node] - val next: MetaData - - def apply(key: String): Seq[Node] - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] - def copy(next: MetaData): Attribute - - def remove(key: String) = - if (!isPrefixed && this.key == key) next - else copy(next remove key) - - def remove(namespace: String, scope: NamespaceBinding, key: String) = - if (this.key == key && (scope getURI pre) == namespace) next - else copy(next.remove(namespace, scope, key)) - - def isPrefixed: Boolean = pre != null - - def getNamespace(owner: Node): String - - def wellformed(scope: NamespaceBinding): Boolean = { - val arg = if (isPrefixed) scope getURI pre else null - (next(arg, scope, key) == null) && (next wellformed scope) - } - - /** Returns an iterator on attributes */ - override def iterator: Iterator[MetaData] = { - if (value == null) next.iterator - else Iterator.single(this) ++ next.iterator - } - - override def size: Int = { - if (value == null) next.size - else 1 + next.size - } - - /** Appends string representation of only this attribute to stringbuffer. 
- */ - protected def toString1(sb: StringBuilder) { - if (value == null) - return - if (isPrefixed) - sb append pre append ':' - - sb append key append '=' - val sb2 = new StringBuilder() - Utility.sequenceToXML(value, TopScope, sb2, stripComments = true) - Utility.appendQuoted(sb2.toString, sb) - } -} diff --git a/src/library/scala/xml/Comment.scala b/src/library/scala/xml/Comment.scala deleted file mode 100644 index b8dccdcb16..0000000000 --- a/src/library/scala/xml/Comment.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Comment` implements an XML node for comments. - * - * @author Burak Emir - * @param commentText the text contained in this node, may not contain "--" - */ -case class Comment(commentText: String) extends SpecialNode { - - def label = "#REM" - override def text = "" - final override def doCollectNamespaces = false - final override def doTransform = false - - if (commentText contains "--") - throw new IllegalArgumentException("text contains \"--\"") - - /** Appends "" to this string buffer. - */ - override def buildString(sb: StringBuilder) = - sb append "" -} diff --git a/src/library/scala/xml/Document.scala b/src/library/scala/xml/Document.scala deleted file mode 100644 index 9a725014fc..0000000000 --- a/src/library/scala/xml/Document.scala +++ /dev/null @@ -1,92 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** A document information item (according to InfoSet spec). 
The comments - * are copied from the Infoset spec, only augmented with some information - * on the Scala types for definitions that might have no value. - * Also plays the role of an `XMLEvent` for pull parsing. - * - * @author Burak Emir - * @version 1.0, 26/04/2005 - */ -@SerialVersionUID(-2289320563321795109L) -class Document extends NodeSeq with pull.XMLEvent with Serializable { - - /** An ordered list of child information items, in document - * order. The list contains exactly one element information item. The - * list also contains one processing instruction information item for - * each processing instruction outside the document element, and one - * comment information item for each comment outside the document - * element. Processing instructions and comments within the DTD are - * excluded. If there is a document type declaration, the list also - * contains a document type declaration information item. - */ - var children: Seq[Node] = _ - - /** The element information item corresponding to the document element. */ - var docElem: Node = _ - - /** The dtd that comes with the document, if any */ - var dtd: scala.xml.dtd.DTD = _ - - /** An unordered set of notation information items, one for each notation - * declared in the DTD. If any notation is multiply declared, this property - * has no value. - */ - def notations: Seq[scala.xml.dtd.NotationDecl] = - dtd.notations - - /** An unordered set of unparsed entity information items, one for each - * unparsed entity declared in the DTD. - */ - def unparsedEntities: Seq[scala.xml.dtd.EntityDecl] = - dtd.unparsedEntities - - /** The base URI of the document entity. */ - var baseURI: String = _ - - /** The name of the character encoding scheme in which the document entity - * is expressed. - */ - var encoding: Option[String] = _ - - /** An indication of the standalone status of the document, either - * true or false. 
This property is derived from the optional standalone - * document declaration in the XML declaration at the beginning of the - * document entity, and has no value (`None`) if there is no - * standalone document declaration. - */ - var standAlone: Option[Boolean] = _ - - /** A string representing the XML version of the document. This - * property is derived from the XML declaration optionally present at - * the beginning of the document entity, and has no value (`None`) - * if there is no XML declaration. - */ - var version: Option[String] = _ - - /** 9. This property is not strictly speaking part of the infoset of - * the document. Rather it is an indication of whether the processor - * has read the complete DTD. Its value is a boolean. If it is false, - * then certain properties (indicated in their descriptions below) may - * be unknown. If it is true, those properties are never unknown. - */ - var allDeclarationsProcessed = false - - // methods for NodeSeq - - def theSeq: Seq[Node] = this.docElem - - override def canEqual(other: Any) = other match { - case _: Document => true - case _ => false - } -} diff --git a/src/library/scala/xml/Elem.scala b/src/library/scala/xml/Elem.scala deleted file mode 100755 index 484cf98744..0000000000 --- a/src/library/scala/xml/Elem.scala +++ /dev/null @@ -1,135 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `apply` and `unapplySeq` methods for - * convenient construction and deconstruction. It is possible to deconstruct - * any `Node` instance (that is not a `SpecialNode` or a `Group`) using the - * syntax `case Elem(prefix, label, attribs, scope, child @ _*) => ...` - * - * Copyright 2008 Google Inc. All Rights Reserved. 
- * @author Burak Emir - */ -object Elem { - /** Build an Elem, setting its minimizeEmpty property to `true` if it has no children. Note that this - * default may not be exactly what you want, as some XML dialects don't permit some elements to be minimized. - * - * @deprecated This factory method is retained for backward compatibility; please use the other one, with which you - * can specify your own preference for minimizeEmpty. - */ - @deprecated("Use the other apply method in this object", "2.10.0") - def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*): Elem = - apply(prefix, label, attributes, scope, child.isEmpty, child: _*) - - def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, minimizeEmpty: Boolean, child: Node*): Elem = - new Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) - - def unapplySeq(n: Node) = n match { - case _: SpecialNode | _: Group => None - case _ => Some((n.prefix, n.label, n.attributes, n.scope, n.child)) - } - - import scala.sys.process._ - /** Implicitly convert a [[scala.xml.Elem]] into a - * [[scala.sys.process.ProcessBuilder]]. This is done by obtaining the text - * elements of the element, trimming spaces, and then converting the result - * from string to a process. Importantly, tags are completely ignored, so - * they cannot be used to separate parameters. - */ - @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") - implicit def xmlToProcess(command: scala.xml.Elem): ProcessBuilder = Process(command.text.trim) - - @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") - implicit def processXml(p: Process.type) = new { - /** Creates a [[scala.sys.process.ProcessBuilder]] from a Scala XML Element. - * This can be used as a way to template strings. 
- * - * @example {{{ - * apply( {dxPath.absolutePath} --dex --output={classesDexPath.absolutePath} {classesMinJarPath.absolutePath}) - * }}} - */ - def apply(command: Elem): ProcessBuilder = Process(command.text.trim) - } -} - - -/** The case class `Elem` extends the `Node` class, - * providing an immutable data object representing an XML element. - * - * @param prefix namespace prefix (may be null, but not the empty string) - * @param label the element name - * @param attributes1 the attribute map - * @param scope the scope containing the namespace bindings - * @param minimizeEmpty `true` if this element should be serialized as minimized (i.e. "<el/>") when - * empty; `false` if it should be written out in long form. - * @param child the children of this node - * - * Copyright 2008 Google Inc. All Rights Reserved. - * @author Burak Emir - */ -class Elem( - override val prefix: String, - val label: String, - attributes1: MetaData, - override val scope: NamespaceBinding, - val minimizeEmpty: Boolean, - val child: Node*) -extends Node with Serializable -{ - @deprecated("This constructor is retained for backward compatibility. 
Please use the primary constructor, which lets you specify your own preference for `minimizeEmpty`.", "2.10.0") - def this(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*) = { - this(prefix, label, attributes, scope, child.isEmpty, child: _*) - } - - final override def doCollectNamespaces = true - final override def doTransform = true - - override val attributes = MetaData.normalize(attributes1, scope) - - if (prefix == "") - throw new IllegalArgumentException("prefix of zero length, use null instead") - - if (scope == null) - throw new IllegalArgumentException("scope is null, use scala.xml.TopScope for empty scope") - - //@todo: copy the children, - // setting namespace scope if necessary - // cleaning adjacent text nodes if necessary - - override protected def basisForHashCode: Seq[Any] = - prefix :: label :: attributes :: child.toList - - /** Returns a new element with updated attributes, resolving namespace uris - * from this element's scope. See MetaData.update for details. - * - * @param updates MetaData with new and updated attributes - * @return a new symbol with updated attributes - */ - final def %(updates: MetaData): Elem = - copy(attributes = MetaData.update(attributes, scope, updates)) - - /** Returns a copy of this element with any supplied arguments replacing - * this element's value for that field. - * - * @return a new symbol with updated attributes - */ - def copy( - prefix: String = this.prefix, - label: String = this.label, - attributes: MetaData = this.attributes, - scope: NamespaceBinding = this.scope, - minimizeEmpty: Boolean = this.minimizeEmpty, - child: Seq[Node] = this.child.toSeq - ): Elem = Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) - - /** Returns concatenation of `text(n)` for each child `n`. 
- */ - override def text = (child map (_.text)).mkString -} diff --git a/src/library/scala/xml/EntityRef.scala b/src/library/scala/xml/EntityRef.scala deleted file mode 100644 index 7a58831075..0000000000 --- a/src/library/scala/xml/EntityRef.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `EntityRef` implements an XML node for entity references. - * - * @author Burak Emir - * @version 1.0 - * @param entityName the name of the entity reference, for example `amp`. - */ -case class EntityRef(entityName: String) extends SpecialNode { - final override def doCollectNamespaces = false - final override def doTransform = false - def label = "#ENTITY" - - override def text = entityName match { - case "lt" => "<" - case "gt" => ">" - case "amp" => "&" - case "apos" => "'" - case "quot" => "\"" - case _ => Utility.sbToString(buildString) - } - - /** Appends `"& entityName;"` to this string buffer. - * - * @param sb the string buffer. - * @return the modified string buffer `sb`. 
- */ - override def buildString(sb: StringBuilder) = - sb.append("&").append(entityName).append(";") - -} diff --git a/src/library/scala/xml/Equality.scala b/src/library/scala/xml/Equality.scala deleted file mode 100644 index 021d185812..0000000000 --- a/src/library/scala/xml/Equality.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** In an attempt to contain the damage being inflicted on consistency by the - * ad hoc `equals` methods spread around `xml`, the logic is centralized and - * all the `xml` classes go through the `xml.Equality trait`. There are two - * forms of `xml` comparison. - * - * 1. `'''def''' strict_==(other: scala.xml.Equality)` - * - * This one tries to honor the little things like symmetry and hashCode - * contracts. The `equals` method routes all comparisons through this. - * - * 1. `xml_==(other: Any)` - * - * This one picks up where `strict_==` leaves off. It might declare any two - * things equal. - * - * As things stood, the logic not only made a mockery of the collections - * equals contract, but also laid waste to that of case classes. - * - * Among the obstacles to sanity are/were: - * - * Node extends NodeSeq extends Seq[Node] - * MetaData extends Iterable[MetaData] - * The hacky "Group" xml node which throws exceptions - * with wild abandon, so don't get too close - * Rampant asymmetry and impossible hashCodes - * Most classes claiming to be equal to "String" if - * some specific stringification of it was the same. - * String was never going to return the favor. - */ - -object Equality { - def asRef(x: Any): AnyRef = x.asInstanceOf[AnyRef] - - /** Note - these functions assume strict equality has already failed. 
- */ - def compareBlithely(x1: AnyRef, x2: String): Boolean = x1 match { - case x: Atom[_] => x.data == x2 - case x: NodeSeq => x.text == x2 - case _ => false - } - def compareBlithely(x1: AnyRef, x2: Node): Boolean = x1 match { - case x: NodeSeq if x.length == 1 => x2 == x(0) - case _ => false - } - def compareBlithely(x1: AnyRef, x2: AnyRef): Boolean = { - if (x1 == null || x2 == null) - return (x1 eq x2) - - x2 match { - case s: String => compareBlithely(x1, s) - case n: Node => compareBlithely(x1, n) - case _ => false - } - } -} -import Equality._ - -trait Equality extends scala.Equals { - protected def basisForHashCode: Seq[Any] - - def strict_==(other: Equality): Boolean - def strict_!=(other: Equality) = !strict_==(other) - - /** We insist we're only equal to other `xml.Equality` implementors, - * which heads off a lot of inconsistency up front. - */ - override def canEqual(other: Any): Boolean = other match { - case x: Equality => true - case _ => false - } - - /** It's be nice to make these final, but there are probably - * people out there subclassing the XML types, especially when - * it comes to equals. However WE at least can pretend they - * are final since clearly individual classes cannot be trusted - * to maintain a semblance of order. - */ - override def hashCode() = basisForHashCode.## - override def equals(other: Any) = doComparison(other, blithe = false) - final def xml_==(other: Any) = doComparison(other, blithe = true) - final def xml_!=(other: Any) = !xml_==(other) - - /** The "blithe" parameter expresses the caller's unconcerned attitude - * regarding the usual constraints on equals. The method is thereby - * given carte blanche to declare any two things equal. 
- */ - private def doComparison(other: Any, blithe: Boolean) = { - val strictlyEqual = other match { - case x: AnyRef if this eq x => true - case x: Equality => (x canEqual this) && (this strict_== x) - case _ => false - } - - strictlyEqual || (blithe && compareBlithely(this, asRef(other))) - } -} diff --git a/src/library/scala/xml/Group.scala b/src/library/scala/xml/Group.scala deleted file mode 100644 index e3af615008..0000000000 --- a/src/library/scala/xml/Group.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** A hack to group XML nodes in one node for output. - * - * @author Burak Emir - * @version 1.0 - */ -final case class Group(nodes: Seq[Node]) extends Node { - override def theSeq = nodes - - override def canEqual(other: Any) = other match { - case x: Group => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case Group(xs) => nodes sameElements xs - case _ => false - } - - override protected def basisForHashCode = nodes - - /** Since Group is very much a hack it throws an exception if you - * try to do anything with it. 
- */ - private def fail(msg: String) = throw new UnsupportedOperationException("class Group does not support method '%s'" format msg) - - def label = fail("label") - override def attributes = fail("attributes") - override def namespace = fail("namespace") - override def child = fail("child") - def buildString(sb: StringBuilder) = fail("toString(StringBuilder)") -} diff --git a/src/library/scala/xml/MalformedAttributeException.scala b/src/library/scala/xml/MalformedAttributeException.scala deleted file mode 100644 index d499ad3e10..0000000000 --- a/src/library/scala/xml/MalformedAttributeException.scala +++ /dev/null @@ -1,15 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml - - -case class MalformedAttributeException(msg: String) extends RuntimeException(msg) diff --git a/src/library/scala/xml/MetaData.scala b/src/library/scala/xml/MetaData.scala deleted file mode 100644 index 8b5ea187cb..0000000000 --- a/src/library/scala/xml/MetaData.scala +++ /dev/null @@ -1,217 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString -import scala.annotation.tailrec -import scala.collection.{ AbstractIterable, Iterator } - -/** - * Copyright 2008 Google Inc. All Rights Reserved. - * @author Burak Emir - */ -object MetaData { - /** - * appends all attributes from new_tail to attribs, without attempting to - * detect or remove duplicates. The method guarantees that all attributes - * from attribs come before the attributes in new_tail, but does not - * guarantee to preserve the relative order of attribs. 
- * - * Duplicates can be removed with `normalize`. - */ - @tailrec // temporarily marked final so it will compile under -Xexperimental - final def concatenate(attribs: MetaData, new_tail: MetaData): MetaData = - if (attribs eq Null) new_tail - else concatenate(attribs.next, attribs copy new_tail) - - /** - * returns normalized MetaData, with all duplicates removed and namespace prefixes resolved to - * namespace URIs via the given scope. - */ - def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = { - def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData = { - lazy val key = getUniversalKey(md, scope) - if (md eq Null) normalized_attribs - else if ((md.value eq null) || set(key)) iterate(md.next, normalized_attribs, set) - else md copy iterate(md.next, normalized_attribs, set + key) - } - iterate(attribs, Null, Set()) - } - - /** - * returns key if md is unprefixed, pre+key is md is prefixed - */ - def getUniversalKey(attrib: MetaData, scope: NamespaceBinding) = attrib match { - case prefixed: PrefixedAttribute => scope.getURI(prefixed.pre) + prefixed.key - case unprefixed: UnprefixedAttribute => unprefixed.key - } - - /** - * returns MetaData with attributes updated from given MetaData - */ - def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData = - normalize(concatenate(updates, attribs), scope) - -} - -/** This class represents an attribute and at the same time a linked list of - * attributes. Every instance of this class is either - * - an instance of `UnprefixedAttribute key,value` or - * - an instance of `PrefixedAttribute namespace_prefix,key,value` or - * - `Null, the empty attribute list. - * - * Namespace URIs are obtained by using the namespace scope of the element - * owning this attribute (see `getNamespace`). - * - * Copyright 2008 Google Inc. All Rights Reserved. 
- * @author Burak Emir - */ -abstract class MetaData -extends AbstractIterable[MetaData] - with Iterable[MetaData] - with Equality - with Serializable { - - /** Updates this MetaData with the MetaData given as argument. All attributes that occur in updates - * are part of the resulting MetaData. If an attribute occurs in both this instance and - * updates, only the one in updates is part of the result (avoiding duplicates). For prefixed - * attributes, namespaces are resolved using the given scope, which defaults to TopScope. - * - * @param updates MetaData with new and updated attributes - * @return a new MetaData instance that contains old, new and updated attributes - */ - def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData = - MetaData.update(this, scope, updates) - - /** - * Gets value of unqualified (unprefixed) attribute with given key, null if not found - * - * @param key - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(key: String): Seq[Node] - - /** convenience method, same as `apply(namespace, owner.scope, key)`. - * - * @param namespace_uri namespace uri of key - * @param owner the element owning this attribute list - * @param key the attribute key - */ - final def apply(namespace_uri: String, owner: Node, key: String): Seq[Node] = - apply(namespace_uri, owner.scope, key) - - /** - * Gets value of prefixed attribute with given key and namespace, null if not found - * - * @param namespace_uri namespace uri of key - * @param scp a namespace scp (usually of the element owning this attribute list) - * @param k to be looked for - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(namespace_uri: String, scp: NamespaceBinding, k: String): Seq[Node] - - /** returns a copy of this MetaData item with next field set to argument. 
- */ - def copy(next: MetaData): MetaData - - /** if owner is the element of this metadata item, returns namespace */ - def getNamespace(owner: Node): String - - def hasNext = (Null != next) - - def length: Int = length(0) - - def length(i: Int): Int = next.length(i + 1) - - def isPrefixed: Boolean - - override def canEqual(other: Any) = other match { - case _: MetaData => true - case _ => false - } - override def strict_==(other: Equality) = other match { - case m: MetaData => this.asAttrMap == m.asAttrMap - case _ => false - } - protected def basisForHashCode: Seq[Any] = List(this.asAttrMap) - - /** filters this sequence of meta data */ - override def filter(f: MetaData => Boolean): MetaData = - if (f(this)) copy(next filter f) - else next filter f - - /** returns key of this MetaData item */ - def key: String - - /** returns value of this MetaData item */ - def value: Seq[Node] - - /** Returns a String containing "prefix:key" if the first key is - * prefixed, and "key" otherwise. - */ - def prefixedKey = this match { - case x: Attribute if x.isPrefixed => x.pre + ":" + key - case _ => key - } - - /** Returns a Map containing the attributes stored as key/value pairs. - */ - def asAttrMap: Map[String, String] = - (iterator map (x => (x.prefixedKey, x.value.text))).toMap - - /** returns Null or the next MetaData item */ - def next: MetaData - - /** - * Gets value of unqualified (unprefixed) attribute with given key, None if not found - * - * @param key - * @return value in Some(Seq[Node]) if key is found, None otherwise - */ - final def get(key: String): Option[Seq[Node]] = Option(apply(key)) - - /** same as get(uri, owner.scope, key) */ - final def get(uri: String, owner: Node, key: String): Option[Seq[Node]] = - get(uri, owner.scope, key) - - /** gets value of qualified (prefixed) attribute with given key. 
- * - * @param uri namespace of key - * @param scope a namespace scp (usually of the element owning this attribute list) - * @param key to be looked fore - * @return value as Some[Seq[Node]] if key is found, None otherwise - */ - final def get(uri: String, scope: NamespaceBinding, key: String): Option[Seq[Node]] = - Option(apply(uri, scope, key)) - - protected def toString1(): String = sbToString(toString1) - - // appends string representations of single attribute to StringBuilder - protected def toString1(sb: StringBuilder): Unit - - override def toString(): String = sbToString(buildString) - - def buildString(sb: StringBuilder): StringBuilder = { - sb append ' ' - toString1(sb) - next buildString sb - } - - /** - */ - def wellformed(scope: NamespaceBinding): Boolean - - def remove(key: String): MetaData - - def remove(namespace: String, scope: NamespaceBinding, key: String): MetaData - - final def remove(namespace: String, owner: Node, key: String): MetaData = - remove(namespace, owner.scope, key) -} diff --git a/src/library/scala/xml/NamespaceBinding.scala b/src/library/scala/xml/NamespaceBinding.scala deleted file mode 100644 index b320466976..0000000000 --- a/src/library/scala/xml/NamespaceBinding.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString - -/** The class `NamespaceBinding` represents namespace bindings - * and scopes. The binding for the default namespace is treated as a null - * prefix. the absent namespace is represented with the null uri. Neither - * prefix nor uri may be empty, which is not checked. 
- * - * @author Burak Emir - * @version 1.0 - */ -@SerialVersionUID(0 - 2518644165573446725L) -case class NamespaceBinding(prefix: String, uri: String, parent: NamespaceBinding) extends AnyRef with Equality -{ - if (prefix == "") - throw new IllegalArgumentException("zero length prefix not allowed") - - def getURI(_prefix: String): String = - if (prefix == _prefix) uri else parent getURI _prefix - - /** Returns some prefix that is mapped to the URI. - * - * @param _uri the input URI - * @return the prefix that is mapped to the input URI, or null - * if no prefix is mapped to the URI. - */ - def getPrefix(_uri: String): String = - if (_uri == uri) prefix else parent getPrefix _uri - - override def toString(): String = sbToString(buildString(_, TopScope)) - - private def shadowRedefined(stop: NamespaceBinding): NamespaceBinding = { - def prefixList(x: NamespaceBinding): List[String] = - if ((x == null) || (x eq stop)) Nil - else x.prefix :: prefixList(x.parent) - def fromPrefixList(l: List[String]): NamespaceBinding = l match { - case Nil => stop - case x :: xs => new NamespaceBinding(x, this.getURI(x), fromPrefixList(xs)) - } - val ps0 = prefixList(this).reverse - val ps = ps0.distinct - if (ps.size == ps0.size) this - else fromPrefixList(ps) - } - - override def canEqual(other: Any) = other match { - case _: NamespaceBinding => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case x: NamespaceBinding => (prefix == x.prefix) && (uri == x.uri) && (parent == x.parent) - case _ => false - } - - def basisForHashCode: Seq[Any] = List(prefix, uri, parent) - - def buildString(stop: NamespaceBinding): String = sbToString(buildString(_, stop)) - - def buildString(sb: StringBuilder, stop: NamespaceBinding) { - shadowRedefined(stop).doBuildString(sb, stop) - } - - private def doBuildString(sb: StringBuilder, stop: NamespaceBinding) { - if ((this == null) || (this eq stop)) return // contains? 
- - val s = " xmlns%s=\"%s\"".format( - (if (prefix != null) ":" + prefix else ""), - (if (uri != null) uri else "") - ) - parent.doBuildString(sb append s, stop) // copy(ignore) - } -} diff --git a/src/library/scala/xml/Node.scala b/src/library/scala/xml/Node.scala deleted file mode 100755 index e121284252..0000000000 --- a/src/library/scala/xml/Node.scala +++ /dev/null @@ -1,198 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `unapplySeq` method for - * convenient deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Node { - /** the constant empty attribute sequence */ - final def NoAttributes: MetaData = Null - - /** the empty namespace */ - val EmptyNamespace = "" - - def unapplySeq(n: Node) = Some((n.label, n.attributes, n.child)) -} - -/** - * An abstract class representing XML with nodes of a labelled tree. - * This class contains an implementation of a subset of XPath for navigation. - * - * @author Burak Emir and others - * @version 1.1 - */ -abstract class Node extends NodeSeq { - - /** prefix of this node */ - def prefix: String = null - - /** label of this node. I.e. "foo" for <foo/>) */ - def label: String - - /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5 - */ - def isAtom = this.isInstanceOf[Atom[_]] - - /** The logic formerly found in typeTag$, as best I could infer it. */ - def doCollectNamespaces = true // if (tag >= 0) DO collect namespaces - def doTransform = true // if (tag < 0) DO NOT transform - - /** - * method returning the namespace bindings of this node. by default, this - * is TopScope, which means there are no namespace bindings except the - * predefined one for "xml". 
- */ - def scope: NamespaceBinding = TopScope - - /** - * convenience, same as `getNamespace(this.prefix)` - */ - def namespace = getNamespace(this.prefix) - - /** - * Convenience method, same as `scope.getURI(pre)` but additionally - * checks if scope is `'''null'''`. - * - * @param pre the prefix whose namespace name we would like to obtain - * @return the namespace if `scope != null` and prefix was - * found, else `null` - */ - def getNamespace(pre: String): String = if (scope eq null) null else scope.getURI(pre) - - /** - * Convenience method, looks up an unprefixed attribute in attributes of this node. - * Same as `attributes.getValue(key)` - * - * @param key of queried attribute. - * @return value of `UnprefixedAttribute` with given key - * in attributes, if it exists, otherwise `null`. - */ - final def attribute(key: String): Option[Seq[Node]] = attributes.get(key) - - /** - * Convenience method, looks up a prefixed attribute in attributes of this node. - * Same as `attributes.getValue(uri, this, key)`- - * - * @param uri namespace of queried attribute (may not be null). - * @param key of queried attribute. - * @return value of `PrefixedAttribute` with given namespace - * and given key, otherwise `'''null'''`. - */ - final def attribute(uri: String, key: String): Option[Seq[Node]] = - attributes.get(uri, this, key) - - /** - * Returns attribute meaning all attributes of this node, prefixed and - * unprefixed, in no particular order. In class `Node`, this - * defaults to `Null` (the empty attribute list). - * - * @return all attributes of this node - */ - def attributes: MetaData = Null - - /** - * Returns child axis i.e. all children of this node. 
- * - * @return all children of this node - */ - def child: Seq[Node] - - /** Children which do not stringify to "" (needed for equality) - */ - def nonEmptyChildren: Seq[Node] = child filterNot (_.toString == "") - - /** - * Descendant axis (all descendants of this node, not including node itself) - * includes all text nodes, element nodes, comments and processing instructions. - */ - def descendant: List[Node] = - child.toList.flatMap { x => x::x.descendant } - - /** - * Descendant axis (all descendants of this node, including thisa node) - * includes all text nodes, element nodes, comments and processing instructions. - */ - def descendant_or_self: List[Node] = this :: descendant - - override def canEqual(other: Any) = other match { - case x: Group => false - case x: Node => true - case _ => false - } - - override protected def basisForHashCode: Seq[Any] = - prefix :: label :: attributes :: nonEmptyChildren.toList - - override def strict_==(other: Equality) = other match { - case _: Group => false - case x: Node => - (prefix == x.prefix) && - (label == x.label) && - (attributes == x.attributes) && - // (scope == x.scope) // note - original code didn't compare scopes so I left it as is. - (nonEmptyChildren sameElements x.nonEmptyChildren) - case _ => - false - } - - // implementations of NodeSeq methods - - /** - * returns a sequence consisting of only this node - */ - def theSeq: Seq[Node] = this :: Nil - - /** - * String representation of this node - * - * @param stripComments if true, strips comment nodes from result - */ - def buildString(stripComments: Boolean): String = - Utility.serialize(this, stripComments = stripComments).toString - - /** - * Same as `toString('''false''')`. - */ - override def toString(): String = buildString(stripComments = false) - - /** - * Appends qualified name of this node to `StringBuilder`. 
- */ - def nameToString(sb: StringBuilder): StringBuilder = { - if (null != prefix) { - sb append prefix - sb append ':' - } - sb append label - } - - /** - * Returns a type symbol (e.g. DTD, XSD), default `'''null'''`. - */ - def xmlType(): TypeSymbol = null - - /** - * Returns a text representation of this node. Note that this is not equivalent to - * the XPath node-test called text(), it is rather an implementation of the - * XPath function string() - * Martin to Burak: to do: if you make this method abstract, the compiler will now - * complain if there's no implementation in a subclass. Is this what we want? Note that - * this would break doc/DocGenator and doc/ModelToXML, with an error message like: - * {{{ - * doc\DocGenerator.scala:1219: error: object creation impossible, since there is a deferred declaration of method text in class Node of type => String which is not implemented in a subclass - * new SpecialNode { - * ^ - * }}} */ - override def text: String = super.text -} diff --git a/src/library/scala/xml/NodeBuffer.scala b/src/library/scala/xml/NodeBuffer.scala deleted file mode 100644 index ae7c7b2bf8..0000000000 --- a/src/library/scala/xml/NodeBuffer.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** - * This class acts as a Buffer for nodes. If it is used as a sequence of - * nodes `Seq[Node]`, it must be ensured that no updates occur after that - * point, because `scala.xml.Node` is assumed to be immutable. - * - * Despite this being a sequence, don't use it as key in a hashtable. - * Calling the hashcode function will result in a runtime error. 
- * - * @author Burak Emir - * @version 1.0 - */ -class NodeBuffer extends scala.collection.mutable.ArrayBuffer[Node] { - - /** - * Append given object to this buffer, returns reference on this - * `NodeBuffer` for convenience. Some rules apply: - * - If argument `o` is `'''null'''`, it is ignored. - * - If it is an `Iterator` or `Iterable`, its elements will be added. - * - If `o` is a node, it is added as it is. - * - If it is anything else, it gets wrapped in an [[scala.xml.Atom]]. - * - * @param o converts to an xml node and adds to this node buffer - * @return this nodebuffer - */ - def &+(o: Any): NodeBuffer = { - o match { - case null | _: Unit | Text("") => // ignore - case it: Iterator[_] => it foreach &+ - case n: Node => super.+=(n) - case ns: Iterable[_] => this &+ ns.iterator - case ns: Array[_] => this &+ ns.iterator - case d => super.+=(new Atom(d)) - } - this - } -} diff --git a/src/library/scala/xml/NodeSeq.scala b/src/library/scala/xml/NodeSeq.scala deleted file mode 100644 index b8022472fb..0000000000 --- a/src/library/scala/xml/NodeSeq.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import scala.collection.{ mutable, immutable, generic, SeqLike, AbstractSeq } -import mutable.{ Builder, ListBuffer } -import generic.{ CanBuildFrom } -import scala.language.implicitConversions - -/** This object ... 
- * - * @author Burak Emir - * @version 1.0 - */ -object NodeSeq { - final val Empty = fromSeq(Nil) - def fromSeq(s: Seq[Node]): NodeSeq = new NodeSeq { - def theSeq = s - } - type Coll = NodeSeq - implicit def canBuildFrom: CanBuildFrom[Coll, Node, NodeSeq] = - new CanBuildFrom[Coll, Node, NodeSeq] { - def apply(from: Coll) = newBuilder - def apply() = newBuilder - } - def newBuilder: Builder[Node, NodeSeq] = new ListBuffer[Node] mapResult fromSeq - implicit def seqToNodeSeq(s: Seq[Node]): NodeSeq = fromSeq(s) -} - -/** This class implements a wrapper around `Seq[Node]` that adds XPath - * and comprehension methods. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class NodeSeq extends AbstractSeq[Node] with immutable.Seq[Node] with SeqLike[Node, NodeSeq] with Equality { - import NodeSeq.seqToNodeSeq // import view magic for NodeSeq wrappers - - /** Creates a list buffer as builder for this class */ - override protected[this] def newBuilder = NodeSeq.newBuilder - - def theSeq: Seq[Node] - def length = theSeq.length - override def iterator = theSeq.iterator - - def apply(i: Int): Node = theSeq(i) - def apply(f: Node => Boolean): NodeSeq = filter(f) - - def xml_sameElements[A](that: Iterable[A]): Boolean = { - val these = this.iterator - val those = that.iterator - while (these.hasNext && those.hasNext) - if (these.next xml_!= those.next) - return false - - !these.hasNext && !those.hasNext - } - - protected def basisForHashCode: Seq[Any] = theSeq - - override def canEqual(other: Any) = other match { - case _: NodeSeq => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case x: NodeSeq => (length == x.length) && (theSeq sameElements x.theSeq) - case _ => false - } - - /** Projection function, which returns elements of `this` sequence based - * on the string `that`. 
Use: - * - `this \ "foo"` to get a list of all elements that are labelled with `"foo"`; - * - `\ "_"` to get a list of all elements (wildcard); - * - `ns \ "@foo"` to get the unprefixed attribute `"foo"`; - * - `ns \ "@{uri}foo"` to get the prefixed attribute `"pre:foo"` whose - * prefix `"pre"` is resolved to the namespace `"uri"`. - * - * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute - * values are wrapped in a [[scala.xml.Group]]. - * - * There is no support for searching a prefixed attribute by its literal prefix. - * - * The document order is preserved. - */ - def \(that: String): NodeSeq = { - def fail = throw new IllegalArgumentException(that) - def atResult = { - lazy val y = this(0) - val attr = - if (that.length == 1) fail - else if (that(1) == '{') { - val i = that indexOf '}' - if (i == -1) fail - val (uri, key) = (that.substring(2,i), that.substring(i+1, that.length())) - if (uri == "" || key == "") fail - else y.attribute(uri, key) - } - else y.attribute(that drop 1) - - attr match { - case Some(x) => Group(x) - case _ => NodeSeq.Empty - } - } - - def makeSeq(cond: (Node) => Boolean) = - NodeSeq fromSeq (this flatMap (_.child) filter cond) - - that match { - case "" => fail - case "_" => makeSeq(!_.isAtom) - case _ if (that(0) == '@' && this.length == 1) => atResult - case _ => makeSeq(_.label == that) - } - } - - /** Projection function, which returns elements of `this` sequence and of - * all its subsequences, based on the string `that`. Use: - * - `this \\ 'foo` to get a list of all elements that are labelled with `"foo"`; - * - `\\ "_"` to get a list of all elements (wildcard); - * - `ns \\ "@foo"` to get the unprefixed attribute `"foo"`; - * - `ns \\ "@{uri}foo"` to get each prefixed attribute `"pre:foo"` whose - * prefix `"pre"` is resolved to the namespace `"uri"`. - * - * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute - * values are wrapped in a [[scala.xml.Group]]. 
- * - * There is no support for searching a prefixed attribute by its literal prefix. - * - * The document order is preserved. - */ - def \\ (that: String): NodeSeq = { - def filt(cond: (Node) => Boolean) = this flatMap (_.descendant_or_self) filter cond - that match { - case "_" => filt(!_.isAtom) - case _ if that(0) == '@' => filt(!_.isAtom) flatMap (_ \ that) - case _ => filt(x => !x.isAtom && x.label == that) - } - } - - /** Convenience method which returns string text of the named attribute. Use: - * - `that \@ "foo"` to get the string text of attribute `"foo"`; - */ - def \@(attributeName: String): String = (this \ ("@" + attributeName)).text - - override def toString(): String = theSeq.mkString - - def text: String = (this map (_.text)).mkString -} diff --git a/src/library/scala/xml/Null.scala b/src/library/scala/xml/Null.scala deleted file mode 100644 index f763c023c4..0000000000 --- a/src/library/scala/xml/Null.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.isNameStart -import scala.collection.Iterator - -/** Essentially, every method in here is a dummy, returning Zero[T]. - * It provides a backstop for the unusual collection defined by MetaData, - * sort of a linked list of tails. 
- * - * @author Burak Emir - * @version 1.0 - */ -case object Null extends MetaData { - override def iterator = Iterator.empty - override def size = 0 - override def append(m: MetaData, scope: NamespaceBinding = TopScope): MetaData = m - override def filter(f: MetaData => Boolean): MetaData = this - - def copy(next: MetaData) = next - def getNamespace(owner: Node) = null - - override def hasNext = false - def next = null - def key = null - def value = null - def isPrefixed = false - - override def length = 0 - override def length(i: Int) = i - - override def strict_==(other: Equality) = other match { - case x: MetaData => x.length == 0 - case _ => false - } - override protected def basisForHashCode: Seq[Any] = Nil - - def apply(namespace: String, scope: NamespaceBinding, key: String) = null - def apply(key: String) = - if (isNameStart(key.head)) null - else throw new IllegalArgumentException("not a valid attribute name '"+key+"', so can never match !") - - protected def toString1(sb: StringBuilder) = () - override protected def toString1(): String = "" - - override def toString(): String = "" - - override def buildString(sb: StringBuilder): StringBuilder = sb - - override def wellformed(scope: NamespaceBinding) = true - - def remove(key: String) = this - def remove(namespace: String, scope: NamespaceBinding, key: String) = this -} diff --git a/src/library/scala/xml/PCData.scala b/src/library/scala/xml/PCData.scala deleted file mode 100644 index 31eea2b6d7..0000000000 --- a/src/library/scala/xml/PCData.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This class (which is not used by all XML parsers, but always used by the - * XHTML one) represents parseable character data, which appeared as CDATA - * sections in the input and is 
to be preserved as CDATA section in the output. - * - * @author Burak Emir - * @version 1.0 - */ -class PCData(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to the XML - * specification. - * - * @param sb the input string buffer associated to some XML element - * @return the input string buffer with the formatted CDATA section - */ - override def buildString(sb: StringBuilder): StringBuilder = - sb append "<![CDATA[%s]]>".format(data) -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object PCData { - def apply(data: String) = new PCData(data) - def unapply(other: Any): Option[String] = other match { - case x: PCData => Some(x.data) - case _ => None - } -} - diff --git a/src/library/scala/xml/PrefixedAttribute.scala b/src/library/scala/xml/PrefixedAttribute.scala deleted file mode 100644 index 4ab79c8677..0000000000 --- a/src/library/scala/xml/PrefixedAttribute.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** prefixed attributes always have a non-null namespace. 
- * - * @param pre - * @param key - * @param value the attribute value - * @param next1 - */ -class PrefixedAttribute( - val pre: String, - val key: String, - val value: Seq[Node], - val next1: MetaData) -extends Attribute -{ - val next = if (value ne null) next1 else next1.remove(key) - - /** same as this(pre, key, Text(value), next), or no attribute if value is null */ - def this(pre: String, key: String, value: String, next: MetaData) = - this(pre, key, if (value ne null) Text(value) else null: NodeSeq, next) - - /** same as this(pre, key, value.get, next), or no attribute if value is None */ - def this(pre: String, key: String, value: Option[Seq[Node]], next: MetaData) = - this(pre, key, value.orNull, next) - - /** Returns a copy of this unprefixed attribute with the given - * next field. - */ - def copy(next: MetaData) = - new PrefixedAttribute(pre, key, value, next) - - def getNamespace(owner: Node) = - owner.getNamespace(pre) - - /** forwards the call to next (because caller looks for unprefixed attribute */ - def apply(key: String): Seq[Node] = next(key) - - /** gets attribute value of qualified (prefixed) attribute with given key - */ - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = { - if (key == this.key && scope.getURI(pre) == namespace) - value - else - next(namespace, scope, key) - } -} - -object PrefixedAttribute { - def unapply(x: PrefixedAttribute) = Some((x.pre, x.key, x.value, x.next)) -} diff --git a/src/library/scala/xml/PrettyPrinter.scala b/src/library/scala/xml/PrettyPrinter.scala deleted file mode 100755 index 9e01905357..0000000000 --- a/src/library/scala/xml/PrettyPrinter.scala +++ /dev/null @@ -1,263 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString - -/** Class for pretty 
printing. After instantiating, you can use the - * format() and formatNode() methods to convert XML to a formatted - * string. The class can be reused to pretty print any number of - * XML nodes. - * - * @author Burak Emir - * @version 1.0 - * - * @param width the width to fit the output into - * @param step indentation - */ -class PrettyPrinter(width: Int, step: Int) { - - class BrokenException() extends java.lang.Exception - - class Item - case object Break extends Item { - override def toString() = "\\" - } - case class Box(col: Int, s: String) extends Item - case class Para(s: String) extends Item - - protected var items: List[Item] = Nil - - protected var cur = 0 - - protected def reset() = { - cur = 0 - items = Nil - } - - /** Try to cut at whitespace. - */ - protected def cut(s: String, ind: Int): List[Item] = { - val tmp = width - cur - if (s.length <= tmp) - return List(Box(ind, s)) - var i = s indexOf ' ' - if (i > tmp || i == -1) throw new BrokenException() // cannot break - - var last: List[Int] = Nil - while (i != -1 && i < tmp) { - last = i::last - i = s.indexOf(' ', i+1) - } - var res: List[Item] = Nil - while (Nil != last) try { - val b = Box(ind, s.substring(0, last.head)) - cur = ind - res = b :: Break :: cut(s.substring(last.head, s.length), ind) - // backtrack - last = last.tail - } catch { - case _:BrokenException => last = last.tail - } - throw new BrokenException() - } - - /** Try to make indented box, if possible, else para. - */ - protected def makeBox(ind: Int, s: String) = - if (cur + s.length > width) { // fits in this line - items ::= Box(ind, s) - cur += s.length - } - else try cut(s, ind) foreach (items ::= _) // break it up - catch { case _: BrokenException => makePara(ind, s) } // give up, para - - // dont respect indent in para, but afterwards - protected def makePara(ind: Int, s: String) = { - items = Break::Para(s)::Break::items - cur = ind - } - - // respect indent - protected def makeBreak() = { // using wrapping here... 
- items = Break :: items - cur = 0 - } - - protected def leafTag(n: Node) = { - def mkLeaf(sb: StringBuilder) { - sb append '<' - n nameToString sb - n.attributes buildString sb - sb append "/>" - } - sbToString(mkLeaf) - } - - protected def startTag(n: Node, pscope: NamespaceBinding): (String, Int) = { - var i = 0 - def mkStart(sb: StringBuilder) { - sb append '<' - n nameToString sb - i = sb.length + 1 - n.attributes buildString sb - n.scope.buildString(sb, pscope) - sb append '>' - } - (sbToString(mkStart), i) - } - - protected def endTag(n: Node) = { - def mkEnd(sb: StringBuilder) { - sb append "</" - n nameToString sb - sb append '>' - } - sbToString(mkEnd) - } - - protected def childrenAreLeaves(n: Node): Boolean = { - def isLeaf(l: Node) = l match { - case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => true - case _ => false - } - n.child forall isLeaf - } - - protected def fits(test: String) = - test.length < width - cur - - private def doPreserve(node: Node) = - node.attribute(XML.namespace, XML.space).map(_.toString == XML.preserve) getOrElse false - - protected def traverse(node: Node, pscope: NamespaceBinding, ind: Int): Unit = node match { - - case Text(s) if s.trim() == "" => - ; - case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => - makeBox( ind, node.toString().trim() ) - case g @ Group(xs) => - traverse(xs.iterator, pscope, ind) - case _ => - val test = { - val sb = new StringBuilder() - Utility.serialize(node, pscope, sb, stripComments = false) - if (doPreserve(node)) sb.toString - else TextBuffer.fromString(sb.toString).toText(0).data - } - if (childrenAreLeaves(node) && fits(test)) { - makeBox(ind, test) - } else { - val (stg, len2) = startTag(node, pscope) - val etg = endTag(node) - if (stg.length < width - cur) { // start tag fits - makeBox(ind, stg) - makeBreak() - traverse(node.child.iterator, node.scope, ind + step) - makeBox(ind, etg) - } else if (len2 < width - cur) { - // - if (!lastwasbreak) sb.append('\n') // on windows: \r\n ? 
- lastwasbreak = true - cur = 0 -// while (cur < last) { -// sb append ' ' -// cur += 1 -// } - - case Box(i, s) => - lastwasbreak = false - while (cur < i) { - sb append ' ' - cur += 1 - } - sb.append(s) - case Para( s ) => - lastwasbreak = false - sb append s - } - } - - // public convenience methods - - /** Returns a formatted string containing well-formed XML with - * given namespace to prefix mapping. - * - * @param n the node to be serialized - * @param pscope the namespace to prefix mapping - * @return the formatted string - */ - def format(n: Node, pscope: NamespaceBinding = null): String = - sbToString(format(n, pscope, _)) - - /** Returns a formatted string containing well-formed XML. - * - * @param nodes the sequence of nodes to be serialized - * @param pscope the namespace to prefix mapping - */ - def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding = null): String = - sbToString(formatNodes(nodes, pscope, _)) - - /** Appends a formatted string containing well-formed XML with - * the given namespace to prefix mapping to the given stringbuffer. 
- * - * @param nodes the nodes to be serialized - * @param pscope the namespace to prefix mapping - * @param sb the string buffer to which to append to - */ - def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding, sb: StringBuilder): Unit = - nodes foreach (n => sb append format(n, pscope)) -} diff --git a/src/library/scala/xml/ProcInstr.scala b/src/library/scala/xml/ProcInstr.scala deleted file mode 100644 index 189c1c6878..0000000000 --- a/src/library/scala/xml/ProcInstr.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** an XML node for processing instructions (PI) - * - * @author Burak Emir - * @param target target name of this PI - * @param proctext text contained in this node, may not contain "?>" - */ -case class ProcInstr(target: String, proctext: String) extends SpecialNode -{ - if (!Utility.isName(target)) - throw new IllegalArgumentException(target+" must be an XML Name") - if (proctext contains "?>") - throw new IllegalArgumentException(proctext+" may not contain \"?>\"") - if (target.toLowerCase == "xml") - throw new IllegalArgumentException(target+" is reserved") - - final override def doCollectNamespaces = false - final override def doTransform = false - - final def label = "#PI" - override def text = "" - - /** appends "<?" target (" "+text)?+"?>" - * to this stringbuffer. 
- */ - override def buildString(sb: StringBuilder) = - sb append "<?%s%s?>".format(target, (if (proctext == "") "" else " " + proctext)) -} diff --git a/src/library/scala/xml/QNode.scala b/src/library/scala/xml/QNode.scala deleted file mode 100644 index f9e3f1854b..0000000000 --- a/src/library/scala/xml/QNode.scala +++ /dev/null @@ -1,20 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This object provides an extractor method to match a qualified node with - * its namespace URI - * - * @author Burak Emir - * @version 1.0 - */ -object QNode { - def unapplySeq(n: Node) = Some((n.scope.getURI(n.prefix), n.label, n.attributes, n.child)) -} diff --git a/src/library/scala/xml/SpecialNode.scala b/src/library/scala/xml/SpecialNode.scala deleted file mode 100644 index 5fef8ef66c..0000000000 --- a/src/library/scala/xml/SpecialNode.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** `SpecialNode` is a special XML node which represents either text - * `(PCDATA)`, a comment, a `PI`, or an entity ref. - * - * `SpecialNode`s also play the role of [[scala.xml.pull.XMLEvent]]s for - * pull-parsing. - * - * @author Burak Emir - */ -abstract class SpecialNode extends Node with pull.XMLEvent { - - /** always empty */ - final override def attributes = Null - - /** always Node.EmptyNamespace */ - final override def namespace = null - - /** always empty */ - final def child = Nil - - /** Append string representation to the given string buffer argument. 
*/ - def buildString(sb: StringBuilder): StringBuilder -} diff --git a/src/library/scala/xml/Text.scala b/src/library/scala/xml/Text.scala deleted file mode 100644 index debea0c025..0000000000 --- a/src/library/scala/xml/Text.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Text` implements an XML node for text (PCDATA). - * It is used in both non-bound and bound XML representations. - * - * @author Burak Emir - * @param data the text contained in this node, may not be null. - */ -class Text(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to the XML - * specification. - */ - override def buildString(sb: StringBuilder): StringBuilder = - Utility.escape(data, sb) -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. 
- * - * @author Burak Emir - * @version 1.0 - */ -object Text { - def apply(data: String) = new Text(data) - def unapply(other: Any): Option[String] = other match { - case x: Text => Some(x.data) - case _ => None - } -} diff --git a/src/library/scala/xml/TextBuffer.scala b/src/library/scala/xml/TextBuffer.scala deleted file mode 100644 index 514b1701af..0000000000 --- a/src/library/scala/xml/TextBuffer.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -import Utility.isSpace - -object TextBuffer { - def fromString(str: String): TextBuffer = new TextBuffer() append str -} - -/** The class `TextBuffer` is for creating text nodes without surplus - * whitespace. All occurrences of one or more whitespace in strings - * appended with the `append` method will be replaced by a single space - * character, and leading and trailing space will be removed completely. - */ -class TextBuffer -{ - val sb = new StringBuilder() - - /** Appends this string to the text buffer, trimming whitespaces as needed. - */ - def append(cs: Seq[Char]): this.type = { - cs foreach { c => - if (!isSpace(c)) sb append c - else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' - } - this - } - - /** Returns an empty sequence if text is only whitespace. - * - * @return the text without whitespaces. 
- */ - def toText: Seq[Text] = sb.toString.trim match { - case "" => Nil - case s => Seq(Text(s)) - } -} diff --git a/src/library/scala/xml/TopScope.scala b/src/library/scala/xml/TopScope.scala deleted file mode 100644 index 474fbbbdb5..0000000000 --- a/src/library/scala/xml/TopScope.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** top level namespace scope. only contains the predefined binding - * for the "xml" prefix which is bound to - * "http://www.w3.org/XML/1998/namespace" - */ -object TopScope extends NamespaceBinding(null, null, null) { - - import XML.{ xml, namespace } - - override def getURI(prefix1: String): String = - if (prefix1 == xml) namespace else null - - override def getPrefix(uri1: String): String = - if (uri1 == namespace) xml else null - - override def toString() = "" - - override def buildString(stop: NamespaceBinding) = "" - override def buildString(sb: StringBuilder, ignore: NamespaceBinding) = {} -} diff --git a/src/library/scala/xml/TypeSymbol.scala b/src/library/scala/xml/TypeSymbol.scala deleted file mode 100644 index fb371ee340..0000000000 --- a/src/library/scala/xml/TypeSymbol.scala +++ /dev/null @@ -1,15 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml - - -abstract class TypeSymbol diff --git a/src/library/scala/xml/Unparsed.scala b/src/library/scala/xml/Unparsed.scala deleted file mode 100644 index bc190eb724..0000000000 --- a/src/library/scala/xml/Unparsed.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, 
LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** An XML node for unparsed content. It will be output verbatim, all bets - * are off regarding wellformedness etc. - * - * @author Burak Emir - * @param data content in this node, may not be null. - */ -class Unparsed(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to XML - * specification. - */ - override def buildString(sb: StringBuilder): StringBuilder = - sb append data -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Unparsed { - def apply(data: String) = new Unparsed(data) - def unapply(x: Unparsed) = Some(x.data) -} diff --git a/src/library/scala/xml/UnprefixedAttribute.scala b/src/library/scala/xml/UnprefixedAttribute.scala deleted file mode 100644 index 6fa827da5f..0000000000 --- a/src/library/scala/xml/UnprefixedAttribute.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** Unprefixed attributes have the null namespace, and no prefix field - * - * @author Burak Emir - */ -class UnprefixedAttribute( - val key: String, - val value: Seq[Node], - next1: MetaData) -extends Attribute -{ - final val pre = null - val next = if (value ne null) next1 else next1.remove(key) - - /** same as this(key, Text(value), next), or no attribute if value is null */ - def this(key: String, value: String, next: MetaData) = - this(key, if (value ne null) Text(value) else null: NodeSeq, next) - - /** same as this(key, value.get, next), or no attribute if value is None */ - def this(key: String, value: 
Option[Seq[Node]], next: MetaData) = - this(key, value.orNull, next) - - /** returns a copy of this unprefixed attribute with the given next field*/ - def copy(next: MetaData) = new UnprefixedAttribute(key, value, next) - - final def getNamespace(owner: Node): String = null - - /** - * Gets value of unqualified (unprefixed) attribute with given key, null if not found - * - * @param key - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(key: String): Seq[Node] = - if (key == this.key) value else next(key) - - /** - * Forwards the call to next (because caller looks for prefixed attribute). - * - * @param namespace - * @param scope - * @param key - * @return .. - */ - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = - next(namespace, scope, key) -} -object UnprefixedAttribute { - def unapply(x: UnprefixedAttribute) = Some((x.key, x.value, x.next)) -} diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala deleted file mode 100755 index 9134476401..0000000000 --- a/src/library/scala/xml/Utility.scala +++ /dev/null @@ -1,410 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import scala.collection.mutable -import parsing.XhtmlEntities -import scala.language.implicitConversions - -/** - * The `Utility` object provides utility functions for processing instances - * of bound and not bound XML classes, as well as escaping text nodes. - * - * @author Burak Emir - */ -object Utility extends AnyRef with parsing.TokenTests { - final val SU = '\u001A' - - // [Martin] This looks dubious. We don't convert StringBuilders to - // Strings anywhere else, why do it here? 
- implicit def implicitSbToString(sb: StringBuilder) = sb.toString() - - // helper for the extremely oft-repeated sequence of creating a - // StringBuilder, passing it around, and then grabbing its String. - private [xml] def sbToString(f: (StringBuilder) => Unit): String = { - val sb = new StringBuilder - f(sb) - sb.toString - } - private[xml] def isAtomAndNotText(x: Node) = x.isAtom && !x.isInstanceOf[Text] - - /** Trims an element - call this method, when you know that it is an - * element (and not a text node) so you know that it will not be trimmed - * away. With this assumption, the function can return a `Node`, rather - * than a `Seq[Node]`. If you don't know, call `trimProper` and account - * for the fact that you may get back an empty sequence of nodes. - * - * Precondition: node is not a text node (it might be trimmed) - */ - def trim(x: Node): Node = x match { - case Elem(pre, lab, md, scp, child@_*) => - Elem(pre, lab, md, scp, (child flatMap trimProper):_*) - } - - /** trim a child of an element. `Attribute` values and `Atom` nodes that - * are not `Text` nodes are unaffected. - */ - def trimProper(x:Node): Seq[Node] = x match { - case Elem(pre,lab,md,scp,child@_*) => - Elem(pre,lab,md,scp, (child flatMap trimProper):_*) - case Text(s) => - new TextBuffer().append(s).toText - case _ => - x - } - - /** returns a sorted attribute list */ - def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else { - val key = md.key - val smaller = sort(md.filter { m => m.key < key }) - val greater = sort(md.filter { m => m.key > key }) - smaller.foldRight (md copy greater) ((x, xs) => x copy xs) - } - - /** Return the node with its attribute list sorted alphabetically - * (prefixes are ignored) */ - def sort(n:Node): Node = n match { - case Elem(pre,lab,md,scp,child@_*) => - Elem(pre,lab,sort(md),scp, (child map sort):_*) - case _ => n - } - - /** - * Escapes the characters < > & and " from string. 
- */ - final def escape(text: String): String = sbToString(escape(text, _)) - - object Escapes { - /** For reasons unclear escape and unescape are a long ways from - * being logical inverses. */ - val pairs = Map( - "lt" -> '<', - "gt" -> '>', - "amp" -> '&', - "quot" -> '"' - // enigmatic comment explaining why this isn't escaped -- - // is valid xhtml but not html, and IE doesn't know it, says jweb - // "apos" -> '\'' - ) - val escMap = pairs map { case (s, c) => c-> ("&%s;" format s) } - val unescMap = pairs ++ Map("apos" -> '\'') - } - import Escapes.{ escMap, unescMap } - - /** - * Appends escaped string to `s`. - */ - final def escape(text: String, s: StringBuilder): StringBuilder = { - // Implemented per XML spec: - // http://www.w3.org/International/questions/qa-controls - // imperative code 3x-4x faster than current implementation - // dpp (David Pollak) 2010/02/03 - val len = text.length - var pos = 0 - while (pos < len) { - text.charAt(pos) match { - case '<' => s.append("&lt;") - case '>' => s.append("&gt;") - case '&' => s.append("&amp;") - case '"' => s.append("&quot;") - case '\n' => s.append('\n') - case '\r' => s.append('\r') - case '\t' => s.append('\t') - case c => if (c >= ' ') s.append(c) - } - - pos += 1 - } - s - } - - /** - * Appends unescaped string to `s`, `amp` becomes `&`, - * `lt` becomes `<` etc.. - * - * @return `'''null'''` if `ref` was not a predefined entity. - */ - final def unescape(ref: String, s: StringBuilder): StringBuilder = - ((unescMap get ref) map (s append _)).orNull - - /** - * Returns a set of all namespaces used in a sequence of nodes - * and all their descendants, including the empty namespaces. - */ - def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] = - nodes.foldLeft(new mutable.HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set } - - /** - * Adds all namespaces in node to set. 
- */ - def collectNamespaces(n: Node, set: mutable.Set[String]) { - if (n.doCollectNamespaces) { - set += n.namespace - for (a <- n.attributes) a match { - case _:PrefixedAttribute => - set += a.getNamespace(n) - case _ => - } - for (i <- n.child) - collectNamespaces(i, set) - } - } - - // def toXML( - // x: Node, - // pscope: NamespaceBinding = TopScope, - // sb: StringBuilder = new StringBuilder, - // stripComments: Boolean = false, - // decodeEntities: Boolean = true, - // preserveWhitespace: Boolean = false, - // minimizeTags: Boolean = false): String = - // { - // toXMLsb(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - // sb.toString() - // } - - /** - * Serialize the provided Node to the provided StringBuilder. - *

- * Note that calling this source-compatible method will result in the same old, arguably almost universally unwanted, - * behaviour. - */ - @deprecated("Please use `serialize` instead and specify a `minimizeTags` parameter", "2.10.0") - def toXML( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = false): StringBuilder = - { - serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never) - } - - /** - * Serialize an XML Node to a StringBuilder. - * - * This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate - * combination of named/default arguments and overloading. - * - * @todo use a Writer instead - */ - def serialize( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder = - { - x match { - case c: Comment if !stripComments => c buildString sb - case s: SpecialNode => s buildString sb - case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, minimizeTags = minimizeTags) ; sb - case el: Elem => - // print tag with namespace declarations - sb.append('<') - el.nameToString(sb) - if (el.attributes ne null) el.attributes.buildString(sb) - el.scope.buildString(sb, pscope) - if (el.child.isEmpty && - (minimizeTags == MinimizeMode.Always || - (minimizeTags == MinimizeMode.Default && el.minimizeEmpty))) - { - // no children, so use short form: - sb.append("/>") - } else { - // children, so use long form: ... 
- sb.append('>') - sequenceToXML(el.child, el.scope, sb, stripComments) - sb.append("') - } - case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName) - } - } - - def sequenceToXML( - children: Seq[Node], - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit = - { - if (children.isEmpty) return - else if (children forall isAtomAndNotText) { // add space - val it = children.iterator - val f = it.next() - serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - while (it.hasNext) { - val x = it.next() - sb.append(' ') - serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - } - } - else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } - } - - /** - * Returns prefix of qualified name if any. - */ - final def prefix(name: String): Option[String] = (name indexOf ':') match { - case -1 => None - case i => Some(name.substring(0, i)) - } - - /** - * Returns a hashcode for the given constituents of a node - */ - def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = - scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##) - - def appendQuoted(s: String): String = sbToString(appendQuoted(s, _)) - - /** - * Appends "s" if string `s` does not contain ", - * 's' otherwise. 
- */ - def appendQuoted(s: String, sb: StringBuilder) = { - val ch = if (s contains '"') '\'' else '"' - sb.append(ch).append(s).append(ch) - } - - /** - * Appends "s" and escapes and " i s with \" - */ - def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = { - sb.append('"') - for (c <- s) c match { - case '"' => sb.append('\\'); sb.append('"') - case _ => sb.append(c) - } - sb.append('"') - } - - def getName(s: String, index: Int): String = { - if (index >= s.length) null - else { - val xs = s drop index - if (xs.nonEmpty && isNameStart(xs.head)) xs takeWhile isNameChar - else "" - } - } - - /** - * Returns `'''null'''` if the value is a correct attribute value, - * error message if it isn't. - */ - def checkAttributeValue(value: String): String = { - var i = 0 - while (i < value.length) { - value.charAt(i) match { - case '<' => - return "< not allowed in attribute value" - case '&' => - val n = getName(value, i+1) - if (n eq null) - return "malformed entity reference in attribute value ["+value+"]" - i = i + n.length + 1 - if (i >= value.length || value.charAt(i) != ';') - return "malformed entity reference in attribute value ["+value+"]" - case _ => - } - i = i + 1 - } - null - } - - def parseAttributeValue(value: String): Seq[Node] = { - val sb = new StringBuilder - var rfb: StringBuilder = null - val nb = new NodeBuffer() - - val it = value.iterator - while (it.hasNext) { - var c = it.next() - // entity! 
flush buffer into text node - if (c == '&') { - c = it.next() - if (c == '#') { - c = it.next() - val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) - sb.append(theChar) - } - else { - if (rfb eq null) rfb = new StringBuilder() - rfb append c - c = it.next() - while (c != ';') { - rfb.append(c) - c = it.next() - } - val ref = rfb.toString() - rfb.clear() - unescape(ref,sb) match { - case null => - if (sb.length > 0) { // flush buffer - nb += Text(sb.toString()) - sb.clear() - } - nb += EntityRef(ref) // add entityref - case _ => - } - } - } - else sb append c - } - if (sb.length > 0) { // flush buffer - val x = Text(sb.toString()) - if (nb.length == 0) - return x - else - nb += x - } - nb - } - - /** - * {{{ - * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" - * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" - * }}} - * See [66] - */ - def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = { - val hex = (ch() == 'x') && { nextch(); true } - val base = if (hex) 16 else 10 - var i = 0 - while (ch() != ';') { - ch() match { - case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => - i = i * base + ch().asDigit - case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => - if (! 
hex) - reportSyntaxError("hex char not allowed in decimal char ref\n" + - "Did you mean to write &#x ?") - else - i = i * base + ch().asDigit - case SU => - reportTruncatedError("") - case _ => - reportSyntaxError("character '" + ch() + "' not allowed in char ref\n") - } - nextch() - } - new String(Array(i), 0, 1) - } -} diff --git a/src/library/scala/xml/XML.scala b/src/library/scala/xml/XML.scala deleted file mode 100755 index 020264e509..0000000000 --- a/src/library/scala/xml/XML.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import parsing.NoBindingFactoryAdapter -import factory.XMLLoader -import java.io.{ File, FileDescriptor, FileInputStream, FileOutputStream } -import java.io.{ InputStream, Reader, StringReader, Writer } -import java.nio.channels.Channels -import scala.util.control.Exception.ultimately - -object Source { - def fromFile(file: File) = new InputSource(new FileInputStream(file)) - def fromFile(fd: FileDescriptor) = new InputSource(new FileInputStream(fd)) - def fromFile(name: String) = new InputSource(new FileInputStream(name)) - - def fromInputStream(is: InputStream) = new InputSource(is) - def fromReader(reader: Reader) = new InputSource(reader) - def fromSysId(sysID: String) = new InputSource(sysID) - def fromString(string: String) = fromReader(new StringReader(string)) -} - -/** - * Governs how empty elements (i.e. those without child elements) should be serialized. - */ -object MinimizeMode extends Enumeration { - /** Minimize empty tags if they were originally empty when parsed, or if they were constructed - * with [[scala.xml.Elem]]`#minimizeEmpty` == true - */ - val Default = Value - - /** Always minimize empty tags. 
Note that this may be problematic for XHTML, in which - * case [[scala.xml.Xhtml]]`#toXhtml` should be used instead. - */ - val Always = Value - - /** Never minimize empty tags. - */ - val Never = Value -} - -/** The object `XML` provides constants, and functions to load - * and save XML elements. Use this when data binding is not desired, i.e. - * when XML is handled using `Symbol` nodes. - * - * @author Burak Emir - * @version 1.0, 25/04/2005 - */ -object XML extends XMLLoader[Elem] { - val xml = "xml" - val xmlns = "xmlns" - val namespace = "http://www.w3.org/XML/1998/namespace" - val preserve = "preserve" - val space = "space" - val lang = "lang" - val encoding = "ISO-8859-1" - - /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ - def withSAXParser(p: SAXParser): XMLLoader[Elem] = - new XMLLoader[Elem] { override val parser: SAXParser = p } - - /** Saves a node to a file with given filename using given encoding - * optionally with xmldecl and doctype declaration. - * - * @param filename the filename - * @param node the xml node we want to write - * @param enc encoding to use - * @param xmlDecl if true, write xml declaration - * @param doctype if not null, write doctype declaration - */ - final def save( - filename: String, - node: Node, - enc: String = encoding, - xmlDecl: Boolean = false, - doctype: dtd.DocType = null - ): Unit = - { - val fos = new FileOutputStream(filename) - val w = Channels.newWriter(fos.getChannel(), enc) - - ultimately(w.close())( - write(w, node, enc, xmlDecl, doctype) - ) - } - - /** Writes the given node using writer, optionally with xml decl and doctype. - * It's the caller's responsibility to close the writer. 
- * - * @param w the writer - * @param node the xml node we want to write - * @param enc the string to be used in `xmlDecl` - * @param xmlDecl if true, write xml declaration - * @param doctype if not null, write doctype declaration - */ - final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default) { - /* TODO: optimize by giving writer parameter to toXML*/ - if (xmlDecl) w.write("\n") - if (doctype ne null) w.write( doctype.toString() + "\n") - w.write(Utility.serialize(node, minimizeTags = minimizeTags).toString) - } -} diff --git a/src/library/scala/xml/Xhtml.scala b/src/library/scala/xml/Xhtml.scala deleted file mode 100644 index 6a12c1a89a..0000000000 --- a/src/library/scala/xml/Xhtml.scala +++ /dev/null @@ -1,97 +0,0 @@ - -package scala -package xml - -import parsing.XhtmlEntities -import Utility.{ sbToString, isAtomAndNotText } - -/* (c) David Pollak 2007 WorldWide Conferencing, LLC */ - -object Xhtml -{ - /** - * Convenience function: same as toXhtml(node, false, false) - * - * @param node the node - */ - def toXhtml(node: Node): String = sbToString(sb => toXhtml(x = node, sb = sb)) - - /** - * Convenience function: amounts to calling toXhtml(node) on each - * node in the sequence. - * - * @param nodeSeq the node sequence - */ - def toXhtml(nodeSeq: NodeSeq): String = sbToString(sb => sequenceToXML(nodeSeq: Seq[Node], sb = sb)) - - /** Elements which we believe are safe to minimize if minimizeTags is true. 
- * See http://www.w3.org/TR/xhtml1/guidelines.html#C_3 - */ - private val minimizableElements = - List("base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col") - - def toXhtml( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = false, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = true): Unit = - { - def decode(er: EntityRef) = XhtmlEntities.entMap.get(er.entityName) match { - case Some(chr) if chr.toInt >= 128 => sb.append(chr) - case _ => er.buildString(sb) - } - def shortForm = - minimizeTags && - (x.child == null || x.child.length == 0) && - (minimizableElements contains x.label) - - x match { - case c: Comment => if (!stripComments) c buildString sb - case er: EntityRef if decodeEntities => decode(er) - case x: SpecialNode => x buildString sb - case g: Group => - g.nodes foreach { toXhtml(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } - - case _ => - sb.append('<') - x.nameToString(sb) - if (x.attributes ne null) x.attributes.buildString(sb) - x.scope.buildString(sb, pscope) - - if (shortForm) sb.append(" />") - else { - sb.append('>') - sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - sb.append("') - } - } - } - - /** - * Amounts to calling toXhtml(node, ...) with the given parameters on each node. 
- */ - def sequenceToXML( - children: Seq[Node], - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = false, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = true): Unit = - { - if (children.isEmpty) - return - - val doSpaces = children forall isAtomAndNotText // interleave spaces - for (c <- children.take(children.length - 1)) { - toXhtml(c, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - if (doSpaces) sb append ' ' - } - toXhtml(children.last, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - } -} diff --git a/src/library/scala/xml/dtd/ContentModel.scala b/src/library/scala/xml/dtd/ContentModel.scala deleted file mode 100644 index 4007985dce..0000000000 --- a/src/library/scala/xml/dtd/ContentModel.scala +++ /dev/null @@ -1,118 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package dtd - -import scala.xml.dtd.impl._ -import scala.xml.Utility.sbToString -import PartialFunction._ - -object ContentModel extends WordExp { - type _labelT = ElemName - type _regexpT = RegExp - - object Translator extends WordBerrySethi { - override val lang: ContentModel.this.type = ContentModel.this - } - - case class ElemName(name: String) extends Label { - override def toString() = """ElemName("%s")""" format name - } - - def isMixed(cm: ContentModel) = cond(cm) { case _: MIXED => true } - def containsText(cm: ContentModel) = (cm == PCDATA) || isMixed(cm) - def parse(s: String): ContentModel = ContentModelParser.parse(s) - - def getLabels(r: RegExp): Set[String] = { - def traverse(r: RegExp): Set[String] = r match { // !!! 
check for match translation problem - case Letter(ElemName(name)) => Set(name) - case Star( x @ _ ) => traverse( x ) // bug if x@_* - case Sequ( xs @ _* ) => Set(xs flatMap traverse: _*) - case Alt( xs @ _* ) => Set(xs flatMap traverse: _*) - } - - traverse(r) - } - - def buildString(r: RegExp): String = sbToString(buildString(r, _)) - - /* precond: rs.length >= 1 */ - private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char) { - buildString(rs.head, sb) - for (z <- rs.tail) { - sb append sep - buildString(z, sb) - } - } - - def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match { - case ANY => sb append "ANY" - case EMPTY => sb append "EMPTY" - case PCDATA => sb append "(#PCDATA)" - case ELEMENTS(_) | MIXED(_) => c buildString sb - } - - def buildString(r: RegExp, sb: StringBuilder): StringBuilder = - r match { // !!! check for match translation problem - case Eps => - sb - case Sequ(rs @ _*) => - sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' ) - case Alt(rs @ _*) => - sb.append( '(' ); buildString(rs, sb, '|'); sb.append( ')' ) - case Star(r: RegExp) => - sb.append( '(' ); buildString(r, sb); sb.append( ")*" ) - case Letter(ElemName(name)) => - sb.append(name) - } - -} - -sealed abstract class ContentModel -{ - override def toString(): String = sbToString(buildString) - def buildString(sb: StringBuilder): StringBuilder -} - -case object PCDATA extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("(#PCDATA)") -} -case object EMPTY extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("EMPTY") -} -case object ANY extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("ANY") -} -sealed abstract class DFAContentModel extends ContentModel { - import ContentModel.{ ElemName, Translator } - def r: ContentModel.RegExp - - lazy val dfa: DetWordAutom[ElemName] = { - val nfa = 
Translator.automatonFrom(r, 1) - new SubsetConstruction(nfa).determinize - } -} - -case class MIXED(r: ContentModel.RegExp) extends DFAContentModel { - import ContentModel.{ Alt, RegExp } - - override def buildString(sb: StringBuilder): StringBuilder = { - val newAlt = r match { case Alt(rs @ _*) => Alt(rs drop 1: _*) } - - sb append "(#PCDATA|" - ContentModel.buildString(newAlt: RegExp, sb) - sb append ")*" - } -} - -case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel { - override def buildString(sb: StringBuilder): StringBuilder = - ContentModel.buildString(r, sb) -} diff --git a/src/library/scala/xml/dtd/ContentModelParser.scala b/src/library/scala/xml/dtd/ContentModelParser.scala deleted file mode 100644 index 71b391c422..0000000000 --- a/src/library/scala/xml/dtd/ContentModelParser.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package dtd - -/** Parser for regexps (content models in DTD element declarations) */ - -object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA - import ContentModel._ - - /** parses the argument to a regexp */ - def parse(s: String): ContentModel = { initScanner(s); contentspec } - - def accept(tok: Int) = { - if (token != tok) { - if ((tok == STAR) && (token == END)) // common mistake - scala.sys.error("in DTDs, \n"+ - "mixed content models must be like (#PCDATA|Name|Name|...)*") - else - scala.sys.error("expected "+token2string(tok)+ - ", got unexpected token:"+token2string(token)) - } - nextToken() - } - - // s [ '+' | '*' | '?' 
] - def maybeSuffix(s: RegExp) = token match { - case STAR => nextToken(); Star(s) - case PLUS => nextToken(); Sequ(s, Star(s)) - case OPT => nextToken(); Alt(Eps, s) - case _ => s - } - - // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp) - - def contentspec: ContentModel = token match { - - case NAME => value match { - case "ANY" => ANY - case "EMPTY" => EMPTY - case _ => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value ) - } - case LPAREN => - - nextToken() - sOpt() - if (token != TOKEN_PCDATA) - ELEMENTS(regexp) - else { - nextToken() - token match { - case RPAREN => - PCDATA - case CHOICE => - val res = MIXED(choiceRest(Eps)) - sOpt() - accept( RPAREN ) - accept( STAR ) - res - case _ => - scala.sys.error("unexpected token:" + token2string(token) ) - } - } - - case _ => - scala.sys.error("unexpected token:" + token2string(token) ) - } - // sopt ::= S? - def sOpt() = if( token == S ) nextToken() - - // (' S? mixed ::= '#PCDATA' S? ')' - // | '#PCDATA' (S? '|' S? atom)* S? ')*' - - // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ] - def regexp: RegExp = { - val p = particle - sOpt() - maybeSuffix(token match { - case RPAREN => nextToken(); p - case CHOICE => val q = choiceRest( p );accept( RPAREN ); q - case COMMA => val q = seqRest( p ); accept( RPAREN ); q - }) - } - - // seqRest ::= (',' S? cp S?)+ - def seqRest(p: RegExp) = { - var k = List(p) - while( token == COMMA ) { - nextToken() - sOpt() - k = particle::k - sOpt() - } - Sequ( k.reverse:_* ) - } - - // choiceRest ::= ('|' S? cp S?)+ - def choiceRest( p:RegExp ) = { - var k = List( p ) - while( token == CHOICE ) { - nextToken() - sOpt() - k = particle::k - sOpt() - } - Alt( k.reverse:_* ) - } - - // particle ::= '(' S? regexp - // | name [ '+' | '*' | '?' 
] - def particle = token match { - case LPAREN => nextToken(); sOpt(); regexp - case NAME => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a) - case _ => scala.sys.error("expected '(' or Name, got:"+token2string(token)) - } - - // atom ::= name - def atom = token match { - case NAME => val a = Letter(ElemName(value)); nextToken(); a - case _ => scala.sys.error("expected Name, got:"+token2string(token)) - } -} diff --git a/src/library/scala/xml/dtd/DTD.scala b/src/library/scala/xml/dtd/DTD.scala deleted file mode 100644 index 16a824fe2c..0000000000 --- a/src/library/scala/xml/dtd/DTD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -import scala.collection.mutable - -/** A document type declaration. - * - * @author Burak Emir - */ -abstract class DTD { - var externalID: ExternalID = null - var decls: List[Decl] = Nil - def notations: Seq[NotationDecl] = Nil - def unparsedEntities: Seq[EntityDecl] = Nil - - var elem: mutable.Map[String, ElemDecl] = new mutable.HashMap[String, ElemDecl]() - var attr: mutable.Map[String, AttListDecl] = new mutable.HashMap[String, AttListDecl]() - var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() - - override def toString() = - "DTD [\n%s%s]".format( - Option(externalID) getOrElse "", - decls.mkString("", "\n", "\n") - ) -} diff --git a/src/library/scala/xml/dtd/Decl.scala b/src/library/scala/xml/dtd/Decl.scala deleted file mode 100644 index 8bf859c460..0000000000 --- a/src/library/scala/xml/dtd/Decl.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ - ** ________ ___ / / ___ Scala API ** - ** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** - ** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** - ** /____/\___/_/ |_/____/_/ | | ** - 
** |/ ** - \* */ - -package scala -package xml -package dtd - -import Utility.sbToString - -sealed abstract class Decl - -sealed abstract class MarkupDecl extends Decl { - def buildString(sb: StringBuilder): StringBuilder -} - -/** an element declaration - */ -case class ElemDecl(name: String, contentModel: ContentModel) -extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -case class AttListDecl(name: String, attrs:List[AttrDecl]) -extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "") - } -} - -/** an attribute declaration. at this point, the tpe is a string. Future - * versions might provide a way to access the attribute types more - * directly. - */ -case class AttrDecl(name: String, tpe: String, default: DefaultDecl) { - override def toString(): String = sbToString(buildString) - - def buildString(sb: StringBuilder): StringBuilder = { - sb append " " append name append ' ' append tpe append ' ' - default buildString sb - } - -} - -/** an entity declaration */ -sealed abstract class EntityDecl extends MarkupDecl - -/** a parsed general entity declaration */ -case class ParsedEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -/** a parameter entity declaration */ -case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -/** an unparsed entity declaration */ -case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} -/** a notation declaration */ -case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "" */ - final 
override def toString() = { - def intString = - if (intSubset.isEmpty) "" - else intSubset.mkString("[", "", "]") - - """""".format(name, extID.toString(), intString) - } -} - -object DocType { - /** Creates a doctype with no external id, nor internal subset declarations. */ - def apply(name: String): DocType = apply(name, NoExternalID, Nil) -} diff --git a/src/library/scala/xml/dtd/ElementValidator.scala b/src/library/scala/xml/dtd/ElementValidator.scala deleted file mode 100644 index 4830769a7d..0000000000 --- a/src/library/scala/xml/dtd/ElementValidator.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - -import PartialFunction._ -import scala.collection.mutable - -import ContentModel.ElemName -import MakeValidationException._ // @todo other exceptions - -import impl._ - -/** validate children and/or attributes of an element - * exceptions are created but not thrown. 
- */ -class ElementValidator() extends Function1[Node,Boolean] { - - private var exc: List[ValidationException] = Nil - - protected var contentModel: ContentModel = _ - protected var dfa: DetWordAutom[ElemName] = _ - protected var adecls: List[AttrDecl] = _ - - /** set content model, enabling element validation */ - def setContentModel(cm: ContentModel) = { - contentModel = cm - cm match { - case ELEMENTS(r) => - val nfa = ContentModel.Translator.automatonFrom(r, 1) - dfa = new SubsetConstruction(nfa).determinize - case _ => - dfa = null - } - } - - def getContentModel = contentModel - - /** set meta data, enabling attribute validation */ - def setMetaData(adecls: List[AttrDecl]) { this.adecls = adecls } - - def getIterable(nodes: Seq[Node], skipPCDATA: Boolean): Iterable[ElemName] = { - def isAllWhitespace(a: Atom[_]) = cond(a.data) { case s: String if s.trim == "" => true } - - nodes.filter { - case y: SpecialNode => y match { - case a: Atom[_] if isAllWhitespace(a) => false // always skip all-whitespace nodes - case _ => !skipPCDATA - } - case x => x.namespace eq null - } . map (x => ElemName(x.label)) - } - - /** check attributes, return true if md corresponds to attribute declarations in adecls. 
- */ - def check(md: MetaData): Boolean = { - val len: Int = exc.length - val ok = new mutable.BitSet(adecls.length) - - for (attr <- md) { - def attrStr = attr.value.toString - def find(Key: String): Option[AttrDecl] = { - adecls.zipWithIndex find { - case (a @ AttrDecl(Key, _, _), j) => ok += j ; return Some(a) - case _ => false - } - None - } - - find(attr.key) match { - case None => - exc ::= fromUndefinedAttribute(attr.key) - - case Some(AttrDecl(_, tpe, DEFAULT(true, fixedValue))) if attrStr != fixedValue => - exc ::= fromFixedAttribute(attr.key, fixedValue, attrStr) - - case _ => - } - } - - adecls.zipWithIndex foreach { - case (AttrDecl(key, tpe, REQUIRED), j) if !ok(j) => exc ::= fromMissingAttribute(key, tpe) - case _ => - } - - exc.length == len //- true if no new exception - } - - /** check children, return true if conform to content model - * @note contentModel != null - */ - def check(nodes: Seq[Node]): Boolean = contentModel match { - case ANY => true - case EMPTY => getIterable(nodes, skipPCDATA = false).isEmpty - case PCDATA => getIterable(nodes, skipPCDATA = true).isEmpty - case MIXED(ContentModel.Alt(branches @ _*)) => // @todo - val j = exc.length - def find(Key: String): Boolean = - branches exists { case ContentModel.Letter(ElemName(Key)) => true ; case _ => false } - - getIterable(nodes, skipPCDATA = true) map (_.name) filterNot find foreach { - exc ::= MakeValidationException fromUndefinedElement _ - } - (exc.length == j) // - true if no new exception - - case _: ELEMENTS => - dfa isFinal { - getIterable(nodes, skipPCDATA = false).foldLeft(0) { (q, e) => - (dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e)) - } - } - case _ => false - } - - /** applies various validations - accumulates error messages in exc - * @todo fail on first error, ignore other errors (rearranging conditions) - */ - def apply(n: Node): Boolean = - //- ? check children - ((contentModel == null) || check(n.child)) && - //- ? 
check attributes - ((adecls == null) || check(n.attributes)) -} diff --git a/src/library/scala/xml/dtd/ExternalID.scala b/src/library/scala/xml/dtd/ExternalID.scala deleted file mode 100644 index 880633d860..0000000000 --- a/src/library/scala/xml/dtd/ExternalID.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -/** an ExternalIDs - either PublicID or SystemID - * - * @author Burak Emir - */ -sealed abstract class ExternalID extends parsing.TokenTests { - def quoted(s: String) = { - val c = if (s contains '"') '\'' else '"' - c + s + c - } - - // public != null: PUBLIC " " publicLiteral " " [systemLiteral] - // public == null: SYSTEM " " systemLiteral - override def toString(): String = { - lazy val quotedSystemLiteral = quoted(systemId) - lazy val quotedPublicLiteral = quoted(publicId) - - if (publicId == null) "SYSTEM " + quotedSystemLiteral - else "PUBLIC " + quotedPublicLiteral + - (if (systemId == null) "" else " " + quotedSystemLiteral) - } - def buildString(sb: StringBuilder): StringBuilder = - sb.append(this.toString()) - - def systemId: String - def publicId: String -} - -/** a system identifier - * - * @author Burak Emir - * @param systemId the system identifier literal - */ -case class SystemID(systemId: String) extends ExternalID { - val publicId = null - - if (!checkSysID(systemId)) - throw new IllegalArgumentException("can't use both \" and ' in systemId") -} - - -/** a public identifier (see http://www.w3.org/QA/2002/04/valid-dtd-list.html). 
- * - * @author Burak Emir - * @param publicId the public identifier literal - * @param systemId (can be null for notation pubIDs) the system identifier literal - */ -case class PublicID(publicId: String, systemId: String) extends ExternalID { - if (!checkPubID(publicId)) - throw new IllegalArgumentException("publicId must consist of PubidChars") - - if (systemId != null && !checkSysID(systemId)) - throw new IllegalArgumentException("can't use both \" and ' in systemId") - - /** the constant "#PI" */ - def label = "#PI" - - /** always empty */ - def attribute = Node.NoAttributes - - /** always empty */ - def child = Nil -} - -/** A marker used when a `DocType` contains no external id. - * - * @author Michael Bayne - */ -object NoExternalID extends ExternalID { - val publicId = null - val systemId = null - - override def toString = "" -} diff --git a/src/library/scala/xml/dtd/Scanner.scala b/src/library/scala/xml/dtd/Scanner.scala deleted file mode 100644 index 5f9d1ccaed..0000000000 --- a/src/library/scala/xml/dtd/Scanner.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -/** Scanner for regexps (content models in DTD element declarations) - * todo: cleanup - */ -class Scanner extends Tokens with parsing.TokenTests { - - final val ENDCH = '\u0000' - - var token:Int = END - var value:String = _ - - private var it: Iterator[Char] = null - private var c: Char = 'z' - - /** initializes the scanner on input s */ - final def initScanner(s: String) { - value = "" - it = (s).iterator - token = 1+END - next() - nextToken() - } - - /** scans the next token */ - final def nextToken() { - if (token != END) token = readToken - } - - // todo: see XML specification... 
probably isLetter,isDigit is fine - final def isIdentChar = ( ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z')) - - final def next() = if (it.hasNext) c = it.next() else c = ENDCH - - final def acc(d: Char) { - if (c == d) next() else scala.sys.error("expected '"+d+"' found '"+c+"' !") - } - - final def accS(ds: Seq[Char]) { ds foreach acc } - - final def readToken: Int = - if (isSpace(c)) { - while (isSpace(c)) c = it.next() - S - } else c match { - case '(' => next(); LPAREN - case ')' => next(); RPAREN - case ',' => next(); COMMA - case '*' => next(); STAR - case '+' => next(); PLUS - case '?' => next(); OPT - case '|' => next(); CHOICE - case '#' => next(); accS( "PCDATA" ); TOKEN_PCDATA - case ENDCH => END - case _ => - if (isNameStart(c)) name; // NAME - else scala.sys.error("unexpected character:" + c) - } - - final def name = { - val sb = new StringBuilder() - do { sb.append(c); next() } while (isNameChar(c)) - value = sb.toString() - NAME - } - -} diff --git a/src/library/scala/xml/dtd/Tokens.scala b/src/library/scala/xml/dtd/Tokens.scala deleted file mode 100644 index 07e888e77a..0000000000 --- a/src/library/scala/xml/dtd/Tokens.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - - -class Tokens { - - // Tokens - - final val TOKEN_PCDATA = 0 - final val NAME = 1 - final val LPAREN = 3 - final val RPAREN = 4 - final val COMMA = 5 - final val STAR = 6 - final val PLUS = 7 - final val OPT = 8 - final val CHOICE = 9 - final val END = 10 - final val S = 13 - - final def token2string(i: Int): String = i match { - case 0 => "#PCDATA" - case 1 => "NAME" - case 3 => "(" - case 4 => ")" - case 5 => "," - case 6 => "*" - case 7 => "+" - case 8 => "?" 
- case 9 => "|" - case 10 => "END" - case 13 => " " - } -} diff --git a/src/library/scala/xml/dtd/ValidationException.scala b/src/library/scala/xml/dtd/ValidationException.scala deleted file mode 100644 index 1bfae55286..0000000000 --- a/src/library/scala/xml/dtd/ValidationException.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - - -case class ValidationException(e: String) extends Exception(e) - -/** - * @author Burak Emir - */ -object MakeValidationException { - def fromFixedAttribute(k: String, value: String, actual: String) = - ValidationException("value of attribute " + k + " FIXED to \""+ - value+"\", but document tries \""+actual+"\"") - - def fromNonEmptyElement() = - new ValidationException("element should be *empty*") - - def fromUndefinedElement(label: String) = - new ValidationException("element \""+ label +"\" not allowed here") - - def fromUndefinedAttribute(key: String) = - new ValidationException("attribute " + key +" not allowed here") - - def fromMissingAttribute(allKeys: Set[String]) = { - val sb = new StringBuilder("missing value for REQUIRED attribute") - if (allKeys.size > 1) sb.append('s') - allKeys foreach (k => sb append "'%s'".format(k)) - new ValidationException(sb.toString()) - } - - def fromMissingAttribute(key: String, tpe: String) = - new ValidationException("missing value for REQUIRED attribute %s of type %s".format(key, tpe)) -} diff --git a/src/library/scala/xml/dtd/impl/Base.scala b/src/library/scala/xml/dtd/impl/Base.scala deleted file mode 100644 index 91ff03a93a..0000000000 --- a/src/library/scala/xml/dtd/impl/Base.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ 
|/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** Basic regular expressions. - * - * @author Burak Emir - * @version 1.0 - */ - -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class Base { - type _regexpT <: RegExp - - abstract class RegExp { - val isNullable: Boolean - } - - object Alt { - /** `Alt( R,R,R* )`. */ - def apply(rs: _regexpT*) = - if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt") - else new Alt(rs: _*) - // Can't enforce that statically without changing the interface - // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*) - def unapplySeq(x: Alt) = Some(x.rs) - } - - class Alt private (val rs: _regexpT*) extends RegExp { - final val isNullable = rs exists (_.isNullable) - } - - object Sequ { - /** Sequ( R,R* ) */ - def apply(rs: _regexpT*) = if (rs.isEmpty) Eps else new Sequ(rs: _*) - def unapplySeq(x: Sequ) = Some(x.rs) - } - - class Sequ private (val rs: _regexpT*) extends RegExp { - final val isNullable = rs forall (_.isNullable) - } - - case class Star(r: _regexpT) extends RegExp { - final lazy val isNullable = true - } - - // The empty Sequ. - case object Eps extends RegExp { - final lazy val isNullable = true - override def toString() = "Eps" - } - - /** this class can be used to add meta information to regexps. 
*/ - class Meta(r1: _regexpT) extends RegExp { - final val isNullable = r1.isNullable - def r = r1 - } -} diff --git a/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala b/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala deleted file mode 100644 index f30309b037..0000000000 --- a/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -// todo: replace global variable pos with acc - -/** This class turns a regular expression over `A` into a - * [[scala.util.automata.NondetWordAutom]] over `A` using the celebrated - * position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class BaseBerrySethi { - val lang: Base - import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star } - - protected var pos = 0 - - // results which hold all info for the NondetWordAutomaton - protected var follow: mutable.HashMap[Int, Set[Int]] = _ - - protected var finalTag: Int = _ - - protected var finals: immutable.Map[Int, Int] = _ // final states - - // constants -------------------------- - - final val emptySet: Set[Int] = Set() - - private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match { - case x: Alt => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _) - case Eps => emptySet - case x: Meta => compFunction(x.r) - case x: Sequ => - val (l1, l2) = x.rs span (_.isNullable) - ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _) - case Star(t) => compFunction(t) - case _ => throw new IllegalArgumentException("unexpected pattern " + r.getClass) - } - - /** Computes `first(r)` for the word regexp `r`. 
*/ - protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst) - - /** Computes `last(r)` for the regexp `r`. */ - protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast) - - /** Starts from the right-to-left - * precondition: pos is final - * pats are successor patterns of a Sequence node - */ - protected def compFollow(rs: Seq[RegExp]): Set[Int] = { - follow(0) = - if (rs.isEmpty) emptySet - else rs.foldRight(Set(pos))((p, fol) => { - val first = compFollow1(fol, p) - - if (p.isNullable) fol ++ first - else first - }) - - follow(0) - } - - /** Returns the first set of an expression, setting the follow set along the way. - */ - protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { - case x: Alt => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*) - case x: Meta => compFollow1(fol1, x.r) - case x: Star => compFollow1(fol1 ++ compFirst(x.r), x.r) - case x: Sequ => - x.rs.foldRight(fol1) { (p, fol) => - val first = compFollow1(fol, p) - - if (p.isNullable) fol ++ first - else first - } - case _ => throw new IllegalArgumentException("unexpected pattern: " + r.getClass) - } - - /** Returns the "Sethi-length" of a pattern, creating the set of position along the way. 
- */ - protected def traverse(r: RegExp): Unit = r match { - // (is tree automaton stuff, more than Berry-Sethi) - case x: Alt => x.rs foreach traverse - case x: Sequ => x.rs foreach traverse - case x: Meta => traverse(x.r) - case Star(t) => traverse(t) - case _ => throw new IllegalArgumentException("unexp pattern " + r.getClass) - } -} diff --git a/src/library/scala/xml/dtd/impl/DetWordAutom.scala b/src/library/scala/xml/dtd/impl/DetWordAutom.scala deleted file mode 100644 index 6f8ba4de72..0000000000 --- a/src/library/scala/xml/dtd/impl/DetWordAutom.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -/** A deterministic automaton. States are integers, where - * 0 is always the only initial state. Transitions are represented - * in the delta function. A default transitions is one that - * is taken when no other transition can be taken. - * All states are reachable. Accepting states are those for which - * the partial function 'finals' is defined. 
- * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class DetWordAutom[T <: AnyRef] { - val nstates: Int - val finals: Array[Int] - val delta: Array[mutable.Map[T, Int]] - val default: Array[Int] - - def isFinal(q: Int) = finals(q) != 0 - def isSink(q: Int) = delta(q).isEmpty && default(q) == q - def next(q: Int, label: T) = delta(q).getOrElse(label, default(q)) - - override def toString() = { - val sb = new StringBuilder("[DetWordAutom nstates=") - sb.append(nstates) - sb.append(" finals=") - val map = Map(finals.zipWithIndex map (_.swap): _*) - sb.append(map.toString()) - sb.append(" delta=\n") - - for (i <- 0 until nstates) { - sb append "%d->%s\n".format(i, delta(i)) - if (i < default.length) - sb append "_>%s\n".format(default(i)) - } - sb.toString - } -} diff --git a/src/library/scala/xml/dtd/impl/Inclusion.scala b/src/library/scala/xml/dtd/impl/Inclusion.scala deleted file mode 100644 index 07b6afaeba..0000000000 --- a/src/library/scala/xml/dtd/impl/Inclusion.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - - -/** A fast test of language inclusion between minimal automata. - * inspired by the ''AMoRE automata library''. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] trait Inclusion[A <: AnyRef] { - - val labels: Seq[A] - - /** Returns true if `dfa1` is included in `dfa2`. 
- */ - def inclusion(dfa1: DetWordAutom[A], dfa2: DetWordAutom[A]) = { - - def encode(q1: Int, q2: Int) = 1 + q1 + q2 * dfa1.nstates - def decode2(c: Int) = (c-1) / (dfa1.nstates) //integer division - def decode1(c: Int) = (c-1) % (dfa1.nstates) - - var q1 = 0 //dfa1.initstate; // == 0 - var q2 = 0 //dfa2.initstate; // == 0 - - val max = 1 + dfa1.nstates * dfa2.nstates - val mark = new Array[Int](max) - - var result = true - var current = encode(q1, q2) - var last = current - mark(last) = max // mark (q1,q2) - while (current != 0 && result) { - //Console.println("current = [["+q1+" "+q2+"]] = "+current); - for (letter <- labels) { - val r1 = dfa1.next(q1,letter) - val r2 = dfa2.next(q2,letter) - if (dfa1.isFinal(r1) && !dfa2.isFinal(r2)) - result = false - val test = encode(r1, r2) - //Console.println("test = [["+r1+" "+r2+"]] = "+test); - if (mark(test) == 0) { - mark(last) = test - mark(test) = max - last = test - } - } - val ncurrent = mark(current) - if( ncurrent != max ) { - q1 = decode1(ncurrent) - q2 = decode2(ncurrent) - current = ncurrent - } else { - current = 0 - } - } - result - } -} diff --git a/src/library/scala/xml/dtd/impl/NondetWordAutom.scala b/src/library/scala/xml/dtd/impl/NondetWordAutom.scala deleted file mode 100644 index 0bb19a7e3e..0000000000 --- a/src/library/scala/xml/dtd/impl/NondetWordAutom.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ immutable, mutable } - -/** A nondeterministic automaton. States are integers, where - * 0 is always the only initial state. Transitions are represented - * in the delta function. Default transitions are transitions that - * are taken when no other transitions can be applied. - * All states are reachable. 
Accepting states are those for which - * the partial function `finals` is defined. - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class NondetWordAutom[T <: AnyRef] { - val nstates: Int - val labels: Seq[T] - val finals: Array[Int] // 0 means not final - val delta: Array[mutable.Map[T, immutable.BitSet]] - val default: Array[immutable.BitSet] - - /** @return true if the state is final */ - final def isFinal(state: Int) = finals(state) > 0 - - /** @return tag of final state */ - final def finalTag(state: Int) = finals(state) - - /** @return true if the set of states contains at least one final state */ - final def containsFinal(Q: immutable.BitSet): Boolean = Q exists isFinal - - /** @return true if there are no accepting states */ - final def isEmpty = (0 until nstates) forall (x => !isFinal(x)) - - /** @return an immutable.BitSet with the next states for given state and label */ - def next(q: Int, a: T): immutable.BitSet = delta(q).getOrElse(a, default(q)) - - /** @return an immutable.BitSet with the next states for given state and label */ - def next(Q: immutable.BitSet, a: T): immutable.BitSet = next(Q, next(_, a)) - def nextDefault(Q: immutable.BitSet): immutable.BitSet = next(Q, default) - - private def next(Q: immutable.BitSet, f: (Int) => immutable.BitSet): immutable.BitSet = - (Q map f).foldLeft(immutable.BitSet.empty)(_ ++ _) - - private def finalStates = 0 until nstates filter isFinal - override def toString = { - - val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString - val deltaString = (0 until nstates) - .map(i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString - - "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString) - } -} diff --git a/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala b/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala deleted file mode 100644 index 1720604132..0000000000 --- 
a/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** Pointed regular hedge expressions, a useful subclass of regular hedge expressions. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class PointedHedgeExp extends Base { - - type _regexpT <: RegExp - type _labelT - - case class Node(label: _labelT, r: _regexpT) extends RegExp { - final val isNullable = false - } - - case class TopIter(r1: _regexpT, r2: _regexpT) extends RegExp { - final val isNullable = r1.isNullable && r2.isNullable //? - } - - case object Point extends RegExp { - final val isNullable = false - } - -} diff --git a/src/library/scala/xml/dtd/impl/SubsetConstruction.scala b/src/library/scala/xml/dtd/impl/SubsetConstruction.scala deleted file mode 100644 index 632ca1eb18..0000000000 --- a/src/library/scala/xml/dtd/impl/SubsetConstruction.scala +++ /dev/null @@ -1,108 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -@deprecated("This class will be removed", "2.10.0") -private[dtd] class SubsetConstruction[T <: AnyRef](val nfa: NondetWordAutom[T]) { - import nfa.labels - - def selectTag(Q: immutable.BitSet, finals: Array[Int]) = - (Q map finals filter (_ > 0)).min - - def determinize: DetWordAutom[T] = { - // for assigning numbers to bitsets - var indexMap = scala.collection.Map[immutable.BitSet, Int]() - var invIndexMap = scala.collection.Map[Int, immutable.BitSet]() - 
var ix = 0 - - // we compute the dfa with states = bitsets - val q0 = immutable.BitSet(0) // the set { 0 } - val sink = immutable.BitSet.empty // the set { } - - var states = Set(q0, sink) // initial set of sets - val delta = new mutable.HashMap[immutable.BitSet, mutable.HashMap[T, immutable.BitSet]] - var deftrans = mutable.Map(q0 -> sink, sink -> sink) // initial transitions - var finals: mutable.Map[immutable.BitSet, Int] = mutable.Map() - val rest = new mutable.Stack[immutable.BitSet] - - rest.push(sink, q0) - - def addFinal(q: immutable.BitSet) { - if (nfa containsFinal q) - finals = finals.updated(q, selectTag(q, nfa.finals)) - } - def add(Q: immutable.BitSet) { - if (!states(Q)) { - states += Q - rest push Q - addFinal(Q) - } - } - - addFinal(q0) // initial state may also be a final state - - while (!rest.isEmpty) { - val P = rest.pop() - // assign a number to this bitset - indexMap = indexMap.updated(P, ix) - invIndexMap = invIndexMap.updated(ix, P) - ix += 1 - - // make transition map - val Pdelta = new mutable.HashMap[T, immutable.BitSet] - delta.update(P, Pdelta) - - labels foreach { label => - val Q = nfa.next(P, label) - Pdelta.update(label, Q) - add(Q) - } - - // collect default transitions - val Pdef = nfa nextDefault P - deftrans = deftrans.updated(P, Pdef) - add(Pdef) - } - - // create DetWordAutom, using indices instead of sets - val nstatesR = states.size - val deltaR = new Array[mutable.Map[T, Int]](nstatesR) - val defaultR = new Array[Int](nstatesR) - val finalsR = new Array[Int](nstatesR) - - for (Q <- states) { - val q = indexMap(Q) - val trans = delta(Q) - val transDef = deftrans(Q) - val qDef = indexMap(transDef) - val ntrans = new mutable.HashMap[T, Int]() - - for ((label, value) <- trans) { - val p = indexMap(value) - if (p != qDef) - ntrans.update(label, p) - } - - deltaR(q) = ntrans - defaultR(q) = qDef - } - - finals foreach { case (k,v) => finalsR(indexMap(k)) = v } - - new DetWordAutom [T] { - val nstates = nstatesR - val delta = 
deltaR - val default = defaultR - val finals = finalsR - } - } -} diff --git a/src/library/scala/xml/dtd/impl/SyntaxError.scala b/src/library/scala/xml/dtd/impl/SyntaxError.scala deleted file mode 100644 index a5b8a5aba0..0000000000 --- a/src/library/scala/xml/dtd/impl/SyntaxError.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** This runtime exception is thrown if an attempt to instantiate a - * syntactically incorrect expression is detected. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] class SyntaxError(e: String) extends RuntimeException(e) diff --git a/src/library/scala/xml/dtd/impl/WordBerrySethi.scala b/src/library/scala/xml/dtd/impl/WordBerrySethi.scala deleted file mode 100644 index 9bf3fa518b..0000000000 --- a/src/library/scala/xml/dtd/impl/WordBerrySethi.scala +++ /dev/null @@ -1,162 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ immutable, mutable } - -/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]] - * celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). 
- * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class WordBerrySethi extends BaseBerrySethi { - override val lang: WordExp - - import lang.{ Alt, Eps, Letter, RegExp, Sequ, Star, _labelT } - - protected var labels: mutable.HashSet[_labelT] = _ - // don't let this fool you, only labelAt is a real, surjective mapping - protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma" - protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta - protected var defaultq: Array[List[Int]] = _ // default transitions - protected var initials: Set[Int] = _ - - /** Computes `first(r)` where the word regexp `r`. - * - * @param r the regular expression - * @return the computed set `first(r)` - */ - protected override def compFirst(r: RegExp): Set[Int] = r match { - case x: Letter => Set(x.pos) - case _ => super.compFirst(r) - } - - /** Computes `last(r)` where the word regexp `r`. - * - * @param r the regular expression - * @return the computed set `last(r)` - */ - protected override def compLast(r: RegExp): Set[Int] = r match { - case x: Letter => Set(x.pos) - case _ => super.compLast(r) - } - - /** Returns the first set of an expression, setting the follow set along - * the way. 
- * - * @param r the regular expression - * @return the computed set - */ - protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { - case x: Letter => follow(x.pos) = fol1 ; Set(x.pos) - case Eps => emptySet - case _ => super.compFollow1(fol1, r) - } - - /** Returns "Sethi-length" of a pattern, creating the set of position - * along the way - */ - - /** Called at the leaves of the regexp */ - protected def seenLabel(r: RegExp, i: Int, label: _labelT) { - labelAt = labelAt.updated(i, label) - this.labels += label - } - - // overridden in BindingBerrySethi - protected def seenLabel(r: RegExp, label: _labelT): Int = { - pos += 1 - seenLabel(r, pos, label) - pos - } - - // todo: replace global variable pos with acc - override def traverse(r: RegExp): Unit = r match { - case a @ Letter(label) => a.pos = seenLabel(r, label) - case Eps => // ignore - case _ => super.traverse(r) - } - - - protected def makeTransition(src: Int, dest: Int, label: _labelT) { - val q = deltaq(src) - q.update(label, dest :: q.getOrElse(label, Nil)) - } - - protected def initialize(subexpr: Seq[RegExp]): Unit = { - this.labelAt = immutable.Map() - this.follow = mutable.HashMap() - this.labels = mutable.HashSet() - this.pos = 0 - - // determine "Sethi-length" of the regexp - subexpr foreach traverse - - this.initials = Set(0) - } - - protected def initializeAutom() { - finals = immutable.Map.empty[Int, Int] // final states - deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta - defaultq = new Array[List[Int]](pos) // default transitions - - for (j <- 0 until pos) { - deltaq(j) = mutable.HashMap[_labelT, List[Int]]() - defaultq(j) = Nil - } - } - - protected def collectTransitions(): Unit = // make transitions - for (j <- 0 until pos ; fol = follow(j) ; k <- fol) { - if (pos == k) finals = finals.updated(j, finalTag) - else makeTransition(j, k, labelAt(k)) - } - - def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = { - 
this.finalTag = finalTag - - pat match { - case x: Sequ => - // (1,2) compute follow + first - initialize(x.rs) - pos += 1 - compFollow(x.rs) // this used to be assigned to var globalFirst and then never used. - - // (3) make automaton from follow sets - initializeAutom() - collectTransitions() - - if (x.isNullable) // initial state is final - finals = finals.updated(0, finalTag) - - val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*) - val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final - - val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] = - (0 until pos map { x => - mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*) - }).toArray - - val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray - - new NondetWordAutom[_labelT] { - val nstates = pos - val labels = WordBerrySethi.this.labels.toList - val finals = finalsArr - val delta = deltaArr - val default = defaultArr - } - case z => - automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag) - } - } -} diff --git a/src/library/scala/xml/dtd/impl/WordExp.scala b/src/library/scala/xml/dtd/impl/WordExp.scala deleted file mode 100644 index a4bb54c1ea..0000000000 --- a/src/library/scala/xml/dtd/impl/WordExp.scala +++ /dev/null @@ -1,59 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** - * The class `WordExp` provides regular word expressions. - * - * Users have to instantiate type member `_regexpT <;: RegExp` - * (from class `Base`) and a type member `_labelT <;: Label`. 
- * - * Here is a short example: - * {{{ - * import scala.util.regexp._ - * import scala.util.automata._ - * object MyLang extends WordExp { - * type _regexpT = RegExp - * type _labelT = MyChar - * - * case class MyChar(c:Char) extends Label - * } - * import MyLang._ - * // (a* | b)* - * val rex = Star(Alt(Star(Letter(MyChar('a'))),Letter(MyChar('b')))) - * object MyBerriSethi extends WordBerrySethi { - * override val lang = MyLang - * } - * val nfa = MyBerriSethi.automatonFrom(Sequ(rex), 1) - * }}} - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class WordExp extends Base { - - abstract class Label - - type _regexpT <: RegExp - type _labelT <: Label - - case class Letter(a: _labelT) extends RegExp { - final lazy val isNullable = false - var pos = -1 - } - - case class Wildcard() extends RegExp { - final lazy val isNullable = false - var pos = -1 - } -} diff --git a/src/library/scala/xml/factory/Binder.scala b/src/library/scala/xml/factory/Binder.scala deleted file mode 100755 index 947f99e6a4..0000000000 --- a/src/library/scala/xml/factory/Binder.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package factory - -import parsing.ValidatingMarkupHandler - -/** - * @author Burak Emir - */ -abstract class Binder(val preserveWS: Boolean) extends ValidatingMarkupHandler { - - var result: NodeBuffer = new NodeBuffer() - - def reportSyntaxError(pos:Int, str:String) = {} - - final def procInstr(pos: Int, target: String, txt: String) = - ProcInstr(target, txt) - - final def comment(pos: Int, txt: String) = - Comment(txt) - - final def entityRef(pos: Int, n: String) = - EntityRef(n) - - final def text(pos: Int, txt: String) = - Text(txt) - - final def 
traverse(n:Node): Unit = n match { - case x:ProcInstr => - result &+ procInstr(0, x.target, x.text) - case x:Comment => - result &+ comment(0, x.text) - case x:Text => - result &+ text(0, x.data) - case x:EntityRef => - result &+ entityRef(0, x.entityName) - case x:Elem => - elemStart(0, x.prefix, x.label, x.attributes, x.scope) - val old = result - result = new NodeBuffer() - for (m <- x.child) traverse(m) - result = old &+ elem(0, x.prefix, x.label, x.attributes, x.scope, x.minimizeEmpty, NodeSeq.fromSeq(result)).toList - elemEnd(0, x.prefix, x.label) - } - - final def validate(n: Node): Node = { - this.rootLabel = n.label - traverse(n) - result(0) - } -} diff --git a/src/library/scala/xml/factory/LoggedNodeFactory.scala b/src/library/scala/xml/factory/LoggedNodeFactory.scala deleted file mode 100644 index bc074bfc83..0000000000 --- a/src/library/scala/xml/factory/LoggedNodeFactory.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package factory - -/** This class logs what the nodefactory is actually doing. 
- * If you want to see what happens during loading, use it like this: -{{{ -object testLogged extends App { - val x = new scala.xml.parsing.NoBindingFactoryAdapter - with scala.xml.factory.LoggedNodeFactory[scala.xml.Elem] { - override def log(s: String) = println(s) - } - - Console.println("Start") - val doc = x.load(new java.net.URL("http://example.com/file.xml")) - Console.println("End") - Console.println(doc) -} -}}} - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This trait will be removed.", "2.11") -trait LoggedNodeFactory[A <: Node] extends NodeFactory[A] { - // configuration values - val logNode = true - val logText = false - val logComment = false - val logProcInstr = false - - final val NONE = 0 - final val CACHE = 1 - final val FULL = 2 - /** 0 = no logging, 1 = cache hits, 2 = detail */ - val logCompressLevel = 1 - - // methods of NodeFactory - - /** logged version of makeNode method */ - override def makeNode(pre: String, label: String, attrSeq: MetaData, - scope: NamespaceBinding, children: Seq[Node]): A = { - if (logNode) - log("[makeNode for "+label+"]") - - val hash = Utility.hashCode(pre, label, attrSeq.##, scope.##, children) - - /* - if(logCompressLevel >= FULL) { - log("[hashcode total:"+hash); - log(" elem name "+uname+" hash "+ ? 
)); - log(" attrs "+attrSeq+" hash "+attrSeq.hashCode()); - log(" children :"+children+" hash "+children.hashCode()); - } - */ - if (!cache.get( hash ).isEmpty && (logCompressLevel >= CACHE)) - log("[cache hit !]") - - super.makeNode(pre, label, attrSeq, scope, children) - } - - override def makeText(s: String) = { - if (logText) - log("[makeText:\""+s+"\"]") - super.makeText(s) - } - - override def makeComment(s: String): Seq[Comment] = { - if (logComment) - log("[makeComment:\""+s+"\"]") - super.makeComment(s) - } - - override def makeProcInstr(t: String, s: String): Seq[ProcInstr] = { - if (logProcInstr) - log("[makeProcInstr:\""+t+" "+ s+"\"]") - super.makeProcInstr(t, s) - } - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/factory/NodeFactory.scala b/src/library/scala/xml/factory/NodeFactory.scala deleted file mode 100644 index 94801bb554..0000000000 --- a/src/library/scala/xml/factory/NodeFactory.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package factory - -import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } -import java.io.{ InputStream, Reader, StringReader, File, FileDescriptor, FileInputStream } - -trait NodeFactory[A <: Node] { - val ignoreComments = false - val ignoreProcInstr = false - - /* default behaviour is to use hash-consing */ - val cache = new scala.collection.mutable.HashMap[Int, List[A]] - - protected def create(pre: String, name: String, attrs: MetaData, scope: NamespaceBinding, children:Seq[Node]): A - - protected def construct(hash: Int, old:List[A], pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children:Seq[Node]): A = { - val el = create(pre, name, attrSeq, scope, 
children) - cache.update(hash, el :: old) - el - } - - def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean = - ch1.view.zipAll(ch2.view, null, null) forall { case (x,y) => x eq y } - - def nodeEquals(n: Node, pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children: Seq[Node]) = - n.prefix == pre && - n.label == name && - n.attributes == attrSeq && - // scope? - eqElements(n.child, children) - - def makeNode(pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): A = { - val hash = Utility.hashCode( pre, name, attrSeq.##, scope.##, children) - def cons(old: List[A]) = construct(hash, old, pre, name, attrSeq, scope, children) - - (cache get hash) match { - case Some(list) => // find structurally equal - list.find(nodeEquals(_, pre, name, attrSeq, scope, children)) match { - case Some(x) => x - case _ => cons(list) - } - case None => cons(Nil) - } - } - - def makeText(s: String) = Text(s) - def makeComment(s: String): Seq[Comment] = - if (ignoreComments) Nil else List(Comment(s)) - def makeProcInstr(t: String, s: String): Seq[ProcInstr] = - if (ignoreProcInstr) Nil else List(ProcInstr(t, s)) -} diff --git a/src/library/scala/xml/factory/XMLLoader.scala b/src/library/scala/xml/factory/XMLLoader.scala deleted file mode 100644 index b69f187039..0000000000 --- a/src/library/scala/xml/factory/XMLLoader.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package factory - -import javax.xml.parsers.SAXParserFactory -import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } -import java.io.{ InputStream, Reader, File, FileDescriptor } -import java.net.URL - -/** Presents collection of XML loading methods which use the parser - * created by "def parser". 
- */ -trait XMLLoader[T <: Node] -{ - import scala.xml.Source._ - def adapter: FactoryAdapter = new NoBindingFactoryAdapter() - - /* Override this to use a different SAXParser. */ - def parser: SAXParser = { - val f = SAXParserFactory.newInstance() - f.setNamespaceAware(false) - f.newSAXParser() - } - - /** Loads XML from the given InputSource, using the supplied parser. - * The methods available in scala.xml.XML use the XML parser in the JDK. - */ - def loadXML(source: InputSource, parser: SAXParser): T = { - val newAdapter = adapter - - newAdapter.scopeStack push TopScope - parser.parse(source, newAdapter) - newAdapter.scopeStack.pop() - - newAdapter.rootElem.asInstanceOf[T] - } - - /** Loads XML from the given file, file descriptor, or filename. */ - def loadFile(file: File): T = loadXML(fromFile(file), parser) - def loadFile(fd: FileDescriptor): T = loadXML(fromFile(fd), parser) - def loadFile(name: String): T = loadXML(fromFile(name), parser) - - /** loads XML from given InputStream, Reader, sysID, InputSource, or URL. */ - def load(is: InputStream): T = loadXML(fromInputStream(is), parser) - def load(reader: Reader): T = loadXML(fromReader(reader), parser) - def load(sysID: String): T = loadXML(fromSysId(sysID), parser) - def load(source: InputSource): T = loadXML(source, parser) - def load(url: URL): T = loadXML(fromInputStream(url.openStream()), parser) - - /** Loads XML from the given String. 
*/ - def loadString(string: String): T = loadXML(fromString(string), parser) -} diff --git a/src/library/scala/xml/include/CircularIncludeException.scala b/src/library/scala/xml/include/CircularIncludeException.scala deleted file mode 100644 index 351f403008..0000000000 --- a/src/library/scala/xml/include/CircularIncludeException.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * A `CircularIncludeException` is thrown when an included document attempts - * to include itself or one of its ancestor documents. - */ -class CircularIncludeException(message: String) extends XIncludeException { - - /** - * Constructs a `CircularIncludeException` with `'''null'''`. - * as its error detail message. - */ - def this() = this(null) - -} diff --git a/src/library/scala/xml/include/UnavailableResourceException.scala b/src/library/scala/xml/include/UnavailableResourceException.scala deleted file mode 100644 index 47b176e0f3..0000000000 --- a/src/library/scala/xml/include/UnavailableResourceException.scala +++ /dev/null @@ -1,20 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * An `UnavailableResourceException` is thrown when an included document - * cannot be found or loaded. 
- */ -class UnavailableResourceException(message: String) -extends XIncludeException(message) { - def this() = this(null) -} diff --git a/src/library/scala/xml/include/XIncludeException.scala b/src/library/scala/xml/include/XIncludeException.scala deleted file mode 100644 index 11e1644d83..0000000000 --- a/src/library/scala/xml/include/XIncludeException.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * `XIncludeException` is the generic superclass for all checked exceptions - * that may be thrown as a result of a violation of XInclude's rules. - * - * Constructs an `XIncludeException` with the specified detail message. - * The error message string `message` can later be retrieved by the - * `{@link java.lang.Throwable#getMessage}` - * method of class `java.lang.Throwable`. - * - * @param message the detail message. - */ -class XIncludeException(message: String) extends Exception(message) { - - /** - * uses `'''null'''` as its error detail message. - */ - def this() = this(null) - - private var rootCause: Throwable = null - - /** - * When an `IOException`, `MalformedURLException` or other generic - * exception is thrown while processing an XML document for XIncludes, - * it is customarily replaced by some form of `XIncludeException`. - * This method allows you to store the original exception. - * - * @param nestedException the underlying exception which - * caused the XIncludeException to be thrown - */ - def setRootCause(nestedException: Throwable ) { - this.rootCause = nestedException - } - - /** - * When an `IOException`, `MalformedURLException` or other generic - * exception is thrown while processing an XML document for XIncludes, - * it is customarily replaced by some form of `XIncludeException`. 
- * This method allows you to retrieve the original exception. - * It returns null if no such exception caused this `XIncludeException`. - * - * @return Throwable the underlying exception which caused the - * `XIncludeException` to be thrown - */ - def getRootCause(): Throwable = this.rootCause - -} diff --git a/src/library/scala/xml/include/sax/EncodingHeuristics.scala b/src/library/scala/xml/include/sax/EncodingHeuristics.scala deleted file mode 100644 index 57ab5ed91c..0000000000 --- a/src/library/scala/xml/include/sax/EncodingHeuristics.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import java.io.InputStream -import scala.util.matching.Regex - -/** `EncodingHeuristics` reads from a stream - * (which should be buffered) and attempts to guess - * what the encoding of the text in the stream is. - * If it fails to determine the type of the encoding, - * it returns the default UTF-8. - * - * @author Burak Emir - * @author Paul Phillips - */ -object EncodingHeuristics -{ - object EncodingNames { - // UCS-4 isn't yet implemented in java releases anyway... - val bigUCS4 = "UCS-4" - val littleUCS4 = "UCS-4" - val unusualUCS4 = "UCS-4" - val bigUTF16 = "UTF-16BE" - val littleUTF16 = "UTF-16LE" - val utf8 = "UTF-8" - val default = utf8 - } - import EncodingNames._ - - /** This utility method attempts to determine the XML character encoding - * by examining the input stream, as specified at - * [[http://www.w3.org/TR/xml/#sec-guessing w3]]. - * - * @param in `InputStream` to read from. - * @throws IOException if the stream cannot be reset - * @return the name of the encoding. 
- */ - def readEncodingFromStream(in: InputStream): String = { - var ret: String = null - val bytesToRead = 1024 // enough to read most XML encoding declarations - def resetAndRet = { in.reset ; ret } - - // This may fail if there are a lot of space characters before the end - // of the encoding declaration - in mark bytesToRead - val bytes = (in.read, in.read, in.read, in.read) - - // first look for byte order mark - ret = bytes match { - case (0x00, 0x00, 0xFE, 0xFF) => bigUCS4 - case (0xFF, 0xFE, 0x00, 0x00) => littleUCS4 - case (0x00, 0x00, 0xFF, 0xFE) => unusualUCS4 - case (0xFE, 0xFF, 0x00, 0x00) => unusualUCS4 - case (0xFE, 0xFF, _ , _ ) => bigUTF16 - case (0xFF, 0xFE, _ , _ ) => littleUTF16 - case (0xEF, 0xBB, 0xBF, _ ) => utf8 - case _ => null - } - if (ret != null) - return resetAndRet - - def readASCIIEncoding: String = { - val data = new Array[Byte](bytesToRead - 4) - val length = in.read(data, 0, bytesToRead - 4) - - // Use Latin-1 (ISO-8859-1) because all byte sequences are legal. - val declaration = new String(data, 0, length, "ISO-8859-1") - val regexp = """(?m).*?encoding\s*=\s*["'](.+?)['"]""".r - (regexp findFirstMatchIn declaration) match { - case None => default - case Some(md) => md.subgroups(0) - } - } - - // no byte order mark present; first character must be '<' or whitespace - ret = bytes match { - case (0x00, 0x00, 0x00, '<' ) => bigUCS4 - case ('<' , 0x00, 0x00, 0x00) => littleUCS4 - case (0x00, 0x00, '<' , 0x00) => unusualUCS4 - case (0x00, '<' , 0x00, 0x00) => unusualUCS4 - case (0x00, '<' , 0x00, '?' ) => bigUTF16 // XXX must read encoding - case ('<' , 0x00, '?' , 0x00) => littleUTF16 // XXX must read encoding - case ('<' , '?' 
, 'x' , 'm' ) => readASCIIEncoding - case (0x4C, 0x6F, 0xA7, 0x94) => utf8 // XXX EBCDIC - case _ => utf8 // no XML or text declaration present - } - resetAndRet - } -} diff --git a/src/library/scala/xml/include/sax/XIncludeFilter.scala b/src/library/scala/xml/include/sax/XIncludeFilter.scala deleted file mode 100644 index 3fa3beefb0..0000000000 --- a/src/library/scala/xml/include/sax/XIncludeFilter.scala +++ /dev/null @@ -1,373 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import scala.xml.include._ - -import org.xml.sax.{ Attributes, XMLReader, Locator } -import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl } - -import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException } -import java.util.Stack -import java.net.{ URL, MalformedURLException } - -/** This is a SAX filter which resolves all XInclude include elements before - * passing them on to the client application. Currently this class has the - * following known deviation from the XInclude specification: - * - * 1. XPointer is not supported. - * - * Furthermore, I would definitely use a new instance of this class for each - * document you want to process. I doubt it can be used successfully on - * multiple documents. Furthermore, I can virtually guarantee that this - * class is not thread safe. You have been warned. - * - * Since this class is not designed to be subclassed, and since I have not - * yet considered how that might affect the methods herein or what other - * protected methods might be needed to support subclasses, I have declared - * this class final. I may remove this restriction later, though the use-case - * for subclassing is weak. 
This class is designed to have its functionality - * extended via a horizontal chain of filters, not a vertical hierarchy of - * sub and superclasses. - * - * To use this class: - * - * - Construct an `XIncludeFilter` object with a known base URL - * - Pass the `XMLReader` object from which the raw document will be read to - * the `setParent()` method of this object. - * - Pass your own `ContentHandler` object to the `setContentHandler()` - * method of this object. This is the object which will receive events - * from the parsed and included document. - * - Optional: if you wish to receive comments, set your own `LexicalHandler` - * object as the value of this object's - * `http://xml.org/sax/properties/lexical-handler` property. - * Also make sure your `LexicalHandler` asks this object for the status of - * each comment using `insideIncludeElement` before doing anything with the - * comment. - * - Pass the URL of the document to read to this object's `parse()` method - * - * e.g. - * {{{ - * val includer = new XIncludeFilter(base) - * includer setParent parser - * includer setContentHandler new SAXXIncluder(System.out) - * includer parse args(i) - * }}} - * translated from Elliotte Rusty Harold's Java source. - * - * @author Burak Emir - */ -class XIncludeFilter extends XMLFilterImpl { - - final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude" - - private val bases = new Stack[URL]() - private val locators = new Stack[Locator]() - -/* private EntityResolver resolver; - - public XIncludeFilter() { - this(null); - } - - public XIncludeFilter(EntityResolver resolver) { - this.resolver = resolver; - } */ - - - // what if this isn't called???? - // do I need to check this in startDocument() and push something - // there???? 
- override def setDocumentLocator(locator: Locator) { - locators push locator - val base = locator.getSystemId() - try { - bases.push(new URL(base)) - } - catch { - case e:MalformedURLException => - throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base) - } - super.setDocumentLocator(locator) - } - - - // necessary to throw away contents of non-empty XInclude elements - private var level = 0 - - /** This utility method returns true if and only if this reader is - * currently inside a non-empty include element. (This is '''not''' the - * same as being inside the node set which replaces the include element.) - * This is primarily needed for comments inside include elements. - * It must be checked by the actual `LexicalHandler` to see whether - * a comment is passed or not. - * - * @return boolean - */ - def insideIncludeElement(): Boolean = level != 0 - - override def startElement(uri: String, localName: String, qName: String, atts1: Attributes) { - var atts = atts1 - if (level == 0) { // We're not inside an xi:include element - - // Adjust bases stack by pushing either the new - // value of xml:base or the base of the parent - val base = atts.getValue(NamespaceSupport.XMLNS, "base") - val parentBase = bases.peek().asInstanceOf[URL] - var currentBase = parentBase - if (base != null) { - try { - currentBase = new URL(parentBase, base) - } - catch { - case e: MalformedURLException => - throw new SAXException("Malformed base URL: " - + currentBase, e) - } - } - bases push currentBase - - if (uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")) { - // include external document - val href = atts.getValue("href") - // Verify that there is an href attribute - if (href == null) { - throw new SAXException("Missing href attribute") - } - - var parse = atts getValue "parse" - if (parse == null) parse = "xml" - - if (parse equals "text") { - val encoding = atts getValue "encoding" - includeTextDocument(href, encoding) - } - else if (parse equals 
"xml") { - includeXMLDocument(href) - } - // Need to check this also in DOM and JDOM???? - else { - throw new SAXException( - "Illegal value for parse attribute: " + parse) - } - level += 1 - } - else { - if (atRoot) { - // add xml:base attribute if necessary - val attsImpl = new AttributesImpl(atts) - attsImpl.addAttribute(NamespaceSupport.XMLNS, "base", - "xml:base", "CDATA", currentBase.toExternalForm()) - atts = attsImpl - atRoot = false - } - super.startElement(uri, localName, qName, atts) - } - } - } - - override def endElement(uri: String, localName: String, qName: String) { - if (uri.equals(XINCLUDE_NAMESPACE) - && localName.equals("include")) { - level -= 1 - } - else if (level == 0) { - bases.pop() - super.endElement(uri, localName, qName) - } - } - - private var depth = 0 - - override def startDocument() { - level = 0 - if (depth == 0) super.startDocument() - depth += 1 - } - - override def endDocument() { - locators.pop() - bases.pop() // pop the URL for the document itself - depth -= 1 - if (depth == 0) super.endDocument() - } - - // how do prefix mappings move across documents???? 
- override def startPrefixMapping(prefix: String , uri: String) { - if (level == 0) super.startPrefixMapping(prefix, uri) - } - - override def endPrefixMapping(prefix: String) { - if (level == 0) super.endPrefixMapping(prefix) - } - - override def characters(ch: Array[Char], start: Int, length: Int) { - if (level == 0) super.characters(ch, start, length) - } - - override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int) { - if (level == 0) super.ignorableWhitespace(ch, start, length) - } - - override def processingInstruction(target: String, data: String) { - if (level == 0) super.processingInstruction(target, data) - } - - override def skippedEntity(name: String) { - if (level == 0) super.skippedEntity(name) - } - - // convenience method for error messages - private def getLocation(): String = { - var locationString = "" - val locator = locators.peek().asInstanceOf[Locator] - var publicID = "" - var systemID = "" - var column = -1 - var line = -1 - if (locator != null) { - publicID = locator.getPublicId() - systemID = locator.getSystemId() - line = locator.getLineNumber() - column = locator.getColumnNumber() - } - locationString = (" in document included from " + publicID - + " at " + systemID - + " at line " + line + ", column " + column) - - locationString - } - - /** This utility method reads a document at a specified URL and fires off - * calls to `characters()`. It's used to include files with `parse="text"`. - * - * @param url URL of the document that will be read - * @param encoding1 Encoding of the document; e.g. UTF-8, - * ISO-8859-1, etc. 
- * @return void - * @throws SAXException if the requested document cannot - be downloaded from the specified URL - or if the encoding is not recognized - */ - private def includeTextDocument(url: String, encoding1: String) { - var encoding = encoding1 - if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8" - var source: URL = null - try { - val base = bases.peek().asInstanceOf[URL] - source = new URL(base, url) - } - catch { - case e: MalformedURLException => - val ex = new UnavailableResourceException("Unresolvable URL " + url - + getLocation()) - ex.setRootCause(e) - throw new SAXException("Unresolvable URL " + url + getLocation(), ex) - } - - try { - val uc = source.openConnection() - val in = new BufferedInputStream(uc.getInputStream()) - val encodingFromHeader = uc.getContentEncoding() - var contentType = uc.getContentType() - if (encodingFromHeader != null) - encoding = encodingFromHeader - else { - // What if file does not have a MIME type but name ends in .xml???? 
- // MIME types are case-insensitive - // Java may be picking this up from file URL - if (contentType != null) { - contentType = contentType.toLowerCase() - if (contentType.equals("text/xml") - || contentType.equals("application/xml") - || (contentType.startsWith("text/") && contentType.endsWith("+xml") ) - || (contentType.startsWith("application/") && contentType.endsWith("+xml"))) { - encoding = EncodingHeuristics.readEncodingFromStream(in) - } - } - } - val reader = new InputStreamReader(in, encoding) - val c = new Array[Char](1024) - var charsRead: Int = 0 // bogus init value - do { - charsRead = reader.read(c, 0, 1024) - if (charsRead > 0) this.characters(c, 0, charsRead) - } while (charsRead != -1) - } - catch { - case e: UnsupportedEncodingException => - throw new SAXException("Unsupported encoding: " - + encoding + getLocation(), e) - case e: IOException => - throw new SAXException("Document not found: " - + source.toExternalForm() + getLocation(), e) - } - - } - - private var atRoot = false - - /** This utility method reads a document at a specified URL - * and fires off calls to various `ContentHandler` methods. - * It's used to include files with `parse="xml"`. - * - * @param url URL of the document that will be read - * @return void - * @throws SAXException if the requested document cannot - be downloaded from the specified URL. 
- */ - private def includeXMLDocument(url: String) { - val source = - try new URL(bases.peek(), url) - catch { - case e: MalformedURLException => - val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation()) - ex setRootCause e - throw new SAXException("Unresolvable URL " + url + getLocation(), ex) - } - - try { - val parser: XMLReader = - try XMLReaderFactory.createXMLReader() - catch { - case e: SAXException => - try XMLReaderFactory.createXMLReader(XercesClassName) - catch { case _: SAXException => return System.err.println("Could not find an XML parser") } - } - - parser setContentHandler this - val resolver = this.getEntityResolver() - if (resolver != null) - parser setEntityResolver resolver - - // save old level and base - val previousLevel = level - this.level = 0 - if (bases contains source) - throw new SAXException( - "Circular XInclude Reference", - new CircularIncludeException("Circular XInclude Reference to " + source + getLocation()) - ) - - bases push source - atRoot = true - parser parse source.toExternalForm() - - // restore old level and base - this.level = previousLevel - bases.pop() - } - catch { - case e: IOException => - throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e) - } - } -} diff --git a/src/library/scala/xml/include/sax/XIncluder.scala b/src/library/scala/xml/include/sax/XIncluder.scala deleted file mode 100644 index 1939fa1875..0000000000 --- a/src/library/scala/xml/include/sax/XIncluder.scala +++ /dev/null @@ -1,187 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import scala.collection.mutable -import org.xml.sax.{ ContentHandler, XMLReader, Locator, Attributes } -import org.xml.sax.ext.LexicalHandler -import java.io.{ File, OutputStream, 
OutputStreamWriter, Writer, IOException } - -/** XIncluder is a SAX `ContentHandler` that writes its XML document onto - * an output stream after resolving all `xinclude:include` elements. - * - * Based on Eliotte Rusty Harold's SAXXIncluder. - */ -class XIncluder(outs: OutputStream, encoding: String) extends ContentHandler with LexicalHandler { - - var out = new OutputStreamWriter(outs, encoding) - - def setDocumentLocator(locator: Locator) {} - - def startDocument() { - try { - out.write("\r\n") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def endDocument() { - try { - out.flush() - } - catch { - case e:IOException => - throw new SAXException("Flush failed", e) - } - } - - def startPrefixMapping(prefix: String , uri: String) {} - - def endPrefixMapping(prefix: String) {} - - def startElement(namespaceURI: String, localName: String, qualifiedName: String, atts: Attributes) = { - try { - out.write("<" + qualifiedName) - var i = 0; while (i < atts.getLength()) { - out.write(" ") - out.write(atts.getQName(i)) - out.write("='") - val value = atts.getValue(i) - // @todo Need to use character references if the encoding - // can't support the character - out.write(scala.xml.Utility.escape(value)) - out.write("'") - i += 1 - } - out.write(">") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def endElement(namespaceURI: String, localName:String, qualifiedName: String) { - try { - out.write("") - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - - // need to escape characters that are not in the given - // encoding using character references???? - def characters(ch: Array[Char], start: Int, length: Int) { - try { - var i = 0; while (i < length) { - val c = ch(start+i) - if (c == '&') out.write("&") - else if (c == '<') out.write("<") - // This next fix is normally not necessary. 
- // However, it is required if text contains ]]> - // (The end CDATA section delimiter) - else if (c == '>') out.write(">") - else out.write(c.toInt) - i += 1 - } - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - - def ignorableWhitespace(ch: Array[Char], start: Int , length: Int) { - this.characters(ch, start, length) - } - - // do I need to escape text in PI???? - def processingInstruction(target: String, data: String) { - try { - out.write("") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def skippedEntity(name: String) { - try { - out.write("&" + name + ";") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - // LexicalHandler methods - private var inDTD: Boolean = false - private val entities = new mutable.Stack[String]() - - def startDTD(name: String, publicID: String, systemID: String) { - inDTD = true - // if this is the source document, output a DOCTYPE declaration - if (entities.isEmpty) { - var id = "" - if (publicID != null) id = " PUBLIC \"" + publicID + "\" \"" + systemID + '"' - else if (systemID != null) id = " SYSTEM \"" + systemID + '"' - try { - out.write("\r\n") - } - catch { - case e:IOException => - throw new SAXException("Error while writing DOCTYPE", e) - } - } - } - def endDTD() {} - - def startEntity(name: String) { - entities push name - } - - def endEntity(name: String) { - entities.pop() - } - - def startCDATA() {} - def endCDATA() {} - - // Just need this reference so we can ask if a comment is - // inside an include element or not - private var filter: XIncludeFilter = null - - def setFilter(filter: XIncludeFilter) { - this.filter = filter - } - - def comment(ch: Array[Char], start: Int, length: Int) { - if (!inDTD && !filter.insideIncludeElement()) { - try { - out.write("") - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - } -} diff --git 
a/src/library/scala/xml/package.scala b/src/library/scala/xml/package.scala deleted file mode 100644 index 4001cc5ffb..0000000000 --- a/src/library/scala/xml/package.scala +++ /dev/null @@ -1,19 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala - -package object xml { - val XercesClassName = "org.apache.xerces.parsers.SAXParser" - - type SAXException = org.xml.sax.SAXException - type SAXParseException = org.xml.sax.SAXParseException - type EntityResolver = org.xml.sax.EntityResolver - type InputSource = org.xml.sax.InputSource - type SAXParser = javax.xml.parsers.SAXParser -} diff --git a/src/library/scala/xml/parsing/ConstructingHandler.scala b/src/library/scala/xml/parsing/ConstructingHandler.scala deleted file mode 100755 index ba416e4301..0000000000 --- a/src/library/scala/xml/parsing/ConstructingHandler.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -/** Implementation of MarkupHandler that constructs nodes. 
- * - * @author Burak Emir - * @version 1.0 - */ -abstract class ConstructingHandler extends MarkupHandler -{ - val preserveWS: Boolean - - def elem(pos: Int, pre: String, label: String, attrs: MetaData, - pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = - Elem(pre, label, attrs, pscope, empty, nodes:_*) - - def procInstr(pos: Int, target: String, txt: String) = - ProcInstr(target, txt) - - def comment(pos: Int, txt: String) = Comment(txt) - def entityRef(pos: Int, n: String) = EntityRef(n) - def text(pos: Int, txt: String) = Text(txt) -} diff --git a/src/library/scala/xml/parsing/ConstructingParser.scala b/src/library/scala/xml/parsing/ConstructingParser.scala deleted file mode 100644 index 3caeddabf4..0000000000 --- a/src/library/scala/xml/parsing/ConstructingParser.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import java.io.File -import scala.io.Source - -object ConstructingParser { - def fromFile(inp: File, preserveWS: Boolean) = - new ConstructingParser(Source.fromFile(inp), preserveWS).initialize - - def fromSource(inp: Source, preserveWS: Boolean) = - new ConstructingParser(inp, preserveWS).initialize -} - -/** An xml parser. parses XML and invokes callback methods of a MarkupHandler. - * Don't forget to call next.ch on a freshly instantiated parser in order to - * initialize it. If you get the parser from the object method, initialization - * is already done for you. 
- * - * {{{ - * object parseFromURL { - * def main(args: Array[String]) { - * val url = args(0) - * val src = scala.io.Source.fromURL(url) - * val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false) // fromSource initializes automatically - * val doc = cpa.document() - * - * // let's see what it is - * val ppr = new scala.xml.PrettyPrinter(80, 5) - * val ele = doc.docElem - * println("finished parsing") - * val out = ppr.format(ele) - * println(out) - * } - * } - * }}} */ -class ConstructingParser(val input: Source, val preserveWS: Boolean) -extends ConstructingHandler -with ExternalSources -with MarkupParser { - - // default impl. of Logged - override def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/parsing/DefaultMarkupHandler.scala b/src/library/scala/xml/parsing/DefaultMarkupHandler.scala deleted file mode 100755 index 6ec7474843..0000000000 --- a/src/library/scala/xml/parsing/DefaultMarkupHandler.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - - -/** Default implementation of markup handler always returns `NodeSeq.Empty` */ -abstract class DefaultMarkupHandler extends MarkupHandler { - - def elem(pos: Int, pre: String, label: String, attrs: MetaData, - scope:NamespaceBinding, empty: Boolean, args: NodeSeq) = NodeSeq.Empty - - def procInstr(pos: Int, target: String, txt: String) = NodeSeq.Empty - - def comment(pos: Int, comment: String ): NodeSeq = NodeSeq.Empty - - def entityRef(pos: Int, n: String) = NodeSeq.Empty - - def text(pos: Int, txt:String) = NodeSeq.Empty - -} diff --git a/src/library/scala/xml/parsing/ExternalSources.scala b/src/library/scala/xml/parsing/ExternalSources.scala deleted file mode 100644 index bb939bca95..0000000000 --- 
a/src/library/scala/xml/parsing/ExternalSources.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import java.net.URL -import java.io.File.separator - -import scala.io.Source - -/** - * @author Burak Emir - * @version 1.0 - */ -trait ExternalSources { - self: ExternalSources with MarkupParser with MarkupHandler => - - def externalSource(systemId: String): Source = { - if (systemId startsWith "http:") - return Source fromURL new URL(systemId) - - val fileStr: String = input.descr match { - case x if x startsWith "file:" => x drop 5 - case x => x take ((x lastIndexOf separator) + 1) - } - - Source.fromFile(fileStr + systemId) - } -} diff --git a/src/library/scala/xml/parsing/FactoryAdapter.scala b/src/library/scala/xml/parsing/FactoryAdapter.scala deleted file mode 100644 index 2154bdf5ba..0000000000 --- a/src/library/scala/xml/parsing/FactoryAdapter.scala +++ /dev/null @@ -1,187 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream } -import scala.collection.{ mutable, Iterator } -import org.xml.sax.Attributes -import org.xml.sax.helpers.DefaultHandler - -// can be mixed into FactoryAdapter if desired -trait ConsoleErrorHandler extends DefaultHandler { - // ignore warning, crimson warns even for entity resolution! 
- override def warning(ex: SAXParseException): Unit = { } - override def error(ex: SAXParseException): Unit = printError("Error", ex) - override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex) - - protected def printError(errtype: String, ex: SAXParseException): Unit = - Console.withOut(Console.err) { - val s = "[%s]:%d:%d: %s".format( - errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage) - Console.println(s) - Console.flush() - } -} - -/** SAX adapter class, for use with Java SAX parser. Keeps track of - * namespace bindings, without relying on namespace handling of the - * underlying SAX parser. - */ -abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] { - var rootElem: Node = null - - val buffer = new StringBuilder() - val attribStack = new mutable.Stack[MetaData] - val hStack = new mutable.Stack[Node] // [ element ] contains siblings - val tagStack = new mutable.Stack[String] - var scopeStack = new mutable.Stack[NamespaceBinding] - - var curTag : String = null - var capture: Boolean = false - - // abstract methods - - /** Tests if an XML element contains text. - * @return true if element named `localName` contains text. - */ - def nodeContainsText(localName: String): Boolean // abstract - - /** creates an new non-text(tree) node. - * @param elemName - * @param attribs - * @param chIter - * @return a new XML element. - */ - def createNode(pre: String, elemName: String, attribs: MetaData, - scope: NamespaceBinding, chIter: List[Node]): Node // abstract - - /** creates a Text node. - * @param text - * @return a new Text node. - */ - def createText(text: String): Text // abstract - - /** creates a new processing instruction node. - */ - def createProcInstr(target: String, data: String): Seq[ProcInstr] - - // - // ContentHandler methods - // - - val normalizeWhitespace = false - - /** Characters. 
- * @param ch - * @param offset - * @param length - */ - override def characters(ch: Array[Char], offset: Int, length: Int): Unit = { - if (!capture) return - // compliant: report every character - else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length) - // normalizing whitespace is not compliant, but useful - else { - var it = ch.slice(offset, offset + length).iterator - while (it.hasNext) { - val c = it.next() - val isSpace = c.isWhitespace - buffer append (if (isSpace) ' ' else c) - if (isSpace) - it = it dropWhile (_.isWhitespace) - } - } - } - - private def splitName(s: String) = { - val idx = s indexOf ':' - if (idx < 0) (null, s) - else (s take idx, s drop (idx + 1)) - } - - /* ContentHandler methods */ - - /* Start element. */ - override def startElement( - uri: String, - _localName: String, - qname: String, - attributes: Attributes): Unit = - { - captureText() - tagStack push curTag - curTag = qname - - val localName = splitName(qname)._2 - capture = nodeContainsText(localName) - - hStack push null - var m: MetaData = Null - var scpe: NamespaceBinding = - if (scopeStack.isEmpty) TopScope - else scopeStack.top - - for (i <- 0 until attributes.getLength()) { - val qname = attributes getQName i - val value = attributes getValue i - val (pre, key) = splitName(qname) - def nullIfEmpty(s: String) = if (s == "") null else s - - if (pre == "xmlns" || (pre == null && qname == "xmlns")) { - val arg = if (pre == null) null else key - scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe) - } - else - m = Attribute(Option(pre), key, Text(value), m) - } - - scopeStack push scpe - attribStack push m - } - - - /** captures text, possibly normalizing whitespace - */ - def captureText(): Unit = { - if (capture && buffer.length > 0) - hStack push createText(buffer.toString) - - buffer.clear() - } - - /** End element. - * @param uri - * @param _localName - * @param qname - * @throws org.xml.sax.SAXException if .. 
- */ - override def endElement(uri: String , _localName: String, qname: String): Unit = { - captureText() - val metaData = attribStack.pop() - - // reverse order to get it right - val v = (Iterator continually hStack.pop takeWhile (_ != null)).toList.reverse - val (pre, localName) = splitName(qname) - val scp = scopeStack.pop() - - // create element - rootElem = createNode(pre, localName, metaData, scp, v) - hStack push rootElem - curTag = tagStack.pop() - capture = curTag != null && nodeContainsText(curTag) // root level - } - - /** Processing instruction. - */ - override def processingInstruction(target: String, data: String) { - hStack pushAll createProcInstr(target, data) - } -} diff --git a/src/library/scala/xml/parsing/FatalError.scala b/src/library/scala/xml/parsing/FatalError.scala deleted file mode 100644 index ab3cb2a74d..0000000000 --- a/src/library/scala/xml/parsing/FatalError.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -/** !!! This is poorly named, but I guess it's in the API. 
- */ -case class FatalError(msg: String) extends java.lang.RuntimeException(msg) diff --git a/src/library/scala/xml/parsing/MarkupHandler.scala b/src/library/scala/xml/parsing/MarkupHandler.scala deleted file mode 100755 index 1ebffb9c90..0000000000 --- a/src/library/scala/xml/parsing/MarkupHandler.scala +++ /dev/null @@ -1,127 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import scala.collection.mutable -import scala.io.Source -import scala.xml.dtd._ - -/** class that handles markup - provides callback methods to MarkupParser. - * the default is nonvalidating behaviour - * - * @author Burak Emir - * @version 1.0 - * - * @todo can we ignore more entity declarations (i.e. those with extIDs)? - * @todo expanding entity references - */ -abstract class MarkupHandler { - - /** returns true is this markup handler is validating */ - val isValidating: Boolean = false - - var decls: List[Decl] = Nil - var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() - - def lookupElemDecl(Label: String): ElemDecl = { - for (z @ ElemDecl(Label, _) <- decls) - return z - - null - } - - def replacementText(entityName: String): Source = - Source fromString ((ent get entityName) match { - case Some(ParsedEntityDecl(_, IntDef(value))) => value - case Some(ParameterEntityDecl(_, IntDef(value))) => " %s " format value - case Some(_) => "" format entityName - case None => "" format entityName - }) - - def endDTD(n: String): Unit = () - - /** callback method invoked by MarkupParser after start-tag of element. 
- * - * @param pos the position in the sourcefile - * @param pre the prefix - * @param label the local name - * @param attrs the attributes (metadata) - */ - def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit = () - - /** callback method invoked by MarkupParser after end-tag of element. - * - * @param pos the position in the source file - * @param pre the prefix - * @param label the local name - */ - def elemEnd(pos: Int, pre: String, label: String): Unit = () - - /** callback method invoked by MarkupParser after parsing an element, - * between the elemStart and elemEnd callbacks - * - * @param pos the position in the source file - * @param pre the prefix - * @param label the local name - * @param attrs the attributes (metadata) - * @param empty `true` if the element was previously empty; `false` otherwise. - * @param args the children of this element - */ - def elem(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, empty: Boolean, args: NodeSeq): NodeSeq - - /** callback method invoked by MarkupParser after parsing PI. - */ - def procInstr(pos: Int, target: String, txt: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing comment. - */ - def comment(pos: Int, comment: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing entity ref. - * @todo expanding entity references - */ - def entityRef(pos: Int, n: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing text. 
- */ - def text(pos: Int, txt: String): NodeSeq - - // DTD handler methods - - def elemDecl(n: String, cmstr: String): Unit = () - - def attListDecl(name: String, attList: List[AttrDecl]): Unit = () - - private def someEntityDecl(name: String, edef: EntityDef, f: (String, EntityDef) => EntityDecl): Unit = - edef match { - case _: ExtDef if !isValidating => // ignore (cf REC-xml 4.4.1) - case _ => - val y = f(name, edef) - decls ::= y - ent.update(name, y) - } - - def parameterEntityDecl(name: String, edef: EntityDef): Unit = - someEntityDecl(name, edef, ParameterEntityDecl.apply _) - - def parsedEntityDecl(name: String, edef: EntityDef): Unit = - someEntityDecl(name, edef, ParsedEntityDecl.apply _) - - def peReference(name: String) { decls ::= PEReference(name) } - def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = () - def notationDecl(notat: String, extID: ExternalID): Unit = () - def reportSyntaxError(pos: Int, str: String): Unit - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala deleted file mode 100755 index 3bbd136b67..0000000000 --- a/src/library/scala/xml/parsing/MarkupParser.scala +++ /dev/null @@ -1,938 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source -import scala.xml.dtd._ -import Utility.Escapes.{ pairs => unescape } - -/** - * An XML parser. - * - * Parses XML 1.0, invokes callback methods of a `MarkupHandler` and returns - * whatever the markup handler returns. Use `ConstructingParser` if you just - * want to parse XML to construct instances of `scala.xml.Node`. 
- * - * While XML elements are returned, DTD declarations - if handled - are - * collected using side-effects. - * - * @author Burak Emir - * @version 1.0 - */ -trait MarkupParser extends MarkupParserCommon with TokenTests -{ - self: MarkupParser with MarkupHandler => - - type PositionType = Int - type InputType = Source - type ElementType = NodeSeq - type AttributesType = (MetaData, NamespaceBinding) - type NamespaceType = NamespaceBinding - - def truncatedError(msg: String): Nothing = throw FatalError(msg) - def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag) - - def xHandleError(that: Char, msg: String) = reportSyntaxError(msg) - - val input: Source - - /** if true, does not remove surplus whitespace */ - val preserveWS: Boolean - - def externalSource(systemLiteral: String): Source - - // - // variables, values - // - - protected var curInput: Source = input - - // See ticket #3720 for motivations. - private class WithLookAhead(underlying: Source) extends Source { - private val queue = scala.collection.mutable.Queue[Char]() - def lookahead(): BufferedIterator[Char] = { - val iter = queue.iterator ++ new Iterator[Char] { - def hasNext = underlying.hasNext - def next() = { val x = underlying.next(); queue += x; x } - } - iter.buffered - } - val iter = new Iterator[Char] { - def hasNext = underlying.hasNext || !queue.isEmpty - def next() = if (!queue.isEmpty) queue.dequeue() else underlying.next() - } - } - - def lookahead(): BufferedIterator[Char] = curInput match { - case curInputWLA:WithLookAhead => - curInputWLA.lookahead() - case _ => - val newInput = new WithLookAhead(curInput) - curInput = newInput - newInput.lookahead() - } - - - /** the handler of the markup, returns this */ - private val handle: MarkupHandler = this - - /** stack of inputs */ - var inpStack: List[Source] = Nil - - /** holds the position in the source file */ - var pos: Int = _ - - /* used when reading external subset */ - var extIndex = -1 - - /** holds 
temporary values of pos */ - var tmppos: Int = _ - - /** holds the next character */ - var nextChNeeded: Boolean = false - var reachedEof: Boolean = false - var lastChRead: Char = _ - def ch: Char = { - if (nextChNeeded) { - if (curInput.hasNext) { - lastChRead = curInput.next() - pos = curInput.pos - } else { - val ilen = inpStack.length - //Console.println(" ilen = "+ilen+ " extIndex = "+extIndex); - if ((ilen != extIndex) && (ilen > 0)) { - /* for external source, inpStack == Nil ! need notify of eof! */ - pop() - } else { - reachedEof = true - lastChRead = 0.asInstanceOf[Char] - } - } - nextChNeeded = false - } - lastChRead - } - - /** character buffer, for names */ - protected val cbuf = new StringBuilder() - - var dtd: DTD = null - - protected var doc: Document = null - - def eof: Boolean = { ch; reachedEof } - - // - // methods - // - - /** {{{ - * - * }}} */ - def xmlProcInstr(): MetaData = { - xToken("xml") - xSpace() - val (md,scp) = xAttributes(TopScope) - if (scp != TopScope) - reportSyntaxError("no xmlns definitions here, please.") - xToken('?') - xToken('>') - md - } - - /** Factored out common code. 
- */ - private def prologOrTextDecl(isProlog: Boolean): (Option[String], Option[String], Option[Boolean]) = { - var info_ver: Option[String] = None - var info_enc: Option[String] = None - var info_stdl: Option[Boolean] = None - - val m = xmlProcInstr() - var n = 0 - - if (isProlog) - xSpaceOpt() - - m("version") match { - case null => - case Text("1.0") => info_ver = Some("1.0"); n += 1 - case _ => reportSyntaxError("cannot deal with versions != 1.0") - } - - m("encoding") match { - case null => - case Text(enc) => - if (!isValidIANAEncoding(enc)) - reportSyntaxError("\"" + enc + "\" is not a valid encoding") - else { - info_enc = Some(enc) - n += 1 - } - } - - if (isProlog) { - m("standalone") match { - case null => - case Text("yes") => info_stdl = Some(true); n += 1 - case Text("no") => info_stdl = Some(false); n += 1 - case _ => reportSyntaxError("either 'yes' or 'no' expected") - } - } - - if (m.length - n != 0) { - val s = if (isProlog) "SDDecl? " else "" - reportSyntaxError("VersionInfo EncodingDecl? %sor '?>' expected!" format s) - } - - (info_ver, info_enc, info_stdl) - } - - /** {{{ - * (x1, x2) } - - /** {{{ - * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? - * [23] XMLDecl ::= '' - * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') - * [25] Eq ::= S? '=' S? - * [26] VersionNum ::= '1.0' - * [27] Misc ::= Comment | PI | S - * }}} */ - def document(): Document = { - doc = new Document() - - this.dtd = null - var info_prolog: (Option[String], Option[String], Option[Boolean]) = (None, None, None) - if ('<' != ch) { - reportSyntaxError("< expected") - return null - } - - nextch() // is prolog ? - var children: NodeSeq = null - if ('?' 
== ch) { - nextch() - info_prolog = prolog() - doc.version = info_prolog._1 - doc.encoding = info_prolog._2 - doc.standAlone = info_prolog._3 - - children = content(TopScope) // DTD handled as side effect - } - else { - val ts = new NodeBuffer() - content1(TopScope, ts) // DTD handled as side effect - ts &+ content(TopScope) - children = NodeSeq.fromSeq(ts) - } - //println("[MarkupParser::document] children now: "+children.toList) - var elemCount = 0 - var theNode: Node = null - for (c <- children) c match { - case _:ProcInstr => - case _:Comment => - case _:EntityRef => // todo: fix entities, shouldn't be "special" - reportSyntaxError("no entity references allowed here") - case s:SpecialNode => - if (s.toString.trim().length > 0) //non-empty text nodes not allowed - elemCount += 2 - case m:Node => - elemCount += 1 - theNode = m - } - if (1 != elemCount) { - reportSyntaxError("document must contain exactly one element") - Console.println(children.toList) - } - - doc.children = children - doc.docElem = theNode - doc - } - - /** append Unicode character to name buffer*/ - protected def putChar(c: Char) = cbuf append c - - /** As the current code requires you to call nextch once manually - * after construction, this method formalizes that suboptimal reality. 
- */ - def initialize: this.type = { - nextch() - this - } - - protected def ch_returning_nextch: Char = { val res = ch; nextch(); res } - - def mkAttributes(name: String, pscope: NamespaceBinding): AttributesType = - if (isNameStart (ch)) xAttributes(pscope) - else (Null, pscope) - - def mkProcInstr(position: Int, name: String, text: String): ElementType = - handle.procInstr(position, name, text) - - /** this method tells ch to get the next character when next called */ - def nextch() { - // Read current ch if needed - ch - - // Mark next ch to be required - nextChNeeded = true - } - - /** parse attribute and create namespace scope, metadata - * {{{ - * [41] Attributes ::= { S Name Eq AttValue } - * }}} - */ - def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = { - var scope: NamespaceBinding = pscope - var aMap: MetaData = Null - while (isNameStart(ch)) { - val qname = xName - xEQ() // side effect - val value = xAttributeValue() - - Utility.prefix(qname) match { - case Some("xmlns") => - val prefix = qname.substring(6 /*xmlns:*/ , qname.length) - scope = new NamespaceBinding(prefix, value, scope) - - case Some(prefix) => - val key = qname.substring(prefix.length+1, qname.length) - aMap = new PrefixedAttribute(prefix, key, Text(value), aMap) - - case _ => - if( qname == "xmlns" ) - scope = new NamespaceBinding(null, value, scope) - else - aMap = new UnprefixedAttribute(qname, Text(value), aMap) - } - - if ((ch != '/') && (ch != '>') && ('?' != ch)) - xSpace() - } - - if(!aMap.wellformed(scope)) - reportSyntaxError( "double attribute") - - (aMap,scope) - } - - /** entity value, terminated by either ' or ". value may not contain <. 
- * {{{ - * AttValue ::= `'` { _ } `'` - * | `"` { _ } `"` - * }}} - */ - def xEntityValue(): String = { - val endch = ch - nextch() - while (ch != endch && !eof) { - putChar(ch) - nextch() - } - nextch() - val str = cbuf.toString() - cbuf.length = 0 - str - } - - /** {{{ - * '"{char} ) ']]>' - * - * see [15] - * }}} */ - def xCharData: NodeSeq = { - xToken("[CDATA[") - def mkResult(pos: Int, s: String): NodeSeq = { - handle.text(pos, s) - PCData(s) - } - xTakeUntil(mkResult, () => pos, "]]>") - } - - /** {{{ - * Comment ::= '' - * - * see [15] - * }}} */ - def xComment: NodeSeq = { - val sb: StringBuilder = new StringBuilder() - xToken("--") - while (true) { - if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) { - sb.length = sb.length - 1 - nextch() - xToken('>') - return handle.comment(pos, sb.toString()) - } else sb.append(ch) - nextch() - } - throw FatalError("this cannot happen") - } - - /* todo: move this into the NodeBuilder class */ - def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = { - if (preserveWS) - ts &+ handle.text(pos, txt) - else - for (t <- TextBuffer.fromString(txt).toText) { - ts &+ handle.text(pos, t.text) - } - } - - /** {{{ - * '<' content1 ::= ... - * }}} */ - def content1(pscope: NamespaceBinding, ts: NodeBuffer) { - ch match { - case '!' => - nextch() - if ('[' == ch) // CDATA - ts &+ xCharData - else if ('D' == ch) // doctypedecl, parse DTD // @todo REMOVE HACK - parseDTD() - else // comment - ts &+ xComment - case '?' => // PI - nextch() - ts &+ xProcInstr - case _ => - ts &+ element1(pscope) // child - } - } - - /** {{{ - * content1 ::= '<' content1 | '&' charref ... - * }}} */ - def content(pscope: NamespaceBinding): NodeSeq = { - val ts = new NodeBuffer - var exit = eof - // todo: optimize seq repr. 
- def done = new NodeSeq { val theSeq = ts.toList } - - while (!exit) { - tmppos = pos - exit = eof - - if (eof) - return done - - ch match { - case '<' => // another tag - nextch(); ch match { - case '/' => exit = true // end tag - case _ => content1(pscope, ts) - } - - // postcond: xEmbeddedBlock == false! - case '&' => // EntityRef or CharRef - nextch(); ch match { - case '#' => // CharacterRef - nextch() - val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch())) - xToken(';') - ts &+ theChar - case _ => // EntityRef - val n = xName - xToken(';') - - if (unescape contains n) { - handle.entityRef(tmppos, n) - ts &+ unescape(n) - } else push(n) - } - case _ => // text content - appendText(tmppos, ts, xText) - } - } - done - } // content(NamespaceBinding) - - /** {{{ - * externalID ::= SYSTEM S syslit - * PUBLIC S pubid S syslit - * }}} */ - def externalID(): ExternalID = ch match { - case 'S' => - nextch() - xToken("YSTEM") - xSpace() - val sysID = systemLiteral() - new SystemID(sysID) - case 'P' => - nextch(); xToken("UBLIC") - xSpace() - val pubID = pubidLiteral() - xSpace() - val sysID = systemLiteral() - new PublicID(pubID, sysID) - } - - - /** parses document type declaration and assigns it to instance variable - * dtd. - * {{{ - * - * }}} */ - def parseDTD() { // dirty but fast - var extID: ExternalID = null - if (this.dtd ne null) - reportSyntaxError("unexpected character (DOCTYPE already defined") - xToken("DOCTYPE") - xSpace() - val n = xName - xSpace() - //external ID - if ('S' == ch || 'P' == ch) { - extID = externalID() - xSpaceOpt() - } - - /* parse external subset of DTD - */ - - if ((null != extID) && isValidating) { - - pushExternal(extID.systemId) - extIndex = inpStack.length - - extSubset() - pop() - extIndex = -1 - } - - if ('[' == ch) { // internal subset - nextch() - /* TODO */ - intSubset() - // TODO: do the DTD parsing?? ?!?!?!?!! 
- xToken(']') - xSpaceOpt() - } - xToken('>') - this.dtd = new DTD { - /*override var*/ externalID = extID - /*override val */decls = handle.decls.reverse - } - //this.dtd.initializeEntities(); - if (doc ne null) - doc.dtd = this.dtd - - handle.endDTD(n) - } - - def element(pscope: NamespaceBinding): NodeSeq = { - xToken('<') - element1(pscope) - } - - /** {{{ - * '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag - * | xmlTag1 '/' '>' - * }}} */ - def element1(pscope: NamespaceBinding): NodeSeq = { - val pos = this.pos - val (qname, (aMap, scope)) = xTag(pscope) - val (pre, local) = Utility.prefix(qname) match { - case Some(p) => (p, qname drop p.length+1) - case _ => (null, qname) - } - val ts = { - if (ch == '/') { // empty element - xToken("/>") - handle.elemStart(pos, pre, local, aMap, scope) - NodeSeq.Empty - } - else { // element with content - xToken('>') - handle.elemStart(pos, pre, local, aMap, scope) - val tmp = content(scope) - xEndTag(qname) - tmp - } - } - val res = handle.elem(pos, pre, local, aMap, scope, ts == NodeSeq.Empty, ts) - handle.elemEnd(pos, pre, local) - res - } - - /** Parse character data. - * - * precondition: `xEmbeddedBlock == false` (we are not in a scala block) - */ - private def xText: String = { - var exit = false - while (! exit) { - putChar(ch) - nextch() - - exit = eof || ( ch == '<' ) || ( ch == '&' ) - } - val str = cbuf.toString - cbuf.length = 0 - str - } - - /** attribute value, terminated by either ' or ". value may not contain <. 
- * {{{ - * AttValue ::= `'` { _ } `'` - * | `"` { _ } `"` - * }}} */ - def systemLiteral(): String = { - val endch = ch - if (ch != '\'' && ch != '"') - reportSyntaxError("quote ' or \" expected") - nextch() - while (ch != endch && !eof) { - putChar(ch) - nextch() - } - nextch() - val str = cbuf.toString() - cbuf.length = 0 - str - } - - /** {{{ - * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" - * }}} */ - def pubidLiteral(): String = { - val endch = ch - if (ch!='\'' && ch != '"') - reportSyntaxError("quote ' or \" expected") - nextch() - while (ch != endch && !eof) { - putChar(ch) - //println("hello '"+ch+"'"+isPubIDChar(ch)) - if (!isPubIDChar(ch)) - reportSyntaxError("char '"+ch+"' is not allowed in public id") - nextch() - } - nextch() - val str = cbuf.toString - cbuf.length = 0 - str - } - - // - // dtd parsing - // - - def extSubset(): Unit = { - var textdecl: (Option[String],Option[String]) = null - if (ch == '<') { - nextch() - if (ch == '?') { - nextch() - textdecl = textDecl() - } else - markupDecl1() - } - while (!eof) - markupDecl() - } - - def markupDecl1() = { - def doInclude() = { - xToken('['); while(']' != ch) markupDecl(); nextch() // ']' - } - def doIgnore() = { - xToken('['); while(']' != ch) nextch(); nextch() // ']' - } - if ('?' == ch) { - nextch() - xProcInstr // simply ignore processing instructions! 
- } else { - xToken('!') - ch match { - case '-' => - xComment // ignore comments - - case 'E' => - nextch() - if ('L' == ch) { - nextch() - elementDecl() - } else - entityDecl() - - case 'A' => - nextch() - attrDecl() - - case 'N' => - nextch() - notationDecl() - - case '[' if inpStack.length >= extIndex => - nextch() - xSpaceOpt() - ch match { - case '%' => - nextch() - val ent = xName - xToken(';') - xSpaceOpt() - - push(ent) - xSpaceOpt() - val stmt = xName - xSpaceOpt() - - stmt match { - // parameter entity - case "INCLUDE" => doInclude() - case "IGNORE" => doIgnore() - } - case 'I' => - nextch() - ch match { - case 'G' => - nextch() - xToken("NORE") - xSpaceOpt() - doIgnore() - case 'N' => - nextch() - xToken("NCLUDE") - doInclude() - } - } - xToken(']') - xToken('>') - - case _ => - curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl") - while (ch!='>') - nextch() - } - } - } - - def markupDecl(): Unit = ch match { - case '%' => // parameter entity reference - nextch() - val ent = xName - xToken(';') - if (!isValidating) - handle.peReference(ent) // n-v: just create PE-reference - else - push(ent) // v: parse replacementText - - //peReference - case '<' => - nextch() - markupDecl1() - case _ if isSpace(ch) => - xSpace() - case _ => - reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.toInt) - nextch() - } - - /** "rec-xml/#ExtSubset" pe references may not occur within markup declarations - */ - def intSubset() { - //Console.println("(DEBUG) intSubset()") - xSpace() - while (']' != ch) - markupDecl() - } - - /** <! 
element := ELEMENT - */ - def elementDecl() { - xToken("EMENT") - xSpace() - val n = xName - xSpace() - while ('>' != ch) { - //Console.println("["+ch+"]") - putChar(ch) - nextch() - } - //Console.println("END["+ch+"]") - nextch() - val cmstr = cbuf.toString() - cbuf.length = 0 - handle.elemDecl(n, cmstr) - } - - /** {{{ - * ' != ch) { - val aname = xName - xSpace() - // could be enumeration (foo,bar) parse this later :-/ - while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { - if (!isSpace(ch)) - cbuf.append(ch) - nextch() - } - val atpe = cbuf.toString - cbuf.length = 0 - - val defdecl: DefaultDecl = ch match { - case '\'' | '"' => - DEFAULT(fixed = false, xAttributeValue()) - - case '#' => - nextch() - xName match { - case "FIXED" => xSpace() ; DEFAULT(fixed = true, xAttributeValue()) - case "IMPLIED" => IMPLIED - case "REQUIRED" => REQUIRED - } - case _ => - null - } - xSpaceOpt() - - attList ::= AttrDecl(aname, atpe, defdecl) - cbuf.length = 0 - } - nextch() - handle.attListDecl(n, attList.reverse) - } - - /** {{{ - * //sy - val extID = externalID() - if (isParameterEntity) { - xSpaceOpt() - xToken('>') - handle.parameterEntityDecl(n, ExtDef(extID)) - } else { // notation? 
- xSpace() - if ('>' != ch) { - xToken("NDATA") - xSpace() - val notat = xName - xSpaceOpt() - xToken('>') - handle.unparsedEntityDecl(n, extID, notat) - } else { - nextch() - handle.parsedEntityDecl(n, ExtDef(extID)) - } - } - - case '"' | '\'' => - val av = xEntityValue() - xSpaceOpt() - xToken('>') - if (isParameterEntity) - handle.parameterEntityDecl(n, IntDef(av)) - else - handle.parsedEntityDecl(n, IntDef(av)) - } - {} - } // entityDecl - - /** {{{ - * 'N' notationDecl ::= "OTATION" - * }}} */ - def notationDecl() { - xToken("OTATION") - xSpace() - val notat = xName - xSpace() - val extID = if (ch == 'S') { - externalID() - } - else if (ch == 'P') { - /* PublicID (without system, only used in NOTATION) */ - nextch() - xToken("UBLIC") - xSpace() - val pubID = pubidLiteral() - xSpaceOpt() - val sysID = if (ch != '>') - systemLiteral() - else - null - new PublicID(pubID, sysID) - } else { - reportSyntaxError("PUBLIC or SYSTEM expected") - scala.sys.error("died parsing notationdecl") - } - xSpaceOpt() - xToken('>') - handle.notationDecl(notat, extID) - } - - def reportSyntaxError(pos: Int, str: String) { curInput.reportError(pos, str) } - def reportSyntaxError(str: String) { reportSyntaxError(pos, str) } - def reportValidationError(pos: Int, str: String) { reportSyntaxError(pos, str) } - - def push(entityName: String) { - if (!eof) - inpStack = curInput :: inpStack - - // can't push before getting next character if needed - ch - - curInput = replacementText(entityName) - nextch() - } - - def pushExternal(systemId: String) { - if (!eof) - inpStack = curInput :: inpStack - - // can't push before getting next character if needed - ch - - curInput = externalSource(systemId) - nextch() - } - - def pop() { - curInput = inpStack.head - inpStack = inpStack.tail - lastChRead = curInput.ch - nextChNeeded = false - pos = curInput.pos - reachedEof = false // must be false, because of places where entity refs occur - } -} diff --git 
a/src/library/scala/xml/parsing/MarkupParserCommon.scala b/src/library/scala/xml/parsing/MarkupParserCommon.scala deleted file mode 100644 index 57c1651558..0000000000 --- a/src/library/scala/xml/parsing/MarkupParserCommon.scala +++ /dev/null @@ -1,260 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source -import scala.annotation.switch -import Utility.Escapes.{ pairs => unescape } - -import Utility.SU - -/** This is not a public trait - it contains common code shared - * between the library level XML parser and the compiler's. - * All members should be accessed through those. - */ -private[scala] trait MarkupParserCommon extends TokenTests { - protected def unreachable = scala.sys.error("Cannot be reached.") - - // type HandleType // MarkupHandler, SymbolicXMLBuilder - type InputType // Source, CharArrayReader - type PositionType // Int, Position - type ElementType // NodeSeq, Tree - type NamespaceType // NamespaceBinding, Any - type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree] - - def mkAttributes(name: String, pscope: NamespaceType): AttributesType - def mkProcInstr(position: PositionType, name: String, text: String): ElementType - - /** parse a start or empty tag. - * [40] STag ::= '<' Name { S Attribute } [S] - * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] - */ - protected def xTag(pscope: NamespaceType): (String, AttributesType) = { - val name = xName - xSpaceOpt() - - (name, mkAttributes(name, pscope)) - } - - /** '?' {Char})]'?>' - * - * see [15] - */ - def xProcInstr: ElementType = { - val n = xName - xSpaceOpt() - xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>") - } - - /** attribute value, terminated by either `'` or `"`. value may not contain `<`. 
- @param endCh either `'` or `"` - */ - def xAttributeValue(endCh: Char): String = { - val buf = new StringBuilder - while (ch != endCh) { - // well-formedness constraint - if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") - else if (ch == SU) truncatedError("") - else buf append ch_returning_nextch - } - ch_returning_nextch - // @todo: normalize attribute value - buf.toString - } - - def xAttributeValue(): String = { - val str = xAttributeValue(ch_returning_nextch) - // well-formedness constraint - normalizeAttributeValue(str) - } - - private def takeUntilChar(it: Iterator[Char], end: Char): String = { - val buf = new StringBuilder - while (it.hasNext) it.next() match { - case `end` => return buf.toString - case ch => buf append ch - } - scala.sys.error("Expected '%s'".format(end)) - } - - /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' - */ - def xEndTag(startName: String) { - xToken('/') - if (xName != startName) - errorNoEnd(startName) - - xSpaceOpt() - xToken('>') - } - - /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen - * Name ::= (Letter | '_') (NameChar)* - * - * see [5] of XML 1.0 specification - * - * pre-condition: ch != ':' // assured by definition of XMLSTART token - * post-condition: name does neither start, nor end in ':' - */ - def xName: String = { - if (ch == SU) - truncatedError("") - else if (!isNameStart(ch)) - return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "") - - val buf = new StringBuilder - - do buf append ch_returning_nextch - while (isNameChar(ch)) - - if (buf.last == ':') { - reportSyntaxError( "name cannot end in ':'" ) - buf.toString dropRight 1 - } - else buf.toString - } - - private def attr_unescape(s: String) = s match { - case "lt" => "<" - case "gt" => ">" - case "amp" => "&" - case "apos" => "'" - case "quot" => "\"" - case "quote" => "\"" - case _ => "&" + s + ";" - } - - /** Replaces only character references right now. 
- * see spec 3.3.3 - */ - private def normalizeAttributeValue(attval: String): String = { - val buf = new StringBuilder - val it = attval.iterator.buffered - - while (it.hasNext) buf append (it.next() match { - case ' ' | '\t' | '\n' | '\r' => " " - case '&' if it.head == '#' => it.next() ; xCharRef(it) - case '&' => attr_unescape(takeUntilChar(it, ';')) - case c => c - }) - - buf.toString - } - - /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" - * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" - * - * see [66] - */ - def xCharRef(ch: () => Char, nextch: () => Unit): String = - Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _) - - def xCharRef(it: Iterator[Char]): String = { - var c = it.next() - Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _) - } - - def xCharRef: String = xCharRef(() => ch, () => nextch()) - - /** Create a lookahead reader which does not influence the input */ - def lookahead(): BufferedIterator[Char] - - /** The library and compiler parsers had the interesting distinction of - * different behavior for nextch (a function for which there are a total - * of two plausible behaviors, so we know the design space was fully - * explored.) One of them returned the value of nextch before the increment - * and one of them the new value. So to unify code we have to at least - * temporarily abstract over the nextchs. 
- */ - def ch: Char - def nextch(): Unit - protected def ch_returning_nextch: Char - def eof: Boolean - - // def handle: HandleType - var tmppos: PositionType - - def xHandleError(that: Char, msg: String): Unit - def reportSyntaxError(str: String): Unit - def reportSyntaxError(pos: Int, str: String): Unit - - def truncatedError(msg: String): Nothing - def errorNoEnd(tag: String): Nothing - - protected def errorAndResult[T](msg: String, x: T): T = { - reportSyntaxError(msg) - x - } - - def xToken(that: Char) { - if (ch == that) nextch() - else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) - } - def xToken(that: Seq[Char]) { that foreach xToken } - - /** scan [S] '=' [S]*/ - def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() } - - /** skip optional space S? */ - def xSpaceOpt() = while (isSpace(ch) && !eof) nextch() - - /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ - def xSpace() = - if (isSpace(ch)) { nextch(); xSpaceOpt() } - else xHandleError(ch, "whitespace expected") - - /** Apply a function and return the passed value */ - def returning[T](x: T)(f: T => Unit): T = { f(x); x } - - /** Execute body with a variable saved and restored after execution */ - def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = { - val saved = getter - try body - finally setter(saved) - } - - /** Take characters from input stream until given String "until" - * is seen. Once seen, the accumulated characters are passed - * along with the current Position to the supplied handler function. 
- */ - protected def xTakeUntil[T]( - handler: (PositionType, String) => T, - positioner: () => PositionType, - until: String): T = - { - val sb = new StringBuilder - val head = until.head - val rest = until.tail - - while (true) { - if (ch == head && peek(rest)) - return handler(positioner(), sb.toString) - else if (ch == SU) - truncatedError("") // throws TruncatedXMLControl in compiler - - sb append ch - nextch() - } - unreachable - } - - /** Create a non-destructive lookahead reader and see if the head - * of the input would match the given String. If yes, return true - * and drop the entire String from input; if no, return false - * and leave input unchanged. - */ - private def peek(lookingFor: String): Boolean = - (lookahead() take lookingFor.length sameElements lookingFor.iterator) && { - // drop the chars from the real reader (all lookahead + orig) - (0 to lookingFor.length) foreach (_ => nextch()) - true - } -} diff --git a/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala b/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala deleted file mode 100644 index 56ac185f47..0000000000 --- a/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package parsing - -import factory.NodeFactory - -/** nobinding adaptor providing callbacks to parser to create elements. -* implements hash-consing -*/ -class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] -{ - /** True. Every XML node may contain text that the application needs */ - def nodeContainsText(label: String) = true - - /** From NodeFactory. 
Constructs an instance of scala.xml.Elem */ - protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem = - Elem(pre, label, attrs, scope, children: _*) - - /** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing. */ - def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem = - Elem(pre, label, attrs, scope, children: _*) - - /** Creates a text node. */ - def createText(text: String) = Text(text) - - /** Creates a processing instruction. */ - def createProcInstr(target: String, data: String) = makeProcInstr(target, data) -} diff --git a/src/library/scala/xml/parsing/TokenTests.scala b/src/library/scala/xml/parsing/TokenTests.scala deleted file mode 100644 index 8dd9cdfaa3..0000000000 --- a/src/library/scala/xml/parsing/TokenTests.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -/** - * Helper functions for parsing XML fragments - */ -trait TokenTests { - - /** {{{ - * (#x20 | #x9 | #xD | #xA) - * }}} */ - final def isSpace(ch: Char): Boolean = ch match { - case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true - case _ => false - } - /** {{{ - * (#x20 | #x9 | #xD | #xA)+ - * }}} */ - final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) - - /** These are 99% sure to be redundant but refactoring on the safe side. */ - def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') - - /** {{{ - * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' - * | CombiningChar | Extender - * }}} - * See [4] and Appendix B of XML 1.0 specification. 
- */ - def isNameChar(ch: Char) = { - import java.lang.Character._ - // The constants represent groups Mc, Me, Mn, Lm, and Nd. - - isNameStart(ch) || (getType(ch).toByte match { - case COMBINING_SPACING_MARK | - ENCLOSING_MARK | NON_SPACING_MARK | - MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true - case _ => ".-:" contains ch - }) - } - - /** {{{ - * NameStart ::= ( Letter | '_' ) - * }}} - * where Letter means in one of the Unicode general - * categories `{ Ll, Lu, Lo, Lt, Nl }`. - * - * We do not allow a name to start with `:`. - * See [3] and Appendix B of XML 1.0 specification - */ - def isNameStart(ch: Char) = { - import java.lang.Character._ - - getType(ch).toByte match { - case LOWERCASE_LETTER | - UPPERCASE_LETTER | OTHER_LETTER | - TITLECASE_LETTER | LETTER_NUMBER => true - case _ => ch == '_' - } - } - - /** {{{ - * Name ::= ( Letter | '_' ) (NameChar)* - * }}} - * See [5] of XML 1.0 specification. - */ - def isName(s: String) = - s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar) - - def isPubIDChar(ch: Char): Boolean = - isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') || - ("""-\()+,./:=?;!*#@$_%""" contains ch) - - /** - * Returns `true` if the encoding name is a valid IANA encoding. - * This method does not verify that there is a decoder available - * for this encoding, only that the characters are valid for an - * IANA encoding name. - * - * @param ianaEncoding The IANA encoding name. 
- */ - def isValidIANAEncoding(ianaEncoding: Seq[Char]) = { - def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c) - - ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) && - (ianaEncoding.tail forall charOK) - } - - def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c)) - def checkPubID(s: String) = s forall isPubIDChar -} diff --git a/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala b/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala deleted file mode 100644 index 1b20901249..0000000000 --- a/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala +++ /dev/null @@ -1,104 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import scala.xml.dtd._ - -abstract class ValidatingMarkupHandler extends MarkupHandler { - - var rootLabel:String = _ - var qStack: List[Int] = Nil - var qCurrent: Int = -1 - - var declStack: List[ElemDecl] = Nil - var declCurrent: ElemDecl = null - - final override val isValidating = true - - override def endDTD(n:String) = { - rootLabel = n - } - override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) { - - def advanceDFA(dm:DFAContentModel) = { - val trans = dm.dfa.delta(qCurrent) - log("advanceDFA(dm): " + dm) - log("advanceDFA(trans): " + trans) - trans.get(ContentModel.ElemName(label)) match { - case Some(qNew) => qCurrent = qNew - case _ => reportValidationError(pos, "DTD says, wrong element, expected one of "+trans.keys) - } - } - // advance in current automaton - log("[qCurrent = "+qCurrent+" visiting "+label+"]") - - if (qCurrent == -1) { // root - log(" checking root") - if (label != rootLabel) - reportValidationError(pos, "this element should be "+rootLabel) - } else { - log(" checking node") - 
declCurrent.contentModel match { - case ANY => - case EMPTY => - reportValidationError(pos, "DTD says, no elems, no text allowed here") - case PCDATA => - reportValidationError(pos, "DTD says, no elements allowed here") - case m @ MIXED(r) => - advanceDFA(m) - case e @ ELEMENTS(r) => - advanceDFA(e) - } - } - // push state, decl - qStack = qCurrent :: qStack - declStack = declCurrent :: declStack - - declCurrent = lookupElemDecl(label) - qCurrent = 0 - log(" done now") - } - - override def elemEnd(pos: Int, pre: String, label: String) { - log(" elemEnd") - qCurrent = qStack.head - qStack = qStack.tail - declCurrent = declStack.head - declStack = declStack.tail - log(" qCurrent now" + qCurrent) - log(" declCurrent now" + declCurrent) - } - - final override def elemDecl(name: String, cmstr: String) { - decls = ElemDecl(name, ContentModel.parse(cmstr)) :: decls - } - - final override def attListDecl(name: String, attList: List[AttrDecl]) { - decls = AttListDecl(name, attList) :: decls - } - - final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String) { - decls = UnparsedEntityDecl(name, extID, notat) :: decls - } - - final override def notationDecl(notat: String, extID: ExternalID) { - decls = NotationDecl(notat, extID) :: decls - } - - final override def peReference(name: String) { - decls = PEReference(name) :: decls - } - - /** report a syntax error */ - def reportValidationError(pos: Int, str: String): Unit -} diff --git a/src/library/scala/xml/parsing/XhtmlEntities.scala b/src/library/scala/xml/parsing/XhtmlEntities.scala deleted file mode 100644 index 3683af202c..0000000000 --- a/src/library/scala/xml/parsing/XhtmlEntities.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import 
scala.xml.dtd.{ IntDef, ParsedEntityDecl } - -/** - * @author (c) David Pollak 2007 WorldWide Conferencing, LLC. - * - */ -object XhtmlEntities { - val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165), - ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482), - ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184), - ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247), - ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200), - ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209), - ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219), - ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228), - ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237), - ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246), - ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338), - ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",204), ("zwj",8205), ("lrm",8206), - ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), 
("bdquo",8222), - ("dagger",8224), ("Dagger",8225), ("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242), - ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593), - ("rarr",8594), ("darr",8495), ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660), - ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719), - ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744), - ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804), - ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901), - ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827), - ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919), - ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929), - ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947), - ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957), - ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967), - ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982)) - - val entMap: Map[String, Char] = 
Map.empty[String, Char] ++ entList.map { case (name, value) => (name, value.toChar)} - - val entities = entList. - map { case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))} - - def apply() = entities -} diff --git a/src/library/scala/xml/parsing/XhtmlParser.scala b/src/library/scala/xml/parsing/XhtmlParser.scala deleted file mode 100644 index 6ce5bec8d0..0000000000 --- a/src/library/scala/xml/parsing/XhtmlParser.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source - -/** An XML Parser that preserves `CDATA` blocks and knows about - * [[scala.xml.parsing.XhtmlEntities]]. - * - * @author (c) David Pollak, 2007 WorldWide Conferencing, LLC. - */ -class XhtmlParser(val input: Source) extends ConstructingHandler with MarkupParser with ExternalSources { - val preserveWS = true - ent ++= XhtmlEntities() -} - -/** Convenience method that instantiates, initializes and runs an `XhtmlParser`. 
- * - * @author Burak Emir - */ -object XhtmlParser { - def apply(source: Source): NodeSeq = new XhtmlParser(source).initialize.document() -} diff --git a/src/library/scala/xml/persistent/CachedFileStorage.scala b/src/library/scala/xml/persistent/CachedFileStorage.scala deleted file mode 100644 index a1489ef3f4..0000000000 --- a/src/library/scala/xml/persistent/CachedFileStorage.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package persistent - -import java.io.{ File, FileOutputStream } -import java.nio.ByteBuffer -import java.nio.channels.Channels -import java.lang.Thread - -import scala.collection.Iterator - -/** Mutable storage of immutable xml trees. Everything is kept in memory, - * with a thread periodically checking for changes and writing to file. - * - * To ensure atomicity, two files are used, `filename1` and `'$'+filename1`. - * The implementation switches between the two, deleting the older one - * after a complete dump of the database has been written. - * - * @author Burak Emir - */ -abstract class CachedFileStorage(private val file1: File) extends Thread { - - private val file2 = new File(file1.getParent, file1.getName+"$") - - /** Either equals `file1` or `file2`, references the next file in which - * updates will be stored. - */ - private var theFile: File = null - - private def switch() = { theFile = if (theFile == file1) file2 else file1; } - - /** this storage modified since last modification check */ - protected var dirty = false - - /** period between modification checks, in milliseconds */ - protected val interval = 1000 - - /** finds and loads the storage file. subclasses should call this method - * prior to any other, but only once, to obtain the initial sequence of nodes. 
- */ - protected def initialNodes: Iterator[Node] = (file1.exists, file2.exists) match { - case (false,false) => - theFile = file1 - Iterator.empty - case (true, true ) if (file1.lastModified < file2.lastModified) => - theFile = file2 - load - case (true, _ ) => - theFile = file1 - load - case _ => - theFile = file2 - load - } - - /** returns an iterator over the nodes in this storage */ - def nodes: Iterator[Node] - - /** adds a node, setting this.dirty to true as a side effect */ - def += (e: Node): Unit - - /** removes a tree, setting this.dirty to true as a side effect */ - def -= (e: Node): Unit - - /* loads and parses XML from file */ - private def load: Iterator[Node] = { - import scala.io.Source - import scala.xml.parsing.ConstructingParser - log("[load]\nloading "+theFile) - val src = Source.fromFile(theFile) - log("parsing "+theFile) - val res = ConstructingParser.fromSource(src,preserveWS = false).document.docElem(0) - switch() - log("[load done]") - res.child.iterator - } - - /** saves the XML to file */ - private def save() = if (this.dirty) { - log("[save]\ndeleting "+theFile) - theFile.delete() - log("creating new "+theFile) - theFile.createNewFile() - val fos = new FileOutputStream(theFile) - val c = fos.getChannel() - - // @todo: optimize - val storageNode = { nodes.toList } - val w = Channels.newWriter(c, "utf-8") - XML.write(w, storageNode, "utf-8", xmlDecl = true, doctype = null) - - log("writing to "+theFile) - - w.close - c.close - fos.close - dirty = false - switch() - log("[save done]") - } - - /** Run method of the thread. remember to use `start()` to start a thread, - * not `run`. */ - override def run = { - log("[run]\nstarting storage thread, checking every "+interval+" ms") - while (true) { - Thread.sleep( this.interval.toLong ) - save() - } - } - - /** Force writing of contents to the file, even if there has not been any - * update. 
*/ - def flush() = { - this.dirty = true - save() - } - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/persistent/Index.scala b/src/library/scala/xml/persistent/Index.scala deleted file mode 100644 index 9ee45e7086..0000000000 --- a/src/library/scala/xml/persistent/Index.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package persistent - -/** an Index returns some unique key that is part of a node - */ -abstract class Index[A] extends Function1[Node,A] {} diff --git a/src/library/scala/xml/persistent/SetStorage.scala b/src/library/scala/xml/persistent/SetStorage.scala deleted file mode 100644 index 8db56a2e71..0000000000 --- a/src/library/scala/xml/persistent/SetStorage.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package persistent - -import scala.collection.mutable -import java.io.File - -/** A persistent store with set semantics. This class allows to add and remove - * trees, but never contains two structurally equal trees. 
- * - * @author Burak Emir - */ -class SetStorage(file: File) extends CachedFileStorage(file) { - - private val theSet = mutable.HashSet[Node]() - - // initialize - - { - val it = super.initialNodes - dirty = it.hasNext - theSet ++= it - } - - /* forwarding methods to hashset*/ - - def += (e: Node): Unit = synchronized { this.dirty = true; theSet += e } - - def -= (e: Node): Unit = synchronized { this.dirty = true; theSet -= e } - - def nodes = synchronized { theSet.iterator } - -} diff --git a/src/library/scala/xml/pull/XMLEvent.scala b/src/library/scala/xml/pull/XMLEvent.scala deleted file mode 100644 index 3beb3648e7..0000000000 --- a/src/library/scala/xml/pull/XMLEvent.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package pull - -/** An XML event for pull parsing. All events received during - * parsing will be one of the subclasses of this trait. - */ -trait XMLEvent - -/** - * An Element's start tag was encountered. - * @param pre prefix, if any, on the element. This is the `xs` in `foo`. - * @param label the name of the element, not including the prefix - * @param attrs any attributes on the element - */ -case class EvElemStart(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) extends XMLEvent - -/** - * An Element's end tag was encountered. - * @param pre prefix, if any, on the element. This is the `xs` in `foo`. - * @param label the name of the element, not including the prefix - */ -case class EvElemEnd(pre: String, label: String) extends XMLEvent - -/** - * A text node was encountered. - * @param text the text that was found - */ -case class EvText(text: String) extends XMLEvent - -/** An entity reference was encountered. - * @param entity the name of the entity, e.g. 
`gt` when encountering the entity `>` - */ -case class EvEntityRef(entity: String) extends XMLEvent - -/** - * A processing instruction was encountered. - * @param target the "PITarget" of the processing instruction. For the instruction ``, the target would - * be `foo` - * @param text the remainder of the instruction. For the instruction ``, the text would - * be `bar="baz"` - * @see [[http://www.w3.org/TR/REC-xml/#sec-pi]] - */ -case class EvProcInstr(target: String, text: String) extends XMLEvent - -/** - * A comment was encountered - * @param text the text of the comment - */ -case class EvComment(text: String) extends XMLEvent diff --git a/src/library/scala/xml/pull/XMLEventReader.scala b/src/library/scala/xml/pull/XMLEventReader.scala deleted file mode 100755 index 76e51e17fd..0000000000 --- a/src/library/scala/xml/pull/XMLEventReader.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package pull - -import scala.io.Source -import java.lang.Thread -import java.util.concurrent.LinkedBlockingQueue -import java.nio.channels.ClosedChannelException -import scala.xml.parsing.{ ExternalSources, MarkupHandler, MarkupParser } - -/** - * Main entry point into creating an event-based XML parser. Treating this - * as a [[scala.collection.Iterator]] will provide access to the generated events. - * @param src A [[scala.io.Source]] for XML data to parse - * - * @author Burak Emir - * @author Paul Phillips - */ -class XMLEventReader(src: Source) -extends scala.collection.AbstractIterator[XMLEvent] - with ProducerConsumerIterator[XMLEvent] { - - // We implement a pull parser as an iterator, but since we may be operating on - // a stream (e.g. XML over a network) there may be arbitrarily long periods when - // the queue is empty. 
Fortunately the ProducerConsumerIterator is ideally - // suited to this task, possibly because it was written for use by this class. - - // to override as necessary - val preserveWS = true - - override val MaxQueueSize = 1000 - protected case object POISON extends XMLEvent - val EndOfStream = POISON - - // thread machinery - private[this] val parser = new Parser(src) - private[this] val parserThread = new Thread(parser, "XMLEventReader") - parserThread.start - // enqueueing the poison object is the reliable way to cause the - // iterator to terminate; hasNext will return false once it sees it. - // Calling interrupt() on the parserThread is the only way we can get - // it to stop producing tokens since it's lost deep in document() - - // we cross our fingers the interrupt() gets to its target, but if it - // fails for whatever reason the iterator correctness is not impacted, - // only performance (because it will finish the entire XML document, - // or at least as much as it can fit in the queue.) 
- def stop() = { - produce(POISON) - parserThread.interrupt() - } - - private class Parser(val input: Source) extends MarkupHandler with MarkupParser with ExternalSources with Runnable { - val preserveWS = XMLEventReader.this.preserveWS - // track level for elem memory usage optimization - private var level = 0 - - // this is Parser's way to add to the queue - the odd return type - // is to conform to MarkupHandler's interface - def setEvent(es: XMLEvent*): NodeSeq = { - es foreach produce - NodeSeq.Empty - } - - override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) { - level += 1 - setEvent(EvElemStart(pre, label, attrs, scope)) - } - override def elemEnd(pos: Int, pre: String, label: String) { - setEvent(EvElemEnd(pre, label)) - level -= 1 - } - - // this is a dummy to satisfy MarkupHandler's API - // memory usage optimization return one for top level to satisfy - // MarkupParser.document() otherwise NodeSeq.Empty - private var ignoreWritten = false - final def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = - if (level == 1 && !ignoreWritten) {ignoreWritten = true; } else NodeSeq.Empty - - def procInstr(pos: Int, target: String, txt: String) = setEvent(EvProcInstr(target, txt)) - def comment(pos: Int, txt: String) = setEvent(EvComment(txt)) - def entityRef(pos: Int, n: String) = setEvent(EvEntityRef(n)) - def text(pos: Int, txt:String) = setEvent(EvText(txt)) - - override def run() { - curInput = input - interruptibly { this.initialize.document() } - setEvent(POISON) - } - } -} - -// An iterator designed for one or more producers to generate -// elements, and a single consumer to iterate. Iteration will continue -// until closeIterator() is called, after which point producers -// calling produce() will receive interruptions. 
-// -// Since hasNext may block indefinitely if nobody is producing, -// there is also an available() method which will return true if -// the next call hasNext is guaranteed not to block. -// -// This is not thread-safe for multiple consumers! -trait ProducerConsumerIterator[T >: Null] extends Iterator[T] { - // abstract - iterator-specific distinguished object for marking eos - val EndOfStream: T - - // defaults to unbounded - override to positive Int if desired - val MaxQueueSize = -1 - - def interruptibly[T](body: => T): Option[T] = try Some(body) catch { - case _: InterruptedException => Thread.currentThread.interrupt(); None - case _: ClosedChannelException => None - } - - private[this] lazy val queue = - if (MaxQueueSize < 0) new LinkedBlockingQueue[T]() - else new LinkedBlockingQueue[T](MaxQueueSize) - private[this] var buffer: T = _ - private def fillBuffer() = { - buffer = interruptibly(queue.take) getOrElse EndOfStream - isElement(buffer) - } - private def isElement(x: T) = x != null && x != EndOfStream - private def eos() = buffer == EndOfStream - - // public producer interface - this is the only method producers call, so - // LinkedBlockingQueue's synchronization is all we need. - def produce(x: T): Unit = if (!eos) interruptibly(queue put x) - - // consumer/iterator interface - we need not synchronize access to buffer - // because we required there to be only one consumer. 
- def hasNext = !eos && (buffer != null || fillBuffer) - - def next() = { - if (eos()) throw new NoSuchElementException("ProducerConsumerIterator") - if (buffer == null) fillBuffer() - - drainBuffer() - } - - def available() = isElement(buffer) || isElement(queue.peek) - - private def drainBuffer() = { - assert(!eos) - val res = buffer - buffer = null - res - } -} diff --git a/src/library/scala/xml/pull/package.scala b/src/library/scala/xml/pull/package.scala deleted file mode 100644 index 0e3019446b..0000000000 --- a/src/library/scala/xml/pull/package.scala +++ /dev/null @@ -1,42 +0,0 @@ -package scala -package xml - -/** - * Classes needed to view an XML document as a series of events. The document - * is parsed by an [[scala.xml.pull.XMLEventReader]] instance. You can treat it as - * an [[scala.collection.Iterator]] to retrieve the events, which are all - * subclasses of [[scala.xml.pull.XMLEvent]]. - * - * {{{ - * scala> val source = Source.fromString("""<?xml version="1.0" encoding="UTF-8" standalone="yes"?> - * <?instruction custom value="customvalue"?> - * <!DOCTYPE foo [ - * <!ENTITY bar "BAR"> - * ]><foo>Hello<!-- this is a comment --><bar>&bar;</bar><bar>&gt;</bar></foo>""") - * - * source: scala.io.Source = non-empty iterator - * - * scala> val reader = new XMLEventReader(source) - * reader: scala.xml.pull.XMLEventReader = non-empty iterator - * - * scala> reader.foreach{ println(_) } - * EvProcInstr(instruction,custom value="customvalue") - * EvText( - * ) - * EvElemStart(null,foo,,) - * EvText(Hello) - * EvComment( this is a comment ) - * EvElemStart(null,bar,,) - * EvText(BAR) - * EvElemEnd(null,bar) - * EvElemStart(null,bar,,) - * EvEntityRef(gt) - * EvElemEnd(null,bar) - * EvElemEnd(null,foo) - * EvText( - * - * ) - * - * }}} - */ -package object pull diff --git a/src/library/scala/xml/transform/BasicTransformer.scala b/src/library/scala/xml/transform/BasicTransformer.scala deleted file mode 100644 index c98339fd67..0000000000 --- a/src/library/scala/xml/transform/BasicTransformer.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ 
/__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package transform - -/** A class for XML transformations. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class BasicTransformer extends Function1[Node,Node] -{ - protected def unchanged(n: Node, ns: Seq[Node]) = - ns.length == 1 && (ns.head == n) - - /** Call transform(Node) for each node in ns, append results - * to NodeBuffer. - */ - def transform(it: Iterator[Node], nb: NodeBuffer): Seq[Node] = - it.foldLeft(nb)(_ ++= transform(_)).toSeq - - /** Call transform(Node) to each node in ns, yield ns if nothing changes, - * otherwise a new sequence of concatenated results. - */ - def transform(ns: Seq[Node]): Seq[Node] = { - val (xs1, xs2) = ns span (n => unchanged(n, transform(n))) - - if (xs2.isEmpty) ns - else xs1 ++ transform(xs2.head) ++ transform(xs2.tail) - } - - def transform(n: Node): Seq[Node] = { - if (n.doTransform) n match { - case Group(xs) => Group(transform(xs)) // un-group the hack Group tag - case _ => - val ch = n.child - val nch = transform(ch) - - if (ch eq nch) n - else Elem(n.prefix, n.label, n.attributes, n.scope, nch: _*) - } - else n - } - - def apply(n: Node): Node = { - val seq = transform(n) - if (seq.length > 1) - throw new UnsupportedOperationException("transform must return single node for root") - else seq.head - } -} diff --git a/src/library/scala/xml/transform/RewriteRule.scala b/src/library/scala/xml/transform/RewriteRule.scala deleted file mode 100644 index 1399ee538d..0000000000 --- a/src/library/scala/xml/transform/RewriteRule.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package transform - -/** A RewriteRule, when applied to a term, yields either - * the 
result of rewriting the term or the term itself if the rule - * is not applied. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class RewriteRule extends BasicTransformer { - /** a name for this rewrite rule */ - val name = this.toString() - override def transform(ns: Seq[Node]): Seq[Node] = super.transform(ns) - override def transform(n: Node): Seq[Node] = n -} - diff --git a/src/library/scala/xml/transform/RuleTransformer.scala b/src/library/scala/xml/transform/RuleTransformer.scala deleted file mode 100644 index 3a222ba759..0000000000 --- a/src/library/scala/xml/transform/RuleTransformer.scala +++ /dev/null @@ -1,16 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package transform - -class RuleTransformer(rules: RewriteRule*) extends BasicTransformer { - override def transform(n: Node): Seq[Node] = - rules.foldLeft(super.transform(n)) { (res, rule) => rule transform res } -} diff --git a/src/partest/scala/tools/partest/nest/FileManager.scala b/src/partest/scala/tools/partest/nest/FileManager.scala index 230ada4803..ee24c0b9c1 100644 --- a/src/partest/scala/tools/partest/nest/FileManager.scala +++ b/src/partest/scala/tools/partest/nest/FileManager.scala @@ -68,12 +68,13 @@ trait FileManager extends FileUtil { else (SFile(LATEST_LIB).parent.parent / "classes" / what).toAbsolute.path } + def latestXmlLib = relativeToLibrary("xml") def latestScaladoc = relativeToLibrary("scaladoc") def latestInteractive = relativeToLibrary("interactive") def latestScalapFile = relativeToLibrary("scalap") def latestPaths = List( LATEST_LIB, LATEST_REFLECT, LATEST_COMP, LATEST_PARTEST, LATEST_ACTORS, - latestScalapFile, latestScaladoc, latestInteractive + latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive ) def latestFiles = latestPaths map (p => new 
java.io.File(p)) def latestUrls = latestFiles map (_.toURI.toURL) diff --git a/src/xml/scala/xml/Atom.scala b/src/xml/scala/xml/Atom.scala new file mode 100644 index 0000000000..33e58ba7e7 --- /dev/null +++ b/src/xml/scala/xml/Atom.scala @@ -0,0 +1,47 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Atom` provides an XML node for text (`PCDATA`). + * It is used in both non-bound and bound XML representations. + * + * @author Burak Emir + * @param data the text contained in this node, may not be `'''null'''`. + */ +class Atom[+A](val data: A) extends SpecialNode with Serializable { + if (data == null) + throw new IllegalArgumentException("cannot construct "+getClass.getSimpleName+" with null") + + override protected def basisForHashCode: Seq[Any] = Seq(data) + + override def strict_==(other: Equality) = other match { + case x: Atom[_] => data == x.data + case _ => false + } + + override def canEqual(other: Any) = other match { + case _: Atom[_] => true + case _ => false + } + + final override def doCollectNamespaces = false + final override def doTransform = false + + def label = "#PCDATA" + + /** Returns text, with some characters escaped according to the XML + * specification. 
+ */ + def buildString(sb: StringBuilder): StringBuilder = + Utility.escape(data.toString, sb) + + override def text: String = data.toString + +} diff --git a/src/xml/scala/xml/Attribute.scala b/src/xml/scala/xml/Attribute.scala new file mode 100644 index 0000000000..e4b2b69fc6 --- /dev/null +++ b/src/xml/scala/xml/Attribute.scala @@ -0,0 +1,101 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `apply` and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Attribute { + def unapply(x: Attribute) = x match { + case PrefixedAttribute(_, key, value, next) => Some((key, value, next)) + case UnprefixedAttribute(key, value, next) => Some((key, value, next)) + case _ => None + } + + /** Convenience functions which choose Un/Prefixedness appropriately */ + def apply(key: String, value: Seq[Node], next: MetaData): Attribute = + new UnprefixedAttribute(key, value, next) + + def apply(pre: String, key: String, value: String, next: MetaData): Attribute = + if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) + else new PrefixedAttribute(pre, key, value, next) + + def apply(pre: String, key: String, value: Seq[Node], next: MetaData): Attribute = + if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) + else new PrefixedAttribute(pre, key, value, next) + + def apply(pre: Option[String], key: String, value: Seq[Node], next: MetaData): Attribute = + pre match { + case None => new UnprefixedAttribute(key, value, next) + case Some(p) => new PrefixedAttribute(p, key, value, next) + } +} + +/** The `Attribute` trait defines the interface shared by both + * [[scala.xml.PrefixedAttribute]] and [[scala.xml.UnprefixedAttribute]]. 
+ * + * @author Burak Emir + * @version 1.0 + */ +abstract trait Attribute extends MetaData { + def pre: String // will be null if unprefixed + val key: String + val value: Seq[Node] + val next: MetaData + + def apply(key: String): Seq[Node] + def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] + def copy(next: MetaData): Attribute + + def remove(key: String) = + if (!isPrefixed && this.key == key) next + else copy(next remove key) + + def remove(namespace: String, scope: NamespaceBinding, key: String) = + if (this.key == key && (scope getURI pre) == namespace) next + else copy(next.remove(namespace, scope, key)) + + def isPrefixed: Boolean = pre != null + + def getNamespace(owner: Node): String + + def wellformed(scope: NamespaceBinding): Boolean = { + val arg = if (isPrefixed) scope getURI pre else null + (next(arg, scope, key) == null) && (next wellformed scope) + } + + /** Returns an iterator on attributes */ + override def iterator: Iterator[MetaData] = { + if (value == null) next.iterator + else Iterator.single(this) ++ next.iterator + } + + override def size: Int = { + if (value == null) next.size + else 1 + next.size + } + + /** Appends string representation of only this attribute to stringbuffer. 
+ */ + protected def toString1(sb: StringBuilder) { + if (value == null) + return + if (isPrefixed) + sb append pre append ':' + + sb append key append '=' + val sb2 = new StringBuilder() + Utility.sequenceToXML(value, TopScope, sb2, stripComments = true) + Utility.appendQuoted(sb2.toString, sb) + } +} diff --git a/src/xml/scala/xml/Comment.scala b/src/xml/scala/xml/Comment.scala new file mode 100644 index 0000000000..b8dccdcb16 --- /dev/null +++ b/src/xml/scala/xml/Comment.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Comment` implements an XML node for comments. + * + * @author Burak Emir + * @param commentText the text contained in this node, may not contain "--" + */ +case class Comment(commentText: String) extends SpecialNode { + + def label = "#REM" + override def text = "" + final override def doCollectNamespaces = false + final override def doTransform = false + + if (commentText contains "--") + throw new IllegalArgumentException("text contains \"--\"") + + /** Appends "<!-- text -->" to this string buffer. + */ + override def buildString(sb: StringBuilder) = + sb append "<!--" + commentText + "-->" +} diff --git a/src/xml/scala/xml/Document.scala b/src/xml/scala/xml/Document.scala new file mode 100644 index 0000000000..9a725014fc --- /dev/null +++ b/src/xml/scala/xml/Document.scala @@ -0,0 +1,92 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** A document information item (according to InfoSet spec). The comments + * are copied from the Infoset spec, only augmented with some information + * on the Scala types for definitions that might have no value. 
+ * Also plays the role of an `XMLEvent` for pull parsing. + * + * @author Burak Emir + * @version 1.0, 26/04/2005 + */ +@SerialVersionUID(-2289320563321795109L) +class Document extends NodeSeq with pull.XMLEvent with Serializable { + + /** An ordered list of child information items, in document + * order. The list contains exactly one element information item. The + * list also contains one processing instruction information item for + * each processing instruction outside the document element, and one + * comment information item for each comment outside the document + * element. Processing instructions and comments within the DTD are + * excluded. If there is a document type declaration, the list also + * contains a document type declaration information item. + */ + var children: Seq[Node] = _ + + /** The element information item corresponding to the document element. */ + var docElem: Node = _ + + /** The dtd that comes with the document, if any */ + var dtd: scala.xml.dtd.DTD = _ + + /** An unordered set of notation information items, one for each notation + * declared in the DTD. If any notation is multiply declared, this property + * has no value. + */ + def notations: Seq[scala.xml.dtd.NotationDecl] = + dtd.notations + + /** An unordered set of unparsed entity information items, one for each + * unparsed entity declared in the DTD. + */ + def unparsedEntities: Seq[scala.xml.dtd.EntityDecl] = + dtd.unparsedEntities + + /** The base URI of the document entity. */ + var baseURI: String = _ + + /** The name of the character encoding scheme in which the document entity + * is expressed. + */ + var encoding: Option[String] = _ + + /** An indication of the standalone status of the document, either + * true or false. This property is derived from the optional standalone + * document declaration in the XML declaration at the beginning of the + * document entity, and has no value (`None`) if there is no + * standalone document declaration. 
+ */ + var standAlone: Option[Boolean] = _ + + /** A string representing the XML version of the document. This + * property is derived from the XML declaration optionally present at + * the beginning of the document entity, and has no value (`None`) + * if there is no XML declaration. + */ + var version: Option[String] = _ + + /** 9. This property is not strictly speaking part of the infoset of + * the document. Rather it is an indication of whether the processor + * has read the complete DTD. Its value is a boolean. If it is false, + * then certain properties (indicated in their descriptions below) may + * be unknown. If it is true, those properties are never unknown. + */ + var allDeclarationsProcessed = false + + // methods for NodeSeq + + def theSeq: Seq[Node] = this.docElem + + override def canEqual(other: Any) = other match { + case _: Document => true + case _ => false + } +} diff --git a/src/xml/scala/xml/Elem.scala b/src/xml/scala/xml/Elem.scala new file mode 100755 index 0000000000..484cf98744 --- /dev/null +++ b/src/xml/scala/xml/Elem.scala @@ -0,0 +1,135 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `apply` and `unapplySeq` methods for + * convenient construction and deconstruction. It is possible to deconstruct + * any `Node` instance (that is not a `SpecialNode` or a `Group`) using the + * syntax `case Elem(prefix, label, attribs, scope, child @ _*) => ...` + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +object Elem { + /** Build an Elem, setting its minimizeEmpty property to `true` if it has no children. Note that this + * default may not be exactly what you want, as some XML dialects don't permit some elements to be minimized. 
+ * + * @deprecated This factory method is retained for backward compatibility; please use the other one, with which you + * can specify your own preference for minimizeEmpty. + */ + @deprecated("Use the other apply method in this object", "2.10.0") + def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*): Elem = + apply(prefix, label, attributes, scope, child.isEmpty, child: _*) + + def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, minimizeEmpty: Boolean, child: Node*): Elem = + new Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) + + def unapplySeq(n: Node) = n match { + case _: SpecialNode | _: Group => None + case _ => Some((n.prefix, n.label, n.attributes, n.scope, n.child)) + } + + import scala.sys.process._ + /** Implicitly convert a [[scala.xml.Elem]] into a + * [[scala.sys.process.ProcessBuilder]]. This is done by obtaining the text + * elements of the element, trimming spaces, and then converting the result + * from string to a process. Importantly, tags are completely ignored, so + * they cannot be used to separate parameters. + */ + @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") + implicit def xmlToProcess(command: scala.xml.Elem): ProcessBuilder = Process(command.text.trim) + + @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") + implicit def processXml(p: Process.type) = new { + /** Creates a [[scala.sys.process.ProcessBuilder]] from a Scala XML Element. + * This can be used as a way to template strings. 
+ * + * @example {{{ + * apply( {dxPath.absolutePath} --dex --output={classesDexPath.absolutePath} {classesMinJarPath.absolutePath}) + * }}} + */ + def apply(command: Elem): ProcessBuilder = Process(command.text.trim) + } +} + + +/** The case class `Elem` extends the `Node` class, + * providing an immutable data object representing an XML element. + * + * @param prefix namespace prefix (may be null, but not the empty string) + * @param label the element name + * @param attributes1 the attribute map + * @param scope the scope containing the namespace bindings + * @param minimizeEmpty `true` if this element should be serialized as minimized (i.e. "<el/>") when + * empty; `false` if it should be written out in long form. + * @param child the children of this node + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +class Elem( + override val prefix: String, + val label: String, + attributes1: MetaData, + override val scope: NamespaceBinding, + val minimizeEmpty: Boolean, + val child: Node*) +extends Node with Serializable +{ + @deprecated("This constructor is retained for backward compatibility. 
Please use the primary constructor, which lets you specify your own preference for `minimizeEmpty`.", "2.10.0") + def this(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*) = { + this(prefix, label, attributes, scope, child.isEmpty, child: _*) + } + + final override def doCollectNamespaces = true + final override def doTransform = true + + override val attributes = MetaData.normalize(attributes1, scope) + + if (prefix == "") + throw new IllegalArgumentException("prefix of zero length, use null instead") + + if (scope == null) + throw new IllegalArgumentException("scope is null, use scala.xml.TopScope for empty scope") + + //@todo: copy the children, + // setting namespace scope if necessary + // cleaning adjacent text nodes if necessary + + override protected def basisForHashCode: Seq[Any] = + prefix :: label :: attributes :: child.toList + + /** Returns a new element with updated attributes, resolving namespace uris + * from this element's scope. See MetaData.update for details. + * + * @param updates MetaData with new and updated attributes + * @return a new symbol with updated attributes + */ + final def %(updates: MetaData): Elem = + copy(attributes = MetaData.update(attributes, scope, updates)) + + /** Returns a copy of this element with any supplied arguments replacing + * this element's value for that field. + * + * @return a new symbol with updated attributes + */ + def copy( + prefix: String = this.prefix, + label: String = this.label, + attributes: MetaData = this.attributes, + scope: NamespaceBinding = this.scope, + minimizeEmpty: Boolean = this.minimizeEmpty, + child: Seq[Node] = this.child.toSeq + ): Elem = Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) + + /** Returns concatenation of `text(n)` for each child `n`. 
+ */ + override def text = (child map (_.text)).mkString +} diff --git a/src/xml/scala/xml/EntityRef.scala b/src/xml/scala/xml/EntityRef.scala new file mode 100644 index 0000000000..7a58831075 --- /dev/null +++ b/src/xml/scala/xml/EntityRef.scala @@ -0,0 +1,40 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `EntityRef` implements an XML node for entity references. + * + * @author Burak Emir + * @version 1.0 + * @param entityName the name of the entity reference, for example `amp`. + */ +case class EntityRef(entityName: String) extends SpecialNode { + final override def doCollectNamespaces = false + final override def doTransform = false + def label = "#ENTITY" + + override def text = entityName match { + case "lt" => "<" + case "gt" => ">" + case "amp" => "&" + case "apos" => "'" + case "quot" => "\"" + case _ => Utility.sbToString(buildString) + } + + /** Appends `"& entityName;"` to this string buffer. + * + * @param sb the string buffer. + * @return the modified string buffer `sb`. + */ + override def buildString(sb: StringBuilder) = + sb.append("&").append(entityName).append(";") + +} diff --git a/src/xml/scala/xml/Equality.scala b/src/xml/scala/xml/Equality.scala new file mode 100644 index 0000000000..021d185812 --- /dev/null +++ b/src/xml/scala/xml/Equality.scala @@ -0,0 +1,107 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** In an attempt to contain the damage being inflicted on consistency by the + * ad hoc `equals` methods spread around `xml`, the logic is centralized and + * all the `xml` classes go through the `xml.Equality trait`. 
There are two + * forms of `xml` comparison. + * + * 1. `'''def''' strict_==(other: scala.xml.Equality)` + * + * This one tries to honor the little things like symmetry and hashCode + * contracts. The `equals` method routes all comparisons through this. + * + * 1. `xml_==(other: Any)` + * + * This one picks up where `strict_==` leaves off. It might declare any two + * things equal. + * + * As things stood, the logic not only made a mockery of the collections + * equals contract, but also laid waste to that of case classes. + * + * Among the obstacles to sanity are/were: + * + * Node extends NodeSeq extends Seq[Node] + * MetaData extends Iterable[MetaData] + * The hacky "Group" xml node which throws exceptions + * with wild abandon, so don't get too close + * Rampant asymmetry and impossible hashCodes + * Most classes claiming to be equal to "String" if + * some specific stringification of it was the same. + * String was never going to return the favor. + */ + +object Equality { + def asRef(x: Any): AnyRef = x.asInstanceOf[AnyRef] + + /** Note - these functions assume strict equality has already failed. + */ + def compareBlithely(x1: AnyRef, x2: String): Boolean = x1 match { + case x: Atom[_] => x.data == x2 + case x: NodeSeq => x.text == x2 + case _ => false + } + def compareBlithely(x1: AnyRef, x2: Node): Boolean = x1 match { + case x: NodeSeq if x.length == 1 => x2 == x(0) + case _ => false + } + def compareBlithely(x1: AnyRef, x2: AnyRef): Boolean = { + if (x1 == null || x2 == null) + return (x1 eq x2) + + x2 match { + case s: String => compareBlithely(x1, s) + case n: Node => compareBlithely(x1, n) + case _ => false + } + } +} +import Equality._ + +trait Equality extends scala.Equals { + protected def basisForHashCode: Seq[Any] + + def strict_==(other: Equality): Boolean + def strict_!=(other: Equality) = !strict_==(other) + + /** We insist we're only equal to other `xml.Equality` implementors, + * which heads off a lot of inconsistency up front. 
+ */ + override def canEqual(other: Any): Boolean = other match { + case x: Equality => true + case _ => false + } + + /** It's be nice to make these final, but there are probably + * people out there subclassing the XML types, especially when + * it comes to equals. However WE at least can pretend they + * are final since clearly individual classes cannot be trusted + * to maintain a semblance of order. + */ + override def hashCode() = basisForHashCode.## + override def equals(other: Any) = doComparison(other, blithe = false) + final def xml_==(other: Any) = doComparison(other, blithe = true) + final def xml_!=(other: Any) = !xml_==(other) + + /** The "blithe" parameter expresses the caller's unconcerned attitude + * regarding the usual constraints on equals. The method is thereby + * given carte blanche to declare any two things equal. + */ + private def doComparison(other: Any, blithe: Boolean) = { + val strictlyEqual = other match { + case x: AnyRef if this eq x => true + case x: Equality => (x canEqual this) && (this strict_== x) + case _ => false + } + + strictlyEqual || (blithe && compareBlithely(this, asRef(other))) + } +} diff --git a/src/xml/scala/xml/Group.scala b/src/xml/scala/xml/Group.scala new file mode 100644 index 0000000000..e3af615008 --- /dev/null +++ b/src/xml/scala/xml/Group.scala @@ -0,0 +1,42 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** A hack to group XML nodes in one node for output. 
+ * + * @author Burak Emir + * @version 1.0 + */ +final case class Group(nodes: Seq[Node]) extends Node { + override def theSeq = nodes + + override def canEqual(other: Any) = other match { + case x: Group => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case Group(xs) => nodes sameElements xs + case _ => false + } + + override protected def basisForHashCode = nodes + + /** Since Group is very much a hack it throws an exception if you + * try to do anything with it. + */ + private def fail(msg: String) = throw new UnsupportedOperationException("class Group does not support method '%s'" format msg) + + def label = fail("label") + override def attributes = fail("attributes") + override def namespace = fail("namespace") + override def child = fail("child") + def buildString(sb: StringBuilder) = fail("toString(StringBuilder)") +} diff --git a/src/xml/scala/xml/MalformedAttributeException.scala b/src/xml/scala/xml/MalformedAttributeException.scala new file mode 100644 index 0000000000..d499ad3e10 --- /dev/null +++ b/src/xml/scala/xml/MalformedAttributeException.scala @@ -0,0 +1,15 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml + + +case class MalformedAttributeException(msg: String) extends RuntimeException(msg) diff --git a/src/xml/scala/xml/MetaData.scala b/src/xml/scala/xml/MetaData.scala new file mode 100644 index 0000000000..8b5ea187cb --- /dev/null +++ b/src/xml/scala/xml/MetaData.scala @@ -0,0 +1,217 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.sbToString +import scala.annotation.tailrec +import 
scala.collection.{ AbstractIterable, Iterator } + +/** + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +object MetaData { + /** + * appends all attributes from new_tail to attribs, without attempting to + * detect or remove duplicates. The method guarantees that all attributes + * from attribs come before the attributes in new_tail, but does not + * guarantee to preserve the relative order of attribs. + * + * Duplicates can be removed with `normalize`. + */ + @tailrec // temporarily marked final so it will compile under -Xexperimental + final def concatenate(attribs: MetaData, new_tail: MetaData): MetaData = + if (attribs eq Null) new_tail + else concatenate(attribs.next, attribs copy new_tail) + + /** + * returns normalized MetaData, with all duplicates removed and namespace prefixes resolved to + * namespace URIs via the given scope. + */ + def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = { + def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData = { + lazy val key = getUniversalKey(md, scope) + if (md eq Null) normalized_attribs + else if ((md.value eq null) || set(key)) iterate(md.next, normalized_attribs, set) + else md copy iterate(md.next, normalized_attribs, set + key) + } + iterate(attribs, Null, Set()) + } + + /** + * returns key if md is unprefixed, pre+key is md is prefixed + */ + def getUniversalKey(attrib: MetaData, scope: NamespaceBinding) = attrib match { + case prefixed: PrefixedAttribute => scope.getURI(prefixed.pre) + prefixed.key + case unprefixed: UnprefixedAttribute => unprefixed.key + } + + /** + * returns MetaData with attributes updated from given MetaData + */ + def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData = + normalize(concatenate(updates, attribs), scope) + +} + +/** This class represents an attribute and at the same time a linked list of + * attributes. 
Every instance of this class is either + * - an instance of `UnprefixedAttribute key,value` or + * - an instance of `PrefixedAttribute namespace_prefix,key,value` or + * - `Null, the empty attribute list. + * + * Namespace URIs are obtained by using the namespace scope of the element + * owning this attribute (see `getNamespace`). + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +abstract class MetaData +extends AbstractIterable[MetaData] + with Iterable[MetaData] + with Equality + with Serializable { + + /** Updates this MetaData with the MetaData given as argument. All attributes that occur in updates + * are part of the resulting MetaData. If an attribute occurs in both this instance and + * updates, only the one in updates is part of the result (avoiding duplicates). For prefixed + * attributes, namespaces are resolved using the given scope, which defaults to TopScope. + * + * @param updates MetaData with new and updated attributes + * @return a new MetaData instance that contains old, new and updated attributes + */ + def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData = + MetaData.update(this, scope, updates) + + /** + * Gets value of unqualified (unprefixed) attribute with given key, null if not found + * + * @param key + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(key: String): Seq[Node] + + /** convenience method, same as `apply(namespace, owner.scope, key)`. 
+ * + * @param namespace_uri namespace uri of key + * @param owner the element owning this attribute list + * @param key the attribute key + */ + final def apply(namespace_uri: String, owner: Node, key: String): Seq[Node] = + apply(namespace_uri, owner.scope, key) + + /** + * Gets value of prefixed attribute with given key and namespace, null if not found + * + * @param namespace_uri namespace uri of key + * @param scp a namespace scp (usually of the element owning this attribute list) + * @param k to be looked for + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(namespace_uri: String, scp: NamespaceBinding, k: String): Seq[Node] + + /** returns a copy of this MetaData item with next field set to argument. + */ + def copy(next: MetaData): MetaData + + /** if owner is the element of this metadata item, returns namespace */ + def getNamespace(owner: Node): String + + def hasNext = (Null != next) + + def length: Int = length(0) + + def length(i: Int): Int = next.length(i + 1) + + def isPrefixed: Boolean + + override def canEqual(other: Any) = other match { + case _: MetaData => true + case _ => false + } + override def strict_==(other: Equality) = other match { + case m: MetaData => this.asAttrMap == m.asAttrMap + case _ => false + } + protected def basisForHashCode: Seq[Any] = List(this.asAttrMap) + + /** filters this sequence of meta data */ + override def filter(f: MetaData => Boolean): MetaData = + if (f(this)) copy(next filter f) + else next filter f + + /** returns key of this MetaData item */ + def key: String + + /** returns value of this MetaData item */ + def value: Seq[Node] + + /** Returns a String containing "prefix:key" if the first key is + * prefixed, and "key" otherwise. + */ + def prefixedKey = this match { + case x: Attribute if x.isPrefixed => x.pre + ":" + key + case _ => key + } + + /** Returns a Map containing the attributes stored as key/value pairs. 
+ */ + def asAttrMap: Map[String, String] = + (iterator map (x => (x.prefixedKey, x.value.text))).toMap + + /** returns Null or the next MetaData item */ + def next: MetaData + + /** + * Gets value of unqualified (unprefixed) attribute with given key, None if not found + * + * @param key + * @return value in Some(Seq[Node]) if key is found, None otherwise + */ + final def get(key: String): Option[Seq[Node]] = Option(apply(key)) + + /** same as get(uri, owner.scope, key) */ + final def get(uri: String, owner: Node, key: String): Option[Seq[Node]] = + get(uri, owner.scope, key) + + /** gets value of qualified (prefixed) attribute with given key. + * + * @param uri namespace of key + * @param scope a namespace scp (usually of the element owning this attribute list) + * @param key to be looked fore + * @return value as Some[Seq[Node]] if key is found, None otherwise + */ + final def get(uri: String, scope: NamespaceBinding, key: String): Option[Seq[Node]] = + Option(apply(uri, scope, key)) + + protected def toString1(): String = sbToString(toString1) + + // appends string representations of single attribute to StringBuilder + protected def toString1(sb: StringBuilder): Unit + + override def toString(): String = sbToString(buildString) + + def buildString(sb: StringBuilder): StringBuilder = { + sb append ' ' + toString1(sb) + next buildString sb + } + + /** + */ + def wellformed(scope: NamespaceBinding): Boolean + + def remove(key: String): MetaData + + def remove(namespace: String, scope: NamespaceBinding, key: String): MetaData + + final def remove(namespace: String, owner: Node, key: String): MetaData = + remove(namespace, owner.scope, key) +} diff --git a/src/xml/scala/xml/NamespaceBinding.scala b/src/xml/scala/xml/NamespaceBinding.scala new file mode 100644 index 0000000000..b320466976 --- /dev/null +++ b/src/xml/scala/xml/NamespaceBinding.scala @@ -0,0 +1,83 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** 
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.sbToString + +/** The class `NamespaceBinding` represents namespace bindings + * and scopes. The binding for the default namespace is treated as a null + * prefix. the absent namespace is represented with the null uri. Neither + * prefix nor uri may be empty, which is not checked. + * + * @author Burak Emir + * @version 1.0 + */ +@SerialVersionUID(0 - 2518644165573446725L) +case class NamespaceBinding(prefix: String, uri: String, parent: NamespaceBinding) extends AnyRef with Equality +{ + if (prefix == "") + throw new IllegalArgumentException("zero length prefix not allowed") + + def getURI(_prefix: String): String = + if (prefix == _prefix) uri else parent getURI _prefix + + /** Returns some prefix that is mapped to the URI. + * + * @param _uri the input URI + * @return the prefix that is mapped to the input URI, or null + * if no prefix is mapped to the URI. 
+ */ + def getPrefix(_uri: String): String = + if (_uri == uri) prefix else parent getPrefix _uri + + override def toString(): String = sbToString(buildString(_, TopScope)) + + private def shadowRedefined(stop: NamespaceBinding): NamespaceBinding = { + def prefixList(x: NamespaceBinding): List[String] = + if ((x == null) || (x eq stop)) Nil + else x.prefix :: prefixList(x.parent) + def fromPrefixList(l: List[String]): NamespaceBinding = l match { + case Nil => stop + case x :: xs => new NamespaceBinding(x, this.getURI(x), fromPrefixList(xs)) + } + val ps0 = prefixList(this).reverse + val ps = ps0.distinct + if (ps.size == ps0.size) this + else fromPrefixList(ps) + } + + override def canEqual(other: Any) = other match { + case _: NamespaceBinding => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case x: NamespaceBinding => (prefix == x.prefix) && (uri == x.uri) && (parent == x.parent) + case _ => false + } + + def basisForHashCode: Seq[Any] = List(prefix, uri, parent) + + def buildString(stop: NamespaceBinding): String = sbToString(buildString(_, stop)) + + def buildString(sb: StringBuilder, stop: NamespaceBinding) { + shadowRedefined(stop).doBuildString(sb, stop) + } + + private def doBuildString(sb: StringBuilder, stop: NamespaceBinding) { + if ((this == null) || (this eq stop)) return // contains? 
+ + val s = " xmlns%s=\"%s\"".format( + (if (prefix != null) ":" + prefix else ""), + (if (uri != null) uri else "") + ) + parent.doBuildString(sb append s, stop) // copy(ignore) + } +} diff --git a/src/xml/scala/xml/Node.scala b/src/xml/scala/xml/Node.scala new file mode 100755 index 0000000000..e121284252 --- /dev/null +++ b/src/xml/scala/xml/Node.scala @@ -0,0 +1,198 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `unapplySeq` method for + * convenient deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Node { + /** the constant empty attribute sequence */ + final def NoAttributes: MetaData = Null + + /** the empty namespace */ + val EmptyNamespace = "" + + def unapplySeq(n: Node) = Some((n.label, n.attributes, n.child)) +} + +/** + * An abstract class representing XML with nodes of a labelled tree. + * This class contains an implementation of a subset of XPath for navigation. + * + * @author Burak Emir and others + * @version 1.1 + */ +abstract class Node extends NodeSeq { + + /** prefix of this node */ + def prefix: String = null + + /** label of this node. I.e. "foo" for <foo/>) */ + def label: String + + /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5 + */ + def isAtom = this.isInstanceOf[Atom[_]] + + /** The logic formerly found in typeTag$, as best I could infer it. */ + def doCollectNamespaces = true // if (tag >= 0) DO collect namespaces + def doTransform = true // if (tag < 0) DO NOT transform + + /** + * method returning the namespace bindings of this node. by default, this + * is TopScope, which means there are no namespace bindings except the + * predefined one for "xml". 
+ */ + def scope: NamespaceBinding = TopScope + + /** + * convenience, same as `getNamespace(this.prefix)` + */ + def namespace = getNamespace(this.prefix) + + /** + * Convenience method, same as `scope.getURI(pre)` but additionally + * checks if scope is `'''null'''`. + * + * @param pre the prefix whose namespace name we would like to obtain + * @return the namespace if `scope != null` and prefix was + * found, else `null` + */ + def getNamespace(pre: String): String = if (scope eq null) null else scope.getURI(pre) + + /** + * Convenience method, looks up an unprefixed attribute in attributes of this node. + * Same as `attributes.getValue(key)` + * + * @param key of queried attribute. + * @return value of `UnprefixedAttribute` with given key + * in attributes, if it exists, otherwise `null`. + */ + final def attribute(key: String): Option[Seq[Node]] = attributes.get(key) + + /** + * Convenience method, looks up a prefixed attribute in attributes of this node. + * Same as `attributes.getValue(uri, this, key)`- + * + * @param uri namespace of queried attribute (may not be null). + * @param key of queried attribute. + * @return value of `PrefixedAttribute` with given namespace + * and given key, otherwise `'''null'''`. + */ + final def attribute(uri: String, key: String): Option[Seq[Node]] = + attributes.get(uri, this, key) + + /** + * Returns attribute meaning all attributes of this node, prefixed and + * unprefixed, in no particular order. In class `Node`, this + * defaults to `Null` (the empty attribute list). + * + * @return all attributes of this node + */ + def attributes: MetaData = Null + + /** + * Returns child axis i.e. all children of this node. 
+ * + * @return all children of this node + */ + def child: Seq[Node] + + /** Children which do not stringify to "" (needed for equality) + */ + def nonEmptyChildren: Seq[Node] = child filterNot (_.toString == "") + + /** + * Descendant axis (all descendants of this node, not including node itself) + * includes all text nodes, element nodes, comments and processing instructions. + */ + def descendant: List[Node] = + child.toList.flatMap { x => x::x.descendant } + + /** + * Descendant axis (all descendants of this node, including thisa node) + * includes all text nodes, element nodes, comments and processing instructions. + */ + def descendant_or_self: List[Node] = this :: descendant + + override def canEqual(other: Any) = other match { + case x: Group => false + case x: Node => true + case _ => false + } + + override protected def basisForHashCode: Seq[Any] = + prefix :: label :: attributes :: nonEmptyChildren.toList + + override def strict_==(other: Equality) = other match { + case _: Group => false + case x: Node => + (prefix == x.prefix) && + (label == x.label) && + (attributes == x.attributes) && + // (scope == x.scope) // note - original code didn't compare scopes so I left it as is. + (nonEmptyChildren sameElements x.nonEmptyChildren) + case _ => + false + } + + // implementations of NodeSeq methods + + /** + * returns a sequence consisting of only this node + */ + def theSeq: Seq[Node] = this :: Nil + + /** + * String representation of this node + * + * @param stripComments if true, strips comment nodes from result + */ + def buildString(stripComments: Boolean): String = + Utility.serialize(this, stripComments = stripComments).toString + + /** + * Same as `toString('''false''')`. + */ + override def toString(): String = buildString(stripComments = false) + + /** + * Appends qualified name of this node to `StringBuilder`. 
+ */ + def nameToString(sb: StringBuilder): StringBuilder = { + if (null != prefix) { + sb append prefix + sb append ':' + } + sb append label + } + + /** + * Returns a type symbol (e.g. DTD, XSD), default `'''null'''`. + */ + def xmlType(): TypeSymbol = null + + /** + * Returns a text representation of this node. Note that this is not equivalent to + * the XPath node-test called text(), it is rather an implementation of the + * XPath function string() + * Martin to Burak: to do: if you make this method abstract, the compiler will now + * complain if there's no implementation in a subclass. Is this what we want? Note that + * this would break doc/DocGenator and doc/ModelToXML, with an error message like: + * {{{ + * doc\DocGenerator.scala:1219: error: object creation impossible, since there is a deferred declaration of method text in class Node of type => String which is not implemented in a subclass + * new SpecialNode { + * ^ + * }}} */ + override def text: String = super.text +} diff --git a/src/xml/scala/xml/NodeBuffer.scala b/src/xml/scala/xml/NodeBuffer.scala new file mode 100644 index 0000000000..ae7c7b2bf8 --- /dev/null +++ b/src/xml/scala/xml/NodeBuffer.scala @@ -0,0 +1,47 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** + * This class acts as a Buffer for nodes. If it is used as a sequence of + * nodes `Seq[Node]`, it must be ensured that no updates occur after that + * point, because `scala.xml.Node` is assumed to be immutable. + * + * Despite this being a sequence, don't use it as key in a hashtable. + * Calling the hashcode function will result in a runtime error. 
+ * + * @author Burak Emir + * @version 1.0 + */ +class NodeBuffer extends scala.collection.mutable.ArrayBuffer[Node] { + + /** + * Append given object to this buffer, returns reference on this + * `NodeBuffer` for convenience. Some rules apply: + * - If argument `o` is `'''null'''`, it is ignored. + * - If it is an `Iterator` or `Iterable`, its elements will be added. + * - If `o` is a node, it is added as it is. + * - If it is anything else, it gets wrapped in an [[scala.xml.Atom]]. + * + * @param o converts to an xml node and adds to this node buffer + * @return this nodebuffer + */ + def &+(o: Any): NodeBuffer = { + o match { + case null | _: Unit | Text("") => // ignore + case it: Iterator[_] => it foreach &+ + case n: Node => super.+=(n) + case ns: Iterable[_] => this &+ ns.iterator + case ns: Array[_] => this &+ ns.iterator + case d => super.+=(new Atom(d)) + } + this + } +} diff --git a/src/xml/scala/xml/NodeSeq.scala b/src/xml/scala/xml/NodeSeq.scala new file mode 100644 index 0000000000..b8022472fb --- /dev/null +++ b/src/xml/scala/xml/NodeSeq.scala @@ -0,0 +1,157 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import scala.collection.{ mutable, immutable, generic, SeqLike, AbstractSeq } +import mutable.{ Builder, ListBuffer } +import generic.{ CanBuildFrom } +import scala.language.implicitConversions + +/** This object ... 
+ * + * @author Burak Emir + * @version 1.0 + */ +object NodeSeq { + final val Empty = fromSeq(Nil) + def fromSeq(s: Seq[Node]): NodeSeq = new NodeSeq { + def theSeq = s + } + type Coll = NodeSeq + implicit def canBuildFrom: CanBuildFrom[Coll, Node, NodeSeq] = + new CanBuildFrom[Coll, Node, NodeSeq] { + def apply(from: Coll) = newBuilder + def apply() = newBuilder + } + def newBuilder: Builder[Node, NodeSeq] = new ListBuffer[Node] mapResult fromSeq + implicit def seqToNodeSeq(s: Seq[Node]): NodeSeq = fromSeq(s) +} + +/** This class implements a wrapper around `Seq[Node]` that adds XPath + * and comprehension methods. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class NodeSeq extends AbstractSeq[Node] with immutable.Seq[Node] with SeqLike[Node, NodeSeq] with Equality { + import NodeSeq.seqToNodeSeq // import view magic for NodeSeq wrappers + + /** Creates a list buffer as builder for this class */ + override protected[this] def newBuilder = NodeSeq.newBuilder + + def theSeq: Seq[Node] + def length = theSeq.length + override def iterator = theSeq.iterator + + def apply(i: Int): Node = theSeq(i) + def apply(f: Node => Boolean): NodeSeq = filter(f) + + def xml_sameElements[A](that: Iterable[A]): Boolean = { + val these = this.iterator + val those = that.iterator + while (these.hasNext && those.hasNext) + if (these.next xml_!= those.next) + return false + + !these.hasNext && !those.hasNext + } + + protected def basisForHashCode: Seq[Any] = theSeq + + override def canEqual(other: Any) = other match { + case _: NodeSeq => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case x: NodeSeq => (length == x.length) && (theSeq sameElements x.theSeq) + case _ => false + } + + /** Projection function, which returns elements of `this` sequence based + * on the string `that`. 
Use: + * - `this \ "foo"` to get a list of all elements that are labelled with `"foo"`; + * - `\ "_"` to get a list of all elements (wildcard); + * - `ns \ "@foo"` to get the unprefixed attribute `"foo"`; + * - `ns \ "@{uri}foo"` to get the prefixed attribute `"pre:foo"` whose + * prefix `"pre"` is resolved to the namespace `"uri"`. + * + * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute + * values are wrapped in a [[scala.xml.Group]]. + * + * There is no support for searching a prefixed attribute by its literal prefix. + * + * The document order is preserved. + */ + def \(that: String): NodeSeq = { + def fail = throw new IllegalArgumentException(that) + def atResult = { + lazy val y = this(0) + val attr = + if (that.length == 1) fail + else if (that(1) == '{') { + val i = that indexOf '}' + if (i == -1) fail + val (uri, key) = (that.substring(2,i), that.substring(i+1, that.length())) + if (uri == "" || key == "") fail + else y.attribute(uri, key) + } + else y.attribute(that drop 1) + + attr match { + case Some(x) => Group(x) + case _ => NodeSeq.Empty + } + } + + def makeSeq(cond: (Node) => Boolean) = + NodeSeq fromSeq (this flatMap (_.child) filter cond) + + that match { + case "" => fail + case "_" => makeSeq(!_.isAtom) + case _ if (that(0) == '@' && this.length == 1) => atResult + case _ => makeSeq(_.label == that) + } + } + + /** Projection function, which returns elements of `this` sequence and of + * all its subsequences, based on the string `that`. Use: + * - `this \\ 'foo` to get a list of all elements that are labelled with `"foo"`; + * - `\\ "_"` to get a list of all elements (wildcard); + * - `ns \\ "@foo"` to get the unprefixed attribute `"foo"`; + * - `ns \\ "@{uri}foo"` to get each prefixed attribute `"pre:foo"` whose + * prefix `"pre"` is resolved to the namespace `"uri"`. + * + * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute + * values are wrapped in a [[scala.xml.Group]]. 
+   *
+   *  There is no support for searching a prefixed attribute by its literal prefix.
+   *
+   *  The document order is preserved.
+   */
+  def \\ (that: String): NodeSeq = {
+    // An empty selector cannot name anything. Reject it with the same exception type
+    // that `\` uses, instead of letting `that(0)` below blow up with a
+    // StringIndexOutOfBoundsException.
+    def fail = throw new IllegalArgumentException(that)
+    // Every node of this sequence together with all of its descendants, filtered by `cond`.
+    def filt(cond: (Node) => Boolean) = this flatMap (_.descendant_or_self) filter cond
+    that match {
+      case ""                  => fail
+      case "_"                 => filt(!_.isAtom)
+      // attribute selector: delegate to `\` on each non-atom node
+      case _ if that(0) == '@' => filt(!_.isAtom) flatMap (_ \ that)
+      case _                   => filt(x => !x.isAtom && x.label == that)
+    }
+  }
+
+  /** Convenience method which returns string text of the named attribute. Use:
+   *   - `that \@ "foo"` to get the string text of attribute `"foo"`;
+   */
+  def \@(attributeName: String): String = (this \ ("@" + attributeName)).text
+
+  /** Concatenation of the string form of every node in `theSeq`, without separator. */
+  override def toString(): String = theSeq.mkString
+
+  /** Concatenation of the `text` of every node in this sequence, without separator. */
+  def text: String = (this map (_.text)).mkString
+}
diff --git a/src/xml/scala/xml/Null.scala b/src/xml/scala/xml/Null.scala
new file mode 100644
index 0000000000..f763c023c4
--- /dev/null
+++ b/src/xml/scala/xml/Null.scala
@@ -0,0 +1,62 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+
+import Utility.isNameStart
+import scala.collection.Iterator
+
+/** Essentially, every method in here is a dummy, returning Zero[T].
+ *  It provides a backstop for the unusual collection defined by MetaData,
+ *  sort of a linked list of tails.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+case object Null extends MetaData {
+  // The empty attribute list: nothing to iterate over, size zero.
+  override def iterator = Iterator.empty
+  override def size = 0
+  /** Appending to the empty list yields the appended list itself; `scope` is not consulted. */
+  override def append(m: MetaData, scope: NamespaceBinding = TopScope): MetaData = m
+  /** Nothing to filter: always returns this object. */
+  override def filter(f: MetaData => Boolean): MetaData = this
+
+  /** Replacing the tail of the terminator simply yields the given tail. */
+  def copy(next: MetaData) = next
+  def getNamespace(owner: Node) = null
+
+  override def hasNext = false
+  // The terminator has no successor, key or value: all three are `null`.
+  def next = null
+  def key = null
+  def value = null
+  def isPrefixed = false
+
+  override def length = 0
+  /** Returns the accumulator `i` unchanged. */
+  override def length(i: Int) = i
+
+  /** Equal to any `MetaData` whose length is 0, unequal to everything else. */
+  override def strict_==(other: Equality) = other match {
+    case x: MetaData => x.length == 0
+    case _           => false
+  }
+  override protected def basisForHashCode: Seq[Any] = Nil
+
+  /** Prefixed lookup in the empty list: always `null`. */
+  def apply(namespace: String, scope: NamespaceBinding, key: String) = null
+
+  /** Unprefixed lookup in the empty list.
+   *
+   *  @return `null` when the first character of `key` is accepted by `Utility.isNameStart`
+   *  @throws IllegalArgumentException when `key` is empty or its first character is rejected
+   *          by `Utility.isNameStart`
+   */
+  def apply(key: String) =
+    // `key.nonEmpty` keeps an empty key from escaping as NoSuchElementException out of
+    // `key.head`; it is reported like any other invalid attribute name instead.
+    if (key.nonEmpty && isNameStart(key.head)) null
+    else throw new IllegalArgumentException("not a valid attribute name '"+key+"', so can never match !")
+
+  // String rendering of the empty list is empty in every variant.
+  protected def toString1(sb: StringBuilder) = ()
+  override protected def toString1(): String = ""
+
+  override def toString(): String = ""
+
+  override def buildString(sb: StringBuilder): StringBuilder = sb
+
+  override def wellformed(scope: NamespaceBinding) = true
+
+  // Removing from the empty list leaves the empty list.
+  def remove(key: String) = this
+  def remove(namespace: String, scope: NamespaceBinding, key: String) = this
+}
diff --git a/src/xml/scala/xml/PCData.scala b/src/xml/scala/xml/PCData.scala
new file mode 100644
index 0000000000..31eea2b6d7
--- /dev/null
+++ b/src/xml/scala/xml/PCData.scala
@@ -0,0 +1,44 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+
+/** This class (which is not used by all XML parsers, but always used by the
+ *  XHTML one) represents parseable character data, which appeared as CDATA
+ *  sections in the input and is to be preserved
as CDATA section in the output.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+class PCData(data: String) extends Atom[String](data) {
+
+  /** Appends the data wrapped in a CDATA section, i.e. `<![CDATA[` data `]]>`.
+   *  The data itself is written verbatim; no character is escaped.
+   *
+   *  NOTE(review): data that itself contains `]]>` would close the section early;
+   *  that case is not handled here.
+   *
+   *  @param  sb the input string buffer associated to some XML element
+   *  @return the input string buffer with the formatted CDATA section
+   */
+  override def buildString(sb: StringBuilder): StringBuilder =
+    sb append "<![CDATA[%s]]>".format(data)
+}
+
+/** This singleton object contains the `apply`and `unapply` methods for
+ *  convenient construction and deconstruction.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+object PCData {
+  /** Creates a new `PCData` node holding `data`. */
+  def apply(data: String) = new PCData(data)
+  /** Extracts the data of a `PCData` node; `None` for any other value. */
+  def unapply(other: Any): Option[String] = other match {
+    case x: PCData => Some(x.data)
+    case _         => None
+  }
+}
+
diff --git a/src/xml/scala/xml/PrefixedAttribute.scala b/src/xml/scala/xml/PrefixedAttribute.scala
new file mode 100644
index 0000000000..4ab79c8677
--- /dev/null
+++ b/src/xml/scala/xml/PrefixedAttribute.scala
@@ -0,0 +1,61 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+package scala
+package xml
+
+/** prefixed attributes always have a non-null namespace.
+ *
+ *  @param pre
+ *  @param key
+ *  @param value the attribute value
+ *  @param next1
+ */
+class PrefixedAttribute(
+  val pre: String,
+  val key: String,
+  val value: Seq[Node],
+  val next1: MetaData)
+extends Attribute
+{
+  val next = if (value ne null) next1 else next1.remove(key)
+
+  /** same as this(pre, key, Text(value), next), or no attribute if value is null */
+  def this(pre: String, key: String, value: String, next: MetaData) =
+    this(pre, key, if (value ne null) Text(value) else null: NodeSeq, next)
+
+  /** same as this(pre, key, value.get, next), or no attribute if value is None */
+  def this(pre: String, key: String, value: Option[Seq[Node]], next: MetaData) =
+    this(pre, key, value.orNull, next)
+
+  /** Returns a copy of this prefixed attribute (same pre, key and value) with the given
+   *  next field.
+   */
+  def copy(next: MetaData) =
+    new PrefixedAttribute(pre, key, value, next)
+
+  def getNamespace(owner: Node) =
+    owner.getNamespace(pre)
+
+  /** forwards the call to next (because caller looks for unprefixed attribute) */
+  def apply(key: String): Seq[Node] = next(key)
+
+  /** gets attribute value of qualified (prefixed) attribute with given key: this value when the key matches and `pre` resolves to `namespace` in `scope`, else whatever `next` yields
+   */
+  def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = {
+    if (key == this.key && scope.getURI(pre) == namespace)
+      value
+    else
+      next(namespace, scope, key)
+  }
+}
+
+object PrefixedAttribute {
+  def unapply(x: PrefixedAttribute) = Some((x.pre, x.key, x.value, x.next))
+}
diff --git a/src/xml/scala/xml/PrettyPrinter.scala b/src/xml/scala/xml/PrettyPrinter.scala
new file mode 100755
index 0000000000..9e01905357
--- /dev/null
+++ b/src/xml/scala/xml/PrettyPrinter.scala
@@ -0,0 +1,263 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+
+import Utility.sbToString
+
+/** Class for pretty printing.
After instantiating, you can use the + * format() and formatNode() methods to convert XML to a formatted + * string. The class can be reused to pretty print any number of + * XML nodes. + * + * @author Burak Emir + * @version 1.0 + * + * @param width the width to fit the output into + * @param step indentation + */ +class PrettyPrinter(width: Int, step: Int) { + + class BrokenException() extends java.lang.Exception + + class Item + case object Break extends Item { + override def toString() = "\\" + } + case class Box(col: Int, s: String) extends Item + case class Para(s: String) extends Item + + protected var items: List[Item] = Nil + + protected var cur = 0 + + protected def reset() = { + cur = 0 + items = Nil + } + + /** Try to cut at whitespace. + */ + protected def cut(s: String, ind: Int): List[Item] = { + val tmp = width - cur + if (s.length <= tmp) + return List(Box(ind, s)) + var i = s indexOf ' ' + if (i > tmp || i == -1) throw new BrokenException() // cannot break + + var last: List[Int] = Nil + while (i != -1 && i < tmp) { + last = i::last + i = s.indexOf(' ', i+1) + } + var res: List[Item] = Nil + while (Nil != last) try { + val b = Box(ind, s.substring(0, last.head)) + cur = ind + res = b :: Break :: cut(s.substring(last.head, s.length), ind) + // backtrack + last = last.tail + } catch { + case _:BrokenException => last = last.tail + } + throw new BrokenException() + } + + /** Try to make indented box, if possible, else para. + */ + protected def makeBox(ind: Int, s: String) = + if (cur + s.length > width) { // fits in this line + items ::= Box(ind, s) + cur += s.length + } + else try cut(s, ind) foreach (items ::= _) // break it up + catch { case _: BrokenException => makePara(ind, s) } // give up, para + + // dont respect indent in para, but afterwards + protected def makePara(ind: Int, s: String) = { + items = Break::Para(s)::Break::items + cur = ind + } + + // respect indent + protected def makeBreak() = { // using wrapping here... 
+ items = Break :: items + cur = 0 + } + + protected def leafTag(n: Node) = { + def mkLeaf(sb: StringBuilder) { + sb append '<' + n nameToString sb + n.attributes buildString sb + sb append "/>" + } + sbToString(mkLeaf) + } + + protected def startTag(n: Node, pscope: NamespaceBinding): (String, Int) = { + var i = 0 + def mkStart(sb: StringBuilder) { + sb append '<' + n nameToString sb + i = sb.length + 1 + n.attributes buildString sb + n.scope.buildString(sb, pscope) + sb append '>' + } + (sbToString(mkStart), i) + } + + protected def endTag(n: Node) = { + def mkEnd(sb: StringBuilder) { + sb append "' + } + sbToString(mkEnd) + } + + protected def childrenAreLeaves(n: Node): Boolean = { + def isLeaf(l: Node) = l match { + case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => true + case _ => false + } + n.child forall isLeaf + } + + protected def fits(test: String) = + test.length < width - cur + + private def doPreserve(node: Node) = + node.attribute(XML.namespace, XML.space).map(_.toString == XML.preserve) getOrElse false + + protected def traverse(node: Node, pscope: NamespaceBinding, ind: Int): Unit = node match { + + case Text(s) if s.trim() == "" => + ; + case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => + makeBox( ind, node.toString().trim() ) + case g @ Group(xs) => + traverse(xs.iterator, pscope, ind) + case _ => + val test = { + val sb = new StringBuilder() + Utility.serialize(node, pscope, sb, stripComments = false) + if (doPreserve(node)) sb.toString + else TextBuffer.fromString(sb.toString).toText(0).data + } + if (childrenAreLeaves(node) && fits(test)) { + makeBox(ind, test) + } else { + val (stg, len2) = startTag(node, pscope) + val etg = endTag(node) + if (stg.length < width - cur) { // start tag fits + makeBox(ind, stg) + makeBreak() + traverse(node.child.iterator, node.scope, ind + step) + makeBox(ind, etg) + } else if (len2 < width - cur) { + // + if (!lastwasbreak) sb.append('\n') // on windows: \r\n ? 
+ lastwasbreak = true + cur = 0 +// while (cur < last) { +// sb append ' ' +// cur += 1 +// } + + case Box(i, s) => + lastwasbreak = false + while (cur < i) { + sb append ' ' + cur += 1 + } + sb.append(s) + case Para( s ) => + lastwasbreak = false + sb append s + } + } + + // public convenience methods + + /** Returns a formatted string containing well-formed XML with + * given namespace to prefix mapping. + * + * @param n the node to be serialized + * @param pscope the namespace to prefix mapping + * @return the formatted string + */ + def format(n: Node, pscope: NamespaceBinding = null): String = + sbToString(format(n, pscope, _)) + + /** Returns a formatted string containing well-formed XML. + * + * @param nodes the sequence of nodes to be serialized + * @param pscope the namespace to prefix mapping + */ + def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding = null): String = + sbToString(formatNodes(nodes, pscope, _)) + + /** Appends a formatted string containing well-formed XML with + * the given namespace to prefix mapping to the given stringbuffer. 
+   *
+   *  @param nodes  the nodes to be serialized
+   *  @param pscope the namespace to prefix mapping
+   *  @param sb     the string buffer to which to append to
+   */
+  def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding, sb: StringBuilder): Unit =
+    nodes foreach (n => sb append format(n, pscope))
+}
diff --git a/src/xml/scala/xml/ProcInstr.scala b/src/xml/scala/xml/ProcInstr.scala
new file mode 100644
index 0000000000..189c1c6878
--- /dev/null
+++ b/src/xml/scala/xml/ProcInstr.scala
@@ -0,0 +1,39 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+
+package scala
+package xml
+
+/** an XML node for processing instructions (PI)
+ *
+ * Construction fails with an `IllegalArgumentException` when `target` is not
+ * an XML Name, when `target` is "xml" in any letter case, or when `proctext`
+ * contains "?>".
+ *
+ * @author Burak Emir
+ * @param target target name of this PI
+ * @param proctext text contained in this node, may not contain "?>"
+ */
+case class ProcInstr(target: String, proctext: String) extends SpecialNode
+{
+  if (!Utility.isName(target))
+    throw new IllegalArgumentException(target+" must be an XML Name")
+  if (proctext contains "?>")
+    throw new IllegalArgumentException(proctext+" may not contain \"?>\"")
+  if (target.toLowerCase == "xml")
+    throw new IllegalArgumentException(target+" is reserved")
+
+  final override def doCollectNamespaces = false
+  final override def doTransform = false
+
+  final def label = "#PI"
+  override def text = ""
+
+  /** appends "<?" target (" "+text)?+"?>"
+   * to this stringbuffer.
+   *
+   * The separating space is only written when `proctext` is non-empty.
+   */
+  override def buildString(sb: StringBuilder) =
+    sb append "<?%s%s?>".format(target, (if (proctext == "") "" else " " + proctext))
+}
diff --git a/src/xml/scala/xml/QNode.scala b/src/xml/scala/xml/QNode.scala
new file mode 100644
index 0000000000..f9e3f1854b
--- /dev/null
+++ b/src/xml/scala/xml/QNode.scala
@@ -0,0 +1,20 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** This object provides an extractor method to match a qualified node with
+ * its namespace URI
+ *
+ * @author Burak Emir
+ * @version 1.0
+ */
+object QNode {
+  def unapplySeq(n: Node) = Some((n.scope.getURI(n.prefix), n.label, n.attributes, n.child))
+}
diff --git a/src/xml/scala/xml/SpecialNode.scala b/src/xml/scala/xml/SpecialNode.scala
new file mode 100644
index 0000000000..5fef8ef66c
--- /dev/null
+++ b/src/xml/scala/xml/SpecialNode.scala
@@ -0,0 +1,33 @@
+/* __ *\
+** ________ ___ / / ___ Scala API **
+** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
+** /____/\___/_/ |_/____/_/ | | **
+** |/ **
+\* */
+
+package scala
+package xml
+
+/** `SpecialNode` is a special XML node which represents either text
+ * `(PCDATA)`, a comment, a `PI`, or an entity ref.
+ *
+ * `SpecialNode`s also play the role of [[scala.xml.pull.XMLEvent]]s for
+ * pull-parsing.
+ *
+ * @author Burak Emir
+ */
+abstract class SpecialNode extends Node with pull.XMLEvent {
+
+  /** always empty */
+  final override def attributes = Null
+
+  /** always Node.EmptyNamespace */
+  final override def namespace = null
+
+  /** always empty */
+  final def child = Nil
+
+  /** Append string representation to the given string buffer argument.
*/ + def buildString(sb: StringBuilder): StringBuilder +} diff --git a/src/xml/scala/xml/Text.scala b/src/xml/scala/xml/Text.scala new file mode 100644 index 0000000000..debea0c025 --- /dev/null +++ b/src/xml/scala/xml/Text.scala @@ -0,0 +1,39 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Text` implements an XML node for text (PCDATA). + * It is used in both non-bound and bound XML representations. + * + * @author Burak Emir + * @param data the text contained in this node, may not be null. + */ +class Text(data: String) extends Atom[String](data) { + + /** Returns text, with some characters escaped according to the XML + * specification. + */ + override def buildString(sb: StringBuilder): StringBuilder = + Utility.escape(data, sb) +} + +/** This singleton object contains the `apply`and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Text { + def apply(data: String) = new Text(data) + def unapply(other: Any): Option[String] = other match { + case x: Text => Some(x.data) + case _ => None + } +} diff --git a/src/xml/scala/xml/TextBuffer.scala b/src/xml/scala/xml/TextBuffer.scala new file mode 100644 index 0000000000..514b1701af --- /dev/null +++ b/src/xml/scala/xml/TextBuffer.scala @@ -0,0 +1,46 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +import Utility.isSpace + +object TextBuffer { + def fromString(str: String): TextBuffer = new TextBuffer() append str +} + +/** The class `TextBuffer` is for creating text nodes without surplus + * whitespace. 
All occurrences of one or more whitespace in strings + * appended with the `append` method will be replaced by a single space + * character, and leading and trailing space will be removed completely. + */ +class TextBuffer +{ + val sb = new StringBuilder() + + /** Appends this string to the text buffer, trimming whitespaces as needed. + */ + def append(cs: Seq[Char]): this.type = { + cs foreach { c => + if (!isSpace(c)) sb append c + else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' + } + this + } + + /** Returns an empty sequence if text is only whitespace. + * + * @return the text without whitespaces. + */ + def toText: Seq[Text] = sb.toString.trim match { + case "" => Nil + case s => Seq(Text(s)) + } +} diff --git a/src/xml/scala/xml/TopScope.scala b/src/xml/scala/xml/TopScope.scala new file mode 100644 index 0000000000..474fbbbdb5 --- /dev/null +++ b/src/xml/scala/xml/TopScope.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** top level namespace scope. 
only contains the predefined binding + * for the "xml" prefix which is bound to + * "http://www.w3.org/XML/1998/namespace" + */ +object TopScope extends NamespaceBinding(null, null, null) { + + import XML.{ xml, namespace } + + override def getURI(prefix1: String): String = + if (prefix1 == xml) namespace else null + + override def getPrefix(uri1: String): String = + if (uri1 == namespace) xml else null + + override def toString() = "" + + override def buildString(stop: NamespaceBinding) = "" + override def buildString(sb: StringBuilder, ignore: NamespaceBinding) = {} +} diff --git a/src/xml/scala/xml/TypeSymbol.scala b/src/xml/scala/xml/TypeSymbol.scala new file mode 100644 index 0000000000..fb371ee340 --- /dev/null +++ b/src/xml/scala/xml/TypeSymbol.scala @@ -0,0 +1,15 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml + + +abstract class TypeSymbol diff --git a/src/xml/scala/xml/Unparsed.scala b/src/xml/scala/xml/Unparsed.scala new file mode 100644 index 0000000000..bc190eb724 --- /dev/null +++ b/src/xml/scala/xml/Unparsed.scala @@ -0,0 +1,36 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** An XML node for unparsed content. It will be output verbatim, all bets + * are off regarding wellformedness etc. + * + * @author Burak Emir + * @param data content in this node, may not be null. + */ +class Unparsed(data: String) extends Atom[String](data) { + + /** Returns text, with some characters escaped according to XML + * specification. 
+ */ + override def buildString(sb: StringBuilder): StringBuilder = + sb append data +} + +/** This singleton object contains the `apply`and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Unparsed { + def apply(data: String) = new Unparsed(data) + def unapply(x: Unparsed) = Some(x.data) +} diff --git a/src/xml/scala/xml/UnprefixedAttribute.scala b/src/xml/scala/xml/UnprefixedAttribute.scala new file mode 100644 index 0000000000..6fa827da5f --- /dev/null +++ b/src/xml/scala/xml/UnprefixedAttribute.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** Unprefixed attributes have the null namespace, and no prefix field + * + * @author Burak Emir + */ +class UnprefixedAttribute( + val key: String, + val value: Seq[Node], + next1: MetaData) +extends Attribute +{ + final val pre = null + val next = if (value ne null) next1 else next1.remove(key) + + /** same as this(key, Text(value), next), or no attribute if value is null */ + def this(key: String, value: String, next: MetaData) = + this(key, if (value ne null) Text(value) else null: NodeSeq, next) + + /** same as this(key, value.get, next), or no attribute if value is None */ + def this(key: String, value: Option[Seq[Node]], next: MetaData) = + this(key, value.orNull, next) + + /** returns a copy of this unprefixed attribute with the given next field*/ + def copy(next: MetaData) = new UnprefixedAttribute(key, value, next) + + final def getNamespace(owner: Node): String = null + + /** + * Gets value of unqualified (unprefixed) attribute with given key, null if not found + * + * @param key + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(key: String): Seq[Node] = + if (key == this.key) value 
else next(key) + + /** + * Forwards the call to next (because caller looks for prefixed attribute). + * + * @param namespace + * @param scope + * @param key + * @return .. + */ + def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = + next(namespace, scope, key) +} +object UnprefixedAttribute { + def unapply(x: UnprefixedAttribute) = Some((x.key, x.value, x.next)) +} diff --git a/src/xml/scala/xml/Utility.scala b/src/xml/scala/xml/Utility.scala new file mode 100755 index 0000000000..9134476401 --- /dev/null +++ b/src/xml/scala/xml/Utility.scala @@ -0,0 +1,410 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import scala.collection.mutable +import parsing.XhtmlEntities +import scala.language.implicitConversions + +/** + * The `Utility` object provides utility functions for processing instances + * of bound and not bound XML classes, as well as escaping text nodes. + * + * @author Burak Emir + */ +object Utility extends AnyRef with parsing.TokenTests { + final val SU = '\u001A' + + // [Martin] This looks dubious. We don't convert StringBuilders to + // Strings anywhere else, why do it here? + implicit def implicitSbToString(sb: StringBuilder) = sb.toString() + + // helper for the extremely oft-repeated sequence of creating a + // StringBuilder, passing it around, and then grabbing its String. + private [xml] def sbToString(f: (StringBuilder) => Unit): String = { + val sb = new StringBuilder + f(sb) + sb.toString + } + private[xml] def isAtomAndNotText(x: Node) = x.isAtom && !x.isInstanceOf[Text] + + /** Trims an element - call this method, when you know that it is an + * element (and not a text node) so you know that it will not be trimmed + * away. With this assumption, the function can return a `Node`, rather + * than a `Seq[Node]`. 
If you don't know, call `trimProper` and account + * for the fact that you may get back an empty sequence of nodes. + * + * Precondition: node is not a text node (it might be trimmed) + */ + def trim(x: Node): Node = x match { + case Elem(pre, lab, md, scp, child@_*) => + Elem(pre, lab, md, scp, (child flatMap trimProper):_*) + } + + /** trim a child of an element. `Attribute` values and `Atom` nodes that + * are not `Text` nodes are unaffected. + */ + def trimProper(x:Node): Seq[Node] = x match { + case Elem(pre,lab,md,scp,child@_*) => + Elem(pre,lab,md,scp, (child flatMap trimProper):_*) + case Text(s) => + new TextBuffer().append(s).toText + case _ => + x + } + + /** returns a sorted attribute list */ + def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else { + val key = md.key + val smaller = sort(md.filter { m => m.key < key }) + val greater = sort(md.filter { m => m.key > key }) + smaller.foldRight (md copy greater) ((x, xs) => x copy xs) + } + + /** Return the node with its attribute list sorted alphabetically + * (prefixes are ignored) */ + def sort(n:Node): Node = n match { + case Elem(pre,lab,md,scp,child@_*) => + Elem(pre,lab,sort(md),scp, (child map sort):_*) + case _ => n + } + + /** + * Escapes the characters < > & and " from string. + */ + final def escape(text: String): String = sbToString(escape(text, _)) + + object Escapes { + /** For reasons unclear escape and unescape are a long ways from + * being logical inverses. */ + val pairs = Map( + "lt" -> '<', + "gt" -> '>', + "amp" -> '&', + "quot" -> '"' + // enigmatic comment explaining why this isn't escaped -- + // is valid xhtml but not html, and IE doesn't know it, says jweb + // "apos" -> '\'' + ) + val escMap = pairs map { case (s, c) => c-> ("&%s;" format s) } + val unescMap = pairs ++ Map("apos" -> '\'') + } + import Escapes.{ escMap, unescMap } + + /** + * Appends escaped string to `s`. 
+   *
+   * The characters `<`, `>`, `&` and `"` are written as the entity
+   * references `&lt;`, `&gt;`, `&amp;` and `&quot;`. Newline, carriage
+   * return and tab are copied unchanged; every other character below
+   * U+0020 is dropped, and all remaining characters are copied unchanged.
+   *
+   * @param text the string to escape
+   * @param s    the builder that receives the escaped text
+   * @return     the builder `s`
+   */
+  final def escape(text: String, s: StringBuilder): StringBuilder = {
+    // Implemented per XML spec:
+    // http://www.w3.org/International/questions/qa-controls
+    // imperative code 3x-4x faster than current implementation
+    // dpp (David Pollak) 2010/02/03
+    val len = text.length
+    var pos = 0
+    while (pos < len) {
+      text.charAt(pos) match {
+        case '<' => s.append("&lt;")
+        case '>' => s.append("&gt;")
+        case '&' => s.append("&amp;")
+        case '"' => s.append("&quot;")
+        case '\n' => s.append('\n')
+        case '\r' => s.append('\r')
+        case '\t' => s.append('\t')
+        // anything else below the space character is silently discarded
+        case c => if (c >= ' ') s.append(c)
+      }
+
+      pos += 1
+    }
+    s
+  }
+
+  /**
+   * Appends unescaped string to `s`, `amp` becomes `&`,
+   * `lt` becomes `<` etc..
+   *
+   * @return `'''null'''` if `ref` was not a predefined entity.
+   */
+  final def unescape(ref: String, s: StringBuilder): StringBuilder =
+    ((unescMap get ref) map (s append _)).orNull
+
+  /**
+   * Returns a set of all namespaces used in a sequence of nodes
+   * and all their descendants, including the empty namespaces.
+   */
+  def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] =
+    nodes.foldLeft(new mutable.HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set }
+
+  /**
+   * Adds all namespaces in node to set.
+   */
+  def collectNamespaces(n: Node, set: mutable.Set[String]) {
+    if (n.doCollectNamespaces) {
+      set += n.namespace
+      for (a <- n.attributes) a match {
+        case _:PrefixedAttribute =>
+          set += a.getNamespace(n)
+        case _ =>
+      }
+      for (i <- n.child)
+        collectNamespaces(i, set)
+    }
+  }
+
+  // def toXML(
+  //   x: Node,
+  //   pscope: NamespaceBinding = TopScope,
+  //   sb: StringBuilder = new StringBuilder,
+  //   stripComments: Boolean = false,
+  //   decodeEntities: Boolean = true,
+  //   preserveWhitespace: Boolean = false,
+  //   minimizeTags: Boolean = false): String =
+  // {
+  //   toXMLsb(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+  //   sb.toString()
+  // }
+
+  /**
+   * Serialize the provided Node to the provided StringBuilder.
+   *
+   * Note that calling this source-compatible method will result in the same old, arguably almost universally unwanted,
+   * behaviour.
+   */
+  @deprecated("Please use `serialize` instead and specify a `minimizeTags` parameter", "2.10.0")
+  def toXML(
+    x: Node,
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: Boolean = false): StringBuilder =
+  {
+    serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never)
+  }
+
+  /**
+   * Serialize an XML Node to a StringBuilder.
+   *
+   * This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate
+   * combination of named/default arguments and overloading.
+   *
+   * All options are handed on unchanged to nested nodes, so they apply at
+   * every depth and not only to `x` itself.
+   *
+   * @param x                  the node to serialize
+   * @param pscope             the enclosing scope, handed to `scope.buildString` when printing an element
+   * @param sb                 the builder that is appended to and returned
+   * @param stripComments      if true, `Comment` nodes are left out
+   * @param decodeEntities     not consulted by this method; only passed on to nested calls
+   * @param preserveWhitespace not consulted by this method; only passed on to nested calls
+   * @param minimizeTags       decides whether an element without children is written in short form
+   * @throws IllegalArgumentException if `x` is neither a `SpecialNode`, a `Group` nor an `Elem`
+   * @todo use a Writer instead
+   */
+  def serialize(
+    x: Node,
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder =
+  {
+    x match {
+      // A Comment is itself a SpecialNode, so it has to be fully handled here:
+      // a guard on this case would let stripped comments fall into the next case.
+      case c: Comment => if (!stripComments) c buildString sb else sb
+      case s: SpecialNode => s buildString sb
+      case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) ; sb
+      case el: Elem =>
+        // print tag with namespace declarations
+        sb.append('<')
+        el.nameToString(sb)
+        if (el.attributes ne null) el.attributes.buildString(sb)
+        el.scope.buildString(sb, pscope)
+        if (el.child.isEmpty &&
+                (minimizeTags == MinimizeMode.Always ||
+                (minimizeTags == MinimizeMode.Default && el.minimizeEmpty)))
+        {
+          // no children, so use short form: <xyz .../>
+          sb.append("/>")
+        } else {
+          // children, so use long form: <xyz ...>...</xyz>
+          sb.append('>')
+          sequenceToXML(el.child, el.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+          sb.append("</")
+          el.nameToString(sb)
+          sb.append('>')
+        }
+      case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName)
+    }
+  }
+
+  /**
+   * Serializes each node of `children` to `sb` with `serialize`. When every
+   * child is an atom that is not a `Text`, a single space is written between
+   * consecutive children.
+   */
+  def sequenceToXML(
+    children: Seq[Node],
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = true,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit =
+  {
+    if (children.isEmpty) return
+    else if (children forall isAtomAndNotText) { // add space
+      val it = children.iterator
+      val f = it.next()
+      serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+      while (it.hasNext) {
+        val x = it.next()
+        sb.append(' ')
+        serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+      }
+    }
+    else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
+  }
+
+  /**
+   * Returns prefix of qualified name if any.
+   */
+  final def prefix(name: String): Option[String] = (name indexOf ':') match {
+    case -1   => None
+    case i    => Some(name.substring(0, i))
+  }
+
+  /**
+   * Returns a hashcode for the given constituents of a node
+   */
+  def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) =
+    scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##)
+
+  def appendQuoted(s: String): String = sbToString(appendQuoted(s, _))
+
+  /**
+   * Appends "s" if string `s` does not contain ",
+   * 's' otherwise.
+ */ + def appendQuoted(s: String, sb: StringBuilder) = { + val ch = if (s contains '"') '\'' else '"' + sb.append(ch).append(s).append(ch) + } + + /** + * Appends "s" and escapes and " i s with \" + */ + def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = { + sb.append('"') + for (c <- s) c match { + case '"' => sb.append('\\'); sb.append('"') + case _ => sb.append(c) + } + sb.append('"') + } + + def getName(s: String, index: Int): String = { + if (index >= s.length) null + else { + val xs = s drop index + if (xs.nonEmpty && isNameStart(xs.head)) xs takeWhile isNameChar + else "" + } + } + + /** + * Returns `'''null'''` if the value is a correct attribute value, + * error message if it isn't. + */ + def checkAttributeValue(value: String): String = { + var i = 0 + while (i < value.length) { + value.charAt(i) match { + case '<' => + return "< not allowed in attribute value" + case '&' => + val n = getName(value, i+1) + if (n eq null) + return "malformed entity reference in attribute value ["+value+"]" + i = i + n.length + 1 + if (i >= value.length || value.charAt(i) != ';') + return "malformed entity reference in attribute value ["+value+"]" + case _ => + } + i = i + 1 + } + null + } + + def parseAttributeValue(value: String): Seq[Node] = { + val sb = new StringBuilder + var rfb: StringBuilder = null + val nb = new NodeBuffer() + + val it = value.iterator + while (it.hasNext) { + var c = it.next() + // entity! 
flush buffer into text node + if (c == '&') { + c = it.next() + if (c == '#') { + c = it.next() + val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) + sb.append(theChar) + } + else { + if (rfb eq null) rfb = new StringBuilder() + rfb append c + c = it.next() + while (c != ';') { + rfb.append(c) + c = it.next() + } + val ref = rfb.toString() + rfb.clear() + unescape(ref,sb) match { + case null => + if (sb.length > 0) { // flush buffer + nb += Text(sb.toString()) + sb.clear() + } + nb += EntityRef(ref) // add entityref + case _ => + } + } + } + else sb append c + } + if (sb.length > 0) { // flush buffer + val x = Text(sb.toString()) + if (nb.length == 0) + return x + else + nb += x + } + nb + } + + /** + * {{{ + * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * }}} + * See [66] + */ + def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = { + val hex = (ch() == 'x') && { nextch(); true } + val base = if (hex) 16 else 10 + var i = 0 + while (ch() != ';') { + ch() match { + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + i = i * base + ch().asDigit + case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => + if (! 
hex) + reportSyntaxError("hex char not allowed in decimal char ref\n" + + "Did you mean to write &#x ?") + else + i = i * base + ch().asDigit + case SU => + reportTruncatedError("") + case _ => + reportSyntaxError("character '" + ch() + "' not allowed in char ref\n") + } + nextch() + } + new String(Array(i), 0, 1) + } +} diff --git a/src/xml/scala/xml/XML.scala b/src/xml/scala/xml/XML.scala new file mode 100755 index 0000000000..020264e509 --- /dev/null +++ b/src/xml/scala/xml/XML.scala @@ -0,0 +1,109 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import parsing.NoBindingFactoryAdapter +import factory.XMLLoader +import java.io.{ File, FileDescriptor, FileInputStream, FileOutputStream } +import java.io.{ InputStream, Reader, StringReader, Writer } +import java.nio.channels.Channels +import scala.util.control.Exception.ultimately + +object Source { + def fromFile(file: File) = new InputSource(new FileInputStream(file)) + def fromFile(fd: FileDescriptor) = new InputSource(new FileInputStream(fd)) + def fromFile(name: String) = new InputSource(new FileInputStream(name)) + + def fromInputStream(is: InputStream) = new InputSource(is) + def fromReader(reader: Reader) = new InputSource(reader) + def fromSysId(sysID: String) = new InputSource(sysID) + def fromString(string: String) = fromReader(new StringReader(string)) +} + +/** + * Governs how empty elements (i.e. those without child elements) should be serialized. + */ +object MinimizeMode extends Enumeration { + /** Minimize empty tags if they were originally empty when parsed, or if they were constructed + * with [[scala.xml.Elem]]`#minimizeEmpty` == true + */ + val Default = Value + + /** Always minimize empty tags. 
Note that this may be problematic for XHTML, in which + * case [[scala.xml.Xhtml]]`#toXhtml` should be used instead. + */ + val Always = Value + + /** Never minimize empty tags. + */ + val Never = Value +} + +/** The object `XML` provides constants, and functions to load + * and save XML elements. Use this when data binding is not desired, i.e. + * when XML is handled using `Symbol` nodes. + * + * @author Burak Emir + * @version 1.0, 25/04/2005 + */ +object XML extends XMLLoader[Elem] { + val xml = "xml" + val xmlns = "xmlns" + val namespace = "http://www.w3.org/XML/1998/namespace" + val preserve = "preserve" + val space = "space" + val lang = "lang" + val encoding = "ISO-8859-1" + + /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ + def withSAXParser(p: SAXParser): XMLLoader[Elem] = + new XMLLoader[Elem] { override val parser: SAXParser = p } + + /** Saves a node to a file with given filename using given encoding + * optionally with xmldecl and doctype declaration. + * + * @param filename the filename + * @param node the xml node we want to write + * @param enc encoding to use + * @param xmlDecl if true, write xml declaration + * @param doctype if not null, write doctype declaration + */ + final def save( + filename: String, + node: Node, + enc: String = encoding, + xmlDecl: Boolean = false, + doctype: dtd.DocType = null + ): Unit = + { + val fos = new FileOutputStream(filename) + val w = Channels.newWriter(fos.getChannel(), enc) + + ultimately(w.close())( + write(w, node, enc, xmlDecl, doctype) + ) + } + + /** Writes the given node using writer, optionally with xml decl and doctype. + * It's the caller's responsibility to close the writer. 
+   *
+   *  @param w            the writer
+   *  @param node         the xml node we want to write
+   *  @param enc          the string to be used in `xmlDecl`
+   *  @param xmlDecl      if true, write xml declaration
+   *  @param doctype      if not null, write doctype declaration
+   *  @param minimizeTags passed to `Utility.serialize` to choose how empty elements are written
+   */
+  final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default) {
+    /* TODO: optimize by giving writer parameter to toXML*/
+    // `enc` is only echoed into the declaration; the writer's own encoding is the caller's business
+    if (xmlDecl) w.write("<?xml version='1.0' encoding='" + enc + "'?>\n")
+    if (doctype ne null) w.write( doctype.toString() + "\n")
+    w.write(Utility.serialize(node, minimizeTags = minimizeTags).toString)
+  }
+}
diff --git a/src/xml/scala/xml/Xhtml.scala b/src/xml/scala/xml/Xhtml.scala
new file mode 100644
index 0000000000..6a12c1a89a
--- /dev/null
+++ b/src/xml/scala/xml/Xhtml.scala
@@ -0,0 +1,97 @@
+
+package scala
+package xml
+
+import parsing.XhtmlEntities
+import Utility.{ sbToString, isAtomAndNotText }
+
+/* (c) David Pollak 2007 WorldWide Conferencing, LLC */
+
+object Xhtml
+{
+  /**
+   * Convenience function: same as toXhtml(node, false, false)
+   *
+   * @param node      the node
+   */
+  def toXhtml(node: Node): String = sbToString(sb => toXhtml(x = node, sb = sb))
+
+  /**
+   * Convenience function: amounts to calling toXhtml(node) on each
+   * node in the sequence.
+   *
+   * @param nodeSeq   the node sequence
+   */
+  def toXhtml(nodeSeq: NodeSeq): String = sbToString(sb => sequenceToXML(nodeSeq: Seq[Node], sb = sb))
+
+  /** Elements which we believe are safe to minimize if minimizeTags is true.
+   *  See http://www.w3.org/TR/xhtml1/guidelines.html#C_3
+   */
+  private val minimizableElements =
+    List("base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col")
+
+  /** Appends the XHTML serialization of `x` to `sb`.
+   *
+   *  Comments are skipped when `stripComments` is set. Entity references are
+   *  replaced by their character when `decodeEntities` is set and
+   *  `XhtmlEntities.entMap` maps them to a code point of 128 or above;
+   *  otherwise they are written as references. An element is written in the
+   *  short form `<name ... />` only when `minimizeTags` is set, it has no
+   *  children and its label is one of `minimizableElements`; every other
+   *  element gets an explicit closing tag.
+   */
+  def toXhtml(
+    x: Node,
+    pscope: NamespaceBinding = TopScope,
+    sb: StringBuilder = new StringBuilder,
+    stripComments: Boolean = false,
+    decodeEntities: Boolean = false,
+    preserveWhitespace: Boolean = false,
+    minimizeTags: Boolean = true): Unit =
+  {
+    def decode(er: EntityRef) = XhtmlEntities.entMap.get(er.entityName) match {
+      case Some(chr) if chr.toInt >= 128  => sb.append(chr)
+      case _                              => er.buildString(sb)
+    }
+    def shortForm =
+      minimizeTags &&
+      (x.child == null || x.child.length == 0) &&
+      (minimizableElements contains x.label)
+
+    x match {
+      case c: Comment                       => if (!stripComments) c buildString sb
+      case er: EntityRef if decodeEntities  => decode(er)
+      case x: SpecialNode                   => x buildString sb
+      case g: Group                         =>
+        g.nodes foreach { toXhtml(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) }
+
+      case _  =>
+        sb.append('<')
+        x.nameToString(sb)
+        if (x.attributes ne null) x.attributes.buildString(sb)
+        x.scope.buildString(sb, pscope)
+
+        if (shortForm) sb.append(" />")
+        else {
+          sb.append('>')
+          sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags)
+          // explicit closing tag: "</" name ">"
+          sb.append("</")
+          x.nameToString(sb)
+          sb.append('>')
+        }
+    }
+  }
+
+  /**
+   * Amounts to calling toXhtml(node, ...) with the given parameters on each node.
+ */ + def sequenceToXML( + children: Seq[Node], + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = false, + preserveWhitespace: Boolean = false, + minimizeTags: Boolean = true): Unit = + { + if (children.isEmpty) + return + + val doSpaces = children forall isAtomAndNotText // interleave spaces + for (c <- children.take(children.length - 1)) { + toXhtml(c, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + if (doSpaces) sb append ' ' + } + toXhtml(children.last, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + } +} diff --git a/src/xml/scala/xml/dtd/ContentModel.scala b/src/xml/scala/xml/dtd/ContentModel.scala new file mode 100644 index 0000000000..4007985dce --- /dev/null +++ b/src/xml/scala/xml/dtd/ContentModel.scala @@ -0,0 +1,118 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package dtd + +import scala.xml.dtd.impl._ +import scala.xml.Utility.sbToString +import PartialFunction._ + +object ContentModel extends WordExp { + type _labelT = ElemName + type _regexpT = RegExp + + object Translator extends WordBerrySethi { + override val lang: ContentModel.this.type = ContentModel.this + } + + case class ElemName(name: String) extends Label { + override def toString() = """ElemName("%s")""" format name + } + + def isMixed(cm: ContentModel) = cond(cm) { case _: MIXED => true } + def containsText(cm: ContentModel) = (cm == PCDATA) || isMixed(cm) + def parse(s: String): ContentModel = ContentModelParser.parse(s) + + def getLabels(r: RegExp): Set[String] = { + def traverse(r: RegExp): Set[String] = r match { // !!! 
check for match translation problem + case Letter(ElemName(name)) => Set(name) + case Star( x @ _ ) => traverse( x ) // bug if x@_* + case Sequ( xs @ _* ) => Set(xs flatMap traverse: _*) + case Alt( xs @ _* ) => Set(xs flatMap traverse: _*) + } + + traverse(r) + } + + def buildString(r: RegExp): String = sbToString(buildString(r, _)) + + /* precond: rs.length >= 1 */ + private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char) { + buildString(rs.head, sb) + for (z <- rs.tail) { + sb append sep + buildString(z, sb) + } + } + + def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match { + case ANY => sb append "ANY" + case EMPTY => sb append "EMPTY" + case PCDATA => sb append "(#PCDATA)" + case ELEMENTS(_) | MIXED(_) => c buildString sb + } + + def buildString(r: RegExp, sb: StringBuilder): StringBuilder = + r match { // !!! check for match translation problem + case Eps => + sb + case Sequ(rs @ _*) => + sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' ) + case Alt(rs @ _*) => + sb.append( '(' ); buildString(rs, sb, '|'); sb.append( ')' ) + case Star(r: RegExp) => + sb.append( '(' ); buildString(r, sb); sb.append( ")*" ) + case Letter(ElemName(name)) => + sb.append(name) + } + +} + +sealed abstract class ContentModel +{ + override def toString(): String = sbToString(buildString) + def buildString(sb: StringBuilder): StringBuilder +} + +case object PCDATA extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("(#PCDATA)") +} +case object EMPTY extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("EMPTY") +} +case object ANY extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("ANY") +} +sealed abstract class DFAContentModel extends ContentModel { + import ContentModel.{ ElemName, Translator } + def r: ContentModel.RegExp + + lazy val dfa: DetWordAutom[ElemName] = { + val nfa = 
Translator.automatonFrom(r, 1) + new SubsetConstruction(nfa).determinize + } +} + +case class MIXED(r: ContentModel.RegExp) extends DFAContentModel { + import ContentModel.{ Alt, RegExp } + + override def buildString(sb: StringBuilder): StringBuilder = { + val newAlt = r match { case Alt(rs @ _*) => Alt(rs drop 1: _*) } + + sb append "(#PCDATA|" + ContentModel.buildString(newAlt: RegExp, sb) + sb append ")*" + } +} + +case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel { + override def buildString(sb: StringBuilder): StringBuilder = + ContentModel.buildString(r, sb) +} diff --git a/src/xml/scala/xml/dtd/ContentModelParser.scala b/src/xml/scala/xml/dtd/ContentModelParser.scala new file mode 100644 index 0000000000..71b391c422 --- /dev/null +++ b/src/xml/scala/xml/dtd/ContentModelParser.scala @@ -0,0 +1,129 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package dtd + +/** Parser for regexps (content models in DTD element declarations) */ + +object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA + import ContentModel._ + + /** parses the argument to a regexp */ + def parse(s: String): ContentModel = { initScanner(s); contentspec } + + def accept(tok: Int) = { + if (token != tok) { + if ((tok == STAR) && (token == END)) // common mistake + scala.sys.error("in DTDs, \n"+ + "mixed content models must be like (#PCDATA|Name|Name|...)*") + else + scala.sys.error("expected "+token2string(tok)+ + ", got unexpected token:"+token2string(token)) + } + nextToken() + } + + // s [ '+' | '*' | '?' 
] + def maybeSuffix(s: RegExp) = token match { + case STAR => nextToken(); Star(s) + case PLUS => nextToken(); Sequ(s, Star(s)) + case OPT => nextToken(); Alt(Eps, s) + case _ => s + } + + // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp) + + def contentspec: ContentModel = token match { + + case NAME => value match { + case "ANY" => ANY + case "EMPTY" => EMPTY + case _ => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value ) + } + case LPAREN => + + nextToken() + sOpt() + if (token != TOKEN_PCDATA) + ELEMENTS(regexp) + else { + nextToken() + token match { + case RPAREN => + PCDATA + case CHOICE => + val res = MIXED(choiceRest(Eps)) + sOpt() + accept( RPAREN ) + accept( STAR ) + res + case _ => + scala.sys.error("unexpected token:" + token2string(token) ) + } + } + + case _ => + scala.sys.error("unexpected token:" + token2string(token) ) + } + // sopt ::= S? + def sOpt() = if( token == S ) nextToken() + + // (' S? mixed ::= '#PCDATA' S? ')' + // | '#PCDATA' (S? '|' S? atom)* S? ')*' + + // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ] + def regexp: RegExp = { + val p = particle + sOpt() + maybeSuffix(token match { + case RPAREN => nextToken(); p + case CHOICE => val q = choiceRest( p );accept( RPAREN ); q + case COMMA => val q = seqRest( p ); accept( RPAREN ); q + case _ => scala.sys.error("unexpected token:" + token2string(token) ) // malformed group: report a parse error like contentspec/particle do, not a bare MatchError + }) + } + + // seqRest ::= (',' S? cp S?)+ + def seqRest(p: RegExp) = { + var k = List(p) + while( token == COMMA ) { + nextToken() + sOpt() + k = particle::k + sOpt() + } + Sequ( k.reverse:_* ) + } + + // choiceRest ::= ('|' S? cp S?)+ + def choiceRest( p:RegExp ) = { + var k = List( p ) + while( token == CHOICE ) { + nextToken() + sOpt() + k = particle::k + sOpt() + } + Alt( k.reverse:_* ) + } + + // particle ::= '(' S? regexp + // | name [ '+' | '*' | '?' 
] + def particle = token match { + case LPAREN => nextToken(); sOpt(); regexp + case NAME => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a) + case _ => scala.sys.error("expected '(' or Name, got:"+token2string(token)) + } + + // atom ::= name + def atom = token match { + case NAME => val a = Letter(ElemName(value)); nextToken(); a + case _ => scala.sys.error("expected Name, got:"+token2string(token)) + } +} diff --git a/src/xml/scala/xml/dtd/DTD.scala b/src/xml/scala/xml/dtd/DTD.scala new file mode 100644 index 0000000000..16a824fe2c --- /dev/null +++ b/src/xml/scala/xml/dtd/DTD.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +import scala.collection.mutable + +/** A document type declaration. + * + * @author Burak Emir + */ +abstract class DTD { + var externalID: ExternalID = null + var decls: List[Decl] = Nil + def notations: Seq[NotationDecl] = Nil + def unparsedEntities: Seq[EntityDecl] = Nil + + var elem: mutable.Map[String, ElemDecl] = new mutable.HashMap[String, ElemDecl]() + var attr: mutable.Map[String, AttListDecl] = new mutable.HashMap[String, AttListDecl]() + var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() + + override def toString() = + "DTD [\n%s%s]".format( + Option(externalID) getOrElse "", + decls.mkString("", "\n", "\n") + ) +} diff --git a/src/xml/scala/xml/dtd/Decl.scala b/src/xml/scala/xml/dtd/Decl.scala new file mode 100644 index 0000000000..8bf859c460 --- /dev/null +++ b/src/xml/scala/xml/dtd/Decl.scala @@ -0,0 +1,157 @@ +/* __ *\ + ** ________ ___ / / ___ Scala API ** + ** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** + ** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** + ** /____/\___/_/ |_/____/_/ | | ** + ** |/ ** + \* */ + +package scala 
+package xml +package dtd + +import Utility.sbToString + +sealed abstract class Decl + +sealed abstract class MarkupDecl extends Decl { + def buildString(sb: StringBuilder): StringBuilder +} + +/** an element declaration + */ +case class ElemDecl(name: String, contentModel: ContentModel) +extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +case class AttListDecl(name: String, attrs:List[AttrDecl]) +extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "") + } +} + +/** an attribute declaration. at this point, the tpe is a string. Future + * versions might provide a way to access the attribute types more + * directly. + */ +case class AttrDecl(name: String, tpe: String, default: DefaultDecl) { + override def toString(): String = sbToString(buildString) + + def buildString(sb: StringBuilder): StringBuilder = { + sb append " " append name append ' ' append tpe append ' ' + default buildString sb + } + +} + +/** an entity declaration */ +sealed abstract class EntityDecl extends MarkupDecl + +/** a parsed general entity declaration */ +case class ParsedEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +/** a parameter entity declaration */ +case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +/** an unparsed entity declaration */ +case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} +/** a notation declaration */ +case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "" */ + final override def toString() = { + def 
intString = + if (intSubset.isEmpty) "" + else intSubset.mkString("[", "", "]") + + """""".format(name, extID.toString(), intString) + } +} + +object DocType { + /** Creates a doctype with no external id, nor internal subset declarations. */ + def apply(name: String): DocType = apply(name, NoExternalID, Nil) +} diff --git a/src/xml/scala/xml/dtd/ElementValidator.scala b/src/xml/scala/xml/dtd/ElementValidator.scala new file mode 100644 index 0000000000..4830769a7d --- /dev/null +++ b/src/xml/scala/xml/dtd/ElementValidator.scala @@ -0,0 +1,132 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + +import PartialFunction._ +import scala.collection.mutable + +import ContentModel.ElemName +import MakeValidationException._ // @todo other exceptions + +import impl._ + +/** validate children and/or attributes of an element + * exceptions are created but not thrown. 
+ */ +class ElementValidator() extends Function1[Node,Boolean] { + + private var exc: List[ValidationException] = Nil + + protected var contentModel: ContentModel = _ + protected var dfa: DetWordAutom[ElemName] = _ + protected var adecls: List[AttrDecl] = _ + + /** set content model, enabling element validation */ + def setContentModel(cm: ContentModel) = { + contentModel = cm + cm match { + case ELEMENTS(r) => + val nfa = ContentModel.Translator.automatonFrom(r, 1) + dfa = new SubsetConstruction(nfa).determinize + case _ => + dfa = null + } + } + + def getContentModel = contentModel + + /** set meta data, enabling attribute validation */ + def setMetaData(adecls: List[AttrDecl]) { this.adecls = adecls } + + def getIterable(nodes: Seq[Node], skipPCDATA: Boolean): Iterable[ElemName] = { + def isAllWhitespace(a: Atom[_]) = cond(a.data) { case s: String if s.trim == "" => true } + + nodes.filter { + case y: SpecialNode => y match { + case a: Atom[_] if isAllWhitespace(a) => false // always skip all-whitespace nodes + case _ => !skipPCDATA + } + case x => x.namespace eq null + } . map (x => ElemName(x.label)) + } + + /** check attributes, return true if md corresponds to attribute declarations in adecls. 
+ */ + def check(md: MetaData): Boolean = { + val len: Int = exc.length + val ok = new mutable.BitSet(adecls.length) + + for (attr <- md) { + def attrStr = attr.value.toString + def find(Key: String): Option[AttrDecl] = { + adecls.zipWithIndex find { + case (a @ AttrDecl(Key, _, _), j) => ok += j ; return Some(a) + case _ => false + } + None + } + + find(attr.key) match { + case None => + exc ::= fromUndefinedAttribute(attr.key) + + case Some(AttrDecl(_, tpe, DEFAULT(true, fixedValue))) if attrStr != fixedValue => + exc ::= fromFixedAttribute(attr.key, fixedValue, attrStr) + + case _ => + } + } + + adecls.zipWithIndex foreach { + case (AttrDecl(key, tpe, REQUIRED), j) if !ok(j) => exc ::= fromMissingAttribute(key, tpe) + case _ => + } + + exc.length == len //- true if no new exception + } + + /** check children, return true if conform to content model + * @note contentModel != null + */ + def check(nodes: Seq[Node]): Boolean = contentModel match { + case ANY => true + case EMPTY => getIterable(nodes, skipPCDATA = false).isEmpty + case PCDATA => getIterable(nodes, skipPCDATA = true).isEmpty + case MIXED(ContentModel.Alt(branches @ _*)) => // @todo + val j = exc.length + def find(Key: String): Boolean = + branches exists { case ContentModel.Letter(ElemName(Key)) => true ; case _ => false } + + getIterable(nodes, skipPCDATA = true) map (_.name) filterNot find foreach { + exc ::= MakeValidationException fromUndefinedElement _ + } + (exc.length == j) // - true if no new exception + + case _: ELEMENTS => + dfa isFinal { + getIterable(nodes, skipPCDATA = false).foldLeft(0) { (q, e) => + (dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e)) + } + } + case _ => false + } + + /** applies various validations - accumulates error messages in exc + * @todo fail on first error, ignore other errors (rearranging conditions) + */ + def apply(n: Node): Boolean = + //- ? check children + ((contentModel == null) || check(n.child)) && + //- ? 
check attributes + ((adecls == null) || check(n.attributes)) +} diff --git a/src/xml/scala/xml/dtd/ExternalID.scala b/src/xml/scala/xml/dtd/ExternalID.scala new file mode 100644 index 0000000000..880633d860 --- /dev/null +++ b/src/xml/scala/xml/dtd/ExternalID.scala @@ -0,0 +1,86 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +/** an ExternalIDs - either PublicID or SystemID + * + * @author Burak Emir + */ +sealed abstract class ExternalID extends parsing.TokenTests { + def quoted(s: String) = { + val c = if (s contains '"') '\'' else '"' + c + s + c + } + + // public != null: PUBLIC " " publicLiteral " " [systemLiteral] + // public == null: SYSTEM " " systemLiteral + override def toString(): String = { + lazy val quotedSystemLiteral = quoted(systemId) + lazy val quotedPublicLiteral = quoted(publicId) + + if (publicId == null) "SYSTEM " + quotedSystemLiteral + else "PUBLIC " + quotedPublicLiteral + + (if (systemId == null) "" else " " + quotedSystemLiteral) + } + def buildString(sb: StringBuilder): StringBuilder = + sb.append(this.toString()) + + def systemId: String + def publicId: String +} + +/** a system identifier + * + * @author Burak Emir + * @param systemId the system identifier literal + */ +case class SystemID(systemId: String) extends ExternalID { + val publicId = null + + if (!checkSysID(systemId)) + throw new IllegalArgumentException("can't use both \" and ' in systemId") +} + + +/** a public identifier (see http://www.w3.org/QA/2002/04/valid-dtd-list.html). 
+ * + * @author Burak Emir + * @param publicId the public identifier literal + * @param systemId (can be null for notation pubIDs) the system identifier literal + */ +case class PublicID(publicId: String, systemId: String) extends ExternalID { + if (!checkPubID(publicId)) + throw new IllegalArgumentException("publicId must consist of PubidChars") + + if (systemId != null && !checkSysID(systemId)) + throw new IllegalArgumentException("can't use both \" and ' in systemId") + + /** the constant "#PI" */ + def label = "#PI" + + /** always empty */ + def attribute = Node.NoAttributes + + /** always empty */ + def child = Nil +} + +/** A marker used when a `DocType` contains no external id. + * + * @author Michael Bayne + */ +object NoExternalID extends ExternalID { + val publicId = null + val systemId = null + + override def toString = "" +} diff --git a/src/xml/scala/xml/dtd/Scanner.scala b/src/xml/scala/xml/dtd/Scanner.scala new file mode 100644 index 0000000000..5f9d1ccaed --- /dev/null +++ b/src/xml/scala/xml/dtd/Scanner.scala @@ -0,0 +1,79 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +/** Scanner for regexps (content models in DTD element declarations) + * todo: cleanup + */ +class Scanner extends Tokens with parsing.TokenTests { + + final val ENDCH = '\u0000' + + var token:Int = END + var value:String = _ + + private var it: Iterator[Char] = null + private var c: Char = 'z' + + /** initializes the scanner on input s */ + final def initScanner(s: String) { + value = "" + it = (s).iterator + token = 1+END + next() + nextToken() + } + + /** scans the next token */ + final def nextToken() { + if (token != END) token = readToken + } + + // todo: see XML specification... 
probably isLetter,isDigit is fine + final def isIdentChar = ( ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z')) + + final def next() = if (it.hasNext) c = it.next() else c = ENDCH + + final def acc(d: Char) { + if (c == d) next() else scala.sys.error("expected '"+d+"' found '"+c+"' !") + } + + final def accS(ds: Seq[Char]) { ds foreach acc } + + final def readToken: Int = + if (isSpace(c)) { + while (isSpace(c)) next() // next() yields ENDCH once the input is exhausted; calling it.next() directly threw NoSuchElementException on trailing whitespace + S + } else c match { + case '(' => next(); LPAREN + case ')' => next(); RPAREN + case ',' => next(); COMMA + case '*' => next(); STAR + case '+' => next(); PLUS + case '?' => next(); OPT + case '|' => next(); CHOICE + case '#' => next(); accS( "PCDATA" ); TOKEN_PCDATA + case ENDCH => END + case _ => + if (isNameStart(c)) name; // NAME + else scala.sys.error("unexpected character:" + c) + } + + final def name = { + val sb = new StringBuilder() + do { sb.append(c); next() } while (isNameChar(c)) + value = sb.toString() + NAME + } + +} diff --git a/src/xml/scala/xml/dtd/Tokens.scala b/src/xml/scala/xml/dtd/Tokens.scala new file mode 100644 index 0000000000..07e888e77a --- /dev/null +++ b/src/xml/scala/xml/dtd/Tokens.scala @@ -0,0 +1,45 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + + +class Tokens { + + // Tokens + + final val TOKEN_PCDATA = 0 + final val NAME = 1 + final val LPAREN = 3 + final val RPAREN = 4 + final val COMMA = 5 + final val STAR = 6 + final val PLUS = 7 + final val OPT = 8 + final val CHOICE = 9 + final val END = 10 + final val S = 13 + + final def token2string(i: Int): String = i match { + case 0 => "#PCDATA" + case 1 => "NAME" + case 3 => "(" + case 4 => ")" + case 5 => "," + case 6 => "*" + case 7 => "+" + case 8 => "?" 
+ case 9 => "|" + case 10 => "END" + case 13 => " " + } +} diff --git a/src/xml/scala/xml/dtd/ValidationException.scala b/src/xml/scala/xml/dtd/ValidationException.scala new file mode 100644 index 0000000000..1bfae55286 --- /dev/null +++ b/src/xml/scala/xml/dtd/ValidationException.scala @@ -0,0 +1,44 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + + +case class ValidationException(e: String) extends Exception(e) + +/** + * @author Burak Emir + */ +object MakeValidationException { + def fromFixedAttribute(k: String, value: String, actual: String) = + ValidationException("value of attribute " + k + " FIXED to \""+ + value+"\", but document tries \""+actual+"\"") + + def fromNonEmptyElement() = + new ValidationException("element should be *empty*") + + def fromUndefinedElement(label: String) = + new ValidationException("element \""+ label +"\" not allowed here") + + def fromUndefinedAttribute(key: String) = + new ValidationException("attribute " + key +" not allowed here") + + def fromMissingAttribute(allKeys: Set[String]) = { + val sb = new StringBuilder("missing value for REQUIRED attribute") + if (allKeys.size > 1) sb.append('s') + if (allKeys.nonEmpty) sb.append(allKeys.map(k => "'%s'".format(k)).mkString(" ", ", ", "")) // separate the quoted keys from the message text and from each other + new ValidationException(sb.toString()) + } + + def fromMissingAttribute(key: String, tpe: String) = + new ValidationException("missing value for REQUIRED attribute %s of type %s".format(key, tpe)) +} diff --git a/src/xml/scala/xml/dtd/impl/Base.scala b/src/xml/scala/xml/dtd/impl/Base.scala new file mode 100644 index 0000000000..91ff03a93a --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/Base.scala @@ -0,0 +1,67 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | 
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** Basic regular expressions. + * + * @author Burak Emir + * @version 1.0 + */ + +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class Base { + type _regexpT <: RegExp + + abstract class RegExp { + val isNullable: Boolean + } + + object Alt { + /** `Alt( R,R,R* )`. */ + def apply(rs: _regexpT*) = + if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt") + else new Alt(rs: _*) + // Can't enforce that statically without changing the interface + // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*) + def unapplySeq(x: Alt) = Some(x.rs) + } + + class Alt private (val rs: _regexpT*) extends RegExp { + final val isNullable = rs exists (_.isNullable) + } + + object Sequ { + /** Sequ( R,R* ) */ + def apply(rs: _regexpT*) = if (rs.isEmpty) Eps else new Sequ(rs: _*) + def unapplySeq(x: Sequ) = Some(x.rs) + } + + class Sequ private (val rs: _regexpT*) extends RegExp { + final val isNullable = rs forall (_.isNullable) + } + + case class Star(r: _regexpT) extends RegExp { + final lazy val isNullable = true + } + + // The empty Sequ. + case object Eps extends RegExp { + final lazy val isNullable = true + override def toString() = "Eps" + } + + /** this class can be used to add meta information to regexps. 
*/ + class Meta(r1: _regexpT) extends RegExp { + final val isNullable = r1.isNullable + def r = r1 + } +} diff --git a/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala new file mode 100644 index 0000000000..f30309b037 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala @@ -0,0 +1,98 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +// todo: replace global variable pos with acc + +/** This class turns a regular expression over `A` into a + * [[scala.util.automata.NondetWordAutom]] over `A` using the celebrated + * position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class BaseBerrySethi { + val lang: Base + import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star } + + protected var pos = 0 + + // results which hold all info for the NondetWordAutomaton + protected var follow: mutable.HashMap[Int, Set[Int]] = _ + + protected var finalTag: Int = _ + + protected var finals: immutable.Map[Int, Int] = _ // final states + + // constants -------------------------- + + final val emptySet: Set[Int] = Set() + + private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match { + case x: Alt => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _) + case Eps => emptySet + case x: Meta => compFunction(x.r) + case x: Sequ => + val (l1, l2) = x.rs span (_.isNullable) + ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _) + case Star(t) => compFunction(t) + case _ => throw new IllegalArgumentException("unexpected pattern " + r.getClass) + } + + /** Computes `first(r)` for the word regexp `r`. 
*/ + protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst) + + /** Computes `last(r)` for the regexp `r`. */ + protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast) + + /** Starts from the right-to-left + * precondition: pos is final + * pats are successor patterns of a Sequence node + */ + protected def compFollow(rs: Seq[RegExp]): Set[Int] = { + follow(0) = + if (rs.isEmpty) emptySet + else rs.foldRight(Set(pos))((p, fol) => { + val first = compFollow1(fol, p) + + if (p.isNullable) fol ++ first + else first + }) + + follow(0) + } + + /** Returns the first set of an expression, setting the follow set along the way. + */ + protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { + case x: Alt => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*) + case x: Meta => compFollow1(fol1, x.r) + case x: Star => compFollow1(fol1 ++ compFirst(x.r), x.r) + case x: Sequ => + x.rs.foldRight(fol1) { (p, fol) => + val first = compFollow1(fol, p) + + if (p.isNullable) fol ++ first + else first + } + case _ => throw new IllegalArgumentException("unexpected pattern: " + r.getClass) + } + + /** Returns the "Sethi-length" of a pattern, creating the set of position along the way. 
+ */ + protected def traverse(r: RegExp): Unit = r match { + // (is tree automaton stuff, more than Berry-Sethi) + case x: Alt => x.rs foreach traverse + case x: Sequ => x.rs foreach traverse + case x: Meta => traverse(x.r) + case Star(t) => traverse(t) + case _ => throw new IllegalArgumentException("unexp pattern " + r.getClass) + } +} diff --git a/src/xml/scala/xml/dtd/impl/DetWordAutom.scala b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala new file mode 100644 index 0000000000..6f8ba4de72 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala @@ -0,0 +1,50 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +/** A deterministic automaton. States are integers, where + * 0 is always the only initial state. Transitions are represented + * in the delta function. A default transitions is one that + * is taken when no other transition can be taken. + * All states are reachable. Accepting states are those for which + * the partial function 'finals' is defined. 
+ * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class DetWordAutom[T <: AnyRef] { + val nstates: Int + val finals: Array[Int] + val delta: Array[mutable.Map[T, Int]] + val default: Array[Int] + + def isFinal(q: Int) = finals(q) != 0 + def isSink(q: Int) = delta(q).isEmpty && default(q) == q + def next(q: Int, label: T) = delta(q).getOrElse(label, default(q)) + + override def toString() = { + val sb = new StringBuilder("[DetWordAutom nstates=") + sb.append(nstates) + sb.append(" finals=") + val map = Map(finals.zipWithIndex map (_.swap): _*) + sb.append(map.toString()) + sb.append(" delta=\n") + + for (i <- 0 until nstates) { + sb append "%d->%s\n".format(i, delta(i)) + if (i < default.length) + sb append "_>%s\n".format(default(i)) + } + sb.toString + } +} diff --git a/src/xml/scala/xml/dtd/impl/Inclusion.scala b/src/xml/scala/xml/dtd/impl/Inclusion.scala new file mode 100644 index 0000000000..07b6afaeba --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/Inclusion.scala @@ -0,0 +1,70 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + + +/** A fast test of language inclusion between minimal automata. + * inspired by the ''AMoRE automata library''. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] trait Inclusion[A <: AnyRef] { + + val labels: Seq[A] + + /** Returns true if `dfa1` is included in `dfa2`. 
+ */ + def inclusion(dfa1: DetWordAutom[A], dfa2: DetWordAutom[A]) = { + + def encode(q1: Int, q2: Int) = 1 + q1 + q2 * dfa1.nstates + def decode2(c: Int) = (c-1) / (dfa1.nstates) //integer division + def decode1(c: Int) = (c-1) % (dfa1.nstates) + + var q1 = 0 //dfa1.initstate; // == 0 + var q2 = 0 //dfa2.initstate; // == 0 + + val max = 1 + dfa1.nstates * dfa2.nstates + val mark = new Array[Int](max) + + var result = true + var current = encode(q1, q2) + var last = current + mark(last) = max // mark (q1,q2) + while (current != 0 && result) { + //Console.println("current = [["+q1+" "+q2+"]] = "+current); + for (letter <- labels) { + val r1 = dfa1.next(q1,letter) + val r2 = dfa2.next(q2,letter) + if (dfa1.isFinal(r1) && !dfa2.isFinal(r2)) + result = false + val test = encode(r1, r2) + //Console.println("test = [["+r1+" "+r2+"]] = "+test); + if (mark(test) == 0) { + mark(last) = test + mark(test) = max + last = test + } + } + val ncurrent = mark(current) + if( ncurrent != max ) { + q1 = decode1(ncurrent) + q2 = decode2(ncurrent) + current = ncurrent + } else { + current = 0 + } + } + result + } +} diff --git a/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala new file mode 100644 index 0000000000..0bb19a7e3e --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ immutable, mutable } + +/** A nondeterministic automaton. States are integers, where + * 0 is always the only initial state. Transitions are represented + * in the delta function. Default transitions are transitions that + * are taken when no other transitions can be applied. + * All states are reachable. 
Accepting states are those for which + * the partial function `finals` is defined. + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class NondetWordAutom[T <: AnyRef] { + val nstates: Int + val labels: Seq[T] + val finals: Array[Int] // 0 means not final + val delta: Array[mutable.Map[T, immutable.BitSet]] + val default: Array[immutable.BitSet] + + /** @return true if the state is final */ + final def isFinal(state: Int) = finals(state) > 0 + + /** @return tag of final state */ + final def finalTag(state: Int) = finals(state) + + /** @return true if the set of states contains at least one final state */ + final def containsFinal(Q: immutable.BitSet): Boolean = Q exists isFinal + + /** @return true if there are no accepting states */ + final def isEmpty = (0 until nstates) forall (x => !isFinal(x)) + + /** @return an immutable.BitSet with the next states for given state and label */ + def next(q: Int, a: T): immutable.BitSet = delta(q).getOrElse(a, default(q)) + + /** @return an immutable.BitSet with the next states for given state and label */ + def next(Q: immutable.BitSet, a: T): immutable.BitSet = next(Q, next(_, a)) + def nextDefault(Q: immutable.BitSet): immutable.BitSet = next(Q, default) + + private def next(Q: immutable.BitSet, f: (Int) => immutable.BitSet): immutable.BitSet = + (Q map f).foldLeft(immutable.BitSet.empty)(_ ++ _) + + private def finalStates = 0 until nstates filter isFinal + override def toString = { + + val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString + val deltaString = (0 until nstates) + .map(i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString + + "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString) + } +} diff --git a/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala new file mode 100644 index 0000000000..1720604132 --- /dev/null +++ 
b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala @@ -0,0 +1,37 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** Pointed regular hedge expressions, a useful subclass of regular hedge expressions. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class PointedHedgeExp extends Base { + + type _regexpT <: RegExp + type _labelT + + case class Node(label: _labelT, r: _regexpT) extends RegExp { + final val isNullable = false + } + + case class TopIter(r1: _regexpT, r2: _regexpT) extends RegExp { + final val isNullable = r1.isNullable && r2.isNullable //? + } + + case object Point extends RegExp { + final val isNullable = false + } + +} diff --git a/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala new file mode 100644 index 0000000000..632ca1eb18 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala @@ -0,0 +1,108 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +@deprecated("This class will be removed", "2.10.0") +private[dtd] class SubsetConstruction[T <: AnyRef](val nfa: NondetWordAutom[T]) { + import nfa.labels + + def selectTag(Q: immutable.BitSet, finals: Array[Int]) = + (Q map finals filter (_ > 0)).min + + def determinize: DetWordAutom[T] = { + // for assigning numbers to bitsets + var indexMap = scala.collection.Map[immutable.BitSet, Int]() + var invIndexMap = scala.collection.Map[Int, immutable.BitSet]() + var ix = 0 + + // we compute the dfa 
with states = bitsets + val q0 = immutable.BitSet(0) // the set { 0 } + val sink = immutable.BitSet.empty // the set { } + + var states = Set(q0, sink) // initial set of sets + val delta = new mutable.HashMap[immutable.BitSet, mutable.HashMap[T, immutable.BitSet]] + var deftrans = mutable.Map(q0 -> sink, sink -> sink) // initial transitions + var finals: mutable.Map[immutable.BitSet, Int] = mutable.Map() + val rest = new mutable.Stack[immutable.BitSet] + + rest.push(sink, q0) + + def addFinal(q: immutable.BitSet) { + if (nfa containsFinal q) + finals = finals.updated(q, selectTag(q, nfa.finals)) + } + def add(Q: immutable.BitSet) { + if (!states(Q)) { + states += Q + rest push Q + addFinal(Q) + } + } + + addFinal(q0) // initial state may also be a final state + + while (!rest.isEmpty) { + val P = rest.pop() + // assign a number to this bitset + indexMap = indexMap.updated(P, ix) + invIndexMap = invIndexMap.updated(ix, P) + ix += 1 + + // make transition map + val Pdelta = new mutable.HashMap[T, immutable.BitSet] + delta.update(P, Pdelta) + + labels foreach { label => + val Q = nfa.next(P, label) + Pdelta.update(label, Q) + add(Q) + } + + // collect default transitions + val Pdef = nfa nextDefault P + deftrans = deftrans.updated(P, Pdef) + add(Pdef) + } + + // create DetWordAutom, using indices instead of sets + val nstatesR = states.size + val deltaR = new Array[mutable.Map[T, Int]](nstatesR) + val defaultR = new Array[Int](nstatesR) + val finalsR = new Array[Int](nstatesR) + + for (Q <- states) { + val q = indexMap(Q) + val trans = delta(Q) + val transDef = deftrans(Q) + val qDef = indexMap(transDef) + val ntrans = new mutable.HashMap[T, Int]() + + for ((label, value) <- trans) { + val p = indexMap(value) + if (p != qDef) + ntrans.update(label, p) + } + + deltaR(q) = ntrans + defaultR(q) = qDef + } + + finals foreach { case (k,v) => finalsR(indexMap(k)) = v } + + new DetWordAutom [T] { + val nstates = nstatesR + val delta = deltaR + val default = defaultR + val 
finals = finalsR + } + } +} diff --git a/src/xml/scala/xml/dtd/impl/SyntaxError.scala b/src/xml/scala/xml/dtd/impl/SyntaxError.scala new file mode 100644 index 0000000000..a5b8a5aba0 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/SyntaxError.scala @@ -0,0 +1,21 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** This runtime exception is thrown if an attempt to instantiate a + * syntactically incorrect expression is detected. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] class SyntaxError(e: String) extends RuntimeException(e) diff --git a/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala new file mode 100644 index 0000000000..9bf3fa518b --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala @@ -0,0 +1,162 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ immutable, mutable } + +/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]] + * using the celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). 
+ * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class WordBerrySethi extends BaseBerrySethi { + override val lang: WordExp + + import lang.{ Alt, Eps, Letter, RegExp, Sequ, Star, _labelT } + + protected var labels: mutable.HashSet[_labelT] = _ + // don't let this fool you, only labelAt is a real, surjective mapping + protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma" + protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta + protected var defaultq: Array[List[Int]] = _ // default transitions + protected var initials: Set[Int] = _ + + /** Computes `first(r)` where the word regexp `r`. + * + * @param r the regular expression + * @return the computed set `first(r)` + */ + protected override def compFirst(r: RegExp): Set[Int] = r match { + case x: Letter => Set(x.pos) + case _ => super.compFirst(r) + } + + /** Computes `last(r)` where the word regexp `r`. + * + * @param r the regular expression + * @return the computed set `last(r)` + */ + protected override def compLast(r: RegExp): Set[Int] = r match { + case x: Letter => Set(x.pos) + case _ => super.compLast(r) + } + + /** Returns the first set of an expression, setting the follow set along + * the way. 
+ * + * @param r the regular expression + * @return the computed set + */ + protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { + case x: Letter => follow(x.pos) = fol1 ; Set(x.pos) + case Eps => emptySet + case _ => super.compFollow1(fol1, r) + } + + /** Returns "Sethi-length" of a pattern, creating the set of position + * along the way + */ + + /** Called at the leaves of the regexp */ + protected def seenLabel(r: RegExp, i: Int, label: _labelT) { + labelAt = labelAt.updated(i, label) + this.labels += label + } + + // overridden in BindingBerrySethi + protected def seenLabel(r: RegExp, label: _labelT): Int = { + pos += 1 + seenLabel(r, pos, label) + pos + } + + // todo: replace global variable pos with acc + override def traverse(r: RegExp): Unit = r match { + case a @ Letter(label) => a.pos = seenLabel(r, label) + case Eps => // ignore + case _ => super.traverse(r) + } + + + protected def makeTransition(src: Int, dest: Int, label: _labelT) { + val q = deltaq(src) + q.update(label, dest :: q.getOrElse(label, Nil)) + } + + protected def initialize(subexpr: Seq[RegExp]): Unit = { + this.labelAt = immutable.Map() + this.follow = mutable.HashMap() + this.labels = mutable.HashSet() + this.pos = 0 + + // determine "Sethi-length" of the regexp + subexpr foreach traverse + + this.initials = Set(0) + } + + protected def initializeAutom() { + finals = immutable.Map.empty[Int, Int] // final states + deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta + defaultq = new Array[List[Int]](pos) // default transitions + + for (j <- 0 until pos) { + deltaq(j) = mutable.HashMap[_labelT, List[Int]]() + defaultq(j) = Nil + } + } + + protected def collectTransitions(): Unit = // make transitions + for (j <- 0 until pos ; fol = follow(j) ; k <- fol) { + if (pos == k) finals = finals.updated(j, finalTag) + else makeTransition(j, k, labelAt(k)) + } + + def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = { + 
this.finalTag = finalTag + + pat match { + case x: Sequ => + // (1,2) compute follow + first + initialize(x.rs) + pos += 1 + compFollow(x.rs) // this used to be assigned to var globalFirst and then never used. + + // (3) make automaton from follow sets + initializeAutom() + collectTransitions() + + if (x.isNullable) // initial state is final + finals = finals.updated(0, finalTag) + + val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*) + val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final + + val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] = + (0 until pos map { x => + mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*) + }).toArray + + val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray + + new NondetWordAutom[_labelT] { + val nstates = pos + val labels = WordBerrySethi.this.labels.toList + val finals = finalsArr + val delta = deltaArr + val default = defaultArr + } + case z => + automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag) + } + } +} diff --git a/src/xml/scala/xml/dtd/impl/WordExp.scala b/src/xml/scala/xml/dtd/impl/WordExp.scala new file mode 100644 index 0000000000..a4bb54c1ea --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/WordExp.scala @@ -0,0 +1,59 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** + * The class `WordExp` provides regular word expressions. + * + * Users have to instantiate type member `_regexpT <: RegExp` + * (from class `Base`) and a type member `_labelT <: Label`. 
+ * + * Here is a short example: + * {{{ + * import scala.util.regexp._ + * import scala.util.automata._ + * object MyLang extends WordExp { + * type _regexpT = RegExp + * type _labelT = MyChar + * + * case class MyChar(c:Char) extends Label + * } + * import MyLang._ + * // (a* | b)* + * val rex = Star(Alt(Star(Letter(MyChar('a'))),Letter(MyChar('b')))) + * object MyBerriSethi extends WordBerrySethi { + * override val lang = MyLang + * } + * val nfa = MyBerriSethi.automatonFrom(Sequ(rex), 1) + * }}} + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class WordExp extends Base { + + abstract class Label + + type _regexpT <: RegExp + type _labelT <: Label + + case class Letter(a: _labelT) extends RegExp { + final lazy val isNullable = false + var pos = -1 + } + + case class Wildcard() extends RegExp { + final lazy val isNullable = false + var pos = -1 + } +} diff --git a/src/xml/scala/xml/factory/Binder.scala b/src/xml/scala/xml/factory/Binder.scala new file mode 100755 index 0000000000..947f99e6a4 --- /dev/null +++ b/src/xml/scala/xml/factory/Binder.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package factory + +import parsing.ValidatingMarkupHandler + +/** + * @author Burak Emir + */ +abstract class Binder(val preserveWS: Boolean) extends ValidatingMarkupHandler { + + var result: NodeBuffer = new NodeBuffer() + + def reportSyntaxError(pos:Int, str:String) = {} + + final def procInstr(pos: Int, target: String, txt: String) = + ProcInstr(target, txt) + + final def comment(pos: Int, txt: String) = + Comment(txt) + + final def entityRef(pos: Int, n: String) = + EntityRef(n) + + final def text(pos: Int, txt: String) = + Text(txt) + + final def traverse(n:Node): Unit = n 
match { + case x:ProcInstr => + result &+ procInstr(0, x.target, x.text) + case x:Comment => + result &+ comment(0, x.text) + case x:Text => + result &+ text(0, x.data) + case x:EntityRef => + result &+ entityRef(0, x.entityName) + case x:Elem => + elemStart(0, x.prefix, x.label, x.attributes, x.scope) + val old = result + result = new NodeBuffer() + for (m <- x.child) traverse(m) + result = old &+ elem(0, x.prefix, x.label, x.attributes, x.scope, x.minimizeEmpty, NodeSeq.fromSeq(result)).toList + elemEnd(0, x.prefix, x.label) + } + + final def validate(n: Node): Node = { + this.rootLabel = n.label + traverse(n) + result(0) + } +} diff --git a/src/xml/scala/xml/factory/LoggedNodeFactory.scala b/src/xml/scala/xml/factory/LoggedNodeFactory.scala new file mode 100644 index 0000000000..bc074bfc83 --- /dev/null +++ b/src/xml/scala/xml/factory/LoggedNodeFactory.scala @@ -0,0 +1,90 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package factory + +/** This class logs what the nodefactory is actually doing. 
+ * If you want to see what happens during loading, use it like this: +{{{ +object testLogged extends App { + val x = new scala.xml.parsing.NoBindingFactoryAdapter + with scala.xml.factory.LoggedNodeFactory[scala.xml.Elem] { + override def log(s: String) = println(s) + } + + Console.println("Start") + val doc = x.load(new java.net.URL("http://example.com/file.xml")) + Console.println("End") + Console.println(doc) +} +}}} + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This trait will be removed.", "2.11") +trait LoggedNodeFactory[A <: Node] extends NodeFactory[A] { + // configuration values + val logNode = true + val logText = false + val logComment = false + val logProcInstr = false + + final val NONE = 0 + final val CACHE = 1 + final val FULL = 2 + /** 0 = no logging, 1 = cache hits, 2 = detail */ + val logCompressLevel = 1 + + // methods of NodeFactory + + /** logged version of makeNode method */ + override def makeNode(pre: String, label: String, attrSeq: MetaData, + scope: NamespaceBinding, children: Seq[Node]): A = { + if (logNode) + log("[makeNode for "+label+"]") + + val hash = Utility.hashCode(pre, label, attrSeq.##, scope.##, children) + + /* + if(logCompressLevel >= FULL) { + log("[hashcode total:"+hash); + log(" elem name "+uname+" hash "+ ? 
)); + log(" attrs "+attrSeq+" hash "+attrSeq.hashCode()); + log(" children :"+children+" hash "+children.hashCode()); + } + */ + if (!cache.get( hash ).isEmpty && (logCompressLevel >= CACHE)) + log("[cache hit !]") + + super.makeNode(pre, label, attrSeq, scope, children) + } + + override def makeText(s: String) = { + if (logText) + log("[makeText:\""+s+"\"]") + super.makeText(s) + } + + override def makeComment(s: String): Seq[Comment] = { + if (logComment) + log("[makeComment:\""+s+"\"]") + super.makeComment(s) + } + + override def makeProcInstr(t: String, s: String): Seq[ProcInstr] = { + if (logProcInstr) + log("[makeProcInstr:\""+t+" "+ s+"\"]") + super.makeProcInstr(t, s) + } + + @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") + def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/factory/NodeFactory.scala b/src/xml/scala/xml/factory/NodeFactory.scala new file mode 100644 index 0000000000..94801bb554 --- /dev/null +++ b/src/xml/scala/xml/factory/NodeFactory.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package factory + +import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } +import java.io.{ InputStream, Reader, StringReader, File, FileDescriptor, FileInputStream } + +trait NodeFactory[A <: Node] { + val ignoreComments = false + val ignoreProcInstr = false + + /* default behaviour is to use hash-consing */ + val cache = new scala.collection.mutable.HashMap[Int, List[A]] + + protected def create(pre: String, name: String, attrs: MetaData, scope: NamespaceBinding, children:Seq[Node]): A + + protected def construct(hash: Int, old:List[A], pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children:Seq[Node]): A = { + val el = create(pre, name, attrSeq, scope, children) + 
cache.update(hash, el :: old) + el + } + + def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean = + ch1.view.zipAll(ch2.view, null, null) forall { case (x,y) => x eq y } + + def nodeEquals(n: Node, pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children: Seq[Node]) = + n.prefix == pre && + n.label == name && + n.attributes == attrSeq && + // scope? + eqElements(n.child, children) + + def makeNode(pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): A = { + val hash = Utility.hashCode( pre, name, attrSeq.##, scope.##, children) + def cons(old: List[A]) = construct(hash, old, pre, name, attrSeq, scope, children) + + (cache get hash) match { + case Some(list) => // find structurally equal + list.find(nodeEquals(_, pre, name, attrSeq, scope, children)) match { + case Some(x) => x + case _ => cons(list) + } + case None => cons(Nil) + } + } + + def makeText(s: String) = Text(s) + def makeComment(s: String): Seq[Comment] = + if (ignoreComments) Nil else List(Comment(s)) + def makeProcInstr(t: String, s: String): Seq[ProcInstr] = + if (ignoreProcInstr) Nil else List(ProcInstr(t, s)) +} diff --git a/src/xml/scala/xml/factory/XMLLoader.scala b/src/xml/scala/xml/factory/XMLLoader.scala new file mode 100644 index 0000000000..b69f187039 --- /dev/null +++ b/src/xml/scala/xml/factory/XMLLoader.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package factory + +import javax.xml.parsers.SAXParserFactory +import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } +import java.io.{ InputStream, Reader, File, FileDescriptor } +import java.net.URL + +/** Presents collection of XML loading methods which use the parser + * created by "def parser". 
+ */ +trait XMLLoader[T <: Node] +{ + import scala.xml.Source._ + def adapter: FactoryAdapter = new NoBindingFactoryAdapter() + + /* Override this to use a different SAXParser. */ + def parser: SAXParser = { + val f = SAXParserFactory.newInstance() + f.setNamespaceAware(false) + f.newSAXParser() + } + + /** Loads XML from the given InputSource, using the supplied parser. + * The methods available in scala.xml.XML use the XML parser in the JDK. + */ + def loadXML(source: InputSource, parser: SAXParser): T = { + val newAdapter = adapter + + newAdapter.scopeStack push TopScope + parser.parse(source, newAdapter) + newAdapter.scopeStack.pop() + + newAdapter.rootElem.asInstanceOf[T] + } + + /** Loads XML from the given file, file descriptor, or filename. */ + def loadFile(file: File): T = loadXML(fromFile(file), parser) + def loadFile(fd: FileDescriptor): T = loadXML(fromFile(fd), parser) + def loadFile(name: String): T = loadXML(fromFile(name), parser) + + /** loads XML from given InputStream, Reader, sysID, InputSource, or URL. */ + def load(is: InputStream): T = loadXML(fromInputStream(is), parser) + def load(reader: Reader): T = loadXML(fromReader(reader), parser) + def load(sysID: String): T = loadXML(fromSysId(sysID), parser) + def load(source: InputSource): T = loadXML(source, parser) + def load(url: URL): T = loadXML(fromInputStream(url.openStream()), parser) + + /** Loads XML from the given String. 
*/ + def loadString(string: String): T = loadXML(fromString(string), parser) +} diff --git a/src/xml/scala/xml/include/CircularIncludeException.scala b/src/xml/scala/xml/include/CircularIncludeException.scala new file mode 100644 index 0000000000..351f403008 --- /dev/null +++ b/src/xml/scala/xml/include/CircularIncludeException.scala @@ -0,0 +1,25 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * A `CircularIncludeException` is thrown when an included document attempts + * to include itself or one of its ancestor documents. The detail `message` is forwarded to `XIncludeException`. + */ +class CircularIncludeException(message: String) extends XIncludeException(message) { + + /** + * Constructs a `CircularIncludeException` with `'''null'''` + * as its error detail message. + */ + def this() = this(null) + +} diff --git a/src/xml/scala/xml/include/UnavailableResourceException.scala b/src/xml/scala/xml/include/UnavailableResourceException.scala new file mode 100644 index 0000000000..47b176e0f3 --- /dev/null +++ b/src/xml/scala/xml/include/UnavailableResourceException.scala @@ -0,0 +1,20 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * An `UnavailableResourceException` is thrown when an included document + * cannot be found or loaded. 
+ */ +class UnavailableResourceException(message: String) +extends XIncludeException(message) { + def this() = this(null) +} diff --git a/src/xml/scala/xml/include/XIncludeException.scala b/src/xml/scala/xml/include/XIncludeException.scala new file mode 100644 index 0000000000..11e1644d83 --- /dev/null +++ b/src/xml/scala/xml/include/XIncludeException.scala @@ -0,0 +1,58 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * `XIncludeException` is the generic superclass for all checked exceptions + * that may be thrown as a result of a violation of XInclude's rules. + * + * Constructs an `XIncludeException` with the specified detail message. + * The error message string `message` can later be retrieved by the + * `{@link java.lang.Throwable#getMessage}` + * method of class `java.lang.Throwable`. + * + * @param message the detail message. + */ +class XIncludeException(message: String) extends Exception(message) { + + /** + * uses `'''null'''` as its error detail message. + */ + def this() = this(null) + + private var rootCause: Throwable = null + + /** + * When an `IOException`, `MalformedURLException` or other generic + * exception is thrown while processing an XML document for XIncludes, + * it is customarily replaced by some form of `XIncludeException`. + * This method allows you to store the original exception. + * + * @param nestedException the underlying exception which + * caused the XIncludeException to be thrown + */ + def setRootCause(nestedException: Throwable ) { + this.rootCause = nestedException + } + + /** + * When an `IOException`, `MalformedURLException` or other generic + * exception is thrown while processing an XML document for XIncludes, + * it is customarily replaced by some form of `XIncludeException`. 
+ * This method allows you to retrieve the original exception. + * It returns null if no such exception caused this `XIncludeException`. + * + * @return Throwable the underlying exception which caused the + * `XIncludeException` to be thrown + */ + def getRootCause(): Throwable = this.rootCause + +} diff --git a/src/xml/scala/xml/include/sax/EncodingHeuristics.scala b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala new file mode 100644 index 0000000000..57ab5ed91c --- /dev/null +++ b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala @@ -0,0 +1,98 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include.sax + +import java.io.InputStream +import scala.util.matching.Regex + +/** `EncodingHeuristics` reads from a stream + * (which should be buffered) and attempts to guess + * what the encoding of the text in the stream is. + * If it fails to determine the type of the encoding, + * it returns the default UTF-8. + * + * @author Burak Emir + * @author Paul Phillips + */ +object EncodingHeuristics +{ + object EncodingNames { + // UCS-4 isn't yet implemented in java releases anyway... + val bigUCS4 = "UCS-4" + val littleUCS4 = "UCS-4" + val unusualUCS4 = "UCS-4" + val bigUTF16 = "UTF-16BE" + val littleUTF16 = "UTF-16LE" + val utf8 = "UTF-8" + val default = utf8 + } + import EncodingNames._ + + /** This utility method attempts to determine the XML character encoding + * by examining the input stream, as specified at + * [[http://www.w3.org/TR/xml/#sec-guessing w3]]. + * + * @param in `InputStream` to read from. + * @throws IOException if the stream cannot be reset + * @return the name of the encoding. 
+ */ + def readEncodingFromStream(in: InputStream): String = { + var ret: String = null + val bytesToRead = 1024 // enough to read most XML encoding declarations + def resetAndRet = { in.reset ; ret } + + // This may fail if there are a lot of space characters before the end + // of the encoding declaration + in mark bytesToRead + val bytes = (in.read, in.read, in.read, in.read) + + // first look for byte order mark + ret = bytes match { + case (0x00, 0x00, 0xFE, 0xFF) => bigUCS4 + case (0xFF, 0xFE, 0x00, 0x00) => littleUCS4 + case (0x00, 0x00, 0xFF, 0xFE) => unusualUCS4 + case (0xFE, 0xFF, 0x00, 0x00) => unusualUCS4 + case (0xFE, 0xFF, _ , _ ) => bigUTF16 + case (0xFF, 0xFE, _ , _ ) => littleUTF16 + case (0xEF, 0xBB, 0xBF, _ ) => utf8 + case _ => null + } + if (ret != null) + return resetAndRet + + def readASCIIEncoding: String = { + val data = new Array[Byte](bytesToRead - 4) + val length = in.read(data, 0, bytesToRead - 4) // -1 when the stream ends right after the first four bytes + + // Use Latin-1 (ISO-8859-1) because all byte sequences are legal; clamp the length so EOF yields an empty declaration. + val declaration = new String(data, 0, math.max(length, 0), "ISO-8859-1") + val regexp = """(?m).*?encoding\s*=\s*["'](.+?)['"]""".r + (regexp findFirstMatchIn declaration) match { + case None => default + case Some(md) => md.subgroups(0) + } + } + + // no byte order mark present; first character must be '<' or whitespace + ret = bytes match { + case (0x00, 0x00, 0x00, '<' ) => bigUCS4 + case ('<' , 0x00, 0x00, 0x00) => littleUCS4 + case (0x00, 0x00, '<' , 0x00) => unusualUCS4 + case (0x00, '<' , 0x00, 0x00) => unusualUCS4 + case (0x00, '<' , 0x00, '?' ) => bigUTF16 // XXX must read encoding + case ('<' , 0x00, '?' , 0x00) => littleUTF16 // XXX must read encoding + case ('<' , '?' 
, 'x' , 'm' ) => readASCIIEncoding + case (0x4C, 0x6F, 0xA7, 0x94) => utf8 // XXX EBCDIC + case _ => utf8 // no XML or text declaration present + } + resetAndRet + } +} diff --git a/src/xml/scala/xml/include/sax/XIncludeFilter.scala b/src/xml/scala/xml/include/sax/XIncludeFilter.scala new file mode 100644 index 0000000000..3fa3beefb0 --- /dev/null +++ b/src/xml/scala/xml/include/sax/XIncludeFilter.scala @@ -0,0 +1,373 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include.sax + +import scala.xml.include._ + +import org.xml.sax.{ Attributes, XMLReader, Locator } +import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl } + +import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException } +import java.util.Stack +import java.net.{ URL, MalformedURLException } + +/** This is a SAX filter which resolves all XInclude include elements before + * passing them on to the client application. Currently this class has the + * following known deviation from the XInclude specification: + * + * 1. XPointer is not supported. + * + * Furthermore, I would definitely use a new instance of this class for each + * document you want to process. I doubt it can be used successfully on + * multiple documents. Furthermore, I can virtually guarantee that this + * class is not thread safe. You have been warned. + * + * Since this class is not designed to be subclassed, and since I have not + * yet considered how that might affect the methods herein or what other + * protected methods might be needed to support subclasses, I have declared + * this class final. I may remove this restriction later, though the use-case + * for subclassing is weak. 
This class is designed to have its functionality + * extended via a horizontal chain of filters, not a vertical hierarchy of + * sub and superclasses. + * + * To use this class: + * + * - Construct an `XIncludeFilter` object with a known base URL + * - Pass the `XMLReader` object from which the raw document will be read to + * the `setParent()` method of this object. + * - Pass your own `ContentHandler` object to the `setContentHandler()` + * method of this object. This is the object which will receive events + * from the parsed and included document. + * - Optional: if you wish to receive comments, set your own `LexicalHandler` + * object as the value of this object's + * `http://xml.org/sax/properties/lexical-handler` property. + * Also make sure your `LexicalHandler` asks this object for the status of + * each comment using `insideIncludeElement` before doing anything with the + * comment. + * - Pass the URL of the document to read to this object's `parse()` method + * + * e.g. + * {{{ + * val includer = new XIncludeFilter(base) + * includer setParent parser + * includer setContentHandler new SAXXIncluder(System.out) + * includer parse args(i) + * }}} + * translated from Elliotte Rusty Harold's Java source. + * + * @author Burak Emir + */ +class XIncludeFilter extends XMLFilterImpl { + + final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude" + + private val bases = new Stack[URL]() + private val locators = new Stack[Locator]() + +/* private EntityResolver resolver; + + public XIncludeFilter() { + this(null); + } + + public XIncludeFilter(EntityResolver resolver) { + this.resolver = resolver; + } */ + + + // what if this isn't called???? + // do I need to check this in startDocument() and push something + // there???? 
+ override def setDocumentLocator(locator: Locator) { + locators push locator + val base = locator.getSystemId() + try { + bases.push(new URL(base)) + } + catch { + case e:MalformedURLException => + throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base) + } + super.setDocumentLocator(locator) + } + + + // necessary to throw away contents of non-empty XInclude elements + private var level = 0 + + /** This utility method returns true if and only if this reader is + * currently inside a non-empty include element. (This is '''not''' the + * same as being inside the node set which replaces the include element.) + * This is primarily needed for comments inside include elements. + * It must be checked by the actual `LexicalHandler` to see whether + * a comment is passed or not. + * + * @return boolean + */ + def insideIncludeElement(): Boolean = level != 0 + + override def startElement(uri: String, localName: String, qName: String, atts1: Attributes) { + var atts = atts1 + if (level == 0) { // We're not inside an xi:include element + + // Adjust bases stack by pushing either the new + // value of xml:base or the base of the parent + val base = atts.getValue(NamespaceSupport.XMLNS, "base") + val parentBase = bases.peek().asInstanceOf[URL] + var currentBase = parentBase + if (base != null) { + try { + currentBase = new URL(parentBase, base) + } + catch { + case e: MalformedURLException => // currentBase still holds parentBase here, so report the rejected xml:base value + throw new SAXException("Malformed base URL: " + + base, e) + } + } + bases push currentBase + + if (uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")) { + // include external document + val href = atts.getValue("href") + // Verify that there is an href attribute + if (href == null) { + throw new SAXException("Missing href attribute") + } + + var parse = atts getValue "parse" + if (parse == null) parse = "xml" + + if (parse equals "text") { + val encoding = atts getValue "encoding" 
+ includeTextDocument(href, encoding) + } + else if (parse equals 
"xml") { + includeXMLDocument(href) + } + // Need to check this also in DOM and JDOM???? + else { + throw new SAXException( + "Illegal value for parse attribute: " + parse) + } + level += 1 + } + else { + if (atRoot) { + // add xml:base attribute if necessary + val attsImpl = new AttributesImpl(atts) + attsImpl.addAttribute(NamespaceSupport.XMLNS, "base", + "xml:base", "CDATA", currentBase.toExternalForm()) + atts = attsImpl + atRoot = false + } + super.startElement(uri, localName, qName, atts) + } + } + } + + override def endElement(uri: String, localName: String, qName: String) { + if (uri.equals(XINCLUDE_NAMESPACE) + && localName.equals("include")) { + level -= 1 + } + else if (level == 0) { + bases.pop() + super.endElement(uri, localName, qName) + } + } + + private var depth = 0 + + override def startDocument() { + level = 0 + if (depth == 0) super.startDocument() + depth += 1 + } + + override def endDocument() { + locators.pop() + bases.pop() // pop the URL for the document itself + depth -= 1 + if (depth == 0) super.endDocument() + } + + // how do prefix mappings move across documents???? 
+  // The forwarding overrides below pass an event on to the parent handler
+  // only while `level` is 0; otherwise the event is dropped.
+  // NOTE(review): `level` presumably counts nesting inside an include
+  // element -- confirm against the part of the class above this chunk.
+  override def startPrefixMapping(prefix: String , uri: String) {
+    if (level == 0) super.startPrefixMapping(prefix, uri)
+  }
+
+  override def endPrefixMapping(prefix: String) {
+    if (level == 0) super.endPrefixMapping(prefix)
+  }
+
+  override def characters(ch: Array[Char], start: Int, length: Int) {
+    if (level == 0) super.characters(ch, start, length)
+  }
+
+  override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int) {
+    if (level == 0) super.ignorableWhitespace(ch, start, length)
+  }
+
+  override def processingInstruction(target: String, data: String) {
+    if (level == 0) super.processingInstruction(target, data)
+  }
+
+  override def skippedEntity(name: String) {
+    if (level == 0) super.skippedEntity(name)
+  }
+
+  // Convenience method for error messages: describes the position reported
+  // by the locator on top of `locators`. When that locator is null the
+  // identifiers stay empty and line/column stay -1.
+  private def getLocation(): String = {
+    val locator = locators.peek().asInstanceOf[Locator]
+    var publicID = ""
+    var systemID = ""
+    var column = -1
+    var line = -1
+    if (locator != null) {
+      publicID = locator.getPublicId()
+      systemID = locator.getSystemId()
+      line = locator.getLineNumber()
+      column = locator.getColumnNumber()
+    }
+
+    (" in document included from " + publicID
+    + " at " + systemID
+    + " at line " + line + ", column " + column)
+  }
+
+  /** This utility method reads a document at a specified URL and fires off
+    * calls to `characters()`. It's used to include files with `parse="text"`.
+    *
+    * The stream opened on the URL is closed before this method returns,
+    * whether reading succeeds or fails.
+    *
+    * @param url URL of the document that will be read
+    * @param encoding1 Encoding of the document; e.g. UTF-8,
+    *                  ISO-8859-1, etc. A null or blank value means UTF-8.
+    * @return void
+    * @throws SAXException if the requested document cannot
+    *                      be downloaded from the specified URL
+    *                      or if the encoding is not recognized
+    */
+  private def includeTextDocument(url: String, encoding1: String) {
+    var encoding = encoding1
+    if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8"
+    var source: URL = null
+    try {
+      val base = bases.peek().asInstanceOf[URL]
+      source = new URL(base, url)
+    }
+    catch {
+      case e: MalformedURLException =>
+        val ex = new UnavailableResourceException("Unresolvable URL " + url
+                                                  + getLocation())
+        ex.setRootCause(e)
+        throw new SAXException("Unresolvable URL " + url + getLocation(), ex)
+    }
+
+    try {
+      val uc = source.openConnection()
+      val in = new BufferedInputStream(uc.getInputStream())
+      try {
+        val encodingFromHeader = uc.getContentEncoding()
+        var contentType = uc.getContentType()
+        if (encodingFromHeader != null)
+          encoding = encodingFromHeader
+        else {
+          // What if file does not have a MIME type but name ends in .xml????
+          // MIME types are case-insensitive
+          // Java may be picking this up from file URL
+          if (contentType != null) {
+            // Locale.ROOT: the default locale may lower-case 'I' to a
+            // non-ASCII letter (e.g. Turkish), which would defeat the
+            // comparisons below.
+            contentType = contentType.toLowerCase(java.util.Locale.ROOT)
+            if (contentType.equals("text/xml")
+                || contentType.equals("application/xml")
+                || (contentType.startsWith("text/") && contentType.endsWith("+xml") )
+                || (contentType.startsWith("application/") && contentType.endsWith("+xml"))) {
+              encoding = EncodingHeuristics.readEncodingFromStream(in)
+            }
+          }
+        }
+        val reader = new InputStreamReader(in, encoding)
+        val c = new Array[Char](1024)
+        var charsRead: Int = 0 // bogus init value
+        do {
+          charsRead = reader.read(c, 0, 1024)
+          if (charsRead > 0) this.characters(c, 0, charsRead)
+        } while (charsRead != -1)
+      }
+      finally {
+        // Release the connection's stream; a failure while closing a stream
+        // we only read from must not mask the primary outcome.
+        try in.close()
+        catch { case _: IOException => }
+      }
+    }
+    catch {
+      case e: UnsupportedEncodingException =>
+        throw new SAXException("Unsupported encoding: "
+                               + encoding + getLocation(), e)
+      case e: IOException =>
+        throw new SAXException("Document not found: "
+                               + source.toExternalForm() + getLocation(), e)
+    }
+
+  }
+
+  private var atRoot = false
+
+  /** This utility method reads a document at a specified URL
+   *  and fires off calls to various `ContentHandler` methods.
+   *  It's used to include files with `parse="xml"`.
+   *
+   *  @param url URL of the document that will be read
+   *  @return void
+   *  @throws SAXException if the requested document cannot
+   be downloaded from the specified URL.
+   */
+  private def includeXMLDocument(url: String) {
+    val source =
+      try new URL(bases.peek(), url)
+      catch {
+        case e: MalformedURLException =>
+          val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation())
+          ex setRootCause e
+          throw new SAXException("Unresolvable URL " + url + getLocation(), ex)
+      }
+
+    try {
+      val parser: XMLReader =
+        try XMLReaderFactory.createXMLReader()
+        catch {
+          case e: SAXException =>
+            try XMLReaderFactory.createXMLReader(XercesClassName)
+            catch { case _: SAXException => return System.err.println("Could not find an XML parser") }
+        }
+
+      parser setContentHandler this
+      val resolver = this.getEntityResolver()
+      if (resolver != null)
+        parser setEntityResolver resolver
+
+      // Reject circular inclusion before touching any state, so a rejected
+      // include leaves `level` and `bases` exactly as they were.
+      if (bases contains source)
+        throw new SAXException(
+          "Circular XInclude Reference",
+          new CircularIncludeException("Circular XInclude Reference to " + source + getLocation())
+        )
+
+      // save old level and base
+      val previousLevel = level
+      this.level = 0
+      bases push source
+      atRoot = true
+      try parser parse source.toExternalForm()
+      finally {
+        // restore old level and base, even when the nested parse fails
+        this.level = previousLevel
+        bases.pop()
+      }
+    }
+    catch {
+      case e: IOException =>
+        throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e)
+    }
+  }
+}
diff --git a/src/xml/scala/xml/include/sax/XIncluder.scala b/src/xml/scala/xml/include/sax/XIncluder.scala
new file mode 100644
index 0000000000..1939fa1875
--- /dev/null
+++ b/src/xml/scala/xml/include/sax/XIncluder.scala
@@ -0,0 +1,187 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+package include.sax
+
+import scala.collection.mutable
+import org.xml.sax.{ ContentHandler, XMLReader, Locator, Attributes }
+import org.xml.sax.ext.LexicalHandler
+import java.io.{ File, OutputStream, OutputStreamWriter, Writer, IOException }
+
+/** XIncluder is a SAX `ContentHandler` that writes its XML document onto
+ * an output stream after resolving all `xinclude:include` elements.
+ *
+ * Every write failure is reported to the caller as a `SAXException`
+ * wrapping the underlying `IOException`.
+ *
+ * Based on Eliotte Rusty Harold's SAXXIncluder.
+ */
+class XIncluder(outs: OutputStream, encoding: String) extends ContentHandler with LexicalHandler {
+
+  var out = new OutputStreamWriter(outs, encoding)
+
+  def setDocumentLocator(locator: Locator) {}
+
+  /** Writes the XML declaration, naming the encoding given at construction. */
+  def startDocument() {
+    try {
+      out.write("<?xml version='1.0' encoding='"
+                + encoding + "'?>\r\n")
+    }
+    catch {
+      case e:IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  def endDocument() {
+    try {
+      out.flush()
+    }
+    catch {
+      case e:IOException =>
+        throw new SAXException("Flush failed", e)
+    }
+  }
+
+  def startPrefixMapping(prefix: String , uri: String) {}
+
+  def endPrefixMapping(prefix: String) {}
+
+  /** Writes the start tag with all attributes, each value quoted with `'`. */
+  def startElement(namespaceURI: String, localName: String, qualifiedName: String, atts: Attributes) = {
+    try {
+      out.write("<" + qualifiedName)
+      var i = 0; while (i < atts.getLength()) {
+        out.write(" ")
+        out.write(atts.getQName(i))
+        out.write("='")
+        val value = atts.getValue(i)
+        // @todo Need to use character references if the encoding
+        // can't support the character
+        // The value is delimited by apostrophes, so an apostrophe inside it
+        // has to be escaped as well to keep the output well-formed.
+        out.write(scala.xml.Utility.escape(value).replace("'", "&apos;"))
+        out.write("'")
+        i += 1
+      }
+      out.write(">")
+    }
+    catch {
+      case e:IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  /** Writes the end tag for `qualifiedName`. */
+  def endElement(namespaceURI: String, localName:String, qualifiedName: String) {
+    try {
+      out.write("</" + qualifiedName + ">")
+    }
+    catch {
+      case e: IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  // need to escape characters that are not in the given
+  // encoding using character references????
+  /** Writes character data, escaping `&`, `<` and `>`. */
+  def characters(ch: Array[Char], start: Int, length: Int) {
+    try {
+      var i = 0; while (i < length) {
+        val c = ch(start+i)
+        if (c == '&') out.write("&amp;")
+        else if (c == '<') out.write("&lt;")
+        // This next fix is normally not necessary.
+        // However, it is required if text contains ]]>
+        // (The end CDATA section delimiter)
+        else if (c == '>') out.write("&gt;")
+        else out.write(c.toInt)
+        i += 1
+      }
+    }
+    catch {
+      case e: IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  def ignorableWhitespace(ch: Array[Char], start: Int , length: Int) {
+    this.characters(ch, start, length)
+  }
+
+  // do I need to escape text in PI????
+  def processingInstruction(target: String, data: String) {
+    try {
+      out.write("<?" + target + " " + data + "?>")
+    }
+    catch {
+      case e:IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  def skippedEntity(name: String) {
+    try {
+      out.write("&" + name + ";")
+    }
+    catch {
+      case e:IOException =>
+        throw new SAXException("Write failed", e)
+    }
+  }
+
+  // LexicalHandler methods
+  private var inDTD: Boolean = false
+  private val entities = new mutable.Stack[String]()
+
+  def startDTD(name: String, publicID: String, systemID: String) {
+    inDTD = true
+    // if this is the source document, output a DOCTYPE declaration
+    if (entities.isEmpty) {
+      var id = ""
+      if (publicID != null) id = " PUBLIC \"" + publicID + "\" \"" + systemID + '"'
+      else if (systemID != null) id = " SYSTEM \"" + systemID + '"'
+      try {
+        out.write("<!DOCTYPE " + name + id + ">\r\n")
+      }
+      catch {
+        case e:IOException =>
+          throw new SAXException("Error while writing DOCTYPE", e)
+      }
+    }
+  }
+  // Leaving the DTD re-enables comment output (see `comment`).
+  def endDTD() { inDTD = false }
+
+  def startEntity(name: String) {
+    entities push name
+  }
+
+  def endEntity(name: String) {
+    entities.pop()
+  }
+
+  def startCDATA() {}
+  def endCDATA() {}
+
+  // Just need this reference so we can ask if a comment is
+  // inside an include element or not
+  private var filter: XIncludeFilter = null
+
+  def setFilter(filter: XIncludeFilter) {
+    this.filter = filter
+  }
+
+  /** Writes a comment unless it occurs inside the DTD or inside an include
+   *  element. Without a filter, no comment counts as inside an include.
+   */
+  def comment(ch: Array[Char], start: Int, length: Int) {
+    if (!inDTD && (filter == null || !filter.insideIncludeElement())) {
+      try {
+        out.write("<!--")
+        out.write(ch, start, length)
+        out.write("-->")
+      }
+      catch {
+        case e: IOException =>
+          throw new SAXException("Write failed", e)
+      }
+    }
+  }
+}
diff --git
a/src/xml/scala/xml/package.scala b/src/xml/scala/xml/package.scala
new file mode 100644
index 0000000000..4001cc5ffb
--- /dev/null
+++ b/src/xml/scala/xml/package.scala
@@ -0,0 +1,19 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+
+package object xml {
+  // Class name of the Xerces SAX parser.
+  val XercesClassName = "org.apache.xerces.parsers.SAXParser"
+
+  // Aliases so code inside scala.xml can name these SAX / JAXP types
+  // without importing them.
+  type SAXException       = org.xml.sax.SAXException
+  type SAXParseException  = org.xml.sax.SAXParseException
+  type EntityResolver     = org.xml.sax.EntityResolver
+  type InputSource        = org.xml.sax.InputSource
+  type SAXParser          = javax.xml.parsers.SAXParser
+}
diff --git a/src/xml/scala/xml/parsing/ConstructingHandler.scala b/src/xml/scala/xml/parsing/ConstructingHandler.scala
new file mode 100755
index 0000000000..ba416e4301
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ConstructingHandler.scala
@@ -0,0 +1,34 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+/** Implementation of MarkupHandler that constructs nodes: each callback
+ *  builds the corresponding `scala.xml` node (`Elem`, `ProcInstr`,
+ *  `Comment`, `EntityRef`, `Text`) from its arguments. The `pos` argument
+ *  is not used.
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+abstract class ConstructingHandler extends MarkupHandler
+{
+  // Declared here but not read by any method of this class.
+  val preserveWS: Boolean
+
+  def elem(pos: Int, pre: String, label: String, attrs: MetaData,
+           pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq =
+    Elem(pre, label, attrs, pscope, empty, nodes:_*)
+
+  def procInstr(pos: Int, target: String, txt: String) =
+    ProcInstr(target, txt)
+
+  def comment(pos: Int, txt: String)  = Comment(txt)
+  def entityRef(pos: Int, n: String)  = EntityRef(n)
+  def text(pos: Int, txt: String)     = Text(txt)
+}
diff --git a/src/xml/scala/xml/parsing/ConstructingParser.scala b/src/xml/scala/xml/parsing/ConstructingParser.scala
new file mode 100644
index 0000000000..3caeddabf4
--- /dev/null
+++ b/src/xml/scala/xml/parsing/ConstructingParser.scala
@@ -0,0 +1,55 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+import java.io.File
+import scala.io.Source
+
+// Factory methods; both return a parser on which `initialize` has already
+// been called.
+object ConstructingParser {
+  def fromFile(inp: File, preserveWS: Boolean) =
+    new ConstructingParser(Source.fromFile(inp), preserveWS).initialize
+
+  def fromSource(inp: Source, preserveWS: Boolean) =
+    new ConstructingParser(inp, preserveWS).initialize
+}
+
+/** An xml parser. parses XML and invokes callback methods of a MarkupHandler.
+ *  Don't forget to call `initialize` on a freshly instantiated parser
+ *  before using it. If you get the parser from one of the companion
+ *  object's methods, initialization is already done for you.
+ *
+ *  {{{
+ *  object parseFromURL {
+ *    def main(args: Array[String]) {
+ *      val url = args(0)
+ *      val src = scala.io.Source.fromURL(url)
+ *      val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false) // fromSource initializes automatically
+ *      val doc = cpa.document()
+ *
+ *      // let's see what it is
+ *      val ppr = new scala.xml.PrettyPrinter(80, 5)
+ *      val ele = doc.docElem
+ *      println("finished parsing")
+ *      val out = ppr.format(ele)
+ *      println(out)
+ *    }
+ *  }
+ *  }}} */
+class ConstructingParser(val input: Source, val preserveWS: Boolean)
+extends  ConstructingHandler
+with     ExternalSources
+with     MarkupParser  {
+
+  // default impl. of Logged: messages are discarded
+  override def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala
new file mode 100755
index 0000000000..6ec7474843
--- /dev/null
+++ b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala
@@ -0,0 +1,30 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+
+/** Default implementation of markup handler: every node-producing callback
+ *  ignores its arguments and returns `NodeSeq.Empty`. */
+abstract class DefaultMarkupHandler extends MarkupHandler {
+
+  def elem(pos: Int, pre: String, label: String, attrs: MetaData,
+           scope:NamespaceBinding, empty: Boolean, args: NodeSeq) = NodeSeq.Empty
+
+  def procInstr(pos: Int, target: String, txt: String) = NodeSeq.Empty
+
+  def comment(pos: Int, comment: String ): NodeSeq = NodeSeq.Empty
+
+  def entityRef(pos: Int, n: String) = NodeSeq.Empty
+
+  def text(pos: Int, txt:String) = NodeSeq.Empty
+
+}
diff --git a/src/xml/scala/xml/parsing/ExternalSources.scala b/src/xml/scala/xml/parsing/ExternalSources.scala
new file mode 100644
index 0000000000..bb939bca95
--- /dev/null
+++
b/src/xml/scala/xml/parsing/ExternalSources.scala
@@ -0,0 +1,38 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+import java.net.URL
+import java.io.File.separator
+
+import scala.io.Source
+
+/**
+ *  @author  Burak Emir
+ *  @version 1.0
+ */
+trait ExternalSources {
+  self: ExternalSources with MarkupParser with MarkupHandler =>
+
+  /** Opens the external source named by `systemId`.
+   *
+   *  An `http:` or `https:` identifier is fetched as a URL. Anything else is
+   *  opened as a file whose path is `systemId` appended to a prefix derived
+   *  from the description of the current `input`.
+   */
+  def externalSource(systemId: String): Source =
+    if ((systemId startsWith "http:") || (systemId startsWith "https:"))
+      Source fromURL new URL(systemId)
+    else {
+      val fileStr: String = input.descr match {
+        case x if x startsWith "file:" => x drop 5
+        case x                         => x take ((x lastIndexOf separator) + 1)
+      }
+
+      Source.fromFile(fileStr + systemId)
+    }
+}
diff --git a/src/xml/scala/xml/parsing/FactoryAdapter.scala b/src/xml/scala/xml/parsing/FactoryAdapter.scala
new file mode 100644
index 0000000000..2154bdf5ba
--- /dev/null
+++ b/src/xml/scala/xml/parsing/FactoryAdapter.scala
@@ -0,0 +1,187 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+package parsing
+
+import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream }
+import scala.collection.{ mutable, Iterator }
+import org.xml.sax.Attributes
+import org.xml.sax.helpers.DefaultHandler
+
+// can be mixed into FactoryAdapter if desired
+trait ConsoleErrorHandler extends DefaultHandler {
+  // ignore warning, crimson warns even for entity resolution!
+  override def warning(ex: SAXParseException): Unit = { }
+  override def error(ex: SAXParseException): Unit = printError("Error", ex)
+  override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex)
+
+  // Prints "[errtype]:line:column: message" on Console.err.
+  protected def printError(errtype: String, ex: SAXParseException): Unit =
+    Console.withOut(Console.err) {
+      val s = "[%s]:%d:%d: %s".format(
+        errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage)
+      Console.println(s)
+      Console.flush()
+    }
+}
+
+/** SAX adapter class, for use with Java SAX parser. Keeps track of
+ *  namespace bindings, without relying on namespace handling of the
+ *  underlying SAX parser.
+ */
+abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
+  var rootElem: Node = null
+
+  val buffer      = new StringBuilder()
+  val attribStack = new mutable.Stack[MetaData]
+  val hStack      = new mutable.Stack[Node]   // [ element ] contains siblings
+  val tagStack    = new mutable.Stack[String]
+  var scopeStack  = new mutable.Stack[NamespaceBinding]
+
+  var curTag : String = null
+  var capture: Boolean = false
+
+  // abstract methods
+
+  /** Tests if an XML element contains text.
+   * @param localName the element name without its prefix
+   * @return true if element named `localName` contains text.
+   */
+  def nodeContainsText(localName: String): Boolean // abstract
+
+  /** creates an new non-text(tree) node.
+   * @param pre      the element's namespace prefix, or null if it has none
+   * @param elemName the element's local name
+   * @param attribs  the element's attributes
+   * @param scope    the namespace bindings in effect for the element
+   * @param chIter   the element's children, in document order
+   * @return a new XML element.
+   */
+  def createNode(pre: String, elemName: String, attribs: MetaData,
+                 scope: NamespaceBinding, chIter: List[Node]): Node // abstract
+
+  /** creates a Text node.
+   * @param text
+   * @return a new Text node.
+   */
+  def createText(text: String): Text // abstract
+
+  /** creates a new processing instruction node.
+  */
+  def createProcInstr(target: String, data: String): Seq[ProcInstr]
+
+  //
+  // ContentHandler methods
+  //
+
+  val normalizeWhitespace = false
+
+  /** Characters.
+  * @param ch
+  * @param offset
+  * @param length
+  */
+  override def characters(ch: Array[Char], offset: Int, length: Int): Unit = {
+    if (!capture) return
+    // compliant: report every character
+    else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
+    // normalizing whitespace is not compliant, but useful
+    else {
+      var it = ch.slice(offset, offset + length).iterator
+      while (it.hasNext) {
+        val c = it.next()
+        val isSpace = c.isWhitespace
+        buffer append (if (isSpace) ' ' else c)
+        if (isSpace)
+          it = it dropWhile (_.isWhitespace)
+      }
+    }
+  }
+
+  // Splits a qualified name at its first ':' into (prefix, local name);
+  // the prefix is null when there is no ':'.
+  private def splitName(s: String) = {
+    val idx = s indexOf ':'
+    if (idx < 0) (null, s)
+    else (s take idx, s drop (idx + 1))
+  }
+
+  /* ContentHandler methods */
+
+  /* Start element. */
+  override def startElement(
+    uri: String,
+    _localName: String,
+    qname: String,
+    attributes: Attributes): Unit =
+  {
+    captureText()
+    tagStack push curTag
+    curTag = qname
+
+    val localName = splitName(qname)._2
+    capture = nodeContainsText(localName)
+
+    // null marks where this element's children start on hStack
+    hStack push null
+    var m: MetaData = Null
+    var scpe: NamespaceBinding =
+      if (scopeStack.isEmpty) TopScope
+      else scopeStack.top
+
+    def nullIfEmpty(s: String) = if (s == "") null else s
+
+    for (i <- 0 until attributes.getLength()) {
+      val qname = attributes getQName i
+      val value = attributes getValue i
+      val (pre, key) = splitName(qname)
+
+      if (pre == "xmlns" || (pre == null && qname == "xmlns")) {
+        val arg = if (pre == null) null else key
+        scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe)
+      }
+      else
+        m = Attribute(Option(pre), key, Text(value), m)
+    }
+
+    scopeStack push scpe
+    attribStack push m
+  }
+
+
+  /** captures text, possibly normalizing whitespace
+   */
+  def captureText(): Unit = {
+    if (capture && buffer.length > 0)
+      hStack push createText(buffer.toString)
+
+    buffer.clear()
+  }
+
+  /** End element.
+   * @param uri
+   * @param _localName
+   * @param qname
+   * @throws org.xml.sax.SAXException if ..
+   */
+  override def endElement(uri: String , _localName: String, qname: String): Unit = {
+    captureText()
+    val metaData = attribStack.pop()
+
+    // reverse order to get it right
+    val v = (Iterator continually hStack.pop takeWhile (_ != null)).toList.reverse
+    val (pre, localName) = splitName(qname)
+    val scp = scopeStack.pop()
+
+    // create element
+    rootElem = createNode(pre, localName, metaData, scp, v)
+    hStack push rootElem
+    curTag = tagStack.pop()
+    // Back in the parent element: query by local name, as startElement does.
+    // curTag is null at root level.
+    capture = curTag != null && nodeContainsText(splitName(curTag)._2)
+  }
+
+  /** Processing instruction.
+  */
+  override def processingInstruction(target: String, data: String) {
+    // Flush pending text first so it keeps its position before the PI.
+    captureText()
+    hStack pushAll createProcInstr(target, data)
+  }
+}
diff --git a/src/xml/scala/xml/parsing/FatalError.scala b/src/xml/scala/xml/parsing/FatalError.scala
new file mode 100644
index 0000000000..ab3cb2a74d
--- /dev/null
+++ b/src/xml/scala/xml/parsing/FatalError.scala
@@ -0,0 +1,17 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |                                         **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+/** !!! This is poorly named, but I guess it's in the API.
+ */
+case class FatalError(msg: String) extends java.lang.RuntimeException(msg)
diff --git a/src/xml/scala/xml/parsing/MarkupHandler.scala b/src/xml/scala/xml/parsing/MarkupHandler.scala
new file mode 100755
index 0000000000..1ebffb9c90
--- /dev/null
+++ b/src/xml/scala/xml/parsing/MarkupHandler.scala
@@ -0,0 +1,127 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+
+
+package scala
+package xml
+package parsing
+
+import scala.collection.mutable
+import scala.io.Source
+import scala.xml.dtd._
+
+/** class that handles markup - provides callback methods to MarkupParser.
+ *  the default is nonvalidating behaviour
+ *
+ *  @author  Burak Emir
+ *  @version 1.0
+ *
+ *  @todo can we ignore more entity declarations (i.e. those with extIDs)?
+ *  @todo expanding entity references
+ */
+abstract class MarkupHandler {
+
+  /** returns true if this markup handler is validating */
+  val isValidating: Boolean = false
+
+  var decls: List[Decl] = Nil
+  var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]()
+
+  /** Returns the first element declaration in `decls` whose name equals
+   *  `Label`, or null if there is none.
+   */
+  def lookupElemDecl(Label: String): ElemDecl =
+    // `Label` is capitalised so that it is matched as a value, not bound
+    (decls collectFirst { case z @ ElemDecl(Label, _) => z }).orNull
+
+  /** Returns the replacement text of the entity named `entityName`.
+   *
+   *  An internally defined entity yields its value (padded with one space
+   *  on each side for a parameter entity). Any other declared entity, and
+   *  an undeclared one, yields an XML comment naming the entity.
+   */
+  def replacementText(entityName: String): Source =
+    Source fromString ((ent get entityName) match {
+      case Some(ParsedEntityDecl(_, IntDef(value)))     => value
+      case Some(ParameterEntityDecl(_, IntDef(value)))  => " %s " format value
+      case Some(_)                                      => "<!-- %s; -->" format entityName
+      case None                                         => "<!-- unknown entity %s; -->" format entityName
+    })
+
+  def endDTD(n: String): Unit = ()
+
+  /** callback method invoked by MarkupParser after start-tag of element.
+   *
+   *  @param pos      the position in the sourcefile
+   *  @param pre      the prefix
+   *  @param label    the local name
+   *  @param attrs    the attributes (metadata)
+   */
+  def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit = ()
+
+  /** callback method invoked by MarkupParser after end-tag of element.
+   *
+   *  @param pos      the position in the source file
+   *  @param pre      the prefix
+   *  @param label    the local name
+   */
+  def elemEnd(pos: Int, pre: String, label: String): Unit = ()
+
+  /** callback method invoked by MarkupParser after parsing an element,
+   *  between the elemStart and elemEnd callbacks
+   *
+   *  @param pos      the position in the source file
+   *  @param pre      the prefix
+   *  @param label    the local name
+   *  @param attrs    the attributes (metadata)
+   *  @param empty    `true` if the element was previously empty; `false` otherwise.
+   *  @param args     the children of this element
+   */
+  def elem(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, empty: Boolean, args: NodeSeq): NodeSeq
+
+  /** callback method invoked by MarkupParser after parsing PI.
+   */
+  def procInstr(pos: Int, target: String, txt: String): NodeSeq
+
+  /** callback method invoked by MarkupParser after parsing comment.
+   */
+  def comment(pos: Int, comment: String): NodeSeq
+
+  /** callback method invoked by MarkupParser after parsing entity ref.
+   *  @todo expanding entity references
+   */
+  def entityRef(pos: Int, n: String): NodeSeq
+
+  /** callback method invoked by MarkupParser after parsing text.
+   */
+  def text(pos: Int, txt: String): NodeSeq
+
+  // DTD handler methods
+
+  def elemDecl(n: String, cmstr: String): Unit = ()
+
+  def attListDecl(name: String, attList: List[AttrDecl]): Unit = ()
+
+  // Records an entity declaration built by `f` in both `decls` and `ent`.
+  // External definitions are skipped unless this handler is validating.
+  private def someEntityDecl(name: String, edef: EntityDef, f: (String, EntityDef) => EntityDecl): Unit =
+    edef match {
+      case _: ExtDef if !isValidating => // ignore (cf REC-xml 4.4.1)
+      case _  =>
+        val y = f(name, edef)
+        decls ::= y
+        ent.update(name, y)
+    }
+
+  def parameterEntityDecl(name: String, edef: EntityDef): Unit =
+    someEntityDecl(name, edef, ParameterEntityDecl.apply _)
+
+  def parsedEntityDecl(name: String, edef: EntityDef): Unit =
+    someEntityDecl(name, edef, ParsedEntityDecl.apply _)
+
+  def peReference(name: String) { decls ::= PEReference(name) }
+  def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = ()
+  def notationDecl(notat: String, extID: ExternalID): Unit = ()
+  def reportSyntaxError(pos: Int, str: String): Unit
+
+  @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11")
+  def log(msg: String): Unit = {}
+}
diff --git a/src/xml/scala/xml/parsing/MarkupParser.scala b/src/xml/scala/xml/parsing/MarkupParser.scala
new file mode 100755
index 0000000000..3bbd136b67
--- /dev/null
+++ b/src/xml/scala/xml/parsing/MarkupParser.scala
@@ -0,0 +1,938 @@
+/*                     __                                               *\
+**     ________ ___   / /  ___     Scala API                            **
+**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
+**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
+** /____/\___/_/ |_/____/_/ | |                                         **
+**                          |/                                          **
+\*                                                                      */
+
+package scala
+package xml
+package parsing
+
+import scala.io.Source
+import scala.xml.dtd._
+import Utility.Escapes.{ pairs => unescape }
+
+/**
+ * An XML parser.
+ *
+ * Parses XML 1.0, invokes callback methods of a `MarkupHandler` and returns
+ * whatever the markup handler returns. Use `ConstructingParser` if you just
+ * want to parse XML to construct instances of `scala.xml.Node`.
+ *
+ * While XML elements are returned, DTD declarations - if handled - are
+ * collected using side-effects.
+ *
+ * @author  Burak Emir
+ * @version 1.0
+ */
+trait MarkupParser extends MarkupParserCommon with TokenTests
+{
+  self: MarkupParser with MarkupHandler =>
+
+  type PositionType = Int
+  type InputType    = Source
+  type ElementType  = NodeSeq
+  type AttributesType = (MetaData, NamespaceBinding)
+  type NamespaceType = NamespaceBinding
+
+  def truncatedError(msg: String): Nothing = throw FatalError(msg)
+  def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag)
+
+  def xHandleError(that: Char, msg: String) = reportSyntaxError(msg)
+
+  val input: Source
+
+  /** if true, does not remove surplus whitespace */
+  val preserveWS: Boolean
+
+  def externalSource(systemLiteral: String): Source
+
+  //
+  // variables, values
+  //
+
+  protected var curInput: Source = input
+
+  // See ticket #3720 for motivations.
+  // Wraps a Source so characters can be read ahead without being consumed:
+  // characters pulled through `lookahead()` are queued and later handed out
+  // again by `iter`.
+  private class WithLookAhead(underlying: Source) extends Source {
+    private val queue = scala.collection.mutable.Queue[Char]()
+    def lookahead(): BufferedIterator[Char] = {
+      val iter = queue.iterator ++ new Iterator[Char] {
+        def hasNext = underlying.hasNext
+        def next() = { val x = underlying.next(); queue += x; x }
+      }
+      iter.buffered
+    }
+    val iter = new Iterator[Char] {
+      def hasNext = underlying.hasNext || !queue.isEmpty
+      def next() = if (!queue.isEmpty) queue.dequeue() else underlying.next()
+    }
+  }
+
+  /** Returns an iterator over the upcoming input characters that does not
+   *  consume them; `curInput` is wrapped in a look-ahead source on first use.
+   */
+  def lookahead(): BufferedIterator[Char] = curInput match {
+    case curInputWLA:WithLookAhead =>
+      curInputWLA.lookahead()
+    case _ =>
+      val newInput = new WithLookAhead(curInput)
+      curInput = newInput
+      newInput.lookahead()
+  }
+
+
+  /** the handler of the markup, returns this */
+  private val handle: MarkupHandler = this
+
+  /** stack of inputs */
+  var inpStack: List[Source] = Nil
+
+  /** holds the position in the source file */
+  var pos: Int = _
+
+  /* used when reading external subset */
+  var extIndex = -1
+
+  /** holds temporary values of pos */
+  var tmppos: Int = _
+
+  /** holds the next character */
+  var nextChNeeded: Boolean = false
+  var reachedEof: Boolean = false
+  var lastChRead: Char = _
+  /** The current character. A new character is read only when `nextch()`
+   *  has requested one; once no input is left, `reachedEof` is set and the
+   *  current character becomes 0.
+   */
+  def ch: Char = {
+    if (nextChNeeded) {
+      if (curInput.hasNext) {
+        lastChRead = curInput.next()
+        pos = curInput.pos
+      } else {
+        val ilen = inpStack.length
+        //Console.println("  ilen = "+ilen+ " extIndex = "+extIndex);
+        if ((ilen != extIndex) && (ilen > 0)) {
+          /* for external source, inpStack == Nil ! need notify of eof! */
+          pop()
+        } else {
+          reachedEof = true
+          lastChRead = 0.asInstanceOf[Char]
+        }
+      }
+      nextChNeeded = false
+    }
+    lastChRead
+  }
+
+  /** character buffer, for names */
+  protected val cbuf = new StringBuilder()
+
+  var dtd: DTD = null
+
+  protected var doc: Document = null
+
+  def eof: Boolean = { ch; reachedEof }
+
+  //
+  // methods
+  //
+
+  /** {{{
+   *  <?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
+   *  }}} */
+  def xmlProcInstr(): MetaData = {
+    xToken("xml")
+    xSpace()
+    val (md,scp) = xAttributes(TopScope)
+    if (scp != TopScope)
+      reportSyntaxError("no xmlns definitions here, please.")
+    xToken('?')
+    xToken('>')
+    md
+  }
+
+  /** Factored out common code.
+   *
+   *  @param isProlog `true` for the document prolog, where `standalone` is
+   *                  accepted; `false` for a text declaration
+   *  @return the version, encoding and standalone values that were present
+   */
+  private def prologOrTextDecl(isProlog: Boolean): (Option[String], Option[String], Option[Boolean]) = {
+    var info_ver: Option[String] = None
+    var info_enc: Option[String] = None
+    var info_stdl: Option[Boolean] = None
+
+    val m = xmlProcInstr()
+    var n = 0
+
+    if (isProlog)
+      xSpaceOpt()
+
+    m("version") match {
+      case null =>
+      case Text("1.0") => info_ver = Some("1.0"); n += 1
+      case _ => reportSyntaxError("cannot deal with versions != 1.0")
+    }
+
+    m("encoding") match {
+      case null =>
+      case Text(enc) =>
+        if (!isValidIANAEncoding(enc))
+          reportSyntaxError("\"" + enc + "\" is not a valid encoding")
+        else {
+          info_enc = Some(enc)
+          n += 1
+        }
+    }
+
+    if (isProlog) {
+      m("standalone") match {
+        case null =>
+        case Text("yes") => info_stdl = Some(true);  n += 1
+        case Text("no")  => info_stdl = Some(false); n += 1
+        case _ => reportSyntaxError("either 'yes' or 'no' expected")
+      }
+    }
+
+    // n counts the recognised attributes; any other attribute is an error
+    if (m.length - n != 0) {
+      val s = if (isProlog) "SDDecl? " else ""
+      reportSyntaxError("VersionInfo EncodingDecl? %sor '?>' expected!" format s)
+    }
+
+    (info_ver, info_enc, info_stdl)
+  }
+
+  /** {{{
+   *  <? prolog ::= xml S?
+   *  // this is a bit more lenient than necessary...
+   *  }}} */
+  def prolog(): (Option[String], Option[String], Option[Boolean]) =
+    prologOrTextDecl(true)
+
+  /** prolog, but without standalone */
+  def textDecl(): (Option[String], Option[String]) =
+    prologOrTextDecl(false) match { case (x1, x2, _)  => (x1, x2) }
+
+  /** {{{
+   *  [22]     prolog      ::= XMLDecl? Misc* (doctypedecl Misc*)?
+   *  [23]     XMLDecl     ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+   *  [24]     VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
+   *  [25]     Eq          ::= S? '=' S?
+   *  [26]     VersionNum  ::= '1.0'
+   *  [27]     Misc        ::= Comment | PI | S
+   * }}} */
+  def document(): Document = {
+    doc = new Document()
+
+    this.dtd = null
+    var info_prolog: (Option[String], Option[String], Option[Boolean]) = (None, None, None)
+    if ('<' != ch) {
+      reportSyntaxError("< expected")
+      return null
+    }
+
+    nextch() // is prolog ?
+    var children: NodeSeq = null
+    if ('?' == ch) {
+      nextch()
+      info_prolog = prolog()
+      doc.version    = info_prolog._1
+      doc.encoding   = info_prolog._2
+      doc.standAlone = info_prolog._3
+
+      children = content(TopScope) // DTD handled as side effect
+    }
+    else {
+      val ts = new NodeBuffer()
+      content1(TopScope, ts)  // DTD handled as side effect
+      ts &+ content(TopScope)
+      children = NodeSeq.fromSeq(ts)
+    }
+    //println("[MarkupParser::document] children now: "+children.toList)
+    var elemCount = 0
+    var theNode: Node = null
+    for (c <- children) c match {
+      case _:ProcInstr =>
+      case _:Comment =>
+      case _:EntityRef => // todo: fix entities, shouldn't be "special"
+        reportSyntaxError("no entity references allowed here")
+      case s:SpecialNode =>
+        if (s.toString.trim().length > 0) //non-empty text nodes not allowed
+          elemCount += 2
+      case m:Node =>
+        elemCount += 1
+        theNode = m
+    }
+    if (1 != elemCount) {
+      reportSyntaxError("document must contain exactly one element")
+      Console.println(children.toList)
+    }
+
+    doc.children = children
+    doc.docElem = theNode
+    doc
+  }
+
+  /** append Unicode character to name buffer*/
+  protected def putChar(c: Char) = cbuf append c
+
+  /** As the current code requires you to call nextch once manually
+   *  after construction, this method formalizes that suboptimal reality.
+   */
+  def initialize: this.type = {
+    nextch()
+    this
+  }
+
+  protected def ch_returning_nextch: Char = { val res = ch; nextch(); res }
+
+  def mkAttributes(name: String, pscope: NamespaceBinding): AttributesType =
+    if (isNameStart (ch)) xAttributes(pscope)
+    else (Null, pscope)
+
+  def mkProcInstr(position: Int, name: String, text: String): ElementType =
+    handle.procInstr(position, name, text)
+
+  /** this method tells ch to get the next character when next called */
+  def nextch() {
+    // Read current ch if needed
+    ch
+
+    // Mark next ch to be required
+    nextChNeeded = true
+  }
+
+  /** parse attribute and create namespace scope, metadata
+   *  {{{
+   *  [41] Attributes    ::= { S Name Eq AttValue }
+   *  }}}
+   */
+  def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = {
+    var scope: NamespaceBinding = pscope
+    var aMap: MetaData = Null
+    while (isNameStart(ch)) {
+      val qname = xName
+      xEQ() // side effect
+      val value = xAttributeValue()
+
+      Utility.prefix(qname) match {
+        case Some("xmlns") =>
+          val prefix = qname.substring(6 /*xmlns:*/ , qname.length)
+          scope = new NamespaceBinding(prefix, value, scope)
+
+        case Some(prefix)       =>
+          val key = qname.substring(prefix.length+1, qname.length)
+          aMap = new PrefixedAttribute(prefix, key, Text(value), aMap)
+
+        case _             =>
+          if( qname == "xmlns" )
+            scope = new NamespaceBinding(null, value, scope)
+          else
+            aMap = new UnprefixedAttribute(qname, Text(value), aMap)
+      }
+
+      if ((ch != '/') && (ch != '>') && ('?' != ch))
+        xSpace()
+    }
+
+    if(!aMap.wellformed(scope))
+        reportSyntaxError( "double attribute")
+
+    (aMap,scope)
+  }
+
+  /** entity value, terminated by either ' or ". value may not contain <.
+ * {{{ + * AttValue ::= `'` { _ } `'` + * | `"` { _ } `"` + * }}} + */ + def xEntityValue(): String = { + val endch = ch + nextch() + while (ch != endch && !eof) { + putChar(ch) + nextch() + } + nextch() + val str = cbuf.toString() + cbuf.length = 0 + str + } + + /** {{{ + * '"{char} ) ']]>' + * + * see [15] + * }}} */ + def xCharData: NodeSeq = { + xToken("[CDATA[") + def mkResult(pos: Int, s: String): NodeSeq = { + handle.text(pos, s) + PCData(s) + } + xTakeUntil(mkResult, () => pos, "]]>") + } + + /** {{{ + * Comment ::= '' + * + * see [15] + * }}} */ + def xComment: NodeSeq = { + val sb: StringBuilder = new StringBuilder() + xToken("--") + while (true) { + if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) { + sb.length = sb.length - 1 + nextch() + xToken('>') + return handle.comment(pos, sb.toString()) + } else sb.append(ch) + nextch() + } + throw FatalError("this cannot happen") + } + + /* todo: move this into the NodeBuilder class */ + def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = { + if (preserveWS) + ts &+ handle.text(pos, txt) + else + for (t <- TextBuffer.fromString(txt).toText) { + ts &+ handle.text(pos, t.text) + } + } + + /** {{{ + * '<' content1 ::= ... + * }}} */ + def content1(pscope: NamespaceBinding, ts: NodeBuffer) { + ch match { + case '!' => + nextch() + if ('[' == ch) // CDATA + ts &+ xCharData + else if ('D' == ch) // doctypedecl, parse DTD // @todo REMOVE HACK + parseDTD() + else // comment + ts &+ xComment + case '?' => // PI + nextch() + ts &+ xProcInstr + case _ => + ts &+ element1(pscope) // child + } + } + + /** {{{ + * content1 ::= '<' content1 | '&' charref ... + * }}} */ + def content(pscope: NamespaceBinding): NodeSeq = { + val ts = new NodeBuffer + var exit = eof + // todo: optimize seq repr. 
+ def done = new NodeSeq { val theSeq = ts.toList } + + while (!exit) { + tmppos = pos + exit = eof + + if (eof) + return done + + ch match { + case '<' => // another tag + nextch(); ch match { + case '/' => exit = true // end tag + case _ => content1(pscope, ts) + } + + // postcond: xEmbeddedBlock == false! + case '&' => // EntityRef or CharRef + nextch(); ch match { + case '#' => // CharacterRef + nextch() + val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch())) + xToken(';') + ts &+ theChar + case _ => // EntityRef + val n = xName + xToken(';') + + if (unescape contains n) { + handle.entityRef(tmppos, n) + ts &+ unescape(n) + } else push(n) + } + case _ => // text content + appendText(tmppos, ts, xText) + } + } + done + } // content(NamespaceBinding) + + /** {{{ + * externalID ::= SYSTEM S syslit + * PUBLIC S pubid S syslit + * }}} */ + def externalID(): ExternalID = ch match { + case 'S' => + nextch() + xToken("YSTEM") + xSpace() + val sysID = systemLiteral() + new SystemID(sysID) + case 'P' => + nextch(); xToken("UBLIC") + xSpace() + val pubID = pubidLiteral() + xSpace() + val sysID = systemLiteral() + new PublicID(pubID, sysID) + } + + + /** parses document type declaration and assigns it to instance variable + * dtd. + * {{{ + * + * }}} */ + def parseDTD() { // dirty but fast + var extID: ExternalID = null + if (this.dtd ne null) + reportSyntaxError("unexpected character (DOCTYPE already defined") + xToken("DOCTYPE") + xSpace() + val n = xName + xSpace() + //external ID + if ('S' == ch || 'P' == ch) { + extID = externalID() + xSpaceOpt() + } + + /* parse external subset of DTD + */ + + if ((null != extID) && isValidating) { + + pushExternal(extID.systemId) + extIndex = inpStack.length + + extSubset() + pop() + extIndex = -1 + } + + if ('[' == ch) { // internal subset + nextch() + /* TODO */ + intSubset() + // TODO: do the DTD parsing?? ?!?!?!?!! 
+ xToken(']') + xSpaceOpt() + } + xToken('>') + this.dtd = new DTD { + /*override var*/ externalID = extID + /*override val */decls = handle.decls.reverse + } + //this.dtd.initializeEntities(); + if (doc ne null) + doc.dtd = this.dtd + + handle.endDTD(n) + } + + def element(pscope: NamespaceBinding): NodeSeq = { + xToken('<') + element1(pscope) + } + + /** {{{ + * '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag + * | xmlTag1 '/' '>' + * }}} */ + def element1(pscope: NamespaceBinding): NodeSeq = { + val pos = this.pos + val (qname, (aMap, scope)) = xTag(pscope) + val (pre, local) = Utility.prefix(qname) match { + case Some(p) => (p, qname drop p.length+1) + case _ => (null, qname) + } + val ts = { + if (ch == '/') { // empty element + xToken("/>") + handle.elemStart(pos, pre, local, aMap, scope) + NodeSeq.Empty + } + else { // element with content + xToken('>') + handle.elemStart(pos, pre, local, aMap, scope) + val tmp = content(scope) + xEndTag(qname) + tmp + } + } + val res = handle.elem(pos, pre, local, aMap, scope, ts == NodeSeq.Empty, ts) + handle.elemEnd(pos, pre, local) + res + } + + /** Parse character data. + * + * precondition: `xEmbeddedBlock == false` (we are not in a scala block) + */ + private def xText: String = { + var exit = false + while (! exit) { + putChar(ch) + nextch() + + exit = eof || ( ch == '<' ) || ( ch == '&' ) + } + val str = cbuf.toString + cbuf.length = 0 + str + } + + /** attribute value, terminated by either ' or ". value may not contain <. 
+   * {{{
+   *   AttValue ::= `'` { _ } `'`
+   *              | `"` { _ } `"`
+   * }}} */
+  def systemLiteral(): String = {
+    val endch = ch
+    if (ch != '\'' && ch != '"')
+      reportSyntaxError("quote ' or \" expected")
+    nextch()
+    while (ch != endch && !eof) {
+      putChar(ch)
+      nextch()
+    }
+    nextch()
+    val str = cbuf.toString()
+    cbuf.length = 0
+    str
+  }
+
+  /** {{{
+   * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+   * }}} */
+  def pubidLiteral(): String = {
+    val endch = ch
+    if (ch!='\'' && ch != '"')
+      reportSyntaxError("quote ' or \" expected")
+    nextch()
+    while (ch != endch && !eof) {
+      putChar(ch)
+      //println("hello '"+ch+"'"+isPubIDChar(ch))
+      if (!isPubIDChar(ch))
+        reportSyntaxError("char '"+ch+"' is not allowed in public id")
+      nextch()
+    }
+    nextch()
+    val str = cbuf.toString
+    cbuf.length = 0
+    str
+  }
+
+  //
+  // dtd parsing
+  //
+
+  /** Parses an external subset: an optional leading `<?...` text declaration
+   *  (or a first markup declaration), then markup declarations until `eof`.
+   */
+  def extSubset(): Unit = {
+    var textdecl: (Option[String],Option[String]) = null
+    if (ch == '<') {
+      nextch()
+      if (ch == '?') {
+        nextch()
+        textdecl = textDecl()
+      } else
+        markupDecl1()
+    }
+    while (!eof)
+      markupDecl()
+  }
+
+  /** Parses one markup declaration; the callers in this parser consume the
+   *  leading `<` before calling, so `ch` is `?` (processing instruction) or `!`.
+   *
+   *  Conditional sections follow XML 1.0 productions [61]-[65]:
+   *  {{{
+   *  includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
+   *  ignoreSect  ::= '<![' S? 'IGNORE'  S? '[' ignoreSectContents* ']]>'
+   *  }}}
+   */
+  def markupDecl1() = {
+    // Both helpers start on the '[' that opens the section body and stop
+    // after consuming the first ']' of the closing "]]>".
+    def doInclude() = {
+      xToken('['); while(']' != ch) markupDecl(); nextch() // ']'
+    }
+    def doIgnore() = {
+      xToken('['); while(']' != ch) nextch(); nextch() // ']'
+    }
+    if ('?' == ch) {
+      nextch()
+      xProcInstr // simply ignore processing instructions!
+    } else {
+      xToken('!')
+      ch match {
+        case '-' =>
+          xComment // ignore comments
+
+        case 'E' =>
+          nextch()
+          if ('L' == ch) {
+            nextch()
+            elementDecl()
+          } else
+            entityDecl()
+
+        case 'A' =>
+          nextch()
+          attrDecl()
+
+        case 'N' =>
+          nextch()
+          notationDecl()
+
+        case '[' if inpStack.length >= extIndex =>
+          nextch()
+          xSpaceOpt()
+          ch match {
+            case '%' =>
+              nextch()
+              val ent = xName
+              xToken(';')
+              xSpaceOpt()
+
+              push(ent)
+              xSpaceOpt()
+              val stmt = xName
+              xSpaceOpt()
+
+              stmt match {
+                // parameter entity
+                case "INCLUDE" => doInclude()
+                case "IGNORE"  => doIgnore()
+                // report instead of dying with a MatchError on a bad keyword
+                case _ =>
+                  reportSyntaxError("INCLUDE or IGNORE expected in conditional section, found '"+stmt+"'")
+              }
+            case 'I' =>
+              nextch()
+              ch match {
+                case 'G' =>
+                  nextch()
+                  xToken("NORE")
+                  xSpaceOpt()
+                  doIgnore()
+                case 'N' =>
+                  nextch()
+                  // 'I' and 'N' are already consumed, so only "CLUDE" remains
+                  xToken("CLUDE")
+                  xSpaceOpt()
+                  doInclude()
+                case _ =>
+                  reportSyntaxError("INCLUDE or IGNORE expected in conditional section")
+              }
+            case _ =>
+              reportSyntaxError("INCLUDE or IGNORE expected in conditional section")
+          }
+          xToken(']')
+          xToken('>')
+
+        case _  =>
+          curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl")
+          while (ch!='>')
+            nextch()
+      }
+    }
+  }
+
+  /** Dispatches on `ch` inside a DTD subset: `%` starts a parameter-entity
+   *  reference, `<` starts a markup declaration, whitespace is skipped, and
+   *  anything else is reported and skipped.
+   */
+  def markupDecl(): Unit = ch match {
+    case '%' =>                  // parameter entity reference
+      nextch()
+      val ent = xName
+      xToken(';')
+      if (!isValidating)
+        handle.peReference(ent)  //  n-v: just create PE-reference
+      else
+        push(ent)                //    v: parse replacementText
+
+    //peReference
+    case '<' =>
+      nextch()
+      markupDecl1()
+    case _ if isSpace(ch) =>
+      xSpace()
+    case _ =>
+      reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.toInt)
+      nextch()
+  }
+
+  /**  "rec-xml/#ExtSubset" pe references may not occur within markup declarations
+   */
+  def intSubset() {
+    //Console.println("(DEBUG) intSubset()")
+    xSpace()
+    while (']' != ch)
+      markupDecl()
+  }
+
+  /** <! 
element := ELEMENT + */ + def elementDecl() { + xToken("EMENT") + xSpace() + val n = xName + xSpace() + while ('>' != ch) { + //Console.println("["+ch+"]") + putChar(ch) + nextch() + } + //Console.println("END["+ch+"]") + nextch() + val cmstr = cbuf.toString() + cbuf.length = 0 + handle.elemDecl(n, cmstr) + } + + /** {{{ + * ' != ch) { + val aname = xName + xSpace() + // could be enumeration (foo,bar) parse this later :-/ + while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { + if (!isSpace(ch)) + cbuf.append(ch) + nextch() + } + val atpe = cbuf.toString + cbuf.length = 0 + + val defdecl: DefaultDecl = ch match { + case '\'' | '"' => + DEFAULT(fixed = false, xAttributeValue()) + + case '#' => + nextch() + xName match { + case "FIXED" => xSpace() ; DEFAULT(fixed = true, xAttributeValue()) + case "IMPLIED" => IMPLIED + case "REQUIRED" => REQUIRED + } + case _ => + null + } + xSpaceOpt() + + attList ::= AttrDecl(aname, atpe, defdecl) + cbuf.length = 0 + } + nextch() + handle.attListDecl(n, attList.reverse) + } + + /** {{{ + * //sy + val extID = externalID() + if (isParameterEntity) { + xSpaceOpt() + xToken('>') + handle.parameterEntityDecl(n, ExtDef(extID)) + } else { // notation? 
+ xSpace() + if ('>' != ch) { + xToken("NDATA") + xSpace() + val notat = xName + xSpaceOpt() + xToken('>') + handle.unparsedEntityDecl(n, extID, notat) + } else { + nextch() + handle.parsedEntityDecl(n, ExtDef(extID)) + } + } + + case '"' | '\'' => + val av = xEntityValue() + xSpaceOpt() + xToken('>') + if (isParameterEntity) + handle.parameterEntityDecl(n, IntDef(av)) + else + handle.parsedEntityDecl(n, IntDef(av)) + } + {} + } // entityDecl + + /** {{{ + * 'N' notationDecl ::= "OTATION" + * }}} */ + def notationDecl() { + xToken("OTATION") + xSpace() + val notat = xName + xSpace() + val extID = if (ch == 'S') { + externalID() + } + else if (ch == 'P') { + /* PublicID (without system, only used in NOTATION) */ + nextch() + xToken("UBLIC") + xSpace() + val pubID = pubidLiteral() + xSpaceOpt() + val sysID = if (ch != '>') + systemLiteral() + else + null + new PublicID(pubID, sysID) + } else { + reportSyntaxError("PUBLIC or SYSTEM expected") + scala.sys.error("died parsing notationdecl") + } + xSpaceOpt() + xToken('>') + handle.notationDecl(notat, extID) + } + + def reportSyntaxError(pos: Int, str: String) { curInput.reportError(pos, str) } + def reportSyntaxError(str: String) { reportSyntaxError(pos, str) } + def reportValidationError(pos: Int, str: String) { reportSyntaxError(pos, str) } + + def push(entityName: String) { + if (!eof) + inpStack = curInput :: inpStack + + // can't push before getting next character if needed + ch + + curInput = replacementText(entityName) + nextch() + } + + def pushExternal(systemId: String) { + if (!eof) + inpStack = curInput :: inpStack + + // can't push before getting next character if needed + ch + + curInput = externalSource(systemId) + nextch() + } + + def pop() { + curInput = inpStack.head + inpStack = inpStack.tail + lastChRead = curInput.ch + nextChNeeded = false + pos = curInput.pos + reachedEof = false // must be false, because of places where entity refs occur + } +} diff --git 
a/src/xml/scala/xml/parsing/MarkupParserCommon.scala b/src/xml/scala/xml/parsing/MarkupParserCommon.scala new file mode 100644 index 0000000000..57c1651558 --- /dev/null +++ b/src/xml/scala/xml/parsing/MarkupParserCommon.scala @@ -0,0 +1,260 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.io.Source +import scala.annotation.switch +import Utility.Escapes.{ pairs => unescape } + +import Utility.SU + +/** This is not a public trait - it contains common code shared + * between the library level XML parser and the compiler's. + * All members should be accessed through those. + */ +private[scala] trait MarkupParserCommon extends TokenTests { + protected def unreachable = scala.sys.error("Cannot be reached.") + + // type HandleType // MarkupHandler, SymbolicXMLBuilder + type InputType // Source, CharArrayReader + type PositionType // Int, Position + type ElementType // NodeSeq, Tree + type NamespaceType // NamespaceBinding, Any + type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree] + + def mkAttributes(name: String, pscope: NamespaceType): AttributesType + def mkProcInstr(position: PositionType, name: String, text: String): ElementType + + /** parse a start or empty tag. + * [40] STag ::= '<' Name { S Attribute } [S] + * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] + */ + protected def xTag(pscope: NamespaceType): (String, AttributesType) = { + val name = xName + xSpaceOpt() + + (name, mkAttributes(name, pscope)) + } + + /** '?' {Char})]'?>' + * + * see [15] + */ + def xProcInstr: ElementType = { + val n = xName + xSpaceOpt() + xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>") + } + + /** attribute value, terminated by either `'` or `"`. value may not contain `<`. 
+ @param endCh either `'` or `"` + */ + def xAttributeValue(endCh: Char): String = { + val buf = new StringBuilder + while (ch != endCh) { + // well-formedness constraint + if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") + else if (ch == SU) truncatedError("") + else buf append ch_returning_nextch + } + ch_returning_nextch + // @todo: normalize attribute value + buf.toString + } + + def xAttributeValue(): String = { + val str = xAttributeValue(ch_returning_nextch) + // well-formedness constraint + normalizeAttributeValue(str) + } + + private def takeUntilChar(it: Iterator[Char], end: Char): String = { + val buf = new StringBuilder + while (it.hasNext) it.next() match { + case `end` => return buf.toString + case ch => buf append ch + } + scala.sys.error("Expected '%s'".format(end)) + } + + /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' + */ + def xEndTag(startName: String) { + xToken('/') + if (xName != startName) + errorNoEnd(startName) + + xSpaceOpt() + xToken('>') + } + + /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen + * Name ::= (Letter | '_') (NameChar)* + * + * see [5] of XML 1.0 specification + * + * pre-condition: ch != ':' // assured by definition of XMLSTART token + * post-condition: name does neither start, nor end in ':' + */ + def xName: String = { + if (ch == SU) + truncatedError("") + else if (!isNameStart(ch)) + return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "") + + val buf = new StringBuilder + + do buf append ch_returning_nextch + while (isNameChar(ch)) + + if (buf.last == ':') { + reportSyntaxError( "name cannot end in ':'" ) + buf.toString dropRight 1 + } + else buf.toString + } + + private def attr_unescape(s: String) = s match { + case "lt" => "<" + case "gt" => ">" + case "amp" => "&" + case "apos" => "'" + case "quot" => "\"" + case "quote" => "\"" + case _ => "&" + s + ";" + } + + /** Replaces only character references right now. 
+ * see spec 3.3.3 + */ + private def normalizeAttributeValue(attval: String): String = { + val buf = new StringBuilder + val it = attval.iterator.buffered + + while (it.hasNext) buf append (it.next() match { + case ' ' | '\t' | '\n' | '\r' => " " + case '&' if it.head == '#' => it.next() ; xCharRef(it) + case '&' => attr_unescape(takeUntilChar(it, ';')) + case c => c + }) + + buf.toString + } + + /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * + * see [66] + */ + def xCharRef(ch: () => Char, nextch: () => Unit): String = + Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _) + + def xCharRef(it: Iterator[Char]): String = { + var c = it.next() + Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _) + } + + def xCharRef: String = xCharRef(() => ch, () => nextch()) + + /** Create a lookahead reader which does not influence the input */ + def lookahead(): BufferedIterator[Char] + + /** The library and compiler parsers had the interesting distinction of + * different behavior for nextch (a function for which there are a total + * of two plausible behaviors, so we know the design space was fully + * explored.) One of them returned the value of nextch before the increment + * and one of them the new value. So to unify code we have to at least + * temporarily abstract over the nextchs. 
+ */ + def ch: Char + def nextch(): Unit + protected def ch_returning_nextch: Char + def eof: Boolean + + // def handle: HandleType + var tmppos: PositionType + + def xHandleError(that: Char, msg: String): Unit + def reportSyntaxError(str: String): Unit + def reportSyntaxError(pos: Int, str: String): Unit + + def truncatedError(msg: String): Nothing + def errorNoEnd(tag: String): Nothing + + protected def errorAndResult[T](msg: String, x: T): T = { + reportSyntaxError(msg) + x + } + + def xToken(that: Char) { + if (ch == that) nextch() + else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) + } + def xToken(that: Seq[Char]) { that foreach xToken } + + /** scan [S] '=' [S]*/ + def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() } + + /** skip optional space S? */ + def xSpaceOpt() = while (isSpace(ch) && !eof) nextch() + + /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ + def xSpace() = + if (isSpace(ch)) { nextch(); xSpaceOpt() } + else xHandleError(ch, "whitespace expected") + + /** Apply a function and return the passed value */ + def returning[T](x: T)(f: T => Unit): T = { f(x); x } + + /** Execute body with a variable saved and restored after execution */ + def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = { + val saved = getter + try body + finally setter(saved) + } + + /** Take characters from input stream until given String "until" + * is seen. Once seen, the accumulated characters are passed + * along with the current Position to the supplied handler function. 
+ */ + protected def xTakeUntil[T]( + handler: (PositionType, String) => T, + positioner: () => PositionType, + until: String): T = + { + val sb = new StringBuilder + val head = until.head + val rest = until.tail + + while (true) { + if (ch == head && peek(rest)) + return handler(positioner(), sb.toString) + else if (ch == SU) + truncatedError("") // throws TruncatedXMLControl in compiler + + sb append ch + nextch() + } + unreachable + } + + /** Create a non-destructive lookahead reader and see if the head + * of the input would match the given String. If yes, return true + * and drop the entire String from input; if no, return false + * and leave input unchanged. + */ + private def peek(lookingFor: String): Boolean = + (lookahead() take lookingFor.length sameElements lookingFor.iterator) && { + // drop the chars from the real reader (all lookahead + orig) + (0 to lookingFor.length) foreach (_ => nextch()) + true + } +} diff --git a/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala new file mode 100644 index 0000000000..56ac185f47 --- /dev/null +++ b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala @@ -0,0 +1,37 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package parsing + +import factory.NodeFactory + +/** nobinding adaptor providing callbacks to parser to create elements. +* implements hash-consing +*/ +class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] +{ + /** True. Every XML node may contain text that the application needs */ + def nodeContainsText(label: String) = true + + /** From NodeFactory. 
Constructs an instance of scala.xml.Elem */ + protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem = + Elem(pre, label, attrs, scope, children: _*) + + /** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing. */ + def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem = + Elem(pre, label, attrs, scope, children: _*) + + /** Creates a text node. */ + def createText(text: String) = Text(text) + + /** Creates a processing instruction. */ + def createProcInstr(target: String, data: String) = makeProcInstr(target, data) +} diff --git a/src/xml/scala/xml/parsing/TokenTests.scala b/src/xml/scala/xml/parsing/TokenTests.scala new file mode 100644 index 0000000000..8dd9cdfaa3 --- /dev/null +++ b/src/xml/scala/xml/parsing/TokenTests.scala @@ -0,0 +1,101 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +/** + * Helper functions for parsing XML fragments + */ +trait TokenTests { + + /** {{{ + * (#x20 | #x9 | #xD | #xA) + * }}} */ + final def isSpace(ch: Char): Boolean = ch match { + case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true + case _ => false + } + /** {{{ + * (#x20 | #x9 | #xD | #xA)+ + * }}} */ + final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) + + /** These are 99% sure to be redundant but refactoring on the safe side. */ + def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') + + /** {{{ + * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' + * | CombiningChar | Extender + * }}} + * See [4] and Appendix B of XML 1.0 specification. 
+   */
+  def isNameChar(ch: Char) = {
+    import java.lang.Character._
+    // The constants represent groups Mc, Me, Mn, Lm, and Nd.
+
+    isNameStart(ch) || (getType(ch).toByte match {
+      case COMBINING_SPACING_MARK |
+              ENCLOSING_MARK | NON_SPACING_MARK |
+              MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true
+      case _                                        => ".-:" contains ch
+    })
+  }
+
+  /** {{{
+   *  NameStart ::= ( Letter | '_' )
+   *  }}}
+   *  where Letter means in one of the Unicode general
+   *  categories `{ Ll, Lu, Lo, Lt, Nl }`.
+   *
+   *  We do not allow a name to start with `:`.
+   *  See [3] and Appendix B of XML 1.0 specification
+   */
+  def isNameStart(ch: Char) = {
+    import java.lang.Character._
+
+    getType(ch).toByte match {
+      case LOWERCASE_LETTER |
+              UPPERCASE_LETTER | OTHER_LETTER |
+              TITLECASE_LETTER | LETTER_NUMBER => true
+      case _                                  => ch == '_'
+    }
+  }
+
+  /** {{{
+   *  Name ::= ( Letter | '_' ) (NameChar)*
+   *  }}}
+   *  See [5] of XML 1.0 specification.
+   */
+  def isName(s: String) =
+    s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar)
+
+  /** {{{
+   *  PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
+   *  }}}
+   *  See [13] of XML 1.0 specification.
+   *
+   *  The apostrophe was missing from the punctuation set, with a backslash
+   *  in its place. The apostrophe is now accepted; the backslash is not a
+   *  PubidChar but is still accepted so that input which passed before
+   *  keeps passing.
+   */
+  def isPubIDChar(ch: Char): Boolean =
+    isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') ||
+    ("""-\'()+,./:=?;!*#@$_%""" contains ch)
+
+  /**
+   * Returns `true` if the encoding name is a valid IANA encoding.
+   * This method does not verify that there is a decoder available
+   * for this encoding, only that the characters are valid for an
+   * IANA encoding name.
+   *
+   * @param ianaEncoding The IANA encoding name. 
+ */ + def isValidIANAEncoding(ianaEncoding: Seq[Char]) = { + def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c) + + ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) && + (ianaEncoding.tail forall charOK) + } + + def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c)) + def checkPubID(s: String) = s forall isPubIDChar +} diff --git a/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala new file mode 100644 index 0000000000..1b20901249 --- /dev/null +++ b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala @@ -0,0 +1,104 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +import scala.xml.dtd._ + +abstract class ValidatingMarkupHandler extends MarkupHandler { + + var rootLabel:String = _ + var qStack: List[Int] = Nil + var qCurrent: Int = -1 + + var declStack: List[ElemDecl] = Nil + var declCurrent: ElemDecl = null + + final override val isValidating = true + + override def endDTD(n:String) = { + rootLabel = n + } + override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) { + + def advanceDFA(dm:DFAContentModel) = { + val trans = dm.dfa.delta(qCurrent) + log("advanceDFA(dm): " + dm) + log("advanceDFA(trans): " + trans) + trans.get(ContentModel.ElemName(label)) match { + case Some(qNew) => qCurrent = qNew + case _ => reportValidationError(pos, "DTD says, wrong element, expected one of "+trans.keys) + } + } + // advance in current automaton + log("[qCurrent = "+qCurrent+" visiting "+label+"]") + + if (qCurrent == -1) { // root + log(" checking root") + if (label != rootLabel) + reportValidationError(pos, "this element should be "+rootLabel) + } else { + log(" checking node") + declCurrent.contentModel match { + 
case ANY => + case EMPTY => + reportValidationError(pos, "DTD says, no elems, no text allowed here") + case PCDATA => + reportValidationError(pos, "DTD says, no elements allowed here") + case m @ MIXED(r) => + advanceDFA(m) + case e @ ELEMENTS(r) => + advanceDFA(e) + } + } + // push state, decl + qStack = qCurrent :: qStack + declStack = declCurrent :: declStack + + declCurrent = lookupElemDecl(label) + qCurrent = 0 + log(" done now") + } + + override def elemEnd(pos: Int, pre: String, label: String) { + log(" elemEnd") + qCurrent = qStack.head + qStack = qStack.tail + declCurrent = declStack.head + declStack = declStack.tail + log(" qCurrent now" + qCurrent) + log(" declCurrent now" + declCurrent) + } + + final override def elemDecl(name: String, cmstr: String) { + decls = ElemDecl(name, ContentModel.parse(cmstr)) :: decls + } + + final override def attListDecl(name: String, attList: List[AttrDecl]) { + decls = AttListDecl(name, attList) :: decls + } + + final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String) { + decls = UnparsedEntityDecl(name, extID, notat) :: decls + } + + final override def notationDecl(notat: String, extID: ExternalID) { + decls = NotationDecl(notat, extID) :: decls + } + + final override def peReference(name: String) { + decls = PEReference(name) :: decls + } + + /** report a syntax error */ + def reportValidationError(pos: Int, str: String): Unit +} diff --git a/src/xml/scala/xml/parsing/XhtmlEntities.scala b/src/xml/scala/xml/parsing/XhtmlEntities.scala new file mode 100644 index 0000000000..3683af202c --- /dev/null +++ b/src/xml/scala/xml/parsing/XhtmlEntities.scala @@ -0,0 +1,54 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.xml.dtd.{ IntDef, ParsedEntityDecl } + +/** + * 
@author (c) David Pollak 2007 WorldWide Conferencing, LLC. + * + */ +object XhtmlEntities { /* entList holds (entity name, decimal code point) pairs; entMap and entities are derived from it */ + val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165), + ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482), + ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184), + ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247), + ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200), + ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209), + ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219), + ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228), + ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237), + ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246), + ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338), + ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",8204) /* U+200C ZERO WIDTH NON-JOINER */, ("zwj",8205), ("lrm",8206), + ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), ("bdquo",8222), + ("dagger",8224), ("Dagger",8225), 
("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242), + ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593), + ("rarr",8594), ("darr",8595) /* U+2193 DOWNWARDS ARROW */, ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660), + ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719), + ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744), + ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804), + ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901), + ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827), + ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919), + ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929), + ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947), + ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957), + ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967), + ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982)) + + val entMap: Map[String, Char] = Map.empty[String, Char] ++ entList.map { case (name, 
value) => (name, value.toChar)} + + val entities = entList. + map { case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))} + + def apply() = entities +} diff --git a/src/xml/scala/xml/parsing/XhtmlParser.scala b/src/xml/scala/xml/parsing/XhtmlParser.scala new file mode 100644 index 0000000000..6ce5bec8d0 --- /dev/null +++ b/src/xml/scala/xml/parsing/XhtmlParser.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.io.Source + +/** An XML Parser that preserves `CDATA` blocks and knows about + * [[scala.xml.parsing.XhtmlEntities]]. + * + * @author (c) David Pollak, 2007 WorldWide Conferencing, LLC. + */ +class XhtmlParser(val input: Source) extends ConstructingHandler with MarkupParser with ExternalSources { + val preserveWS = true + ent ++= XhtmlEntities() +} + +/** Convenience method that instantiates, initializes and runs an `XhtmlParser`. 
+ * + * @author Burak Emir + */ +object XhtmlParser { + def apply(source: Source): NodeSeq = new XhtmlParser(source).initialize.document() +} diff --git a/src/xml/scala/xml/persistent/CachedFileStorage.scala b/src/xml/scala/xml/persistent/CachedFileStorage.scala new file mode 100644 index 0000000000..a1489ef3f4 --- /dev/null +++ b/src/xml/scala/xml/persistent/CachedFileStorage.scala @@ -0,0 +1,129 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package persistent + +import java.io.{ File, FileOutputStream } +import java.nio.ByteBuffer +import java.nio.channels.Channels +import java.lang.Thread + +import scala.collection.Iterator + +/** Mutable storage of immutable xml trees. Everything is kept in memory, + * with a thread periodically checking for changes and writing to file. + * + * To ensure atomicity, two files are used, `filename1` and `filename1+'$'`. + * The implementation switches between the two, deleting and rewriting the + * older one each time a complete dump of the database is written. + * + * @author Burak Emir + */ +abstract class CachedFileStorage(private val file1: File) extends Thread { + + private val file2 = new File(file1.getParent, file1.getName+"$") + + /** Either equals `file1` or `file2`, references the next file in which + * updates will be stored. + */ + private var theFile: File = null + + private def switch() = { theFile = if (theFile == file1) file2 else file1; } + + /** this storage modified since last modification check */ + protected var dirty = false + + /** period between modification checks, in milliseconds */ + protected val interval = 1000 + + /** finds and loads the storage file. subclasses should call this method + * prior to any other, but only once, to obtain the initial sequence of nodes. 
+ */ + protected def initialNodes: Iterator[Node] = (file1.exists, file2.exists) match { + case (false,false) => + theFile = file1 + Iterator.empty + case (true, true ) if (file1.lastModified < file2.lastModified) => + theFile = file2 + load + case (true, _ ) => + theFile = file1 + load + case _ => + theFile = file2 + load + } + + /** returns an iterator over the nodes in this storage */ + def nodes: Iterator[Node] + + /** adds a node, setting this.dirty to true as a side effect */ + def += (e: Node): Unit + + /** removes a tree, setting this.dirty to true as a side effect */ + def -= (e: Node): Unit + + /* loads and parses XML from file; the stored nodes are the children of the root element */ + private def load: Iterator[Node] = { + import scala.io.Source + import scala.xml.parsing.ConstructingParser + log("[load]\nloading "+theFile) + val src = Source.fromFile(theFile) + log("parsing "+theFile) + val res = try ConstructingParser.fromSource(src,preserveWS = false).document.docElem(0) finally src.close() /* the tree is fully built here, so release the file handle */ + switch() + log("[load done]") + res.child.iterator + } + + /** saves the XML to file */ + private def save() = if (this.dirty) { + log("[save]\ndeleting "+theFile) + theFile.delete() + log("creating new "+theFile) + theFile.createNewFile() + val fos = new FileOutputStream(theFile) + val c = fos.getChannel() + + // @todo: optimize + val storageNode = <nodes>{ nodes.toList }</nodes> /* single root element; load reads back its children */ + val w = Channels.newWriter(c, "utf-8") + XML.write(w, storageNode, "utf-8", xmlDecl = true, doctype = null) + + log("writing to "+theFile) + + w.close + c.close + fos.close + dirty = false + switch() + log("[save done]") + } + + /** Run method of the thread. remember to use `start()` to start a thread, + * not `run`. */ + override def run = { + log("[run]\nstarting storage thread, checking every "+interval+" ms") + while (true) { + Thread.sleep( this.interval.toLong ) + save() + } + } + + /** Force writing of contents to the file, even if there has not been any + * update. 
*/ + def flush() = { + this.dirty = true + save() + } + + @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") + def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/persistent/Index.scala b/src/xml/scala/xml/persistent/Index.scala new file mode 100644 index 0000000000..9ee45e7086 --- /dev/null +++ b/src/xml/scala/xml/persistent/Index.scala @@ -0,0 +1,17 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package persistent + +/** an Index returns some unique key that is part of a node + */ +abstract class Index[A] extends Function1[Node,A] {} diff --git a/src/xml/scala/xml/persistent/SetStorage.scala b/src/xml/scala/xml/persistent/SetStorage.scala new file mode 100644 index 0000000000..8db56a2e71 --- /dev/null +++ b/src/xml/scala/xml/persistent/SetStorage.scala @@ -0,0 +1,42 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package persistent + +import scala.collection.mutable +import java.io.File + +/** A persistent store with set semantics. This class allows to add and remove + * trees, but never contains two structurally equal trees. 
+ * + * @author Burak Emir + */ +class SetStorage(file: File) extends CachedFileStorage(file) { + + private val theSet = mutable.HashSet[Node]() + + // initialize + + { + val it = super.initialNodes + dirty = it.hasNext + theSet ++= it + } + + /* forwarding methods to hashset*/ + + def += (e: Node): Unit = synchronized { this.dirty = true; theSet += e } + + def -= (e: Node): Unit = synchronized { this.dirty = true; theSet -= e } + + def nodes = synchronized { theSet.iterator } + +} diff --git a/src/xml/scala/xml/pull/XMLEvent.scala b/src/xml/scala/xml/pull/XMLEvent.scala new file mode 100644 index 0000000000..3beb3648e7 --- /dev/null +++ b/src/xml/scala/xml/pull/XMLEvent.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package pull + +/** An XML event for pull parsing. All events received during + * parsing will be one of the subclasses of this trait. + */ +trait XMLEvent + +/** + * An Element's start tag was encountered. + * @param pre prefix, if any, on the element. This is the `xs` in `<xs:string>foo</xs:string>`. + * @param label the name of the element, not including the prefix + * @param attrs any attributes on the element + */ +case class EvElemStart(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) extends XMLEvent + +/** + * An Element's end tag was encountered. + * @param pre prefix, if any, on the element. This is the `xs` in `<xs:string>foo</xs:string>`. + * @param label the name of the element, not including the prefix + */ +case class EvElemEnd(pre: String, label: String) extends XMLEvent + +/** + * A text node was encountered. + * @param text the text that was found + */ +case class EvText(text: String) extends XMLEvent + +/** An entity reference was encountered. + * @param entity the name of the entity, e.g. 
`gt` when encountering the entity `&gt;` + */ +case class EvEntityRef(entity: String) extends XMLEvent + +/** + * A processing instruction was encountered. + * @param target the "PITarget" of the processing instruction. For the instruction `<?foo bar="baz"?>`, the target would + * be `foo` + * @param text the remainder of the instruction. For the instruction `<?foo bar="baz"?>`, the text would + * be `bar="baz"` + * @see [[http://www.w3.org/TR/REC-xml/#sec-pi]] + */ +case class EvProcInstr(target: String, text: String) extends XMLEvent + +/** + * A comment was encountered + * @param text the text of the comment + */ +case class EvComment(text: String) extends XMLEvent diff --git a/src/xml/scala/xml/pull/XMLEventReader.scala b/src/xml/scala/xml/pull/XMLEventReader.scala new file mode 100755 index 0000000000..76e51e17fd --- /dev/null +++ b/src/xml/scala/xml/pull/XMLEventReader.scala @@ -0,0 +1,157 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package pull + +import scala.io.Source +import java.lang.Thread +import java.util.concurrent.LinkedBlockingQueue +import java.nio.channels.ClosedChannelException +import scala.xml.parsing.{ ExternalSources, MarkupHandler, MarkupParser } + +/** + * Main entry point into creating an event-based XML parser. Treating this + * as a [[scala.collection.Iterator]] will provide access to the generated events. + * @param src A [[scala.io.Source]] for XML data to parse + * + * @author Burak Emir + * @author Paul Phillips + */ +class XMLEventReader(src: Source) +extends scala.collection.AbstractIterator[XMLEvent] + with ProducerConsumerIterator[XMLEvent] { + + // We implement a pull parser as an iterator, but since we may be operating on + // a stream (e.g. XML over a network) there may be arbitrarily long periods when + // the queue is empty. 
Fortunately the ProducerConsumerIterator is ideally + // suited to this task, possibly because it was written for use by this class. + + // to override as necessary + val preserveWS = true + + override val MaxQueueSize = 1000 + protected case object POISON extends XMLEvent + val EndOfStream = POISON + + // thread machinery + private[this] val parser = new Parser(src) + private[this] val parserThread = new Thread(parser, "XMLEventReader") + parserThread.start + // enqueueing the poison object is the reliable way to cause the + // iterator to terminate; hasNext will return false once it sees it. + // Calling interrupt() on the parserThread is the only way we can get + // it to stop producing tokens since it's lost deep in document() - + // we cross our fingers the interrupt() gets to its target, but if it + // fails for whatever reason the iterator correctness is not impacted, + // only performance (because it will finish the entire XML document, + // or at least as much as it can fit in the queue.) 
+ def stop() = { + produce(POISON) + parserThread.interrupt() + } + + private class Parser(val input: Source) extends MarkupHandler with MarkupParser with ExternalSources with Runnable { + val preserveWS = XMLEventReader.this.preserveWS + // track level for elem memory usage optimization + private var level = 0 + + // this is Parser's way to add to the queue - the odd return type + // is to conform to MarkupHandler's interface + def setEvent(es: XMLEvent*): NodeSeq = { + es foreach produce + NodeSeq.Empty + } + + override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) { + level += 1 + setEvent(EvElemStart(pre, label, attrs, scope)) + } + override def elemEnd(pos: Int, pre: String, label: String) { + setEvent(EvElemEnd(pre, label)) + level -= 1 + } + + // this is a dummy to satisfy MarkupHandler's API + // memory usage optimization: return a single placeholder element for the top level to satisfy + // MarkupParser.document(), otherwise NodeSeq.Empty + private var ignoreWritten = false + final def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = + if (level == 1 && !ignoreWritten) {ignoreWritten = true; <ignore/> } else NodeSeq.Empty + + def procInstr(pos: Int, target: String, txt: String) = setEvent(EvProcInstr(target, txt)) + def comment(pos: Int, txt: String) = setEvent(EvComment(txt)) + def entityRef(pos: Int, n: String) = setEvent(EvEntityRef(n)) + def text(pos: Int, txt:String) = setEvent(EvText(txt)) + + override def run() { + curInput = input + interruptibly { this.initialize.document() } + setEvent(POISON) + } + } +} + +// An iterator designed for one or more producers to generate +// elements, and a single consumer to iterate. Iteration will continue +// until the consumer reads the EndOfStream marker, after which point +// calls to produce() are silently ignored. 
+// +// Since hasNext may block indefinitely if nobody is producing, +// there is also an available() method which will return true if +// the next call hasNext is guaranteed not to block. +// +// This is not thread-safe for multiple consumers! +trait ProducerConsumerIterator[T >: Null] extends Iterator[T] { + // abstract - iterator-specific distinguished object for marking eos + val EndOfStream: T + + // defaults to unbounded - override to positive Int if desired + val MaxQueueSize = -1 + + def interruptibly[T](body: => T): Option[T] = try Some(body) catch { + case _: InterruptedException => Thread.currentThread.interrupt(); None + case _: ClosedChannelException => None + } + + private[this] lazy val queue = + if (MaxQueueSize < 0) new LinkedBlockingQueue[T]() + else new LinkedBlockingQueue[T](MaxQueueSize) + private[this] var buffer: T = _ + private def fillBuffer() = { + buffer = interruptibly(queue.take) getOrElse EndOfStream + isElement(buffer) + } + private def isElement(x: T) = x != null && x != EndOfStream + private def eos() = buffer == EndOfStream + + // public producer interface - this is the only method producers call, so + // LinkedBlockingQueue's synchronization is all we need. + def produce(x: T): Unit = if (!eos) interruptibly(queue put x) + + // consumer/iterator interface - we need not synchronize access to buffer + // because we required there to be only one consumer. 
+ def hasNext = !eos && (buffer != null || fillBuffer) + + def next() = { + if (eos()) throw new NoSuchElementException("ProducerConsumerIterator") + if (buffer == null) fillBuffer() + + drainBuffer() + } + + def available() = isElement(buffer) || isElement(queue.peek) + + private def drainBuffer() = { + assert(!eos) + val res = buffer + buffer = null + res + } +} diff --git a/src/xml/scala/xml/pull/package.scala b/src/xml/scala/xml/pull/package.scala new file mode 100644 index 0000000000..0e3019446b --- /dev/null +++ b/src/xml/scala/xml/pull/package.scala @@ -0,0 +1,42 @@ +package scala +package xml + +/** + * Classes needed to view an XML document as a series of events. The document + * is parsed by an [[scala.xml.pull.XMLEventReader]] instance. You can treat it as + * an [[scala.collection.Iterator]] to retrieve the events, which are all + * subclasses of [[scala.xml.pull.XMLEvent]]. + * + * {{{ + * scala> val source = Source.fromString("""<?xml version="1.0" encoding="UTF-8" standalone="yes"?> + * <?instruction custom value="customvalue"?> + * <!DOCTYPE foo [ + * <!ENTITY bar "BAR"> + * ]><foo>Hello<!-- this is a comment --><bar>&bar;</bar><bar>&gt;</bar></foo>""") + * + * source: scala.io.Source = non-empty iterator + * + * scala> val reader = new XMLEventReader(source) + * reader: scala.xml.pull.XMLEventReader = non-empty iterator + * + * scala> reader.foreach{ println(_) } + * EvProcInstr(instruction,custom value="customvalue") + * EvText( + * ) + * EvElemStart(null,foo,,) + * EvText(Hello) + * EvComment( this is a comment ) + * EvElemStart(null,bar,,) + * EvText(BAR) + * EvElemEnd(null,bar) + * EvElemStart(null,bar,,) + * EvEntityRef(gt) + * EvElemEnd(null,bar) + * EvElemEnd(null,foo) + * EvText( + * + * ) + * + * }}} + */ +package object pull diff --git a/src/xml/scala/xml/transform/BasicTransformer.scala b/src/xml/scala/xml/transform/BasicTransformer.scala new file mode 100644 index 0000000000..c98339fd67 --- /dev/null +++ b/src/xml/scala/xml/transform/BasicTransformer.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** 
+** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package transform + +/** A class for XML transformations. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class BasicTransformer extends Function1[Node,Node] +{ + protected def unchanged(n: Node, ns: Seq[Node]) = + ns.length == 1 && (ns.head == n) + + /** Call transform(Node) for each node in ns, append results + * to NodeBuffer. + */ + def transform(it: Iterator[Node], nb: NodeBuffer): Seq[Node] = + it.foldLeft(nb)(_ ++= transform(_)).toSeq + + /** Call transform(Node) to each node in ns exactly once, yield ns itself if nothing + * changes (per `unchanged`), otherwise a new sequence of concatenated results. + */ + def transform(ns: Seq[Node]): Seq[Node] = { + val results = ns map (n => transform(n)) /* one pass; re-transforming nodes made nested trees exponential */ + + if ((ns, results).zipped forall ((n, r) => unchanged(n, r))) ns + else results.flatten + } + + def transform(n: Node): Seq[Node] = { + if (n.doTransform) n match { + case Group(xs) => Group(transform(xs)) // un-group the hack Group tag + case _ => + val ch = n.child + val nch = transform(ch) + + if (ch eq nch) n + else Elem(n.prefix, n.label, n.attributes, n.scope, nch: _*) + } + else n + } + + def apply(n: Node): Node = { + val seq = transform(n) + if (seq.length > 1) + throw new UnsupportedOperationException("transform must return single node for root") + else seq.head + } +} diff --git a/src/xml/scala/xml/transform/RewriteRule.scala b/src/xml/scala/xml/transform/RewriteRule.scala new file mode 100644 index 0000000000..1399ee538d --- /dev/null +++ b/src/xml/scala/xml/transform/RewriteRule.scala @@ -0,0 +1,28 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package transform + +/** A RewriteRule, when applied to a term, yields either + * the result of rewriting the term or the term itself if the 
rule + * is not applied. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class RewriteRule extends BasicTransformer { + /** a name for this rewrite rule */ + val name = this.toString() + override def transform(ns: Seq[Node]): Seq[Node] = super.transform(ns) + override def transform(n: Node): Seq[Node] = n +} + diff --git a/src/xml/scala/xml/transform/RuleTransformer.scala b/src/xml/scala/xml/transform/RuleTransformer.scala new file mode 100644 index 0000000000..3a222ba759 --- /dev/null +++ b/src/xml/scala/xml/transform/RuleTransformer.scala @@ -0,0 +1,16 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package transform + +class RuleTransformer(rules: RewriteRule*) extends BasicTransformer { + override def transform(n: Node): Seq[Node] = + rules.foldLeft(super.transform(n)) { (res, rule) => rule transform res } +} -- cgit v1.2.3