From b672009b43f6cbfd8cb22402f4f40b9201157e23 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Wed, 3 Jul 2013 17:12:31 -0700 Subject: No more duplication in maven-deploy.xml. I just couldn't stand the incredible mess in there anymore. More cleanup to come. For now, suffice it to say you need only add one line per new module. --- src/build/maven/continuations-plugin-pom.xml | 62 ----- src/build/maven/maven-deploy.xml | 318 +++++++------------------- src/build/maven/plugins/continuations-pom.xml | 62 +++++ src/build/maven/scala-dotnet-library-pom.xml | 45 ---- src/build/maven/scala-library-pom.xml | 5 - src/build/pack.xml | 6 +- 6 files changed, 149 insertions(+), 349 deletions(-) delete mode 100644 src/build/maven/continuations-plugin-pom.xml create mode 100644 src/build/maven/plugins/continuations-pom.xml delete mode 100644 src/build/maven/scala-dotnet-library-pom.xml (limited to 'src') diff --git a/src/build/maven/continuations-plugin-pom.xml b/src/build/maven/continuations-plugin-pom.xml deleted file mode 100644 index 9abb0a36f0..0000000000 --- a/src/build/maven/continuations-plugin-pom.xml +++ /dev/null @@ -1,62 +0,0 @@ - - 4.0.0 - org.scala-lang.plugins - continuations - jar - @VERSION@ - Scala Continuations Plugin - Delimited continuations compilation for Scala - http://www.scala-lang.org/ - 2010 - - LAMP/EPFL - http://lamp.epfl.ch/ - - - - BSD-like - http://www.scala-lang.org/downloads/license.html - - repo - - - - scm:git:git://github.com/scala/scala.git - https://github.com/scala/scala.git - - - JIRA - https://issues.scala-lang.org/ - - - - - org.scala-lang - scala-compiler - @VERSION@ - - - - - scala-tools.org - @RELEASE_REPOSITORY@ - - - scala-tools.org - @SNAPSHOT_REPOSITORY@ - false - - - - - lamp - EPFL LAMP - - - Typesafe - Typesafe, Inc. 
- - - diff --git a/src/build/maven/maven-deploy.xml b/src/build/maven/maven-deploy.xml index e70173319e..bf82346b80 100644 --- a/src/build/maven/maven-deploy.xml +++ b/src/build/maven/maven-deploy.xml @@ -5,271 +5,119 @@ SuperSabbus extension for deploying a distribution to Maven. THIS FILE IS MEANT TO BE RUN STANDALONE IN THE MAVEN "distpack" DIRECTORY - + + - + - + - + + - - - - - Using server[${repository.credentials.id}] for maven repository credentials. - Please make sure that your ~/.m2/settings.xml has the needed username/password for this server id - - - - - + + + - - - - - - - - - - - - - - - + + + + + + + + - + Using server[${repository.credentials.id}] for maven repository credentials. + Please make sure that your ~/.m2/settings.xml has the needed username/password for this server id + + + + + + + - + + + Deploying ${path}-[pom.xml|src.jar|docs.jar]. + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - - + + + - - - - - - - - - - - - - + + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + diff --git a/src/build/maven/plugins/continuations-pom.xml b/src/build/maven/plugins/continuations-pom.xml new file mode 100644 index 0000000000..9abb0a36f0 --- /dev/null +++ b/src/build/maven/plugins/continuations-pom.xml @@ -0,0 +1,62 @@ + + 4.0.0 + org.scala-lang.plugins + continuations + jar + @VERSION@ + Scala Continuations Plugin + Delimited continuations compilation for Scala + http://www.scala-lang.org/ + 2010 + + LAMP/EPFL + http://lamp.epfl.ch/ + + + + BSD-like + http://www.scala-lang.org/downloads/license.html + + repo + + + + scm:git:git://github.com/scala/scala.git + https://github.com/scala/scala.git + + + 
JIRA + https://issues.scala-lang.org/ + + + + + org.scala-lang + scala-compiler + @VERSION@ + + + + + scala-tools.org + @RELEASE_REPOSITORY@ + + + scala-tools.org + @SNAPSHOT_REPOSITORY@ + false + + + + + lamp + EPFL LAMP + + + Typesafe + Typesafe, Inc. + + + diff --git a/src/build/maven/scala-dotnet-library-pom.xml b/src/build/maven/scala-dotnet-library-pom.xml deleted file mode 100644 index 007e8be173..0000000000 --- a/src/build/maven/scala-dotnet-library-pom.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - - 4.0.0 - org.scala-lang - scala-dotnet-library - @VERSION@ - Class Library - dotnet:library - - http://www.scala-lang.org/ - 2002 - - LAMP/EPFL - http://lamp.epfl.ch/ - - - - BSD-like - http://www.scala-lang.org/downloads/license.html - - repo - - - - scm:git:git://github.com/scala/scala.git - https://github.com/scala/scala.git - - - JIRA - https://issues.scala-lang.org/ - - - - scala-tools.org - @RELEASE_REPOSITORY@ - - - scala-tools.org - @SNAPSHOT_REPOSITORY@ - false - - - diff --git a/src/build/maven/scala-library-pom.xml b/src/build/maven/scala-library-pom.xml index fc9964ae92..684474e79a 100644 --- a/src/build/maven/scala-library-pom.xml +++ b/src/build/maven/scala-library-pom.xml @@ -34,11 +34,6 @@ http://www.scala-lang.org/api/@VERSION@/ - diff --git a/src/build/pack.xml b/src/build/pack.xml index 20c4034107..f8eb3c67c5 100644 --- a/src/build/pack.xml +++ b/src/build/pack.xml @@ -170,8 +170,8 @@ MAIN DISTRIBUTION PACKAGING - - + + @@ -242,6 +242,8 @@ MAIN DISTRIBUTION PACKAGING + Date: Thu, 20 Jun 2013 11:39:47 -0700 Subject: Spin off src/library/scala/xml to src/xml/scala/xml. Summary: - Remove the last vestiges of xml from Predef and Contexts. - Change build to compile scala.xml to scala-xml.jar. - Deploy scala-xml module to maven. - Update partest accordingly. Note: An older compiler cannot use the new standard library to compile projects that use XML. Thus, skipping locker will break the build until we use 2.11.0-M4 for STARR. 
In the future build process, where we drop locker, we would have to release a milestone that supports the old and the new approach to xml. As soon as we'd be using that new milestone for starr, we could drop support for the old approach. --- build.xml | 81 +- src/build/bnd/scala-xml.bnd | 5 + src/build/maven/maven-deploy.xml | 1 + src/build/maven/scala-xml-pom.xml | 59 ++ src/build/pack.xml | 5 + .../scala/tools/nsc/typechecker/Contexts.scala | 21 +- src/library/scala/Predef.scala | 9 - src/library/scala/xml/Atom.scala | 47 -- src/library/scala/xml/Attribute.scala | 101 --- src/library/scala/xml/Comment.scala | 31 - src/library/scala/xml/Document.scala | 92 -- src/library/scala/xml/Elem.scala | 135 --- src/library/scala/xml/EntityRef.scala | 40 - src/library/scala/xml/Equality.scala | 107 --- src/library/scala/xml/Group.scala | 42 - .../scala/xml/MalformedAttributeException.scala | 15 - src/library/scala/xml/MetaData.scala | 217 ----- src/library/scala/xml/NamespaceBinding.scala | 83 -- src/library/scala/xml/Node.scala | 198 ----- src/library/scala/xml/NodeBuffer.scala | 47 -- src/library/scala/xml/NodeSeq.scala | 157 ---- src/library/scala/xml/Null.scala | 62 -- src/library/scala/xml/PCData.scala | 44 - src/library/scala/xml/PrefixedAttribute.scala | 61 -- src/library/scala/xml/PrettyPrinter.scala | 263 ------ src/library/scala/xml/ProcInstr.scala | 39 - src/library/scala/xml/QNode.scala | 20 - src/library/scala/xml/SpecialNode.scala | 33 - src/library/scala/xml/Text.scala | 39 - src/library/scala/xml/TextBuffer.scala | 46 - src/library/scala/xml/TopScope.scala | 31 - src/library/scala/xml/TypeSymbol.scala | 15 - src/library/scala/xml/Unparsed.scala | 36 - src/library/scala/xml/UnprefixedAttribute.scala | 61 -- src/library/scala/xml/Utility.scala | 410 --------- src/library/scala/xml/XML.scala | 109 --- src/library/scala/xml/Xhtml.scala | 97 --- src/library/scala/xml/dtd/ContentModel.scala | 118 --- src/library/scala/xml/dtd/ContentModelParser.scala | 129 --- 
src/library/scala/xml/dtd/DTD.scala | 35 - src/library/scala/xml/dtd/Decl.scala | 157 ---- src/library/scala/xml/dtd/DocType.scala | 39 - src/library/scala/xml/dtd/ElementValidator.scala | 132 --- src/library/scala/xml/dtd/ExternalID.scala | 86 -- src/library/scala/xml/dtd/Scanner.scala | 79 -- src/library/scala/xml/dtd/Tokens.scala | 45 - .../scala/xml/dtd/ValidationException.scala | 44 - src/library/scala/xml/dtd/impl/Base.scala | 67 -- .../scala/xml/dtd/impl/BaseBerrySethi.scala | 98 --- src/library/scala/xml/dtd/impl/DetWordAutom.scala | 50 -- src/library/scala/xml/dtd/impl/Inclusion.scala | 70 -- .../scala/xml/dtd/impl/NondetWordAutom.scala | 60 -- .../scala/xml/dtd/impl/PointedHedgeExp.scala | 37 - .../scala/xml/dtd/impl/SubsetConstruction.scala | 108 --- src/library/scala/xml/dtd/impl/SyntaxError.scala | 21 - .../scala/xml/dtd/impl/WordBerrySethi.scala | 162 ---- src/library/scala/xml/dtd/impl/WordExp.scala | 59 -- src/library/scala/xml/factory/Binder.scala | 61 -- .../scala/xml/factory/LoggedNodeFactory.scala | 90 -- src/library/scala/xml/factory/NodeFactory.scala | 61 -- src/library/scala/xml/factory/XMLLoader.scala | 61 -- .../xml/include/CircularIncludeException.scala | 25 - .../xml/include/UnavailableResourceException.scala | 20 - .../scala/xml/include/XIncludeException.scala | 58 -- .../scala/xml/include/sax/EncodingHeuristics.scala | 98 --- .../scala/xml/include/sax/XIncludeFilter.scala | 373 -------- src/library/scala/xml/include/sax/XIncluder.scala | 187 ---- src/library/scala/xml/package.scala | 19 - .../scala/xml/parsing/ConstructingHandler.scala | 34 - .../scala/xml/parsing/ConstructingParser.scala | 55 -- .../scala/xml/parsing/DefaultMarkupHandler.scala | 30 - .../scala/xml/parsing/ExternalSources.scala | 38 - src/library/scala/xml/parsing/FactoryAdapter.scala | 187 ---- src/library/scala/xml/parsing/FatalError.scala | 17 - src/library/scala/xml/parsing/MarkupHandler.scala | 127 --- src/library/scala/xml/parsing/MarkupParser.scala | 938 
--------------------- .../scala/xml/parsing/MarkupParserCommon.scala | 260 ------ .../xml/parsing/NoBindingFactoryAdapter.scala | 37 - src/library/scala/xml/parsing/TokenTests.scala | 101 --- .../xml/parsing/ValidatingMarkupHandler.scala | 104 --- src/library/scala/xml/parsing/XhtmlEntities.scala | 54 -- src/library/scala/xml/parsing/XhtmlParser.scala | 31 - .../scala/xml/persistent/CachedFileStorage.scala | 129 --- src/library/scala/xml/persistent/Index.scala | 17 - src/library/scala/xml/persistent/SetStorage.scala | 42 - src/library/scala/xml/pull/XMLEvent.scala | 60 -- src/library/scala/xml/pull/XMLEventReader.scala | 157 ---- src/library/scala/xml/pull/package.scala | 42 - .../scala/xml/transform/BasicTransformer.scala | 60 -- src/library/scala/xml/transform/RewriteRule.scala | 28 - .../scala/xml/transform/RuleTransformer.scala | 16 - .../scala/tools/partest/nest/FileManager.scala | 3 +- src/xml/scala/xml/Atom.scala | 47 ++ src/xml/scala/xml/Attribute.scala | 101 +++ src/xml/scala/xml/Comment.scala | 31 + src/xml/scala/xml/Document.scala | 92 ++ src/xml/scala/xml/Elem.scala | 135 +++ src/xml/scala/xml/EntityRef.scala | 40 + src/xml/scala/xml/Equality.scala | 107 +++ src/xml/scala/xml/Group.scala | 42 + .../scala/xml/MalformedAttributeException.scala | 15 + src/xml/scala/xml/MetaData.scala | 217 +++++ src/xml/scala/xml/NamespaceBinding.scala | 83 ++ src/xml/scala/xml/Node.scala | 198 +++++ src/xml/scala/xml/NodeBuffer.scala | 47 ++ src/xml/scala/xml/NodeSeq.scala | 157 ++++ src/xml/scala/xml/Null.scala | 62 ++ src/xml/scala/xml/PCData.scala | 44 + src/xml/scala/xml/PrefixedAttribute.scala | 61 ++ src/xml/scala/xml/PrettyPrinter.scala | 263 ++++++ src/xml/scala/xml/ProcInstr.scala | 39 + src/xml/scala/xml/QNode.scala | 20 + src/xml/scala/xml/SpecialNode.scala | 33 + src/xml/scala/xml/Text.scala | 39 + src/xml/scala/xml/TextBuffer.scala | 46 + src/xml/scala/xml/TopScope.scala | 31 + src/xml/scala/xml/TypeSymbol.scala | 15 + src/xml/scala/xml/Unparsed.scala | 36 + 
src/xml/scala/xml/UnprefixedAttribute.scala | 61 ++ src/xml/scala/xml/Utility.scala | 410 +++++++++ src/xml/scala/xml/XML.scala | 109 +++ src/xml/scala/xml/Xhtml.scala | 97 +++ src/xml/scala/xml/dtd/ContentModel.scala | 118 +++ src/xml/scala/xml/dtd/ContentModelParser.scala | 129 +++ src/xml/scala/xml/dtd/DTD.scala | 35 + src/xml/scala/xml/dtd/Decl.scala | 157 ++++ src/xml/scala/xml/dtd/DocType.scala | 39 + src/xml/scala/xml/dtd/ElementValidator.scala | 132 +++ src/xml/scala/xml/dtd/ExternalID.scala | 86 ++ src/xml/scala/xml/dtd/Scanner.scala | 79 ++ src/xml/scala/xml/dtd/Tokens.scala | 45 + src/xml/scala/xml/dtd/ValidationException.scala | 44 + src/xml/scala/xml/dtd/impl/Base.scala | 67 ++ src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala | 98 +++ src/xml/scala/xml/dtd/impl/DetWordAutom.scala | 50 ++ src/xml/scala/xml/dtd/impl/Inclusion.scala | 70 ++ src/xml/scala/xml/dtd/impl/NondetWordAutom.scala | 60 ++ src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala | 37 + .../scala/xml/dtd/impl/SubsetConstruction.scala | 108 +++ src/xml/scala/xml/dtd/impl/SyntaxError.scala | 21 + src/xml/scala/xml/dtd/impl/WordBerrySethi.scala | 162 ++++ src/xml/scala/xml/dtd/impl/WordExp.scala | 59 ++ src/xml/scala/xml/factory/Binder.scala | 61 ++ src/xml/scala/xml/factory/LoggedNodeFactory.scala | 90 ++ src/xml/scala/xml/factory/NodeFactory.scala | 61 ++ src/xml/scala/xml/factory/XMLLoader.scala | 61 ++ .../xml/include/CircularIncludeException.scala | 25 + .../xml/include/UnavailableResourceException.scala | 20 + src/xml/scala/xml/include/XIncludeException.scala | 58 ++ .../scala/xml/include/sax/EncodingHeuristics.scala | 98 +++ src/xml/scala/xml/include/sax/XIncludeFilter.scala | 373 ++++++++ src/xml/scala/xml/include/sax/XIncluder.scala | 187 ++++ src/xml/scala/xml/package.scala | 19 + .../scala/xml/parsing/ConstructingHandler.scala | 34 + src/xml/scala/xml/parsing/ConstructingParser.scala | 55 ++ .../scala/xml/parsing/DefaultMarkupHandler.scala | 30 + 
src/xml/scala/xml/parsing/ExternalSources.scala | 38 + src/xml/scala/xml/parsing/FactoryAdapter.scala | 187 ++++ src/xml/scala/xml/parsing/FatalError.scala | 17 + src/xml/scala/xml/parsing/MarkupHandler.scala | 127 +++ src/xml/scala/xml/parsing/MarkupParser.scala | 938 +++++++++++++++++++++ src/xml/scala/xml/parsing/MarkupParserCommon.scala | 260 ++++++ .../xml/parsing/NoBindingFactoryAdapter.scala | 37 + src/xml/scala/xml/parsing/TokenTests.scala | 101 +++ .../xml/parsing/ValidatingMarkupHandler.scala | 104 +++ src/xml/scala/xml/parsing/XhtmlEntities.scala | 54 ++ src/xml/scala/xml/parsing/XhtmlParser.scala | 31 + .../scala/xml/persistent/CachedFileStorage.scala | 129 +++ src/xml/scala/xml/persistent/Index.scala | 17 + src/xml/scala/xml/persistent/SetStorage.scala | 42 + src/xml/scala/xml/pull/XMLEvent.scala | 60 ++ src/xml/scala/xml/pull/XMLEventReader.scala | 157 ++++ src/xml/scala/xml/pull/package.scala | 42 + src/xml/scala/xml/transform/BasicTransformer.scala | 60 ++ src/xml/scala/xml/transform/RewriteRule.scala | 28 + src/xml/scala/xml/transform/RuleTransformer.scala | 16 + test/partest | 2 +- 177 files changed, 8028 insertions(+), 7940 deletions(-) create mode 100644 src/build/bnd/scala-xml.bnd create mode 100644 src/build/maven/scala-xml-pom.xml delete mode 100644 src/library/scala/xml/Atom.scala delete mode 100644 src/library/scala/xml/Attribute.scala delete mode 100644 src/library/scala/xml/Comment.scala delete mode 100644 src/library/scala/xml/Document.scala delete mode 100755 src/library/scala/xml/Elem.scala delete mode 100644 src/library/scala/xml/EntityRef.scala delete mode 100644 src/library/scala/xml/Equality.scala delete mode 100644 src/library/scala/xml/Group.scala delete mode 100644 src/library/scala/xml/MalformedAttributeException.scala delete mode 100644 src/library/scala/xml/MetaData.scala delete mode 100644 src/library/scala/xml/NamespaceBinding.scala delete mode 100755 src/library/scala/xml/Node.scala delete mode 100644 
src/library/scala/xml/NodeBuffer.scala delete mode 100644 src/library/scala/xml/NodeSeq.scala delete mode 100644 src/library/scala/xml/Null.scala delete mode 100644 src/library/scala/xml/PCData.scala delete mode 100644 src/library/scala/xml/PrefixedAttribute.scala delete mode 100755 src/library/scala/xml/PrettyPrinter.scala delete mode 100644 src/library/scala/xml/ProcInstr.scala delete mode 100644 src/library/scala/xml/QNode.scala delete mode 100644 src/library/scala/xml/SpecialNode.scala delete mode 100644 src/library/scala/xml/Text.scala delete mode 100644 src/library/scala/xml/TextBuffer.scala delete mode 100644 src/library/scala/xml/TopScope.scala delete mode 100644 src/library/scala/xml/TypeSymbol.scala delete mode 100644 src/library/scala/xml/Unparsed.scala delete mode 100644 src/library/scala/xml/UnprefixedAttribute.scala delete mode 100755 src/library/scala/xml/Utility.scala delete mode 100755 src/library/scala/xml/XML.scala delete mode 100644 src/library/scala/xml/Xhtml.scala delete mode 100644 src/library/scala/xml/dtd/ContentModel.scala delete mode 100644 src/library/scala/xml/dtd/ContentModelParser.scala delete mode 100644 src/library/scala/xml/dtd/DTD.scala delete mode 100644 src/library/scala/xml/dtd/Decl.scala delete mode 100644 src/library/scala/xml/dtd/DocType.scala delete mode 100644 src/library/scala/xml/dtd/ElementValidator.scala delete mode 100644 src/library/scala/xml/dtd/ExternalID.scala delete mode 100644 src/library/scala/xml/dtd/Scanner.scala delete mode 100644 src/library/scala/xml/dtd/Tokens.scala delete mode 100644 src/library/scala/xml/dtd/ValidationException.scala delete mode 100644 src/library/scala/xml/dtd/impl/Base.scala delete mode 100644 src/library/scala/xml/dtd/impl/BaseBerrySethi.scala delete mode 100644 src/library/scala/xml/dtd/impl/DetWordAutom.scala delete mode 100644 src/library/scala/xml/dtd/impl/Inclusion.scala delete mode 100644 src/library/scala/xml/dtd/impl/NondetWordAutom.scala delete mode 100644 
src/library/scala/xml/dtd/impl/PointedHedgeExp.scala delete mode 100644 src/library/scala/xml/dtd/impl/SubsetConstruction.scala delete mode 100644 src/library/scala/xml/dtd/impl/SyntaxError.scala delete mode 100644 src/library/scala/xml/dtd/impl/WordBerrySethi.scala delete mode 100644 src/library/scala/xml/dtd/impl/WordExp.scala delete mode 100755 src/library/scala/xml/factory/Binder.scala delete mode 100644 src/library/scala/xml/factory/LoggedNodeFactory.scala delete mode 100644 src/library/scala/xml/factory/NodeFactory.scala delete mode 100644 src/library/scala/xml/factory/XMLLoader.scala delete mode 100644 src/library/scala/xml/include/CircularIncludeException.scala delete mode 100644 src/library/scala/xml/include/UnavailableResourceException.scala delete mode 100644 src/library/scala/xml/include/XIncludeException.scala delete mode 100644 src/library/scala/xml/include/sax/EncodingHeuristics.scala delete mode 100644 src/library/scala/xml/include/sax/XIncludeFilter.scala delete mode 100644 src/library/scala/xml/include/sax/XIncluder.scala delete mode 100644 src/library/scala/xml/package.scala delete mode 100755 src/library/scala/xml/parsing/ConstructingHandler.scala delete mode 100644 src/library/scala/xml/parsing/ConstructingParser.scala delete mode 100755 src/library/scala/xml/parsing/DefaultMarkupHandler.scala delete mode 100644 src/library/scala/xml/parsing/ExternalSources.scala delete mode 100644 src/library/scala/xml/parsing/FactoryAdapter.scala delete mode 100644 src/library/scala/xml/parsing/FatalError.scala delete mode 100755 src/library/scala/xml/parsing/MarkupHandler.scala delete mode 100755 src/library/scala/xml/parsing/MarkupParser.scala delete mode 100644 src/library/scala/xml/parsing/MarkupParserCommon.scala delete mode 100644 src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala delete mode 100644 src/library/scala/xml/parsing/TokenTests.scala delete mode 100644 src/library/scala/xml/parsing/ValidatingMarkupHandler.scala delete mode 100644 
src/library/scala/xml/parsing/XhtmlEntities.scala delete mode 100644 src/library/scala/xml/parsing/XhtmlParser.scala delete mode 100644 src/library/scala/xml/persistent/CachedFileStorage.scala delete mode 100644 src/library/scala/xml/persistent/Index.scala delete mode 100644 src/library/scala/xml/persistent/SetStorage.scala delete mode 100644 src/library/scala/xml/pull/XMLEvent.scala delete mode 100755 src/library/scala/xml/pull/XMLEventReader.scala delete mode 100644 src/library/scala/xml/pull/package.scala delete mode 100644 src/library/scala/xml/transform/BasicTransformer.scala delete mode 100644 src/library/scala/xml/transform/RewriteRule.scala delete mode 100644 src/library/scala/xml/transform/RuleTransformer.scala create mode 100644 src/xml/scala/xml/Atom.scala create mode 100644 src/xml/scala/xml/Attribute.scala create mode 100644 src/xml/scala/xml/Comment.scala create mode 100644 src/xml/scala/xml/Document.scala create mode 100755 src/xml/scala/xml/Elem.scala create mode 100644 src/xml/scala/xml/EntityRef.scala create mode 100644 src/xml/scala/xml/Equality.scala create mode 100644 src/xml/scala/xml/Group.scala create mode 100644 src/xml/scala/xml/MalformedAttributeException.scala create mode 100644 src/xml/scala/xml/MetaData.scala create mode 100644 src/xml/scala/xml/NamespaceBinding.scala create mode 100755 src/xml/scala/xml/Node.scala create mode 100644 src/xml/scala/xml/NodeBuffer.scala create mode 100644 src/xml/scala/xml/NodeSeq.scala create mode 100644 src/xml/scala/xml/Null.scala create mode 100644 src/xml/scala/xml/PCData.scala create mode 100644 src/xml/scala/xml/PrefixedAttribute.scala create mode 100755 src/xml/scala/xml/PrettyPrinter.scala create mode 100644 src/xml/scala/xml/ProcInstr.scala create mode 100644 src/xml/scala/xml/QNode.scala create mode 100644 src/xml/scala/xml/SpecialNode.scala create mode 100644 src/xml/scala/xml/Text.scala create mode 100644 src/xml/scala/xml/TextBuffer.scala create mode 100644 src/xml/scala/xml/TopScope.scala 
create mode 100644 src/xml/scala/xml/TypeSymbol.scala create mode 100644 src/xml/scala/xml/Unparsed.scala create mode 100644 src/xml/scala/xml/UnprefixedAttribute.scala create mode 100755 src/xml/scala/xml/Utility.scala create mode 100755 src/xml/scala/xml/XML.scala create mode 100644 src/xml/scala/xml/Xhtml.scala create mode 100644 src/xml/scala/xml/dtd/ContentModel.scala create mode 100644 src/xml/scala/xml/dtd/ContentModelParser.scala create mode 100644 src/xml/scala/xml/dtd/DTD.scala create mode 100644 src/xml/scala/xml/dtd/Decl.scala create mode 100644 src/xml/scala/xml/dtd/DocType.scala create mode 100644 src/xml/scala/xml/dtd/ElementValidator.scala create mode 100644 src/xml/scala/xml/dtd/ExternalID.scala create mode 100644 src/xml/scala/xml/dtd/Scanner.scala create mode 100644 src/xml/scala/xml/dtd/Tokens.scala create mode 100644 src/xml/scala/xml/dtd/ValidationException.scala create mode 100644 src/xml/scala/xml/dtd/impl/Base.scala create mode 100644 src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala create mode 100644 src/xml/scala/xml/dtd/impl/DetWordAutom.scala create mode 100644 src/xml/scala/xml/dtd/impl/Inclusion.scala create mode 100644 src/xml/scala/xml/dtd/impl/NondetWordAutom.scala create mode 100644 src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala create mode 100644 src/xml/scala/xml/dtd/impl/SubsetConstruction.scala create mode 100644 src/xml/scala/xml/dtd/impl/SyntaxError.scala create mode 100644 src/xml/scala/xml/dtd/impl/WordBerrySethi.scala create mode 100644 src/xml/scala/xml/dtd/impl/WordExp.scala create mode 100755 src/xml/scala/xml/factory/Binder.scala create mode 100644 src/xml/scala/xml/factory/LoggedNodeFactory.scala create mode 100644 src/xml/scala/xml/factory/NodeFactory.scala create mode 100644 src/xml/scala/xml/factory/XMLLoader.scala create mode 100644 src/xml/scala/xml/include/CircularIncludeException.scala create mode 100644 src/xml/scala/xml/include/UnavailableResourceException.scala create mode 100644 
src/xml/scala/xml/include/XIncludeException.scala create mode 100644 src/xml/scala/xml/include/sax/EncodingHeuristics.scala create mode 100644 src/xml/scala/xml/include/sax/XIncludeFilter.scala create mode 100644 src/xml/scala/xml/include/sax/XIncluder.scala create mode 100644 src/xml/scala/xml/package.scala create mode 100755 src/xml/scala/xml/parsing/ConstructingHandler.scala create mode 100644 src/xml/scala/xml/parsing/ConstructingParser.scala create mode 100755 src/xml/scala/xml/parsing/DefaultMarkupHandler.scala create mode 100644 src/xml/scala/xml/parsing/ExternalSources.scala create mode 100644 src/xml/scala/xml/parsing/FactoryAdapter.scala create mode 100644 src/xml/scala/xml/parsing/FatalError.scala create mode 100755 src/xml/scala/xml/parsing/MarkupHandler.scala create mode 100755 src/xml/scala/xml/parsing/MarkupParser.scala create mode 100644 src/xml/scala/xml/parsing/MarkupParserCommon.scala create mode 100644 src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala create mode 100644 src/xml/scala/xml/parsing/TokenTests.scala create mode 100644 src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala create mode 100644 src/xml/scala/xml/parsing/XhtmlEntities.scala create mode 100644 src/xml/scala/xml/parsing/XhtmlParser.scala create mode 100644 src/xml/scala/xml/persistent/CachedFileStorage.scala create mode 100644 src/xml/scala/xml/persistent/Index.scala create mode 100644 src/xml/scala/xml/persistent/SetStorage.scala create mode 100644 src/xml/scala/xml/pull/XMLEvent.scala create mode 100755 src/xml/scala/xml/pull/XMLEventReader.scala create mode 100644 src/xml/scala/xml/pull/package.scala create mode 100644 src/xml/scala/xml/transform/BasicTransformer.scala create mode 100644 src/xml/scala/xml/transform/RewriteRule.scala create mode 100644 src/xml/scala/xml/transform/RuleTransformer.scala (limited to 'src') diff --git a/build.xml b/build.xml index 2af335d6ab..854bb9c68b 100755 --- a/build.xml +++ b/build.xml @@ -472,7 +472,7 @@ TODO: There must be a 
variable of the shape @{stage}.@{project}.build.path for all @{stage} in locker, quick, strap and all @{project} in library, reflect, compiler - when stage is quick, @{project} also includes: actors, repl, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap + when stage is quick, @{project} also includes: actors, repl, xml, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap --> @@ -527,9 +527,13 @@ TODO: + + + + + - @@ -548,17 +552,19 @@ TODO: - + + + @@ -571,6 +577,7 @@ TODO: + @@ -578,21 +585,18 @@ TODO: - - - - - + + @@ -618,6 +622,7 @@ TODO: + @@ -656,6 +661,19 @@ TODO: + + + + + + + + + + + + + @@ -671,6 +689,10 @@ TODO: + + + + @@ -1070,7 +1092,7 @@ TODO: doctitle="@{title}" docversion="${version.number}" sourcepath="${src.dir}" - classpathref="pack.compiler.path" + classpathref="docs.compiler.path" srcdir="${src.dir}/@{dir}" addparams="${scalac.args.all}" implicits="on" @@ -1086,7 +1108,7 @@ TODO: doctitle="@{title}" docversion="${version.number}" sourcepath="${src.dir}" - classpathref="pack.compiler.path" + classpathref="docs.compiler.path" srcdir="${src.dir}/@{dir}" docRootContent="${src.dir}/@{project}/@{docroot}" addparams="${scalac.args.all}" @@ -1155,14 +1177,17 @@ TODO: - + - - + + - + + + + @@ -1200,7 +1225,7 @@ TODO: - + @@ -1215,6 +1240,7 @@ TODO: + @@ -1251,7 +1277,7 @@ TODO: - + @@ -1264,7 +1290,7 @@ TODO: from="${partest.extras.versions}" to="flatten"/> - + @@ -1358,13 +1384,14 @@ TODO: + - - + + @@ -1641,7 +1668,7 @@ TODO: docversion="${version.number}" docsourceurl="${scaladoc.url}€{FILE_PATH}.scala#L1" sourcepath="${src.dir}" - classpathref="pack.compiler.path" + classpathref="docs.compiler.path" addparams="${scalac.args.all}" docRootContent="${src.dir}/library/rootdoc.txt" implicits="on" @@ -1674,6 +1701,12 @@ TODO: + + + + + + @@ -1712,7 +1745,7 @@ TODO: @@ -1742,8 +1775,7 @@ TODO: - - + + @@ -1846,6 +1879,7 @@ TODO: + @@ -1896,6 +1930,7 @@ TODO: + diff --git a/src/build/bnd/scala-xml.bnd 
b/src/build/bnd/scala-xml.bnd new file mode 100644 index 0000000000..6203c57dfe --- /dev/null +++ b/src/build/bnd/scala-xml.bnd @@ -0,0 +1,5 @@ +Bundle-Name: Scala XML Library +Bundle-SymbolicName: org.scala-lang.scala-xml +ver: @VERSION@ +Bundle-Version: ${ver} +Export-Package: *;version=${ver} diff --git a/src/build/maven/maven-deploy.xml b/src/build/maven/maven-deploy.xml index bf82346b80..a51562103c 100644 --- a/src/build/maven/maven-deploy.xml +++ b/src/build/maven/maven-deploy.xml @@ -108,6 +108,7 @@ + diff --git a/src/build/maven/scala-xml-pom.xml b/src/build/maven/scala-xml-pom.xml new file mode 100644 index 0000000000..629872c2e2 --- /dev/null +++ b/src/build/maven/scala-xml-pom.xml @@ -0,0 +1,59 @@ + + 4.0.0 + org.scala-lang + scala-xml + jar + @VERSION@ + Scala XML + XML Library for the Scala Programming Language + http://www.scala-lang.org/ + 2002 + + LAMP/EPFL + http://lamp.epfl.ch/ + + + + BSD-like + http://www.scala-lang.org/downloads/license.html + + repo + + + + scm:git:git://github.com/scala/scala.git + https://github.com/scala/scala.git + + + JIRA + https://issues.scala-lang.org/ + + + http://www.scala-lang.org/api/@VERSION@/ + + + + + + scala-tools.org + @RELEASE_REPOSITORY@ + + + scala-tools.org + @SNAPSHOT_REPOSITORY@ + false + + + + + lamp + EPFL LAMP + + + Typesafe + Typesafe, Inc. 
+ + + diff --git a/src/build/pack.xml b/src/build/pack.xml index f8eb3c67c5..44198adb1e 100644 --- a/src/build/pack.xml +++ b/src/build/pack.xml @@ -153,6 +153,7 @@ MAIN DISTRIBUTION PACKAGING + @@ -203,6 +204,10 @@ MAIN DISTRIBUTION PACKAGING basedir="${build-docs.dir}/library"> + + + diff --git a/src/compiler/scala/tools/nsc/typechecker/Contexts.scala b/src/compiler/scala/tools/nsc/typechecker/Contexts.scala index 1f4ff7cc2d..1f8f13ae02 100644 --- a/src/compiler/scala/tools/nsc/typechecker/Contexts.scala +++ b/src/compiler/scala/tools/nsc/typechecker/Contexts.scala @@ -99,22 +99,13 @@ trait Contexts { self: Analyzer => // there must be a scala.xml package when xml literals were parsed in this unit if (unit.hasXml && ScalaXmlPackage == NoSymbol) - unit.error(unit.firstXmlPos, "XML literals may only be used if the package scala.xml is present in the compilation classpath.") - - // TODO: remove the def below and drop `|| predefDefinesDollarScope` in the condition for `contextWithXML` - // as soon as 2.11.0-M4 is released and used as STARR (and $scope is no longer defined in Predef) - // Until then, to allow compiling quick with pre-2.11.0-M4 STARR, - // which relied on Predef defining `val $scope`, we've left it in place. - // Since the new scheme also imports $scope (as an alias for scala.xml.TopScope), - // we must check whether it is still there and not import the alias to avoid ambiguity. - // (All of this is only necessary to compile the full quick stage with STARR. - // if using locker, Predef.$scope is no longer needed.) 
- def predefDefinesDollarScope = definitions.getMemberIfDefined(PredefModule, nme.dollarScope) != NoSymbol - - // hack for the old xml library (detected by looking for scala.xml.TopScope, which needs to be in scope as $scope) - // import scala.xml.{TopScope => $scope} + unit.error(unit.firstXmlPos, "To compile XML syntax, the scala.xml package must be on the classpath.\nPlease see https://github.com/scala/scala/wiki/Scala-2.11#xml.") + + // scala-xml needs `scala.xml.TopScope` to be in scope globally as `$scope` + // We detect `scala-xml` by looking for `scala.xml.TopScope` and + // inject the equivalent of `import scala.xml.{TopScope => $scope}` val contextWithXML = - if (!unit.hasXml || ScalaXmlTopScope == NoSymbol || predefDefinesDollarScope) rootImportsContext + if (!unit.hasXml || ScalaXmlTopScope == NoSymbol) rootImportsContext else rootImportsContext.make(gen.mkImport(ScalaXmlPackage, nme.TopScope, nme.dollarScope)) val c = contextWithXML.make(tree, unit = unit) diff --git a/src/library/scala/Predef.scala b/src/library/scala/Predef.scala index a188602543..3b588e261f 100644 --- a/src/library/scala/Predef.scala +++ b/src/library/scala/Predef.scala @@ -134,15 +134,6 @@ object Predef extends LowPriorityImplicits with DeprecatedPredef { @inline def implicitly[T](implicit e: T) = e // for summoning implicit values from the nether world -- TODO: when dependent method types are on by default, give this result type `e.type`, so that inliner has better chance of knowing which method to inline in calls like `implicitly[MatchingStrategy[Option]].zero` @inline def locally[T](x: T): T = x // to communicate intent and avoid unmoored statements - // TODO: remove `val $scope = ...` as soon as 2.11.0-M4 is released and used as STARR - // As it has a '$' in its name, we don't have to deprecate first. - // The compiler now aliases `scala.xml.TopScope` to `$scope` (unless Predef.$scope is still there). 
- // This definition left in place for older compilers and to compile quick with pre-2.11.0-M4 STARR. - // In principle we don't need it to compile library/reflect/compiler (there's no xml left there), - // so a new locker can be built without this definition, and locker can build quick - // (partest, scaladoc still require xml). - val $scope = scala.xml.TopScope - // errors and asserts ------------------------------------------------- // !!! Remove this when possible - ideally for 2.11. diff --git a/src/library/scala/xml/Atom.scala b/src/library/scala/xml/Atom.scala deleted file mode 100644 index 33e58ba7e7..0000000000 --- a/src/library/scala/xml/Atom.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Atom` provides an XML node for text (`PCDATA`). - * It is used in both non-bound and bound XML representations. - * - * @author Burak Emir - * @param data the text contained in this node, may not be `'''null'''`. - */ -class Atom[+A](val data: A) extends SpecialNode with Serializable { - if (data == null) - throw new IllegalArgumentException("cannot construct "+getClass.getSimpleName+" with null") - - override protected def basisForHashCode: Seq[Any] = Seq(data) - - override def strict_==(other: Equality) = other match { - case x: Atom[_] => data == x.data - case _ => false - } - - override def canEqual(other: Any) = other match { - case _: Atom[_] => true - case _ => false - } - - final override def doCollectNamespaces = false - final override def doTransform = false - - def label = "#PCDATA" - - /** Returns text, with some characters escaped according to the XML - * specification. 
- */ - def buildString(sb: StringBuilder): StringBuilder = - Utility.escape(data.toString, sb) - - override def text: String = data.toString - -} diff --git a/src/library/scala/xml/Attribute.scala b/src/library/scala/xml/Attribute.scala deleted file mode 100644 index e4b2b69fc6..0000000000 --- a/src/library/scala/xml/Attribute.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `apply` and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Attribute { - def unapply(x: Attribute) = x match { - case PrefixedAttribute(_, key, value, next) => Some((key, value, next)) - case UnprefixedAttribute(key, value, next) => Some((key, value, next)) - case _ => None - } - - /** Convenience functions which choose Un/Prefixedness appropriately */ - def apply(key: String, value: Seq[Node], next: MetaData): Attribute = - new UnprefixedAttribute(key, value, next) - - def apply(pre: String, key: String, value: String, next: MetaData): Attribute = - if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) - else new PrefixedAttribute(pre, key, value, next) - - def apply(pre: String, key: String, value: Seq[Node], next: MetaData): Attribute = - if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) - else new PrefixedAttribute(pre, key, value, next) - - def apply(pre: Option[String], key: String, value: Seq[Node], next: MetaData): Attribute = - pre match { - case None => new UnprefixedAttribute(key, value, next) - case Some(p) => new PrefixedAttribute(p, key, value, next) - } -} - -/** The `Attribute` trait defines the interface shared by both - * [[scala.xml.PrefixedAttribute]] and 
[[scala.xml.UnprefixedAttribute]]. - * - * @author Burak Emir - * @version 1.0 - */ -abstract trait Attribute extends MetaData { - def pre: String // will be null if unprefixed - val key: String - val value: Seq[Node] - val next: MetaData - - def apply(key: String): Seq[Node] - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] - def copy(next: MetaData): Attribute - - def remove(key: String) = - if (!isPrefixed && this.key == key) next - else copy(next remove key) - - def remove(namespace: String, scope: NamespaceBinding, key: String) = - if (this.key == key && (scope getURI pre) == namespace) next - else copy(next.remove(namespace, scope, key)) - - def isPrefixed: Boolean = pre != null - - def getNamespace(owner: Node): String - - def wellformed(scope: NamespaceBinding): Boolean = { - val arg = if (isPrefixed) scope getURI pre else null - (next(arg, scope, key) == null) && (next wellformed scope) - } - - /** Returns an iterator on attributes */ - override def iterator: Iterator[MetaData] = { - if (value == null) next.iterator - else Iterator.single(this) ++ next.iterator - } - - override def size: Int = { - if (value == null) next.size - else 1 + next.size - } - - /** Appends string representation of only this attribute to stringbuffer. 
- */ - protected def toString1(sb: StringBuilder) { - if (value == null) - return - if (isPrefixed) - sb append pre append ':' - - sb append key append '=' - val sb2 = new StringBuilder() - Utility.sequenceToXML(value, TopScope, sb2, stripComments = true) - Utility.appendQuoted(sb2.toString, sb) - } -} diff --git a/src/library/scala/xml/Comment.scala b/src/library/scala/xml/Comment.scala deleted file mode 100644 index b8dccdcb16..0000000000 --- a/src/library/scala/xml/Comment.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Comment` implements an XML node for comments. - * - * @author Burak Emir - * @param commentText the text contained in this node, may not contain "--" - */ -case class Comment(commentText: String) extends SpecialNode { - - def label = "#REM" - override def text = "" - final override def doCollectNamespaces = false - final override def doTransform = false - - if (commentText contains "--") - throw new IllegalArgumentException("text contains \"--\"") - - /** Appends "" to this string buffer. - */ - override def buildString(sb: StringBuilder) = - sb append "" -} diff --git a/src/library/scala/xml/Document.scala b/src/library/scala/xml/Document.scala deleted file mode 100644 index 9a725014fc..0000000000 --- a/src/library/scala/xml/Document.scala +++ /dev/null @@ -1,92 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** A document information item (according to InfoSet spec). 
The comments - * are copied from the Infoset spec, only augmented with some information - * on the Scala types for definitions that might have no value. - * Also plays the role of an `XMLEvent` for pull parsing. - * - * @author Burak Emir - * @version 1.0, 26/04/2005 - */ -@SerialVersionUID(-2289320563321795109L) -class Document extends NodeSeq with pull.XMLEvent with Serializable { - - /** An ordered list of child information items, in document - * order. The list contains exactly one element information item. The - * list also contains one processing instruction information item for - * each processing instruction outside the document element, and one - * comment information item for each comment outside the document - * element. Processing instructions and comments within the DTD are - * excluded. If there is a document type declaration, the list also - * contains a document type declaration information item. - */ - var children: Seq[Node] = _ - - /** The element information item corresponding to the document element. */ - var docElem: Node = _ - - /** The dtd that comes with the document, if any */ - var dtd: scala.xml.dtd.DTD = _ - - /** An unordered set of notation information items, one for each notation - * declared in the DTD. If any notation is multiply declared, this property - * has no value. - */ - def notations: Seq[scala.xml.dtd.NotationDecl] = - dtd.notations - - /** An unordered set of unparsed entity information items, one for each - * unparsed entity declared in the DTD. - */ - def unparsedEntities: Seq[scala.xml.dtd.EntityDecl] = - dtd.unparsedEntities - - /** The base URI of the document entity. */ - var baseURI: String = _ - - /** The name of the character encoding scheme in which the document entity - * is expressed. - */ - var encoding: Option[String] = _ - - /** An indication of the standalone status of the document, either - * true or false. 
This property is derived from the optional standalone - * document declaration in the XML declaration at the beginning of the - * document entity, and has no value (`None`) if there is no - * standalone document declaration. - */ - var standAlone: Option[Boolean] = _ - - /** A string representing the XML version of the document. This - * property is derived from the XML declaration optionally present at - * the beginning of the document entity, and has no value (`None`) - * if there is no XML declaration. - */ - var version: Option[String] = _ - - /** 9. This property is not strictly speaking part of the infoset of - * the document. Rather it is an indication of whether the processor - * has read the complete DTD. Its value is a boolean. If it is false, - * then certain properties (indicated in their descriptions below) may - * be unknown. If it is true, those properties are never unknown. - */ - var allDeclarationsProcessed = false - - // methods for NodeSeq - - def theSeq: Seq[Node] = this.docElem - - override def canEqual(other: Any) = other match { - case _: Document => true - case _ => false - } -} diff --git a/src/library/scala/xml/Elem.scala b/src/library/scala/xml/Elem.scala deleted file mode 100755 index 484cf98744..0000000000 --- a/src/library/scala/xml/Elem.scala +++ /dev/null @@ -1,135 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `apply` and `unapplySeq` methods for - * convenient construction and deconstruction. It is possible to deconstruct - * any `Node` instance (that is not a `SpecialNode` or a `Group`) using the - * syntax `case Elem(prefix, label, attribs, scope, child @ _*) => ...` - * - * Copyright 2008 Google Inc. All Rights Reserved. 
- * @author Burak Emir - */ -object Elem { - /** Build an Elem, setting its minimizeEmpty property to `true` if it has no children. Note that this - * default may not be exactly what you want, as some XML dialects don't permit some elements to be minimized. - * - * @deprecated This factory method is retained for backward compatibility; please use the other one, with which you - * can specify your own preference for minimizeEmpty. - */ - @deprecated("Use the other apply method in this object", "2.10.0") - def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*): Elem = - apply(prefix, label, attributes, scope, child.isEmpty, child: _*) - - def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, minimizeEmpty: Boolean, child: Node*): Elem = - new Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) - - def unapplySeq(n: Node) = n match { - case _: SpecialNode | _: Group => None - case _ => Some((n.prefix, n.label, n.attributes, n.scope, n.child)) - } - - import scala.sys.process._ - /** Implicitly convert a [[scala.xml.Elem]] into a - * [[scala.sys.process.ProcessBuilder]]. This is done by obtaining the text - * elements of the element, trimming spaces, and then converting the result - * from string to a process. Importantly, tags are completely ignored, so - * they cannot be used to separate parameters. - */ - @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") - implicit def xmlToProcess(command: scala.xml.Elem): ProcessBuilder = Process(command.text.trim) - - @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") - implicit def processXml(p: Process.type) = new { - /** Creates a [[scala.sys.process.ProcessBuilder]] from a Scala XML Element. - * This can be used as a way to template strings. 
- * - * @example {{{ - * apply( {dxPath.absolutePath} --dex --output={classesDexPath.absolutePath} {classesMinJarPath.absolutePath}) - * }}} - */ - def apply(command: Elem): ProcessBuilder = Process(command.text.trim) - } -} - - -/** The case class `Elem` extends the `Node` class, - * providing an immutable data object representing an XML element. - * - * @param prefix namespace prefix (may be null, but not the empty string) - * @param label the element name - * @param attributes1 the attribute map - * @param scope the scope containing the namespace bindings - * @param minimizeEmpty `true` if this element should be serialized as minimized (i.e. "<el/>") when - * empty; `false` if it should be written out in long form. - * @param child the children of this node - * - * Copyright 2008 Google Inc. All Rights Reserved. - * @author Burak Emir - */ -class Elem( - override val prefix: String, - val label: String, - attributes1: MetaData, - override val scope: NamespaceBinding, - val minimizeEmpty: Boolean, - val child: Node*) -extends Node with Serializable -{ - @deprecated("This constructor is retained for backward compatibility. 
Please use the primary constructor, which lets you specify your own preference for `minimizeEmpty`.", "2.10.0") - def this(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*) = { - this(prefix, label, attributes, scope, child.isEmpty, child: _*) - } - - final override def doCollectNamespaces = true - final override def doTransform = true - - override val attributes = MetaData.normalize(attributes1, scope) - - if (prefix == "") - throw new IllegalArgumentException("prefix of zero length, use null instead") - - if (scope == null) - throw new IllegalArgumentException("scope is null, use scala.xml.TopScope for empty scope") - - //@todo: copy the children, - // setting namespace scope if necessary - // cleaning adjacent text nodes if necessary - - override protected def basisForHashCode: Seq[Any] = - prefix :: label :: attributes :: child.toList - - /** Returns a new element with updated attributes, resolving namespace uris - * from this element's scope. See MetaData.update for details. - * - * @param updates MetaData with new and updated attributes - * @return a new symbol with updated attributes - */ - final def %(updates: MetaData): Elem = - copy(attributes = MetaData.update(attributes, scope, updates)) - - /** Returns a copy of this element with any supplied arguments replacing - * this element's value for that field. - * - * @return a new symbol with updated attributes - */ - def copy( - prefix: String = this.prefix, - label: String = this.label, - attributes: MetaData = this.attributes, - scope: NamespaceBinding = this.scope, - minimizeEmpty: Boolean = this.minimizeEmpty, - child: Seq[Node] = this.child.toSeq - ): Elem = Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) - - /** Returns concatenation of `text(n)` for each child `n`. 
- */ - override def text = (child map (_.text)).mkString -} diff --git a/src/library/scala/xml/EntityRef.scala b/src/library/scala/xml/EntityRef.scala deleted file mode 100644 index 7a58831075..0000000000 --- a/src/library/scala/xml/EntityRef.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `EntityRef` implements an XML node for entity references. - * - * @author Burak Emir - * @version 1.0 - * @param entityName the name of the entity reference, for example `amp`. - */ -case class EntityRef(entityName: String) extends SpecialNode { - final override def doCollectNamespaces = false - final override def doTransform = false - def label = "#ENTITY" - - override def text = entityName match { - case "lt" => "<" - case "gt" => ">" - case "amp" => "&" - case "apos" => "'" - case "quot" => "\"" - case _ => Utility.sbToString(buildString) - } - - /** Appends `"& entityName;"` to this string buffer. - * - * @param sb the string buffer. - * @return the modified string buffer `sb`. 
- */ - override def buildString(sb: StringBuilder) = - sb.append("&").append(entityName).append(";") - -} diff --git a/src/library/scala/xml/Equality.scala b/src/library/scala/xml/Equality.scala deleted file mode 100644 index 021d185812..0000000000 --- a/src/library/scala/xml/Equality.scala +++ /dev/null @@ -1,107 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** In an attempt to contain the damage being inflicted on consistency by the - * ad hoc `equals` methods spread around `xml`, the logic is centralized and - * all the `xml` classes go through the `xml.Equality trait`. There are two - * forms of `xml` comparison. - * - * 1. `'''def''' strict_==(other: scala.xml.Equality)` - * - * This one tries to honor the little things like symmetry and hashCode - * contracts. The `equals` method routes all comparisons through this. - * - * 1. `xml_==(other: Any)` - * - * This one picks up where `strict_==` leaves off. It might declare any two - * things equal. - * - * As things stood, the logic not only made a mockery of the collections - * equals contract, but also laid waste to that of case classes. - * - * Among the obstacles to sanity are/were: - * - * Node extends NodeSeq extends Seq[Node] - * MetaData extends Iterable[MetaData] - * The hacky "Group" xml node which throws exceptions - * with wild abandon, so don't get too close - * Rampant asymmetry and impossible hashCodes - * Most classes claiming to be equal to "String" if - * some specific stringification of it was the same. - * String was never going to return the favor. - */ - -object Equality { - def asRef(x: Any): AnyRef = x.asInstanceOf[AnyRef] - - /** Note - these functions assume strict equality has already failed. 
- */ - def compareBlithely(x1: AnyRef, x2: String): Boolean = x1 match { - case x: Atom[_] => x.data == x2 - case x: NodeSeq => x.text == x2 - case _ => false - } - def compareBlithely(x1: AnyRef, x2: Node): Boolean = x1 match { - case x: NodeSeq if x.length == 1 => x2 == x(0) - case _ => false - } - def compareBlithely(x1: AnyRef, x2: AnyRef): Boolean = { - if (x1 == null || x2 == null) - return (x1 eq x2) - - x2 match { - case s: String => compareBlithely(x1, s) - case n: Node => compareBlithely(x1, n) - case _ => false - } - } -} -import Equality._ - -trait Equality extends scala.Equals { - protected def basisForHashCode: Seq[Any] - - def strict_==(other: Equality): Boolean - def strict_!=(other: Equality) = !strict_==(other) - - /** We insist we're only equal to other `xml.Equality` implementors, - * which heads off a lot of inconsistency up front. - */ - override def canEqual(other: Any): Boolean = other match { - case x: Equality => true - case _ => false - } - - /** It's be nice to make these final, but there are probably - * people out there subclassing the XML types, especially when - * it comes to equals. However WE at least can pretend they - * are final since clearly individual classes cannot be trusted - * to maintain a semblance of order. - */ - override def hashCode() = basisForHashCode.## - override def equals(other: Any) = doComparison(other, blithe = false) - final def xml_==(other: Any) = doComparison(other, blithe = true) - final def xml_!=(other: Any) = !xml_==(other) - - /** The "blithe" parameter expresses the caller's unconcerned attitude - * regarding the usual constraints on equals. The method is thereby - * given carte blanche to declare any two things equal. 
- */ - private def doComparison(other: Any, blithe: Boolean) = { - val strictlyEqual = other match { - case x: AnyRef if this eq x => true - case x: Equality => (x canEqual this) && (this strict_== x) - case _ => false - } - - strictlyEqual || (blithe && compareBlithely(this, asRef(other))) - } -} diff --git a/src/library/scala/xml/Group.scala b/src/library/scala/xml/Group.scala deleted file mode 100644 index e3af615008..0000000000 --- a/src/library/scala/xml/Group.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** A hack to group XML nodes in one node for output. - * - * @author Burak Emir - * @version 1.0 - */ -final case class Group(nodes: Seq[Node]) extends Node { - override def theSeq = nodes - - override def canEqual(other: Any) = other match { - case x: Group => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case Group(xs) => nodes sameElements xs - case _ => false - } - - override protected def basisForHashCode = nodes - - /** Since Group is very much a hack it throws an exception if you - * try to do anything with it. 
- */ - private def fail(msg: String) = throw new UnsupportedOperationException("class Group does not support method '%s'" format msg) - - def label = fail("label") - override def attributes = fail("attributes") - override def namespace = fail("namespace") - override def child = fail("child") - def buildString(sb: StringBuilder) = fail("toString(StringBuilder)") -} diff --git a/src/library/scala/xml/MalformedAttributeException.scala b/src/library/scala/xml/MalformedAttributeException.scala deleted file mode 100644 index d499ad3e10..0000000000 --- a/src/library/scala/xml/MalformedAttributeException.scala +++ /dev/null @@ -1,15 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml - - -case class MalformedAttributeException(msg: String) extends RuntimeException(msg) diff --git a/src/library/scala/xml/MetaData.scala b/src/library/scala/xml/MetaData.scala deleted file mode 100644 index 8b5ea187cb..0000000000 --- a/src/library/scala/xml/MetaData.scala +++ /dev/null @@ -1,217 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString -import scala.annotation.tailrec -import scala.collection.{ AbstractIterable, Iterator } - -/** - * Copyright 2008 Google Inc. All Rights Reserved. - * @author Burak Emir - */ -object MetaData { - /** - * appends all attributes from new_tail to attribs, without attempting to - * detect or remove duplicates. The method guarantees that all attributes - * from attribs come before the attributes in new_tail, but does not - * guarantee to preserve the relative order of attribs. 
- * - * Duplicates can be removed with `normalize`. - */ - @tailrec // temporarily marked final so it will compile under -Xexperimental - final def concatenate(attribs: MetaData, new_tail: MetaData): MetaData = - if (attribs eq Null) new_tail - else concatenate(attribs.next, attribs copy new_tail) - - /** - * returns normalized MetaData, with all duplicates removed and namespace prefixes resolved to - * namespace URIs via the given scope. - */ - def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = { - def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData = { - lazy val key = getUniversalKey(md, scope) - if (md eq Null) normalized_attribs - else if ((md.value eq null) || set(key)) iterate(md.next, normalized_attribs, set) - else md copy iterate(md.next, normalized_attribs, set + key) - } - iterate(attribs, Null, Set()) - } - - /** - * returns key if md is unprefixed, pre+key is md is prefixed - */ - def getUniversalKey(attrib: MetaData, scope: NamespaceBinding) = attrib match { - case prefixed: PrefixedAttribute => scope.getURI(prefixed.pre) + prefixed.key - case unprefixed: UnprefixedAttribute => unprefixed.key - } - - /** - * returns MetaData with attributes updated from given MetaData - */ - def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData = - normalize(concatenate(updates, attribs), scope) - -} - -/** This class represents an attribute and at the same time a linked list of - * attributes. Every instance of this class is either - * - an instance of `UnprefixedAttribute key,value` or - * - an instance of `PrefixedAttribute namespace_prefix,key,value` or - * - `Null, the empty attribute list. - * - * Namespace URIs are obtained by using the namespace scope of the element - * owning this attribute (see `getNamespace`). - * - * Copyright 2008 Google Inc. All Rights Reserved. 
- * @author Burak Emir - */ -abstract class MetaData -extends AbstractIterable[MetaData] - with Iterable[MetaData] - with Equality - with Serializable { - - /** Updates this MetaData with the MetaData given as argument. All attributes that occur in updates - * are part of the resulting MetaData. If an attribute occurs in both this instance and - * updates, only the one in updates is part of the result (avoiding duplicates). For prefixed - * attributes, namespaces are resolved using the given scope, which defaults to TopScope. - * - * @param updates MetaData with new and updated attributes - * @return a new MetaData instance that contains old, new and updated attributes - */ - def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData = - MetaData.update(this, scope, updates) - - /** - * Gets value of unqualified (unprefixed) attribute with given key, null if not found - * - * @param key - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(key: String): Seq[Node] - - /** convenience method, same as `apply(namespace, owner.scope, key)`. - * - * @param namespace_uri namespace uri of key - * @param owner the element owning this attribute list - * @param key the attribute key - */ - final def apply(namespace_uri: String, owner: Node, key: String): Seq[Node] = - apply(namespace_uri, owner.scope, key) - - /** - * Gets value of prefixed attribute with given key and namespace, null if not found - * - * @param namespace_uri namespace uri of key - * @param scp a namespace scp (usually of the element owning this attribute list) - * @param k to be looked for - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(namespace_uri: String, scp: NamespaceBinding, k: String): Seq[Node] - - /** returns a copy of this MetaData item with next field set to argument. 
- */ - def copy(next: MetaData): MetaData - - /** if owner is the element of this metadata item, returns namespace */ - def getNamespace(owner: Node): String - - def hasNext = (Null != next) - - def length: Int = length(0) - - def length(i: Int): Int = next.length(i + 1) - - def isPrefixed: Boolean - - override def canEqual(other: Any) = other match { - case _: MetaData => true - case _ => false - } - override def strict_==(other: Equality) = other match { - case m: MetaData => this.asAttrMap == m.asAttrMap - case _ => false - } - protected def basisForHashCode: Seq[Any] = List(this.asAttrMap) - - /** filters this sequence of meta data */ - override def filter(f: MetaData => Boolean): MetaData = - if (f(this)) copy(next filter f) - else next filter f - - /** returns key of this MetaData item */ - def key: String - - /** returns value of this MetaData item */ - def value: Seq[Node] - - /** Returns a String containing "prefix:key" if the first key is - * prefixed, and "key" otherwise. - */ - def prefixedKey = this match { - case x: Attribute if x.isPrefixed => x.pre + ":" + key - case _ => key - } - - /** Returns a Map containing the attributes stored as key/value pairs. - */ - def asAttrMap: Map[String, String] = - (iterator map (x => (x.prefixedKey, x.value.text))).toMap - - /** returns Null or the next MetaData item */ - def next: MetaData - - /** - * Gets value of unqualified (unprefixed) attribute with given key, None if not found - * - * @param key - * @return value in Some(Seq[Node]) if key is found, None otherwise - */ - final def get(key: String): Option[Seq[Node]] = Option(apply(key)) - - /** same as get(uri, owner.scope, key) */ - final def get(uri: String, owner: Node, key: String): Option[Seq[Node]] = - get(uri, owner.scope, key) - - /** gets value of qualified (prefixed) attribute with given key. 
- * - * @param uri namespace of key - * @param scope a namespace scp (usually of the element owning this attribute list) - * @param key to be looked fore - * @return value as Some[Seq[Node]] if key is found, None otherwise - */ - final def get(uri: String, scope: NamespaceBinding, key: String): Option[Seq[Node]] = - Option(apply(uri, scope, key)) - - protected def toString1(): String = sbToString(toString1) - - // appends string representations of single attribute to StringBuilder - protected def toString1(sb: StringBuilder): Unit - - override def toString(): String = sbToString(buildString) - - def buildString(sb: StringBuilder): StringBuilder = { - sb append ' ' - toString1(sb) - next buildString sb - } - - /** - */ - def wellformed(scope: NamespaceBinding): Boolean - - def remove(key: String): MetaData - - def remove(namespace: String, scope: NamespaceBinding, key: String): MetaData - - final def remove(namespace: String, owner: Node, key: String): MetaData = - remove(namespace, owner.scope, key) -} diff --git a/src/library/scala/xml/NamespaceBinding.scala b/src/library/scala/xml/NamespaceBinding.scala deleted file mode 100644 index b320466976..0000000000 --- a/src/library/scala/xml/NamespaceBinding.scala +++ /dev/null @@ -1,83 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString - -/** The class `NamespaceBinding` represents namespace bindings - * and scopes. The binding for the default namespace is treated as a null - * prefix. the absent namespace is represented with the null uri. Neither - * prefix nor uri may be empty, which is not checked. 
- * - * @author Burak Emir - * @version 1.0 - */ -@SerialVersionUID(0 - 2518644165573446725L) -case class NamespaceBinding(prefix: String, uri: String, parent: NamespaceBinding) extends AnyRef with Equality -{ - if (prefix == "") - throw new IllegalArgumentException("zero length prefix not allowed") - - def getURI(_prefix: String): String = - if (prefix == _prefix) uri else parent getURI _prefix - - /** Returns some prefix that is mapped to the URI. - * - * @param _uri the input URI - * @return the prefix that is mapped to the input URI, or null - * if no prefix is mapped to the URI. - */ - def getPrefix(_uri: String): String = - if (_uri == uri) prefix else parent getPrefix _uri - - override def toString(): String = sbToString(buildString(_, TopScope)) - - private def shadowRedefined(stop: NamespaceBinding): NamespaceBinding = { - def prefixList(x: NamespaceBinding): List[String] = - if ((x == null) || (x eq stop)) Nil - else x.prefix :: prefixList(x.parent) - def fromPrefixList(l: List[String]): NamespaceBinding = l match { - case Nil => stop - case x :: xs => new NamespaceBinding(x, this.getURI(x), fromPrefixList(xs)) - } - val ps0 = prefixList(this).reverse - val ps = ps0.distinct - if (ps.size == ps0.size) this - else fromPrefixList(ps) - } - - override def canEqual(other: Any) = other match { - case _: NamespaceBinding => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case x: NamespaceBinding => (prefix == x.prefix) && (uri == x.uri) && (parent == x.parent) - case _ => false - } - - def basisForHashCode: Seq[Any] = List(prefix, uri, parent) - - def buildString(stop: NamespaceBinding): String = sbToString(buildString(_, stop)) - - def buildString(sb: StringBuilder, stop: NamespaceBinding) { - shadowRedefined(stop).doBuildString(sb, stop) - } - - private def doBuildString(sb: StringBuilder, stop: NamespaceBinding) { - if ((this == null) || (this eq stop)) return // contains? 
- - val s = " xmlns%s=\"%s\"".format( - (if (prefix != null) ":" + prefix else ""), - (if (uri != null) uri else "") - ) - parent.doBuildString(sb append s, stop) // copy(ignore) - } -} diff --git a/src/library/scala/xml/Node.scala b/src/library/scala/xml/Node.scala deleted file mode 100755 index e121284252..0000000000 --- a/src/library/scala/xml/Node.scala +++ /dev/null @@ -1,198 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This singleton object contains the `unapplySeq` method for - * convenient deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Node { - /** the constant empty attribute sequence */ - final def NoAttributes: MetaData = Null - - /** the empty namespace */ - val EmptyNamespace = "" - - def unapplySeq(n: Node) = Some((n.label, n.attributes, n.child)) -} - -/** - * An abstract class representing XML with nodes of a labelled tree. - * This class contains an implementation of a subset of XPath for navigation. - * - * @author Burak Emir and others - * @version 1.1 - */ -abstract class Node extends NodeSeq { - - /** prefix of this node */ - def prefix: String = null - - /** label of this node. I.e. "foo" for <foo/>) */ - def label: String - - /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5 - */ - def isAtom = this.isInstanceOf[Atom[_]] - - /** The logic formerly found in typeTag$, as best I could infer it. */ - def doCollectNamespaces = true // if (tag >= 0) DO collect namespaces - def doTransform = true // if (tag < 0) DO NOT transform - - /** - * method returning the namespace bindings of this node. by default, this - * is TopScope, which means there are no namespace bindings except the - * predefined one for "xml". 
- */ - def scope: NamespaceBinding = TopScope - - /** - * convenience, same as `getNamespace(this.prefix)` - */ - def namespace = getNamespace(this.prefix) - - /** - * Convenience method, same as `scope.getURI(pre)` but additionally - * checks if scope is `'''null'''`. - * - * @param pre the prefix whose namespace name we would like to obtain - * @return the namespace if `scope != null` and prefix was - * found, else `null` - */ - def getNamespace(pre: String): String = if (scope eq null) null else scope.getURI(pre) - - /** - * Convenience method, looks up an unprefixed attribute in attributes of this node. - * Same as `attributes.getValue(key)` - * - * @param key of queried attribute. - * @return value of `UnprefixedAttribute` with given key - * in attributes, if it exists, otherwise `null`. - */ - final def attribute(key: String): Option[Seq[Node]] = attributes.get(key) - - /** - * Convenience method, looks up a prefixed attribute in attributes of this node. - * Same as `attributes.getValue(uri, this, key)`- - * - * @param uri namespace of queried attribute (may not be null). - * @param key of queried attribute. - * @return value of `PrefixedAttribute` with given namespace - * and given key, otherwise `'''null'''`. - */ - final def attribute(uri: String, key: String): Option[Seq[Node]] = - attributes.get(uri, this, key) - - /** - * Returns attribute meaning all attributes of this node, prefixed and - * unprefixed, in no particular order. In class `Node`, this - * defaults to `Null` (the empty attribute list). - * - * @return all attributes of this node - */ - def attributes: MetaData = Null - - /** - * Returns child axis i.e. all children of this node. 
- * - * @return all children of this node - */ - def child: Seq[Node] - - /** Children which do not stringify to "" (needed for equality) - */ - def nonEmptyChildren: Seq[Node] = child filterNot (_.toString == "") - - /** - * Descendant axis (all descendants of this node, not including node itself) - * includes all text nodes, element nodes, comments and processing instructions. - */ - def descendant: List[Node] = - child.toList.flatMap { x => x::x.descendant } - - /** - * Descendant axis (all descendants of this node, including thisa node) - * includes all text nodes, element nodes, comments and processing instructions. - */ - def descendant_or_self: List[Node] = this :: descendant - - override def canEqual(other: Any) = other match { - case x: Group => false - case x: Node => true - case _ => false - } - - override protected def basisForHashCode: Seq[Any] = - prefix :: label :: attributes :: nonEmptyChildren.toList - - override def strict_==(other: Equality) = other match { - case _: Group => false - case x: Node => - (prefix == x.prefix) && - (label == x.label) && - (attributes == x.attributes) && - // (scope == x.scope) // note - original code didn't compare scopes so I left it as is. - (nonEmptyChildren sameElements x.nonEmptyChildren) - case _ => - false - } - - // implementations of NodeSeq methods - - /** - * returns a sequence consisting of only this node - */ - def theSeq: Seq[Node] = this :: Nil - - /** - * String representation of this node - * - * @param stripComments if true, strips comment nodes from result - */ - def buildString(stripComments: Boolean): String = - Utility.serialize(this, stripComments = stripComments).toString - - /** - * Same as `toString('''false''')`. - */ - override def toString(): String = buildString(stripComments = false) - - /** - * Appends qualified name of this node to `StringBuilder`. 
- */ - def nameToString(sb: StringBuilder): StringBuilder = { - if (null != prefix) { - sb append prefix - sb append ':' - } - sb append label - } - - /** - * Returns a type symbol (e.g. DTD, XSD), default `'''null'''`. - */ - def xmlType(): TypeSymbol = null - - /** - * Returns a text representation of this node. Note that this is not equivalent to - * the XPath node-test called text(), it is rather an implementation of the - * XPath function string() - * Martin to Burak: to do: if you make this method abstract, the compiler will now - * complain if there's no implementation in a subclass. Is this what we want? Note that - * this would break doc/DocGenator and doc/ModelToXML, with an error message like: - * {{{ - * doc\DocGenerator.scala:1219: error: object creation impossible, since there is a deferred declaration of method text in class Node of type => String which is not implemented in a subclass - * new SpecialNode { - * ^ - * }}} */ - override def text: String = super.text -} diff --git a/src/library/scala/xml/NodeBuffer.scala b/src/library/scala/xml/NodeBuffer.scala deleted file mode 100644 index ae7c7b2bf8..0000000000 --- a/src/library/scala/xml/NodeBuffer.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** - * This class acts as a Buffer for nodes. If it is used as a sequence of - * nodes `Seq[Node]`, it must be ensured that no updates occur after that - * point, because `scala.xml.Node` is assumed to be immutable. - * - * Despite this being a sequence, don't use it as key in a hashtable. - * Calling the hashcode function will result in a runtime error. 
- * - * @author Burak Emir - * @version 1.0 - */ -class NodeBuffer extends scala.collection.mutable.ArrayBuffer[Node] { - - /** - * Append given object to this buffer, returns reference on this - * `NodeBuffer` for convenience. Some rules apply: - * - If argument `o` is `'''null'''`, it is ignored. - * - If it is an `Iterator` or `Iterable`, its elements will be added. - * - If `o` is a node, it is added as it is. - * - If it is anything else, it gets wrapped in an [[scala.xml.Atom]]. - * - * @param o converts to an xml node and adds to this node buffer - * @return this nodebuffer - */ - def &+(o: Any): NodeBuffer = { - o match { - case null | _: Unit | Text("") => // ignore - case it: Iterator[_] => it foreach &+ - case n: Node => super.+=(n) - case ns: Iterable[_] => this &+ ns.iterator - case ns: Array[_] => this &+ ns.iterator - case d => super.+=(new Atom(d)) - } - this - } -} diff --git a/src/library/scala/xml/NodeSeq.scala b/src/library/scala/xml/NodeSeq.scala deleted file mode 100644 index b8022472fb..0000000000 --- a/src/library/scala/xml/NodeSeq.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import scala.collection.{ mutable, immutable, generic, SeqLike, AbstractSeq } -import mutable.{ Builder, ListBuffer } -import generic.{ CanBuildFrom } -import scala.language.implicitConversions - -/** This object ... 
- * - * @author Burak Emir - * @version 1.0 - */ -object NodeSeq { - final val Empty = fromSeq(Nil) - def fromSeq(s: Seq[Node]): NodeSeq = new NodeSeq { - def theSeq = s - } - type Coll = NodeSeq - implicit def canBuildFrom: CanBuildFrom[Coll, Node, NodeSeq] = - new CanBuildFrom[Coll, Node, NodeSeq] { - def apply(from: Coll) = newBuilder - def apply() = newBuilder - } - def newBuilder: Builder[Node, NodeSeq] = new ListBuffer[Node] mapResult fromSeq - implicit def seqToNodeSeq(s: Seq[Node]): NodeSeq = fromSeq(s) -} - -/** This class implements a wrapper around `Seq[Node]` that adds XPath - * and comprehension methods. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class NodeSeq extends AbstractSeq[Node] with immutable.Seq[Node] with SeqLike[Node, NodeSeq] with Equality { - import NodeSeq.seqToNodeSeq // import view magic for NodeSeq wrappers - - /** Creates a list buffer as builder for this class */ - override protected[this] def newBuilder = NodeSeq.newBuilder - - def theSeq: Seq[Node] - def length = theSeq.length - override def iterator = theSeq.iterator - - def apply(i: Int): Node = theSeq(i) - def apply(f: Node => Boolean): NodeSeq = filter(f) - - def xml_sameElements[A](that: Iterable[A]): Boolean = { - val these = this.iterator - val those = that.iterator - while (these.hasNext && those.hasNext) - if (these.next xml_!= those.next) - return false - - !these.hasNext && !those.hasNext - } - - protected def basisForHashCode: Seq[Any] = theSeq - - override def canEqual(other: Any) = other match { - case _: NodeSeq => true - case _ => false - } - - override def strict_==(other: Equality) = other match { - case x: NodeSeq => (length == x.length) && (theSeq sameElements x.theSeq) - case _ => false - } - - /** Projection function, which returns elements of `this` sequence based - * on the string `that`. 
Use: - * - `this \ "foo"` to get a list of all elements that are labelled with `"foo"`; - * - `\ "_"` to get a list of all elements (wildcard); - * - `ns \ "@foo"` to get the unprefixed attribute `"foo"`; - * - `ns \ "@{uri}foo"` to get the prefixed attribute `"pre:foo"` whose - * prefix `"pre"` is resolved to the namespace `"uri"`. - * - * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute - * values are wrapped in a [[scala.xml.Group]]. - * - * There is no support for searching a prefixed attribute by its literal prefix. - * - * The document order is preserved. - */ - def \(that: String): NodeSeq = { - def fail = throw new IllegalArgumentException(that) - def atResult = { - lazy val y = this(0) - val attr = - if (that.length == 1) fail - else if (that(1) == '{') { - val i = that indexOf '}' - if (i == -1) fail - val (uri, key) = (that.substring(2,i), that.substring(i+1, that.length())) - if (uri == "" || key == "") fail - else y.attribute(uri, key) - } - else y.attribute(that drop 1) - - attr match { - case Some(x) => Group(x) - case _ => NodeSeq.Empty - } - } - - def makeSeq(cond: (Node) => Boolean) = - NodeSeq fromSeq (this flatMap (_.child) filter cond) - - that match { - case "" => fail - case "_" => makeSeq(!_.isAtom) - case _ if (that(0) == '@' && this.length == 1) => atResult - case _ => makeSeq(_.label == that) - } - } - - /** Projection function, which returns elements of `this` sequence and of - * all its subsequences, based on the string `that`. Use: - * - `this \\ 'foo` to get a list of all elements that are labelled with `"foo"`; - * - `\\ "_"` to get a list of all elements (wildcard); - * - `ns \\ "@foo"` to get the unprefixed attribute `"foo"`; - * - `ns \\ "@{uri}foo"` to get each prefixed attribute `"pre:foo"` whose - * prefix `"pre"` is resolved to the namespace `"uri"`. - * - * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute - * values are wrapped in a [[scala.xml.Group]]. 
- * - * There is no support for searching a prefixed attribute by its literal prefix. - * - * The document order is preserved. - */ - def \\ (that: String): NodeSeq = { - def filt(cond: (Node) => Boolean) = this flatMap (_.descendant_or_self) filter cond - that match { - case "_" => filt(!_.isAtom) - case _ if that(0) == '@' => filt(!_.isAtom) flatMap (_ \ that) - case _ => filt(x => !x.isAtom && x.label == that) - } - } - - /** Convenience method which returns string text of the named attribute. Use: - * - `that \@ "foo"` to get the string text of attribute `"foo"`; - */ - def \@(attributeName: String): String = (this \ ("@" + attributeName)).text - - override def toString(): String = theSeq.mkString - - def text: String = (this map (_.text)).mkString -} diff --git a/src/library/scala/xml/Null.scala b/src/library/scala/xml/Null.scala deleted file mode 100644 index f763c023c4..0000000000 --- a/src/library/scala/xml/Null.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.isNameStart -import scala.collection.Iterator - -/** Essentially, every method in here is a dummy, returning Zero[T]. - * It provides a backstop for the unusual collection defined by MetaData, - * sort of a linked list of tails. 
- * - * @author Burak Emir - * @version 1.0 - */ -case object Null extends MetaData { - override def iterator = Iterator.empty - override def size = 0 - override def append(m: MetaData, scope: NamespaceBinding = TopScope): MetaData = m - override def filter(f: MetaData => Boolean): MetaData = this - - def copy(next: MetaData) = next - def getNamespace(owner: Node) = null - - override def hasNext = false - def next = null - def key = null - def value = null - def isPrefixed = false - - override def length = 0 - override def length(i: Int) = i - - override def strict_==(other: Equality) = other match { - case x: MetaData => x.length == 0 - case _ => false - } - override protected def basisForHashCode: Seq[Any] = Nil - - def apply(namespace: String, scope: NamespaceBinding, key: String) = null - def apply(key: String) = - if (isNameStart(key.head)) null - else throw new IllegalArgumentException("not a valid attribute name '"+key+"', so can never match !") - - protected def toString1(sb: StringBuilder) = () - override protected def toString1(): String = "" - - override def toString(): String = "" - - override def buildString(sb: StringBuilder): StringBuilder = sb - - override def wellformed(scope: NamespaceBinding) = true - - def remove(key: String) = this - def remove(namespace: String, scope: NamespaceBinding, key: String) = this -} diff --git a/src/library/scala/xml/PCData.scala b/src/library/scala/xml/PCData.scala deleted file mode 100644 index 31eea2b6d7..0000000000 --- a/src/library/scala/xml/PCData.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This class (which is not used by all XML parsers, but always used by the - * XHTML one) represents parseable character data, which appeared as CDATA - * sections in the input and is 
to be preserved as CDATA section in the output. - * - * @author Burak Emir - * @version 1.0 - */ -class PCData(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to the XML - * specification. - * - * @param sb the input string buffer associated to some XML element - * @return the input string buffer with the formatted CDATA section - */ - override def buildString(sb: StringBuilder): StringBuilder = - sb append "".format(data) -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object PCData { - def apply(data: String) = new PCData(data) - def unapply(other: Any): Option[String] = other match { - case x: PCData => Some(x.data) - case _ => None - } -} - diff --git a/src/library/scala/xml/PrefixedAttribute.scala b/src/library/scala/xml/PrefixedAttribute.scala deleted file mode 100644 index 4ab79c8677..0000000000 --- a/src/library/scala/xml/PrefixedAttribute.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** prefixed attributes always have a non-null namespace. 
- * - * @param pre - * @param key - * @param value the attribute value - * @param next1 - */ -class PrefixedAttribute( - val pre: String, - val key: String, - val value: Seq[Node], - val next1: MetaData) -extends Attribute -{ - val next = if (value ne null) next1 else next1.remove(key) - - /** same as this(pre, key, Text(value), next), or no attribute if value is null */ - def this(pre: String, key: String, value: String, next: MetaData) = - this(pre, key, if (value ne null) Text(value) else null: NodeSeq, next) - - /** same as this(pre, key, value.get, next), or no attribute if value is None */ - def this(pre: String, key: String, value: Option[Seq[Node]], next: MetaData) = - this(pre, key, value.orNull, next) - - /** Returns a copy of this unprefixed attribute with the given - * next field. - */ - def copy(next: MetaData) = - new PrefixedAttribute(pre, key, value, next) - - def getNamespace(owner: Node) = - owner.getNamespace(pre) - - /** forwards the call to next (because caller looks for unprefixed attribute */ - def apply(key: String): Seq[Node] = next(key) - - /** gets attribute value of qualified (prefixed) attribute with given key - */ - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = { - if (key == this.key && scope.getURI(pre) == namespace) - value - else - next(namespace, scope, key) - } -} - -object PrefixedAttribute { - def unapply(x: PrefixedAttribute) = Some((x.pre, x.key, x.value, x.next)) -} diff --git a/src/library/scala/xml/PrettyPrinter.scala b/src/library/scala/xml/PrettyPrinter.scala deleted file mode 100755 index 9e01905357..0000000000 --- a/src/library/scala/xml/PrettyPrinter.scala +++ /dev/null @@ -1,263 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import Utility.sbToString - -/** Class for pretty 
printing. After instantiating, you can use the - * format() and formatNode() methods to convert XML to a formatted - * string. The class can be reused to pretty print any number of - * XML nodes. - * - * @author Burak Emir - * @version 1.0 - * - * @param width the width to fit the output into - * @param step indentation - */ -class PrettyPrinter(width: Int, step: Int) { - - class BrokenException() extends java.lang.Exception - - class Item - case object Break extends Item { - override def toString() = "\\" - } - case class Box(col: Int, s: String) extends Item - case class Para(s: String) extends Item - - protected var items: List[Item] = Nil - - protected var cur = 0 - - protected def reset() = { - cur = 0 - items = Nil - } - - /** Try to cut at whitespace. - */ - protected def cut(s: String, ind: Int): List[Item] = { - val tmp = width - cur - if (s.length <= tmp) - return List(Box(ind, s)) - var i = s indexOf ' ' - if (i > tmp || i == -1) throw new BrokenException() // cannot break - - var last: List[Int] = Nil - while (i != -1 && i < tmp) { - last = i::last - i = s.indexOf(' ', i+1) - } - var res: List[Item] = Nil - while (Nil != last) try { - val b = Box(ind, s.substring(0, last.head)) - cur = ind - res = b :: Break :: cut(s.substring(last.head, s.length), ind) - // backtrack - last = last.tail - } catch { - case _:BrokenException => last = last.tail - } - throw new BrokenException() - } - - /** Try to make indented box, if possible, else para. - */ - protected def makeBox(ind: Int, s: String) = - if (cur + s.length > width) { // fits in this line - items ::= Box(ind, s) - cur += s.length - } - else try cut(s, ind) foreach (items ::= _) // break it up - catch { case _: BrokenException => makePara(ind, s) } // give up, para - - // dont respect indent in para, but afterwards - protected def makePara(ind: Int, s: String) = { - items = Break::Para(s)::Break::items - cur = ind - } - - // respect indent - protected def makeBreak() = { // using wrapping here... 
- items = Break :: items - cur = 0 - } - - protected def leafTag(n: Node) = { - def mkLeaf(sb: StringBuilder) { - sb append '<' - n nameToString sb - n.attributes buildString sb - sb append "/>" - } - sbToString(mkLeaf) - } - - protected def startTag(n: Node, pscope: NamespaceBinding): (String, Int) = { - var i = 0 - def mkStart(sb: StringBuilder) { - sb append '<' - n nameToString sb - i = sb.length + 1 - n.attributes buildString sb - n.scope.buildString(sb, pscope) - sb append '>' - } - (sbToString(mkStart), i) - } - - protected def endTag(n: Node) = { - def mkEnd(sb: StringBuilder) { - sb append "' - } - sbToString(mkEnd) - } - - protected def childrenAreLeaves(n: Node): Boolean = { - def isLeaf(l: Node) = l match { - case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => true - case _ => false - } - n.child forall isLeaf - } - - protected def fits(test: String) = - test.length < width - cur - - private def doPreserve(node: Node) = - node.attribute(XML.namespace, XML.space).map(_.toString == XML.preserve) getOrElse false - - protected def traverse(node: Node, pscope: NamespaceBinding, ind: Int): Unit = node match { - - case Text(s) if s.trim() == "" => - ; - case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => - makeBox( ind, node.toString().trim() ) - case g @ Group(xs) => - traverse(xs.iterator, pscope, ind) - case _ => - val test = { - val sb = new StringBuilder() - Utility.serialize(node, pscope, sb, stripComments = false) - if (doPreserve(node)) sb.toString - else TextBuffer.fromString(sb.toString).toText(0).data - } - if (childrenAreLeaves(node) && fits(test)) { - makeBox(ind, test) - } else { - val (stg, len2) = startTag(node, pscope) - val etg = endTag(node) - if (stg.length < width - cur) { // start tag fits - makeBox(ind, stg) - makeBreak() - traverse(node.child.iterator, node.scope, ind + step) - makeBox(ind, etg) - } else if (len2 < width - cur) { - // - if (!lastwasbreak) sb.append('\n') // on windows: \r\n ? 
- lastwasbreak = true - cur = 0 -// while (cur < last) { -// sb append ' ' -// cur += 1 -// } - - case Box(i, s) => - lastwasbreak = false - while (cur < i) { - sb append ' ' - cur += 1 - } - sb.append(s) - case Para( s ) => - lastwasbreak = false - sb append s - } - } - - // public convenience methods - - /** Returns a formatted string containing well-formed XML with - * given namespace to prefix mapping. - * - * @param n the node to be serialized - * @param pscope the namespace to prefix mapping - * @return the formatted string - */ - def format(n: Node, pscope: NamespaceBinding = null): String = - sbToString(format(n, pscope, _)) - - /** Returns a formatted string containing well-formed XML. - * - * @param nodes the sequence of nodes to be serialized - * @param pscope the namespace to prefix mapping - */ - def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding = null): String = - sbToString(formatNodes(nodes, pscope, _)) - - /** Appends a formatted string containing well-formed XML with - * the given namespace to prefix mapping to the given stringbuffer. 
- * - * @param nodes the nodes to be serialized - * @param pscope the namespace to prefix mapping - * @param sb the string buffer to which to append to - */ - def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding, sb: StringBuilder): Unit = - nodes foreach (n => sb append format(n, pscope)) -} diff --git a/src/library/scala/xml/ProcInstr.scala b/src/library/scala/xml/ProcInstr.scala deleted file mode 100644 index 189c1c6878..0000000000 --- a/src/library/scala/xml/ProcInstr.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** an XML node for processing instructions (PI) - * - * @author Burak Emir - * @param target target name of this PI - * @param proctext text contained in this node, may not contain "?>" - */ -case class ProcInstr(target: String, proctext: String) extends SpecialNode -{ - if (!Utility.isName(target)) - throw new IllegalArgumentException(target+" must be an XML Name") - if (proctext contains "?>") - throw new IllegalArgumentException(proctext+" may not contain \"?>\"") - if (target.toLowerCase == "xml") - throw new IllegalArgumentException(target+" is reserved") - - final override def doCollectNamespaces = false - final override def doTransform = false - - final def label = "#PI" - override def text = "" - - /** appends "<?" target (" "+text)?+"?>" - * to this stringbuffer. 
- */ - override def buildString(sb: StringBuilder) = - sb append "".format(target, (if (proctext == "") "" else " " + proctext)) -} diff --git a/src/library/scala/xml/QNode.scala b/src/library/scala/xml/QNode.scala deleted file mode 100644 index f9e3f1854b..0000000000 --- a/src/library/scala/xml/QNode.scala +++ /dev/null @@ -1,20 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** This object provides an extractor method to match a qualified node with - * its namespace URI - * - * @author Burak Emir - * @version 1.0 - */ -object QNode { - def unapplySeq(n: Node) = Some((n.scope.getURI(n.prefix), n.label, n.attributes, n.child)) -} diff --git a/src/library/scala/xml/SpecialNode.scala b/src/library/scala/xml/SpecialNode.scala deleted file mode 100644 index 5fef8ef66c..0000000000 --- a/src/library/scala/xml/SpecialNode.scala +++ /dev/null @@ -1,33 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** `SpecialNode` is a special XML node which represents either text - * `(PCDATA)`, a comment, a `PI`, or an entity ref. - * - * `SpecialNode`s also play the role of [[scala.xml.pull.XMLEvent]]s for - * pull-parsing. - * - * @author Burak Emir - */ -abstract class SpecialNode extends Node with pull.XMLEvent { - - /** always empty */ - final override def attributes = Null - - /** always Node.EmptyNamespace */ - final override def namespace = null - - /** always empty */ - final def child = Nil - - /** Append string representation to the given string buffer argument. 
*/ - def buildString(sb: StringBuilder): StringBuilder -} diff --git a/src/library/scala/xml/Text.scala b/src/library/scala/xml/Text.scala deleted file mode 100644 index debea0c025..0000000000 --- a/src/library/scala/xml/Text.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** The class `Text` implements an XML node for text (PCDATA). - * It is used in both non-bound and bound XML representations. - * - * @author Burak Emir - * @param data the text contained in this node, may not be null. - */ -class Text(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to the XML - * specification. - */ - override def buildString(sb: StringBuilder): StringBuilder = - Utility.escape(data, sb) -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. 
- * - * @author Burak Emir - * @version 1.0 - */ -object Text { - def apply(data: String) = new Text(data) - def unapply(other: Any): Option[String] = other match { - case x: Text => Some(x.data) - case _ => None - } -} diff --git a/src/library/scala/xml/TextBuffer.scala b/src/library/scala/xml/TextBuffer.scala deleted file mode 100644 index 514b1701af..0000000000 --- a/src/library/scala/xml/TextBuffer.scala +++ /dev/null @@ -1,46 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -import Utility.isSpace - -object TextBuffer { - def fromString(str: String): TextBuffer = new TextBuffer() append str -} - -/** The class `TextBuffer` is for creating text nodes without surplus - * whitespace. All occurrences of one or more whitespace in strings - * appended with the `append` method will be replaced by a single space - * character, and leading and trailing space will be removed completely. - */ -class TextBuffer -{ - val sb = new StringBuilder() - - /** Appends this string to the text buffer, trimming whitespaces as needed. - */ - def append(cs: Seq[Char]): this.type = { - cs foreach { c => - if (!isSpace(c)) sb append c - else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' - } - this - } - - /** Returns an empty sequence if text is only whitespace. - * - * @return the text without whitespaces. 
- */ - def toText: Seq[Text] = sb.toString.trim match { - case "" => Nil - case s => Seq(Text(s)) - } -} diff --git a/src/library/scala/xml/TopScope.scala b/src/library/scala/xml/TopScope.scala deleted file mode 100644 index 474fbbbdb5..0000000000 --- a/src/library/scala/xml/TopScope.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** top level namespace scope. only contains the predefined binding - * for the "xml" prefix which is bound to - * "http://www.w3.org/XML/1998/namespace" - */ -object TopScope extends NamespaceBinding(null, null, null) { - - import XML.{ xml, namespace } - - override def getURI(prefix1: String): String = - if (prefix1 == xml) namespace else null - - override def getPrefix(uri1: String): String = - if (uri1 == namespace) xml else null - - override def toString() = "" - - override def buildString(stop: NamespaceBinding) = "" - override def buildString(sb: StringBuilder, ignore: NamespaceBinding) = {} -} diff --git a/src/library/scala/xml/TypeSymbol.scala b/src/library/scala/xml/TypeSymbol.scala deleted file mode 100644 index fb371ee340..0000000000 --- a/src/library/scala/xml/TypeSymbol.scala +++ /dev/null @@ -1,15 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml - - -abstract class TypeSymbol diff --git a/src/library/scala/xml/Unparsed.scala b/src/library/scala/xml/Unparsed.scala deleted file mode 100644 index bc190eb724..0000000000 --- a/src/library/scala/xml/Unparsed.scala +++ /dev/null @@ -1,36 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, 
LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -/** An XML node for unparsed content. It will be output verbatim, all bets - * are off regarding wellformedness etc. - * - * @author Burak Emir - * @param data content in this node, may not be null. - */ -class Unparsed(data: String) extends Atom[String](data) { - - /** Returns text, with some characters escaped according to XML - * specification. - */ - override def buildString(sb: StringBuilder): StringBuilder = - sb append data -} - -/** This singleton object contains the `apply`and `unapply` methods for - * convenient construction and deconstruction. - * - * @author Burak Emir - * @version 1.0 - */ -object Unparsed { - def apply(data: String) = new Unparsed(data) - def unapply(x: Unparsed) = Some(x.data) -} diff --git a/src/library/scala/xml/UnprefixedAttribute.scala b/src/library/scala/xml/UnprefixedAttribute.scala deleted file mode 100644 index 6fa827da5f..0000000000 --- a/src/library/scala/xml/UnprefixedAttribute.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml - -/** Unprefixed attributes have the null namespace, and no prefix field - * - * @author Burak Emir - */ -class UnprefixedAttribute( - val key: String, - val value: Seq[Node], - next1: MetaData) -extends Attribute -{ - final val pre = null - val next = if (value ne null) next1 else next1.remove(key) - - /** same as this(key, Text(value), next), or no attribute if value is null */ - def this(key: String, value: String, next: MetaData) = - this(key, if (value ne null) Text(value) else null: NodeSeq, next) - - /** same as this(key, value.get, next), or no attribute if value is None */ - def this(key: String, value: 
Option[Seq[Node]], next: MetaData) = - this(key, value.orNull, next) - - /** returns a copy of this unprefixed attribute with the given next field*/ - def copy(next: MetaData) = new UnprefixedAttribute(key, value, next) - - final def getNamespace(owner: Node): String = null - - /** - * Gets value of unqualified (unprefixed) attribute with given key, null if not found - * - * @param key - * @return value as Seq[Node] if key is found, null otherwise - */ - def apply(key: String): Seq[Node] = - if (key == this.key) value else next(key) - - /** - * Forwards the call to next (because caller looks for prefixed attribute). - * - * @param namespace - * @param scope - * @param key - * @return .. - */ - def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = - next(namespace, scope, key) -} -object UnprefixedAttribute { - def unapply(x: UnprefixedAttribute) = Some((x.key, x.value, x.next)) -} diff --git a/src/library/scala/xml/Utility.scala b/src/library/scala/xml/Utility.scala deleted file mode 100755 index 9134476401..0000000000 --- a/src/library/scala/xml/Utility.scala +++ /dev/null @@ -1,410 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import scala.collection.mutable -import parsing.XhtmlEntities -import scala.language.implicitConversions - -/** - * The `Utility` object provides utility functions for processing instances - * of bound and not bound XML classes, as well as escaping text nodes. - * - * @author Burak Emir - */ -object Utility extends AnyRef with parsing.TokenTests { - final val SU = '\u001A' - - // [Martin] This looks dubious. We don't convert StringBuilders to - // Strings anywhere else, why do it here? 
- implicit def implicitSbToString(sb: StringBuilder) = sb.toString() - - // helper for the extremely oft-repeated sequence of creating a - // StringBuilder, passing it around, and then grabbing its String. - private [xml] def sbToString(f: (StringBuilder) => Unit): String = { - val sb = new StringBuilder - f(sb) - sb.toString - } - private[xml] def isAtomAndNotText(x: Node) = x.isAtom && !x.isInstanceOf[Text] - - /** Trims an element - call this method, when you know that it is an - * element (and not a text node) so you know that it will not be trimmed - * away. With this assumption, the function can return a `Node`, rather - * than a `Seq[Node]`. If you don't know, call `trimProper` and account - * for the fact that you may get back an empty sequence of nodes. - * - * Precondition: node is not a text node (it might be trimmed) - */ - def trim(x: Node): Node = x match { - case Elem(pre, lab, md, scp, child@_*) => - Elem(pre, lab, md, scp, (child flatMap trimProper):_*) - } - - /** trim a child of an element. `Attribute` values and `Atom` nodes that - * are not `Text` nodes are unaffected. - */ - def trimProper(x:Node): Seq[Node] = x match { - case Elem(pre,lab,md,scp,child@_*) => - Elem(pre,lab,md,scp, (child flatMap trimProper):_*) - case Text(s) => - new TextBuffer().append(s).toText - case _ => - x - } - - /** returns a sorted attribute list */ - def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else { - val key = md.key - val smaller = sort(md.filter { m => m.key < key }) - val greater = sort(md.filter { m => m.key > key }) - smaller.foldRight (md copy greater) ((x, xs) => x copy xs) - } - - /** Return the node with its attribute list sorted alphabetically - * (prefixes are ignored) */ - def sort(n:Node): Node = n match { - case Elem(pre,lab,md,scp,child@_*) => - Elem(pre,lab,sort(md),scp, (child map sort):_*) - case _ => n - } - - /** - * Escapes the characters < > & and " from string. 
- */ - final def escape(text: String): String = sbToString(escape(text, _)) - - object Escapes { - /** For reasons unclear escape and unescape are a long ways from - * being logical inverses. */ - val pairs = Map( - "lt" -> '<', - "gt" -> '>', - "amp" -> '&', - "quot" -> '"' - // enigmatic comment explaining why this isn't escaped -- - // is valid xhtml but not html, and IE doesn't know it, says jweb - // "apos" -> '\'' - ) - val escMap = pairs map { case (s, c) => c-> ("&%s;" format s) } - val unescMap = pairs ++ Map("apos" -> '\'') - } - import Escapes.{ escMap, unescMap } - - /** - * Appends escaped string to `s`. - */ - final def escape(text: String, s: StringBuilder): StringBuilder = { - // Implemented per XML spec: - // http://www.w3.org/International/questions/qa-controls - // imperative code 3x-4x faster than current implementation - // dpp (David Pollak) 2010/02/03 - val len = text.length - var pos = 0 - while (pos < len) { - text.charAt(pos) match { - case '<' => s.append("<") - case '>' => s.append(">") - case '&' => s.append("&") - case '"' => s.append(""") - case '\n' => s.append('\n') - case '\r' => s.append('\r') - case '\t' => s.append('\t') - case c => if (c >= ' ') s.append(c) - } - - pos += 1 - } - s - } - - /** - * Appends unescaped string to `s`, `amp` becomes `&`, - * `lt` becomes `<` etc.. - * - * @return `'''null'''` if `ref` was not a predefined entity. - */ - final def unescape(ref: String, s: StringBuilder): StringBuilder = - ((unescMap get ref) map (s append _)).orNull - - /** - * Returns a set of all namespaces used in a sequence of nodes - * and all their descendants, including the empty namespaces. - */ - def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] = - nodes.foldLeft(new mutable.HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set } - - /** - * Adds all namespaces in node to set. 
- */ - def collectNamespaces(n: Node, set: mutable.Set[String]) { - if (n.doCollectNamespaces) { - set += n.namespace - for (a <- n.attributes) a match { - case _:PrefixedAttribute => - set += a.getNamespace(n) - case _ => - } - for (i <- n.child) - collectNamespaces(i, set) - } - } - - // def toXML( - // x: Node, - // pscope: NamespaceBinding = TopScope, - // sb: StringBuilder = new StringBuilder, - // stripComments: Boolean = false, - // decodeEntities: Boolean = true, - // preserveWhitespace: Boolean = false, - // minimizeTags: Boolean = false): String = - // { - // toXMLsb(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - // sb.toString() - // } - - /** - * Serialize the provided Node to the provided StringBuilder. - *

- * Note that calling this source-compatible method will result in the same old, arguably almost universally unwanted, - * behaviour. - */ - @deprecated("Please use `serialize` instead and specify a `minimizeTags` parameter", "2.10.0") - def toXML( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = false): StringBuilder = - { - serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never) - } - - /** - * Serialize an XML Node to a StringBuilder. - * - * This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate - * combination of named/default arguments and overloading. - * - * @todo use a Writer instead - */ - def serialize( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder = - { - x match { - case c: Comment if !stripComments => c buildString sb - case s: SpecialNode => s buildString sb - case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, minimizeTags = minimizeTags) ; sb - case el: Elem => - // print tag with namespace declarations - sb.append('<') - el.nameToString(sb) - if (el.attributes ne null) el.attributes.buildString(sb) - el.scope.buildString(sb, pscope) - if (el.child.isEmpty && - (minimizeTags == MinimizeMode.Always || - (minimizeTags == MinimizeMode.Default && el.minimizeEmpty))) - { - // no children, so use short form: - sb.append("/>") - } else { - // children, so use long form: ... 
- sb.append('>') - sequenceToXML(el.child, el.scope, sb, stripComments) - sb.append("') - } - case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName) - } - } - - def sequenceToXML( - children: Seq[Node], - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = true, - preserveWhitespace: Boolean = false, - minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit = - { - if (children.isEmpty) return - else if (children forall isAtomAndNotText) { // add space - val it = children.iterator - val f = it.next() - serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - while (it.hasNext) { - val x = it.next() - sb.append(' ') - serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - } - } - else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } - } - - /** - * Returns prefix of qualified name if any. - */ - final def prefix(name: String): Option[String] = (name indexOf ':') match { - case -1 => None - case i => Some(name.substring(0, i)) - } - - /** - * Returns a hashcode for the given constituents of a node - */ - def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = - scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##) - - def appendQuoted(s: String): String = sbToString(appendQuoted(s, _)) - - /** - * Appends "s" if string `s` does not contain ", - * 's' otherwise. 
- */ - def appendQuoted(s: String, sb: StringBuilder) = { - val ch = if (s contains '"') '\'' else '"' - sb.append(ch).append(s).append(ch) - } - - /** - * Appends "s" and escapes and " i s with \" - */ - def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = { - sb.append('"') - for (c <- s) c match { - case '"' => sb.append('\\'); sb.append('"') - case _ => sb.append(c) - } - sb.append('"') - } - - def getName(s: String, index: Int): String = { - if (index >= s.length) null - else { - val xs = s drop index - if (xs.nonEmpty && isNameStart(xs.head)) xs takeWhile isNameChar - else "" - } - } - - /** - * Returns `'''null'''` if the value is a correct attribute value, - * error message if it isn't. - */ - def checkAttributeValue(value: String): String = { - var i = 0 - while (i < value.length) { - value.charAt(i) match { - case '<' => - return "< not allowed in attribute value" - case '&' => - val n = getName(value, i+1) - if (n eq null) - return "malformed entity reference in attribute value ["+value+"]" - i = i + n.length + 1 - if (i >= value.length || value.charAt(i) != ';') - return "malformed entity reference in attribute value ["+value+"]" - case _ => - } - i = i + 1 - } - null - } - - def parseAttributeValue(value: String): Seq[Node] = { - val sb = new StringBuilder - var rfb: StringBuilder = null - val nb = new NodeBuffer() - - val it = value.iterator - while (it.hasNext) { - var c = it.next() - // entity! 
flush buffer into text node - if (c == '&') { - c = it.next() - if (c == '#') { - c = it.next() - val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) - sb.append(theChar) - } - else { - if (rfb eq null) rfb = new StringBuilder() - rfb append c - c = it.next() - while (c != ';') { - rfb.append(c) - c = it.next() - } - val ref = rfb.toString() - rfb.clear() - unescape(ref,sb) match { - case null => - if (sb.length > 0) { // flush buffer - nb += Text(sb.toString()) - sb.clear() - } - nb += EntityRef(ref) // add entityref - case _ => - } - } - } - else sb append c - } - if (sb.length > 0) { // flush buffer - val x = Text(sb.toString()) - if (nb.length == 0) - return x - else - nb += x - } - nb - } - - /** - * {{{ - * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" - * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" - * }}} - * See [66] - */ - def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = { - val hex = (ch() == 'x') && { nextch(); true } - val base = if (hex) 16 else 10 - var i = 0 - while (ch() != ';') { - ch() match { - case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => - i = i * base + ch().asDigit - case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' - | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => - if (! 
hex) - reportSyntaxError("hex char not allowed in decimal char ref\n" + - "Did you mean to write &#x ?") - else - i = i * base + ch().asDigit - case SU => - reportTruncatedError("") - case _ => - reportSyntaxError("character '" + ch() + "' not allowed in char ref\n") - } - nextch() - } - new String(Array(i), 0, 1) - } -} diff --git a/src/library/scala/xml/XML.scala b/src/library/scala/xml/XML.scala deleted file mode 100755 index 020264e509..0000000000 --- a/src/library/scala/xml/XML.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml - -import parsing.NoBindingFactoryAdapter -import factory.XMLLoader -import java.io.{ File, FileDescriptor, FileInputStream, FileOutputStream } -import java.io.{ InputStream, Reader, StringReader, Writer } -import java.nio.channels.Channels -import scala.util.control.Exception.ultimately - -object Source { - def fromFile(file: File) = new InputSource(new FileInputStream(file)) - def fromFile(fd: FileDescriptor) = new InputSource(new FileInputStream(fd)) - def fromFile(name: String) = new InputSource(new FileInputStream(name)) - - def fromInputStream(is: InputStream) = new InputSource(is) - def fromReader(reader: Reader) = new InputSource(reader) - def fromSysId(sysID: String) = new InputSource(sysID) - def fromString(string: String) = fromReader(new StringReader(string)) -} - -/** - * Governs how empty elements (i.e. those without child elements) should be serialized. - */ -object MinimizeMode extends Enumeration { - /** Minimize empty tags if they were originally empty when parsed, or if they were constructed - * with [[scala.xml.Elem]]`#minimizeEmpty` == true - */ - val Default = Value - - /** Always minimize empty tags. 
Note that this may be problematic for XHTML, in which - * case [[scala.xml.Xhtml]]`#toXhtml` should be used instead. - */ - val Always = Value - - /** Never minimize empty tags. - */ - val Never = Value -} - -/** The object `XML` provides constants, and functions to load - * and save XML elements. Use this when data binding is not desired, i.e. - * when XML is handled using `Symbol` nodes. - * - * @author Burak Emir - * @version 1.0, 25/04/2005 - */ -object XML extends XMLLoader[Elem] { - val xml = "xml" - val xmlns = "xmlns" - val namespace = "http://www.w3.org/XML/1998/namespace" - val preserve = "preserve" - val space = "space" - val lang = "lang" - val encoding = "ISO-8859-1" - - /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ - def withSAXParser(p: SAXParser): XMLLoader[Elem] = - new XMLLoader[Elem] { override val parser: SAXParser = p } - - /** Saves a node to a file with given filename using given encoding - * optionally with xmldecl and doctype declaration. - * - * @param filename the filename - * @param node the xml node we want to write - * @param enc encoding to use - * @param xmlDecl if true, write xml declaration - * @param doctype if not null, write doctype declaration - */ - final def save( - filename: String, - node: Node, - enc: String = encoding, - xmlDecl: Boolean = false, - doctype: dtd.DocType = null - ): Unit = - { - val fos = new FileOutputStream(filename) - val w = Channels.newWriter(fos.getChannel(), enc) - - ultimately(w.close())( - write(w, node, enc, xmlDecl, doctype) - ) - } - - /** Writes the given node using writer, optionally with xml decl and doctype. - * It's the caller's responsibility to close the writer. 
- * - * @param w the writer - * @param node the xml node we want to write - * @param enc the string to be used in `xmlDecl` - * @param xmlDecl if true, write xml declaration - * @param doctype if not null, write doctype declaration - */ - final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default) { - /* TODO: optimize by giving writer parameter to toXML*/ - if (xmlDecl) w.write("\n") - if (doctype ne null) w.write( doctype.toString() + "\n") - w.write(Utility.serialize(node, minimizeTags = minimizeTags).toString) - } -} diff --git a/src/library/scala/xml/Xhtml.scala b/src/library/scala/xml/Xhtml.scala deleted file mode 100644 index 6a12c1a89a..0000000000 --- a/src/library/scala/xml/Xhtml.scala +++ /dev/null @@ -1,97 +0,0 @@ - -package scala -package xml - -import parsing.XhtmlEntities -import Utility.{ sbToString, isAtomAndNotText } - -/* (c) David Pollak 2007 WorldWide Conferencing, LLC */ - -object Xhtml -{ - /** - * Convenience function: same as toXhtml(node, false, false) - * - * @param node the node - */ - def toXhtml(node: Node): String = sbToString(sb => toXhtml(x = node, sb = sb)) - - /** - * Convenience function: amounts to calling toXhtml(node) on each - * node in the sequence. - * - * @param nodeSeq the node sequence - */ - def toXhtml(nodeSeq: NodeSeq): String = sbToString(sb => sequenceToXML(nodeSeq: Seq[Node], sb = sb)) - - /** Elements which we believe are safe to minimize if minimizeTags is true. 
- * See http://www.w3.org/TR/xhtml1/guidelines.html#C_3 - */ - private val minimizableElements = - List("base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col") - - def toXhtml( - x: Node, - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = false, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = true): Unit = - { - def decode(er: EntityRef) = XhtmlEntities.entMap.get(er.entityName) match { - case Some(chr) if chr.toInt >= 128 => sb.append(chr) - case _ => er.buildString(sb) - } - def shortForm = - minimizeTags && - (x.child == null || x.child.length == 0) && - (minimizableElements contains x.label) - - x match { - case c: Comment => if (!stripComments) c buildString sb - case er: EntityRef if decodeEntities => decode(er) - case x: SpecialNode => x buildString sb - case g: Group => - g.nodes foreach { toXhtml(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } - - case _ => - sb.append('<') - x.nameToString(sb) - if (x.attributes ne null) x.attributes.buildString(sb) - x.scope.buildString(sb, pscope) - - if (shortForm) sb.append(" />") - else { - sb.append('>') - sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - sb.append("') - } - } - } - - /** - * Amounts to calling toXhtml(node, ...) with the given parameters on each node. 
- */ - def sequenceToXML( - children: Seq[Node], - pscope: NamespaceBinding = TopScope, - sb: StringBuilder = new StringBuilder, - stripComments: Boolean = false, - decodeEntities: Boolean = false, - preserveWhitespace: Boolean = false, - minimizeTags: Boolean = true): Unit = - { - if (children.isEmpty) - return - - val doSpaces = children forall isAtomAndNotText // interleave spaces - for (c <- children.take(children.length - 1)) { - toXhtml(c, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - if (doSpaces) sb append ' ' - } - toXhtml(children.last, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - } -} diff --git a/src/library/scala/xml/dtd/ContentModel.scala b/src/library/scala/xml/dtd/ContentModel.scala deleted file mode 100644 index 4007985dce..0000000000 --- a/src/library/scala/xml/dtd/ContentModel.scala +++ /dev/null @@ -1,118 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package dtd - -import scala.xml.dtd.impl._ -import scala.xml.Utility.sbToString -import PartialFunction._ - -object ContentModel extends WordExp { - type _labelT = ElemName - type _regexpT = RegExp - - object Translator extends WordBerrySethi { - override val lang: ContentModel.this.type = ContentModel.this - } - - case class ElemName(name: String) extends Label { - override def toString() = """ElemName("%s")""" format name - } - - def isMixed(cm: ContentModel) = cond(cm) { case _: MIXED => true } - def containsText(cm: ContentModel) = (cm == PCDATA) || isMixed(cm) - def parse(s: String): ContentModel = ContentModelParser.parse(s) - - def getLabels(r: RegExp): Set[String] = { - def traverse(r: RegExp): Set[String] = r match { // !!! 
check for match translation problem - case Letter(ElemName(name)) => Set(name) - case Star( x @ _ ) => traverse( x ) // bug if x@_* - case Sequ( xs @ _* ) => Set(xs flatMap traverse: _*) - case Alt( xs @ _* ) => Set(xs flatMap traverse: _*) - } - - traverse(r) - } - - def buildString(r: RegExp): String = sbToString(buildString(r, _)) - - /* precond: rs.length >= 1 */ - private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char) { - buildString(rs.head, sb) - for (z <- rs.tail) { - sb append sep - buildString(z, sb) - } - } - - def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match { - case ANY => sb append "ANY" - case EMPTY => sb append "EMPTY" - case PCDATA => sb append "(#PCDATA)" - case ELEMENTS(_) | MIXED(_) => c buildString sb - } - - def buildString(r: RegExp, sb: StringBuilder): StringBuilder = - r match { // !!! check for match translation problem - case Eps => - sb - case Sequ(rs @ _*) => - sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' ) - case Alt(rs @ _*) => - sb.append( '(' ); buildString(rs, sb, '|'); sb.append( ')' ) - case Star(r: RegExp) => - sb.append( '(' ); buildString(r, sb); sb.append( ")*" ) - case Letter(ElemName(name)) => - sb.append(name) - } - -} - -sealed abstract class ContentModel -{ - override def toString(): String = sbToString(buildString) - def buildString(sb: StringBuilder): StringBuilder -} - -case object PCDATA extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("(#PCDATA)") -} -case object EMPTY extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("EMPTY") -} -case object ANY extends ContentModel { - override def buildString(sb: StringBuilder): StringBuilder = sb.append("ANY") -} -sealed abstract class DFAContentModel extends ContentModel { - import ContentModel.{ ElemName, Translator } - def r: ContentModel.RegExp - - lazy val dfa: DetWordAutom[ElemName] = { - val nfa = 
Translator.automatonFrom(r, 1) - new SubsetConstruction(nfa).determinize - } -} - -case class MIXED(r: ContentModel.RegExp) extends DFAContentModel { - import ContentModel.{ Alt, RegExp } - - override def buildString(sb: StringBuilder): StringBuilder = { - val newAlt = r match { case Alt(rs @ _*) => Alt(rs drop 1: _*) } - - sb append "(#PCDATA|" - ContentModel.buildString(newAlt: RegExp, sb) - sb append ")*" - } -} - -case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel { - override def buildString(sb: StringBuilder): StringBuilder = - ContentModel.buildString(r, sb) -} diff --git a/src/library/scala/xml/dtd/ContentModelParser.scala b/src/library/scala/xml/dtd/ContentModelParser.scala deleted file mode 100644 index 71b391c422..0000000000 --- a/src/library/scala/xml/dtd/ContentModelParser.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package dtd - -/** Parser for regexps (content models in DTD element declarations) */ - -object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA - import ContentModel._ - - /** parses the argument to a regexp */ - def parse(s: String): ContentModel = { initScanner(s); contentspec } - - def accept(tok: Int) = { - if (token != tok) { - if ((tok == STAR) && (token == END)) // common mistake - scala.sys.error("in DTDs, \n"+ - "mixed content models must be like (#PCDATA|Name|Name|...)*") - else - scala.sys.error("expected "+token2string(tok)+ - ", got unexpected token:"+token2string(token)) - } - nextToken() - } - - // s [ '+' | '*' | '?' 
] - def maybeSuffix(s: RegExp) = token match { - case STAR => nextToken(); Star(s) - case PLUS => nextToken(); Sequ(s, Star(s)) - case OPT => nextToken(); Alt(Eps, s) - case _ => s - } - - // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp) - - def contentspec: ContentModel = token match { - - case NAME => value match { - case "ANY" => ANY - case "EMPTY" => EMPTY - case _ => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value ) - } - case LPAREN => - - nextToken() - sOpt() - if (token != TOKEN_PCDATA) - ELEMENTS(regexp) - else { - nextToken() - token match { - case RPAREN => - PCDATA - case CHOICE => - val res = MIXED(choiceRest(Eps)) - sOpt() - accept( RPAREN ) - accept( STAR ) - res - case _ => - scala.sys.error("unexpected token:" + token2string(token) ) - } - } - - case _ => - scala.sys.error("unexpected token:" + token2string(token) ) - } - // sopt ::= S? - def sOpt() = if( token == S ) nextToken() - - // (' S? mixed ::= '#PCDATA' S? ')' - // | '#PCDATA' (S? '|' S? atom)* S? ')*' - - // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ] - def regexp: RegExp = { - val p = particle - sOpt() - maybeSuffix(token match { - case RPAREN => nextToken(); p - case CHOICE => val q = choiceRest( p );accept( RPAREN ); q - case COMMA => val q = seqRest( p ); accept( RPAREN ); q - }) - } - - // seqRest ::= (',' S? cp S?)+ - def seqRest(p: RegExp) = { - var k = List(p) - while( token == COMMA ) { - nextToken() - sOpt() - k = particle::k - sOpt() - } - Sequ( k.reverse:_* ) - } - - // choiceRest ::= ('|' S? cp S?)+ - def choiceRest( p:RegExp ) = { - var k = List( p ) - while( token == CHOICE ) { - nextToken() - sOpt() - k = particle::k - sOpt() - } - Alt( k.reverse:_* ) - } - - // particle ::= '(' S? regexp - // | name [ '+' | '*' | '?' 
] - def particle = token match { - case LPAREN => nextToken(); sOpt(); regexp - case NAME => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a) - case _ => scala.sys.error("expected '(' or Name, got:"+token2string(token)) - } - - // atom ::= name - def atom = token match { - case NAME => val a = Letter(ElemName(value)); nextToken(); a - case _ => scala.sys.error("expected Name, got:"+token2string(token)) - } -} diff --git a/src/library/scala/xml/dtd/DTD.scala b/src/library/scala/xml/dtd/DTD.scala deleted file mode 100644 index 16a824fe2c..0000000000 --- a/src/library/scala/xml/dtd/DTD.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -import scala.collection.mutable - -/** A document type declaration. - * - * @author Burak Emir - */ -abstract class DTD { - var externalID: ExternalID = null - var decls: List[Decl] = Nil - def notations: Seq[NotationDecl] = Nil - def unparsedEntities: Seq[EntityDecl] = Nil - - var elem: mutable.Map[String, ElemDecl] = new mutable.HashMap[String, ElemDecl]() - var attr: mutable.Map[String, AttListDecl] = new mutable.HashMap[String, AttListDecl]() - var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() - - override def toString() = - "DTD [\n%s%s]".format( - Option(externalID) getOrElse "", - decls.mkString("", "\n", "\n") - ) -} diff --git a/src/library/scala/xml/dtd/Decl.scala b/src/library/scala/xml/dtd/Decl.scala deleted file mode 100644 index 8bf859c460..0000000000 --- a/src/library/scala/xml/dtd/Decl.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ - ** ________ ___ / / ___ Scala API ** - ** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** - ** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** - ** /____/\___/_/ |_/____/_/ | | ** - 
** |/ ** - \* */ - -package scala -package xml -package dtd - -import Utility.sbToString - -sealed abstract class Decl - -sealed abstract class MarkupDecl extends Decl { - def buildString(sb: StringBuilder): StringBuilder -} - -/** an element declaration - */ -case class ElemDecl(name: String, contentModel: ContentModel) -extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -case class AttListDecl(name: String, attrs:List[AttrDecl]) -extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "") - } -} - -/** an attribute declaration. at this point, the tpe is a string. Future - * versions might provide a way to access the attribute types more - * directly. - */ -case class AttrDecl(name: String, tpe: String, default: DefaultDecl) { - override def toString(): String = sbToString(buildString) - - def buildString(sb: StringBuilder): StringBuilder = { - sb append " " append name append ' ' append tpe append ' ' - default buildString sb - } - -} - -/** an entity declaration */ -sealed abstract class EntityDecl extends MarkupDecl - -/** a parsed general entity declaration */ -case class ParsedEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -/** a parameter entity declaration */ -case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} - -/** an unparsed entity declaration */ -case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "' - } -} -/** a notation declaration */ -case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl { - override def buildString(sb: StringBuilder): StringBuilder = { - sb append "" */ - final 
override def toString() = { - def intString = - if (intSubset.isEmpty) "" - else intSubset.mkString("[", "", "]") - - """""".format(name, extID.toString(), intString) - } -} - -object DocType { - /** Creates a doctype with no external id, nor internal subset declarations. */ - def apply(name: String): DocType = apply(name, NoExternalID, Nil) -} diff --git a/src/library/scala/xml/dtd/ElementValidator.scala b/src/library/scala/xml/dtd/ElementValidator.scala deleted file mode 100644 index 4830769a7d..0000000000 --- a/src/library/scala/xml/dtd/ElementValidator.scala +++ /dev/null @@ -1,132 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - -import PartialFunction._ -import scala.collection.mutable - -import ContentModel.ElemName -import MakeValidationException._ // @todo other exceptions - -import impl._ - -/** validate children and/or attributes of an element - * exceptions are created but not thrown. 
- */ -class ElementValidator() extends Function1[Node,Boolean] { - - private var exc: List[ValidationException] = Nil - - protected var contentModel: ContentModel = _ - protected var dfa: DetWordAutom[ElemName] = _ - protected var adecls: List[AttrDecl] = _ - - /** set content model, enabling element validation */ - def setContentModel(cm: ContentModel) = { - contentModel = cm - cm match { - case ELEMENTS(r) => - val nfa = ContentModel.Translator.automatonFrom(r, 1) - dfa = new SubsetConstruction(nfa).determinize - case _ => - dfa = null - } - } - - def getContentModel = contentModel - - /** set meta data, enabling attribute validation */ - def setMetaData(adecls: List[AttrDecl]) { this.adecls = adecls } - - def getIterable(nodes: Seq[Node], skipPCDATA: Boolean): Iterable[ElemName] = { - def isAllWhitespace(a: Atom[_]) = cond(a.data) { case s: String if s.trim == "" => true } - - nodes.filter { - case y: SpecialNode => y match { - case a: Atom[_] if isAllWhitespace(a) => false // always skip all-whitespace nodes - case _ => !skipPCDATA - } - case x => x.namespace eq null - } . map (x => ElemName(x.label)) - } - - /** check attributes, return true if md corresponds to attribute declarations in adecls. 
- */ - def check(md: MetaData): Boolean = { - val len: Int = exc.length - val ok = new mutable.BitSet(adecls.length) - - for (attr <- md) { - def attrStr = attr.value.toString - def find(Key: String): Option[AttrDecl] = { - adecls.zipWithIndex find { - case (a @ AttrDecl(Key, _, _), j) => ok += j ; return Some(a) - case _ => false - } - None - } - - find(attr.key) match { - case None => - exc ::= fromUndefinedAttribute(attr.key) - - case Some(AttrDecl(_, tpe, DEFAULT(true, fixedValue))) if attrStr != fixedValue => - exc ::= fromFixedAttribute(attr.key, fixedValue, attrStr) - - case _ => - } - } - - adecls.zipWithIndex foreach { - case (AttrDecl(key, tpe, REQUIRED), j) if !ok(j) => exc ::= fromMissingAttribute(key, tpe) - case _ => - } - - exc.length == len //- true if no new exception - } - - /** check children, return true if conform to content model - * @note contentModel != null - */ - def check(nodes: Seq[Node]): Boolean = contentModel match { - case ANY => true - case EMPTY => getIterable(nodes, skipPCDATA = false).isEmpty - case PCDATA => getIterable(nodes, skipPCDATA = true).isEmpty - case MIXED(ContentModel.Alt(branches @ _*)) => // @todo - val j = exc.length - def find(Key: String): Boolean = - branches exists { case ContentModel.Letter(ElemName(Key)) => true ; case _ => false } - - getIterable(nodes, skipPCDATA = true) map (_.name) filterNot find foreach { - exc ::= MakeValidationException fromUndefinedElement _ - } - (exc.length == j) // - true if no new exception - - case _: ELEMENTS => - dfa isFinal { - getIterable(nodes, skipPCDATA = false).foldLeft(0) { (q, e) => - (dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e)) - } - } - case _ => false - } - - /** applies various validations - accumulates error messages in exc - * @todo fail on first error, ignore other errors (rearranging conditions) - */ - def apply(n: Node): Boolean = - //- ? check children - ((contentModel == null) || check(n.child)) && - //- ? 
check attributes - ((adecls == null) || check(n.attributes)) -} diff --git a/src/library/scala/xml/dtd/ExternalID.scala b/src/library/scala/xml/dtd/ExternalID.scala deleted file mode 100644 index 880633d860..0000000000 --- a/src/library/scala/xml/dtd/ExternalID.scala +++ /dev/null @@ -1,86 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -/** an ExternalIDs - either PublicID or SystemID - * - * @author Burak Emir - */ -sealed abstract class ExternalID extends parsing.TokenTests { - def quoted(s: String) = { - val c = if (s contains '"') '\'' else '"' - c + s + c - } - - // public != null: PUBLIC " " publicLiteral " " [systemLiteral] - // public == null: SYSTEM " " systemLiteral - override def toString(): String = { - lazy val quotedSystemLiteral = quoted(systemId) - lazy val quotedPublicLiteral = quoted(publicId) - - if (publicId == null) "SYSTEM " + quotedSystemLiteral - else "PUBLIC " + quotedPublicLiteral + - (if (systemId == null) "" else " " + quotedSystemLiteral) - } - def buildString(sb: StringBuilder): StringBuilder = - sb.append(this.toString()) - - def systemId: String - def publicId: String -} - -/** a system identifier - * - * @author Burak Emir - * @param systemId the system identifier literal - */ -case class SystemID(systemId: String) extends ExternalID { - val publicId = null - - if (!checkSysID(systemId)) - throw new IllegalArgumentException("can't use both \" and ' in systemId") -} - - -/** a public identifier (see http://www.w3.org/QA/2002/04/valid-dtd-list.html). 
- * - * @author Burak Emir - * @param publicId the public identifier literal - * @param systemId (can be null for notation pubIDs) the system identifier literal - */ -case class PublicID(publicId: String, systemId: String) extends ExternalID { - if (!checkPubID(publicId)) - throw new IllegalArgumentException("publicId must consist of PubidChars") - - if (systemId != null && !checkSysID(systemId)) - throw new IllegalArgumentException("can't use both \" and ' in systemId") - - /** the constant "#PI" */ - def label = "#PI" - - /** always empty */ - def attribute = Node.NoAttributes - - /** always empty */ - def child = Nil -} - -/** A marker used when a `DocType` contains no external id. - * - * @author Michael Bayne - */ -object NoExternalID extends ExternalID { - val publicId = null - val systemId = null - - override def toString = "" -} diff --git a/src/library/scala/xml/dtd/Scanner.scala b/src/library/scala/xml/dtd/Scanner.scala deleted file mode 100644 index 5f9d1ccaed..0000000000 --- a/src/library/scala/xml/dtd/Scanner.scala +++ /dev/null @@ -1,79 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package dtd - -/** Scanner for regexps (content models in DTD element declarations) - * todo: cleanup - */ -class Scanner extends Tokens with parsing.TokenTests { - - final val ENDCH = '\u0000' - - var token:Int = END - var value:String = _ - - private var it: Iterator[Char] = null - private var c: Char = 'z' - - /** initializes the scanner on input s */ - final def initScanner(s: String) { - value = "" - it = (s).iterator - token = 1+END - next() - nextToken() - } - - /** scans the next token */ - final def nextToken() { - if (token != END) token = readToken - } - - // todo: see XML specification... 
probably isLetter,isDigit is fine - final def isIdentChar = ( ('a' <= c && c <= 'z') - || ('A' <= c && c <= 'Z')) - - final def next() = if (it.hasNext) c = it.next() else c = ENDCH - - final def acc(d: Char) { - if (c == d) next() else scala.sys.error("expected '"+d+"' found '"+c+"' !") - } - - final def accS(ds: Seq[Char]) { ds foreach acc } - - final def readToken: Int = - if (isSpace(c)) { - while (isSpace(c)) c = it.next() - S - } else c match { - case '(' => next(); LPAREN - case ')' => next(); RPAREN - case ',' => next(); COMMA - case '*' => next(); STAR - case '+' => next(); PLUS - case '?' => next(); OPT - case '|' => next(); CHOICE - case '#' => next(); accS( "PCDATA" ); TOKEN_PCDATA - case ENDCH => END - case _ => - if (isNameStart(c)) name; // NAME - else scala.sys.error("unexpected character:" + c) - } - - final def name = { - val sb = new StringBuilder() - do { sb.append(c); next() } while (isNameChar(c)) - value = sb.toString() - NAME - } - -} diff --git a/src/library/scala/xml/dtd/Tokens.scala b/src/library/scala/xml/dtd/Tokens.scala deleted file mode 100644 index 07e888e77a..0000000000 --- a/src/library/scala/xml/dtd/Tokens.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - - -class Tokens { - - // Tokens - - final val TOKEN_PCDATA = 0 - final val NAME = 1 - final val LPAREN = 3 - final val RPAREN = 4 - final val COMMA = 5 - final val STAR = 6 - final val PLUS = 7 - final val OPT = 8 - final val CHOICE = 9 - final val END = 10 - final val S = 13 - - final def token2string(i: Int): String = i match { - case 0 => "#PCDATA" - case 1 => "NAME" - case 3 => "(" - case 4 => ")" - case 5 => "," - case 6 => "*" - case 7 => "+" - case 8 => "?" 
- case 9 => "|" - case 10 => "END" - case 13 => " " - } -} diff --git a/src/library/scala/xml/dtd/ValidationException.scala b/src/library/scala/xml/dtd/ValidationException.scala deleted file mode 100644 index 1bfae55286..0000000000 --- a/src/library/scala/xml/dtd/ValidationException.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package dtd - - -case class ValidationException(e: String) extends Exception(e) - -/** - * @author Burak Emir - */ -object MakeValidationException { - def fromFixedAttribute(k: String, value: String, actual: String) = - ValidationException("value of attribute " + k + " FIXED to \""+ - value+"\", but document tries \""+actual+"\"") - - def fromNonEmptyElement() = - new ValidationException("element should be *empty*") - - def fromUndefinedElement(label: String) = - new ValidationException("element \""+ label +"\" not allowed here") - - def fromUndefinedAttribute(key: String) = - new ValidationException("attribute " + key +" not allowed here") - - def fromMissingAttribute(allKeys: Set[String]) = { - val sb = new StringBuilder("missing value for REQUIRED attribute") - if (allKeys.size > 1) sb.append('s') - allKeys foreach (k => sb append "'%s'".format(k)) - new ValidationException(sb.toString()) - } - - def fromMissingAttribute(key: String, tpe: String) = - new ValidationException("missing value for REQUIRED attribute %s of type %s".format(key, tpe)) -} diff --git a/src/library/scala/xml/dtd/impl/Base.scala b/src/library/scala/xml/dtd/impl/Base.scala deleted file mode 100644 index 91ff03a93a..0000000000 --- a/src/library/scala/xml/dtd/impl/Base.scala +++ /dev/null @@ -1,67 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ 
|/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** Basic regular expressions. - * - * @author Burak Emir - * @version 1.0 - */ - -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class Base { - type _regexpT <: RegExp - - abstract class RegExp { - val isNullable: Boolean - } - - object Alt { - /** `Alt( R,R,R* )`. */ - def apply(rs: _regexpT*) = - if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt") - else new Alt(rs: _*) - // Can't enforce that statically without changing the interface - // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*) - def unapplySeq(x: Alt) = Some(x.rs) - } - - class Alt private (val rs: _regexpT*) extends RegExp { - final val isNullable = rs exists (_.isNullable) - } - - object Sequ { - /** Sequ( R,R* ) */ - def apply(rs: _regexpT*) = if (rs.isEmpty) Eps else new Sequ(rs: _*) - def unapplySeq(x: Sequ) = Some(x.rs) - } - - class Sequ private (val rs: _regexpT*) extends RegExp { - final val isNullable = rs forall (_.isNullable) - } - - case class Star(r: _regexpT) extends RegExp { - final lazy val isNullable = true - } - - // The empty Sequ. - case object Eps extends RegExp { - final lazy val isNullable = true - override def toString() = "Eps" - } - - /** this class can be used to add meta information to regexps. 
*/ - class Meta(r1: _regexpT) extends RegExp { - final val isNullable = r1.isNullable - def r = r1 - } -} diff --git a/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala b/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala deleted file mode 100644 index f30309b037..0000000000 --- a/src/library/scala/xml/dtd/impl/BaseBerrySethi.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -// todo: replace global variable pos with acc - -/** This class turns a regular expression over `A` into a - * [[scala.util.automata.NondetWordAutom]] over `A` using the celebrated - * position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class BaseBerrySethi { - val lang: Base - import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star } - - protected var pos = 0 - - // results which hold all info for the NondetWordAutomaton - protected var follow: mutable.HashMap[Int, Set[Int]] = _ - - protected var finalTag: Int = _ - - protected var finals: immutable.Map[Int, Int] = _ // final states - - // constants -------------------------- - - final val emptySet: Set[Int] = Set() - - private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match { - case x: Alt => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _) - case Eps => emptySet - case x: Meta => compFunction(x.r) - case x: Sequ => - val (l1, l2) = x.rs span (_.isNullable) - ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _) - case Star(t) => compFunction(t) - case _ => throw new IllegalArgumentException("unexpected pattern " + r.getClass) - } - - /** Computes `first(r)` for the word regexp `r`. 
*/ - protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst) - - /** Computes `last(r)` for the regexp `r`. */ - protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast) - - /** Starts from the right-to-left - * precondition: pos is final - * pats are successor patterns of a Sequence node - */ - protected def compFollow(rs: Seq[RegExp]): Set[Int] = { - follow(0) = - if (rs.isEmpty) emptySet - else rs.foldRight(Set(pos))((p, fol) => { - val first = compFollow1(fol, p) - - if (p.isNullable) fol ++ first - else first - }) - - follow(0) - } - - /** Returns the first set of an expression, setting the follow set along the way. - */ - protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { - case x: Alt => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*) - case x: Meta => compFollow1(fol1, x.r) - case x: Star => compFollow1(fol1 ++ compFirst(x.r), x.r) - case x: Sequ => - x.rs.foldRight(fol1) { (p, fol) => - val first = compFollow1(fol, p) - - if (p.isNullable) fol ++ first - else first - } - case _ => throw new IllegalArgumentException("unexpected pattern: " + r.getClass) - } - - /** Returns the "Sethi-length" of a pattern, creating the set of position along the way. 
- */ - protected def traverse(r: RegExp): Unit = r match { - // (is tree automaton stuff, more than Berry-Sethi) - case x: Alt => x.rs foreach traverse - case x: Sequ => x.rs foreach traverse - case x: Meta => traverse(x.r) - case Star(t) => traverse(t) - case _ => throw new IllegalArgumentException("unexp pattern " + r.getClass) - } -} diff --git a/src/library/scala/xml/dtd/impl/DetWordAutom.scala b/src/library/scala/xml/dtd/impl/DetWordAutom.scala deleted file mode 100644 index 6f8ba4de72..0000000000 --- a/src/library/scala/xml/dtd/impl/DetWordAutom.scala +++ /dev/null @@ -1,50 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -/** A deterministic automaton. States are integers, where - * 0 is always the only initial state. Transitions are represented - * in the delta function. A default transitions is one that - * is taken when no other transition can be taken. - * All states are reachable. Accepting states are those for which - * the partial function 'finals' is defined. 
- * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class DetWordAutom[T <: AnyRef] { - val nstates: Int - val finals: Array[Int] - val delta: Array[mutable.Map[T, Int]] - val default: Array[Int] - - def isFinal(q: Int) = finals(q) != 0 - def isSink(q: Int) = delta(q).isEmpty && default(q) == q - def next(q: Int, label: T) = delta(q).getOrElse(label, default(q)) - - override def toString() = { - val sb = new StringBuilder("[DetWordAutom nstates=") - sb.append(nstates) - sb.append(" finals=") - val map = Map(finals.zipWithIndex map (_.swap): _*) - sb.append(map.toString()) - sb.append(" delta=\n") - - for (i <- 0 until nstates) { - sb append "%d->%s\n".format(i, delta(i)) - if (i < default.length) - sb append "_>%s\n".format(default(i)) - } - sb.toString - } -} diff --git a/src/library/scala/xml/dtd/impl/Inclusion.scala b/src/library/scala/xml/dtd/impl/Inclusion.scala deleted file mode 100644 index 07b6afaeba..0000000000 --- a/src/library/scala/xml/dtd/impl/Inclusion.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - - -/** A fast test of language inclusion between minimal automata. - * inspired by the ''AMoRE automata library''. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] trait Inclusion[A <: AnyRef] { - - val labels: Seq[A] - - /** Returns true if `dfa1` is included in `dfa2`. 
- */ - def inclusion(dfa1: DetWordAutom[A], dfa2: DetWordAutom[A]) = { - - def encode(q1: Int, q2: Int) = 1 + q1 + q2 * dfa1.nstates - def decode2(c: Int) = (c-1) / (dfa1.nstates) //integer division - def decode1(c: Int) = (c-1) % (dfa1.nstates) - - var q1 = 0 //dfa1.initstate; // == 0 - var q2 = 0 //dfa2.initstate; // == 0 - - val max = 1 + dfa1.nstates * dfa2.nstates - val mark = new Array[Int](max) - - var result = true - var current = encode(q1, q2) - var last = current - mark(last) = max // mark (q1,q2) - while (current != 0 && result) { - //Console.println("current = [["+q1+" "+q2+"]] = "+current); - for (letter <- labels) { - val r1 = dfa1.next(q1,letter) - val r2 = dfa2.next(q2,letter) - if (dfa1.isFinal(r1) && !dfa2.isFinal(r2)) - result = false - val test = encode(r1, r2) - //Console.println("test = [["+r1+" "+r2+"]] = "+test); - if (mark(test) == 0) { - mark(last) = test - mark(test) = max - last = test - } - } - val ncurrent = mark(current) - if( ncurrent != max ) { - q1 = decode1(ncurrent) - q2 = decode2(ncurrent) - current = ncurrent - } else { - current = 0 - } - } - result - } -} diff --git a/src/library/scala/xml/dtd/impl/NondetWordAutom.scala b/src/library/scala/xml/dtd/impl/NondetWordAutom.scala deleted file mode 100644 index 0bb19a7e3e..0000000000 --- a/src/library/scala/xml/dtd/impl/NondetWordAutom.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ immutable, mutable } - -/** A nondeterministic automaton. States are integers, where - * 0 is always the only initial state. Transitions are represented - * in the delta function. Default transitions are transitions that - * are taken when no other transitions can be applied. - * All states are reachable. 
Accepting states are those for which - * the partial function `finals` is defined. - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class NondetWordAutom[T <: AnyRef] { - val nstates: Int - val labels: Seq[T] - val finals: Array[Int] // 0 means not final - val delta: Array[mutable.Map[T, immutable.BitSet]] - val default: Array[immutable.BitSet] - - /** @return true if the state is final */ - final def isFinal(state: Int) = finals(state) > 0 - - /** @return tag of final state */ - final def finalTag(state: Int) = finals(state) - - /** @return true if the set of states contains at least one final state */ - final def containsFinal(Q: immutable.BitSet): Boolean = Q exists isFinal - - /** @return true if there are no accepting states */ - final def isEmpty = (0 until nstates) forall (x => !isFinal(x)) - - /** @return an immutable.BitSet with the next states for given state and label */ - def next(q: Int, a: T): immutable.BitSet = delta(q).getOrElse(a, default(q)) - - /** @return an immutable.BitSet with the next states for given state and label */ - def next(Q: immutable.BitSet, a: T): immutable.BitSet = next(Q, next(_, a)) - def nextDefault(Q: immutable.BitSet): immutable.BitSet = next(Q, default) - - private def next(Q: immutable.BitSet, f: (Int) => immutable.BitSet): immutable.BitSet = - (Q map f).foldLeft(immutable.BitSet.empty)(_ ++ _) - - private def finalStates = 0 until nstates filter isFinal - override def toString = { - - val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString - val deltaString = (0 until nstates) - .map(i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString - - "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString) - } -} diff --git a/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala b/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala deleted file mode 100644 index 1720604132..0000000000 --- 
a/src/library/scala/xml/dtd/impl/PointedHedgeExp.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** Pointed regular hedge expressions, a useful subclass of regular hedge expressions. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class PointedHedgeExp extends Base { - - type _regexpT <: RegExp - type _labelT - - case class Node(label: _labelT, r: _regexpT) extends RegExp { - final val isNullable = false - } - - case class TopIter(r1: _regexpT, r2: _regexpT) extends RegExp { - final val isNullable = r1.isNullable && r2.isNullable //? - } - - case object Point extends RegExp { - final val isNullable = false - } - -} diff --git a/src/library/scala/xml/dtd/impl/SubsetConstruction.scala b/src/library/scala/xml/dtd/impl/SubsetConstruction.scala deleted file mode 100644 index 632ca1eb18..0000000000 --- a/src/library/scala/xml/dtd/impl/SubsetConstruction.scala +++ /dev/null @@ -1,108 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ mutable, immutable } - -@deprecated("This class will be removed", "2.10.0") -private[dtd] class SubsetConstruction[T <: AnyRef](val nfa: NondetWordAutom[T]) { - import nfa.labels - - def selectTag(Q: immutable.BitSet, finals: Array[Int]) = - (Q map finals filter (_ > 0)).min - - def determinize: DetWordAutom[T] = { - // for assigning numbers to bitsets - var indexMap = scala.collection.Map[immutable.BitSet, Int]() - var invIndexMap = scala.collection.Map[Int, immutable.BitSet]() - 
var ix = 0 - - // we compute the dfa with states = bitsets - val q0 = immutable.BitSet(0) // the set { 0 } - val sink = immutable.BitSet.empty // the set { } - - var states = Set(q0, sink) // initial set of sets - val delta = new mutable.HashMap[immutable.BitSet, mutable.HashMap[T, immutable.BitSet]] - var deftrans = mutable.Map(q0 -> sink, sink -> sink) // initial transitions - var finals: mutable.Map[immutable.BitSet, Int] = mutable.Map() - val rest = new mutable.Stack[immutable.BitSet] - - rest.push(sink, q0) - - def addFinal(q: immutable.BitSet) { - if (nfa containsFinal q) - finals = finals.updated(q, selectTag(q, nfa.finals)) - } - def add(Q: immutable.BitSet) { - if (!states(Q)) { - states += Q - rest push Q - addFinal(Q) - } - } - - addFinal(q0) // initial state may also be a final state - - while (!rest.isEmpty) { - val P = rest.pop() - // assign a number to this bitset - indexMap = indexMap.updated(P, ix) - invIndexMap = invIndexMap.updated(ix, P) - ix += 1 - - // make transition map - val Pdelta = new mutable.HashMap[T, immutable.BitSet] - delta.update(P, Pdelta) - - labels foreach { label => - val Q = nfa.next(P, label) - Pdelta.update(label, Q) - add(Q) - } - - // collect default transitions - val Pdef = nfa nextDefault P - deftrans = deftrans.updated(P, Pdef) - add(Pdef) - } - - // create DetWordAutom, using indices instead of sets - val nstatesR = states.size - val deltaR = new Array[mutable.Map[T, Int]](nstatesR) - val defaultR = new Array[Int](nstatesR) - val finalsR = new Array[Int](nstatesR) - - for (Q <- states) { - val q = indexMap(Q) - val trans = delta(Q) - val transDef = deftrans(Q) - val qDef = indexMap(transDef) - val ntrans = new mutable.HashMap[T, Int]() - - for ((label, value) <- trans) { - val p = indexMap(value) - if (p != qDef) - ntrans.update(label, p) - } - - deltaR(q) = ntrans - defaultR(q) = qDef - } - - finals foreach { case (k,v) => finalsR(indexMap(k)) = v } - - new DetWordAutom [T] { - val nstates = nstatesR - val delta = 
deltaR - val default = defaultR - val finals = finalsR - } - } -} diff --git a/src/library/scala/xml/dtd/impl/SyntaxError.scala b/src/library/scala/xml/dtd/impl/SyntaxError.scala deleted file mode 100644 index a5b8a5aba0..0000000000 --- a/src/library/scala/xml/dtd/impl/SyntaxError.scala +++ /dev/null @@ -1,21 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** This runtime exception is thrown if an attempt to instantiate a - * syntactically incorrect expression is detected. - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] class SyntaxError(e: String) extends RuntimeException(e) diff --git a/src/library/scala/xml/dtd/impl/WordBerrySethi.scala b/src/library/scala/xml/dtd/impl/WordBerrySethi.scala deleted file mode 100644 index 9bf3fa518b..0000000000 --- a/src/library/scala/xml/dtd/impl/WordBerrySethi.scala +++ /dev/null @@ -1,162 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml.dtd.impl - -import scala.collection.{ immutable, mutable } - -/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]] - * celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). 
- * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class WordBerrySethi extends BaseBerrySethi { - override val lang: WordExp - - import lang.{ Alt, Eps, Letter, RegExp, Sequ, Star, _labelT } - - protected var labels: mutable.HashSet[_labelT] = _ - // don't let this fool you, only labelAt is a real, surjective mapping - protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma" - protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta - protected var defaultq: Array[List[Int]] = _ // default transitions - protected var initials: Set[Int] = _ - - /** Computes `first(r)` where the word regexp `r`. - * - * @param r the regular expression - * @return the computed set `first(r)` - */ - protected override def compFirst(r: RegExp): Set[Int] = r match { - case x: Letter => Set(x.pos) - case _ => super.compFirst(r) - } - - /** Computes `last(r)` where the word regexp `r`. - * - * @param r the regular expression - * @return the computed set `last(r)` - */ - protected override def compLast(r: RegExp): Set[Int] = r match { - case x: Letter => Set(x.pos) - case _ => super.compLast(r) - } - - /** Returns the first set of an expression, setting the follow set along - * the way. 
- * - * @param r the regular expression - * @return the computed set - */ - protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { - case x: Letter => follow(x.pos) = fol1 ; Set(x.pos) - case Eps => emptySet - case _ => super.compFollow1(fol1, r) - } - - /** Returns "Sethi-length" of a pattern, creating the set of position - * along the way - */ - - /** Called at the leaves of the regexp */ - protected def seenLabel(r: RegExp, i: Int, label: _labelT) { - labelAt = labelAt.updated(i, label) - this.labels += label - } - - // overridden in BindingBerrySethi - protected def seenLabel(r: RegExp, label: _labelT): Int = { - pos += 1 - seenLabel(r, pos, label) - pos - } - - // todo: replace global variable pos with acc - override def traverse(r: RegExp): Unit = r match { - case a @ Letter(label) => a.pos = seenLabel(r, label) - case Eps => // ignore - case _ => super.traverse(r) - } - - - protected def makeTransition(src: Int, dest: Int, label: _labelT) { - val q = deltaq(src) - q.update(label, dest :: q.getOrElse(label, Nil)) - } - - protected def initialize(subexpr: Seq[RegExp]): Unit = { - this.labelAt = immutable.Map() - this.follow = mutable.HashMap() - this.labels = mutable.HashSet() - this.pos = 0 - - // determine "Sethi-length" of the regexp - subexpr foreach traverse - - this.initials = Set(0) - } - - protected def initializeAutom() { - finals = immutable.Map.empty[Int, Int] // final states - deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta - defaultq = new Array[List[Int]](pos) // default transitions - - for (j <- 0 until pos) { - deltaq(j) = mutable.HashMap[_labelT, List[Int]]() - defaultq(j) = Nil - } - } - - protected def collectTransitions(): Unit = // make transitions - for (j <- 0 until pos ; fol = follow(j) ; k <- fol) { - if (pos == k) finals = finals.updated(j, finalTag) - else makeTransition(j, k, labelAt(k)) - } - - def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = { - 
this.finalTag = finalTag - - pat match { - case x: Sequ => - // (1,2) compute follow + first - initialize(x.rs) - pos += 1 - compFollow(x.rs) // this used to be assigned to var globalFirst and then never used. - - // (3) make automaton from follow sets - initializeAutom() - collectTransitions() - - if (x.isNullable) // initial state is final - finals = finals.updated(0, finalTag) - - val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*) - val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final - - val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] = - (0 until pos map { x => - mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*) - }).toArray - - val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray - - new NondetWordAutom[_labelT] { - val nstates = pos - val labels = WordBerrySethi.this.labels.toList - val finals = finalsArr - val delta = deltaArr - val default = defaultArr - } - case z => - automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag) - } - } -} diff --git a/src/library/scala/xml/dtd/impl/WordExp.scala b/src/library/scala/xml/dtd/impl/WordExp.scala deleted file mode 100644 index a4bb54c1ea..0000000000 --- a/src/library/scala/xml/dtd/impl/WordExp.scala +++ /dev/null @@ -1,59 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml.dtd.impl - -/** - * The class `WordExp` provides regular word expressions. - * - * Users have to instantiate type member `_regexpT <;: RegExp` - * (from class `Base`) and a type member `_labelT <;: Label`. 
- * - * Here is a short example: - * {{{ - * import scala.util.regexp._ - * import scala.util.automata._ - * object MyLang extends WordExp { - * type _regexpT = RegExp - * type _labelT = MyChar - * - * case class MyChar(c:Char) extends Label - * } - * import MyLang._ - * // (a* | b)* - * val rex = Star(Alt(Star(Letter(MyChar('a'))),Letter(MyChar('b')))) - * object MyBerriSethi extends WordBerrySethi { - * override val lang = MyLang - * } - * val nfa = MyBerriSethi.automatonFrom(Sequ(rex), 1) - * }}} - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This class will be removed", "2.10.0") -private[dtd] abstract class WordExp extends Base { - - abstract class Label - - type _regexpT <: RegExp - type _labelT <: Label - - case class Letter(a: _labelT) extends RegExp { - final lazy val isNullable = false - var pos = -1 - } - - case class Wildcard() extends RegExp { - final lazy val isNullable = false - var pos = -1 - } -} diff --git a/src/library/scala/xml/factory/Binder.scala b/src/library/scala/xml/factory/Binder.scala deleted file mode 100755 index 947f99e6a4..0000000000 --- a/src/library/scala/xml/factory/Binder.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package factory - -import parsing.ValidatingMarkupHandler - -/** - * @author Burak Emir - */ -abstract class Binder(val preserveWS: Boolean) extends ValidatingMarkupHandler { - - var result: NodeBuffer = new NodeBuffer() - - def reportSyntaxError(pos:Int, str:String) = {} - - final def procInstr(pos: Int, target: String, txt: String) = - ProcInstr(target, txt) - - final def comment(pos: Int, txt: String) = - Comment(txt) - - final def entityRef(pos: Int, n: String) = - EntityRef(n) - - final def text(pos: Int, txt: String) = - Text(txt) - - final def 
traverse(n:Node): Unit = n match { - case x:ProcInstr => - result &+ procInstr(0, x.target, x.text) - case x:Comment => - result &+ comment(0, x.text) - case x:Text => - result &+ text(0, x.data) - case x:EntityRef => - result &+ entityRef(0, x.entityName) - case x:Elem => - elemStart(0, x.prefix, x.label, x.attributes, x.scope) - val old = result - result = new NodeBuffer() - for (m <- x.child) traverse(m) - result = old &+ elem(0, x.prefix, x.label, x.attributes, x.scope, x.minimizeEmpty, NodeSeq.fromSeq(result)).toList - elemEnd(0, x.prefix, x.label) - } - - final def validate(n: Node): Node = { - this.rootLabel = n.label - traverse(n) - result(0) - } -} diff --git a/src/library/scala/xml/factory/LoggedNodeFactory.scala b/src/library/scala/xml/factory/LoggedNodeFactory.scala deleted file mode 100644 index bc074bfc83..0000000000 --- a/src/library/scala/xml/factory/LoggedNodeFactory.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package factory - -/** This class logs what the nodefactory is actually doing. 
- * If you want to see what happens during loading, use it like this: -{{{ -object testLogged extends App { - val x = new scala.xml.parsing.NoBindingFactoryAdapter - with scala.xml.factory.LoggedNodeFactory[scala.xml.Elem] { - override def log(s: String) = println(s) - } - - Console.println("Start") - val doc = x.load(new java.net.URL("http://example.com/file.xml")) - Console.println("End") - Console.println(doc) -} -}}} - * - * @author Burak Emir - * @version 1.0 - */ -@deprecated("This trait will be removed.", "2.11") -trait LoggedNodeFactory[A <: Node] extends NodeFactory[A] { - // configuration values - val logNode = true - val logText = false - val logComment = false - val logProcInstr = false - - final val NONE = 0 - final val CACHE = 1 - final val FULL = 2 - /** 0 = no logging, 1 = cache hits, 2 = detail */ - val logCompressLevel = 1 - - // methods of NodeFactory - - /** logged version of makeNode method */ - override def makeNode(pre: String, label: String, attrSeq: MetaData, - scope: NamespaceBinding, children: Seq[Node]): A = { - if (logNode) - log("[makeNode for "+label+"]") - - val hash = Utility.hashCode(pre, label, attrSeq.##, scope.##, children) - - /* - if(logCompressLevel >= FULL) { - log("[hashcode total:"+hash); - log(" elem name "+uname+" hash "+ ? 
)); - log(" attrs "+attrSeq+" hash "+attrSeq.hashCode()); - log(" children :"+children+" hash "+children.hashCode()); - } - */ - if (!cache.get( hash ).isEmpty && (logCompressLevel >= CACHE)) - log("[cache hit !]") - - super.makeNode(pre, label, attrSeq, scope, children) - } - - override def makeText(s: String) = { - if (logText) - log("[makeText:\""+s+"\"]") - super.makeText(s) - } - - override def makeComment(s: String): Seq[Comment] = { - if (logComment) - log("[makeComment:\""+s+"\"]") - super.makeComment(s) - } - - override def makeProcInstr(t: String, s: String): Seq[ProcInstr] = { - if (logProcInstr) - log("[makeProcInstr:\""+t+" "+ s+"\"]") - super.makeProcInstr(t, s) - } - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/factory/NodeFactory.scala b/src/library/scala/xml/factory/NodeFactory.scala deleted file mode 100644 index 94801bb554..0000000000 --- a/src/library/scala/xml/factory/NodeFactory.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package factory - -import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } -import java.io.{ InputStream, Reader, StringReader, File, FileDescriptor, FileInputStream } - -trait NodeFactory[A <: Node] { - val ignoreComments = false - val ignoreProcInstr = false - - /* default behaviour is to use hash-consing */ - val cache = new scala.collection.mutable.HashMap[Int, List[A]] - - protected def create(pre: String, name: String, attrs: MetaData, scope: NamespaceBinding, children:Seq[Node]): A - - protected def construct(hash: Int, old:List[A], pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children:Seq[Node]): A = { - val el = create(pre, name, attrSeq, scope, 
children) - cache.update(hash, el :: old) - el - } - - def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean = - ch1.view.zipAll(ch2.view, null, null) forall { case (x,y) => x eq y } - - def nodeEquals(n: Node, pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children: Seq[Node]) = - n.prefix == pre && - n.label == name && - n.attributes == attrSeq && - // scope? - eqElements(n.child, children) - - def makeNode(pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): A = { - val hash = Utility.hashCode( pre, name, attrSeq.##, scope.##, children) - def cons(old: List[A]) = construct(hash, old, pre, name, attrSeq, scope, children) - - (cache get hash) match { - case Some(list) => // find structurally equal - list.find(nodeEquals(_, pre, name, attrSeq, scope, children)) match { - case Some(x) => x - case _ => cons(list) - } - case None => cons(Nil) - } - } - - def makeText(s: String) = Text(s) - def makeComment(s: String): Seq[Comment] = - if (ignoreComments) Nil else List(Comment(s)) - def makeProcInstr(t: String, s: String): Seq[ProcInstr] = - if (ignoreProcInstr) Nil else List(ProcInstr(t, s)) -} diff --git a/src/library/scala/xml/factory/XMLLoader.scala b/src/library/scala/xml/factory/XMLLoader.scala deleted file mode 100644 index b69f187039..0000000000 --- a/src/library/scala/xml/factory/XMLLoader.scala +++ /dev/null @@ -1,61 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package factory - -import javax.xml.parsers.SAXParserFactory -import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } -import java.io.{ InputStream, Reader, File, FileDescriptor } -import java.net.URL - -/** Presents collection of XML loading methods which use the parser - * created by "def parser". 
- */ -trait XMLLoader[T <: Node] -{ - import scala.xml.Source._ - def adapter: FactoryAdapter = new NoBindingFactoryAdapter() - - /* Override this to use a different SAXParser. */ - def parser: SAXParser = { - val f = SAXParserFactory.newInstance() - f.setNamespaceAware(false) - f.newSAXParser() - } - - /** Loads XML from the given InputSource, using the supplied parser. - * The methods available in scala.xml.XML use the XML parser in the JDK. - */ - def loadXML(source: InputSource, parser: SAXParser): T = { - val newAdapter = adapter - - newAdapter.scopeStack push TopScope - parser.parse(source, newAdapter) - newAdapter.scopeStack.pop() - - newAdapter.rootElem.asInstanceOf[T] - } - - /** Loads XML from the given file, file descriptor, or filename. */ - def loadFile(file: File): T = loadXML(fromFile(file), parser) - def loadFile(fd: FileDescriptor): T = loadXML(fromFile(fd), parser) - def loadFile(name: String): T = loadXML(fromFile(name), parser) - - /** loads XML from given InputStream, Reader, sysID, InputSource, or URL. */ - def load(is: InputStream): T = loadXML(fromInputStream(is), parser) - def load(reader: Reader): T = loadXML(fromReader(reader), parser) - def load(sysID: String): T = loadXML(fromSysId(sysID), parser) - def load(source: InputSource): T = loadXML(source, parser) - def load(url: URL): T = loadXML(fromInputStream(url.openStream()), parser) - - /** Loads XML from the given String. 
*/ - def loadString(string: String): T = loadXML(fromString(string), parser) -} diff --git a/src/library/scala/xml/include/CircularIncludeException.scala b/src/library/scala/xml/include/CircularIncludeException.scala deleted file mode 100644 index 351f403008..0000000000 --- a/src/library/scala/xml/include/CircularIncludeException.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * A `CircularIncludeException` is thrown when an included document attempts - * to include itself or one of its ancestor documents. - */ -class CircularIncludeException(message: String) extends XIncludeException { - - /** - * Constructs a `CircularIncludeException` with `'''null'''`. - * as its error detail message. - */ - def this() = this(null) - -} diff --git a/src/library/scala/xml/include/UnavailableResourceException.scala b/src/library/scala/xml/include/UnavailableResourceException.scala deleted file mode 100644 index 47b176e0f3..0000000000 --- a/src/library/scala/xml/include/UnavailableResourceException.scala +++ /dev/null @@ -1,20 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * An `UnavailableResourceException` is thrown when an included document - * cannot be found or loaded. 
- */ -class UnavailableResourceException(message: String) -extends XIncludeException(message) { - def this() = this(null) -} diff --git a/src/library/scala/xml/include/XIncludeException.scala b/src/library/scala/xml/include/XIncludeException.scala deleted file mode 100644 index 11e1644d83..0000000000 --- a/src/library/scala/xml/include/XIncludeException.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include - -/** - * `XIncludeException` is the generic superclass for all checked exceptions - * that may be thrown as a result of a violation of XInclude's rules. - * - * Constructs an `XIncludeException` with the specified detail message. - * The error message string `message` can later be retrieved by the - * `{@link java.lang.Throwable#getMessage}` - * method of class `java.lang.Throwable`. - * - * @param message the detail message. - */ -class XIncludeException(message: String) extends Exception(message) { - - /** - * uses `'''null'''` as its error detail message. - */ - def this() = this(null) - - private var rootCause: Throwable = null - - /** - * When an `IOException`, `MalformedURLException` or other generic - * exception is thrown while processing an XML document for XIncludes, - * it is customarily replaced by some form of `XIncludeException`. - * This method allows you to store the original exception. - * - * @param nestedException the underlying exception which - * caused the XIncludeException to be thrown - */ - def setRootCause(nestedException: Throwable ) { - this.rootCause = nestedException - } - - /** - * When an `IOException`, `MalformedURLException` or other generic - * exception is thrown while processing an XML document for XIncludes, - * it is customarily replaced by some form of `XIncludeException`. 
- * This method allows you to retrieve the original exception. - * It returns null if no such exception caused this `XIncludeException`. - * - * @return Throwable the underlying exception which caused the - * `XIncludeException` to be thrown - */ - def getRootCause(): Throwable = this.rootCause - -} diff --git a/src/library/scala/xml/include/sax/EncodingHeuristics.scala b/src/library/scala/xml/include/sax/EncodingHeuristics.scala deleted file mode 100644 index 57ab5ed91c..0000000000 --- a/src/library/scala/xml/include/sax/EncodingHeuristics.scala +++ /dev/null @@ -1,98 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import java.io.InputStream -import scala.util.matching.Regex - -/** `EncodingHeuristics` reads from a stream - * (which should be buffered) and attempts to guess - * what the encoding of the text in the stream is. - * If it fails to determine the type of the encoding, - * it returns the default UTF-8. - * - * @author Burak Emir - * @author Paul Phillips - */ -object EncodingHeuristics -{ - object EncodingNames { - // UCS-4 isn't yet implemented in java releases anyway... - val bigUCS4 = "UCS-4" - val littleUCS4 = "UCS-4" - val unusualUCS4 = "UCS-4" - val bigUTF16 = "UTF-16BE" - val littleUTF16 = "UTF-16LE" - val utf8 = "UTF-8" - val default = utf8 - } - import EncodingNames._ - - /** This utility method attempts to determine the XML character encoding - * by examining the input stream, as specified at - * [[http://www.w3.org/TR/xml/#sec-guessing w3]]. - * - * @param in `InputStream` to read from. - * @throws IOException if the stream cannot be reset - * @return the name of the encoding. 
- */ - def readEncodingFromStream(in: InputStream): String = { - var ret: String = null - val bytesToRead = 1024 // enough to read most XML encoding declarations - def resetAndRet = { in.reset ; ret } - - // This may fail if there are a lot of space characters before the end - // of the encoding declaration - in mark bytesToRead - val bytes = (in.read, in.read, in.read, in.read) - - // first look for byte order mark - ret = bytes match { - case (0x00, 0x00, 0xFE, 0xFF) => bigUCS4 - case (0xFF, 0xFE, 0x00, 0x00) => littleUCS4 - case (0x00, 0x00, 0xFF, 0xFE) => unusualUCS4 - case (0xFE, 0xFF, 0x00, 0x00) => unusualUCS4 - case (0xFE, 0xFF, _ , _ ) => bigUTF16 - case (0xFF, 0xFE, _ , _ ) => littleUTF16 - case (0xEF, 0xBB, 0xBF, _ ) => utf8 - case _ => null - } - if (ret != null) - return resetAndRet - - def readASCIIEncoding: String = { - val data = new Array[Byte](bytesToRead - 4) - val length = in.read(data, 0, bytesToRead - 4) - - // Use Latin-1 (ISO-8859-1) because all byte sequences are legal. - val declaration = new String(data, 0, length, "ISO-8859-1") - val regexp = """(?m).*?encoding\s*=\s*["'](.+?)['"]""".r - (regexp findFirstMatchIn declaration) match { - case None => default - case Some(md) => md.subgroups(0) - } - } - - // no byte order mark present; first character must be '<' or whitespace - ret = bytes match { - case (0x00, 0x00, 0x00, '<' ) => bigUCS4 - case ('<' , 0x00, 0x00, 0x00) => littleUCS4 - case (0x00, 0x00, '<' , 0x00) => unusualUCS4 - case (0x00, '<' , 0x00, 0x00) => unusualUCS4 - case (0x00, '<' , 0x00, '?' ) => bigUTF16 // XXX must read encoding - case ('<' , 0x00, '?' , 0x00) => littleUTF16 // XXX must read encoding - case ('<' , '?' 
, 'x' , 'm' ) => readASCIIEncoding - case (0x4C, 0x6F, 0xA7, 0x94) => utf8 // XXX EBCDIC - case _ => utf8 // no XML or text declaration present - } - resetAndRet - } -} diff --git a/src/library/scala/xml/include/sax/XIncludeFilter.scala b/src/library/scala/xml/include/sax/XIncludeFilter.scala deleted file mode 100644 index 3fa3beefb0..0000000000 --- a/src/library/scala/xml/include/sax/XIncludeFilter.scala +++ /dev/null @@ -1,373 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import scala.xml.include._ - -import org.xml.sax.{ Attributes, XMLReader, Locator } -import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl } - -import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException } -import java.util.Stack -import java.net.{ URL, MalformedURLException } - -/** This is a SAX filter which resolves all XInclude include elements before - * passing them on to the client application. Currently this class has the - * following known deviation from the XInclude specification: - * - * 1. XPointer is not supported. - * - * Furthermore, I would definitely use a new instance of this class for each - * document you want to process. I doubt it can be used successfully on - * multiple documents. Furthermore, I can virtually guarantee that this - * class is not thread safe. You have been warned. - * - * Since this class is not designed to be subclassed, and since I have not - * yet considered how that might affect the methods herein or what other - * protected methods might be needed to support subclasses, I have declared - * this class final. I may remove this restriction later, though the use-case - * for subclassing is weak. 
This class is designed to have its functionality - * extended via a horizontal chain of filters, not a vertical hierarchy of - * sub and superclasses. - * - * To use this class: - * - * - Construct an `XIncludeFilter` object with a known base URL - * - Pass the `XMLReader` object from which the raw document will be read to - * the `setParent()` method of this object. - * - Pass your own `ContentHandler` object to the `setContentHandler()` - * method of this object. This is the object which will receive events - * from the parsed and included document. - * - Optional: if you wish to receive comments, set your own `LexicalHandler` - * object as the value of this object's - * `http://xml.org/sax/properties/lexical-handler` property. - * Also make sure your `LexicalHandler` asks this object for the status of - * each comment using `insideIncludeElement` before doing anything with the - * comment. - * - Pass the URL of the document to read to this object's `parse()` method - * - * e.g. - * {{{ - * val includer = new XIncludeFilter(base) - * includer setParent parser - * includer setContentHandler new SAXXIncluder(System.out) - * includer parse args(i) - * }}} - * translated from Elliotte Rusty Harold's Java source. - * - * @author Burak Emir - */ -class XIncludeFilter extends XMLFilterImpl { - - final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude" - - private val bases = new Stack[URL]() - private val locators = new Stack[Locator]() - -/* private EntityResolver resolver; - - public XIncludeFilter() { - this(null); - } - - public XIncludeFilter(EntityResolver resolver) { - this.resolver = resolver; - } */ - - - // what if this isn't called???? - // do I need to check this in startDocument() and push something - // there???? 
- override def setDocumentLocator(locator: Locator) { - locators push locator - val base = locator.getSystemId() - try { - bases.push(new URL(base)) - } - catch { - case e:MalformedURLException => - throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base) - } - super.setDocumentLocator(locator) - } - - - // necessary to throw away contents of non-empty XInclude elements - private var level = 0 - - /** This utility method returns true if and only if this reader is - * currently inside a non-empty include element. (This is '''not''' the - * same as being inside the node set which replaces the include element.) - * This is primarily needed for comments inside include elements. - * It must be checked by the actual `LexicalHandler` to see whether - * a comment is passed or not. - * - * @return boolean - */ - def insideIncludeElement(): Boolean = level != 0 - - override def startElement(uri: String, localName: String, qName: String, atts1: Attributes) { - var atts = atts1 - if (level == 0) { // We're not inside an xi:include element - - // Adjust bases stack by pushing either the new - // value of xml:base or the base of the parent - val base = atts.getValue(NamespaceSupport.XMLNS, "base") - val parentBase = bases.peek().asInstanceOf[URL] - var currentBase = parentBase - if (base != null) { - try { - currentBase = new URL(parentBase, base) - } - catch { - case e: MalformedURLException => - throw new SAXException("Malformed base URL: " - + currentBase, e) - } - } - bases push currentBase - - if (uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")) { - // include external document - val href = atts.getValue("href") - // Verify that there is an href attribute - if (href == null) { - throw new SAXException("Missing href attribute") - } - - var parse = atts getValue "parse" - if (parse == null) parse = "xml" - - if (parse equals "text") { - val encoding = atts getValue "encoding" - includeTextDocument(href, encoding) - } - else if (parse equals 
"xml") { - includeXMLDocument(href) - } - // Need to check this also in DOM and JDOM???? - else { - throw new SAXException( - "Illegal value for parse attribute: " + parse) - } - level += 1 - } - else { - if (atRoot) { - // add xml:base attribute if necessary - val attsImpl = new AttributesImpl(atts) - attsImpl.addAttribute(NamespaceSupport.XMLNS, "base", - "xml:base", "CDATA", currentBase.toExternalForm()) - atts = attsImpl - atRoot = false - } - super.startElement(uri, localName, qName, atts) - } - } - } - - override def endElement(uri: String, localName: String, qName: String) { - if (uri.equals(XINCLUDE_NAMESPACE) - && localName.equals("include")) { - level -= 1 - } - else if (level == 0) { - bases.pop() - super.endElement(uri, localName, qName) - } - } - - private var depth = 0 - - override def startDocument() { - level = 0 - if (depth == 0) super.startDocument() - depth += 1 - } - - override def endDocument() { - locators.pop() - bases.pop() // pop the URL for the document itself - depth -= 1 - if (depth == 0) super.endDocument() - } - - // how do prefix mappings move across documents???? 
- override def startPrefixMapping(prefix: String , uri: String) { - if (level == 0) super.startPrefixMapping(prefix, uri) - } - - override def endPrefixMapping(prefix: String) { - if (level == 0) super.endPrefixMapping(prefix) - } - - override def characters(ch: Array[Char], start: Int, length: Int) { - if (level == 0) super.characters(ch, start, length) - } - - override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int) { - if (level == 0) super.ignorableWhitespace(ch, start, length) - } - - override def processingInstruction(target: String, data: String) { - if (level == 0) super.processingInstruction(target, data) - } - - override def skippedEntity(name: String) { - if (level == 0) super.skippedEntity(name) - } - - // convenience method for error messages - private def getLocation(): String = { - var locationString = "" - val locator = locators.peek().asInstanceOf[Locator] - var publicID = "" - var systemID = "" - var column = -1 - var line = -1 - if (locator != null) { - publicID = locator.getPublicId() - systemID = locator.getSystemId() - line = locator.getLineNumber() - column = locator.getColumnNumber() - } - locationString = (" in document included from " + publicID - + " at " + systemID - + " at line " + line + ", column " + column) - - locationString - } - - /** This utility method reads a document at a specified URL and fires off - * calls to `characters()`. It's used to include files with `parse="text"`. - * - * @param url URL of the document that will be read - * @param encoding1 Encoding of the document; e.g. UTF-8, - * ISO-8859-1, etc. 
- * @return void - * @throws SAXException if the requested document cannot - be downloaded from the specified URL - or if the encoding is not recognized - */ - private def includeTextDocument(url: String, encoding1: String) { - var encoding = encoding1 - if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8" - var source: URL = null - try { - val base = bases.peek().asInstanceOf[URL] - source = new URL(base, url) - } - catch { - case e: MalformedURLException => - val ex = new UnavailableResourceException("Unresolvable URL " + url - + getLocation()) - ex.setRootCause(e) - throw new SAXException("Unresolvable URL " + url + getLocation(), ex) - } - - try { - val uc = source.openConnection() - val in = new BufferedInputStream(uc.getInputStream()) - val encodingFromHeader = uc.getContentEncoding() - var contentType = uc.getContentType() - if (encodingFromHeader != null) - encoding = encodingFromHeader - else { - // What if file does not have a MIME type but name ends in .xml???? 
- // MIME types are case-insensitive - // Java may be picking this up from file URL - if (contentType != null) { - contentType = contentType.toLowerCase() - if (contentType.equals("text/xml") - || contentType.equals("application/xml") - || (contentType.startsWith("text/") && contentType.endsWith("+xml") ) - || (contentType.startsWith("application/") && contentType.endsWith("+xml"))) { - encoding = EncodingHeuristics.readEncodingFromStream(in) - } - } - } - val reader = new InputStreamReader(in, encoding) - val c = new Array[Char](1024) - var charsRead: Int = 0 // bogus init value - do { - charsRead = reader.read(c, 0, 1024) - if (charsRead > 0) this.characters(c, 0, charsRead) - } while (charsRead != -1) - } - catch { - case e: UnsupportedEncodingException => - throw new SAXException("Unsupported encoding: " - + encoding + getLocation(), e) - case e: IOException => - throw new SAXException("Document not found: " - + source.toExternalForm() + getLocation(), e) - } - - } - - private var atRoot = false - - /** This utility method reads a document at a specified URL - * and fires off calls to various `ContentHandler` methods. - * It's used to include files with `parse="xml"`. - * - * @param url URL of the document that will be read - * @return void - * @throws SAXException if the requested document cannot - be downloaded from the specified URL. 
- */ - private def includeXMLDocument(url: String) { - val source = - try new URL(bases.peek(), url) - catch { - case e: MalformedURLException => - val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation()) - ex setRootCause e - throw new SAXException("Unresolvable URL " + url + getLocation(), ex) - } - - try { - val parser: XMLReader = - try XMLReaderFactory.createXMLReader() - catch { - case e: SAXException => - try XMLReaderFactory.createXMLReader(XercesClassName) - catch { case _: SAXException => return System.err.println("Could not find an XML parser") } - } - - parser setContentHandler this - val resolver = this.getEntityResolver() - if (resolver != null) - parser setEntityResolver resolver - - // save old level and base - val previousLevel = level - this.level = 0 - if (bases contains source) - throw new SAXException( - "Circular XInclude Reference", - new CircularIncludeException("Circular XInclude Reference to " + source + getLocation()) - ) - - bases push source - atRoot = true - parser parse source.toExternalForm() - - // restore old level and base - this.level = previousLevel - bases.pop() - } - catch { - case e: IOException => - throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e) - } - } -} diff --git a/src/library/scala/xml/include/sax/XIncluder.scala b/src/library/scala/xml/include/sax/XIncluder.scala deleted file mode 100644 index 1939fa1875..0000000000 --- a/src/library/scala/xml/include/sax/XIncluder.scala +++ /dev/null @@ -1,187 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package include.sax - -import scala.collection.mutable -import org.xml.sax.{ ContentHandler, XMLReader, Locator, Attributes } -import org.xml.sax.ext.LexicalHandler -import java.io.{ File, OutputStream, 
OutputStreamWriter, Writer, IOException } - -/** XIncluder is a SAX `ContentHandler` that writes its XML document onto - * an output stream after resolving all `xinclude:include` elements. - * - * Based on Eliotte Rusty Harold's SAXXIncluder. - */ -class XIncluder(outs: OutputStream, encoding: String) extends ContentHandler with LexicalHandler { - - var out = new OutputStreamWriter(outs, encoding) - - def setDocumentLocator(locator: Locator) {} - - def startDocument() { - try { - out.write("\r\n") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def endDocument() { - try { - out.flush() - } - catch { - case e:IOException => - throw new SAXException("Flush failed", e) - } - } - - def startPrefixMapping(prefix: String , uri: String) {} - - def endPrefixMapping(prefix: String) {} - - def startElement(namespaceURI: String, localName: String, qualifiedName: String, atts: Attributes) = { - try { - out.write("<" + qualifiedName) - var i = 0; while (i < atts.getLength()) { - out.write(" ") - out.write(atts.getQName(i)) - out.write("='") - val value = atts.getValue(i) - // @todo Need to use character references if the encoding - // can't support the character - out.write(scala.xml.Utility.escape(value)) - out.write("'") - i += 1 - } - out.write(">") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def endElement(namespaceURI: String, localName:String, qualifiedName: String) { - try { - out.write("") - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - - // need to escape characters that are not in the given - // encoding using character references???? - def characters(ch: Array[Char], start: Int, length: Int) { - try { - var i = 0; while (i < length) { - val c = ch(start+i) - if (c == '&') out.write("&") - else if (c == '<') out.write("<") - // This next fix is normally not necessary. 
- // However, it is required if text contains ]]> - // (The end CDATA section delimiter) - else if (c == '>') out.write(">") - else out.write(c.toInt) - i += 1 - } - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - - def ignorableWhitespace(ch: Array[Char], start: Int , length: Int) { - this.characters(ch, start, length) - } - - // do I need to escape text in PI???? - def processingInstruction(target: String, data: String) { - try { - out.write("") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - def skippedEntity(name: String) { - try { - out.write("&" + name + ";") - } - catch { - case e:IOException => - throw new SAXException("Write failed", e) - } - } - - // LexicalHandler methods - private var inDTD: Boolean = false - private val entities = new mutable.Stack[String]() - - def startDTD(name: String, publicID: String, systemID: String) { - inDTD = true - // if this is the source document, output a DOCTYPE declaration - if (entities.isEmpty) { - var id = "" - if (publicID != null) id = " PUBLIC \"" + publicID + "\" \"" + systemID + '"' - else if (systemID != null) id = " SYSTEM \"" + systemID + '"' - try { - out.write("\r\n") - } - catch { - case e:IOException => - throw new SAXException("Error while writing DOCTYPE", e) - } - } - } - def endDTD() {} - - def startEntity(name: String) { - entities push name - } - - def endEntity(name: String) { - entities.pop() - } - - def startCDATA() {} - def endCDATA() {} - - // Just need this reference so we can ask if a comment is - // inside an include element or not - private var filter: XIncludeFilter = null - - def setFilter(filter: XIncludeFilter) { - this.filter = filter - } - - def comment(ch: Array[Char], start: Int, length: Int) { - if (!inDTD && !filter.insideIncludeElement()) { - try { - out.write("") - } - catch { - case e: IOException => - throw new SAXException("Write failed", e) - } - } - } -} diff --git 
a/src/library/scala/xml/package.scala b/src/library/scala/xml/package.scala deleted file mode 100644 index 4001cc5ffb..0000000000 --- a/src/library/scala/xml/package.scala +++ /dev/null @@ -1,19 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala - -package object xml { - val XercesClassName = "org.apache.xerces.parsers.SAXParser" - - type SAXException = org.xml.sax.SAXException - type SAXParseException = org.xml.sax.SAXParseException - type EntityResolver = org.xml.sax.EntityResolver - type InputSource = org.xml.sax.InputSource - type SAXParser = javax.xml.parsers.SAXParser -} diff --git a/src/library/scala/xml/parsing/ConstructingHandler.scala b/src/library/scala/xml/parsing/ConstructingHandler.scala deleted file mode 100755 index ba416e4301..0000000000 --- a/src/library/scala/xml/parsing/ConstructingHandler.scala +++ /dev/null @@ -1,34 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -/** Implementation of MarkupHandler that constructs nodes. 
- * - * @author Burak Emir - * @version 1.0 - */ -abstract class ConstructingHandler extends MarkupHandler -{ - val preserveWS: Boolean - - def elem(pos: Int, pre: String, label: String, attrs: MetaData, - pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = - Elem(pre, label, attrs, pscope, empty, nodes:_*) - - def procInstr(pos: Int, target: String, txt: String) = - ProcInstr(target, txt) - - def comment(pos: Int, txt: String) = Comment(txt) - def entityRef(pos: Int, n: String) = EntityRef(n) - def text(pos: Int, txt: String) = Text(txt) -} diff --git a/src/library/scala/xml/parsing/ConstructingParser.scala b/src/library/scala/xml/parsing/ConstructingParser.scala deleted file mode 100644 index 3caeddabf4..0000000000 --- a/src/library/scala/xml/parsing/ConstructingParser.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import java.io.File -import scala.io.Source - -object ConstructingParser { - def fromFile(inp: File, preserveWS: Boolean) = - new ConstructingParser(Source.fromFile(inp), preserveWS).initialize - - def fromSource(inp: Source, preserveWS: Boolean) = - new ConstructingParser(inp, preserveWS).initialize -} - -/** An xml parser. parses XML and invokes callback methods of a MarkupHandler. - * Don't forget to call next.ch on a freshly instantiated parser in order to - * initialize it. If you get the parser from the object method, initialization - * is already done for you. 
- * - * {{{ - * object parseFromURL { - * def main(args: Array[String]) { - * val url = args(0) - * val src = scala.io.Source.fromURL(url) - * val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false) // fromSource initializes automatically - * val doc = cpa.document() - * - * // let's see what it is - * val ppr = new scala.xml.PrettyPrinter(80, 5) - * val ele = doc.docElem - * println("finished parsing") - * val out = ppr.format(ele) - * println(out) - * } - * } - * }}} */ -class ConstructingParser(val input: Source, val preserveWS: Boolean) -extends ConstructingHandler -with ExternalSources -with MarkupParser { - - // default impl. of Logged - override def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/parsing/DefaultMarkupHandler.scala b/src/library/scala/xml/parsing/DefaultMarkupHandler.scala deleted file mode 100755 index 6ec7474843..0000000000 --- a/src/library/scala/xml/parsing/DefaultMarkupHandler.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - - -/** Default implementation of markup handler always returns `NodeSeq.Empty` */ -abstract class DefaultMarkupHandler extends MarkupHandler { - - def elem(pos: Int, pre: String, label: String, attrs: MetaData, - scope:NamespaceBinding, empty: Boolean, args: NodeSeq) = NodeSeq.Empty - - def procInstr(pos: Int, target: String, txt: String) = NodeSeq.Empty - - def comment(pos: Int, comment: String ): NodeSeq = NodeSeq.Empty - - def entityRef(pos: Int, n: String) = NodeSeq.Empty - - def text(pos: Int, txt:String) = NodeSeq.Empty - -} diff --git a/src/library/scala/xml/parsing/ExternalSources.scala b/src/library/scala/xml/parsing/ExternalSources.scala deleted file mode 100644 index bb939bca95..0000000000 --- 
a/src/library/scala/xml/parsing/ExternalSources.scala +++ /dev/null @@ -1,38 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import java.net.URL -import java.io.File.separator - -import scala.io.Source - -/** - * @author Burak Emir - * @version 1.0 - */ -trait ExternalSources { - self: ExternalSources with MarkupParser with MarkupHandler => - - def externalSource(systemId: String): Source = { - if (systemId startsWith "http:") - return Source fromURL new URL(systemId) - - val fileStr: String = input.descr match { - case x if x startsWith "file:" => x drop 5 - case x => x take ((x lastIndexOf separator) + 1) - } - - Source.fromFile(fileStr + systemId) - } -} diff --git a/src/library/scala/xml/parsing/FactoryAdapter.scala b/src/library/scala/xml/parsing/FactoryAdapter.scala deleted file mode 100644 index 2154bdf5ba..0000000000 --- a/src/library/scala/xml/parsing/FactoryAdapter.scala +++ /dev/null @@ -1,187 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream } -import scala.collection.{ mutable, Iterator } -import org.xml.sax.Attributes -import org.xml.sax.helpers.DefaultHandler - -// can be mixed into FactoryAdapter if desired -trait ConsoleErrorHandler extends DefaultHandler { - // ignore warning, crimson warns even for entity resolution! 
- override def warning(ex: SAXParseException): Unit = { } - override def error(ex: SAXParseException): Unit = printError("Error", ex) - override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex) - - protected def printError(errtype: String, ex: SAXParseException): Unit = - Console.withOut(Console.err) { - val s = "[%s]:%d:%d: %s".format( - errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage) - Console.println(s) - Console.flush() - } -} - -/** SAX adapter class, for use with Java SAX parser. Keeps track of - * namespace bindings, without relying on namespace handling of the - * underlying SAX parser. - */ -abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] { - var rootElem: Node = null - - val buffer = new StringBuilder() - val attribStack = new mutable.Stack[MetaData] - val hStack = new mutable.Stack[Node] // [ element ] contains siblings - val tagStack = new mutable.Stack[String] - var scopeStack = new mutable.Stack[NamespaceBinding] - - var curTag : String = null - var capture: Boolean = false - - // abstract methods - - /** Tests if an XML element contains text. - * @return true if element named `localName` contains text. - */ - def nodeContainsText(localName: String): Boolean // abstract - - /** creates an new non-text(tree) node. - * @param elemName - * @param attribs - * @param chIter - * @return a new XML element. - */ - def createNode(pre: String, elemName: String, attribs: MetaData, - scope: NamespaceBinding, chIter: List[Node]): Node // abstract - - /** creates a Text node. - * @param text - * @return a new Text node. - */ - def createText(text: String): Text // abstract - - /** creates a new processing instruction node. - */ - def createProcInstr(target: String, data: String): Seq[ProcInstr] - - // - // ContentHandler methods - // - - val normalizeWhitespace = false - - /** Characters. 
- * @param ch - * @param offset - * @param length - */ - override def characters(ch: Array[Char], offset: Int, length: Int): Unit = { - if (!capture) return - // compliant: report every character - else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length) - // normalizing whitespace is not compliant, but useful - else { - var it = ch.slice(offset, offset + length).iterator - while (it.hasNext) { - val c = it.next() - val isSpace = c.isWhitespace - buffer append (if (isSpace) ' ' else c) - if (isSpace) - it = it dropWhile (_.isWhitespace) - } - } - } - - private def splitName(s: String) = { - val idx = s indexOf ':' - if (idx < 0) (null, s) - else (s take idx, s drop (idx + 1)) - } - - /* ContentHandler methods */ - - /* Start element. */ - override def startElement( - uri: String, - _localName: String, - qname: String, - attributes: Attributes): Unit = - { - captureText() - tagStack push curTag - curTag = qname - - val localName = splitName(qname)._2 - capture = nodeContainsText(localName) - - hStack push null - var m: MetaData = Null - var scpe: NamespaceBinding = - if (scopeStack.isEmpty) TopScope - else scopeStack.top - - for (i <- 0 until attributes.getLength()) { - val qname = attributes getQName i - val value = attributes getValue i - val (pre, key) = splitName(qname) - def nullIfEmpty(s: String) = if (s == "") null else s - - if (pre == "xmlns" || (pre == null && qname == "xmlns")) { - val arg = if (pre == null) null else key - scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe) - } - else - m = Attribute(Option(pre), key, Text(value), m) - } - - scopeStack push scpe - attribStack push m - } - - - /** captures text, possibly normalizing whitespace - */ - def captureText(): Unit = { - if (capture && buffer.length > 0) - hStack push createText(buffer.toString) - - buffer.clear() - } - - /** End element. - * @param uri - * @param _localName - * @param qname - * @throws org.xml.sax.SAXException if .. 
- */ - override def endElement(uri: String , _localName: String, qname: String): Unit = { - captureText() - val metaData = attribStack.pop() - - // reverse order to get it right - val v = (Iterator continually hStack.pop takeWhile (_ != null)).toList.reverse - val (pre, localName) = splitName(qname) - val scp = scopeStack.pop() - - // create element - rootElem = createNode(pre, localName, metaData, scp, v) - hStack push rootElem - curTag = tagStack.pop() - capture = curTag != null && nodeContainsText(curTag) // root level - } - - /** Processing instruction. - */ - override def processingInstruction(target: String, data: String) { - hStack pushAll createProcInstr(target, data) - } -} diff --git a/src/library/scala/xml/parsing/FatalError.scala b/src/library/scala/xml/parsing/FatalError.scala deleted file mode 100644 index ab3cb2a74d..0000000000 --- a/src/library/scala/xml/parsing/FatalError.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -/** !!! This is poorly named, but I guess it's in the API. 
- */ -case class FatalError(msg: String) extends java.lang.RuntimeException(msg) diff --git a/src/library/scala/xml/parsing/MarkupHandler.scala b/src/library/scala/xml/parsing/MarkupHandler.scala deleted file mode 100755 index 1ebffb9c90..0000000000 --- a/src/library/scala/xml/parsing/MarkupHandler.scala +++ /dev/null @@ -1,127 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import scala.collection.mutable -import scala.io.Source -import scala.xml.dtd._ - -/** class that handles markup - provides callback methods to MarkupParser. - * the default is nonvalidating behaviour - * - * @author Burak Emir - * @version 1.0 - * - * @todo can we ignore more entity declarations (i.e. those with extIDs)? - * @todo expanding entity references - */ -abstract class MarkupHandler { - - /** returns true is this markup handler is validating */ - val isValidating: Boolean = false - - var decls: List[Decl] = Nil - var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() - - def lookupElemDecl(Label: String): ElemDecl = { - for (z @ ElemDecl(Label, _) <- decls) - return z - - null - } - - def replacementText(entityName: String): Source = - Source fromString ((ent get entityName) match { - case Some(ParsedEntityDecl(_, IntDef(value))) => value - case Some(ParameterEntityDecl(_, IntDef(value))) => " %s " format value - case Some(_) => "" format entityName - case None => "" format entityName - }) - - def endDTD(n: String): Unit = () - - /** callback method invoked by MarkupParser after start-tag of element. 
- * - * @param pos the position in the sourcefile - * @param pre the prefix - * @param label the local name - * @param attrs the attributes (metadata) - */ - def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit = () - - /** callback method invoked by MarkupParser after end-tag of element. - * - * @param pos the position in the source file - * @param pre the prefix - * @param label the local name - */ - def elemEnd(pos: Int, pre: String, label: String): Unit = () - - /** callback method invoked by MarkupParser after parsing an element, - * between the elemStart and elemEnd callbacks - * - * @param pos the position in the source file - * @param pre the prefix - * @param label the local name - * @param attrs the attributes (metadata) - * @param empty `true` if the element was previously empty; `false` otherwise. - * @param args the children of this element - */ - def elem(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, empty: Boolean, args: NodeSeq): NodeSeq - - /** callback method invoked by MarkupParser after parsing PI. - */ - def procInstr(pos: Int, target: String, txt: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing comment. - */ - def comment(pos: Int, comment: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing entity ref. - * @todo expanding entity references - */ - def entityRef(pos: Int, n: String): NodeSeq - - /** callback method invoked by MarkupParser after parsing text. 
- */ - def text(pos: Int, txt: String): NodeSeq - - // DTD handler methods - - def elemDecl(n: String, cmstr: String): Unit = () - - def attListDecl(name: String, attList: List[AttrDecl]): Unit = () - - private def someEntityDecl(name: String, edef: EntityDef, f: (String, EntityDef) => EntityDecl): Unit = - edef match { - case _: ExtDef if !isValidating => // ignore (cf REC-xml 4.4.1) - case _ => - val y = f(name, edef) - decls ::= y - ent.update(name, y) - } - - def parameterEntityDecl(name: String, edef: EntityDef): Unit = - someEntityDecl(name, edef, ParameterEntityDecl.apply _) - - def parsedEntityDecl(name: String, edef: EntityDef): Unit = - someEntityDecl(name, edef, ParsedEntityDecl.apply _) - - def peReference(name: String) { decls ::= PEReference(name) } - def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = () - def notationDecl(notat: String, extID: ExternalID): Unit = () - def reportSyntaxError(pos: Int, str: String): Unit - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/parsing/MarkupParser.scala b/src/library/scala/xml/parsing/MarkupParser.scala deleted file mode 100755 index 3bbd136b67..0000000000 --- a/src/library/scala/xml/parsing/MarkupParser.scala +++ /dev/null @@ -1,938 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source -import scala.xml.dtd._ -import Utility.Escapes.{ pairs => unescape } - -/** - * An XML parser. - * - * Parses XML 1.0, invokes callback methods of a `MarkupHandler` and returns - * whatever the markup handler returns. Use `ConstructingParser` if you just - * want to parse XML to construct instances of `scala.xml.Node`. 
- * - * While XML elements are returned, DTD declarations - if handled - are - * collected using side-effects. - * - * @author Burak Emir - * @version 1.0 - */ -trait MarkupParser extends MarkupParserCommon with TokenTests -{ - self: MarkupParser with MarkupHandler => - - type PositionType = Int - type InputType = Source - type ElementType = NodeSeq - type AttributesType = (MetaData, NamespaceBinding) - type NamespaceType = NamespaceBinding - - def truncatedError(msg: String): Nothing = throw FatalError(msg) - def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag) - - def xHandleError(that: Char, msg: String) = reportSyntaxError(msg) - - val input: Source - - /** if true, does not remove surplus whitespace */ - val preserveWS: Boolean - - def externalSource(systemLiteral: String): Source - - // - // variables, values - // - - protected var curInput: Source = input - - // See ticket #3720 for motivations. - private class WithLookAhead(underlying: Source) extends Source { - private val queue = scala.collection.mutable.Queue[Char]() - def lookahead(): BufferedIterator[Char] = { - val iter = queue.iterator ++ new Iterator[Char] { - def hasNext = underlying.hasNext - def next() = { val x = underlying.next(); queue += x; x } - } - iter.buffered - } - val iter = new Iterator[Char] { - def hasNext = underlying.hasNext || !queue.isEmpty - def next() = if (!queue.isEmpty) queue.dequeue() else underlying.next() - } - } - - def lookahead(): BufferedIterator[Char] = curInput match { - case curInputWLA:WithLookAhead => - curInputWLA.lookahead() - case _ => - val newInput = new WithLookAhead(curInput) - curInput = newInput - newInput.lookahead() - } - - - /** the handler of the markup, returns this */ - private val handle: MarkupHandler = this - - /** stack of inputs */ - var inpStack: List[Source] = Nil - - /** holds the position in the source file */ - var pos: Int = _ - - /* used when reading external subset */ - var extIndex = -1 - - /** holds 
temporary values of pos */ - var tmppos: Int = _ - - /** holds the next character */ - var nextChNeeded: Boolean = false - var reachedEof: Boolean = false - var lastChRead: Char = _ - def ch: Char = { - if (nextChNeeded) { - if (curInput.hasNext) { - lastChRead = curInput.next() - pos = curInput.pos - } else { - val ilen = inpStack.length - //Console.println(" ilen = "+ilen+ " extIndex = "+extIndex); - if ((ilen != extIndex) && (ilen > 0)) { - /* for external source, inpStack == Nil ! need notify of eof! */ - pop() - } else { - reachedEof = true - lastChRead = 0.asInstanceOf[Char] - } - } - nextChNeeded = false - } - lastChRead - } - - /** character buffer, for names */ - protected val cbuf = new StringBuilder() - - var dtd: DTD = null - - protected var doc: Document = null - - def eof: Boolean = { ch; reachedEof } - - // - // methods - // - - /** {{{ - * - * }}} */ - def xmlProcInstr(): MetaData = { - xToken("xml") - xSpace() - val (md,scp) = xAttributes(TopScope) - if (scp != TopScope) - reportSyntaxError("no xmlns definitions here, please.") - xToken('?') - xToken('>') - md - } - - /** Factored out common code. 
- */ - private def prologOrTextDecl(isProlog: Boolean): (Option[String], Option[String], Option[Boolean]) = { - var info_ver: Option[String] = None - var info_enc: Option[String] = None - var info_stdl: Option[Boolean] = None - - val m = xmlProcInstr() - var n = 0 - - if (isProlog) - xSpaceOpt() - - m("version") match { - case null => - case Text("1.0") => info_ver = Some("1.0"); n += 1 - case _ => reportSyntaxError("cannot deal with versions != 1.0") - } - - m("encoding") match { - case null => - case Text(enc) => - if (!isValidIANAEncoding(enc)) - reportSyntaxError("\"" + enc + "\" is not a valid encoding") - else { - info_enc = Some(enc) - n += 1 - } - } - - if (isProlog) { - m("standalone") match { - case null => - case Text("yes") => info_stdl = Some(true); n += 1 - case Text("no") => info_stdl = Some(false); n += 1 - case _ => reportSyntaxError("either 'yes' or 'no' expected") - } - } - - if (m.length - n != 0) { - val s = if (isProlog) "SDDecl? " else "" - reportSyntaxError("VersionInfo EncodingDecl? %sor '?>' expected!" format s) - } - - (info_ver, info_enc, info_stdl) - } - - /** {{{ - * (x1, x2) } - - /** {{{ - * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? - * [23] XMLDecl ::= '' - * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') - * [25] Eq ::= S? '=' S? - * [26] VersionNum ::= '1.0' - * [27] Misc ::= Comment | PI | S - * }}} */ - def document(): Document = { - doc = new Document() - - this.dtd = null - var info_prolog: (Option[String], Option[String], Option[Boolean]) = (None, None, None) - if ('<' != ch) { - reportSyntaxError("< expected") - return null - } - - nextch() // is prolog ? - var children: NodeSeq = null - if ('?' 
== ch) { - nextch() - info_prolog = prolog() - doc.version = info_prolog._1 - doc.encoding = info_prolog._2 - doc.standAlone = info_prolog._3 - - children = content(TopScope) // DTD handled as side effect - } - else { - val ts = new NodeBuffer() - content1(TopScope, ts) // DTD handled as side effect - ts &+ content(TopScope) - children = NodeSeq.fromSeq(ts) - } - //println("[MarkupParser::document] children now: "+children.toList) - var elemCount = 0 - var theNode: Node = null - for (c <- children) c match { - case _:ProcInstr => - case _:Comment => - case _:EntityRef => // todo: fix entities, shouldn't be "special" - reportSyntaxError("no entity references allowed here") - case s:SpecialNode => - if (s.toString.trim().length > 0) //non-empty text nodes not allowed - elemCount += 2 - case m:Node => - elemCount += 1 - theNode = m - } - if (1 != elemCount) { - reportSyntaxError("document must contain exactly one element") - Console.println(children.toList) - } - - doc.children = children - doc.docElem = theNode - doc - } - - /** append Unicode character to name buffer*/ - protected def putChar(c: Char) = cbuf append c - - /** As the current code requires you to call nextch once manually - * after construction, this method formalizes that suboptimal reality. 
- */ - def initialize: this.type = { - nextch() - this - } - - protected def ch_returning_nextch: Char = { val res = ch; nextch(); res } - - def mkAttributes(name: String, pscope: NamespaceBinding): AttributesType = - if (isNameStart (ch)) xAttributes(pscope) - else (Null, pscope) - - def mkProcInstr(position: Int, name: String, text: String): ElementType = - handle.procInstr(position, name, text) - - /** this method tells ch to get the next character when next called */ - def nextch() { - // Read current ch if needed - ch - - // Mark next ch to be required - nextChNeeded = true - } - - /** parse attribute and create namespace scope, metadata - * {{{ - * [41] Attributes ::= { S Name Eq AttValue } - * }}} - */ - def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = { - var scope: NamespaceBinding = pscope - var aMap: MetaData = Null - while (isNameStart(ch)) { - val qname = xName - xEQ() // side effect - val value = xAttributeValue() - - Utility.prefix(qname) match { - case Some("xmlns") => - val prefix = qname.substring(6 /*xmlns:*/ , qname.length) - scope = new NamespaceBinding(prefix, value, scope) - - case Some(prefix) => - val key = qname.substring(prefix.length+1, qname.length) - aMap = new PrefixedAttribute(prefix, key, Text(value), aMap) - - case _ => - if( qname == "xmlns" ) - scope = new NamespaceBinding(null, value, scope) - else - aMap = new UnprefixedAttribute(qname, Text(value), aMap) - } - - if ((ch != '/') && (ch != '>') && ('?' != ch)) - xSpace() - } - - if(!aMap.wellformed(scope)) - reportSyntaxError( "double attribute") - - (aMap,scope) - } - - /** entity value, terminated by either ' or ". value may not contain <. 
- * {{{ - * AttValue ::= `'` { _ } `'` - * | `"` { _ } `"` - * }}} - */ - def xEntityValue(): String = { - val endch = ch - nextch() - while (ch != endch && !eof) { - putChar(ch) - nextch() - } - nextch() - val str = cbuf.toString() - cbuf.length = 0 - str - } - - /** {{{ - * '"{char} ) ']]>' - * - * see [15] - * }}} */ - def xCharData: NodeSeq = { - xToken("[CDATA[") - def mkResult(pos: Int, s: String): NodeSeq = { - handle.text(pos, s) - PCData(s) - } - xTakeUntil(mkResult, () => pos, "]]>") - } - - /** {{{ - * Comment ::= '' - * - * see [15] - * }}} */ - def xComment: NodeSeq = { - val sb: StringBuilder = new StringBuilder() - xToken("--") - while (true) { - if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) { - sb.length = sb.length - 1 - nextch() - xToken('>') - return handle.comment(pos, sb.toString()) - } else sb.append(ch) - nextch() - } - throw FatalError("this cannot happen") - } - - /* todo: move this into the NodeBuilder class */ - def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = { - if (preserveWS) - ts &+ handle.text(pos, txt) - else - for (t <- TextBuffer.fromString(txt).toText) { - ts &+ handle.text(pos, t.text) - } - } - - /** {{{ - * '<' content1 ::= ... - * }}} */ - def content1(pscope: NamespaceBinding, ts: NodeBuffer) { - ch match { - case '!' => - nextch() - if ('[' == ch) // CDATA - ts &+ xCharData - else if ('D' == ch) // doctypedecl, parse DTD // @todo REMOVE HACK - parseDTD() - else // comment - ts &+ xComment - case '?' => // PI - nextch() - ts &+ xProcInstr - case _ => - ts &+ element1(pscope) // child - } - } - - /** {{{ - * content1 ::= '<' content1 | '&' charref ... - * }}} */ - def content(pscope: NamespaceBinding): NodeSeq = { - val ts = new NodeBuffer - var exit = eof - // todo: optimize seq repr. 
- def done = new NodeSeq { val theSeq = ts.toList } - - while (!exit) { - tmppos = pos - exit = eof - - if (eof) - return done - - ch match { - case '<' => // another tag - nextch(); ch match { - case '/' => exit = true // end tag - case _ => content1(pscope, ts) - } - - // postcond: xEmbeddedBlock == false! - case '&' => // EntityRef or CharRef - nextch(); ch match { - case '#' => // CharacterRef - nextch() - val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch())) - xToken(';') - ts &+ theChar - case _ => // EntityRef - val n = xName - xToken(';') - - if (unescape contains n) { - handle.entityRef(tmppos, n) - ts &+ unescape(n) - } else push(n) - } - case _ => // text content - appendText(tmppos, ts, xText) - } - } - done - } // content(NamespaceBinding) - - /** {{{ - * externalID ::= SYSTEM S syslit - * PUBLIC S pubid S syslit - * }}} */ - def externalID(): ExternalID = ch match { - case 'S' => - nextch() - xToken("YSTEM") - xSpace() - val sysID = systemLiteral() - new SystemID(sysID) - case 'P' => - nextch(); xToken("UBLIC") - xSpace() - val pubID = pubidLiteral() - xSpace() - val sysID = systemLiteral() - new PublicID(pubID, sysID) - } - - - /** parses document type declaration and assigns it to instance variable - * dtd. - * {{{ - * - * }}} */ - def parseDTD() { // dirty but fast - var extID: ExternalID = null - if (this.dtd ne null) - reportSyntaxError("unexpected character (DOCTYPE already defined") - xToken("DOCTYPE") - xSpace() - val n = xName - xSpace() - //external ID - if ('S' == ch || 'P' == ch) { - extID = externalID() - xSpaceOpt() - } - - /* parse external subset of DTD - */ - - if ((null != extID) && isValidating) { - - pushExternal(extID.systemId) - extIndex = inpStack.length - - extSubset() - pop() - extIndex = -1 - } - - if ('[' == ch) { // internal subset - nextch() - /* TODO */ - intSubset() - // TODO: do the DTD parsing?? ?!?!?!?!! 
- xToken(']') - xSpaceOpt() - } - xToken('>') - this.dtd = new DTD { - /*override var*/ externalID = extID - /*override val */decls = handle.decls.reverse - } - //this.dtd.initializeEntities(); - if (doc ne null) - doc.dtd = this.dtd - - handle.endDTD(n) - } - - def element(pscope: NamespaceBinding): NodeSeq = { - xToken('<') - element1(pscope) - } - - /** {{{ - * '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag - * | xmlTag1 '/' '>' - * }}} */ - def element1(pscope: NamespaceBinding): NodeSeq = { - val pos = this.pos - val (qname, (aMap, scope)) = xTag(pscope) - val (pre, local) = Utility.prefix(qname) match { - case Some(p) => (p, qname drop p.length+1) - case _ => (null, qname) - } - val ts = { - if (ch == '/') { // empty element - xToken("/>") - handle.elemStart(pos, pre, local, aMap, scope) - NodeSeq.Empty - } - else { // element with content - xToken('>') - handle.elemStart(pos, pre, local, aMap, scope) - val tmp = content(scope) - xEndTag(qname) - tmp - } - } - val res = handle.elem(pos, pre, local, aMap, scope, ts == NodeSeq.Empty, ts) - handle.elemEnd(pos, pre, local) - res - } - - /** Parse character data. - * - * precondition: `xEmbeddedBlock == false` (we are not in a scala block) - */ - private def xText: String = { - var exit = false - while (! exit) { - putChar(ch) - nextch() - - exit = eof || ( ch == '<' ) || ( ch == '&' ) - } - val str = cbuf.toString - cbuf.length = 0 - str - } - - /** attribute value, terminated by either ' or ". value may not contain <. 
- * {{{ - * AttValue ::= `'` { _ } `'` - * | `"` { _ } `"` - * }}} */ - def systemLiteral(): String = { - val endch = ch - if (ch != '\'' && ch != '"') - reportSyntaxError("quote ' or \" expected") - nextch() - while (ch != endch && !eof) { - putChar(ch) - nextch() - } - nextch() - val str = cbuf.toString() - cbuf.length = 0 - str - } - - /** {{{ - * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" - * }}} */ - def pubidLiteral(): String = { - val endch = ch - if (ch!='\'' && ch != '"') - reportSyntaxError("quote ' or \" expected") - nextch() - while (ch != endch && !eof) { - putChar(ch) - //println("hello '"+ch+"'"+isPubIDChar(ch)) - if (!isPubIDChar(ch)) - reportSyntaxError("char '"+ch+"' is not allowed in public id") - nextch() - } - nextch() - val str = cbuf.toString - cbuf.length = 0 - str - } - - // - // dtd parsing - // - - def extSubset(): Unit = { - var textdecl: (Option[String],Option[String]) = null - if (ch == '<') { - nextch() - if (ch == '?') { - nextch() - textdecl = textDecl() - } else - markupDecl1() - } - while (!eof) - markupDecl() - } - - def markupDecl1() = { - def doInclude() = { - xToken('['); while(']' != ch) markupDecl(); nextch() // ']' - } - def doIgnore() = { - xToken('['); while(']' != ch) nextch(); nextch() // ']' - } - if ('?' == ch) { - nextch() - xProcInstr // simply ignore processing instructions! 
- } else { - xToken('!') - ch match { - case '-' => - xComment // ignore comments - - case 'E' => - nextch() - if ('L' == ch) { - nextch() - elementDecl() - } else - entityDecl() - - case 'A' => - nextch() - attrDecl() - - case 'N' => - nextch() - notationDecl() - - case '[' if inpStack.length >= extIndex => - nextch() - xSpaceOpt() - ch match { - case '%' => - nextch() - val ent = xName - xToken(';') - xSpaceOpt() - - push(ent) - xSpaceOpt() - val stmt = xName - xSpaceOpt() - - stmt match { - // parameter entity - case "INCLUDE" => doInclude() - case "IGNORE" => doIgnore() - } - case 'I' => - nextch() - ch match { - case 'G' => - nextch() - xToken("NORE") - xSpaceOpt() - doIgnore() - case 'N' => - nextch() - xToken("NCLUDE") - doInclude() - } - } - xToken(']') - xToken('>') - - case _ => - curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl") - while (ch!='>') - nextch() - } - } - } - - def markupDecl(): Unit = ch match { - case '%' => // parameter entity reference - nextch() - val ent = xName - xToken(';') - if (!isValidating) - handle.peReference(ent) // n-v: just create PE-reference - else - push(ent) // v: parse replacementText - - //peReference - case '<' => - nextch() - markupDecl1() - case _ if isSpace(ch) => - xSpace() - case _ => - reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.toInt) - nextch() - } - - /** "rec-xml/#ExtSubset" pe references may not occur within markup declarations - */ - def intSubset() { - //Console.println("(DEBUG) intSubset()") - xSpace() - while (']' != ch) - markupDecl() - } - - /** <! 
element := ELEMENT - */ - def elementDecl() { - xToken("EMENT") - xSpace() - val n = xName - xSpace() - while ('>' != ch) { - //Console.println("["+ch+"]") - putChar(ch) - nextch() - } - //Console.println("END["+ch+"]") - nextch() - val cmstr = cbuf.toString() - cbuf.length = 0 - handle.elemDecl(n, cmstr) - } - - /** {{{ - * ' != ch) { - val aname = xName - xSpace() - // could be enumeration (foo,bar) parse this later :-/ - while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { - if (!isSpace(ch)) - cbuf.append(ch) - nextch() - } - val atpe = cbuf.toString - cbuf.length = 0 - - val defdecl: DefaultDecl = ch match { - case '\'' | '"' => - DEFAULT(fixed = false, xAttributeValue()) - - case '#' => - nextch() - xName match { - case "FIXED" => xSpace() ; DEFAULT(fixed = true, xAttributeValue()) - case "IMPLIED" => IMPLIED - case "REQUIRED" => REQUIRED - } - case _ => - null - } - xSpaceOpt() - - attList ::= AttrDecl(aname, atpe, defdecl) - cbuf.length = 0 - } - nextch() - handle.attListDecl(n, attList.reverse) - } - - /** {{{ - * //sy - val extID = externalID() - if (isParameterEntity) { - xSpaceOpt() - xToken('>') - handle.parameterEntityDecl(n, ExtDef(extID)) - } else { // notation? 
- xSpace() - if ('>' != ch) { - xToken("NDATA") - xSpace() - val notat = xName - xSpaceOpt() - xToken('>') - handle.unparsedEntityDecl(n, extID, notat) - } else { - nextch() - handle.parsedEntityDecl(n, ExtDef(extID)) - } - } - - case '"' | '\'' => - val av = xEntityValue() - xSpaceOpt() - xToken('>') - if (isParameterEntity) - handle.parameterEntityDecl(n, IntDef(av)) - else - handle.parsedEntityDecl(n, IntDef(av)) - } - {} - } // entityDecl - - /** {{{ - * 'N' notationDecl ::= "OTATION" - * }}} */ - def notationDecl() { - xToken("OTATION") - xSpace() - val notat = xName - xSpace() - val extID = if (ch == 'S') { - externalID() - } - else if (ch == 'P') { - /* PublicID (without system, only used in NOTATION) */ - nextch() - xToken("UBLIC") - xSpace() - val pubID = pubidLiteral() - xSpaceOpt() - val sysID = if (ch != '>') - systemLiteral() - else - null - new PublicID(pubID, sysID) - } else { - reportSyntaxError("PUBLIC or SYSTEM expected") - scala.sys.error("died parsing notationdecl") - } - xSpaceOpt() - xToken('>') - handle.notationDecl(notat, extID) - } - - def reportSyntaxError(pos: Int, str: String) { curInput.reportError(pos, str) } - def reportSyntaxError(str: String) { reportSyntaxError(pos, str) } - def reportValidationError(pos: Int, str: String) { reportSyntaxError(pos, str) } - - def push(entityName: String) { - if (!eof) - inpStack = curInput :: inpStack - - // can't push before getting next character if needed - ch - - curInput = replacementText(entityName) - nextch() - } - - def pushExternal(systemId: String) { - if (!eof) - inpStack = curInput :: inpStack - - // can't push before getting next character if needed - ch - - curInput = externalSource(systemId) - nextch() - } - - def pop() { - curInput = inpStack.head - inpStack = inpStack.tail - lastChRead = curInput.ch - nextChNeeded = false - pos = curInput.pos - reachedEof = false // must be false, because of places where entity refs occur - } -} diff --git 
a/src/library/scala/xml/parsing/MarkupParserCommon.scala b/src/library/scala/xml/parsing/MarkupParserCommon.scala deleted file mode 100644 index 57c1651558..0000000000 --- a/src/library/scala/xml/parsing/MarkupParserCommon.scala +++ /dev/null @@ -1,260 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source -import scala.annotation.switch -import Utility.Escapes.{ pairs => unescape } - -import Utility.SU - -/** This is not a public trait - it contains common code shared - * between the library level XML parser and the compiler's. - * All members should be accessed through those. - */ -private[scala] trait MarkupParserCommon extends TokenTests { - protected def unreachable = scala.sys.error("Cannot be reached.") - - // type HandleType // MarkupHandler, SymbolicXMLBuilder - type InputType // Source, CharArrayReader - type PositionType // Int, Position - type ElementType // NodeSeq, Tree - type NamespaceType // NamespaceBinding, Any - type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree] - - def mkAttributes(name: String, pscope: NamespaceType): AttributesType - def mkProcInstr(position: PositionType, name: String, text: String): ElementType - - /** parse a start or empty tag. - * [40] STag ::= '<' Name { S Attribute } [S] - * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] - */ - protected def xTag(pscope: NamespaceType): (String, AttributesType) = { - val name = xName - xSpaceOpt() - - (name, mkAttributes(name, pscope)) - } - - /** '?' {Char})]'?>' - * - * see [15] - */ - def xProcInstr: ElementType = { - val n = xName - xSpaceOpt() - xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>") - } - - /** attribute value, terminated by either `'` or `"`. value may not contain `<`. 
- @param endCh either `'` or `"` - */ - def xAttributeValue(endCh: Char): String = { - val buf = new StringBuilder - while (ch != endCh) { - // well-formedness constraint - if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") - else if (ch == SU) truncatedError("") - else buf append ch_returning_nextch - } - ch_returning_nextch - // @todo: normalize attribute value - buf.toString - } - - def xAttributeValue(): String = { - val str = xAttributeValue(ch_returning_nextch) - // well-formedness constraint - normalizeAttributeValue(str) - } - - private def takeUntilChar(it: Iterator[Char], end: Char): String = { - val buf = new StringBuilder - while (it.hasNext) it.next() match { - case `end` => return buf.toString - case ch => buf append ch - } - scala.sys.error("Expected '%s'".format(end)) - } - - /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' - */ - def xEndTag(startName: String) { - xToken('/') - if (xName != startName) - errorNoEnd(startName) - - xSpaceOpt() - xToken('>') - } - - /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen - * Name ::= (Letter | '_') (NameChar)* - * - * see [5] of XML 1.0 specification - * - * pre-condition: ch != ':' // assured by definition of XMLSTART token - * post-condition: name does neither start, nor end in ':' - */ - def xName: String = { - if (ch == SU) - truncatedError("") - else if (!isNameStart(ch)) - return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "") - - val buf = new StringBuilder - - do buf append ch_returning_nextch - while (isNameChar(ch)) - - if (buf.last == ':') { - reportSyntaxError( "name cannot end in ':'" ) - buf.toString dropRight 1 - } - else buf.toString - } - - private def attr_unescape(s: String) = s match { - case "lt" => "<" - case "gt" => ">" - case "amp" => "&" - case "apos" => "'" - case "quot" => "\"" - case "quote" => "\"" - case _ => "&" + s + ";" - } - - /** Replaces only character references right now. 
- * see spec 3.3.3 - */ - private def normalizeAttributeValue(attval: String): String = { - val buf = new StringBuilder - val it = attval.iterator.buffered - - while (it.hasNext) buf append (it.next() match { - case ' ' | '\t' | '\n' | '\r' => " " - case '&' if it.head == '#' => it.next() ; xCharRef(it) - case '&' => attr_unescape(takeUntilChar(it, ';')) - case c => c - }) - - buf.toString - } - - /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" - * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" - * - * see [66] - */ - def xCharRef(ch: () => Char, nextch: () => Unit): String = - Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _) - - def xCharRef(it: Iterator[Char]): String = { - var c = it.next() - Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _) - } - - def xCharRef: String = xCharRef(() => ch, () => nextch()) - - /** Create a lookahead reader which does not influence the input */ - def lookahead(): BufferedIterator[Char] - - /** The library and compiler parsers had the interesting distinction of - * different behavior for nextch (a function for which there are a total - * of two plausible behaviors, so we know the design space was fully - * explored.) One of them returned the value of nextch before the increment - * and one of them the new value. So to unify code we have to at least - * temporarily abstract over the nextchs. 
- */ - def ch: Char - def nextch(): Unit - protected def ch_returning_nextch: Char - def eof: Boolean - - // def handle: HandleType - var tmppos: PositionType - - def xHandleError(that: Char, msg: String): Unit - def reportSyntaxError(str: String): Unit - def reportSyntaxError(pos: Int, str: String): Unit - - def truncatedError(msg: String): Nothing - def errorNoEnd(tag: String): Nothing - - protected def errorAndResult[T](msg: String, x: T): T = { - reportSyntaxError(msg) - x - } - - def xToken(that: Char) { - if (ch == that) nextch() - else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) - } - def xToken(that: Seq[Char]) { that foreach xToken } - - /** scan [S] '=' [S]*/ - def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() } - - /** skip optional space S? */ - def xSpaceOpt() = while (isSpace(ch) && !eof) nextch() - - /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ - def xSpace() = - if (isSpace(ch)) { nextch(); xSpaceOpt() } - else xHandleError(ch, "whitespace expected") - - /** Apply a function and return the passed value */ - def returning[T](x: T)(f: T => Unit): T = { f(x); x } - - /** Execute body with a variable saved and restored after execution */ - def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = { - val saved = getter - try body - finally setter(saved) - } - - /** Take characters from input stream until given String "until" - * is seen. Once seen, the accumulated characters are passed - * along with the current Position to the supplied handler function. 
- */ - protected def xTakeUntil[T]( - handler: (PositionType, String) => T, - positioner: () => PositionType, - until: String): T = - { - val sb = new StringBuilder - val head = until.head - val rest = until.tail - - while (true) { - if (ch == head && peek(rest)) - return handler(positioner(), sb.toString) - else if (ch == SU) - truncatedError("") // throws TruncatedXMLControl in compiler - - sb append ch - nextch() - } - unreachable - } - - /** Create a non-destructive lookahead reader and see if the head - * of the input would match the given String. If yes, return true - * and drop the entire String from input; if no, return false - * and leave input unchanged. - */ - private def peek(lookingFor: String): Boolean = - (lookahead() take lookingFor.length sameElements lookingFor.iterator) && { - // drop the chars from the real reader (all lookahead + orig) - (0 to lookingFor.length) foreach (_ => nextch()) - true - } -} diff --git a/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala b/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala deleted file mode 100644 index 56ac185f47..0000000000 --- a/src/library/scala/xml/parsing/NoBindingFactoryAdapter.scala +++ /dev/null @@ -1,37 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package parsing - -import factory.NodeFactory - -/** nobinding adaptor providing callbacks to parser to create elements. -* implements hash-consing -*/ -class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] -{ - /** True. Every XML node may contain text that the application needs */ - def nodeContainsText(label: String) = true - - /** From NodeFactory. 
Constructs an instance of scala.xml.Elem */ - protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem = - Elem(pre, label, attrs, scope, children: _*) - - /** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing. */ - def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem = - Elem(pre, label, attrs, scope, children: _*) - - /** Creates a text node. */ - def createText(text: String) = Text(text) - - /** Creates a processing instruction. */ - def createProcInstr(target: String, data: String) = makeProcInstr(target, data) -} diff --git a/src/library/scala/xml/parsing/TokenTests.scala b/src/library/scala/xml/parsing/TokenTests.scala deleted file mode 100644 index 8dd9cdfaa3..0000000000 --- a/src/library/scala/xml/parsing/TokenTests.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -/** - * Helper functions for parsing XML fragments - */ -trait TokenTests { - - /** {{{ - * (#x20 | #x9 | #xD | #xA) - * }}} */ - final def isSpace(ch: Char): Boolean = ch match { - case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true - case _ => false - } - /** {{{ - * (#x20 | #x9 | #xD | #xA)+ - * }}} */ - final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) - - /** These are 99% sure to be redundant but refactoring on the safe side. */ - def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') - - /** {{{ - * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' - * | CombiningChar | Extender - * }}} - * See [4] and Appendix B of XML 1.0 specification. 
- */ - def isNameChar(ch: Char) = { - import java.lang.Character._ - // The constants represent groups Mc, Me, Mn, Lm, and Nd. - - isNameStart(ch) || (getType(ch).toByte match { - case COMBINING_SPACING_MARK | - ENCLOSING_MARK | NON_SPACING_MARK | - MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true - case _ => ".-:" contains ch - }) - } - - /** {{{ - * NameStart ::= ( Letter | '_' ) - * }}} - * where Letter means in one of the Unicode general - * categories `{ Ll, Lu, Lo, Lt, Nl }`. - * - * We do not allow a name to start with `:`. - * See [3] and Appendix B of XML 1.0 specification - */ - def isNameStart(ch: Char) = { - import java.lang.Character._ - - getType(ch).toByte match { - case LOWERCASE_LETTER | - UPPERCASE_LETTER | OTHER_LETTER | - TITLECASE_LETTER | LETTER_NUMBER => true - case _ => ch == '_' - } - } - - /** {{{ - * Name ::= ( Letter | '_' ) (NameChar)* - * }}} - * See [5] of XML 1.0 specification. - */ - def isName(s: String) = - s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar) - - def isPubIDChar(ch: Char): Boolean = - isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') || - ("""-\()+,./:=?;!*#@$_%""" contains ch) - - /** - * Returns `true` if the encoding name is a valid IANA encoding. - * This method does not verify that there is a decoder available - * for this encoding, only that the characters are valid for an - * IANA encoding name. - * - * @param ianaEncoding The IANA encoding name. 
- */ - def isValidIANAEncoding(ianaEncoding: Seq[Char]) = { - def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c) - - ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) && - (ianaEncoding.tail forall charOK) - } - - def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c)) - def checkPubID(s: String) = s forall isPubIDChar -} diff --git a/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala b/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala deleted file mode 100644 index 1b20901249..0000000000 --- a/src/library/scala/xml/parsing/ValidatingMarkupHandler.scala +++ /dev/null @@ -1,104 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package parsing - -import scala.xml.dtd._ - -abstract class ValidatingMarkupHandler extends MarkupHandler { - - var rootLabel:String = _ - var qStack: List[Int] = Nil - var qCurrent: Int = -1 - - var declStack: List[ElemDecl] = Nil - var declCurrent: ElemDecl = null - - final override val isValidating = true - - override def endDTD(n:String) = { - rootLabel = n - } - override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) { - - def advanceDFA(dm:DFAContentModel) = { - val trans = dm.dfa.delta(qCurrent) - log("advanceDFA(dm): " + dm) - log("advanceDFA(trans): " + trans) - trans.get(ContentModel.ElemName(label)) match { - case Some(qNew) => qCurrent = qNew - case _ => reportValidationError(pos, "DTD says, wrong element, expected one of "+trans.keys) - } - } - // advance in current automaton - log("[qCurrent = "+qCurrent+" visiting "+label+"]") - - if (qCurrent == -1) { // root - log(" checking root") - if (label != rootLabel) - reportValidationError(pos, "this element should be "+rootLabel) - } else { - log(" checking node") - 
declCurrent.contentModel match { - case ANY => - case EMPTY => - reportValidationError(pos, "DTD says, no elems, no text allowed here") - case PCDATA => - reportValidationError(pos, "DTD says, no elements allowed here") - case m @ MIXED(r) => - advanceDFA(m) - case e @ ELEMENTS(r) => - advanceDFA(e) - } - } - // push state, decl - qStack = qCurrent :: qStack - declStack = declCurrent :: declStack - - declCurrent = lookupElemDecl(label) - qCurrent = 0 - log(" done now") - } - - override def elemEnd(pos: Int, pre: String, label: String) { - log(" elemEnd") - qCurrent = qStack.head - qStack = qStack.tail - declCurrent = declStack.head - declStack = declStack.tail - log(" qCurrent now" + qCurrent) - log(" declCurrent now" + declCurrent) - } - - final override def elemDecl(name: String, cmstr: String) { - decls = ElemDecl(name, ContentModel.parse(cmstr)) :: decls - } - - final override def attListDecl(name: String, attList: List[AttrDecl]) { - decls = AttListDecl(name, attList) :: decls - } - - final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String) { - decls = UnparsedEntityDecl(name, extID, notat) :: decls - } - - final override def notationDecl(notat: String, extID: ExternalID) { - decls = NotationDecl(notat, extID) :: decls - } - - final override def peReference(name: String) { - decls = PEReference(name) :: decls - } - - /** report a syntax error */ - def reportValidationError(pos: Int, str: String): Unit -} diff --git a/src/library/scala/xml/parsing/XhtmlEntities.scala b/src/library/scala/xml/parsing/XhtmlEntities.scala deleted file mode 100644 index 3683af202c..0000000000 --- a/src/library/scala/xml/parsing/XhtmlEntities.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import 
scala.xml.dtd.{ IntDef, ParsedEntityDecl } - -/** - * @author (c) David Pollak 2007 WorldWide Conferencing, LLC. - * - */ -object XhtmlEntities { - val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165), - ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482), - ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184), - ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247), - ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200), - ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209), - ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219), - ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228), - ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237), - ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246), - ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338), - ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",204), ("zwj",8205), ("lrm",8206), - ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), 
("bdquo",8222), - ("dagger",8224), ("Dagger",8225), ("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242), - ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593), - ("rarr",8594), ("darr",8495), ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660), - ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719), - ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744), - ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804), - ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901), - ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827), - ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919), - ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929), - ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947), - ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957), - ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967), - ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982)) - - val entMap: Map[String, Char] = 
Map.empty[String, Char] ++ entList.map { case (name, value) => (name, value.toChar)} - - val entities = entList. - map { case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))} - - def apply() = entities -} diff --git a/src/library/scala/xml/parsing/XhtmlParser.scala b/src/library/scala/xml/parsing/XhtmlParser.scala deleted file mode 100644 index 6ce5bec8d0..0000000000 --- a/src/library/scala/xml/parsing/XhtmlParser.scala +++ /dev/null @@ -1,31 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package parsing - -import scala.io.Source - -/** An XML Parser that preserves `CDATA` blocks and knows about - * [[scala.xml.parsing.XhtmlEntities]]. - * - * @author (c) David Pollak, 2007 WorldWide Conferencing, LLC. - */ -class XhtmlParser(val input: Source) extends ConstructingHandler with MarkupParser with ExternalSources { - val preserveWS = true - ent ++= XhtmlEntities() -} - -/** Convenience method that instantiates, initializes and runs an `XhtmlParser`. 
- * - * @author Burak Emir - */ -object XhtmlParser { - def apply(source: Source): NodeSeq = new XhtmlParser(source).initialize.document() -} diff --git a/src/library/scala/xml/persistent/CachedFileStorage.scala b/src/library/scala/xml/persistent/CachedFileStorage.scala deleted file mode 100644 index a1489ef3f4..0000000000 --- a/src/library/scala/xml/persistent/CachedFileStorage.scala +++ /dev/null @@ -1,129 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package persistent - -import java.io.{ File, FileOutputStream } -import java.nio.ByteBuffer -import java.nio.channels.Channels -import java.lang.Thread - -import scala.collection.Iterator - -/** Mutable storage of immutable xml trees. Everything is kept in memory, - * with a thread periodically checking for changes and writing to file. - * - * To ensure atomicity, two files are used, `filename1` and `'$'+filename1`. - * The implementation switches between the two, deleting the older one - * after a complete dump of the database has been written. - * - * @author Burak Emir - */ -abstract class CachedFileStorage(private val file1: File) extends Thread { - - private val file2 = new File(file1.getParent, file1.getName+"$") - - /** Either equals `file1` or `file2`, references the next file in which - * updates will be stored. - */ - private var theFile: File = null - - private def switch() = { theFile = if (theFile == file1) file2 else file1; } - - /** this storage modified since last modification check */ - protected var dirty = false - - /** period between modification checks, in milliseconds */ - protected val interval = 1000 - - /** finds and loads the storage file. subclasses should call this method - * prior to any other, but only once, to obtain the initial sequence of nodes. 
- */ - protected def initialNodes: Iterator[Node] = (file1.exists, file2.exists) match { - case (false,false) => - theFile = file1 - Iterator.empty - case (true, true ) if (file1.lastModified < file2.lastModified) => - theFile = file2 - load - case (true, _ ) => - theFile = file1 - load - case _ => - theFile = file2 - load - } - - /** returns an iterator over the nodes in this storage */ - def nodes: Iterator[Node] - - /** adds a node, setting this.dirty to true as a side effect */ - def += (e: Node): Unit - - /** removes a tree, setting this.dirty to true as a side effect */ - def -= (e: Node): Unit - - /* loads and parses XML from file */ - private def load: Iterator[Node] = { - import scala.io.Source - import scala.xml.parsing.ConstructingParser - log("[load]\nloading "+theFile) - val src = Source.fromFile(theFile) - log("parsing "+theFile) - val res = ConstructingParser.fromSource(src,preserveWS = false).document.docElem(0) - switch() - log("[load done]") - res.child.iterator - } - - /** saves the XML to file */ - private def save() = if (this.dirty) { - log("[save]\ndeleting "+theFile) - theFile.delete() - log("creating new "+theFile) - theFile.createNewFile() - val fos = new FileOutputStream(theFile) - val c = fos.getChannel() - - // @todo: optimize - val storageNode = { nodes.toList } - val w = Channels.newWriter(c, "utf-8") - XML.write(w, storageNode, "utf-8", xmlDecl = true, doctype = null) - - log("writing to "+theFile) - - w.close - c.close - fos.close - dirty = false - switch() - log("[save done]") - } - - /** Run method of the thread. remember to use `start()` to start a thread, - * not `run`. */ - override def run = { - log("[run]\nstarting storage thread, checking every "+interval+" ms") - while (true) { - Thread.sleep( this.interval.toLong ) - save() - } - } - - /** Force writing of contents to the file, even if there has not been any - * update. 
*/ - def flush() = { - this.dirty = true - save() - } - - @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") - def log(msg: String): Unit = {} -} diff --git a/src/library/scala/xml/persistent/Index.scala b/src/library/scala/xml/persistent/Index.scala deleted file mode 100644 index 9ee45e7086..0000000000 --- a/src/library/scala/xml/persistent/Index.scala +++ /dev/null @@ -1,17 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package persistent - -/** an Index returns some unique key that is part of a node - */ -abstract class Index[A] extends Function1[Node,A] {} diff --git a/src/library/scala/xml/persistent/SetStorage.scala b/src/library/scala/xml/persistent/SetStorage.scala deleted file mode 100644 index 8db56a2e71..0000000000 --- a/src/library/scala/xml/persistent/SetStorage.scala +++ /dev/null @@ -1,42 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package xml -package persistent - -import scala.collection.mutable -import java.io.File - -/** A persistent store with set semantics. This class allows to add and remove - * trees, but never contains two structurally equal trees. 
- * - * @author Burak Emir - */ -class SetStorage(file: File) extends CachedFileStorage(file) { - - private val theSet = mutable.HashSet[Node]() - - // initialize - - { - val it = super.initialNodes - dirty = it.hasNext - theSet ++= it - } - - /* forwarding methods to hashset*/ - - def += (e: Node): Unit = synchronized { this.dirty = true; theSet += e } - - def -= (e: Node): Unit = synchronized { this.dirty = true; theSet -= e } - - def nodes = synchronized { theSet.iterator } - -} diff --git a/src/library/scala/xml/pull/XMLEvent.scala b/src/library/scala/xml/pull/XMLEvent.scala deleted file mode 100644 index 3beb3648e7..0000000000 --- a/src/library/scala/xml/pull/XMLEvent.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package pull - -/** An XML event for pull parsing. All events received during - * parsing will be one of the subclasses of this trait. - */ -trait XMLEvent - -/** - * An Element's start tag was encountered. - * @param pre prefix, if any, on the element. This is the `xs` in `foo`. - * @param label the name of the element, not including the prefix - * @param attrs any attributes on the element - */ -case class EvElemStart(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) extends XMLEvent - -/** - * An Element's end tag was encountered. - * @param pre prefix, if any, on the element. This is the `xs` in `foo`. - * @param label the name of the element, not including the prefix - */ -case class EvElemEnd(pre: String, label: String) extends XMLEvent - -/** - * A text node was encountered. - * @param text the text that was found - */ -case class EvText(text: String) extends XMLEvent - -/** An entity reference was encountered. - * @param entity the name of the entity, e.g. 
`gt` when encountering the entity `>` - */ -case class EvEntityRef(entity: String) extends XMLEvent - -/** - * A processing instruction was encountered. - * @param target the "PITarget" of the processing instruction. For the instruction ``, the target would - * be `foo` - * @param text the remainder of the instruction. For the instruction ``, the text would - * be `bar="baz"` - * @see [[http://www.w3.org/TR/REC-xml/#sec-pi]] - */ -case class EvProcInstr(target: String, text: String) extends XMLEvent - -/** - * A comment was encountered - * @param text the text of the comment - */ -case class EvComment(text: String) extends XMLEvent diff --git a/src/library/scala/xml/pull/XMLEventReader.scala b/src/library/scala/xml/pull/XMLEventReader.scala deleted file mode 100755 index 76e51e17fd..0000000000 --- a/src/library/scala/xml/pull/XMLEventReader.scala +++ /dev/null @@ -1,157 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package pull - -import scala.io.Source -import java.lang.Thread -import java.util.concurrent.LinkedBlockingQueue -import java.nio.channels.ClosedChannelException -import scala.xml.parsing.{ ExternalSources, MarkupHandler, MarkupParser } - -/** - * Main entry point into creating an event-based XML parser. Treating this - * as a [[scala.collection.Iterator]] will provide access to the generated events. - * @param src A [[scala.io.Source]] for XML data to parse - * - * @author Burak Emir - * @author Paul Phillips - */ -class XMLEventReader(src: Source) -extends scala.collection.AbstractIterator[XMLEvent] - with ProducerConsumerIterator[XMLEvent] { - - // We implement a pull parser as an iterator, but since we may be operating on - // a stream (e.g. XML over a network) there may be arbitrarily long periods when - // the queue is empty. 
Fortunately the ProducerConsumerIterator is ideally - // suited to this task, possibly because it was written for use by this class. - - // to override as necessary - val preserveWS = true - - override val MaxQueueSize = 1000 - protected case object POISON extends XMLEvent - val EndOfStream = POISON - - // thread machinery - private[this] val parser = new Parser(src) - private[this] val parserThread = new Thread(parser, "XMLEventReader") - parserThread.start - // enqueueing the poison object is the reliable way to cause the - // iterator to terminate; hasNext will return false once it sees it. - // Calling interrupt() on the parserThread is the only way we can get - // it to stop producing tokens since it's lost deep in document() - - // we cross our fingers the interrupt() gets to its target, but if it - // fails for whatever reason the iterator correctness is not impacted, - // only performance (because it will finish the entire XML document, - // or at least as much as it can fit in the queue.) 
- def stop() = { - produce(POISON) - parserThread.interrupt() - } - - private class Parser(val input: Source) extends MarkupHandler with MarkupParser with ExternalSources with Runnable { - val preserveWS = XMLEventReader.this.preserveWS - // track level for elem memory usage optimization - private var level = 0 - - // this is Parser's way to add to the queue - the odd return type - // is to conform to MarkupHandler's interface - def setEvent(es: XMLEvent*): NodeSeq = { - es foreach produce - NodeSeq.Empty - } - - override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) { - level += 1 - setEvent(EvElemStart(pre, label, attrs, scope)) - } - override def elemEnd(pos: Int, pre: String, label: String) { - setEvent(EvElemEnd(pre, label)) - level -= 1 - } - - // this is a dummy to satisfy MarkupHandler's API - // memory usage optimization return one for top level to satisfy - // MarkupParser.document() otherwise NodeSeq.Empty - private var ignoreWritten = false - final def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = - if (level == 1 && !ignoreWritten) {ignoreWritten = true; } else NodeSeq.Empty - - def procInstr(pos: Int, target: String, txt: String) = setEvent(EvProcInstr(target, txt)) - def comment(pos: Int, txt: String) = setEvent(EvComment(txt)) - def entityRef(pos: Int, n: String) = setEvent(EvEntityRef(n)) - def text(pos: Int, txt:String) = setEvent(EvText(txt)) - - override def run() { - curInput = input - interruptibly { this.initialize.document() } - setEvent(POISON) - } - } -} - -// An iterator designed for one or more producers to generate -// elements, and a single consumer to iterate. Iteration will continue -// until closeIterator() is called, after which point producers -// calling produce() will receive interruptions. 
-// -// Since hasNext may block indefinitely if nobody is producing, -// there is also an available() method which will return true if -// the next call hasNext is guaranteed not to block. -// -// This is not thread-safe for multiple consumers! -trait ProducerConsumerIterator[T >: Null] extends Iterator[T] { - // abstract - iterator-specific distinguished object for marking eos - val EndOfStream: T - - // defaults to unbounded - override to positive Int if desired - val MaxQueueSize = -1 - - def interruptibly[T](body: => T): Option[T] = try Some(body) catch { - case _: InterruptedException => Thread.currentThread.interrupt(); None - case _: ClosedChannelException => None - } - - private[this] lazy val queue = - if (MaxQueueSize < 0) new LinkedBlockingQueue[T]() - else new LinkedBlockingQueue[T](MaxQueueSize) - private[this] var buffer: T = _ - private def fillBuffer() = { - buffer = interruptibly(queue.take) getOrElse EndOfStream - isElement(buffer) - } - private def isElement(x: T) = x != null && x != EndOfStream - private def eos() = buffer == EndOfStream - - // public producer interface - this is the only method producers call, so - // LinkedBlockingQueue's synchronization is all we need. - def produce(x: T): Unit = if (!eos) interruptibly(queue put x) - - // consumer/iterator interface - we need not synchronize access to buffer - // because we required there to be only one consumer. 
- def hasNext = !eos && (buffer != null || fillBuffer) - - def next() = { - if (eos()) throw new NoSuchElementException("ProducerConsumerIterator") - if (buffer == null) fillBuffer() - - drainBuffer() - } - - def available() = isElement(buffer) || isElement(queue.peek) - - private def drainBuffer() = { - assert(!eos) - val res = buffer - buffer = null - res - } -} diff --git a/src/library/scala/xml/pull/package.scala b/src/library/scala/xml/pull/package.scala deleted file mode 100644 index 0e3019446b..0000000000 --- a/src/library/scala/xml/pull/package.scala +++ /dev/null @@ -1,42 +0,0 @@ -package scala -package xml - -/** - * Classes needed to view an XML document as a series of events. The document - * is parsed by an [[scala.xml.pull.XMLEventReader]] instance. You can treat it as - * an [[scala.collection.Iterator]] to retrieve the events, which are all - * subclasses of [[scala.xml.pull.XMLEvent]]. - * - * {{{ - * scala> val source = Source.fromString(""" - * - * - * ]>Hello&bar;>""") - * - * source: scala.io.Source = non-empty iterator - * - * scala> val reader = new XMLEventReader(source) - * reader: scala.xml.pull.XMLEventReader = non-empty iterator - * - * scala> reader.foreach{ println(_) } - * EvProcInstr(instruction,custom value="customvalue") - * EvText( - * ) - * EvElemStart(null,foo,,) - * EvText(Hello) - * EvComment( this is a comment ) - * EvElemStart(null,bar,,) - * EvText(BAR) - * EvElemEnd(null,bar) - * EvElemStart(null,bar,,) - * EvEntityRef(gt) - * EvElemEnd(null,bar) - * EvElemEnd(null,foo) - * EvText( - * - * ) - * - * }}} - */ -package object pull diff --git a/src/library/scala/xml/transform/BasicTransformer.scala b/src/library/scala/xml/transform/BasicTransformer.scala deleted file mode 100644 index c98339fd67..0000000000 --- a/src/library/scala/xml/transform/BasicTransformer.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ 
/__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package transform - -/** A class for XML transformations. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class BasicTransformer extends Function1[Node,Node] -{ - protected def unchanged(n: Node, ns: Seq[Node]) = - ns.length == 1 && (ns.head == n) - - /** Call transform(Node) for each node in ns, append results - * to NodeBuffer. - */ - def transform(it: Iterator[Node], nb: NodeBuffer): Seq[Node] = - it.foldLeft(nb)(_ ++= transform(_)).toSeq - - /** Call transform(Node) to each node in ns, yield ns if nothing changes, - * otherwise a new sequence of concatenated results. - */ - def transform(ns: Seq[Node]): Seq[Node] = { - val (xs1, xs2) = ns span (n => unchanged(n, transform(n))) - - if (xs2.isEmpty) ns - else xs1 ++ transform(xs2.head) ++ transform(xs2.tail) - } - - def transform(n: Node): Seq[Node] = { - if (n.doTransform) n match { - case Group(xs) => Group(transform(xs)) // un-group the hack Group tag - case _ => - val ch = n.child - val nch = transform(ch) - - if (ch eq nch) n - else Elem(n.prefix, n.label, n.attributes, n.scope, nch: _*) - } - else n - } - - def apply(n: Node): Node = { - val seq = transform(n) - if (seq.length > 1) - throw new UnsupportedOperationException("transform must return single node for root") - else seq.head - } -} diff --git a/src/library/scala/xml/transform/RewriteRule.scala b/src/library/scala/xml/transform/RewriteRule.scala deleted file mode 100644 index 1399ee538d..0000000000 --- a/src/library/scala/xml/transform/RewriteRule.scala +++ /dev/null @@ -1,28 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package xml -package transform - -/** A RewriteRule, when applied to a term, yields either - * the 
result of rewriting the term or the term itself if the rule - * is not applied. - * - * @author Burak Emir - * @version 1.0 - */ -abstract class RewriteRule extends BasicTransformer { - /** a name for this rewrite rule */ - val name = this.toString() - override def transform(ns: Seq[Node]): Seq[Node] = super.transform(ns) - override def transform(n: Node): Seq[Node] = n -} - diff --git a/src/library/scala/xml/transform/RuleTransformer.scala b/src/library/scala/xml/transform/RuleTransformer.scala deleted file mode 100644 index 3a222ba759..0000000000 --- a/src/library/scala/xml/transform/RuleTransformer.scala +++ /dev/null @@ -1,16 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package xml -package transform - -class RuleTransformer(rules: RewriteRule*) extends BasicTransformer { - override def transform(n: Node): Seq[Node] = - rules.foldLeft(super.transform(n)) { (res, rule) => rule transform res } -} diff --git a/src/partest/scala/tools/partest/nest/FileManager.scala b/src/partest/scala/tools/partest/nest/FileManager.scala index 230ada4803..ee24c0b9c1 100644 --- a/src/partest/scala/tools/partest/nest/FileManager.scala +++ b/src/partest/scala/tools/partest/nest/FileManager.scala @@ -68,12 +68,13 @@ trait FileManager extends FileUtil { else (SFile(LATEST_LIB).parent.parent / "classes" / what).toAbsolute.path } + def latestXmlLib = relativeToLibrary("xml") def latestScaladoc = relativeToLibrary("scaladoc") def latestInteractive = relativeToLibrary("interactive") def latestScalapFile = relativeToLibrary("scalap") def latestPaths = List( LATEST_LIB, LATEST_REFLECT, LATEST_COMP, LATEST_PARTEST, LATEST_ACTORS, - latestScalapFile, latestScaladoc, latestInteractive + latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive ) def latestFiles = latestPaths map (p => new 
java.io.File(p)) def latestUrls = latestFiles map (_.toURI.toURL) diff --git a/src/xml/scala/xml/Atom.scala b/src/xml/scala/xml/Atom.scala new file mode 100644 index 0000000000..33e58ba7e7 --- /dev/null +++ b/src/xml/scala/xml/Atom.scala @@ -0,0 +1,47 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Atom` provides an XML node for text (`PCDATA`). + * It is used in both non-bound and bound XML representations. + * + * @author Burak Emir + * @param data the text contained in this node, may not be `'''null'''`. + */ +class Atom[+A](val data: A) extends SpecialNode with Serializable { + if (data == null) + throw new IllegalArgumentException("cannot construct "+getClass.getSimpleName+" with null") + + override protected def basisForHashCode: Seq[Any] = Seq(data) + + override def strict_==(other: Equality) = other match { + case x: Atom[_] => data == x.data + case _ => false + } + + override def canEqual(other: Any) = other match { + case _: Atom[_] => true + case _ => false + } + + final override def doCollectNamespaces = false + final override def doTransform = false + + def label = "#PCDATA" + + /** Returns text, with some characters escaped according to the XML + * specification. 
+ */ + def buildString(sb: StringBuilder): StringBuilder = + Utility.escape(data.toString, sb) + + override def text: String = data.toString + +} diff --git a/src/xml/scala/xml/Attribute.scala b/src/xml/scala/xml/Attribute.scala new file mode 100644 index 0000000000..e4b2b69fc6 --- /dev/null +++ b/src/xml/scala/xml/Attribute.scala @@ -0,0 +1,101 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `apply` and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Attribute { + def unapply(x: Attribute) = x match { + case PrefixedAttribute(_, key, value, next) => Some((key, value, next)) + case UnprefixedAttribute(key, value, next) => Some((key, value, next)) + case _ => None + } + + /** Convenience functions which choose Un/Prefixedness appropriately */ + def apply(key: String, value: Seq[Node], next: MetaData): Attribute = + new UnprefixedAttribute(key, value, next) + + def apply(pre: String, key: String, value: String, next: MetaData): Attribute = + if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) + else new PrefixedAttribute(pre, key, value, next) + + def apply(pre: String, key: String, value: Seq[Node], next: MetaData): Attribute = + if (pre == null || pre == "") new UnprefixedAttribute(key, value, next) + else new PrefixedAttribute(pre, key, value, next) + + def apply(pre: Option[String], key: String, value: Seq[Node], next: MetaData): Attribute = + pre match { + case None => new UnprefixedAttribute(key, value, next) + case Some(p) => new PrefixedAttribute(p, key, value, next) + } +} + +/** The `Attribute` trait defines the interface shared by both + * [[scala.xml.PrefixedAttribute]] and [[scala.xml.UnprefixedAttribute]]. 
+ * + * @author Burak Emir + * @version 1.0 + */ +abstract trait Attribute extends MetaData { + def pre: String // will be null if unprefixed + val key: String + val value: Seq[Node] + val next: MetaData + + def apply(key: String): Seq[Node] + def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] + def copy(next: MetaData): Attribute + + def remove(key: String) = + if (!isPrefixed && this.key == key) next + else copy(next remove key) + + def remove(namespace: String, scope: NamespaceBinding, key: String) = + if (this.key == key && (scope getURI pre) == namespace) next + else copy(next.remove(namespace, scope, key)) + + def isPrefixed: Boolean = pre != null + + def getNamespace(owner: Node): String + + def wellformed(scope: NamespaceBinding): Boolean = { + val arg = if (isPrefixed) scope getURI pre else null + (next(arg, scope, key) == null) && (next wellformed scope) + } + + /** Returns an iterator on attributes */ + override def iterator: Iterator[MetaData] = { + if (value == null) next.iterator + else Iterator.single(this) ++ next.iterator + } + + override def size: Int = { + if (value == null) next.size + else 1 + next.size + } + + /** Appends string representation of only this attribute to stringbuffer. 
+ */ + protected def toString1(sb: StringBuilder) { + if (value == null) + return + if (isPrefixed) + sb append pre append ':' + + sb append key append '=' + val sb2 = new StringBuilder() + Utility.sequenceToXML(value, TopScope, sb2, stripComments = true) + Utility.appendQuoted(sb2.toString, sb) + } +} diff --git a/src/xml/scala/xml/Comment.scala b/src/xml/scala/xml/Comment.scala new file mode 100644 index 0000000000..b8dccdcb16 --- /dev/null +++ b/src/xml/scala/xml/Comment.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Comment` implements an XML node for comments. + * + * @author Burak Emir + * @param commentText the text contained in this node, may not contain "--" + */ +case class Comment(commentText: String) extends SpecialNode { + + def label = "#REM" + override def text = "" + final override def doCollectNamespaces = false + final override def doTransform = false + + if (commentText contains "--") + throw new IllegalArgumentException("text contains \"--\"") + + /** Appends "" to this string buffer. + */ + override def buildString(sb: StringBuilder) = + sb append "" +} diff --git a/src/xml/scala/xml/Document.scala b/src/xml/scala/xml/Document.scala new file mode 100644 index 0000000000..9a725014fc --- /dev/null +++ b/src/xml/scala/xml/Document.scala @@ -0,0 +1,92 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** A document information item (according to InfoSet spec). The comments + * are copied from the Infoset spec, only augmented with some information + * on the Scala types for definitions that might have no value. 
+ * Also plays the role of an `XMLEvent` for pull parsing. + * + * @author Burak Emir + * @version 1.0, 26/04/2005 + */ +@SerialVersionUID(-2289320563321795109L) +class Document extends NodeSeq with pull.XMLEvent with Serializable { + + /** An ordered list of child information items, in document + * order. The list contains exactly one element information item. The + * list also contains one processing instruction information item for + * each processing instruction outside the document element, and one + * comment information item for each comment outside the document + * element. Processing instructions and comments within the DTD are + * excluded. If there is a document type declaration, the list also + * contains a document type declaration information item. + */ + var children: Seq[Node] = _ + + /** The element information item corresponding to the document element. */ + var docElem: Node = _ + + /** The dtd that comes with the document, if any */ + var dtd: scala.xml.dtd.DTD = _ + + /** An unordered set of notation information items, one for each notation + * declared in the DTD. If any notation is multiply declared, this property + * has no value. + */ + def notations: Seq[scala.xml.dtd.NotationDecl] = + dtd.notations + + /** An unordered set of unparsed entity information items, one for each + * unparsed entity declared in the DTD. + */ + def unparsedEntities: Seq[scala.xml.dtd.EntityDecl] = + dtd.unparsedEntities + + /** The base URI of the document entity. */ + var baseURI: String = _ + + /** The name of the character encoding scheme in which the document entity + * is expressed. + */ + var encoding: Option[String] = _ + + /** An indication of the standalone status of the document, either + * true or false. This property is derived from the optional standalone + * document declaration in the XML declaration at the beginning of the + * document entity, and has no value (`None`) if there is no + * standalone document declaration. 
+ */ + var standAlone: Option[Boolean] = _ + + /** A string representing the XML version of the document. This + * property is derived from the XML declaration optionally present at + * the beginning of the document entity, and has no value (`None`) + * if there is no XML declaration. + */ + var version: Option[String] = _ + + /** 9. This property is not strictly speaking part of the infoset of + * the document. Rather it is an indication of whether the processor + * has read the complete DTD. Its value is a boolean. If it is false, + * then certain properties (indicated in their descriptions below) may + * be unknown. If it is true, those properties are never unknown. + */ + var allDeclarationsProcessed = false + + // methods for NodeSeq + + def theSeq: Seq[Node] = this.docElem + + override def canEqual(other: Any) = other match { + case _: Document => true + case _ => false + } +} diff --git a/src/xml/scala/xml/Elem.scala b/src/xml/scala/xml/Elem.scala new file mode 100755 index 0000000000..484cf98744 --- /dev/null +++ b/src/xml/scala/xml/Elem.scala @@ -0,0 +1,135 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `apply` and `unapplySeq` methods for + * convenient construction and deconstruction. It is possible to deconstruct + * any `Node` instance (that is not a `SpecialNode` or a `Group`) using the + * syntax `case Elem(prefix, label, attribs, scope, child @ _*) => ...` + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +object Elem { + /** Build an Elem, setting its minimizeEmpty property to `true` if it has no children. Note that this + * default may not be exactly what you want, as some XML dialects don't permit some elements to be minimized. 
+ * + * @deprecated This factory method is retained for backward compatibility; please use the other one, with which you + * can specify your own preference for minimizeEmpty. + */ + @deprecated("Use the other apply method in this object", "2.10.0") + def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*): Elem = + apply(prefix, label, attributes, scope, child.isEmpty, child: _*) + + def apply(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, minimizeEmpty: Boolean, child: Node*): Elem = + new Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) + + def unapplySeq(n: Node) = n match { + case _: SpecialNode | _: Group => None + case _ => Some((n.prefix, n.label, n.attributes, n.scope, n.child)) + } + + import scala.sys.process._ + /** Implicitly convert a [[scala.xml.Elem]] into a + * [[scala.sys.process.ProcessBuilder]]. This is done by obtaining the text + * elements of the element, trimming spaces, and then converting the result + * from string to a process. Importantly, tags are completely ignored, so + * they cannot be used to separate parameters. + */ + @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") + implicit def xmlToProcess(command: scala.xml.Elem): ProcessBuilder = Process(command.text.trim) + + @deprecated("To create a scala.sys.process.Process from an xml.Elem, please use Process(elem.text.trim).", "2.11.0") + implicit def processXml(p: Process.type) = new { + /** Creates a [[scala.sys.process.ProcessBuilder]] from a Scala XML Element. + * This can be used as a way to template strings. 
+ * + * @example {{{ + * apply( {dxPath.absolutePath} --dex --output={classesDexPath.absolutePath} {classesMinJarPath.absolutePath}) + * }}} + */ + def apply(command: Elem): ProcessBuilder = Process(command.text.trim) + } +} + + +/** The case class `Elem` extends the `Node` class, + * providing an immutable data object representing an XML element. + * + * @param prefix namespace prefix (may be null, but not the empty string) + * @param label the element name + * @param attributes1 the attribute map + * @param scope the scope containing the namespace bindings + * @param minimizeEmpty `true` if this element should be serialized as minimized (i.e. "<el/>") when + * empty; `false` if it should be written out in long form. + * @param child the children of this node + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +class Elem( + override val prefix: String, + val label: String, + attributes1: MetaData, + override val scope: NamespaceBinding, + val minimizeEmpty: Boolean, + val child: Node*) +extends Node with Serializable +{ + @deprecated("This constructor is retained for backward compatibility. 
Please use the primary constructor, which lets you specify your own preference for `minimizeEmpty`.", "2.10.0") + def this(prefix: String, label: String, attributes: MetaData, scope: NamespaceBinding, child: Node*) = { + this(prefix, label, attributes, scope, child.isEmpty, child: _*) + } + + final override def doCollectNamespaces = true + final override def doTransform = true + + override val attributes = MetaData.normalize(attributes1, scope) + + if (prefix == "") + throw new IllegalArgumentException("prefix of zero length, use null instead") + + if (scope == null) + throw new IllegalArgumentException("scope is null, use scala.xml.TopScope for empty scope") + + //@todo: copy the children, + // setting namespace scope if necessary + // cleaning adjacent text nodes if necessary + + override protected def basisForHashCode: Seq[Any] = + prefix :: label :: attributes :: child.toList + + /** Returns a new element with updated attributes, resolving namespace uris + * from this element's scope. See MetaData.update for details. + * + * @param updates MetaData with new and updated attributes + * @return a new symbol with updated attributes + */ + final def %(updates: MetaData): Elem = + copy(attributes = MetaData.update(attributes, scope, updates)) + + /** Returns a copy of this element with any supplied arguments replacing + * this element's value for that field. + * + * @return a new symbol with updated attributes + */ + def copy( + prefix: String = this.prefix, + label: String = this.label, + attributes: MetaData = this.attributes, + scope: NamespaceBinding = this.scope, + minimizeEmpty: Boolean = this.minimizeEmpty, + child: Seq[Node] = this.child.toSeq + ): Elem = Elem(prefix, label, attributes, scope, minimizeEmpty, child: _*) + + /** Returns concatenation of `text(n)` for each child `n`. 
+ */ + override def text = (child map (_.text)).mkString +} diff --git a/src/xml/scala/xml/EntityRef.scala b/src/xml/scala/xml/EntityRef.scala new file mode 100644 index 0000000000..7a58831075 --- /dev/null +++ b/src/xml/scala/xml/EntityRef.scala @@ -0,0 +1,40 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `EntityRef` implements an XML node for entity references. + * + * @author Burak Emir + * @version 1.0 + * @param entityName the name of the entity reference, for example `amp`. + */ +case class EntityRef(entityName: String) extends SpecialNode { + final override def doCollectNamespaces = false + final override def doTransform = false + def label = "#ENTITY" + + override def text = entityName match { + case "lt" => "<" + case "gt" => ">" + case "amp" => "&" + case "apos" => "'" + case "quot" => "\"" + case _ => Utility.sbToString(buildString) + } + + /** Appends `"& entityName;"` to this string buffer. + * + * @param sb the string buffer. + * @return the modified string buffer `sb`. + */ + override def buildString(sb: StringBuilder) = + sb.append("&").append(entityName).append(";") + +} diff --git a/src/xml/scala/xml/Equality.scala b/src/xml/scala/xml/Equality.scala new file mode 100644 index 0000000000..021d185812 --- /dev/null +++ b/src/xml/scala/xml/Equality.scala @@ -0,0 +1,107 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** In an attempt to contain the damage being inflicted on consistency by the + * ad hoc `equals` methods spread around `xml`, the logic is centralized and + * all the `xml` classes go through the `xml.Equality trait`. 
There are two + * forms of `xml` comparison. + * + * 1. `'''def''' strict_==(other: scala.xml.Equality)` + * + * This one tries to honor the little things like symmetry and hashCode + * contracts. The `equals` method routes all comparisons through this. + * + * 1. `xml_==(other: Any)` + * + * This one picks up where `strict_==` leaves off. It might declare any two + * things equal. + * + * As things stood, the logic not only made a mockery of the collections + * equals contract, but also laid waste to that of case classes. + * + * Among the obstacles to sanity are/were: + * + * Node extends NodeSeq extends Seq[Node] + * MetaData extends Iterable[MetaData] + * The hacky "Group" xml node which throws exceptions + * with wild abandon, so don't get too close + * Rampant asymmetry and impossible hashCodes + * Most classes claiming to be equal to "String" if + * some specific stringification of it was the same. + * String was never going to return the favor. + */ + +object Equality { + def asRef(x: Any): AnyRef = x.asInstanceOf[AnyRef] + + /** Note - these functions assume strict equality has already failed. + */ + def compareBlithely(x1: AnyRef, x2: String): Boolean = x1 match { + case x: Atom[_] => x.data == x2 + case x: NodeSeq => x.text == x2 + case _ => false + } + def compareBlithely(x1: AnyRef, x2: Node): Boolean = x1 match { + case x: NodeSeq if x.length == 1 => x2 == x(0) + case _ => false + } + def compareBlithely(x1: AnyRef, x2: AnyRef): Boolean = { + if (x1 == null || x2 == null) + return (x1 eq x2) + + x2 match { + case s: String => compareBlithely(x1, s) + case n: Node => compareBlithely(x1, n) + case _ => false + } + } +} +import Equality._ + +trait Equality extends scala.Equals { + protected def basisForHashCode: Seq[Any] + + def strict_==(other: Equality): Boolean + def strict_!=(other: Equality) = !strict_==(other) + + /** We insist we're only equal to other `xml.Equality` implementors, + * which heads off a lot of inconsistency up front. 
+ */ + override def canEqual(other: Any): Boolean = other match { + case x: Equality => true + case _ => false + } + + /** It's be nice to make these final, but there are probably + * people out there subclassing the XML types, especially when + * it comes to equals. However WE at least can pretend they + * are final since clearly individual classes cannot be trusted + * to maintain a semblance of order. + */ + override def hashCode() = basisForHashCode.## + override def equals(other: Any) = doComparison(other, blithe = false) + final def xml_==(other: Any) = doComparison(other, blithe = true) + final def xml_!=(other: Any) = !xml_==(other) + + /** The "blithe" parameter expresses the caller's unconcerned attitude + * regarding the usual constraints on equals. The method is thereby + * given carte blanche to declare any two things equal. + */ + private def doComparison(other: Any, blithe: Boolean) = { + val strictlyEqual = other match { + case x: AnyRef if this eq x => true + case x: Equality => (x canEqual this) && (this strict_== x) + case _ => false + } + + strictlyEqual || (blithe && compareBlithely(this, asRef(other))) + } +} diff --git a/src/xml/scala/xml/Group.scala b/src/xml/scala/xml/Group.scala new file mode 100644 index 0000000000..e3af615008 --- /dev/null +++ b/src/xml/scala/xml/Group.scala @@ -0,0 +1,42 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** A hack to group XML nodes in one node for output. 
+ * + * @author Burak Emir + * @version 1.0 + */ +final case class Group(nodes: Seq[Node]) extends Node { + override def theSeq = nodes + + override def canEqual(other: Any) = other match { + case x: Group => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case Group(xs) => nodes sameElements xs + case _ => false + } + + override protected def basisForHashCode = nodes + + /** Since Group is very much a hack it throws an exception if you + * try to do anything with it. + */ + private def fail(msg: String) = throw new UnsupportedOperationException("class Group does not support method '%s'" format msg) + + def label = fail("label") + override def attributes = fail("attributes") + override def namespace = fail("namespace") + override def child = fail("child") + def buildString(sb: StringBuilder) = fail("toString(StringBuilder)") +} diff --git a/src/xml/scala/xml/MalformedAttributeException.scala b/src/xml/scala/xml/MalformedAttributeException.scala new file mode 100644 index 0000000000..d499ad3e10 --- /dev/null +++ b/src/xml/scala/xml/MalformedAttributeException.scala @@ -0,0 +1,15 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml + + +case class MalformedAttributeException(msg: String) extends RuntimeException(msg) diff --git a/src/xml/scala/xml/MetaData.scala b/src/xml/scala/xml/MetaData.scala new file mode 100644 index 0000000000..8b5ea187cb --- /dev/null +++ b/src/xml/scala/xml/MetaData.scala @@ -0,0 +1,217 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.sbToString +import scala.annotation.tailrec +import 
scala.collection.{ AbstractIterable, Iterator } + +/** + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +object MetaData { + /** + * appends all attributes from new_tail to attribs, without attempting to + * detect or remove duplicates. The method guarantees that all attributes + * from attribs come before the attributes in new_tail, but does not + * guarantee to preserve the relative order of attribs. + * + * Duplicates can be removed with `normalize`. + */ + @tailrec // temporarily marked final so it will compile under -Xexperimental + final def concatenate(attribs: MetaData, new_tail: MetaData): MetaData = + if (attribs eq Null) new_tail + else concatenate(attribs.next, attribs copy new_tail) + + /** + * returns normalized MetaData, with all duplicates removed and namespace prefixes resolved to + * namespace URIs via the given scope. + */ + def normalize(attribs: MetaData, scope: NamespaceBinding): MetaData = { + def iterate(md: MetaData, normalized_attribs: MetaData, set: Set[String]): MetaData = { + lazy val key = getUniversalKey(md, scope) + if (md eq Null) normalized_attribs + else if ((md.value eq null) || set(key)) iterate(md.next, normalized_attribs, set) + else md copy iterate(md.next, normalized_attribs, set + key) + } + iterate(attribs, Null, Set()) + } + + /** + * returns key if md is unprefixed, pre+key is md is prefixed + */ + def getUniversalKey(attrib: MetaData, scope: NamespaceBinding) = attrib match { + case prefixed: PrefixedAttribute => scope.getURI(prefixed.pre) + prefixed.key + case unprefixed: UnprefixedAttribute => unprefixed.key + } + + /** + * returns MetaData with attributes updated from given MetaData + */ + def update(attribs: MetaData, scope: NamespaceBinding, updates: MetaData): MetaData = + normalize(concatenate(updates, attribs), scope) + +} + +/** This class represents an attribute and at the same time a linked list of + * attributes. 
Every instance of this class is either + * - an instance of `UnprefixedAttribute key,value` or + * - an instance of `PrefixedAttribute namespace_prefix,key,value` or + * - `Null, the empty attribute list. + * + * Namespace URIs are obtained by using the namespace scope of the element + * owning this attribute (see `getNamespace`). + * + * Copyright 2008 Google Inc. All Rights Reserved. + * @author Burak Emir + */ +abstract class MetaData +extends AbstractIterable[MetaData] + with Iterable[MetaData] + with Equality + with Serializable { + + /** Updates this MetaData with the MetaData given as argument. All attributes that occur in updates + * are part of the resulting MetaData. If an attribute occurs in both this instance and + * updates, only the one in updates is part of the result (avoiding duplicates). For prefixed + * attributes, namespaces are resolved using the given scope, which defaults to TopScope. + * + * @param updates MetaData with new and updated attributes + * @return a new MetaData instance that contains old, new and updated attributes + */ + def append(updates: MetaData, scope: NamespaceBinding = TopScope): MetaData = + MetaData.update(this, scope, updates) + + /** + * Gets value of unqualified (unprefixed) attribute with given key, null if not found + * + * @param key + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(key: String): Seq[Node] + + /** convenience method, same as `apply(namespace, owner.scope, key)`. 
+ * + * @param namespace_uri namespace uri of key + * @param owner the element owning this attribute list + * @param key the attribute key + */ + final def apply(namespace_uri: String, owner: Node, key: String): Seq[Node] = + apply(namespace_uri, owner.scope, key) + + /** + * Gets value of prefixed attribute with given key and namespace, null if not found + * + * @param namespace_uri namespace uri of key + * @param scp a namespace scp (usually of the element owning this attribute list) + * @param k to be looked for + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(namespace_uri: String, scp: NamespaceBinding, k: String): Seq[Node] + + /** returns a copy of this MetaData item with next field set to argument. + */ + def copy(next: MetaData): MetaData + + /** if owner is the element of this metadata item, returns namespace */ + def getNamespace(owner: Node): String + + def hasNext = (Null != next) + + def length: Int = length(0) + + def length(i: Int): Int = next.length(i + 1) + + def isPrefixed: Boolean + + override def canEqual(other: Any) = other match { + case _: MetaData => true + case _ => false + } + override def strict_==(other: Equality) = other match { + case m: MetaData => this.asAttrMap == m.asAttrMap + case _ => false + } + protected def basisForHashCode: Seq[Any] = List(this.asAttrMap) + + /** filters this sequence of meta data */ + override def filter(f: MetaData => Boolean): MetaData = + if (f(this)) copy(next filter f) + else next filter f + + /** returns key of this MetaData item */ + def key: String + + /** returns value of this MetaData item */ + def value: Seq[Node] + + /** Returns a String containing "prefix:key" if the first key is + * prefixed, and "key" otherwise. + */ + def prefixedKey = this match { + case x: Attribute if x.isPrefixed => x.pre + ":" + key + case _ => key + } + + /** Returns a Map containing the attributes stored as key/value pairs. 
+ */ + def asAttrMap: Map[String, String] = + (iterator map (x => (x.prefixedKey, x.value.text))).toMap + + /** returns Null or the next MetaData item */ + def next: MetaData + + /** + * Gets value of unqualified (unprefixed) attribute with given key, None if not found + * + * @param key + * @return value in Some(Seq[Node]) if key is found, None otherwise + */ + final def get(key: String): Option[Seq[Node]] = Option(apply(key)) + + /** same as get(uri, owner.scope, key) */ + final def get(uri: String, owner: Node, key: String): Option[Seq[Node]] = + get(uri, owner.scope, key) + + /** gets value of qualified (prefixed) attribute with given key. + * + * @param uri namespace of key + * @param scope a namespace scp (usually of the element owning this attribute list) + * @param key to be looked fore + * @return value as Some[Seq[Node]] if key is found, None otherwise + */ + final def get(uri: String, scope: NamespaceBinding, key: String): Option[Seq[Node]] = + Option(apply(uri, scope, key)) + + protected def toString1(): String = sbToString(toString1) + + // appends string representations of single attribute to StringBuilder + protected def toString1(sb: StringBuilder): Unit + + override def toString(): String = sbToString(buildString) + + def buildString(sb: StringBuilder): StringBuilder = { + sb append ' ' + toString1(sb) + next buildString sb + } + + /** + */ + def wellformed(scope: NamespaceBinding): Boolean + + def remove(key: String): MetaData + + def remove(namespace: String, scope: NamespaceBinding, key: String): MetaData + + final def remove(namespace: String, owner: Node, key: String): MetaData = + remove(namespace, owner.scope, key) +} diff --git a/src/xml/scala/xml/NamespaceBinding.scala b/src/xml/scala/xml/NamespaceBinding.scala new file mode 100644 index 0000000000..b320466976 --- /dev/null +++ b/src/xml/scala/xml/NamespaceBinding.scala @@ -0,0 +1,83 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** 
+** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.sbToString + +/** The class `NamespaceBinding` represents namespace bindings + * and scopes. The binding for the default namespace is treated as a null + * prefix. the absent namespace is represented with the null uri. Neither + * prefix nor uri may be empty, which is not checked. + * + * @author Burak Emir + * @version 1.0 + */ +@SerialVersionUID(0 - 2518644165573446725L) +case class NamespaceBinding(prefix: String, uri: String, parent: NamespaceBinding) extends AnyRef with Equality +{ + if (prefix == "") + throw new IllegalArgumentException("zero length prefix not allowed") + + def getURI(_prefix: String): String = + if (prefix == _prefix) uri else parent getURI _prefix + + /** Returns some prefix that is mapped to the URI. + * + * @param _uri the input URI + * @return the prefix that is mapped to the input URI, or null + * if no prefix is mapped to the URI. 
+ */ + def getPrefix(_uri: String): String = + if (_uri == uri) prefix else parent getPrefix _uri + + override def toString(): String = sbToString(buildString(_, TopScope)) + + private def shadowRedefined(stop: NamespaceBinding): NamespaceBinding = { + def prefixList(x: NamespaceBinding): List[String] = + if ((x == null) || (x eq stop)) Nil + else x.prefix :: prefixList(x.parent) + def fromPrefixList(l: List[String]): NamespaceBinding = l match { + case Nil => stop + case x :: xs => new NamespaceBinding(x, this.getURI(x), fromPrefixList(xs)) + } + val ps0 = prefixList(this).reverse + val ps = ps0.distinct + if (ps.size == ps0.size) this + else fromPrefixList(ps) + } + + override def canEqual(other: Any) = other match { + case _: NamespaceBinding => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case x: NamespaceBinding => (prefix == x.prefix) && (uri == x.uri) && (parent == x.parent) + case _ => false + } + + def basisForHashCode: Seq[Any] = List(prefix, uri, parent) + + def buildString(stop: NamespaceBinding): String = sbToString(buildString(_, stop)) + + def buildString(sb: StringBuilder, stop: NamespaceBinding) { + shadowRedefined(stop).doBuildString(sb, stop) + } + + private def doBuildString(sb: StringBuilder, stop: NamespaceBinding) { + if ((this == null) || (this eq stop)) return // contains? 
+ + val s = " xmlns%s=\"%s\"".format( + (if (prefix != null) ":" + prefix else ""), + (if (uri != null) uri else "") + ) + parent.doBuildString(sb append s, stop) // copy(ignore) + } +} diff --git a/src/xml/scala/xml/Node.scala b/src/xml/scala/xml/Node.scala new file mode 100755 index 0000000000..e121284252 --- /dev/null +++ b/src/xml/scala/xml/Node.scala @@ -0,0 +1,198 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This singleton object contains the `unapplySeq` method for + * convenient deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Node { + /** the constant empty attribute sequence */ + final def NoAttributes: MetaData = Null + + /** the empty namespace */ + val EmptyNamespace = "" + + def unapplySeq(n: Node) = Some((n.label, n.attributes, n.child)) +} + +/** + * An abstract class representing XML with nodes of a labelled tree. + * This class contains an implementation of a subset of XPath for navigation. + * + * @author Burak Emir and others + * @version 1.1 + */ +abstract class Node extends NodeSeq { + + /** prefix of this node */ + def prefix: String = null + + /** label of this node. I.e. "foo" for <foo/>) */ + def label: String + + /** used internally. Atom/Molecule = -1 PI = -2 Comment = -3 EntityRef = -5 + */ + def isAtom = this.isInstanceOf[Atom[_]] + + /** The logic formerly found in typeTag$, as best I could infer it. */ + def doCollectNamespaces = true // if (tag >= 0) DO collect namespaces + def doTransform = true // if (tag < 0) DO NOT transform + + /** + * method returning the namespace bindings of this node. by default, this + * is TopScope, which means there are no namespace bindings except the + * predefined one for "xml". 
+ */ + def scope: NamespaceBinding = TopScope + + /** + * convenience, same as `getNamespace(this.prefix)` + */ + def namespace = getNamespace(this.prefix) + + /** + * Convenience method, same as `scope.getURI(pre)` but additionally + * checks if scope is `'''null'''`. + * + * @param pre the prefix whose namespace name we would like to obtain + * @return the namespace if `scope != null` and prefix was + * found, else `null` + */ + def getNamespace(pre: String): String = if (scope eq null) null else scope.getURI(pre) + + /** + * Convenience method, looks up an unprefixed attribute in attributes of this node. + * Same as `attributes.getValue(key)` + * + * @param key of queried attribute. + * @return value of `UnprefixedAttribute` with given key + * in attributes, if it exists, otherwise `null`. + */ + final def attribute(key: String): Option[Seq[Node]] = attributes.get(key) + + /** + * Convenience method, looks up a prefixed attribute in attributes of this node. + * Same as `attributes.getValue(uri, this, key)`- + * + * @param uri namespace of queried attribute (may not be null). + * @param key of queried attribute. + * @return value of `PrefixedAttribute` with given namespace + * and given key, otherwise `'''null'''`. + */ + final def attribute(uri: String, key: String): Option[Seq[Node]] = + attributes.get(uri, this, key) + + /** + * Returns attribute meaning all attributes of this node, prefixed and + * unprefixed, in no particular order. In class `Node`, this + * defaults to `Null` (the empty attribute list). + * + * @return all attributes of this node + */ + def attributes: MetaData = Null + + /** + * Returns child axis i.e. all children of this node. 
+ * + * @return all children of this node + */ + def child: Seq[Node] + + /** Children which do not stringify to "" (needed for equality) + */ + def nonEmptyChildren: Seq[Node] = child filterNot (_.toString == "") + + /** + * Descendant axis (all descendants of this node, not including node itself) + * includes all text nodes, element nodes, comments and processing instructions. + */ + def descendant: List[Node] = + child.toList.flatMap { x => x::x.descendant } + + /** + * Descendant axis (all descendants of this node, including thisa node) + * includes all text nodes, element nodes, comments and processing instructions. + */ + def descendant_or_self: List[Node] = this :: descendant + + override def canEqual(other: Any) = other match { + case x: Group => false + case x: Node => true + case _ => false + } + + override protected def basisForHashCode: Seq[Any] = + prefix :: label :: attributes :: nonEmptyChildren.toList + + override def strict_==(other: Equality) = other match { + case _: Group => false + case x: Node => + (prefix == x.prefix) && + (label == x.label) && + (attributes == x.attributes) && + // (scope == x.scope) // note - original code didn't compare scopes so I left it as is. + (nonEmptyChildren sameElements x.nonEmptyChildren) + case _ => + false + } + + // implementations of NodeSeq methods + + /** + * returns a sequence consisting of only this node + */ + def theSeq: Seq[Node] = this :: Nil + + /** + * String representation of this node + * + * @param stripComments if true, strips comment nodes from result + */ + def buildString(stripComments: Boolean): String = + Utility.serialize(this, stripComments = stripComments).toString + + /** + * Same as `toString('''false''')`. + */ + override def toString(): String = buildString(stripComments = false) + + /** + * Appends qualified name of this node to `StringBuilder`. 
+ */ + def nameToString(sb: StringBuilder): StringBuilder = { + if (null != prefix) { + sb append prefix + sb append ':' + } + sb append label + } + + /** + * Returns a type symbol (e.g. DTD, XSD), default `'''null'''`. + */ + def xmlType(): TypeSymbol = null + + /** + * Returns a text representation of this node. Note that this is not equivalent to + * the XPath node-test called text(), it is rather an implementation of the + * XPath function string() + * Martin to Burak: to do: if you make this method abstract, the compiler will now + * complain if there's no implementation in a subclass. Is this what we want? Note that + * this would break doc/DocGenator and doc/ModelToXML, with an error message like: + * {{{ + * doc\DocGenerator.scala:1219: error: object creation impossible, since there is a deferred declaration of method text in class Node of type => String which is not implemented in a subclass + * new SpecialNode { + * ^ + * }}} */ + override def text: String = super.text +} diff --git a/src/xml/scala/xml/NodeBuffer.scala b/src/xml/scala/xml/NodeBuffer.scala new file mode 100644 index 0000000000..ae7c7b2bf8 --- /dev/null +++ b/src/xml/scala/xml/NodeBuffer.scala @@ -0,0 +1,47 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** + * This class acts as a Buffer for nodes. If it is used as a sequence of + * nodes `Seq[Node]`, it must be ensured that no updates occur after that + * point, because `scala.xml.Node` is assumed to be immutable. + * + * Despite this being a sequence, don't use it as key in a hashtable. + * Calling the hashcode function will result in a runtime error. 
+ * + * @author Burak Emir + * @version 1.0 + */ +class NodeBuffer extends scala.collection.mutable.ArrayBuffer[Node] { + + /** + * Append given object to this buffer, returns reference on this + * `NodeBuffer` for convenience. Some rules apply: + * - If argument `o` is `'''null'''`, it is ignored. + * - If it is an `Iterator` or `Iterable`, its elements will be added. + * - If `o` is a node, it is added as it is. + * - If it is anything else, it gets wrapped in an [[scala.xml.Atom]]. + * + * @param o converts to an xml node and adds to this node buffer + * @return this nodebuffer + */ + def &+(o: Any): NodeBuffer = { + o match { + case null | _: Unit | Text("") => // ignore + case it: Iterator[_] => it foreach &+ + case n: Node => super.+=(n) + case ns: Iterable[_] => this &+ ns.iterator + case ns: Array[_] => this &+ ns.iterator + case d => super.+=(new Atom(d)) + } + this + } +} diff --git a/src/xml/scala/xml/NodeSeq.scala b/src/xml/scala/xml/NodeSeq.scala new file mode 100644 index 0000000000..b8022472fb --- /dev/null +++ b/src/xml/scala/xml/NodeSeq.scala @@ -0,0 +1,157 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import scala.collection.{ mutable, immutable, generic, SeqLike, AbstractSeq } +import mutable.{ Builder, ListBuffer } +import generic.{ CanBuildFrom } +import scala.language.implicitConversions + +/** This object ... 
+ * + * @author Burak Emir + * @version 1.0 + */ +object NodeSeq { + final val Empty = fromSeq(Nil) + def fromSeq(s: Seq[Node]): NodeSeq = new NodeSeq { + def theSeq = s + } + type Coll = NodeSeq + implicit def canBuildFrom: CanBuildFrom[Coll, Node, NodeSeq] = + new CanBuildFrom[Coll, Node, NodeSeq] { + def apply(from: Coll) = newBuilder + def apply() = newBuilder + } + def newBuilder: Builder[Node, NodeSeq] = new ListBuffer[Node] mapResult fromSeq + implicit def seqToNodeSeq(s: Seq[Node]): NodeSeq = fromSeq(s) +} + +/** This class implements a wrapper around `Seq[Node]` that adds XPath + * and comprehension methods. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class NodeSeq extends AbstractSeq[Node] with immutable.Seq[Node] with SeqLike[Node, NodeSeq] with Equality { + import NodeSeq.seqToNodeSeq // import view magic for NodeSeq wrappers + + /** Creates a list buffer as builder for this class */ + override protected[this] def newBuilder = NodeSeq.newBuilder + + def theSeq: Seq[Node] + def length = theSeq.length + override def iterator = theSeq.iterator + + def apply(i: Int): Node = theSeq(i) + def apply(f: Node => Boolean): NodeSeq = filter(f) + + def xml_sameElements[A](that: Iterable[A]): Boolean = { + val these = this.iterator + val those = that.iterator + while (these.hasNext && those.hasNext) + if (these.next xml_!= those.next) + return false + + !these.hasNext && !those.hasNext + } + + protected def basisForHashCode: Seq[Any] = theSeq + + override def canEqual(other: Any) = other match { + case _: NodeSeq => true + case _ => false + } + + override def strict_==(other: Equality) = other match { + case x: NodeSeq => (length == x.length) && (theSeq sameElements x.theSeq) + case _ => false + } + + /** Projection function, which returns elements of `this` sequence based + * on the string `that`. 
Use: + * - `this \ "foo"` to get a list of all elements that are labelled with `"foo"`; + * - `\ "_"` to get a list of all elements (wildcard); + * - `ns \ "@foo"` to get the unprefixed attribute `"foo"`; + * - `ns \ "@{uri}foo"` to get the prefixed attribute `"pre:foo"` whose + * prefix `"pre"` is resolved to the namespace `"uri"`. + * + * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute + * values are wrapped in a [[scala.xml.Group]]. + * + * There is no support for searching a prefixed attribute by its literal prefix. + * + * The document order is preserved. + */ + def \(that: String): NodeSeq = { + def fail = throw new IllegalArgumentException(that) + def atResult = { + lazy val y = this(0) + val attr = + if (that.length == 1) fail + else if (that(1) == '{') { + val i = that indexOf '}' + if (i == -1) fail + val (uri, key) = (that.substring(2,i), that.substring(i+1, that.length())) + if (uri == "" || key == "") fail + else y.attribute(uri, key) + } + else y.attribute(that drop 1) + + attr match { + case Some(x) => Group(x) + case _ => NodeSeq.Empty + } + } + + def makeSeq(cond: (Node) => Boolean) = + NodeSeq fromSeq (this flatMap (_.child) filter cond) + + that match { + case "" => fail + case "_" => makeSeq(!_.isAtom) + case _ if (that(0) == '@' && this.length == 1) => atResult + case _ => makeSeq(_.label == that) + } + } + + /** Projection function, which returns elements of `this` sequence and of + * all its subsequences, based on the string `that`. Use: + * - `this \\ 'foo` to get a list of all elements that are labelled with `"foo"`; + * - `\\ "_"` to get a list of all elements (wildcard); + * - `ns \\ "@foo"` to get the unprefixed attribute `"foo"`; + * - `ns \\ "@{uri}foo"` to get each prefixed attribute `"pre:foo"` whose + * prefix `"pre"` is resolved to the namespace `"uri"`. + * + * For attribute projections, the resulting [[scala.xml.NodeSeq]] attribute + * values are wrapped in a [[scala.xml.Group]]. 
+ * + * There is no support for searching a prefixed attribute by its literal prefix. + * + * The document order is preserved. + */ + def \\ (that: String): NodeSeq = { + def filt(cond: (Node) => Boolean) = this flatMap (_.descendant_or_self) filter cond + that match { + case "_" => filt(!_.isAtom) + case _ if that(0) == '@' => filt(!_.isAtom) flatMap (_ \ that) + case _ => filt(x => !x.isAtom && x.label == that) + } + } + + /** Convenience method which returns string text of the named attribute. Use: + * - `that \@ "foo"` to get the string text of attribute `"foo"`; + */ + def \@(attributeName: String): String = (this \ ("@" + attributeName)).text + + override def toString(): String = theSeq.mkString + + def text: String = (this map (_.text)).mkString +} diff --git a/src/xml/scala/xml/Null.scala b/src/xml/scala/xml/Null.scala new file mode 100644 index 0000000000..f763c023c4 --- /dev/null +++ b/src/xml/scala/xml/Null.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.isNameStart +import scala.collection.Iterator + +/** Essentially, every method in here is a dummy, returning Zero[T]. + * It provides a backstop for the unusual collection defined by MetaData, + * sort of a linked list of tails. 
+ * + * @author Burak Emir + * @version 1.0 + */ +case object Null extends MetaData { + override def iterator = Iterator.empty + override def size = 0 + override def append(m: MetaData, scope: NamespaceBinding = TopScope): MetaData = m + override def filter(f: MetaData => Boolean): MetaData = this + + def copy(next: MetaData) = next + def getNamespace(owner: Node) = null + + override def hasNext = false + def next = null + def key = null + def value = null + def isPrefixed = false + + override def length = 0 + override def length(i: Int) = i + + override def strict_==(other: Equality) = other match { + case x: MetaData => x.length == 0 + case _ => false + } + override protected def basisForHashCode: Seq[Any] = Nil + + def apply(namespace: String, scope: NamespaceBinding, key: String) = null + def apply(key: String) = + if (isNameStart(key.head)) null + else throw new IllegalArgumentException("not a valid attribute name '"+key+"', so can never match !") + + protected def toString1(sb: StringBuilder) = () + override protected def toString1(): String = "" + + override def toString(): String = "" + + override def buildString(sb: StringBuilder): StringBuilder = sb + + override def wellformed(scope: NamespaceBinding) = true + + def remove(key: String) = this + def remove(namespace: String, scope: NamespaceBinding, key: String) = this +} diff --git a/src/xml/scala/xml/PCData.scala b/src/xml/scala/xml/PCData.scala new file mode 100644 index 0000000000..31eea2b6d7 --- /dev/null +++ b/src/xml/scala/xml/PCData.scala @@ -0,0 +1,44 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This class (which is not used by all XML parsers, but always used by the + * XHTML one) represents parseable character data, which appeared as CDATA + * sections in the input and is to be preserved 
as CDATA section in the output. + * + * @author Burak Emir + * @version 1.0 + */ +class PCData(data: String) extends Atom[String](data) { + + /** Returns text, with some characters escaped according to the XML + * specification. + * + * @param sb the input string buffer associated to some XML element + * @return the input string buffer with the formatted CDATA section + */ + override def buildString(sb: StringBuilder): StringBuilder = + sb append "".format(data) +} + +/** This singleton object contains the `apply`and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object PCData { + def apply(data: String) = new PCData(data) + def unapply(other: Any): Option[String] = other match { + case x: PCData => Some(x.data) + case _ => None + } +} + diff --git a/src/xml/scala/xml/PrefixedAttribute.scala b/src/xml/scala/xml/PrefixedAttribute.scala new file mode 100644 index 0000000000..4ab79c8677 --- /dev/null +++ b/src/xml/scala/xml/PrefixedAttribute.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** prefixed attributes always have a non-null namespace. 
+ * + * @param pre + * @param key + * @param value the attribute value + * @param next1 + */ +class PrefixedAttribute( + val pre: String, + val key: String, + val value: Seq[Node], + val next1: MetaData) +extends Attribute +{ + val next = if (value ne null) next1 else next1.remove(key) + + /** same as this(pre, key, Text(value), next), or no attribute if value is null */ + def this(pre: String, key: String, value: String, next: MetaData) = + this(pre, key, if (value ne null) Text(value) else null: NodeSeq, next) + + /** same as this(pre, key, value.get, next), or no attribute if value is None */ + def this(pre: String, key: String, value: Option[Seq[Node]], next: MetaData) = + this(pre, key, value.orNull, next) + + /** Returns a copy of this unprefixed attribute with the given + * next field. + */ + def copy(next: MetaData) = + new PrefixedAttribute(pre, key, value, next) + + def getNamespace(owner: Node) = + owner.getNamespace(pre) + + /** forwards the call to next (because caller looks for unprefixed attribute */ + def apply(key: String): Seq[Node] = next(key) + + /** gets attribute value of qualified (prefixed) attribute with given key + */ + def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = { + if (key == this.key && scope.getURI(pre) == namespace) + value + else + next(namespace, scope, key) + } +} + +object PrefixedAttribute { + def unapply(x: PrefixedAttribute) = Some((x.pre, x.key, x.value, x.next)) +} diff --git a/src/xml/scala/xml/PrettyPrinter.scala b/src/xml/scala/xml/PrettyPrinter.scala new file mode 100755 index 0000000000..9e01905357 --- /dev/null +++ b/src/xml/scala/xml/PrettyPrinter.scala @@ -0,0 +1,263 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import Utility.sbToString + +/** Class for pretty printing. 
After instantiating, you can use the + * format() and formatNode() methods to convert XML to a formatted + * string. The class can be reused to pretty print any number of + * XML nodes. + * + * @author Burak Emir + * @version 1.0 + * + * @param width the width to fit the output into + * @param step indentation + */ +class PrettyPrinter(width: Int, step: Int) { + + class BrokenException() extends java.lang.Exception + + class Item + case object Break extends Item { + override def toString() = "\\" + } + case class Box(col: Int, s: String) extends Item + case class Para(s: String) extends Item + + protected var items: List[Item] = Nil + + protected var cur = 0 + + protected def reset() = { + cur = 0 + items = Nil + } + + /** Try to cut at whitespace. + */ + protected def cut(s: String, ind: Int): List[Item] = { + val tmp = width - cur + if (s.length <= tmp) + return List(Box(ind, s)) + var i = s indexOf ' ' + if (i > tmp || i == -1) throw new BrokenException() // cannot break + + var last: List[Int] = Nil + while (i != -1 && i < tmp) { + last = i::last + i = s.indexOf(' ', i+1) + } + var res: List[Item] = Nil + while (Nil != last) try { + val b = Box(ind, s.substring(0, last.head)) + cur = ind + res = b :: Break :: cut(s.substring(last.head, s.length), ind) + // backtrack + last = last.tail + } catch { + case _:BrokenException => last = last.tail + } + throw new BrokenException() + } + + /** Try to make indented box, if possible, else para. + */ + protected def makeBox(ind: Int, s: String) = + if (cur + s.length > width) { // fits in this line + items ::= Box(ind, s) + cur += s.length + } + else try cut(s, ind) foreach (items ::= _) // break it up + catch { case _: BrokenException => makePara(ind, s) } // give up, para + + // dont respect indent in para, but afterwards + protected def makePara(ind: Int, s: String) = { + items = Break::Para(s)::Break::items + cur = ind + } + + // respect indent + protected def makeBreak() = { // using wrapping here... 
+ items = Break :: items + cur = 0 + } + + protected def leafTag(n: Node) = { + def mkLeaf(sb: StringBuilder) { + sb append '<' + n nameToString sb + n.attributes buildString sb + sb append "/>" + } + sbToString(mkLeaf) + } + + protected def startTag(n: Node, pscope: NamespaceBinding): (String, Int) = { + var i = 0 + def mkStart(sb: StringBuilder) { + sb append '<' + n nameToString sb + i = sb.length + 1 + n.attributes buildString sb + n.scope.buildString(sb, pscope) + sb append '>' + } + (sbToString(mkStart), i) + } + + protected def endTag(n: Node) = { + def mkEnd(sb: StringBuilder) { + sb append "' + } + sbToString(mkEnd) + } + + protected def childrenAreLeaves(n: Node): Boolean = { + def isLeaf(l: Node) = l match { + case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => true + case _ => false + } + n.child forall isLeaf + } + + protected def fits(test: String) = + test.length < width - cur + + private def doPreserve(node: Node) = + node.attribute(XML.namespace, XML.space).map(_.toString == XML.preserve) getOrElse false + + protected def traverse(node: Node, pscope: NamespaceBinding, ind: Int): Unit = node match { + + case Text(s) if s.trim() == "" => + ; + case _:Atom[_] | _:Comment | _:EntityRef | _:ProcInstr => + makeBox( ind, node.toString().trim() ) + case g @ Group(xs) => + traverse(xs.iterator, pscope, ind) + case _ => + val test = { + val sb = new StringBuilder() + Utility.serialize(node, pscope, sb, stripComments = false) + if (doPreserve(node)) sb.toString + else TextBuffer.fromString(sb.toString).toText(0).data + } + if (childrenAreLeaves(node) && fits(test)) { + makeBox(ind, test) + } else { + val (stg, len2) = startTag(node, pscope) + val etg = endTag(node) + if (stg.length < width - cur) { // start tag fits + makeBox(ind, stg) + makeBreak() + traverse(node.child.iterator, node.scope, ind + step) + makeBox(ind, etg) + } else if (len2 < width - cur) { + // + if (!lastwasbreak) sb.append('\n') // on windows: \r\n ? 
+ lastwasbreak = true + cur = 0 +// while (cur < last) { +// sb append ' ' +// cur += 1 +// } + + case Box(i, s) => + lastwasbreak = false + while (cur < i) { + sb append ' ' + cur += 1 + } + sb.append(s) + case Para( s ) => + lastwasbreak = false + sb append s + } + } + + // public convenience methods + + /** Returns a formatted string containing well-formed XML with + * given namespace to prefix mapping. + * + * @param n the node to be serialized + * @param pscope the namespace to prefix mapping + * @return the formatted string + */ + def format(n: Node, pscope: NamespaceBinding = null): String = + sbToString(format(n, pscope, _)) + + /** Returns a formatted string containing well-formed XML. + * + * @param nodes the sequence of nodes to be serialized + * @param pscope the namespace to prefix mapping + */ + def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding = null): String = + sbToString(formatNodes(nodes, pscope, _)) + + /** Appends a formatted string containing well-formed XML with + * the given namespace to prefix mapping to the given stringbuffer. 
+ * + * @param nodes the nodes to be serialized + * @param pscope the namespace to prefix mapping + * @param sb the string buffer to which to append to + */ + def formatNodes(nodes: Seq[Node], pscope: NamespaceBinding, sb: StringBuilder): Unit = + nodes foreach (n => sb append format(n, pscope)) +} diff --git a/src/xml/scala/xml/ProcInstr.scala b/src/xml/scala/xml/ProcInstr.scala new file mode 100644 index 0000000000..189c1c6878 --- /dev/null +++ b/src/xml/scala/xml/ProcInstr.scala @@ -0,0 +1,39 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** an XML node for processing instructions (PI) + * + * @author Burak Emir + * @param target target name of this PI + * @param proctext text contained in this node, may not contain "?>" + */ +case class ProcInstr(target: String, proctext: String) extends SpecialNode +{ + if (!Utility.isName(target)) + throw new IllegalArgumentException(target+" must be an XML Name") + if (proctext contains "?>") + throw new IllegalArgumentException(proctext+" may not contain \"?>\"") + if (target.toLowerCase == "xml") + throw new IllegalArgumentException(target+" is reserved") + + final override def doCollectNamespaces = false + final override def doTransform = false + + final def label = "#PI" + override def text = "" + + /** appends "<?" target (" "+text)?+"?>" + * to this stringbuffer. 
+ */ + override def buildString(sb: StringBuilder) = + sb append "".format(target, (if (proctext == "") "" else " " + proctext)) +} diff --git a/src/xml/scala/xml/QNode.scala b/src/xml/scala/xml/QNode.scala new file mode 100644 index 0000000000..f9e3f1854b --- /dev/null +++ b/src/xml/scala/xml/QNode.scala @@ -0,0 +1,20 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** This object provides an extractor method to match a qualified node with + * its namespace URI + * + * @author Burak Emir + * @version 1.0 + */ +object QNode { + def unapplySeq(n: Node) = Some((n.scope.getURI(n.prefix), n.label, n.attributes, n.child)) +} diff --git a/src/xml/scala/xml/SpecialNode.scala b/src/xml/scala/xml/SpecialNode.scala new file mode 100644 index 0000000000..5fef8ef66c --- /dev/null +++ b/src/xml/scala/xml/SpecialNode.scala @@ -0,0 +1,33 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** `SpecialNode` is a special XML node which represents either text + * `(PCDATA)`, a comment, a `PI`, or an entity ref. + * + * `SpecialNode`s also play the role of [[scala.xml.pull.XMLEvent]]s for + * pull-parsing. + * + * @author Burak Emir + */ +abstract class SpecialNode extends Node with pull.XMLEvent { + + /** always empty */ + final override def attributes = Null + + /** always Node.EmptyNamespace */ + final override def namespace = null + + /** always empty */ + final def child = Nil + + /** Append string representation to the given string buffer argument. 
*/ + def buildString(sb: StringBuilder): StringBuilder +} diff --git a/src/xml/scala/xml/Text.scala b/src/xml/scala/xml/Text.scala new file mode 100644 index 0000000000..debea0c025 --- /dev/null +++ b/src/xml/scala/xml/Text.scala @@ -0,0 +1,39 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** The class `Text` implements an XML node for text (PCDATA). + * It is used in both non-bound and bound XML representations. + * + * @author Burak Emir + * @param data the text contained in this node, may not be null. + */ +class Text(data: String) extends Atom[String](data) { + + /** Returns text, with some characters escaped according to the XML + * specification. + */ + override def buildString(sb: StringBuilder): StringBuilder = + Utility.escape(data, sb) +} + +/** This singleton object contains the `apply`and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Text { + def apply(data: String) = new Text(data) + def unapply(other: Any): Option[String] = other match { + case x: Text => Some(x.data) + case _ => None + } +} diff --git a/src/xml/scala/xml/TextBuffer.scala b/src/xml/scala/xml/TextBuffer.scala new file mode 100644 index 0000000000..514b1701af --- /dev/null +++ b/src/xml/scala/xml/TextBuffer.scala @@ -0,0 +1,46 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +import Utility.isSpace + +object TextBuffer { + def fromString(str: String): TextBuffer = new TextBuffer() append str +} + +/** The class `TextBuffer` is for creating text nodes without surplus + * whitespace. 
All occurrences of one or more whitespace in strings + * appended with the `append` method will be replaced by a single space + * character, and leading and trailing space will be removed completely. + */ +class TextBuffer +{ + val sb = new StringBuilder() + + /** Appends this string to the text buffer, trimming whitespaces as needed. + */ + def append(cs: Seq[Char]): this.type = { + cs foreach { c => + if (!isSpace(c)) sb append c + else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' + } + this + } + + /** Returns an empty sequence if text is only whitespace. + * + * @return the text without whitespaces. + */ + def toText: Seq[Text] = sb.toString.trim match { + case "" => Nil + case s => Seq(Text(s)) + } +} diff --git a/src/xml/scala/xml/TopScope.scala b/src/xml/scala/xml/TopScope.scala new file mode 100644 index 0000000000..474fbbbdb5 --- /dev/null +++ b/src/xml/scala/xml/TopScope.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** top level namespace scope. 
only contains the predefined binding + * for the "xml" prefix which is bound to + * "http://www.w3.org/XML/1998/namespace" + */ +object TopScope extends NamespaceBinding(null, null, null) { + + import XML.{ xml, namespace } + + override def getURI(prefix1: String): String = + if (prefix1 == xml) namespace else null + + override def getPrefix(uri1: String): String = + if (uri1 == namespace) xml else null + + override def toString() = "" + + override def buildString(stop: NamespaceBinding) = "" + override def buildString(sb: StringBuilder, ignore: NamespaceBinding) = {} +} diff --git a/src/xml/scala/xml/TypeSymbol.scala b/src/xml/scala/xml/TypeSymbol.scala new file mode 100644 index 0000000000..fb371ee340 --- /dev/null +++ b/src/xml/scala/xml/TypeSymbol.scala @@ -0,0 +1,15 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml + + +abstract class TypeSymbol diff --git a/src/xml/scala/xml/Unparsed.scala b/src/xml/scala/xml/Unparsed.scala new file mode 100644 index 0000000000..bc190eb724 --- /dev/null +++ b/src/xml/scala/xml/Unparsed.scala @@ -0,0 +1,36 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +/** An XML node for unparsed content. It will be output verbatim, all bets + * are off regarding wellformedness etc. + * + * @author Burak Emir + * @param data content in this node, may not be null. + */ +class Unparsed(data: String) extends Atom[String](data) { + + /** Returns text, with some characters escaped according to XML + * specification. 
+ */ + override def buildString(sb: StringBuilder): StringBuilder = + sb append data +} + +/** This singleton object contains the `apply`and `unapply` methods for + * convenient construction and deconstruction. + * + * @author Burak Emir + * @version 1.0 + */ +object Unparsed { + def apply(data: String) = new Unparsed(data) + def unapply(x: Unparsed) = Some(x.data) +} diff --git a/src/xml/scala/xml/UnprefixedAttribute.scala b/src/xml/scala/xml/UnprefixedAttribute.scala new file mode 100644 index 0000000000..6fa827da5f --- /dev/null +++ b/src/xml/scala/xml/UnprefixedAttribute.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml + +/** Unprefixed attributes have the null namespace, and no prefix field + * + * @author Burak Emir + */ +class UnprefixedAttribute( + val key: String, + val value: Seq[Node], + next1: MetaData) +extends Attribute +{ + final val pre = null + val next = if (value ne null) next1 else next1.remove(key) + + /** same as this(key, Text(value), next), or no attribute if value is null */ + def this(key: String, value: String, next: MetaData) = + this(key, if (value ne null) Text(value) else null: NodeSeq, next) + + /** same as this(key, value.get, next), or no attribute if value is None */ + def this(key: String, value: Option[Seq[Node]], next: MetaData) = + this(key, value.orNull, next) + + /** returns a copy of this unprefixed attribute with the given next field*/ + def copy(next: MetaData) = new UnprefixedAttribute(key, value, next) + + final def getNamespace(owner: Node): String = null + + /** + * Gets value of unqualified (unprefixed) attribute with given key, null if not found + * + * @param key + * @return value as Seq[Node] if key is found, null otherwise + */ + def apply(key: String): Seq[Node] = + if (key == this.key) value 
else next(key) + + /** + * Forwards the call to next (because caller looks for prefixed attribute). + * + * @param namespace + * @param scope + * @param key + * @return .. + */ + def apply(namespace: String, scope: NamespaceBinding, key: String): Seq[Node] = + next(namespace, scope, key) +} +object UnprefixedAttribute { + def unapply(x: UnprefixedAttribute) = Some((x.key, x.value, x.next)) +} diff --git a/src/xml/scala/xml/Utility.scala b/src/xml/scala/xml/Utility.scala new file mode 100755 index 0000000000..9134476401 --- /dev/null +++ b/src/xml/scala/xml/Utility.scala @@ -0,0 +1,410 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import scala.collection.mutable +import parsing.XhtmlEntities +import scala.language.implicitConversions + +/** + * The `Utility` object provides utility functions for processing instances + * of bound and not bound XML classes, as well as escaping text nodes. + * + * @author Burak Emir + */ +object Utility extends AnyRef with parsing.TokenTests { + final val SU = '\u001A' + + // [Martin] This looks dubious. We don't convert StringBuilders to + // Strings anywhere else, why do it here? + implicit def implicitSbToString(sb: StringBuilder) = sb.toString() + + // helper for the extremely oft-repeated sequence of creating a + // StringBuilder, passing it around, and then grabbing its String. + private [xml] def sbToString(f: (StringBuilder) => Unit): String = { + val sb = new StringBuilder + f(sb) + sb.toString + } + private[xml] def isAtomAndNotText(x: Node) = x.isAtom && !x.isInstanceOf[Text] + + /** Trims an element - call this method, when you know that it is an + * element (and not a text node) so you know that it will not be trimmed + * away. With this assumption, the function can return a `Node`, rather + * than a `Seq[Node]`. 
If you don't know, call `trimProper` and account + * for the fact that you may get back an empty sequence of nodes. + * + * Precondition: node is not a text node (it might be trimmed) + */ + def trim(x: Node): Node = x match { + case Elem(pre, lab, md, scp, child@_*) => + Elem(pre, lab, md, scp, (child flatMap trimProper):_*) + } + + /** trim a child of an element. `Attribute` values and `Atom` nodes that + * are not `Text` nodes are unaffected. + */ + def trimProper(x:Node): Seq[Node] = x match { + case Elem(pre,lab,md,scp,child@_*) => + Elem(pre,lab,md,scp, (child flatMap trimProper):_*) + case Text(s) => + new TextBuffer().append(s).toText + case _ => + x + } + + /** returns a sorted attribute list */ + def sort(md: MetaData): MetaData = if((md eq Null) || (md.next eq Null)) md else { + val key = md.key + val smaller = sort(md.filter { m => m.key < key }) + val greater = sort(md.filter { m => m.key > key }) + smaller.foldRight (md copy greater) ((x, xs) => x copy xs) + } + + /** Return the node with its attribute list sorted alphabetically + * (prefixes are ignored) */ + def sort(n:Node): Node = n match { + case Elem(pre,lab,md,scp,child@_*) => + Elem(pre,lab,sort(md),scp, (child map sort):_*) + case _ => n + } + + /** + * Escapes the characters < > & and " from string. + */ + final def escape(text: String): String = sbToString(escape(text, _)) + + object Escapes { + /** For reasons unclear escape and unescape are a long ways from + * being logical inverses. */ + val pairs = Map( + "lt" -> '<', + "gt" -> '>', + "amp" -> '&', + "quot" -> '"' + // enigmatic comment explaining why this isn't escaped -- + // is valid xhtml but not html, and IE doesn't know it, says jweb + // "apos" -> '\'' + ) + val escMap = pairs map { case (s, c) => c-> ("&%s;" format s) } + val unescMap = pairs ++ Map("apos" -> '\'') + } + import Escapes.{ escMap, unescMap } + + /** + * Appends escaped string to `s`. 
+ */ + final def escape(text: String, s: StringBuilder): StringBuilder = { + // Implemented per XML spec: + // http://www.w3.org/International/questions/qa-controls + // imperative code 3x-4x faster than current implementation + // dpp (David Pollak) 2010/02/03 + val len = text.length + var pos = 0 + while (pos < len) { + text.charAt(pos) match { + case '<' => s.append("<") + case '>' => s.append(">") + case '&' => s.append("&") + case '"' => s.append(""") + case '\n' => s.append('\n') + case '\r' => s.append('\r') + case '\t' => s.append('\t') + case c => if (c >= ' ') s.append(c) + } + + pos += 1 + } + s + } + + /** + * Appends unescaped string to `s`, `amp` becomes `&`, + * `lt` becomes `<` etc.. + * + * @return `'''null'''` if `ref` was not a predefined entity. + */ + final def unescape(ref: String, s: StringBuilder): StringBuilder = + ((unescMap get ref) map (s append _)).orNull + + /** + * Returns a set of all namespaces used in a sequence of nodes + * and all their descendants, including the empty namespaces. + */ + def collectNamespaces(nodes: Seq[Node]): mutable.Set[String] = + nodes.foldLeft(new mutable.HashSet[String]) { (set, x) => collectNamespaces(x, set) ; set } + + /** + * Adds all namespaces in node to set. 
+ */ + def collectNamespaces(n: Node, set: mutable.Set[String]) { + if (n.doCollectNamespaces) { + set += n.namespace + for (a <- n.attributes) a match { + case _:PrefixedAttribute => + set += a.getNamespace(n) + case _ => + } + for (i <- n.child) + collectNamespaces(i, set) + } + } + + // def toXML( + // x: Node, + // pscope: NamespaceBinding = TopScope, + // sb: StringBuilder = new StringBuilder, + // stripComments: Boolean = false, + // decodeEntities: Boolean = true, + // preserveWhitespace: Boolean = false, + // minimizeTags: Boolean = false): String = + // { + // toXMLsb(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + // sb.toString() + // } + + /** + * Serialize the provided Node to the provided StringBuilder. + *

+ * Note that calling this source-compatible method will result in the same old, arguably almost universally unwanted, + * behaviour. + */ + @deprecated("Please use `serialize` instead and specify a `minimizeTags` parameter", "2.10.0") + def toXML( + x: Node, + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = true, + preserveWhitespace: Boolean = false, + minimizeTags: Boolean = false): StringBuilder = + { + serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never) + } + + /** + * Serialize an XML Node to a StringBuilder. + * + * This is essentially a minor rework of `toXML` that can't have the same name due to an unfortunate + * combination of named/default arguments and overloading. + * + * @todo use a Writer instead + */ + def serialize( + x: Node, + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = true, + preserveWhitespace: Boolean = false, + minimizeTags: MinimizeMode.Value = MinimizeMode.Default): StringBuilder = + { + x match { + case c: Comment if !stripComments => c buildString sb + case s: SpecialNode => s buildString sb + case g: Group => for (c <- g.nodes) serialize(c, g.scope, sb, minimizeTags = minimizeTags) ; sb + case el: Elem => + // print tag with namespace declarations + sb.append('<') + el.nameToString(sb) + if (el.attributes ne null) el.attributes.buildString(sb) + el.scope.buildString(sb, pscope) + if (el.child.isEmpty && + (minimizeTags == MinimizeMode.Always || + (minimizeTags == MinimizeMode.Default && el.minimizeEmpty))) + { + // no children, so use short form: + sb.append("/>") + } else { + // children, so use long form: ... 
+ sb.append('>') + sequenceToXML(el.child, el.scope, sb, stripComments) + sb.append("') + } + case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName) + } + } + + def sequenceToXML( + children: Seq[Node], + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = true, + preserveWhitespace: Boolean = false, + minimizeTags: MinimizeMode.Value = MinimizeMode.Default): Unit = + { + if (children.isEmpty) return + else if (children forall isAtomAndNotText) { // add space + val it = children.iterator + val f = it.next() + serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + while (it.hasNext) { + val x = it.next() + sb.append(' ') + serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + } + } + else children foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } + } + + /** + * Returns prefix of qualified name if any. + */ + final def prefix(name: String): Option[String] = (name indexOf ':') match { + case -1 => None + case i => Some(name.substring(0, i)) + } + + /** + * Returns a hashcode for the given constituents of a node + */ + def hashCode(pre: String, label: String, attribHashCode: Int, scpeHash: Int, children: Seq[Node]) = + scala.util.hashing.MurmurHash3.orderedHash(label +: attribHashCode +: scpeHash +: children, pre.##) + + def appendQuoted(s: String): String = sbToString(appendQuoted(s, _)) + + /** + * Appends "s" if string `s` does not contain ", + * 's' otherwise. 
+ */ + def appendQuoted(s: String, sb: StringBuilder) = { + val ch = if (s contains '"') '\'' else '"' + sb.append(ch).append(s).append(ch) + } + + /** + * Appends "s" and escapes and " i s with \" + */ + def appendEscapedQuoted(s: String, sb: StringBuilder): StringBuilder = { + sb.append('"') + for (c <- s) c match { + case '"' => sb.append('\\'); sb.append('"') + case _ => sb.append(c) + } + sb.append('"') + } + + def getName(s: String, index: Int): String = { + if (index >= s.length) null + else { + val xs = s drop index + if (xs.nonEmpty && isNameStart(xs.head)) xs takeWhile isNameChar + else "" + } + } + + /** + * Returns `'''null'''` if the value is a correct attribute value, + * error message if it isn't. + */ + def checkAttributeValue(value: String): String = { + var i = 0 + while (i < value.length) { + value.charAt(i) match { + case '<' => + return "< not allowed in attribute value" + case '&' => + val n = getName(value, i+1) + if (n eq null) + return "malformed entity reference in attribute value ["+value+"]" + i = i + n.length + 1 + if (i >= value.length || value.charAt(i) != ';') + return "malformed entity reference in attribute value ["+value+"]" + case _ => + } + i = i + 1 + } + null + } + + def parseAttributeValue(value: String): Seq[Node] = { + val sb = new StringBuilder + var rfb: StringBuilder = null + val nb = new NodeBuffer() + + val it = value.iterator + while (it.hasNext) { + var c = it.next() + // entity! 
flush buffer into text node + if (c == '&') { + c = it.next() + if (c == '#') { + c = it.next() + val theChar = parseCharRef ({ ()=> c },{ () => c = it.next() },{s => throw new RuntimeException(s)}, {s => throw new RuntimeException(s)}) + sb.append(theChar) + } + else { + if (rfb eq null) rfb = new StringBuilder() + rfb append c + c = it.next() + while (c != ';') { + rfb.append(c) + c = it.next() + } + val ref = rfb.toString() + rfb.clear() + unescape(ref,sb) match { + case null => + if (sb.length > 0) { // flush buffer + nb += Text(sb.toString()) + sb.clear() + } + nb += EntityRef(ref) // add entityref + case _ => + } + } + } + else sb append c + } + if (sb.length > 0) { // flush buffer + val x = Text(sb.toString()) + if (nb.length == 0) + return x + else + nb += x + } + nb + } + + /** + * {{{ + * CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * }}} + * See [66] + */ + def parseCharRef(ch: () => Char, nextch: () => Unit, reportSyntaxError: String => Unit, reportTruncatedError: String => Unit): String = { + val hex = (ch() == 'x') && { nextch(); true } + val base = if (hex) 16 else 10 + var i = 0 + while (ch() != ';') { + ch() match { + case '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' => + i = i * base + ch().asDigit + case 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' => + if (! 
hex) + reportSyntaxError("hex char not allowed in decimal char ref\n" + + "Did you mean to write &#x ?") + else + i = i * base + ch().asDigit + case SU => + reportTruncatedError("") + case _ => + reportSyntaxError("character '" + ch() + "' not allowed in char ref\n") + } + nextch() + } + new String(Array(i), 0, 1) + } +} diff --git a/src/xml/scala/xml/XML.scala b/src/xml/scala/xml/XML.scala new file mode 100755 index 0000000000..020264e509 --- /dev/null +++ b/src/xml/scala/xml/XML.scala @@ -0,0 +1,109 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml + +import parsing.NoBindingFactoryAdapter +import factory.XMLLoader +import java.io.{ File, FileDescriptor, FileInputStream, FileOutputStream } +import java.io.{ InputStream, Reader, StringReader, Writer } +import java.nio.channels.Channels +import scala.util.control.Exception.ultimately + +object Source { + def fromFile(file: File) = new InputSource(new FileInputStream(file)) + def fromFile(fd: FileDescriptor) = new InputSource(new FileInputStream(fd)) + def fromFile(name: String) = new InputSource(new FileInputStream(name)) + + def fromInputStream(is: InputStream) = new InputSource(is) + def fromReader(reader: Reader) = new InputSource(reader) + def fromSysId(sysID: String) = new InputSource(sysID) + def fromString(string: String) = fromReader(new StringReader(string)) +} + +/** + * Governs how empty elements (i.e. those without child elements) should be serialized. + */ +object MinimizeMode extends Enumeration { + /** Minimize empty tags if they were originally empty when parsed, or if they were constructed + * with [[scala.xml.Elem]]`#minimizeEmpty` == true + */ + val Default = Value + + /** Always minimize empty tags. 
Note that this may be problematic for XHTML, in which + * case [[scala.xml.Xhtml]]`#toXhtml` should be used instead. + */ + val Always = Value + + /** Never minimize empty tags. + */ + val Never = Value +} + +/** The object `XML` provides constants, and functions to load + * and save XML elements. Use this when data binding is not desired, i.e. + * when XML is handled using `Symbol` nodes. + * + * @author Burak Emir + * @version 1.0, 25/04/2005 + */ +object XML extends XMLLoader[Elem] { + val xml = "xml" + val xmlns = "xmlns" + val namespace = "http://www.w3.org/XML/1998/namespace" + val preserve = "preserve" + val space = "space" + val lang = "lang" + val encoding = "ISO-8859-1" + + /** Returns an XMLLoader whose load* methods will use the supplied SAXParser. */ + def withSAXParser(p: SAXParser): XMLLoader[Elem] = + new XMLLoader[Elem] { override val parser: SAXParser = p } + + /** Saves a node to a file with given filename using given encoding + * optionally with xmldecl and doctype declaration. + * + * @param filename the filename + * @param node the xml node we want to write + * @param enc encoding to use + * @param xmlDecl if true, write xml declaration + * @param doctype if not null, write doctype declaration + */ + final def save( + filename: String, + node: Node, + enc: String = encoding, + xmlDecl: Boolean = false, + doctype: dtd.DocType = null + ): Unit = + { + val fos = new FileOutputStream(filename) + val w = Channels.newWriter(fos.getChannel(), enc) + + ultimately(w.close())( + write(w, node, enc, xmlDecl, doctype) + ) + } + + /** Writes the given node using writer, optionally with xml decl and doctype. + * It's the caller's responsibility to close the writer. 
+ * + * @param w the writer + * @param node the xml node we want to write + * @param enc the string to be used in `xmlDecl` + * @param xmlDecl if true, write xml declaration + * @param doctype if not null, write doctype declaration + */ + final def write(w: java.io.Writer, node: Node, enc: String, xmlDecl: Boolean, doctype: dtd.DocType, minimizeTags: MinimizeMode.Value = MinimizeMode.Default) { + /* TODO: optimize by giving writer parameter to toXML*/ + if (xmlDecl) w.write("\n") + if (doctype ne null) w.write( doctype.toString() + "\n") + w.write(Utility.serialize(node, minimizeTags = minimizeTags).toString) + } +} diff --git a/src/xml/scala/xml/Xhtml.scala b/src/xml/scala/xml/Xhtml.scala new file mode 100644 index 0000000000..6a12c1a89a --- /dev/null +++ b/src/xml/scala/xml/Xhtml.scala @@ -0,0 +1,97 @@ + +package scala +package xml + +import parsing.XhtmlEntities +import Utility.{ sbToString, isAtomAndNotText } + +/* (c) David Pollak 2007 WorldWide Conferencing, LLC */ + +object Xhtml +{ + /** + * Convenience function: same as toXhtml(node, false, false) + * + * @param node the node + */ + def toXhtml(node: Node): String = sbToString(sb => toXhtml(x = node, sb = sb)) + + /** + * Convenience function: amounts to calling toXhtml(node) on each + * node in the sequence. + * + * @param nodeSeq the node sequence + */ + def toXhtml(nodeSeq: NodeSeq): String = sbToString(sb => sequenceToXML(nodeSeq: Seq[Node], sb = sb)) + + /** Elements which we believe are safe to minimize if minimizeTags is true. 
+ * See http://www.w3.org/TR/xhtml1/guidelines.html#C_3 + */ + private val minimizableElements = + List("base", "meta", "link", "hr", "br", "param", "img", "area", "input", "col") + + def toXhtml( + x: Node, + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = false, + preserveWhitespace: Boolean = false, + minimizeTags: Boolean = true): Unit = + { + def decode(er: EntityRef) = XhtmlEntities.entMap.get(er.entityName) match { + case Some(chr) if chr.toInt >= 128 => sb.append(chr) + case _ => er.buildString(sb) + } + def shortForm = + minimizeTags && + (x.child == null || x.child.length == 0) && + (minimizableElements contains x.label) + + x match { + case c: Comment => if (!stripComments) c buildString sb + case er: EntityRef if decodeEntities => decode(er) + case x: SpecialNode => x buildString sb + case g: Group => + g.nodes foreach { toXhtml(_, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } + + case _ => + sb.append('<') + x.nameToString(sb) + if (x.attributes ne null) x.attributes.buildString(sb) + x.scope.buildString(sb, pscope) + + if (shortForm) sb.append(" />") + else { + sb.append('>') + sequenceToXML(x.child, x.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + sb.append("') + } + } + } + + /** + * Amounts to calling toXhtml(node, ...) with the given parameters on each node. 
+ */ + def sequenceToXML( + children: Seq[Node], + pscope: NamespaceBinding = TopScope, + sb: StringBuilder = new StringBuilder, + stripComments: Boolean = false, + decodeEntities: Boolean = false, + preserveWhitespace: Boolean = false, + minimizeTags: Boolean = true): Unit = + { + if (children.isEmpty) + return + + val doSpaces = children forall isAtomAndNotText // interleave spaces + for (c <- children.take(children.length - 1)) { + toXhtml(c, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + if (doSpaces) sb append ' ' + } + toXhtml(children.last, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + } +} diff --git a/src/xml/scala/xml/dtd/ContentModel.scala b/src/xml/scala/xml/dtd/ContentModel.scala new file mode 100644 index 0000000000..4007985dce --- /dev/null +++ b/src/xml/scala/xml/dtd/ContentModel.scala @@ -0,0 +1,118 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package dtd + +import scala.xml.dtd.impl._ +import scala.xml.Utility.sbToString +import PartialFunction._ + +object ContentModel extends WordExp { + type _labelT = ElemName + type _regexpT = RegExp + + object Translator extends WordBerrySethi { + override val lang: ContentModel.this.type = ContentModel.this + } + + case class ElemName(name: String) extends Label { + override def toString() = """ElemName("%s")""" format name + } + + def isMixed(cm: ContentModel) = cond(cm) { case _: MIXED => true } + def containsText(cm: ContentModel) = (cm == PCDATA) || isMixed(cm) + def parse(s: String): ContentModel = ContentModelParser.parse(s) + + def getLabels(r: RegExp): Set[String] = { + def traverse(r: RegExp): Set[String] = r match { // !!! 
check for match translation problem + case Letter(ElemName(name)) => Set(name) + case Star( x @ _ ) => traverse( x ) // bug if x@_* + case Sequ( xs @ _* ) => Set(xs flatMap traverse: _*) + case Alt( xs @ _* ) => Set(xs flatMap traverse: _*) + } + + traverse(r) + } + + def buildString(r: RegExp): String = sbToString(buildString(r, _)) + + /* precond: rs.length >= 1 */ + private def buildString(rs: Seq[RegExp], sb: StringBuilder, sep: Char) { + buildString(rs.head, sb) + for (z <- rs.tail) { + sb append sep + buildString(z, sb) + } + } + + def buildString(c: ContentModel, sb: StringBuilder): StringBuilder = c match { + case ANY => sb append "ANY" + case EMPTY => sb append "EMPTY" + case PCDATA => sb append "(#PCDATA)" + case ELEMENTS(_) | MIXED(_) => c buildString sb + } + + def buildString(r: RegExp, sb: StringBuilder): StringBuilder = + r match { // !!! check for match translation problem + case Eps => + sb + case Sequ(rs @ _*) => + sb.append( '(' ); buildString(rs, sb, ','); sb.append( ')' ) + case Alt(rs @ _*) => + sb.append( '(' ); buildString(rs, sb, '|'); sb.append( ')' ) + case Star(r: RegExp) => + sb.append( '(' ); buildString(r, sb); sb.append( ")*" ) + case Letter(ElemName(name)) => + sb.append(name) + } + +} + +sealed abstract class ContentModel +{ + override def toString(): String = sbToString(buildString) + def buildString(sb: StringBuilder): StringBuilder +} + +case object PCDATA extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("(#PCDATA)") +} +case object EMPTY extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("EMPTY") +} +case object ANY extends ContentModel { + override def buildString(sb: StringBuilder): StringBuilder = sb.append("ANY") +} +sealed abstract class DFAContentModel extends ContentModel { + import ContentModel.{ ElemName, Translator } + def r: ContentModel.RegExp + + lazy val dfa: DetWordAutom[ElemName] = { + val nfa = 
Translator.automatonFrom(r, 1) + new SubsetConstruction(nfa).determinize + } +} + +case class MIXED(r: ContentModel.RegExp) extends DFAContentModel { + import ContentModel.{ Alt, RegExp } + + override def buildString(sb: StringBuilder): StringBuilder = { + val newAlt = r match { case Alt(rs @ _*) => Alt(rs drop 1: _*) } + + sb append "(#PCDATA|" + ContentModel.buildString(newAlt: RegExp, sb) + sb append ")*" + } +} + +case class ELEMENTS(r: ContentModel.RegExp) extends DFAContentModel { + override def buildString(sb: StringBuilder): StringBuilder = + ContentModel.buildString(r, sb) +} diff --git a/src/xml/scala/xml/dtd/ContentModelParser.scala b/src/xml/scala/xml/dtd/ContentModelParser.scala new file mode 100644 index 0000000000..71b391c422 --- /dev/null +++ b/src/xml/scala/xml/dtd/ContentModelParser.scala @@ -0,0 +1,129 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package dtd + +/** Parser for regexps (content models in DTD element declarations) */ + +object ContentModelParser extends Scanner { // a bit too permissive concerning #PCDATA + import ContentModel._ + + /** parses the argument to a regexp */ + def parse(s: String): ContentModel = { initScanner(s); contentspec } + + def accept(tok: Int) = { + if (token != tok) { + if ((tok == STAR) && (token == END)) // common mistake + scala.sys.error("in DTDs, \n"+ + "mixed content models must be like (#PCDATA|Name|Name|...)*") + else + scala.sys.error("expected "+token2string(tok)+ + ", got unexpected token:"+token2string(token)) + } + nextToken() + } + + // s [ '+' | '*' | '?' 
] + def maybeSuffix(s: RegExp) = token match { + case STAR => nextToken(); Star(s) + case PLUS => nextToken(); Sequ(s, Star(s)) + case OPT => nextToken(); Alt(Eps, s) + case _ => s + } + + // contentspec ::= EMPTY | ANY | (#PCDATA) | "(#PCDATA|"regexp) + + def contentspec: ContentModel = token match { + + case NAME => value match { + case "ANY" => ANY + case "EMPTY" => EMPTY + case _ => scala.sys.error("expected ANY, EMPTY or '(' instead of " + value ) + } + case LPAREN => + + nextToken() + sOpt() + if (token != TOKEN_PCDATA) + ELEMENTS(regexp) + else { + nextToken() + token match { + case RPAREN => + PCDATA + case CHOICE => + val res = MIXED(choiceRest(Eps)) + sOpt() + accept( RPAREN ) + accept( STAR ) + res + case _ => + scala.sys.error("unexpected token:" + token2string(token) ) + } + } + + case _ => + scala.sys.error("unexpected token:" + token2string(token) ) + } + // sopt ::= S? + def sOpt() = if( token == S ) nextToken() + + // (' S? mixed ::= '#PCDATA' S? ')' + // | '#PCDATA' (S? '|' S? atom)* S? ')*' + + // '(' S? regexp ::= cp S? [seqRest|choiceRest] ')' [ '+' | '*' | '?' ] + def regexp: RegExp = { + val p = particle + sOpt() + maybeSuffix(token match { + case RPAREN => nextToken(); p + case CHOICE => val q = choiceRest( p );accept( RPAREN ); q + case COMMA => val q = seqRest( p ); accept( RPAREN ); q + }) + } + + // seqRest ::= (',' S? cp S?)+ + def seqRest(p: RegExp) = { + var k = List(p) + while( token == COMMA ) { + nextToken() + sOpt() + k = particle::k + sOpt() + } + Sequ( k.reverse:_* ) + } + + // choiceRest ::= ('|' S? cp S?)+ + def choiceRest( p:RegExp ) = { + var k = List( p ) + while( token == CHOICE ) { + nextToken() + sOpt() + k = particle::k + sOpt() + } + Alt( k.reverse:_* ) + } + + // particle ::= '(' S? regexp + // | name [ '+' | '*' | '?' 
] + def particle = token match { + case LPAREN => nextToken(); sOpt(); regexp + case NAME => val a = Letter(ElemName(value)); nextToken(); maybeSuffix(a) + case _ => scala.sys.error("expected '(' or Name, got:"+token2string(token)) + } + + // atom ::= name + def atom = token match { + case NAME => val a = Letter(ElemName(value)); nextToken(); a + case _ => scala.sys.error("expected Name, got:"+token2string(token)) + } +} diff --git a/src/xml/scala/xml/dtd/DTD.scala b/src/xml/scala/xml/dtd/DTD.scala new file mode 100644 index 0000000000..16a824fe2c --- /dev/null +++ b/src/xml/scala/xml/dtd/DTD.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +import scala.collection.mutable + +/** A document type declaration. + * + * @author Burak Emir + */ +abstract class DTD { + var externalID: ExternalID = null + var decls: List[Decl] = Nil + def notations: Seq[NotationDecl] = Nil + def unparsedEntities: Seq[EntityDecl] = Nil + + var elem: mutable.Map[String, ElemDecl] = new mutable.HashMap[String, ElemDecl]() + var attr: mutable.Map[String, AttListDecl] = new mutable.HashMap[String, AttListDecl]() + var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() + + override def toString() = + "DTD [\n%s%s]".format( + Option(externalID) getOrElse "", + decls.mkString("", "\n", "\n") + ) +} diff --git a/src/xml/scala/xml/dtd/Decl.scala b/src/xml/scala/xml/dtd/Decl.scala new file mode 100644 index 0000000000..8bf859c460 --- /dev/null +++ b/src/xml/scala/xml/dtd/Decl.scala @@ -0,0 +1,157 @@ +/* __ *\ + ** ________ ___ / / ___ Scala API ** + ** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** + ** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** + ** /____/\___/_/ |_/____/_/ | | ** + ** |/ ** + \* */ + +package scala 
+package xml +package dtd + +import Utility.sbToString + +sealed abstract class Decl + +sealed abstract class MarkupDecl extends Decl { + def buildString(sb: StringBuilder): StringBuilder +} + +/** an element declaration + */ +case class ElemDecl(name: String, contentModel: ContentModel) +extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +case class AttListDecl(name: String, attrs:List[AttrDecl]) +extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "") + } +} + +/** an attribute declaration. at this point, the tpe is a string. Future + * versions might provide a way to access the attribute types more + * directly. + */ +case class AttrDecl(name: String, tpe: String, default: DefaultDecl) { + override def toString(): String = sbToString(buildString) + + def buildString(sb: StringBuilder): StringBuilder = { + sb append " " append name append ' ' append tpe append ' ' + default buildString sb + } + +} + +/** an entity declaration */ +sealed abstract class EntityDecl extends MarkupDecl + +/** a parsed general entity declaration */ +case class ParsedEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +/** a parameter entity declaration */ +case class ParameterEntityDecl(name: String, entdef: EntityDef) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} + +/** an unparsed entity declaration */ +case class UnparsedEntityDecl( name:String, extID:ExternalID, notation:String ) extends EntityDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "' + } +} +/** a notation declaration */ +case class NotationDecl( name:String, extID:ExternalID ) extends MarkupDecl { + override def buildString(sb: StringBuilder): StringBuilder = { + sb append "" */ + final override def toString() = { + def 
intString = + if (intSubset.isEmpty) "" + else intSubset.mkString("[", "", "]") + + """""".format(name, extID.toString(), intString) + } +} + +object DocType { + /** Creates a doctype with no external id, nor internal subset declarations. */ + def apply(name: String): DocType = apply(name, NoExternalID, Nil) +} diff --git a/src/xml/scala/xml/dtd/ElementValidator.scala b/src/xml/scala/xml/dtd/ElementValidator.scala new file mode 100644 index 0000000000..4830769a7d --- /dev/null +++ b/src/xml/scala/xml/dtd/ElementValidator.scala @@ -0,0 +1,132 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + +import PartialFunction._ +import scala.collection.mutable + +import ContentModel.ElemName +import MakeValidationException._ // @todo other exceptions + +import impl._ + +/** validate children and/or attributes of an element + * exceptions are created but not thrown. 
+ */ +class ElementValidator() extends Function1[Node,Boolean] { + + private var exc: List[ValidationException] = Nil + + protected var contentModel: ContentModel = _ + protected var dfa: DetWordAutom[ElemName] = _ + protected var adecls: List[AttrDecl] = _ + + /** set content model, enabling element validation */ + def setContentModel(cm: ContentModel) = { + contentModel = cm + cm match { + case ELEMENTS(r) => + val nfa = ContentModel.Translator.automatonFrom(r, 1) + dfa = new SubsetConstruction(nfa).determinize + case _ => + dfa = null + } + } + + def getContentModel = contentModel + + /** set meta data, enabling attribute validation */ + def setMetaData(adecls: List[AttrDecl]) { this.adecls = adecls } + + def getIterable(nodes: Seq[Node], skipPCDATA: Boolean): Iterable[ElemName] = { + def isAllWhitespace(a: Atom[_]) = cond(a.data) { case s: String if s.trim == "" => true } + + nodes.filter { + case y: SpecialNode => y match { + case a: Atom[_] if isAllWhitespace(a) => false // always skip all-whitespace nodes + case _ => !skipPCDATA + } + case x => x.namespace eq null + } . map (x => ElemName(x.label)) + } + + /** check attributes, return true if md corresponds to attribute declarations in adecls. 
+ */ + def check(md: MetaData): Boolean = { + val len: Int = exc.length + val ok = new mutable.BitSet(adecls.length) + + for (attr <- md) { + def attrStr = attr.value.toString + def find(Key: String): Option[AttrDecl] = { + adecls.zipWithIndex find { + case (a @ AttrDecl(Key, _, _), j) => ok += j ; return Some(a) + case _ => false + } + None + } + + find(attr.key) match { + case None => + exc ::= fromUndefinedAttribute(attr.key) + + case Some(AttrDecl(_, tpe, DEFAULT(true, fixedValue))) if attrStr != fixedValue => + exc ::= fromFixedAttribute(attr.key, fixedValue, attrStr) + + case _ => + } + } + + adecls.zipWithIndex foreach { + case (AttrDecl(key, tpe, REQUIRED), j) if !ok(j) => exc ::= fromMissingAttribute(key, tpe) + case _ => + } + + exc.length == len //- true if no new exception + } + + /** check children, return true if conform to content model + * @note contentModel != null + */ + def check(nodes: Seq[Node]): Boolean = contentModel match { + case ANY => true + case EMPTY => getIterable(nodes, skipPCDATA = false).isEmpty + case PCDATA => getIterable(nodes, skipPCDATA = true).isEmpty + case MIXED(ContentModel.Alt(branches @ _*)) => // @todo + val j = exc.length + def find(Key: String): Boolean = + branches exists { case ContentModel.Letter(ElemName(Key)) => true ; case _ => false } + + getIterable(nodes, skipPCDATA = true) map (_.name) filterNot find foreach { + exc ::= MakeValidationException fromUndefinedElement _ + } + (exc.length == j) // - true if no new exception + + case _: ELEMENTS => + dfa isFinal { + getIterable(nodes, skipPCDATA = false).foldLeft(0) { (q, e) => + (dfa delta q).getOrElse(e, throw ValidationException("element %s not allowed here" format e)) + } + } + case _ => false + } + + /** applies various validations - accumulates error messages in exc + * @todo fail on first error, ignore other errors (rearranging conditions) + */ + def apply(n: Node): Boolean = + //- ? check children + ((contentModel == null) || check(n.child)) && + //- ? 
check attributes + ((adecls == null) || check(n.attributes)) +} diff --git a/src/xml/scala/xml/dtd/ExternalID.scala b/src/xml/scala/xml/dtd/ExternalID.scala new file mode 100644 index 0000000000..880633d860 --- /dev/null +++ b/src/xml/scala/xml/dtd/ExternalID.scala @@ -0,0 +1,86 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +/** an ExternalIDs - either PublicID or SystemID + * + * @author Burak Emir + */ +sealed abstract class ExternalID extends parsing.TokenTests { + def quoted(s: String) = { + val c = if (s contains '"') '\'' else '"' + c + s + c + } + + // public != null: PUBLIC " " publicLiteral " " [systemLiteral] + // public == null: SYSTEM " " systemLiteral + override def toString(): String = { + lazy val quotedSystemLiteral = quoted(systemId) + lazy val quotedPublicLiteral = quoted(publicId) + + if (publicId == null) "SYSTEM " + quotedSystemLiteral + else "PUBLIC " + quotedPublicLiteral + + (if (systemId == null) "" else " " + quotedSystemLiteral) + } + def buildString(sb: StringBuilder): StringBuilder = + sb.append(this.toString()) + + def systemId: String + def publicId: String +} + +/** a system identifier + * + * @author Burak Emir + * @param systemId the system identifier literal + */ +case class SystemID(systemId: String) extends ExternalID { + val publicId = null + + if (!checkSysID(systemId)) + throw new IllegalArgumentException("can't use both \" and ' in systemId") +} + + +/** a public identifier (see http://www.w3.org/QA/2002/04/valid-dtd-list.html). 
+ * + * @author Burak Emir + * @param publicId the public identifier literal + * @param systemId (can be null for notation pubIDs) the system identifier literal + */ +case class PublicID(publicId: String, systemId: String) extends ExternalID { + if (!checkPubID(publicId)) + throw new IllegalArgumentException("publicId must consist of PubidChars") + + if (systemId != null && !checkSysID(systemId)) + throw new IllegalArgumentException("can't use both \" and ' in systemId") + + /** the constant "#PI" */ + def label = "#PI" + + /** always empty */ + def attribute = Node.NoAttributes + + /** always empty */ + def child = Nil +} + +/** A marker used when a `DocType` contains no external id. + * + * @author Michael Bayne + */ +object NoExternalID extends ExternalID { + val publicId = null + val systemId = null + + override def toString = "" +} diff --git a/src/xml/scala/xml/dtd/Scanner.scala b/src/xml/scala/xml/dtd/Scanner.scala new file mode 100644 index 0000000000..5f9d1ccaed --- /dev/null +++ b/src/xml/scala/xml/dtd/Scanner.scala @@ -0,0 +1,79 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package dtd + +/** Scanner for regexps (content models in DTD element declarations) + * todo: cleanup + */ +class Scanner extends Tokens with parsing.TokenTests { + + final val ENDCH = '\u0000' + + var token:Int = END + var value:String = _ + + private var it: Iterator[Char] = null + private var c: Char = 'z' + + /** initializes the scanner on input s */ + final def initScanner(s: String) { + value = "" + it = (s).iterator + token = 1+END + next() + nextToken() + } + + /** scans the next token */ + final def nextToken() { + if (token != END) token = readToken + } + + // todo: see XML specification... 
probably isLetter,isDigit is fine + final def isIdentChar = ( ('a' <= c && c <= 'z') + || ('A' <= c && c <= 'Z')) + + final def next() = if (it.hasNext) c = it.next() else c = ENDCH + + final def acc(d: Char) { + if (c == d) next() else scala.sys.error("expected '"+d+"' found '"+c+"' !") + } + + final def accS(ds: Seq[Char]) { ds foreach acc } + + final def readToken: Int = + if (isSpace(c)) { + while (isSpace(c)) c = it.next() + S + } else c match { + case '(' => next(); LPAREN + case ')' => next(); RPAREN + case ',' => next(); COMMA + case '*' => next(); STAR + case '+' => next(); PLUS + case '?' => next(); OPT + case '|' => next(); CHOICE + case '#' => next(); accS( "PCDATA" ); TOKEN_PCDATA + case ENDCH => END + case _ => + if (isNameStart(c)) name; // NAME + else scala.sys.error("unexpected character:" + c) + } + + final def name = { + val sb = new StringBuilder() + do { sb.append(c); next() } while (isNameChar(c)) + value = sb.toString() + NAME + } + +} diff --git a/src/xml/scala/xml/dtd/Tokens.scala b/src/xml/scala/xml/dtd/Tokens.scala new file mode 100644 index 0000000000..07e888e77a --- /dev/null +++ b/src/xml/scala/xml/dtd/Tokens.scala @@ -0,0 +1,45 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + + +class Tokens { + + // Tokens + + final val TOKEN_PCDATA = 0 + final val NAME = 1 + final val LPAREN = 3 + final val RPAREN = 4 + final val COMMA = 5 + final val STAR = 6 + final val PLUS = 7 + final val OPT = 8 + final val CHOICE = 9 + final val END = 10 + final val S = 13 + + final def token2string(i: Int): String = i match { + case 0 => "#PCDATA" + case 1 => "NAME" + case 3 => "(" + case 4 => ")" + case 5 => "," + case 6 => "*" + case 7 => "+" + case 8 => "?" 
+ case 9 => "|" + case 10 => "END" + case 13 => " " + } +} diff --git a/src/xml/scala/xml/dtd/ValidationException.scala b/src/xml/scala/xml/dtd/ValidationException.scala new file mode 100644 index 0000000000..1bfae55286 --- /dev/null +++ b/src/xml/scala/xml/dtd/ValidationException.scala @@ -0,0 +1,44 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://www.scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package dtd + + +case class ValidationException(e: String) extends Exception(e) + +/** + * @author Burak Emir + */ +object MakeValidationException { + def fromFixedAttribute(k: String, value: String, actual: String) = + ValidationException("value of attribute " + k + " FIXED to \""+ + value+"\", but document tries \""+actual+"\"") + + def fromNonEmptyElement() = + new ValidationException("element should be *empty*") + + def fromUndefinedElement(label: String) = + new ValidationException("element \""+ label +"\" not allowed here") + + def fromUndefinedAttribute(key: String) = + new ValidationException("attribute " + key +" not allowed here") + + def fromMissingAttribute(allKeys: Set[String]) = { + val sb = new StringBuilder("missing value for REQUIRED attribute") + if (allKeys.size > 1) sb.append('s') + allKeys foreach (k => sb append "'%s'".format(k)) + new ValidationException(sb.toString()) + } + + def fromMissingAttribute(key: String, tpe: String) = + new ValidationException("missing value for REQUIRED attribute %s of type %s".format(key, tpe)) +} diff --git a/src/xml/scala/xml/dtd/impl/Base.scala b/src/xml/scala/xml/dtd/impl/Base.scala new file mode 100644 index 0000000000..91ff03a93a --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/Base.scala @@ -0,0 +1,67 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | 
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** Basic regular expressions. + * + * @author Burak Emir + * @version 1.0 + */ + +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class Base { + type _regexpT <: RegExp + + abstract class RegExp { + val isNullable: Boolean + } + + object Alt { + /** `Alt( R,R,R* )`. */ + def apply(rs: _regexpT*) = + if (rs.size < 2) throw new SyntaxError("need at least 2 branches in Alt") + else new Alt(rs: _*) + // Can't enforce that statically without changing the interface + // def apply(r1: _regexpT, r2: _regexpT, rs: _regexpT*) = new Alt(Seq(r1, r2) ++ rs: _*) + def unapplySeq(x: Alt) = Some(x.rs) + } + + class Alt private (val rs: _regexpT*) extends RegExp { + final val isNullable = rs exists (_.isNullable) + } + + object Sequ { + /** Sequ( R,R* ) */ + def apply(rs: _regexpT*) = if (rs.isEmpty) Eps else new Sequ(rs: _*) + def unapplySeq(x: Sequ) = Some(x.rs) + } + + class Sequ private (val rs: _regexpT*) extends RegExp { + final val isNullable = rs forall (_.isNullable) + } + + case class Star(r: _regexpT) extends RegExp { + final lazy val isNullable = true + } + + // The empty Sequ. + case object Eps extends RegExp { + final lazy val isNullable = true + override def toString() = "Eps" + } + + /** this class can be used to add meta information to regexps. 
*/ + class Meta(r1: _regexpT) extends RegExp { + final val isNullable = r1.isNullable + def r = r1 + } +} diff --git a/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala new file mode 100644 index 0000000000..f30309b037 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/BaseBerrySethi.scala @@ -0,0 +1,98 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +// todo: replace global variable pos with acc + +/** This class turns a regular expression over `A` into a + * [[scala.util.automata.NondetWordAutom]] over `A` using the celebrated + * position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class BaseBerrySethi { + val lang: Base + import lang.{ Alt, Eps, Meta, RegExp, Sequ, Star } + + protected var pos = 0 + + // results which hold all info for the NondetWordAutomaton + protected var follow: mutable.HashMap[Int, Set[Int]] = _ + + protected var finalTag: Int = _ + + protected var finals: immutable.Map[Int, Int] = _ // final states + + // constants -------------------------- + + final val emptySet: Set[Int] = Set() + + private def doComp(r: RegExp, compFunction: RegExp => Set[Int]) = r match { + case x: Alt => (x.rs map compFirst).foldLeft(emptySet)(_ ++ _) + case Eps => emptySet + case x: Meta => compFunction(x.r) + case x: Sequ => + val (l1, l2) = x.rs span (_.isNullable) + ((l1 ++ (l2 take 1)) map compFunction).foldLeft(emptySet)(_ ++ _) + case Star(t) => compFunction(t) + case _ => throw new IllegalArgumentException("unexpected pattern " + r.getClass) + } + + /** Computes `first(r)` for the word regexp `r`. 
*/ + protected def compFirst(r: RegExp): Set[Int] = doComp(r, compFirst) + + /** Computes `last(r)` for the regexp `r`. */ + protected def compLast(r: RegExp): Set[Int] = doComp(r, compLast) + + /** Starts from the right-to-left + * precondition: pos is final + * pats are successor patterns of a Sequence node + */ + protected def compFollow(rs: Seq[RegExp]): Set[Int] = { + follow(0) = + if (rs.isEmpty) emptySet + else rs.foldRight(Set(pos))((p, fol) => { + val first = compFollow1(fol, p) + + if (p.isNullable) fol ++ first + else first + }) + + follow(0) + } + + /** Returns the first set of an expression, setting the follow set along the way. + */ + protected def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { + case x: Alt => Set((x.rs reverseMap (compFollow1(fol1, _))).flatten: _*) + case x: Meta => compFollow1(fol1, x.r) + case x: Star => compFollow1(fol1 ++ compFirst(x.r), x.r) + case x: Sequ => + x.rs.foldRight(fol1) { (p, fol) => + val first = compFollow1(fol, p) + + if (p.isNullable) fol ++ first + else first + } + case _ => throw new IllegalArgumentException("unexpected pattern: " + r.getClass) + } + + /** Returns the "Sethi-length" of a pattern, creating the set of position along the way. 
+ */ + protected def traverse(r: RegExp): Unit = r match { + // (is tree automaton stuff, more than Berry-Sethi) + case x: Alt => x.rs foreach traverse + case x: Sequ => x.rs foreach traverse + case x: Meta => traverse(x.r) + case Star(t) => traverse(t) + case _ => throw new IllegalArgumentException("unexp pattern " + r.getClass) + } +} diff --git a/src/xml/scala/xml/dtd/impl/DetWordAutom.scala b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala new file mode 100644 index 0000000000..6f8ba4de72 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/DetWordAutom.scala @@ -0,0 +1,50 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +/** A deterministic automaton. States are integers, where + * 0 is always the only initial state. Transitions are represented + * in the delta function. A default transitions is one that + * is taken when no other transition can be taken. + * All states are reachable. Accepting states are those for which + * the partial function 'finals' is defined. 
+ * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class DetWordAutom[T <: AnyRef] { + val nstates: Int + val finals: Array[Int] + val delta: Array[mutable.Map[T, Int]] + val default: Array[Int] + + def isFinal(q: Int) = finals(q) != 0 + def isSink(q: Int) = delta(q).isEmpty && default(q) == q + def next(q: Int, label: T) = delta(q).getOrElse(label, default(q)) + + override def toString() = { + val sb = new StringBuilder("[DetWordAutom nstates=") + sb.append(nstates) + sb.append(" finals=") + val map = Map(finals.zipWithIndex map (_.swap): _*) + sb.append(map.toString()) + sb.append(" delta=\n") + + for (i <- 0 until nstates) { + sb append "%d->%s\n".format(i, delta(i)) + if (i < default.length) + sb append "_>%s\n".format(default(i)) + } + sb.toString + } +} diff --git a/src/xml/scala/xml/dtd/impl/Inclusion.scala b/src/xml/scala/xml/dtd/impl/Inclusion.scala new file mode 100644 index 0000000000..07b6afaeba --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/Inclusion.scala @@ -0,0 +1,70 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + + +/** A fast test of language inclusion between minimal automata. + * inspired by the ''AMoRE automata library''. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] trait Inclusion[A <: AnyRef] { + + val labels: Seq[A] + + /** Returns true if `dfa1` is included in `dfa2`. 
+ */ + def inclusion(dfa1: DetWordAutom[A], dfa2: DetWordAutom[A]) = { + + def encode(q1: Int, q2: Int) = 1 + q1 + q2 * dfa1.nstates + def decode2(c: Int) = (c-1) / (dfa1.nstates) //integer division + def decode1(c: Int) = (c-1) % (dfa1.nstates) + + var q1 = 0 //dfa1.initstate; // == 0 + var q2 = 0 //dfa2.initstate; // == 0 + + val max = 1 + dfa1.nstates * dfa2.nstates + val mark = new Array[Int](max) + + var result = true + var current = encode(q1, q2) + var last = current + mark(last) = max // mark (q1,q2) + while (current != 0 && result) { + //Console.println("current = [["+q1+" "+q2+"]] = "+current); + for (letter <- labels) { + val r1 = dfa1.next(q1,letter) + val r2 = dfa2.next(q2,letter) + if (dfa1.isFinal(r1) && !dfa2.isFinal(r2)) + result = false + val test = encode(r1, r2) + //Console.println("test = [["+r1+" "+r2+"]] = "+test); + if (mark(test) == 0) { + mark(last) = test + mark(test) = max + last = test + } + } + val ncurrent = mark(current) + if( ncurrent != max ) { + q1 = decode1(ncurrent) + q2 = decode2(ncurrent) + current = ncurrent + } else { + current = 0 + } + } + result + } +} diff --git a/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala new file mode 100644 index 0000000000..0bb19a7e3e --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/NondetWordAutom.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ immutable, mutable } + +/** A nondeterministic automaton. States are integers, where + * 0 is always the only initial state. Transitions are represented + * in the delta function. Default transitions are transitions that + * are taken when no other transitions can be applied. + * All states are reachable. 
Accepting states are those for which + * the partial function `finals` is defined. + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class NondetWordAutom[T <: AnyRef] { + val nstates: Int + val labels: Seq[T] + val finals: Array[Int] // 0 means not final + val delta: Array[mutable.Map[T, immutable.BitSet]] + val default: Array[immutable.BitSet] + + /** @return true if the state is final */ + final def isFinal(state: Int) = finals(state) > 0 + + /** @return tag of final state */ + final def finalTag(state: Int) = finals(state) + + /** @return true if the set of states contains at least one final state */ + final def containsFinal(Q: immutable.BitSet): Boolean = Q exists isFinal + + /** @return true if there are no accepting states */ + final def isEmpty = (0 until nstates) forall (x => !isFinal(x)) + + /** @return an immutable.BitSet with the next states for given state and label */ + def next(q: Int, a: T): immutable.BitSet = delta(q).getOrElse(a, default(q)) + + /** @return an immutable.BitSet with the next states for given state and label */ + def next(Q: immutable.BitSet, a: T): immutable.BitSet = next(Q, next(_, a)) + def nextDefault(Q: immutable.BitSet): immutable.BitSet = next(Q, default) + + private def next(Q: immutable.BitSet, f: (Int) => immutable.BitSet): immutable.BitSet = + (Q map f).foldLeft(immutable.BitSet.empty)(_ ++ _) + + private def finalStates = 0 until nstates filter isFinal + override def toString = { + + val finalString = Map(finalStates map (j => j -> finals(j)) : _*).toString + val deltaString = (0 until nstates) + .map(i => " %d->%s\n _>%s\n".format(i, delta(i), default(i))).mkString + + "[NondetWordAutom nstates=%d finals=%s delta=\n%s".format(nstates, finalString, deltaString) + } +} diff --git a/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala new file mode 100644 index 0000000000..1720604132 --- /dev/null +++ 
b/src/xml/scala/xml/dtd/impl/PointedHedgeExp.scala @@ -0,0 +1,37 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** Pointed regular hedge expressions, a useful subclass of regular hedge expressions. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class PointedHedgeExp extends Base { + + type _regexpT <: RegExp + type _labelT + + case class Node(label: _labelT, r: _regexpT) extends RegExp { + final val isNullable = false + } + + case class TopIter(r1: _regexpT, r2: _regexpT) extends RegExp { + final val isNullable = r1.isNullable && r2.isNullable //? + } + + case object Point extends RegExp { + final val isNullable = false + } + +} diff --git a/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala new file mode 100644 index 0000000000..632ca1eb18 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/SubsetConstruction.scala @@ -0,0 +1,108 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ mutable, immutable } + +@deprecated("This class will be removed", "2.10.0") +private[dtd] class SubsetConstruction[T <: AnyRef](val nfa: NondetWordAutom[T]) { + import nfa.labels + + def selectTag(Q: immutable.BitSet, finals: Array[Int]) = + (Q map finals filter (_ > 0)).min + + def determinize: DetWordAutom[T] = { + // for assigning numbers to bitsets + var indexMap = scala.collection.Map[immutable.BitSet, Int]() + var invIndexMap = scala.collection.Map[Int, immutable.BitSet]() + var ix = 0 + + // we compute the dfa 
with states = bitsets + val q0 = immutable.BitSet(0) // the set { 0 } + val sink = immutable.BitSet.empty // the set { } + + var states = Set(q0, sink) // initial set of sets + val delta = new mutable.HashMap[immutable.BitSet, mutable.HashMap[T, immutable.BitSet]] + var deftrans = mutable.Map(q0 -> sink, sink -> sink) // initial transitions + var finals: mutable.Map[immutable.BitSet, Int] = mutable.Map() + val rest = new mutable.Stack[immutable.BitSet] + + rest.push(sink, q0) + + def addFinal(q: immutable.BitSet) { + if (nfa containsFinal q) + finals = finals.updated(q, selectTag(q, nfa.finals)) + } + def add(Q: immutable.BitSet) { + if (!states(Q)) { + states += Q + rest push Q + addFinal(Q) + } + } + + addFinal(q0) // initial state may also be a final state + + while (!rest.isEmpty) { + val P = rest.pop() + // assign a number to this bitset + indexMap = indexMap.updated(P, ix) + invIndexMap = invIndexMap.updated(ix, P) + ix += 1 + + // make transition map + val Pdelta = new mutable.HashMap[T, immutable.BitSet] + delta.update(P, Pdelta) + + labels foreach { label => + val Q = nfa.next(P, label) + Pdelta.update(label, Q) + add(Q) + } + + // collect default transitions + val Pdef = nfa nextDefault P + deftrans = deftrans.updated(P, Pdef) + add(Pdef) + } + + // create DetWordAutom, using indices instead of sets + val nstatesR = states.size + val deltaR = new Array[mutable.Map[T, Int]](nstatesR) + val defaultR = new Array[Int](nstatesR) + val finalsR = new Array[Int](nstatesR) + + for (Q <- states) { + val q = indexMap(Q) + val trans = delta(Q) + val transDef = deftrans(Q) + val qDef = indexMap(transDef) + val ntrans = new mutable.HashMap[T, Int]() + + for ((label, value) <- trans) { + val p = indexMap(value) + if (p != qDef) + ntrans.update(label, p) + } + + deltaR(q) = ntrans + defaultR(q) = qDef + } + + finals foreach { case (k,v) => finalsR(indexMap(k)) = v } + + new DetWordAutom [T] { + val nstates = nstatesR + val delta = deltaR + val default = defaultR + val 
finals = finalsR + } + } +} diff --git a/src/xml/scala/xml/dtd/impl/SyntaxError.scala b/src/xml/scala/xml/dtd/impl/SyntaxError.scala new file mode 100644 index 0000000000..a5b8a5aba0 --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/SyntaxError.scala @@ -0,0 +1,21 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** This runtime exception is thrown if an attempt to instantiate a + * syntactically incorrect expression is detected. + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] class SyntaxError(e: String) extends RuntimeException(e) diff --git a/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala new file mode 100644 index 0000000000..9bf3fa518b --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/WordBerrySethi.scala @@ -0,0 +1,162 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml.dtd.impl + +import scala.collection.{ immutable, mutable } + +/** This class turns a regular expression into a [[scala.util.automata.NondetWordAutom]] + * celebrated position automata construction (also called ''Berry-Sethi'' or ''Glushkov''). 
+ * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class WordBerrySethi extends BaseBerrySethi { + override val lang: WordExp + + import lang.{ Alt, Eps, Letter, RegExp, Sequ, Star, _labelT } + + protected var labels: mutable.HashSet[_labelT] = _ + // don't let this fool you, only labelAt is a real, surjective mapping + protected var labelAt: Map[Int, _labelT] = _ // new alphabet "gamma" + protected var deltaq: Array[mutable.HashMap[_labelT, List[Int]]] = _ // delta + protected var defaultq: Array[List[Int]] = _ // default transitions + protected var initials: Set[Int] = _ + + /** Computes `first(r)` where the word regexp `r`. + * + * @param r the regular expression + * @return the computed set `first(r)` + */ + protected override def compFirst(r: RegExp): Set[Int] = r match { + case x: Letter => Set(x.pos) + case _ => super.compFirst(r) + } + + /** Computes `last(r)` where the word regexp `r`. + * + * @param r the regular expression + * @return the computed set `last(r)` + */ + protected override def compLast(r: RegExp): Set[Int] = r match { + case x: Letter => Set(x.pos) + case _ => super.compLast(r) + } + + /** Returns the first set of an expression, setting the follow set along + * the way. 
+ * + * @param r the regular expression + * @return the computed set + */ + protected override def compFollow1(fol1: Set[Int], r: RegExp): Set[Int] = r match { + case x: Letter => follow(x.pos) = fol1 ; Set(x.pos) + case Eps => emptySet + case _ => super.compFollow1(fol1, r) + } + + /** Returns "Sethi-length" of a pattern, creating the set of position + * along the way + */ + + /** Called at the leaves of the regexp */ + protected def seenLabel(r: RegExp, i: Int, label: _labelT) { + labelAt = labelAt.updated(i, label) + this.labels += label + } + + // overridden in BindingBerrySethi + protected def seenLabel(r: RegExp, label: _labelT): Int = { + pos += 1 + seenLabel(r, pos, label) + pos + } + + // todo: replace global variable pos with acc + override def traverse(r: RegExp): Unit = r match { + case a @ Letter(label) => a.pos = seenLabel(r, label) + case Eps => // ignore + case _ => super.traverse(r) + } + + + protected def makeTransition(src: Int, dest: Int, label: _labelT) { + val q = deltaq(src) + q.update(label, dest :: q.getOrElse(label, Nil)) + } + + protected def initialize(subexpr: Seq[RegExp]): Unit = { + this.labelAt = immutable.Map() + this.follow = mutable.HashMap() + this.labels = mutable.HashSet() + this.pos = 0 + + // determine "Sethi-length" of the regexp + subexpr foreach traverse + + this.initials = Set(0) + } + + protected def initializeAutom() { + finals = immutable.Map.empty[Int, Int] // final states + deltaq = new Array[mutable.HashMap[_labelT, List[Int]]](pos) // delta + defaultq = new Array[List[Int]](pos) // default transitions + + for (j <- 0 until pos) { + deltaq(j) = mutable.HashMap[_labelT, List[Int]]() + defaultq(j) = Nil + } + } + + protected def collectTransitions(): Unit = // make transitions + for (j <- 0 until pos ; fol = follow(j) ; k <- fol) { + if (pos == k) finals = finals.updated(j, finalTag) + else makeTransition(j, k, labelAt(k)) + } + + def automatonFrom(pat: RegExp, finalTag: Int): NondetWordAutom[_labelT] = { + 
this.finalTag = finalTag + + pat match { + case x: Sequ => + // (1,2) compute follow + first + initialize(x.rs) + pos += 1 + compFollow(x.rs) // this used to be assigned to var globalFirst and then never used. + + // (3) make automaton from follow sets + initializeAutom() + collectTransitions() + + if (x.isNullable) // initial state is final + finals = finals.updated(0, finalTag) + + val delta1 = immutable.Map(deltaq.zipWithIndex map (_.swap): _*) + val finalsArr = (0 until pos map (k => finals.getOrElse(k, 0))).toArray // 0 == not final + + val deltaArr: Array[mutable.Map[_labelT, immutable.BitSet]] = + (0 until pos map { x => + mutable.HashMap(delta1(x).toSeq map { case (k, v) => k -> immutable.BitSet(v: _*) } : _*) + }).toArray + + val defaultArr = (0 until pos map (k => immutable.BitSet(defaultq(k): _*))).toArray + + new NondetWordAutom[_labelT] { + val nstates = pos + val labels = WordBerrySethi.this.labels.toList + val finals = finalsArr + val delta = deltaArr + val default = defaultArr + } + case z => + automatonFrom(Sequ(z.asInstanceOf[this.lang._regexpT]), finalTag) + } + } +} diff --git a/src/xml/scala/xml/dtd/impl/WordExp.scala b/src/xml/scala/xml/dtd/impl/WordExp.scala new file mode 100644 index 0000000000..a4bb54c1ea --- /dev/null +++ b/src/xml/scala/xml/dtd/impl/WordExp.scala @@ -0,0 +1,59 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml.dtd.impl + +/** + * The class `WordExp` provides regular word expressions. + * + * Users have to instantiate type member `_regexpT <;: RegExp` + * (from class `Base`) and a type member `_labelT <;: Label`. 
+ * + * Here is a short example: + * {{{ + * import scala.util.regexp._ + * import scala.util.automata._ + * object MyLang extends WordExp { + * type _regexpT = RegExp + * type _labelT = MyChar + * + * case class MyChar(c:Char) extends Label + * } + * import MyLang._ + * // (a* | b)* + * val rex = Star(Alt(Star(Letter(MyChar('a'))),Letter(MyChar('b')))) + * object MyBerriSethi extends WordBerrySethi { + * override val lang = MyLang + * } + * val nfa = MyBerriSethi.automatonFrom(Sequ(rex), 1) + * }}} + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This class will be removed", "2.10.0") +private[dtd] abstract class WordExp extends Base { + + abstract class Label + + type _regexpT <: RegExp + type _labelT <: Label + + case class Letter(a: _labelT) extends RegExp { + final lazy val isNullable = false + var pos = -1 + } + + case class Wildcard() extends RegExp { + final lazy val isNullable = false + var pos = -1 + } +} diff --git a/src/xml/scala/xml/factory/Binder.scala b/src/xml/scala/xml/factory/Binder.scala new file mode 100755 index 0000000000..947f99e6a4 --- /dev/null +++ b/src/xml/scala/xml/factory/Binder.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package factory + +import parsing.ValidatingMarkupHandler + +/** + * @author Burak Emir + */ +abstract class Binder(val preserveWS: Boolean) extends ValidatingMarkupHandler { + + var result: NodeBuffer = new NodeBuffer() + + def reportSyntaxError(pos:Int, str:String) = {} + + final def procInstr(pos: Int, target: String, txt: String) = + ProcInstr(target, txt) + + final def comment(pos: Int, txt: String) = + Comment(txt) + + final def entityRef(pos: Int, n: String) = + EntityRef(n) + + final def text(pos: Int, txt: String) = + Text(txt) + + final def traverse(n:Node): Unit = n 
match { + case x:ProcInstr => + result &+ procInstr(0, x.target, x.text) + case x:Comment => + result &+ comment(0, x.text) + case x:Text => + result &+ text(0, x.data) + case x:EntityRef => + result &+ entityRef(0, x.entityName) + case x:Elem => + elemStart(0, x.prefix, x.label, x.attributes, x.scope) + val old = result + result = new NodeBuffer() + for (m <- x.child) traverse(m) + result = old &+ elem(0, x.prefix, x.label, x.attributes, x.scope, x.minimizeEmpty, NodeSeq.fromSeq(result)).toList + elemEnd(0, x.prefix, x.label) + } + + final def validate(n: Node): Node = { + this.rootLabel = n.label + traverse(n) + result(0) + } +} diff --git a/src/xml/scala/xml/factory/LoggedNodeFactory.scala b/src/xml/scala/xml/factory/LoggedNodeFactory.scala new file mode 100644 index 0000000000..bc074bfc83 --- /dev/null +++ b/src/xml/scala/xml/factory/LoggedNodeFactory.scala @@ -0,0 +1,90 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package factory + +/** This class logs what the nodefactory is actually doing. 
+ * If you want to see what happens during loading, use it like this: +{{{ +object testLogged extends App { + val x = new scala.xml.parsing.NoBindingFactoryAdapter + with scala.xml.factory.LoggedNodeFactory[scala.xml.Elem] { + override def log(s: String) = println(s) + } + + Console.println("Start") + val doc = x.load(new java.net.URL("http://example.com/file.xml")) + Console.println("End") + Console.println(doc) +} +}}} + * + * @author Burak Emir + * @version 1.0 + */ +@deprecated("This trait will be removed.", "2.11") +trait LoggedNodeFactory[A <: Node] extends NodeFactory[A] { + // configuration values + val logNode = true + val logText = false + val logComment = false + val logProcInstr = false + + final val NONE = 0 + final val CACHE = 1 + final val FULL = 2 + /** 0 = no logging, 1 = cache hits, 2 = detail */ + val logCompressLevel = 1 + + // methods of NodeFactory + + /** logged version of makeNode method */ + override def makeNode(pre: String, label: String, attrSeq: MetaData, + scope: NamespaceBinding, children: Seq[Node]): A = { + if (logNode) + log("[makeNode for "+label+"]") + + val hash = Utility.hashCode(pre, label, attrSeq.##, scope.##, children) + + /* + if(logCompressLevel >= FULL) { + log("[hashcode total:"+hash); + log(" elem name "+uname+" hash "+ ? 
)); + log(" attrs "+attrSeq+" hash "+attrSeq.hashCode()); + log(" children :"+children+" hash "+children.hashCode()); + } + */ + if (!cache.get( hash ).isEmpty && (logCompressLevel >= CACHE)) + log("[cache hit !]") + + super.makeNode(pre, label, attrSeq, scope, children) + } + + override def makeText(s: String) = { + if (logText) + log("[makeText:\""+s+"\"]") + super.makeText(s) + } + + override def makeComment(s: String): Seq[Comment] = { + if (logComment) + log("[makeComment:\""+s+"\"]") + super.makeComment(s) + } + + override def makeProcInstr(t: String, s: String): Seq[ProcInstr] = { + if (logProcInstr) + log("[makeProcInstr:\""+t+" "+ s+"\"]") + super.makeProcInstr(t, s) + } + + @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") + def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/factory/NodeFactory.scala b/src/xml/scala/xml/factory/NodeFactory.scala new file mode 100644 index 0000000000..94801bb554 --- /dev/null +++ b/src/xml/scala/xml/factory/NodeFactory.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package factory + +import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } +import java.io.{ InputStream, Reader, StringReader, File, FileDescriptor, FileInputStream } + +trait NodeFactory[A <: Node] { + val ignoreComments = false + val ignoreProcInstr = false + + /* default behaviour is to use hash-consing */ + val cache = new scala.collection.mutable.HashMap[Int, List[A]] + + protected def create(pre: String, name: String, attrs: MetaData, scope: NamespaceBinding, children:Seq[Node]): A + + protected def construct(hash: Int, old:List[A], pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children:Seq[Node]): A = { + val el = create(pre, name, attrSeq, scope, children) + 
cache.update(hash, el :: old) + el + } + + def eqElements(ch1: Seq[Node], ch2: Seq[Node]): Boolean = + ch1.view.zipAll(ch2.view, null, null) forall { case (x,y) => x eq y } + + def nodeEquals(n: Node, pre: String, name: String, attrSeq:MetaData, scope: NamespaceBinding, children: Seq[Node]) = + n.prefix == pre && + n.label == name && + n.attributes == attrSeq && + // scope? + eqElements(n.child, children) + + def makeNode(pre: String, name: String, attrSeq: MetaData, scope: NamespaceBinding, children: Seq[Node]): A = { + val hash = Utility.hashCode( pre, name, attrSeq.##, scope.##, children) + def cons(old: List[A]) = construct(hash, old, pre, name, attrSeq, scope, children) + + (cache get hash) match { + case Some(list) => // find structurally equal + list.find(nodeEquals(_, pre, name, attrSeq, scope, children)) match { + case Some(x) => x + case _ => cons(list) + } + case None => cons(Nil) + } + } + + def makeText(s: String) = Text(s) + def makeComment(s: String): Seq[Comment] = + if (ignoreComments) Nil else List(Comment(s)) + def makeProcInstr(t: String, s: String): Seq[ProcInstr] = + if (ignoreProcInstr) Nil else List(ProcInstr(t, s)) +} diff --git a/src/xml/scala/xml/factory/XMLLoader.scala b/src/xml/scala/xml/factory/XMLLoader.scala new file mode 100644 index 0000000000..b69f187039 --- /dev/null +++ b/src/xml/scala/xml/factory/XMLLoader.scala @@ -0,0 +1,61 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package factory + +import javax.xml.parsers.SAXParserFactory +import parsing.{ FactoryAdapter, NoBindingFactoryAdapter } +import java.io.{ InputStream, Reader, File, FileDescriptor } +import java.net.URL + +/** Presents collection of XML loading methods which use the parser + * created by "def parser". 
+ */ +trait XMLLoader[T <: Node] +{ + import scala.xml.Source._ + def adapter: FactoryAdapter = new NoBindingFactoryAdapter() + + /* Override this to use a different SAXParser. */ + def parser: SAXParser = { + val f = SAXParserFactory.newInstance() + f.setNamespaceAware(false) + f.newSAXParser() + } + + /** Loads XML from the given InputSource, using the supplied parser. + * The methods available in scala.xml.XML use the XML parser in the JDK. + */ + def loadXML(source: InputSource, parser: SAXParser): T = { + val newAdapter = adapter + + newAdapter.scopeStack push TopScope + parser.parse(source, newAdapter) + newAdapter.scopeStack.pop() + + newAdapter.rootElem.asInstanceOf[T] + } + + /** Loads XML from the given file, file descriptor, or filename. */ + def loadFile(file: File): T = loadXML(fromFile(file), parser) + def loadFile(fd: FileDescriptor): T = loadXML(fromFile(fd), parser) + def loadFile(name: String): T = loadXML(fromFile(name), parser) + + /** loads XML from given InputStream, Reader, sysID, InputSource, or URL. */ + def load(is: InputStream): T = loadXML(fromInputStream(is), parser) + def load(reader: Reader): T = loadXML(fromReader(reader), parser) + def load(sysID: String): T = loadXML(fromSysId(sysID), parser) + def load(source: InputSource): T = loadXML(source, parser) + def load(url: URL): T = loadXML(fromInputStream(url.openStream()), parser) + + /** Loads XML from the given String. 
*/ + def loadString(string: String): T = loadXML(fromString(string), parser) +} diff --git a/src/xml/scala/xml/include/CircularIncludeException.scala b/src/xml/scala/xml/include/CircularIncludeException.scala new file mode 100644 index 0000000000..351f403008 --- /dev/null +++ b/src/xml/scala/xml/include/CircularIncludeException.scala @@ -0,0 +1,25 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * A `CircularIncludeException` is thrown when an included document attempts + * to include itself or one of its ancestor documents. + */ +class CircularIncludeException(message: String) extends XIncludeException { + + /** + * Constructs a `CircularIncludeException` with `'''null'''`. + * as its error detail message. + */ + def this() = this(null) + +} diff --git a/src/xml/scala/xml/include/UnavailableResourceException.scala b/src/xml/scala/xml/include/UnavailableResourceException.scala new file mode 100644 index 0000000000..47b176e0f3 --- /dev/null +++ b/src/xml/scala/xml/include/UnavailableResourceException.scala @@ -0,0 +1,20 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * An `UnavailableResourceException` is thrown when an included document + * cannot be found or loaded. 
+ */ +class UnavailableResourceException(message: String) +extends XIncludeException(message) { + def this() = this(null) +} diff --git a/src/xml/scala/xml/include/XIncludeException.scala b/src/xml/scala/xml/include/XIncludeException.scala new file mode 100644 index 0000000000..11e1644d83 --- /dev/null +++ b/src/xml/scala/xml/include/XIncludeException.scala @@ -0,0 +1,58 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include + +/** + * `XIncludeException` is the generic superclass for all checked exceptions + * that may be thrown as a result of a violation of XInclude's rules. + * + * Constructs an `XIncludeException` with the specified detail message. + * The error message string `message` can later be retrieved by the + * `{@link java.lang.Throwable#getMessage}` + * method of class `java.lang.Throwable`. + * + * @param message the detail message. + */ +class XIncludeException(message: String) extends Exception(message) { + + /** + * uses `'''null'''` as its error detail message. + */ + def this() = this(null) + + private var rootCause: Throwable = null + + /** + * When an `IOException`, `MalformedURLException` or other generic + * exception is thrown while processing an XML document for XIncludes, + * it is customarily replaced by some form of `XIncludeException`. + * This method allows you to store the original exception. + * + * @param nestedException the underlying exception which + * caused the XIncludeException to be thrown + */ + def setRootCause(nestedException: Throwable ) { + this.rootCause = nestedException + } + + /** + * When an `IOException`, `MalformedURLException` or other generic + * exception is thrown while processing an XML document for XIncludes, + * it is customarily replaced by some form of `XIncludeException`. 
+ * This method allows you to retrieve the original exception. + * It returns null if no such exception caused this `XIncludeException`. + * + * @return Throwable the underlying exception which caused the + * `XIncludeException` to be thrown + */ + def getRootCause(): Throwable = this.rootCause + +} diff --git a/src/xml/scala/xml/include/sax/EncodingHeuristics.scala b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala new file mode 100644 index 0000000000..57ab5ed91c --- /dev/null +++ b/src/xml/scala/xml/include/sax/EncodingHeuristics.scala @@ -0,0 +1,98 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include.sax + +import java.io.InputStream +import scala.util.matching.Regex + +/** `EncodingHeuristics` reads from a stream + * (which should be buffered) and attempts to guess + * what the encoding of the text in the stream is. + * If it fails to determine the type of the encoding, + * it returns the default UTF-8. + * + * @author Burak Emir + * @author Paul Phillips + */ +object EncodingHeuristics +{ + object EncodingNames { + // UCS-4 isn't yet implemented in java releases anyway... + val bigUCS4 = "UCS-4" + val littleUCS4 = "UCS-4" + val unusualUCS4 = "UCS-4" + val bigUTF16 = "UTF-16BE" + val littleUTF16 = "UTF-16LE" + val utf8 = "UTF-8" + val default = utf8 + } + import EncodingNames._ + + /** This utility method attempts to determine the XML character encoding + * by examining the input stream, as specified at + * [[http://www.w3.org/TR/xml/#sec-guessing w3]]. + * + * @param in `InputStream` to read from. + * @throws IOException if the stream cannot be reset + * @return the name of the encoding. 
+ */ + def readEncodingFromStream(in: InputStream): String = { + var ret: String = null + val bytesToRead = 1024 // enough to read most XML encoding declarations + def resetAndRet = { in.reset ; ret } + + // This may fail if there are a lot of space characters before the end + // of the encoding declaration + in mark bytesToRead + val bytes = (in.read, in.read, in.read, in.read) + + // first look for byte order mark + ret = bytes match { + case (0x00, 0x00, 0xFE, 0xFF) => bigUCS4 + case (0xFF, 0xFE, 0x00, 0x00) => littleUCS4 + case (0x00, 0x00, 0xFF, 0xFE) => unusualUCS4 + case (0xFE, 0xFF, 0x00, 0x00) => unusualUCS4 + case (0xFE, 0xFF, _ , _ ) => bigUTF16 + case (0xFF, 0xFE, _ , _ ) => littleUTF16 + case (0xEF, 0xBB, 0xBF, _ ) => utf8 + case _ => null + } + if (ret != null) + return resetAndRet + + def readASCIIEncoding: String = { + val data = new Array[Byte](bytesToRead - 4) + val length = in.read(data, 0, bytesToRead - 4) + + // Use Latin-1 (ISO-8859-1) because all byte sequences are legal. + val declaration = new String(data, 0, length, "ISO-8859-1") + val regexp = """(?m).*?encoding\s*=\s*["'](.+?)['"]""".r + (regexp findFirstMatchIn declaration) match { + case None => default + case Some(md) => md.subgroups(0) + } + } + + // no byte order mark present; first character must be '<' or whitespace + ret = bytes match { + case (0x00, 0x00, 0x00, '<' ) => bigUCS4 + case ('<' , 0x00, 0x00, 0x00) => littleUCS4 + case (0x00, 0x00, '<' , 0x00) => unusualUCS4 + case (0x00, '<' , 0x00, 0x00) => unusualUCS4 + case (0x00, '<' , 0x00, '?' ) => bigUTF16 // XXX must read encoding + case ('<' , 0x00, '?' , 0x00) => littleUTF16 // XXX must read encoding + case ('<' , '?' 
, 'x' , 'm' ) => readASCIIEncoding + case (0x4C, 0x6F, 0xA7, 0x94) => utf8 // XXX EBCDIC + case _ => utf8 // no XML or text declaration present + } + resetAndRet + } +} diff --git a/src/xml/scala/xml/include/sax/XIncludeFilter.scala b/src/xml/scala/xml/include/sax/XIncludeFilter.scala new file mode 100644 index 0000000000..3fa3beefb0 --- /dev/null +++ b/src/xml/scala/xml/include/sax/XIncludeFilter.scala @@ -0,0 +1,373 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include.sax + +import scala.xml.include._ + +import org.xml.sax.{ Attributes, XMLReader, Locator } +import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl } + +import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException } +import java.util.Stack +import java.net.{ URL, MalformedURLException } + +/** This is a SAX filter which resolves all XInclude include elements before + * passing them on to the client application. Currently this class has the + * following known deviation from the XInclude specification: + * + * 1. XPointer is not supported. + * + * Furthermore, I would definitely use a new instance of this class for each + * document you want to process. I doubt it can be used successfully on + * multiple documents. Furthermore, I can virtually guarantee that this + * class is not thread safe. You have been warned. + * + * Since this class is not designed to be subclassed, and since I have not + * yet considered how that might affect the methods herein or what other + * protected methods might be needed to support subclasses, I have declared + * this class final. I may remove this restriction later, though the use-case + * for subclassing is weak. 
This class is designed to have its functionality + * extended via a horizontal chain of filters, not a vertical hierarchy of + * sub and superclasses. + * + * To use this class: + * + * - Construct an `XIncludeFilter` object with a known base URL + * - Pass the `XMLReader` object from which the raw document will be read to + * the `setParent()` method of this object. + * - Pass your own `ContentHandler` object to the `setContentHandler()` + * method of this object. This is the object which will receive events + * from the parsed and included document. + * - Optional: if you wish to receive comments, set your own `LexicalHandler` + * object as the value of this object's + * `http://xml.org/sax/properties/lexical-handler` property. + * Also make sure your `LexicalHandler` asks this object for the status of + * each comment using `insideIncludeElement` before doing anything with the + * comment. + * - Pass the URL of the document to read to this object's `parse()` method + * + * e.g. + * {{{ + * val includer = new XIncludeFilter(base) + * includer setParent parser + * includer setContentHandler new SAXXIncluder(System.out) + * includer parse args(i) + * }}} + * translated from Elliotte Rusty Harold's Java source. + * + * @author Burak Emir + */ +class XIncludeFilter extends XMLFilterImpl { + + final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude" + + private val bases = new Stack[URL]() + private val locators = new Stack[Locator]() + +/* private EntityResolver resolver; + + public XIncludeFilter() { + this(null); + } + + public XIncludeFilter(EntityResolver resolver) { + this.resolver = resolver; + } */ + + + // what if this isn't called???? + // do I need to check this in startDocument() and push something + // there???? 
+ override def setDocumentLocator(locator: Locator) { + locators push locator + val base = locator.getSystemId() + try { + bases.push(new URL(base)) + } + catch { + case e:MalformedURLException => + throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base) + } + super.setDocumentLocator(locator) + } + + + // necessary to throw away contents of non-empty XInclude elements + private var level = 0 + + /** This utility method returns true if and only if this reader is + * currently inside a non-empty include element. (This is '''not''' the + * same as being inside the node set which replaces the include element.) + * This is primarily needed for comments inside include elements. + * It must be checked by the actual `LexicalHandler` to see whether + * a comment is passed or not. + * + * @return boolean + */ + def insideIncludeElement(): Boolean = level != 0 + + override def startElement(uri: String, localName: String, qName: String, atts1: Attributes) { + var atts = atts1 + if (level == 0) { // We're not inside an xi:include element + + // Adjust bases stack by pushing either the new + // value of xml:base or the base of the parent + val base = atts.getValue(NamespaceSupport.XMLNS, "base") + val parentBase = bases.peek().asInstanceOf[URL] + var currentBase = parentBase + if (base != null) { + try { + currentBase = new URL(parentBase, base) + } + catch { + case e: MalformedURLException => + throw new SAXException("Malformed base URL: " + + currentBase, e) + } + } + bases push currentBase + + if (uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")) { + // include external document + val href = atts.getValue("href") + // Verify that there is an href attribute + if (href == null) { + throw new SAXException("Missing href attribute") + } + + var parse = atts getValue "parse" + if (parse == null) parse = "xml" + + if (parse equals "text") { + val encoding = atts getValue "encoding" + includeTextDocument(href, encoding) + } + else if (parse equals 
"xml") { + includeXMLDocument(href) + } + // Need to check this also in DOM and JDOM???? + else { + throw new SAXException( + "Illegal value for parse attribute: " + parse) + } + level += 1 + } + else { + if (atRoot) { + // add xml:base attribute if necessary + val attsImpl = new AttributesImpl(atts) + attsImpl.addAttribute(NamespaceSupport.XMLNS, "base", + "xml:base", "CDATA", currentBase.toExternalForm()) + atts = attsImpl + atRoot = false + } + super.startElement(uri, localName, qName, atts) + } + } + } + + override def endElement(uri: String, localName: String, qName: String) { + if (uri.equals(XINCLUDE_NAMESPACE) + && localName.equals("include")) { + level -= 1 + } + else if (level == 0) { + bases.pop() + super.endElement(uri, localName, qName) + } + } + + private var depth = 0 + + override def startDocument() { + level = 0 + if (depth == 0) super.startDocument() + depth += 1 + } + + override def endDocument() { + locators.pop() + bases.pop() // pop the URL for the document itself + depth -= 1 + if (depth == 0) super.endDocument() + } + + // how do prefix mappings move across documents???? 
+ override def startPrefixMapping(prefix: String , uri: String) { + if (level == 0) super.startPrefixMapping(prefix, uri) + } + + override def endPrefixMapping(prefix: String) { + if (level == 0) super.endPrefixMapping(prefix) + } + + override def characters(ch: Array[Char], start: Int, length: Int) { + if (level == 0) super.characters(ch, start, length) + } + + override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int) { + if (level == 0) super.ignorableWhitespace(ch, start, length) + } + + override def processingInstruction(target: String, data: String) { + if (level == 0) super.processingInstruction(target, data) + } + + override def skippedEntity(name: String) { + if (level == 0) super.skippedEntity(name) + } + + // convenience method for error messages + private def getLocation(): String = { + var locationString = "" + val locator = locators.peek().asInstanceOf[Locator] + var publicID = "" + var systemID = "" + var column = -1 + var line = -1 + if (locator != null) { + publicID = locator.getPublicId() + systemID = locator.getSystemId() + line = locator.getLineNumber() + column = locator.getColumnNumber() + } + locationString = (" in document included from " + publicID + + " at " + systemID + + " at line " + line + ", column " + column) + + locationString + } + + /** This utility method reads a document at a specified URL and fires off + * calls to `characters()`. It's used to include files with `parse="text"`. + * + * @param url URL of the document that will be read + * @param encoding1 Encoding of the document; e.g. UTF-8, + * ISO-8859-1, etc. 
+ * @return void + * @throws SAXException if the requested document cannot + be downloaded from the specified URL + or if the encoding is not recognized + */ + private def includeTextDocument(url: String, encoding1: String) { + var encoding = encoding1 + if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8" + var source: URL = null + try { + val base = bases.peek().asInstanceOf[URL] + source = new URL(base, url) + } + catch { + case e: MalformedURLException => + val ex = new UnavailableResourceException("Unresolvable URL " + url + + getLocation()) + ex.setRootCause(e) + throw new SAXException("Unresolvable URL " + url + getLocation(), ex) + } + + try { + val uc = source.openConnection() + val in = new BufferedInputStream(uc.getInputStream()) + val encodingFromHeader = uc.getContentEncoding() + var contentType = uc.getContentType() + if (encodingFromHeader != null) + encoding = encodingFromHeader + else { + // What if file does not have a MIME type but name ends in .xml???? 
+ // MIME types are case-insensitive + // Java may be picking this up from file URL + if (contentType != null) { + contentType = contentType.toLowerCase() + if (contentType.equals("text/xml") + || contentType.equals("application/xml") + || (contentType.startsWith("text/") && contentType.endsWith("+xml") ) + || (contentType.startsWith("application/") && contentType.endsWith("+xml"))) { + encoding = EncodingHeuristics.readEncodingFromStream(in) + } + } + } + val reader = new InputStreamReader(in, encoding) + val c = new Array[Char](1024) + var charsRead: Int = 0 // bogus init value + do { + charsRead = reader.read(c, 0, 1024) + if (charsRead > 0) this.characters(c, 0, charsRead) + } while (charsRead != -1) + } + catch { + case e: UnsupportedEncodingException => + throw new SAXException("Unsupported encoding: " + + encoding + getLocation(), e) + case e: IOException => + throw new SAXException("Document not found: " + + source.toExternalForm() + getLocation(), e) + } + + } + + private var atRoot = false + + /** This utility method reads a document at a specified URL + * and fires off calls to various `ContentHandler` methods. + * It's used to include files with `parse="xml"`. + * + * @param url URL of the document that will be read + * @return void + * @throws SAXException if the requested document cannot + be downloaded from the specified URL. 
+ */ + private def includeXMLDocument(url: String) { + val source = + try new URL(bases.peek(), url) + catch { + case e: MalformedURLException => + val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation()) + ex setRootCause e + throw new SAXException("Unresolvable URL " + url + getLocation(), ex) + } + + try { + val parser: XMLReader = + try XMLReaderFactory.createXMLReader() + catch { + case e: SAXException => + try XMLReaderFactory.createXMLReader(XercesClassName) + catch { case _: SAXException => return System.err.println("Could not find an XML parser") } + } + + parser setContentHandler this + val resolver = this.getEntityResolver() + if (resolver != null) + parser setEntityResolver resolver + + // save old level and base + val previousLevel = level + this.level = 0 + if (bases contains source) + throw new SAXException( + "Circular XInclude Reference", + new CircularIncludeException("Circular XInclude Reference to " + source + getLocation()) + ) + + bases push source + atRoot = true + parser parse source.toExternalForm() + + // restore old level and base + this.level = previousLevel + bases.pop() + } + catch { + case e: IOException => + throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e) + } + } +} diff --git a/src/xml/scala/xml/include/sax/XIncluder.scala b/src/xml/scala/xml/include/sax/XIncluder.scala new file mode 100644 index 0000000000..1939fa1875 --- /dev/null +++ b/src/xml/scala/xml/include/sax/XIncluder.scala @@ -0,0 +1,187 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package include.sax + +import scala.collection.mutable +import org.xml.sax.{ ContentHandler, XMLReader, Locator, Attributes } +import org.xml.sax.ext.LexicalHandler +import java.io.{ File, OutputStream, OutputStreamWriter, 
Writer, IOException } + +/** XIncluder is a SAX `ContentHandler` that writes its XML document onto + * an output stream after resolving all `xinclude:include` elements. + * + * Based on Eliotte Rusty Harold's SAXXIncluder. + */ +class XIncluder(outs: OutputStream, encoding: String) extends ContentHandler with LexicalHandler { + + var out = new OutputStreamWriter(outs, encoding) + + def setDocumentLocator(locator: Locator) {} + + def startDocument() { + try { + out.write("\r\n") + } + catch { + case e:IOException => + throw new SAXException("Write failed", e) + } + } + + def endDocument() { + try { + out.flush() + } + catch { + case e:IOException => + throw new SAXException("Flush failed", e) + } + } + + def startPrefixMapping(prefix: String , uri: String) {} + + def endPrefixMapping(prefix: String) {} + + def startElement(namespaceURI: String, localName: String, qualifiedName: String, atts: Attributes) = { + try { + out.write("<" + qualifiedName) + var i = 0; while (i < atts.getLength()) { + out.write(" ") + out.write(atts.getQName(i)) + out.write("='") + val value = atts.getValue(i) + // @todo Need to use character references if the encoding + // can't support the character + out.write(scala.xml.Utility.escape(value)) + out.write("'") + i += 1 + } + out.write(">") + } + catch { + case e:IOException => + throw new SAXException("Write failed", e) + } + } + + def endElement(namespaceURI: String, localName:String, qualifiedName: String) { + try { + out.write("") + } + catch { + case e: IOException => + throw new SAXException("Write failed", e) + } + } + + // need to escape characters that are not in the given + // encoding using character references???? + def characters(ch: Array[Char], start: Int, length: Int) { + try { + var i = 0; while (i < length) { + val c = ch(start+i) + if (c == '&') out.write("&") + else if (c == '<') out.write("<") + // This next fix is normally not necessary. 
+ // However, it is required if text contains ]]> + // (The end CDATA section delimiter) + else if (c == '>') out.write(">") + else out.write(c.toInt) + i += 1 + } + } + catch { + case e: IOException => + throw new SAXException("Write failed", e) + } + } + + def ignorableWhitespace(ch: Array[Char], start: Int , length: Int) { + this.characters(ch, start, length) + } + + // do I need to escape text in PI???? + def processingInstruction(target: String, data: String) { + try { + out.write("") + } + catch { + case e:IOException => + throw new SAXException("Write failed", e) + } + } + + def skippedEntity(name: String) { + try { + out.write("&" + name + ";") + } + catch { + case e:IOException => + throw new SAXException("Write failed", e) + } + } + + // LexicalHandler methods + private var inDTD: Boolean = false + private val entities = new mutable.Stack[String]() + + def startDTD(name: String, publicID: String, systemID: String) { + inDTD = true + // if this is the source document, output a DOCTYPE declaration + if (entities.isEmpty) { + var id = "" + if (publicID != null) id = " PUBLIC \"" + publicID + "\" \"" + systemID + '"' + else if (systemID != null) id = " SYSTEM \"" + systemID + '"' + try { + out.write("\r\n") + } + catch { + case e:IOException => + throw new SAXException("Error while writing DOCTYPE", e) + } + } + } + def endDTD() {} + + def startEntity(name: String) { + entities push name + } + + def endEntity(name: String) { + entities.pop() + } + + def startCDATA() {} + def endCDATA() {} + + // Just need this reference so we can ask if a comment is + // inside an include element or not + private var filter: XIncludeFilter = null + + def setFilter(filter: XIncludeFilter) { + this.filter = filter + } + + def comment(ch: Array[Char], start: Int, length: Int) { + if (!inDTD && !filter.insideIncludeElement()) { + try { + out.write("") + } + catch { + case e: IOException => + throw new SAXException("Write failed", e) + } + } + } +} diff --git 
a/src/xml/scala/xml/package.scala b/src/xml/scala/xml/package.scala new file mode 100644 index 0000000000..4001cc5ffb --- /dev/null +++ b/src/xml/scala/xml/package.scala @@ -0,0 +1,19 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala + +package object xml { + val XercesClassName = "org.apache.xerces.parsers.SAXParser" + + type SAXException = org.xml.sax.SAXException + type SAXParseException = org.xml.sax.SAXParseException + type EntityResolver = org.xml.sax.EntityResolver + type InputSource = org.xml.sax.InputSource + type SAXParser = javax.xml.parsers.SAXParser +} diff --git a/src/xml/scala/xml/parsing/ConstructingHandler.scala b/src/xml/scala/xml/parsing/ConstructingHandler.scala new file mode 100755 index 0000000000..ba416e4301 --- /dev/null +++ b/src/xml/scala/xml/parsing/ConstructingHandler.scala @@ -0,0 +1,34 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +/** Implementation of MarkupHandler that constructs nodes. 
+ * + * @author Burak Emir + * @version 1.0 + */ +abstract class ConstructingHandler extends MarkupHandler +{ + val preserveWS: Boolean + + def elem(pos: Int, pre: String, label: String, attrs: MetaData, + pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = + Elem(pre, label, attrs, pscope, empty, nodes:_*) + + def procInstr(pos: Int, target: String, txt: String) = + ProcInstr(target, txt) + + def comment(pos: Int, txt: String) = Comment(txt) + def entityRef(pos: Int, n: String) = EntityRef(n) + def text(pos: Int, txt: String) = Text(txt) +} diff --git a/src/xml/scala/xml/parsing/ConstructingParser.scala b/src/xml/scala/xml/parsing/ConstructingParser.scala new file mode 100644 index 0000000000..3caeddabf4 --- /dev/null +++ b/src/xml/scala/xml/parsing/ConstructingParser.scala @@ -0,0 +1,55 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +import java.io.File +import scala.io.Source + +object ConstructingParser { + def fromFile(inp: File, preserveWS: Boolean) = + new ConstructingParser(Source.fromFile(inp), preserveWS).initialize + + def fromSource(inp: Source, preserveWS: Boolean) = + new ConstructingParser(inp, preserveWS).initialize +} + +/** An xml parser. parses XML and invokes callback methods of a MarkupHandler. + * Don't forget to call next.ch on a freshly instantiated parser in order to + * initialize it. If you get the parser from the object method, initialization + * is already done for you. 
+ * + * {{{ + * object parseFromURL { + * def main(args: Array[String]) { + * val url = args(0) + * val src = scala.io.Source.fromURL(url) + * val cpa = scala.xml.parsing.ConstructingParser.fromSource(src, false) // fromSource initializes automatically + * val doc = cpa.document() + * + * // let's see what it is + * val ppr = new scala.xml.PrettyPrinter(80, 5) + * val ele = doc.docElem + * println("finished parsing") + * val out = ppr.format(ele) + * println(out) + * } + * } + * }}} */ +class ConstructingParser(val input: Source, val preserveWS: Boolean) +extends ConstructingHandler +with ExternalSources +with MarkupParser { + + // default impl. of Logged + override def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala new file mode 100755 index 0000000000..6ec7474843 --- /dev/null +++ b/src/xml/scala/xml/parsing/DefaultMarkupHandler.scala @@ -0,0 +1,30 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + + +/** Default implementation of markup handler always returns `NodeSeq.Empty` */ +abstract class DefaultMarkupHandler extends MarkupHandler { + + def elem(pos: Int, pre: String, label: String, attrs: MetaData, + scope:NamespaceBinding, empty: Boolean, args: NodeSeq) = NodeSeq.Empty + + def procInstr(pos: Int, target: String, txt: String) = NodeSeq.Empty + + def comment(pos: Int, comment: String ): NodeSeq = NodeSeq.Empty + + def entityRef(pos: Int, n: String) = NodeSeq.Empty + + def text(pos: Int, txt:String) = NodeSeq.Empty + +} diff --git a/src/xml/scala/xml/parsing/ExternalSources.scala b/src/xml/scala/xml/parsing/ExternalSources.scala new file mode 100644 index 0000000000..bb939bca95 --- /dev/null +++ 
b/src/xml/scala/xml/parsing/ExternalSources.scala @@ -0,0 +1,38 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +import java.net.URL +import java.io.File.separator + +import scala.io.Source + +/** + * @author Burak Emir + * @version 1.0 + */ +trait ExternalSources { + self: ExternalSources with MarkupParser with MarkupHandler => + + def externalSource(systemId: String): Source = { + if (systemId startsWith "http:") + return Source fromURL new URL(systemId) + + val fileStr: String = input.descr match { + case x if x startsWith "file:" => x drop 5 + case x => x take ((x lastIndexOf separator) + 1) + } + + Source.fromFile(fileStr + systemId) + } +} diff --git a/src/xml/scala/xml/parsing/FactoryAdapter.scala b/src/xml/scala/xml/parsing/FactoryAdapter.scala new file mode 100644 index 0000000000..2154bdf5ba --- /dev/null +++ b/src/xml/scala/xml/parsing/FactoryAdapter.scala @@ -0,0 +1,187 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream } +import scala.collection.{ mutable, Iterator } +import org.xml.sax.Attributes +import org.xml.sax.helpers.DefaultHandler + +// can be mixed into FactoryAdapter if desired +trait ConsoleErrorHandler extends DefaultHandler { + // ignore warning, crimson warns even for entity resolution! 
+ override def warning(ex: SAXParseException): Unit = { } + override def error(ex: SAXParseException): Unit = printError("Error", ex) + override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex) + + protected def printError(errtype: String, ex: SAXParseException): Unit = + Console.withOut(Console.err) { + val s = "[%s]:%d:%d: %s".format( + errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage) + Console.println(s) + Console.flush() + } +} + +/** SAX adapter class, for use with Java SAX parser. Keeps track of + * namespace bindings, without relying on namespace handling of the + * underlying SAX parser. + */ +abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] { + var rootElem: Node = null + + val buffer = new StringBuilder() + val attribStack = new mutable.Stack[MetaData] + val hStack = new mutable.Stack[Node] // [ element ] contains siblings + val tagStack = new mutable.Stack[String] + var scopeStack = new mutable.Stack[NamespaceBinding] + + var curTag : String = null + var capture: Boolean = false + + // abstract methods + + /** Tests if an XML element contains text. + * @return true if element named `localName` contains text. + */ + def nodeContainsText(localName: String): Boolean // abstract + + /** creates an new non-text(tree) node. + * @param elemName + * @param attribs + * @param chIter + * @return a new XML element. + */ + def createNode(pre: String, elemName: String, attribs: MetaData, + scope: NamespaceBinding, chIter: List[Node]): Node // abstract + + /** creates a Text node. + * @param text + * @return a new Text node. + */ + def createText(text: String): Text // abstract + + /** creates a new processing instruction node. + */ + def createProcInstr(target: String, data: String): Seq[ProcInstr] + + // + // ContentHandler methods + // + + val normalizeWhitespace = false + + /** Characters. 
+ * @param ch + * @param offset + * @param length + */ + override def characters(ch: Array[Char], offset: Int, length: Int): Unit = { + if (!capture) return + // compliant: report every character + else if (!normalizeWhitespace) buffer.appendAll(ch, offset, length) + // normalizing whitespace is not compliant, but useful + else { + var it = ch.slice(offset, offset + length).iterator + while (it.hasNext) { + val c = it.next() + val isSpace = c.isWhitespace + buffer append (if (isSpace) ' ' else c) + if (isSpace) + it = it dropWhile (_.isWhitespace) + } + } + } + + private def splitName(s: String) = { + val idx = s indexOf ':' + if (idx < 0) (null, s) + else (s take idx, s drop (idx + 1)) + } + + /* ContentHandler methods */ + + /* Start element. */ + override def startElement( + uri: String, + _localName: String, + qname: String, + attributes: Attributes): Unit = + { + captureText() + tagStack push curTag + curTag = qname + + val localName = splitName(qname)._2 + capture = nodeContainsText(localName) + + hStack push null + var m: MetaData = Null + var scpe: NamespaceBinding = + if (scopeStack.isEmpty) TopScope + else scopeStack.top + + for (i <- 0 until attributes.getLength()) { + val qname = attributes getQName i + val value = attributes getValue i + val (pre, key) = splitName(qname) + def nullIfEmpty(s: String) = if (s == "") null else s + + if (pre == "xmlns" || (pre == null && qname == "xmlns")) { + val arg = if (pre == null) null else key + scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe) + } + else + m = Attribute(Option(pre), key, Text(value), m) + } + + scopeStack push scpe + attribStack push m + } + + + /** captures text, possibly normalizing whitespace + */ + def captureText(): Unit = { + if (capture && buffer.length > 0) + hStack push createText(buffer.toString) + + buffer.clear() + } + + /** End element. + * @param uri + * @param _localName + * @param qname + * @throws org.xml.sax.SAXException if .. 
+ */ + override def endElement(uri: String , _localName: String, qname: String): Unit = { + captureText() + val metaData = attribStack.pop() + + // reverse order to get it right + val v = (Iterator continually hStack.pop takeWhile (_ != null)).toList.reverse + val (pre, localName) = splitName(qname) + val scp = scopeStack.pop() + + // create element + rootElem = createNode(pre, localName, metaData, scp, v) + hStack push rootElem + curTag = tagStack.pop() + capture = curTag != null && nodeContainsText(curTag) // root level + } + + /** Processing instruction. + */ + override def processingInstruction(target: String, data: String) { + hStack pushAll createProcInstr(target, data) + } +} diff --git a/src/xml/scala/xml/parsing/FatalError.scala b/src/xml/scala/xml/parsing/FatalError.scala new file mode 100644 index 0000000000..ab3cb2a74d --- /dev/null +++ b/src/xml/scala/xml/parsing/FatalError.scala @@ -0,0 +1,17 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +/** !!! This is poorly named, but I guess it's in the API. + */ +case class FatalError(msg: String) extends java.lang.RuntimeException(msg) diff --git a/src/xml/scala/xml/parsing/MarkupHandler.scala b/src/xml/scala/xml/parsing/MarkupHandler.scala new file mode 100755 index 0000000000..1ebffb9c90 --- /dev/null +++ b/src/xml/scala/xml/parsing/MarkupHandler.scala @@ -0,0 +1,127 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +import scala.collection.mutable +import scala.io.Source +import scala.xml.dtd._ + +/** class that handles markup - provides callback methods to MarkupParser. 
+ * the default is nonvalidating behaviour + * + * @author Burak Emir + * @version 1.0 + * + * @todo can we ignore more entity declarations (i.e. those with extIDs)? + * @todo expanding entity references + */ +abstract class MarkupHandler { + + /** returns true is this markup handler is validating */ + val isValidating: Boolean = false + + var decls: List[Decl] = Nil + var ent: mutable.Map[String, EntityDecl] = new mutable.HashMap[String, EntityDecl]() + + def lookupElemDecl(Label: String): ElemDecl = { + for (z @ ElemDecl(Label, _) <- decls) + return z + + null + } + + def replacementText(entityName: String): Source = + Source fromString ((ent get entityName) match { + case Some(ParsedEntityDecl(_, IntDef(value))) => value + case Some(ParameterEntityDecl(_, IntDef(value))) => " %s " format value + case Some(_) => "" format entityName + case None => "" format entityName + }) + + def endDTD(n: String): Unit = () + + /** callback method invoked by MarkupParser after start-tag of element. + * + * @param pos the position in the sourcefile + * @param pre the prefix + * @param label the local name + * @param attrs the attributes (metadata) + */ + def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding): Unit = () + + /** callback method invoked by MarkupParser after end-tag of element. + * + * @param pos the position in the source file + * @param pre the prefix + * @param label the local name + */ + def elemEnd(pos: Int, pre: String, label: String): Unit = () + + /** callback method invoked by MarkupParser after parsing an element, + * between the elemStart and elemEnd callbacks + * + * @param pos the position in the source file + * @param pre the prefix + * @param label the local name + * @param attrs the attributes (metadata) + * @param empty `true` if the element was previously empty; `false` otherwise. 
+ * @param args the children of this element + */ + def elem(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, empty: Boolean, args: NodeSeq): NodeSeq + + /** callback method invoked by MarkupParser after parsing PI. + */ + def procInstr(pos: Int, target: String, txt: String): NodeSeq + + /** callback method invoked by MarkupParser after parsing comment. + */ + def comment(pos: Int, comment: String): NodeSeq + + /** callback method invoked by MarkupParser after parsing entity ref. + * @todo expanding entity references + */ + def entityRef(pos: Int, n: String): NodeSeq + + /** callback method invoked by MarkupParser after parsing text. + */ + def text(pos: Int, txt: String): NodeSeq + + // DTD handler methods + + def elemDecl(n: String, cmstr: String): Unit = () + + def attListDecl(name: String, attList: List[AttrDecl]): Unit = () + + private def someEntityDecl(name: String, edef: EntityDef, f: (String, EntityDef) => EntityDecl): Unit = + edef match { + case _: ExtDef if !isValidating => // ignore (cf REC-xml 4.4.1) + case _ => + val y = f(name, edef) + decls ::= y + ent.update(name, y) + } + + def parameterEntityDecl(name: String, edef: EntityDef): Unit = + someEntityDecl(name, edef, ParameterEntityDecl.apply _) + + def parsedEntityDecl(name: String, edef: EntityDef): Unit = + someEntityDecl(name, edef, ParsedEntityDecl.apply _) + + def peReference(name: String) { decls ::= PEReference(name) } + def unparsedEntityDecl(name: String, extID: ExternalID, notat: String): Unit = () + def notationDecl(notat: String, extID: ExternalID): Unit = () + def reportSyntaxError(pos: Int, str: String): Unit + + @deprecated("This method and its usages will be removed. 
Use a debugger to debug code.", "2.11") + def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/parsing/MarkupParser.scala b/src/xml/scala/xml/parsing/MarkupParser.scala new file mode 100755 index 0000000000..3bbd136b67 --- /dev/null +++ b/src/xml/scala/xml/parsing/MarkupParser.scala @@ -0,0 +1,938 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.io.Source +import scala.xml.dtd._ +import Utility.Escapes.{ pairs => unescape } + +/** + * An XML parser. + * + * Parses XML 1.0, invokes callback methods of a `MarkupHandler` and returns + * whatever the markup handler returns. Use `ConstructingParser` if you just + * want to parse XML to construct instances of `scala.xml.Node`. + * + * While XML elements are returned, DTD declarations - if handled - are + * collected using side-effects. + * + * @author Burak Emir + * @version 1.0 + */ +trait MarkupParser extends MarkupParserCommon with TokenTests +{ + self: MarkupParser with MarkupHandler => + + type PositionType = Int + type InputType = Source + type ElementType = NodeSeq + type AttributesType = (MetaData, NamespaceBinding) + type NamespaceType = NamespaceBinding + + def truncatedError(msg: String): Nothing = throw FatalError(msg) + def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag) + + def xHandleError(that: Char, msg: String) = reportSyntaxError(msg) + + val input: Source + + /** if true, does not remove surplus whitespace */ + val preserveWS: Boolean + + def externalSource(systemLiteral: String): Source + + // + // variables, values + // + + protected var curInput: Source = input + + // See ticket #3720 for motivations. 
+ private class WithLookAhead(underlying: Source) extends Source { + private val queue = scala.collection.mutable.Queue[Char]() + def lookahead(): BufferedIterator[Char] = { + val iter = queue.iterator ++ new Iterator[Char] { + def hasNext = underlying.hasNext + def next() = { val x = underlying.next(); queue += x; x } + } + iter.buffered + } + val iter = new Iterator[Char] { + def hasNext = underlying.hasNext || !queue.isEmpty + def next() = if (!queue.isEmpty) queue.dequeue() else underlying.next() + } + } + + def lookahead(): BufferedIterator[Char] = curInput match { + case curInputWLA:WithLookAhead => + curInputWLA.lookahead() + case _ => + val newInput = new WithLookAhead(curInput) + curInput = newInput + newInput.lookahead() + } + + + /** the handler of the markup, returns this */ + private val handle: MarkupHandler = this + + /** stack of inputs */ + var inpStack: List[Source] = Nil + + /** holds the position in the source file */ + var pos: Int = _ + + /* used when reading external subset */ + var extIndex = -1 + + /** holds temporary values of pos */ + var tmppos: Int = _ + + /** holds the next character */ + var nextChNeeded: Boolean = false + var reachedEof: Boolean = false + var lastChRead: Char = _ + def ch: Char = { + if (nextChNeeded) { + if (curInput.hasNext) { + lastChRead = curInput.next() + pos = curInput.pos + } else { + val ilen = inpStack.length + //Console.println(" ilen = "+ilen+ " extIndex = "+extIndex); + if ((ilen != extIndex) && (ilen > 0)) { + /* for external source, inpStack == Nil ! need notify of eof! 
*/ + pop() + } else { + reachedEof = true + lastChRead = 0.asInstanceOf[Char] + } + } + nextChNeeded = false + } + lastChRead + } + + /** character buffer, for names */ + protected val cbuf = new StringBuilder() + + var dtd: DTD = null + + protected var doc: Document = null + + def eof: Boolean = { ch; reachedEof } + + // + // methods + // + + /** {{{ + * + * }}} */ + def xmlProcInstr(): MetaData = { + xToken("xml") + xSpace() + val (md,scp) = xAttributes(TopScope) + if (scp != TopScope) + reportSyntaxError("no xmlns definitions here, please.") + xToken('?') + xToken('>') + md + } + + /** Factored out common code. + */ + private def prologOrTextDecl(isProlog: Boolean): (Option[String], Option[String], Option[Boolean]) = { + var info_ver: Option[String] = None + var info_enc: Option[String] = None + var info_stdl: Option[Boolean] = None + + val m = xmlProcInstr() + var n = 0 + + if (isProlog) + xSpaceOpt() + + m("version") match { + case null => + case Text("1.0") => info_ver = Some("1.0"); n += 1 + case _ => reportSyntaxError("cannot deal with versions != 1.0") + } + + m("encoding") match { + case null => + case Text(enc) => + if (!isValidIANAEncoding(enc)) + reportSyntaxError("\"" + enc + "\" is not a valid encoding") + else { + info_enc = Some(enc) + n += 1 + } + } + + if (isProlog) { + m("standalone") match { + case null => + case Text("yes") => info_stdl = Some(true); n += 1 + case Text("no") => info_stdl = Some(false); n += 1 + case _ => reportSyntaxError("either 'yes' or 'no' expected") + } + } + + if (m.length - n != 0) { + val s = if (isProlog) "SDDecl? " else "" + reportSyntaxError("VersionInfo EncodingDecl? %sor '?>' expected!" format s) + } + + (info_ver, info_enc, info_stdl) + } + + /** {{{ + * (x1, x2) } + + /** {{{ + * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? + * [23] XMLDecl ::= '' + * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') + * [25] Eq ::= S? '=' S? 
+ * [26] VersionNum ::= '1.0' + * [27] Misc ::= Comment | PI | S + * }}} */ + def document(): Document = { + doc = new Document() + + this.dtd = null + var info_prolog: (Option[String], Option[String], Option[Boolean]) = (None, None, None) + if ('<' != ch) { + reportSyntaxError("< expected") + return null + } + + nextch() // is prolog ? + var children: NodeSeq = null + if ('?' == ch) { + nextch() + info_prolog = prolog() + doc.version = info_prolog._1 + doc.encoding = info_prolog._2 + doc.standAlone = info_prolog._3 + + children = content(TopScope) // DTD handled as side effect + } + else { + val ts = new NodeBuffer() + content1(TopScope, ts) // DTD handled as side effect + ts &+ content(TopScope) + children = NodeSeq.fromSeq(ts) + } + //println("[MarkupParser::document] children now: "+children.toList) + var elemCount = 0 + var theNode: Node = null + for (c <- children) c match { + case _:ProcInstr => + case _:Comment => + case _:EntityRef => // todo: fix entities, shouldn't be "special" + reportSyntaxError("no entity references allowed here") + case s:SpecialNode => + if (s.toString.trim().length > 0) //non-empty text nodes not allowed + elemCount += 2 + case m:Node => + elemCount += 1 + theNode = m + } + if (1 != elemCount) { + reportSyntaxError("document must contain exactly one element") + Console.println(children.toList) + } + + doc.children = children + doc.docElem = theNode + doc + } + + /** append Unicode character to name buffer*/ + protected def putChar(c: Char) = cbuf append c + + /** As the current code requires you to call nextch once manually + * after construction, this method formalizes that suboptimal reality. 
+ */ + def initialize: this.type = { + nextch() + this + } + + protected def ch_returning_nextch: Char = { val res = ch; nextch(); res } + + def mkAttributes(name: String, pscope: NamespaceBinding): AttributesType = + if (isNameStart (ch)) xAttributes(pscope) + else (Null, pscope) + + def mkProcInstr(position: Int, name: String, text: String): ElementType = + handle.procInstr(position, name, text) + + /** this method tells ch to get the next character when next called */ + def nextch() { + // Read current ch if needed + ch + + // Mark next ch to be required + nextChNeeded = true + } + + /** parse attribute and create namespace scope, metadata + * {{{ + * [41] Attributes ::= { S Name Eq AttValue } + * }}} + */ + def xAttributes(pscope: NamespaceBinding): (MetaData, NamespaceBinding) = { + var scope: NamespaceBinding = pscope + var aMap: MetaData = Null + while (isNameStart(ch)) { + val qname = xName + xEQ() // side effect + val value = xAttributeValue() + + Utility.prefix(qname) match { + case Some("xmlns") => + val prefix = qname.substring(6 /*xmlns:*/ , qname.length) + scope = new NamespaceBinding(prefix, value, scope) + + case Some(prefix) => + val key = qname.substring(prefix.length+1, qname.length) + aMap = new PrefixedAttribute(prefix, key, Text(value), aMap) + + case _ => + if( qname == "xmlns" ) + scope = new NamespaceBinding(null, value, scope) + else + aMap = new UnprefixedAttribute(qname, Text(value), aMap) + } + + if ((ch != '/') && (ch != '>') && ('?' != ch)) + xSpace() + } + + if(!aMap.wellformed(scope)) + reportSyntaxError( "double attribute") + + (aMap,scope) + } + + /** entity value, terminated by either ' or ". value may not contain <. 
+ * {{{ + * AttValue ::= `'` { _ } `'` + * | `"` { _ } `"` + * }}} + */ + def xEntityValue(): String = { + val endch = ch + nextch() + while (ch != endch && !eof) { + putChar(ch) + nextch() + } + nextch() + val str = cbuf.toString() + cbuf.length = 0 + str + } + + /** {{{ + * '"{char} ) ']]>' + * + * see [15] + * }}} */ + def xCharData: NodeSeq = { + xToken("[CDATA[") + def mkResult(pos: Int, s: String): NodeSeq = { + handle.text(pos, s) + PCData(s) + } + xTakeUntil(mkResult, () => pos, "]]>") + } + + /** {{{ + * Comment ::= '' + * + * see [15] + * }}} */ + def xComment: NodeSeq = { + val sb: StringBuilder = new StringBuilder() + xToken("--") + while (true) { + if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) { + sb.length = sb.length - 1 + nextch() + xToken('>') + return handle.comment(pos, sb.toString()) + } else sb.append(ch) + nextch() + } + throw FatalError("this cannot happen") + } + + /* todo: move this into the NodeBuilder class */ + def appendText(pos: Int, ts: NodeBuffer, txt: String): Unit = { + if (preserveWS) + ts &+ handle.text(pos, txt) + else + for (t <- TextBuffer.fromString(txt).toText) { + ts &+ handle.text(pos, t.text) + } + } + + /** {{{ + * '<' content1 ::= ... + * }}} */ + def content1(pscope: NamespaceBinding, ts: NodeBuffer) { + ch match { + case '!' => + nextch() + if ('[' == ch) // CDATA + ts &+ xCharData + else if ('D' == ch) // doctypedecl, parse DTD // @todo REMOVE HACK + parseDTD() + else // comment + ts &+ xComment + case '?' => // PI + nextch() + ts &+ xProcInstr + case _ => + ts &+ element1(pscope) // child + } + } + + /** {{{ + * content1 ::= '<' content1 | '&' charref ... + * }}} */ + def content(pscope: NamespaceBinding): NodeSeq = { + val ts = new NodeBuffer + var exit = eof + // todo: optimize seq repr. 
+ def done = new NodeSeq { val theSeq = ts.toList } + + while (!exit) { + tmppos = pos + exit = eof + + if (eof) + return done + + ch match { + case '<' => // another tag + nextch(); ch match { + case '/' => exit = true // end tag + case _ => content1(pscope, ts) + } + + // postcond: xEmbeddedBlock == false! + case '&' => // EntityRef or CharRef + nextch(); ch match { + case '#' => // CharacterRef + nextch() + val theChar = handle.text(tmppos, xCharRef(() => ch, () => nextch())) + xToken(';') + ts &+ theChar + case _ => // EntityRef + val n = xName + xToken(';') + + if (unescape contains n) { + handle.entityRef(tmppos, n) + ts &+ unescape(n) + } else push(n) + } + case _ => // text content + appendText(tmppos, ts, xText) + } + } + done + } // content(NamespaceBinding) + + /** {{{ + * externalID ::= SYSTEM S syslit + * PUBLIC S pubid S syslit + * }}} */ + def externalID(): ExternalID = ch match { + case 'S' => + nextch() + xToken("YSTEM") + xSpace() + val sysID = systemLiteral() + new SystemID(sysID) + case 'P' => + nextch(); xToken("UBLIC") + xSpace() + val pubID = pubidLiteral() + xSpace() + val sysID = systemLiteral() + new PublicID(pubID, sysID) + } + + + /** parses document type declaration and assigns it to instance variable + * dtd. + * {{{ + * + * }}} */ + def parseDTD() { // dirty but fast + var extID: ExternalID = null + if (this.dtd ne null) + reportSyntaxError("unexpected character (DOCTYPE already defined") + xToken("DOCTYPE") + xSpace() + val n = xName + xSpace() + //external ID + if ('S' == ch || 'P' == ch) { + extID = externalID() + xSpaceOpt() + } + + /* parse external subset of DTD + */ + + if ((null != extID) && isValidating) { + + pushExternal(extID.systemId) + extIndex = inpStack.length + + extSubset() + pop() + extIndex = -1 + } + + if ('[' == ch) { // internal subset + nextch() + /* TODO */ + intSubset() + // TODO: do the DTD parsing?? ?!?!?!?!! 
+ xToken(']') + xSpaceOpt() + } + xToken('>') + this.dtd = new DTD { + /*override var*/ externalID = extID + /*override val */decls = handle.decls.reverse + } + //this.dtd.initializeEntities(); + if (doc ne null) + doc.dtd = this.dtd + + handle.endDTD(n) + } + + def element(pscope: NamespaceBinding): NodeSeq = { + xToken('<') + element1(pscope) + } + + /** {{{ + * '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag + * | xmlTag1 '/' '>' + * }}} */ + def element1(pscope: NamespaceBinding): NodeSeq = { + val pos = this.pos + val (qname, (aMap, scope)) = xTag(pscope) + val (pre, local) = Utility.prefix(qname) match { + case Some(p) => (p, qname drop p.length+1) + case _ => (null, qname) + } + val ts = { + if (ch == '/') { // empty element + xToken("/>") + handle.elemStart(pos, pre, local, aMap, scope) + NodeSeq.Empty + } + else { // element with content + xToken('>') + handle.elemStart(pos, pre, local, aMap, scope) + val tmp = content(scope) + xEndTag(qname) + tmp + } + } + val res = handle.elem(pos, pre, local, aMap, scope, ts == NodeSeq.Empty, ts) + handle.elemEnd(pos, pre, local) + res + } + + /** Parse character data. + * + * precondition: `xEmbeddedBlock == false` (we are not in a scala block) + */ + private def xText: String = { + var exit = false + while (! exit) { + putChar(ch) + nextch() + + exit = eof || ( ch == '<' ) || ( ch == '&' ) + } + val str = cbuf.toString + cbuf.length = 0 + str + } + + /** attribute value, terminated by either ' or ". value may not contain <. 
+ * {{{ + * AttValue ::= `'` { _ } `'` + * | `"` { _ } `"` + * }}} */ + def systemLiteral(): String = { + val endch = ch + if (ch != '\'' && ch != '"') + reportSyntaxError("quote ' or \" expected") + nextch() + while (ch != endch && !eof) { + putChar(ch) + nextch() + } + nextch() + val str = cbuf.toString() + cbuf.length = 0 + str + } + + /** {{{ + * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" + * }}} */ + def pubidLiteral(): String = { + val endch = ch + if (ch!='\'' && ch != '"') + reportSyntaxError("quote ' or \" expected") + nextch() + while (ch != endch && !eof) { + putChar(ch) + //println("hello '"+ch+"'"+isPubIDChar(ch)) + if (!isPubIDChar(ch)) + reportSyntaxError("char '"+ch+"' is not allowed in public id") + nextch() + } + nextch() + val str = cbuf.toString + cbuf.length = 0 + str + } + + // + // dtd parsing + // + + def extSubset(): Unit = { + var textdecl: (Option[String],Option[String]) = null + if (ch == '<') { + nextch() + if (ch == '?') { + nextch() + textdecl = textDecl() + } else + markupDecl1() + } + while (!eof) + markupDecl() + } + + def markupDecl1() = { + def doInclude() = { + xToken('['); while(']' != ch) markupDecl(); nextch() // ']' + } + def doIgnore() = { + xToken('['); while(']' != ch) nextch(); nextch() // ']' + } + if ('?' == ch) { + nextch() + xProcInstr // simply ignore processing instructions! 
+ } else { + xToken('!') + ch match { + case '-' => + xComment // ignore comments + + case 'E' => + nextch() + if ('L' == ch) { + nextch() + elementDecl() + } else + entityDecl() + + case 'A' => + nextch() + attrDecl() + + case 'N' => + nextch() + notationDecl() + + case '[' if inpStack.length >= extIndex => + nextch() + xSpaceOpt() + ch match { + case '%' => + nextch() + val ent = xName + xToken(';') + xSpaceOpt() + + push(ent) + xSpaceOpt() + val stmt = xName + xSpaceOpt() + + stmt match { + // parameter entity + case "INCLUDE" => doInclude() + case "IGNORE" => doIgnore() + } + case 'I' => + nextch() + ch match { + case 'G' => + nextch() + xToken("NORE") + xSpaceOpt() + doIgnore() + case 'N' => + nextch() + xToken("NCLUDE") + doInclude() + } + } + xToken(']') + xToken('>') + + case _ => + curInput.reportError(pos, "unexpected character '"+ch+"', expected some markupdecl") + while (ch!='>') + nextch() + } + } + } + + def markupDecl(): Unit = ch match { + case '%' => // parameter entity reference + nextch() + val ent = xName + xToken(';') + if (!isValidating) + handle.peReference(ent) // n-v: just create PE-reference + else + push(ent) // v: parse replacementText + + //peReference + case '<' => + nextch() + markupDecl1() + case _ if isSpace(ch) => + xSpace() + case _ => + reportSyntaxError("markupdecl: unexpected character '"+ch+"' #" + ch.toInt) + nextch() + } + + /** "rec-xml/#ExtSubset" pe references may not occur within markup declarations + */ + def intSubset() { + //Console.println("(DEBUG) intSubset()") + xSpace() + while (']' != ch) + markupDecl() + } + + /** <! 
element := ELEMENT + */ + def elementDecl() { + xToken("EMENT") + xSpace() + val n = xName + xSpace() + while ('>' != ch) { + //Console.println("["+ch+"]") + putChar(ch) + nextch() + } + //Console.println("END["+ch+"]") + nextch() + val cmstr = cbuf.toString() + cbuf.length = 0 + handle.elemDecl(n, cmstr) + } + + /** {{{ + * ' != ch) { + val aname = xName + xSpace() + // could be enumeration (foo,bar) parse this later :-/ + while ('"' != ch && '\'' != ch && '#' != ch && '<' != ch) { + if (!isSpace(ch)) + cbuf.append(ch) + nextch() + } + val atpe = cbuf.toString + cbuf.length = 0 + + val defdecl: DefaultDecl = ch match { + case '\'' | '"' => + DEFAULT(fixed = false, xAttributeValue()) + + case '#' => + nextch() + xName match { + case "FIXED" => xSpace() ; DEFAULT(fixed = true, xAttributeValue()) + case "IMPLIED" => IMPLIED + case "REQUIRED" => REQUIRED + } + case _ => + null + } + xSpaceOpt() + + attList ::= AttrDecl(aname, atpe, defdecl) + cbuf.length = 0 + } + nextch() + handle.attListDecl(n, attList.reverse) + } + + /** {{{ + * //sy + val extID = externalID() + if (isParameterEntity) { + xSpaceOpt() + xToken('>') + handle.parameterEntityDecl(n, ExtDef(extID)) + } else { // notation? 
+ xSpace() + if ('>' != ch) { + xToken("NDATA") + xSpace() + val notat = xName + xSpaceOpt() + xToken('>') + handle.unparsedEntityDecl(n, extID, notat) + } else { + nextch() + handle.parsedEntityDecl(n, ExtDef(extID)) + } + } + + case '"' | '\'' => + val av = xEntityValue() + xSpaceOpt() + xToken('>') + if (isParameterEntity) + handle.parameterEntityDecl(n, IntDef(av)) + else + handle.parsedEntityDecl(n, IntDef(av)) + } + {} + } // entityDecl + + /** {{{ + * 'N' notationDecl ::= "OTATION" + * }}} */ + def notationDecl() { + xToken("OTATION") + xSpace() + val notat = xName + xSpace() + val extID = if (ch == 'S') { + externalID() + } + else if (ch == 'P') { + /* PublicID (without system, only used in NOTATION) */ + nextch() + xToken("UBLIC") + xSpace() + val pubID = pubidLiteral() + xSpaceOpt() + val sysID = if (ch != '>') + systemLiteral() + else + null + new PublicID(pubID, sysID) + } else { + reportSyntaxError("PUBLIC or SYSTEM expected") + scala.sys.error("died parsing notationdecl") + } + xSpaceOpt() + xToken('>') + handle.notationDecl(notat, extID) + } + + def reportSyntaxError(pos: Int, str: String) { curInput.reportError(pos, str) } + def reportSyntaxError(str: String) { reportSyntaxError(pos, str) } + def reportValidationError(pos: Int, str: String) { reportSyntaxError(pos, str) } + + def push(entityName: String) { + if (!eof) + inpStack = curInput :: inpStack + + // can't push before getting next character if needed + ch + + curInput = replacementText(entityName) + nextch() + } + + def pushExternal(systemId: String) { + if (!eof) + inpStack = curInput :: inpStack + + // can't push before getting next character if needed + ch + + curInput = externalSource(systemId) + nextch() + } + + def pop() { + curInput = inpStack.head + inpStack = inpStack.tail + lastChRead = curInput.ch + nextChNeeded = false + pos = curInput.pos + reachedEof = false // must be false, because of places where entity refs occur + } +} diff --git 
a/src/xml/scala/xml/parsing/MarkupParserCommon.scala b/src/xml/scala/xml/parsing/MarkupParserCommon.scala new file mode 100644 index 0000000000..57c1651558 --- /dev/null +++ b/src/xml/scala/xml/parsing/MarkupParserCommon.scala @@ -0,0 +1,260 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.io.Source +import scala.annotation.switch +import Utility.Escapes.{ pairs => unescape } + +import Utility.SU + +/** This is not a public trait - it contains common code shared + * between the library level XML parser and the compiler's. + * All members should be accessed through those. + */ +private[scala] trait MarkupParserCommon extends TokenTests { + protected def unreachable = scala.sys.error("Cannot be reached.") + + // type HandleType // MarkupHandler, SymbolicXMLBuilder + type InputType // Source, CharArrayReader + type PositionType // Int, Position + type ElementType // NodeSeq, Tree + type NamespaceType // NamespaceBinding, Any + type AttributesType // (MetaData, NamespaceBinding), mutable.Map[String, Tree] + + def mkAttributes(name: String, pscope: NamespaceType): AttributesType + def mkProcInstr(position: PositionType, name: String, text: String): ElementType + + /** parse a start or empty tag. + * [40] STag ::= '<' Name { S Attribute } [S] + * [44] EmptyElemTag ::= '<' Name { S Attribute } [S] + */ + protected def xTag(pscope: NamespaceType): (String, AttributesType) = { + val name = xName + xSpaceOpt() + + (name, mkAttributes(name, pscope)) + } + + /** '?' {Char})]'?>' + * + * see [15] + */ + def xProcInstr: ElementType = { + val n = xName + xSpaceOpt() + xTakeUntil(mkProcInstr(_, n, _), () => tmppos, "?>") + } + + /** attribute value, terminated by either `'` or `"`. value may not contain `<`. 
+ @param endCh either `'` or `"` + */ + def xAttributeValue(endCh: Char): String = { + val buf = new StringBuilder + while (ch != endCh) { + // well-formedness constraint + if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") + else if (ch == SU) truncatedError("") + else buf append ch_returning_nextch + } + ch_returning_nextch + // @todo: normalize attribute value + buf.toString + } + + def xAttributeValue(): String = { + val str = xAttributeValue(ch_returning_nextch) + // well-formedness constraint + normalizeAttributeValue(str) + } + + private def takeUntilChar(it: Iterator[Char], end: Char): String = { + val buf = new StringBuilder + while (it.hasNext) it.next() match { + case `end` => return buf.toString + case ch => buf append ch + } + scala.sys.error("Expected '%s'".format(end)) + } + + /** [42] '<' xmlEndTag ::= '<' '/' Name S? '>' + */ + def xEndTag(startName: String) { + xToken('/') + if (xName != startName) + errorNoEnd(startName) + + xSpaceOpt() + xToken('>') + } + + /** actually, Name ::= (Letter | '_' | ':') (NameChar)* but starting with ':' cannot happen + * Name ::= (Letter | '_') (NameChar)* + * + * see [5] of XML 1.0 specification + * + * pre-condition: ch != ':' // assured by definition of XMLSTART token + * post-condition: name does neither start, nor end in ':' + */ + def xName: String = { + if (ch == SU) + truncatedError("") + else if (!isNameStart(ch)) + return errorAndResult("name expected, but char '%s' cannot start a name" format ch, "") + + val buf = new StringBuilder + + do buf append ch_returning_nextch + while (isNameChar(ch)) + + if (buf.last == ':') { + reportSyntaxError( "name cannot end in ':'" ) + buf.toString dropRight 1 + } + else buf.toString + } + + private def attr_unescape(s: String) = s match { + case "lt" => "<" + case "gt" => ">" + case "amp" => "&" + case "apos" => "'" + case "quot" => "\"" + case "quote" => "\"" + case _ => "&" + s + ";" + } + + /** Replaces only character references right now. 
+ * see spec 3.3.3 + */ + private def normalizeAttributeValue(attval: String): String = { + val buf = new StringBuilder + val it = attval.iterator.buffered + + while (it.hasNext) buf append (it.next() match { + case ' ' | '\t' | '\n' | '\r' => " " + case '&' if it.head == '#' => it.next() ; xCharRef(it) + case '&' => attr_unescape(takeUntilChar(it, ';')) + case c => c + }) + + buf.toString + } + + /** CharRef ::= "&#" '0'..'9' {'0'..'9'} ";" + * | "&#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";" + * + * see [66] + */ + def xCharRef(ch: () => Char, nextch: () => Unit): String = + Utility.parseCharRef(ch, nextch, reportSyntaxError _, truncatedError _) + + def xCharRef(it: Iterator[Char]): String = { + var c = it.next() + Utility.parseCharRef(() => c, () => { c = it.next() }, reportSyntaxError _, truncatedError _) + } + + def xCharRef: String = xCharRef(() => ch, () => nextch()) + + /** Create a lookahead reader which does not influence the input */ + def lookahead(): BufferedIterator[Char] + + /** The library and compiler parsers had the interesting distinction of + * different behavior for nextch (a function for which there are a total + * of two plausible behaviors, so we know the design space was fully + * explored.) One of them returned the value of nextch before the increment + * and one of them the new value. So to unify code we have to at least + * temporarily abstract over the nextchs. 
+ */ + def ch: Char + def nextch(): Unit + protected def ch_returning_nextch: Char + def eof: Boolean + + // def handle: HandleType + var tmppos: PositionType + + def xHandleError(that: Char, msg: String): Unit + def reportSyntaxError(str: String): Unit + def reportSyntaxError(pos: Int, str: String): Unit + + def truncatedError(msg: String): Nothing + def errorNoEnd(tag: String): Nothing + + protected def errorAndResult[T](msg: String, x: T): T = { + reportSyntaxError(msg) + x + } + + def xToken(that: Char) { + if (ch == that) nextch() + else xHandleError(that, "'%s' expected instead of '%s'".format(that, ch)) + } + def xToken(that: Seq[Char]) { that foreach xToken } + + /** scan [S] '=' [S]*/ + def xEQ() = { xSpaceOpt(); xToken('='); xSpaceOpt() } + + /** skip optional space S? */ + def xSpaceOpt() = while (isSpace(ch) && !eof) nextch() + + /** scan [3] S ::= (#x20 | #x9 | #xD | #xA)+ */ + def xSpace() = + if (isSpace(ch)) { nextch(); xSpaceOpt() } + else xHandleError(ch, "whitespace expected") + + /** Apply a function and return the passed value */ + def returning[T](x: T)(f: T => Unit): T = { f(x); x } + + /** Execute body with a variable saved and restored after execution */ + def saving[A, B](getter: A, setter: A => Unit)(body: => B): B = { + val saved = getter + try body + finally setter(saved) + } + + /** Take characters from input stream until given String "until" + * is seen. Once seen, the accumulated characters are passed + * along with the current Position to the supplied handler function. 
+ */ + protected def xTakeUntil[T]( + handler: (PositionType, String) => T, + positioner: () => PositionType, + until: String): T = + { + val sb = new StringBuilder + val head = until.head + val rest = until.tail + + while (true) { + if (ch == head && peek(rest)) + return handler(positioner(), sb.toString) + else if (ch == SU) + truncatedError("") // throws TruncatedXMLControl in compiler + + sb append ch + nextch() + } + unreachable + } + + /** Create a non-destructive lookahead reader and see if the head + * of the input would match the given String. If yes, return true + * and drop the entire String from input; if no, return false + * and leave input unchanged. + */ + private def peek(lookingFor: String): Boolean = + (lookahead() take lookingFor.length sameElements lookingFor.iterator) && { + // drop the chars from the real reader (all lookahead + orig) + (0 to lookingFor.length) foreach (_ => nextch()) + true + } +} diff --git a/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala new file mode 100644 index 0000000000..56ac185f47 --- /dev/null +++ b/src/xml/scala/xml/parsing/NoBindingFactoryAdapter.scala @@ -0,0 +1,37 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package parsing + +import factory.NodeFactory + +/** nobinding adaptor providing callbacks to parser to create elements. +* implements hash-consing +*/ +class NoBindingFactoryAdapter extends FactoryAdapter with NodeFactory[Elem] +{ + /** True. Every XML node may contain text that the application needs */ + def nodeContainsText(label: String) = true + + /** From NodeFactory. 
Constructs an instance of scala.xml.Elem */ + protected def create(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: Seq[Node]): Elem = + Elem(pre, label, attrs, scope, children: _*) + + /** From FactoryAdapter. Creates a node. never creates the same node twice, using hash-consing. */ + def createNode(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding, children: List[Node]): Elem = + Elem(pre, label, attrs, scope, children: _*) + + /** Creates a text node. */ + def createText(text: String) = Text(text) + + /** Creates a processing instruction. */ + def createProcInstr(target: String, data: String) = makeProcInstr(target, data) +} diff --git a/src/xml/scala/xml/parsing/TokenTests.scala b/src/xml/scala/xml/parsing/TokenTests.scala new file mode 100644 index 0000000000..8dd9cdfaa3 --- /dev/null +++ b/src/xml/scala/xml/parsing/TokenTests.scala @@ -0,0 +1,101 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +/** + * Helper functions for parsing XML fragments + */ +trait TokenTests { + + /** {{{ + * (#x20 | #x9 | #xD | #xA) + * }}} */ + final def isSpace(ch: Char): Boolean = ch match { + case '\u0009' | '\u000A' | '\u000D' | '\u0020' => true + case _ => false + } + /** {{{ + * (#x20 | #x9 | #xD | #xA)+ + * }}} */ + final def isSpace(cs: Seq[Char]): Boolean = cs.nonEmpty && (cs forall isSpace) + + /** These are 99% sure to be redundant but refactoring on the safe side. */ + def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') + def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') + + /** {{{ + * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' + * | CombiningChar | Extender + * }}} + * See [4] and Appendix B of XML 1.0 specification. 
+ */ + def isNameChar(ch: Char) = { + import java.lang.Character._ + // The constants represent groups Mc, Me, Mn, Lm, and Nd. + + isNameStart(ch) || (getType(ch).toByte match { + case COMBINING_SPACING_MARK | + ENCLOSING_MARK | NON_SPACING_MARK | + MODIFIER_LETTER | DECIMAL_DIGIT_NUMBER => true + case _ => ".-:" contains ch + }) + } + + /** {{{ + * NameStart ::= ( Letter | '_' ) + * }}} + * where Letter means in one of the Unicode general + * categories `{ Ll, Lu, Lo, Lt, Nl }`. + * + * We do not allow a name to start with `:`. + * See [3] and Appendix B of XML 1.0 specification + */ + def isNameStart(ch: Char) = { + import java.lang.Character._ + + getType(ch).toByte match { + case LOWERCASE_LETTER | + UPPERCASE_LETTER | OTHER_LETTER | + TITLECASE_LETTER | LETTER_NUMBER => true + case _ => ch == '_' + } + } + + /** {{{ + * Name ::= ( Letter | '_' ) (NameChar)* + * }}} + * See [5] of XML 1.0 specification. + */ + def isName(s: String) = + s.nonEmpty && isNameStart(s.head) && (s.tail forall isNameChar) + + def isPubIDChar(ch: Char): Boolean = + isAlphaDigit(ch) || (isSpace(ch) && ch != '\u0009') || + ("""-\()+,./:=?;!*#@$_%""" contains ch) + + /** + * Returns `true` if the encoding name is a valid IANA encoding. + * This method does not verify that there is a decoder available + * for this encoding, only that the characters are valid for an + * IANA encoding name. + * + * @param ianaEncoding The IANA encoding name. 
+ */ + def isValidIANAEncoding(ianaEncoding: Seq[Char]) = { + def charOK(c: Char) = isAlphaDigit(c) || ("._-" contains c) + + ianaEncoding.nonEmpty && isAlpha(ianaEncoding.head) && + (ianaEncoding.tail forall charOK) + } + + def checkSysID(s: String) = List('"', '\'') exists (c => !(s contains c)) + def checkPubID(s: String) = s forall isPubIDChar +} diff --git a/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala new file mode 100644 index 0000000000..1b20901249 --- /dev/null +++ b/src/xml/scala/xml/parsing/ValidatingMarkupHandler.scala @@ -0,0 +1,104 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package parsing + +import scala.xml.dtd._ + +abstract class ValidatingMarkupHandler extends MarkupHandler { + + var rootLabel:String = _ + var qStack: List[Int] = Nil + var qCurrent: Int = -1 + + var declStack: List[ElemDecl] = Nil + var declCurrent: ElemDecl = null + + final override val isValidating = true + + override def endDTD(n:String) = { + rootLabel = n + } + override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope:NamespaceBinding) { + + def advanceDFA(dm:DFAContentModel) = { + val trans = dm.dfa.delta(qCurrent) + log("advanceDFA(dm): " + dm) + log("advanceDFA(trans): " + trans) + trans.get(ContentModel.ElemName(label)) match { + case Some(qNew) => qCurrent = qNew + case _ => reportValidationError(pos, "DTD says, wrong element, expected one of "+trans.keys) + } + } + // advance in current automaton + log("[qCurrent = "+qCurrent+" visiting "+label+"]") + + if (qCurrent == -1) { // root + log(" checking root") + if (label != rootLabel) + reportValidationError(pos, "this element should be "+rootLabel) + } else { + log(" checking node") + declCurrent.contentModel match { + 
case ANY => + case EMPTY => + reportValidationError(pos, "DTD says, no elems, no text allowed here") + case PCDATA => + reportValidationError(pos, "DTD says, no elements allowed here") + case m @ MIXED(r) => + advanceDFA(m) + case e @ ELEMENTS(r) => + advanceDFA(e) + } + } + // push state, decl + qStack = qCurrent :: qStack + declStack = declCurrent :: declStack + + declCurrent = lookupElemDecl(label) + qCurrent = 0 + log(" done now") + } + + override def elemEnd(pos: Int, pre: String, label: String) { + log(" elemEnd") + qCurrent = qStack.head + qStack = qStack.tail + declCurrent = declStack.head + declStack = declStack.tail + log(" qCurrent now" + qCurrent) + log(" declCurrent now" + declCurrent) + } + + final override def elemDecl(name: String, cmstr: String) { + decls = ElemDecl(name, ContentModel.parse(cmstr)) :: decls + } + + final override def attListDecl(name: String, attList: List[AttrDecl]) { + decls = AttListDecl(name, attList) :: decls + } + + final override def unparsedEntityDecl(name: String, extID: ExternalID, notat: String) { + decls = UnparsedEntityDecl(name, extID, notat) :: decls + } + + final override def notationDecl(notat: String, extID: ExternalID) { + decls = NotationDecl(notat, extID) :: decls + } + + final override def peReference(name: String) { + decls = PEReference(name) :: decls + } + + /** report a syntax error */ + def reportValidationError(pos: Int, str: String): Unit +} diff --git a/src/xml/scala/xml/parsing/XhtmlEntities.scala b/src/xml/scala/xml/parsing/XhtmlEntities.scala new file mode 100644 index 0000000000..3683af202c --- /dev/null +++ b/src/xml/scala/xml/parsing/XhtmlEntities.scala @@ -0,0 +1,54 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.xml.dtd.{ IntDef, ParsedEntityDecl } + +/** + * 
@author (c) David Pollak 2007 WorldWide Conferencing, LLC. + * + */ +object XhtmlEntities { + val entList = List(("quot",34), ("amp",38), ("lt",60), ("gt",62), ("nbsp",160), ("iexcl",161), ("cent",162), ("pound",163), ("curren",164), ("yen",165), + ("euro",8364), ("brvbar",166), ("sect",167), ("uml",168), ("copy",169), ("ordf",170), ("laquo",171), ("shy",173), ("reg",174), ("trade",8482), + ("macr",175), ("deg",176), ("plusmn",177), ("sup2",178), ("sup3",179), ("acute",180), ("micro",181), ("para",182), ("middot",183), ("cedil",184), + ("sup1",185), ("ordm",186), ("raquo",187), ("frac14",188), ("frac12",189), ("frac34",190), ("iquest",191), ("times",215), ("divide",247), + ("Agrave",192), ("Aacute",193), ("Acirc",194), ("Atilde",195), ("Auml",196), ("Aring",197), ("AElig",198), ("Ccedil",199), ("Egrave",200), + ("Eacute",201), ("Ecirc",202), ("Euml",203), ("Igrave",204), ("Iacute",205), ("Icirc",206), ("Iuml",207), ("ETH",208), ("Ntilde",209), + ("Ograve",210), ("Oacute",211), ("Ocirc",212), ("Otilde",213), ("Ouml",214), ("Oslash",216), ("Ugrave",217), ("Uacute",218), ("Ucirc",219), + ("Uuml",220), ("Yacute",221), ("THORN",222), ("szlig",223), ("agrave",224), ("aacute",225), ("acirc",226), ("atilde",227), ("auml",228), + ("aring",229), ("aelig",230), ("ccedil",231), ("egrave",232), ("eacute",233), ("ecirc",234), ("euml",235), ("igrave",236), ("iacute",237), + ("icirc",238), ("iuml",239), ("eth",240), ("ntilde",241), ("ograve",242), ("oacute",243), ("ocirc",244), ("otilde",245), ("ouml",246), + ("oslash",248), ("ugrave",249), ("uacute",250), ("ucirc",251), ("uuml",252), ("yacute",253), ("thorn",254), ("yuml",255), ("OElig",338), + ("oelig",339), ("Scaron",352), ("scaron",353), ("Yuml",376), ("circ",710), ("ensp",8194), ("emsp",8195), ("zwnj",204), ("zwj",8205), ("lrm",8206), + ("rlm",8207), ("ndash",8211), ("mdash",8212), ("lsquo",8216), ("rsquo",8217), ("sbquo",8218), ("ldquo",8220), ("rdquo",8221), ("bdquo",8222), + ("dagger",8224), ("Dagger",8225), 
("permil",8240), ("lsaquo",8249), ("rsaquo",8250), ("fnof",402), ("bull",8226), ("hellip",8230), ("prime",8242), + ("Prime",8243), ("oline",8254), ("frasl",8260), ("weierp",8472), ("image",8465), ("real",8476), ("alefsym",8501), ("larr",8592), ("uarr",8593), + ("rarr",8594), ("darr",8495), ("harr",8596), ("crarr",8629), ("lArr",8656), ("uArr",8657), ("rArr",8658), ("dArr",8659), ("hArr",8660), + ("forall",8704), ("part",8706), ("exist",8707), ("empty",8709), ("nabla",8711), ("isin",8712), ("notin",8713), ("ni",8715), ("prod",8719), + ("sum",8721), ("minus",8722), ("lowast",8727), ("radic",8730), ("prop",8733), ("infin",8734), ("ang",8736), ("and",8743), ("or",8744), + ("cap",8745), ("cup",8746), ("int",8747), ("there4",8756), ("sim",8764), ("cong",8773), ("asymp",8776), ("ne",8800), ("equiv",8801), ("le",8804), + ("ge",8805), ("sub",8834), ("sup",8835), ("nsub",8836), ("sube",8838), ("supe",8839), ("oplus",8853), ("otimes",8855), ("perp",8869), ("sdot",8901), + ("lceil",8968), ("rceil",8969), ("lfloor",8970), ("rfloor",8971), ("lang",9001), ("rang",9002), ("loz",9674), ("spades",9824), ("clubs",9827), + ("hearts",9829), ("diams",9830), ("Alpha",913), ("Beta",914), ("Gamma",915), ("Delta",916), ("Epsilon",917), ("Zeta",918), ("Eta",919), + ("Theta",920), ("Iota",921), ("Kappa",922), ("Lambda",923), ("Mu",924), ("Nu",925), ("Xi",926), ("Omicron",927), ("Pi",928), ("Rho",929), + ("Sigma",931), ("Tau",932), ("Upsilon",933), ("Phi",934), ("Chi",935), ("Psi",936), ("Omega",937), ("alpha",945), ("beta",946), ("gamma",947), + ("delta",948), ("epsilon",949), ("zeta",950), ("eta",951), ("theta",952), ("iota",953), ("kappa",954), ("lambda",955), ("mu",956), ("nu",957), + ("xi",958), ("omicron",959), ("pi",960), ("rho",961), ("sigmaf",962), ("sigma",963), ("tau",964), ("upsilon",965), ("phi",966), ("chi",967), + ("psi",968), ("omega",969), ("thetasym",977), ("upsih",978), ("piv",982)) + + val entMap: Map[String, Char] = Map.empty[String, Char] ++ entList.map { case (name, 
value) => (name, value.toChar)} + + val entities = entList. + map { case (name, value) => (name, new ParsedEntityDecl(name, new IntDef(value.toChar.toString)))} + + def apply() = entities +} diff --git a/src/xml/scala/xml/parsing/XhtmlParser.scala b/src/xml/scala/xml/parsing/XhtmlParser.scala new file mode 100644 index 0000000000..6ce5bec8d0 --- /dev/null +++ b/src/xml/scala/xml/parsing/XhtmlParser.scala @@ -0,0 +1,31 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package parsing + +import scala.io.Source + +/** An XML Parser that preserves `CDATA` blocks and knows about + * [[scala.xml.parsing.XhtmlEntities]]. + * + * @author (c) David Pollak, 2007 WorldWide Conferencing, LLC. + */ +class XhtmlParser(val input: Source) extends ConstructingHandler with MarkupParser with ExternalSources { + val preserveWS = true + ent ++= XhtmlEntities() +} + +/** Convenience method that instantiates, initializes and runs an `XhtmlParser`. 
+ * + * @author Burak Emir + */ +object XhtmlParser { + def apply(source: Source): NodeSeq = new XhtmlParser(source).initialize.document() +} diff --git a/src/xml/scala/xml/persistent/CachedFileStorage.scala b/src/xml/scala/xml/persistent/CachedFileStorage.scala new file mode 100644 index 0000000000..a1489ef3f4 --- /dev/null +++ b/src/xml/scala/xml/persistent/CachedFileStorage.scala @@ -0,0 +1,129 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package persistent + +import java.io.{ File, FileOutputStream } +import java.nio.ByteBuffer +import java.nio.channels.Channels +import java.lang.Thread + +import scala.collection.Iterator + +/** Mutable storage of immutable xml trees. Everything is kept in memory, + * with a thread periodically checking for changes and writing to file. + * + * To ensure atomicity, two files are used, `filename1` and `'$'+filename1`. + * The implementation switches between the two, deleting the older one + * after a complete dump of the database has been written. + * + * @author Burak Emir + */ +abstract class CachedFileStorage(private val file1: File) extends Thread { + + private val file2 = new File(file1.getParent, file1.getName+"$") + + /** Either equals `file1` or `file2`, references the next file in which + * updates will be stored. + */ + private var theFile: File = null + + private def switch() = { theFile = if (theFile == file1) file2 else file1; } + + /** this storage modified since last modification check */ + protected var dirty = false + + /** period between modification checks, in milliseconds */ + protected val interval = 1000 + + /** finds and loads the storage file. subclasses should call this method + * prior to any other, but only once, to obtain the initial sequence of nodes. 
+ */ + protected def initialNodes: Iterator[Node] = (file1.exists, file2.exists) match { + case (false,false) => + theFile = file1 + Iterator.empty + case (true, true ) if (file1.lastModified < file2.lastModified) => + theFile = file2 + load + case (true, _ ) => + theFile = file1 + load + case _ => + theFile = file2 + load + } + + /** returns an iterator over the nodes in this storage */ + def nodes: Iterator[Node] + + /** adds a node, setting this.dirty to true as a side effect */ + def += (e: Node): Unit + + /** removes a tree, setting this.dirty to true as a side effect */ + def -= (e: Node): Unit + + /* loads and parses XML from file */ + private def load: Iterator[Node] = { + import scala.io.Source + import scala.xml.parsing.ConstructingParser + log("[load]\nloading "+theFile) + val src = Source.fromFile(theFile) + log("parsing "+theFile) + val res = ConstructingParser.fromSource(src,preserveWS = false).document.docElem(0) + switch() + log("[load done]") + res.child.iterator + } + + /** saves the XML to file */ + private def save() = if (this.dirty) { + log("[save]\ndeleting "+theFile) + theFile.delete() + log("creating new "+theFile) + theFile.createNewFile() + val fos = new FileOutputStream(theFile) + val c = fos.getChannel() + + // @todo: optimize + val storageNode = { nodes.toList } + val w = Channels.newWriter(c, "utf-8") + XML.write(w, storageNode, "utf-8", xmlDecl = true, doctype = null) + + log("writing to "+theFile) + + w.close + c.close + fos.close + dirty = false + switch() + log("[save done]") + } + + /** Run method of the thread. remember to use `start()` to start a thread, + * not `run`. */ + override def run = { + log("[run]\nstarting storage thread, checking every "+interval+" ms") + while (true) { + Thread.sleep( this.interval.toLong ) + save() + } + } + + /** Force writing of contents to the file, even if there has not been any + * update. 
*/ + def flush() = { + this.dirty = true + save() + } + + @deprecated("This method and its usages will be removed. Use a debugger to debug code.", "2.11") + def log(msg: String): Unit = {} +} diff --git a/src/xml/scala/xml/persistent/Index.scala b/src/xml/scala/xml/persistent/Index.scala new file mode 100644 index 0000000000..9ee45e7086 --- /dev/null +++ b/src/xml/scala/xml/persistent/Index.scala @@ -0,0 +1,17 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package persistent + +/** an Index returns some unique key that is part of a node + */ +abstract class Index[A] extends Function1[Node,A] {} diff --git a/src/xml/scala/xml/persistent/SetStorage.scala b/src/xml/scala/xml/persistent/SetStorage.scala new file mode 100644 index 0000000000..8db56a2e71 --- /dev/null +++ b/src/xml/scala/xml/persistent/SetStorage.scala @@ -0,0 +1,42 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package xml +package persistent + +import scala.collection.mutable +import java.io.File + +/** A persistent store with set semantics. This class allows to add and remove + * trees, but never contains two structurally equal trees. 
+ * + * @author Burak Emir + */ +class SetStorage(file: File) extends CachedFileStorage(file) { + + private val theSet = mutable.HashSet[Node]() + + // initialize + + { + val it = super.initialNodes + dirty = it.hasNext + theSet ++= it + } + + /* forwarding methods to hashset*/ + + def += (e: Node): Unit = synchronized { this.dirty = true; theSet += e } + + def -= (e: Node): Unit = synchronized { this.dirty = true; theSet -= e } + + def nodes = synchronized { theSet.iterator } + +} diff --git a/src/xml/scala/xml/pull/XMLEvent.scala b/src/xml/scala/xml/pull/XMLEvent.scala new file mode 100644 index 0000000000..3beb3648e7 --- /dev/null +++ b/src/xml/scala/xml/pull/XMLEvent.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package pull + +/** An XML event for pull parsing. All events received during + * parsing will be one of the subclasses of this trait. + */ +trait XMLEvent + +/** + * An Element's start tag was encountered. + * @param pre prefix, if any, on the element. This is the `xs` in `foo`. + * @param label the name of the element, not including the prefix + * @param attrs any attributes on the element + */ +case class EvElemStart(pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) extends XMLEvent + +/** + * An Element's end tag was encountered. + * @param pre prefix, if any, on the element. This is the `xs` in `foo`. + * @param label the name of the element, not including the prefix + */ +case class EvElemEnd(pre: String, label: String) extends XMLEvent + +/** + * A text node was encountered. + * @param text the text that was found + */ +case class EvText(text: String) extends XMLEvent + +/** An entity reference was encountered. + * @param entity the name of the entity, e.g. 
`gt` when encountering the entity `>` + */ +case class EvEntityRef(entity: String) extends XMLEvent + +/** + * A processing instruction was encountered. + * @param target the "PITarget" of the processing instruction. For the instruction ``, the target would + * be `foo` + * @param text the remainder of the instruction. For the instruction ``, the text would + * be `bar="baz"` + * @see [[http://www.w3.org/TR/REC-xml/#sec-pi]] + */ +case class EvProcInstr(target: String, text: String) extends XMLEvent + +/** + * A comment was encountered + * @param text the text of the comment + */ +case class EvComment(text: String) extends XMLEvent diff --git a/src/xml/scala/xml/pull/XMLEventReader.scala b/src/xml/scala/xml/pull/XMLEventReader.scala new file mode 100755 index 0000000000..76e51e17fd --- /dev/null +++ b/src/xml/scala/xml/pull/XMLEventReader.scala @@ -0,0 +1,157 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package pull + +import scala.io.Source +import java.lang.Thread +import java.util.concurrent.LinkedBlockingQueue +import java.nio.channels.ClosedChannelException +import scala.xml.parsing.{ ExternalSources, MarkupHandler, MarkupParser } + +/** + * Main entry point into creating an event-based XML parser. Treating this + * as a [[scala.collection.Iterator]] will provide access to the generated events. + * @param src A [[scala.io.Source]] for XML data to parse + * + * @author Burak Emir + * @author Paul Phillips + */ +class XMLEventReader(src: Source) +extends scala.collection.AbstractIterator[XMLEvent] + with ProducerConsumerIterator[XMLEvent] { + + // We implement a pull parser as an iterator, but since we may be operating on + // a stream (e.g. XML over a network) there may be arbitrarily long periods when + // the queue is empty. 
Fortunately the ProducerConsumerIterator is ideally + // suited to this task, possibly because it was written for use by this class. + + // to override as necessary + val preserveWS = true + + override val MaxQueueSize = 1000 + protected case object POISON extends XMLEvent + val EndOfStream = POISON + + // thread machinery + private[this] val parser = new Parser(src) + private[this] val parserThread = new Thread(parser, "XMLEventReader") + parserThread.start + // enqueueing the poison object is the reliable way to cause the + // iterator to terminate; hasNext will return false once it sees it. + // Calling interrupt() on the parserThread is the only way we can get + // it to stop producing tokens since it's lost deep in document() - + // we cross our fingers the interrupt() gets to its target, but if it + // fails for whatever reason the iterator correctness is not impacted, + // only performance (because it will finish the entire XML document, + // or at least as much as it can fit in the queue.) 
+ def stop() = { + produce(POISON) + parserThread.interrupt() + } + + private class Parser(val input: Source) extends MarkupHandler with MarkupParser with ExternalSources with Runnable { + val preserveWS = XMLEventReader.this.preserveWS + // track level for elem memory usage optimization + private var level = 0 + + // this is Parser's way to add to the queue - the odd return type + // is to conform to MarkupHandler's interface + def setEvent(es: XMLEvent*): NodeSeq = { + es foreach produce + NodeSeq.Empty + } + + override def elemStart(pos: Int, pre: String, label: String, attrs: MetaData, scope: NamespaceBinding) { + level += 1 + setEvent(EvElemStart(pre, label, attrs, scope)) + } + override def elemEnd(pos: Int, pre: String, label: String) { + setEvent(EvElemEnd(pre, label)) + level -= 1 + } + + // this is a dummy to satisfy MarkupHandler's API + // memory usage optimization return one for top level to satisfy + // MarkupParser.document() otherwise NodeSeq.Empty + private var ignoreWritten = false + final def elem(pos: Int, pre: String, label: String, attrs: MetaData, pscope: NamespaceBinding, empty: Boolean, nodes: NodeSeq): NodeSeq = + if (level == 1 && !ignoreWritten) {ignoreWritten = true; } else NodeSeq.Empty + + def procInstr(pos: Int, target: String, txt: String) = setEvent(EvProcInstr(target, txt)) + def comment(pos: Int, txt: String) = setEvent(EvComment(txt)) + def entityRef(pos: Int, n: String) = setEvent(EvEntityRef(n)) + def text(pos: Int, txt:String) = setEvent(EvText(txt)) + + override def run() { + curInput = input + interruptibly { this.initialize.document() } + setEvent(POISON) + } + } +} + +// An iterator designed for one or more producers to generate +// elements, and a single consumer to iterate. Iteration will continue +// until closeIterator() is called, after which point producers +// calling produce() will receive interruptions. 
+// +// Since hasNext may block indefinitely if nobody is producing, +// there is also an available() method which will return true if +// the next call hasNext is guaranteed not to block. +// +// This is not thread-safe for multiple consumers! +trait ProducerConsumerIterator[T >: Null] extends Iterator[T] { + // abstract - iterator-specific distinguished object for marking eos + val EndOfStream: T + + // defaults to unbounded - override to positive Int if desired + val MaxQueueSize = -1 + + def interruptibly[T](body: => T): Option[T] = try Some(body) catch { + case _: InterruptedException => Thread.currentThread.interrupt(); None + case _: ClosedChannelException => None + } + + private[this] lazy val queue = + if (MaxQueueSize < 0) new LinkedBlockingQueue[T]() + else new LinkedBlockingQueue[T](MaxQueueSize) + private[this] var buffer: T = _ + private def fillBuffer() = { + buffer = interruptibly(queue.take) getOrElse EndOfStream + isElement(buffer) + } + private def isElement(x: T) = x != null && x != EndOfStream + private def eos() = buffer == EndOfStream + + // public producer interface - this is the only method producers call, so + // LinkedBlockingQueue's synchronization is all we need. + def produce(x: T): Unit = if (!eos) interruptibly(queue put x) + + // consumer/iterator interface - we need not synchronize access to buffer + // because we required there to be only one consumer. 
+ def hasNext = !eos && (buffer != null || fillBuffer) + + def next() = { + if (eos()) throw new NoSuchElementException("ProducerConsumerIterator") + if (buffer == null) fillBuffer() + + drainBuffer() + } + + def available() = isElement(buffer) || isElement(queue.peek) + + private def drainBuffer() = { + assert(!eos) + val res = buffer + buffer = null + res + } +} diff --git a/src/xml/scala/xml/pull/package.scala b/src/xml/scala/xml/pull/package.scala new file mode 100644 index 0000000000..0e3019446b --- /dev/null +++ b/src/xml/scala/xml/pull/package.scala @@ -0,0 +1,42 @@ +package scala +package xml + +/** + * Classes needed to view an XML document as a series of events. The document + * is parsed by an [[scala.xml.pull.XMLEventReader]] instance. You can treat it as + * an [[scala.collection.Iterator]] to retrieve the events, which are all + * subclasses of [[scala.xml.pull.XMLEvent]]. + * + * {{{ + * scala> val source = Source.fromString(""" + * + * + * ]>Hello&bar;>""") + * + * source: scala.io.Source = non-empty iterator + * + * scala> val reader = new XMLEventReader(source) + * reader: scala.xml.pull.XMLEventReader = non-empty iterator + * + * scala> reader.foreach{ println(_) } + * EvProcInstr(instruction,custom value="customvalue") + * EvText( + * ) + * EvElemStart(null,foo,,) + * EvText(Hello) + * EvComment( this is a comment ) + * EvElemStart(null,bar,,) + * EvText(BAR) + * EvElemEnd(null,bar) + * EvElemStart(null,bar,,) + * EvEntityRef(gt) + * EvElemEnd(null,bar) + * EvElemEnd(null,foo) + * EvText( + * + * ) + * + * }}} + */ +package object pull diff --git a/src/xml/scala/xml/transform/BasicTransformer.scala b/src/xml/scala/xml/transform/BasicTransformer.scala new file mode 100644 index 0000000000..c98339fd67 --- /dev/null +++ b/src/xml/scala/xml/transform/BasicTransformer.scala @@ -0,0 +1,60 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** 
+** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package transform + +/** A class for XML transformations. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class BasicTransformer extends Function1[Node,Node] +{ + protected def unchanged(n: Node, ns: Seq[Node]) = + ns.length == 1 && (ns.head == n) + + /** Call transform(Node) for each node in ns, append results + * to NodeBuffer. + */ + def transform(it: Iterator[Node], nb: NodeBuffer): Seq[Node] = + it.foldLeft(nb)(_ ++= transform(_)).toSeq + + /** Call transform(Node) to each node in ns, yield ns if nothing changes, + * otherwise a new sequence of concatenated results. + */ + def transform(ns: Seq[Node]): Seq[Node] = { + val (xs1, xs2) = ns span (n => unchanged(n, transform(n))) + + if (xs2.isEmpty) ns + else xs1 ++ transform(xs2.head) ++ transform(xs2.tail) + } + + def transform(n: Node): Seq[Node] = { + if (n.doTransform) n match { + case Group(xs) => Group(transform(xs)) // un-group the hack Group tag + case _ => + val ch = n.child + val nch = transform(ch) + + if (ch eq nch) n + else Elem(n.prefix, n.label, n.attributes, n.scope, nch: _*) + } + else n + } + + def apply(n: Node): Node = { + val seq = transform(n) + if (seq.length > 1) + throw new UnsupportedOperationException("transform must return single node for root") + else seq.head + } +} diff --git a/src/xml/scala/xml/transform/RewriteRule.scala b/src/xml/scala/xml/transform/RewriteRule.scala new file mode 100644 index 0000000000..1399ee538d --- /dev/null +++ b/src/xml/scala/xml/transform/RewriteRule.scala @@ -0,0 +1,28 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package xml +package transform + +/** A RewriteRule, when applied to a term, yields either + * the result of rewriting the term or the term itself if the 
rule + * is not applied. + * + * @author Burak Emir + * @version 1.0 + */ +abstract class RewriteRule extends BasicTransformer { + /** a name for this rewrite rule */ + val name = this.toString() + override def transform(ns: Seq[Node]): Seq[Node] = super.transform(ns) + override def transform(n: Node): Seq[Node] = n +} + diff --git a/src/xml/scala/xml/transform/RuleTransformer.scala b/src/xml/scala/xml/transform/RuleTransformer.scala new file mode 100644 index 0000000000..3a222ba759 --- /dev/null +++ b/src/xml/scala/xml/transform/RuleTransformer.scala @@ -0,0 +1,16 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2002-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package xml +package transform + +class RuleTransformer(rules: RewriteRule*) extends BasicTransformer { + override def transform(n: Node): Seq[Node] = + rules.foldLeft(super.transform(n)) { (res, rule) => rule transform res } +} diff --git a/test/partest b/test/partest index 99a731a49b..d72c1026f3 100755 --- a/test/partest +++ b/test/partest @@ -64,7 +64,7 @@ if [ -z "$EXT_CLASSPATH" ] ; then fi done elif [ -f "$SCALA_HOME/build/pack/lib/scala-partest.jar" ] ; then - for lib in `echo "scala-partest scala-library scala-reflect scala-compiler diffutils"`; do + for lib in `echo "scala-partest scala-library scala-xml scala-reflect scala-compiler diffutils"`; do ext="$SCALA_HOME/build/pack/lib/$lib.jar" if [ -z "$EXT_CLASSPATH" ] ; then EXT_CLASSPATH="$ext" -- cgit v1.2.3 From 46a4635d3acc0a18869131879e6cde862d6b9776 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Mon, 24 Jun 2013 16:41:53 -0700 Subject: Spin off parser combinators to scala-parser-combinators.jar. 
--- .gitignore | 2 +- build.xml | 44 +- src/build/bnd/scala-parser-combinators.bnd | 5 + src/build/maven/maven-deploy.xml | 1 + src/build/maven/scala-parser-combinators-pom.xml | 59 ++ src/build/pack.xml | 5 + .../parsing/combinator/ImplicitConversions.scala | 43 - .../util/parsing/combinator/JavaTokenParsers.scala | 62 -- .../util/parsing/combinator/PackratParsers.scala | 312 ------- .../scala/util/parsing/combinator/Parsers.scala | 919 --------------------- .../util/parsing/combinator/RegexParsers.scala | 166 ---- .../util/parsing/combinator/lexical/Lexical.scala | 40 - .../util/parsing/combinator/lexical/Scanners.scala | 63 -- .../parsing/combinator/lexical/StdLexical.scala | 87 -- .../syntactical/StandardTokenParsers.scala | 32 - .../combinator/syntactical/StdTokenParsers.scala | 52 -- .../combinator/syntactical/TokenParsers.scala | 35 - .../util/parsing/combinator/token/StdTokens.scala | 39 - .../util/parsing/combinator/token/Tokens.scala | 43 - .../scala/util/parsing/input/CharArrayReader.scala | 35 - .../util/parsing/input/CharSequenceReader.scala | 66 -- .../scala/util/parsing/input/NoPosition.scala | 25 - .../scala/util/parsing/input/OffsetPosition.scala | 73 -- .../scala/util/parsing/input/PagedSeqReader.scala | 71 -- .../scala/util/parsing/input/Position.scala | 62 -- .../scala/util/parsing/input/Positional.scala | 30 - src/library/scala/util/parsing/input/Reader.scala | 62 -- .../scala/util/parsing/input/StreamReader.scala | 76 -- src/library/scala/util/parsing/json/JSON.scala | 97 --- src/library/scala/util/parsing/json/Lexer.scala | 90 -- src/library/scala/util/parsing/json/Parser.scala | 147 ---- .../parsing/combinator/ImplicitConversions.scala | 43 + .../util/parsing/combinator/JavaTokenParsers.scala | 62 ++ .../util/parsing/combinator/PackratParsers.scala | 312 +++++++ .../scala/util/parsing/combinator/Parsers.scala | 919 +++++++++++++++++++++ .../util/parsing/combinator/RegexParsers.scala | 166 ++++ 
.../util/parsing/combinator/lexical/Lexical.scala | 40 + .../util/parsing/combinator/lexical/Scanners.scala | 63 ++ .../parsing/combinator/lexical/StdLexical.scala | 87 ++ .../syntactical/StandardTokenParsers.scala | 32 + .../combinator/syntactical/StdTokenParsers.scala | 52 ++ .../combinator/syntactical/TokenParsers.scala | 35 + .../util/parsing/combinator/token/StdTokens.scala | 39 + .../util/parsing/combinator/token/Tokens.scala | 43 + .../scala/util/parsing/input/CharArrayReader.scala | 35 + .../util/parsing/input/CharSequenceReader.scala | 66 ++ .../scala/util/parsing/input/NoPosition.scala | 25 + .../scala/util/parsing/input/OffsetPosition.scala | 73 ++ .../scala/util/parsing/input/PagedSeqReader.scala | 71 ++ .../scala/util/parsing/input/Position.scala | 62 ++ .../scala/util/parsing/input/Positional.scala | 30 + .../scala/util/parsing/input/Reader.scala | 62 ++ .../scala/util/parsing/input/StreamReader.scala | 76 ++ .../scala/util/parsing/json/JSON.scala | 97 +++ .../scala/util/parsing/json/Lexer.scala | 90 ++ .../scala/util/parsing/json/Parser.scala | 147 ++++ .../scala/tools/partest/nest/FileManager.scala | 3 +- test/partest | 2 +- 58 files changed, 2837 insertions(+), 2738 deletions(-) create mode 100644 src/build/bnd/scala-parser-combinators.bnd create mode 100644 src/build/maven/scala-parser-combinators-pom.xml delete mode 100644 src/library/scala/util/parsing/combinator/ImplicitConversions.scala delete mode 100644 src/library/scala/util/parsing/combinator/JavaTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/PackratParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/Parsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/RegexParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/lexical/Lexical.scala delete mode 100644 src/library/scala/util/parsing/combinator/lexical/Scanners.scala delete mode 100644 
src/library/scala/util/parsing/combinator/lexical/StdLexical.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala delete mode 100644 src/library/scala/util/parsing/combinator/token/StdTokens.scala delete mode 100644 src/library/scala/util/parsing/combinator/token/Tokens.scala delete mode 100644 src/library/scala/util/parsing/input/CharArrayReader.scala delete mode 100644 src/library/scala/util/parsing/input/CharSequenceReader.scala delete mode 100644 src/library/scala/util/parsing/input/NoPosition.scala delete mode 100644 src/library/scala/util/parsing/input/OffsetPosition.scala delete mode 100644 src/library/scala/util/parsing/input/PagedSeqReader.scala delete mode 100644 src/library/scala/util/parsing/input/Position.scala delete mode 100644 src/library/scala/util/parsing/input/Positional.scala delete mode 100644 src/library/scala/util/parsing/input/Reader.scala delete mode 100644 src/library/scala/util/parsing/input/StreamReader.scala delete mode 100644 src/library/scala/util/parsing/json/JSON.scala delete mode 100644 src/library/scala/util/parsing/json/Lexer.scala delete mode 100644 src/library/scala/util/parsing/json/Parser.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/JavaTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/Parsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala create mode 100644 
src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala create mode 100644 src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/NoPosition.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Position.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Positional.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/Reader.scala create mode 100644 src/parser-combinators/scala/util/parsing/input/StreamReader.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/JSON.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/Lexer.scala create mode 100644 src/parser-combinators/scala/util/parsing/json/Parser.scala (limited to 'src') diff --git a/.gitignore b/.gitignore index 378eac25d3..84c048a73c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -build +/build/ diff --git a/build.xml b/build.xml index 854bb9c68b..6906c15a19 100755 --- a/build.xml +++ b/build.xml @@ -472,7 +472,7 @@ TODO: There must be a variable of the 
shape @{stage}.@{project}.build.path for all @{stage} in locker, quick, strap and all @{project} in library, reflect, compiler - when stage is quick, @{project} also includes: actors, repl, xml, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap + when stage is quick, @{project} also includes: actors, parser-combinators, xml, repl, swing, plugins, scalacheck, interactive, scaladoc, partest, scalap --> @@ -510,6 +510,11 @@ TODO: + + + + + @@ -545,6 +550,7 @@ TODO: + @@ -566,6 +572,7 @@ TODO: + @@ -577,6 +584,7 @@ TODO: + @@ -596,6 +604,8 @@ TODO: + + @@ -622,7 +632,8 @@ TODO: - + + @@ -663,6 +674,7 @@ TODO: + @@ -689,6 +701,7 @@ TODO: + @@ -718,6 +731,7 @@ TODO: + @@ -1162,6 +1176,9 @@ TODO: + + + @@ -1171,7 +1188,7 @@ TODO: - + @@ -1180,7 +1197,7 @@ TODO: - + @@ -1225,7 +1242,7 @@ TODO: - + @@ -1240,7 +1257,8 @@ TODO: - + + @@ -1277,7 +1295,7 @@ TODO: - + @@ -1381,6 +1399,7 @@ TODO: + @@ -1707,6 +1726,12 @@ TODO: + + + + + + @@ -1775,7 +1800,7 @@ TODO: - + "+ r) - r - } - - /** A parser generator for repetitions. - * - * `rep(p)` repeatedly uses `p` to parse the input until `p` fails - * (the result is a List of the consecutive results of `p`). - * - * @param p a `Parser` that is to be applied successively to the input - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input. - */ - def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) - - /** A parser generator for interleaved repetitions. - * - * `repsep(p, q)` repeatedly uses `p` interleaved with `q` to parse the input, until `p` fails. - * (The result is a `List` of the results of `p`.) - * - * Example: `repsep(term, ",")` parses a comma-separated list of term's, yielding a list of these terms. 
- * - * @param p a `Parser` that is to be applied successively to the input - * @param q a `Parser` that parses the elements that separate the elements parsed by `p` - * @return A parser that returns a list of results produced by repeatedly applying `p` (interleaved with `q`) to the input. - * The results of `p` are collected in a list. The results of `q` are discarded. - */ - def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = - rep1sep(p, q) | success(List()) - - /** A parser generator for non-empty repetitions. - * - * `rep1(p)` repeatedly uses `p` to parse the input until `p` fails -- `p` must succeed at least - * once (the result is a `List` of the consecutive results of `p`) - * - * @param p a `Parser` that is to be applied successively to the input - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches at least once). - */ - def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) - - /** A parser generator for non-empty repetitions. - * - * `rep1(f, p)` first uses `f` (which must succeed) and then repeatedly - * uses `p` to parse the input until `p` fails - * (the result is a `List` of the consecutive results of `f` and `p`) - * - * @param first a `Parser` that parses the first piece of input - * @param p0 a `Parser` that is to be applied successively to the rest of the input (if any) -- evaluated at most once, and only when necessary - * @return A parser that returns a list of results produced by first applying `f` and then - * repeatedly `p` to the input (it only succeeds if `f` matches). 
- */ - @migration("The `p0` call-by-name arguments is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") - def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] = Parser { in => - lazy val p = p0 // lazy argument - val elems = new ListBuffer[T] - - def continue(in: Input): ParseResult[List[T]] = { - val p0 = p // avoid repeatedly re-evaluating by-name parser - @tailrec def applyp(in0: Input): ParseResult[List[T]] = p0(in0) match { - case Success(x, rest) => elems += x ; applyp(rest) - case e @ Error(_, _) => e // still have to propagate error - case _ => Success(elems.toList, in0) - } - - applyp(in) - } - - first(in) match { - case Success(x, rest) => elems += x ; continue(rest) - case ns: NoSuccess => ns - } - } - - /** A parser generator for a specified number of repetitions. - * - * `repN(n, p)` uses `p` exactly `n` time to parse the input - * (the result is a `List` of the `n` consecutive results of `p`). - * - * @param p a `Parser` that is to be applied successively to the input - * @param num the exact number of times `p` must succeed - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches exactly `n` times). - */ - def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] = - if (num == 0) success(Nil) else Parser { in => - val elems = new ListBuffer[T] - val p0 = p // avoid repeatedly re-evaluating by-name parser - - @tailrec def applyp(in0: Input): ParseResult[List[T]] = - if (elems.length == num) Success(elems.toList, in0) - else p0(in0) match { - case Success(x, rest) => elems += x ; applyp(rest) - case ns: NoSuccess => ns - } - - applyp(in) - } - - /** A parser generator for non-empty repetitions. - * - * `rep1sep(p, q)` repeatedly applies `p` interleaved with `q` to parse the - * input, until `p` fails. The parser `p` must succeed at least once. 
- * - * @param p a `Parser` that is to be applied successively to the input - * @param q a `Parser` that parses the elements that separate the elements parsed by `p` - * (interleaved with `q`) - * @return A parser that returns a list of results produced by repeatedly applying `p` to the input - * (and that only succeeds if `p` matches at least once). - * The results of `p` are collected in a list. The results of `q` are discarded. - */ - def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] = - p ~ rep(q ~> p) ^^ {case x~y => x::y} - - /** A parser generator that, roughly, generalises the rep1sep generator so - * that `q`, which parses the separator, produces a left-associative - * function that combines the elements it separates. - * - * ''From: J. Fokker. Functional parsers. In J. Jeuring and E. Meijer, editors, Advanced Functional Programming, - * volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.'' - * - * @param p a parser that parses the elements - * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that - * combines two elements into one - */ - def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] - = chainl1(p, p, q) - - /** A parser generator that, roughly, generalises the `rep1sep` generator - * so that `q`, which parses the separator, produces a left-associative - * function that combines the elements it separates. 
- * - * @param first a parser that parses the first element - * @param p a parser that parses the subsequent elements - * @param q a parser that parses the token(s) separating the elements, - * yielding a left-associative function that combines two elements - * into one - */ - def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] - = first ~ rep(q ~ p) ^^ { - case x ~ xs => xs.foldLeft(x: T){case (a, f ~ b) => f(a, b)} // x's type annotation is needed to deal with changed type inference due to SI-5189 - } - - /** A parser generator that generalises the `rep1sep` generator so that `q`, - * which parses the separator, produces a right-associative function that - * combines the elements it separates. Additionally, the right-most (last) - * element and the left-most combining function have to be supplied. - * - * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) - * - * @param p a parser that parses the elements - * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that - * combines two elements into one - * @param combine the "last" (left-most) combination function to be applied - * @param first the "first" (right-most) element to be combined - */ - def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] - = p ~ rep(q ~ p) ^^ { - case x ~ xs => (new ~(combine, x) :: xs).foldRight(first){case (f ~ a, b) => f(a, b)} - } - - /** A parser generator for optional sub-phrases. - * - * `opt(p)` is a parser that returns `Some(x)` if `p` returns `x` and `None` if `p` fails. 
- * - * @param p A `Parser` that is tried on the input - * @return a `Parser` that always succeeds: either with the result provided by `p` or - * with the empty result - */ - def opt[T](p: => Parser[T]): Parser[Option[T]] = - p ^^ (x => Some(x)) | success(None) - - /** Wrap a parser so that its failures and errors become success and - * vice versa -- it never consumes any input. - */ - def not[T](p: => Parser[T]): Parser[Unit] = Parser { in => - p(in) match { - case Success(_, _) => Failure("Expected failure", in) - case _ => Success((), in) - } - } - - /** A parser generator for guard expressions. The resulting parser will - * fail or succeed just like the one given as parameter but it will not - * consume any input. - * - * @param p a `Parser` that is to be applied to the input - * @return A parser that returns success if and only if `p` succeeds but - * never consumes any input - */ - def guard[T](p: => Parser[T]): Parser[T] = Parser { in => - p(in) match{ - case s@ Success(s1,_) => Success(s1, in) - case e => e - } - } - - /** `positioned` decorates a parser's result with the start position of the - * input it consumed. - * - * @param p a `Parser` whose result conforms to `Positional`. - * @return A parser that has the same behaviour as `p`, but which marks its - * result with the start position of the input it consumed, - * if it didn't already have a position. - */ - def positioned[T <: Positional](p: => Parser[T]): Parser[T] = Parser { in => - p(in) match { - case Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1) - case ns: NoSuccess => ns - } - } - - /** A parser generator delimiting whole phrases (i.e. programs). - * - * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. - * - * @param p the parser that must consume all input for the resulting parser - * to succeed. - * @return a parser that has the same result as `p`, but that only succeeds - * if `p` consumed all the input. 
- */ - def phrase[T](p: Parser[T]) = new Parser[T] { - def apply(in: Input) = lastNoSuccessVar.withValue(None) { - p(in) match { - case s @ Success(out, in1) => - if (in1.atEnd) - s - else - lastNoSuccessVar.value filterNot { _.next.pos < in1.pos } getOrElse Failure("end of input expected", in1) - case ns => lastNoSuccessVar.value.getOrElse(ns) - } - } - } - - /** Given a concatenation with a repetition (list), move the concatenated element into the list */ - def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } - - /** A wrapper over sequence of matches. - * - * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with - * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result - * of the parser can be extracted from this case class. - * - * It also enables pattern matching, so something like this is possible: - * - * {{{ - * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = - * p1 ~ p2 ^^ { case a ~ b => a + b } - * }}} - */ - case class ~[+a, +b](_1: a, _2: b) { - override def toString = "("+ _1 +"~"+ _2 +")" - } - - /** A parser whose `~` combinator disallows back-tracking. 
- */ - trait OnceParser[+T] extends Parser[T] { - override def ~ [U](p: => Parser[U]): Parser[~[T, U]] - = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~") } - } -} diff --git a/src/library/scala/util/parsing/combinator/RegexParsers.scala b/src/library/scala/util/parsing/combinator/RegexParsers.scala deleted file mode 100644 index 8ebbc573ad..0000000000 --- a/src/library/scala/util/parsing/combinator/RegexParsers.scala +++ /dev/null @@ -1,166 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.combinator - -import java.util.regex.Pattern -import scala.util.matching.Regex -import scala.util.parsing.input._ -import scala.collection.immutable.PagedSeq -import scala.language.implicitConversions - -/** The ''most important'' differences between `RegexParsers` and - * [[scala.util.parsing.combinator.Parsers]] are: - * - * - `Elem` is defined to be [[scala.Char]] - * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, - * so that string literals can be used as parser combinators. - * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, - * so that regex expressions can be used as parser combinators. - * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, - * skip any whitespace before each parser is called. - * - Protected val `whiteSpace` returns a regex that identifies whitespace. 
- * - * For example, this creates a very simple calculator receiving `String` input: - * - * {{{ - * object Calculator extends RegexParsers { - * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } - * def factor: Parser[Double] = number | "(" ~> expr <~ ")" - * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { - * case number ~ list => (number /: list) { - * case (x, "*" ~ y) => x * y - * case (x, "/" ~ y) => x / y - * } - * } - * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { - * case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /: - * case (x, "+" ~ y) => x + y - * case (x, "-" ~ y) => x - y - * } - * } - * - * def apply(input: String): Double = parseAll(expr, input) match { - * case Success(result, _) => result - * case failure : NoSuccess => scala.sys.error(failure.msg) - * } - * } - * }}} - */ -trait RegexParsers extends Parsers { - - type Elem = Char - - protected val whiteSpace = """\s+""".r - - def skipWhitespace = whiteSpace.toString.length > 0 - - /** Method called to handle whitespace before parsers. - * - * It checks `skipWhitespace` and, if true, skips anything - * matching `whiteSpace` starting from the current offset. - * - * @param source The input being parsed. - * @param offset The offset into `source` from which to match. - * @return The offset to be used for the next parser. 
- */ - protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = - if (skipWhitespace) - (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { - case Some(matched) => offset + matched.end - case None => offset - } - else - offset - - /** A parser that matches a literal string */ - implicit def literal(s: String): Parser[String] = new Parser[String] { - def apply(in: Input) = { - val source = in.source - val offset = in.offset - val start = handleWhiteSpace(source, offset) - var i = 0 - var j = start - while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) { - i += 1 - j += 1 - } - if (i == s.length) - Success(source.subSequence(start, j).toString, in.drop(j - offset)) - else { - val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" - Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset)) - } - } - } - - /** A parser that matches a regex string */ - implicit def regex(r: Regex): Parser[String] = new Parser[String] { - def apply(in: Input) = { - val source = in.source - val offset = in.offset - val start = handleWhiteSpace(source, offset) - (r findPrefixMatchOf (source.subSequence(start, source.length))) match { - case Some(matched) => - Success(source.subSequence(start, start + matched.end).toString, - in.drop(start + matched.end - offset)) - case None => - val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" - Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset)) - } - } - } - - /** `positioned` decorates a parser's result with the start position of the input it consumed. - * If whitespace is being skipped, then it is skipped before the start position is recorded. - * - * @param p a `Parser` whose result conforms to `Positional`. 
- * @return A parser that has the same behaviour as `p`, but which marks its result with the - * start position of the input it consumed after whitespace has been skipped, if it - * didn't already have a position. - */ - override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = { - val pp = super.positioned(p) - new Parser[T] { - def apply(in: Input) = { - val offset = in.offset - val start = handleWhiteSpace(in.source, offset) - pp(in.drop (start - offset)) - } - } - } - - override def phrase[T](p: Parser[T]): Parser[T] = - super.phrase(p <~ opt("""\z""".r)) - - /** Parse some prefix of reader `in` with parser `p`. */ - def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = - p(in) - - /** Parse some prefix of character sequence `in` with parser `p`. */ - def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = - p(new CharSequenceReader(in)) - - /** Parse some prefix of reader `in` with parser `p`. */ - def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = - p(new PagedSeqReader(PagedSeq.fromReader(in))) - - /** Parse all of reader `in` with parser `p`. */ - def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = - parse(phrase(p), in) - - /** Parse all of reader `in` with parser `p`. */ - def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = - parse(phrase(p), in) - - /** Parse all of character sequence `in` with parser `p`. 
*/ - def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = - parse(phrase(p), in) -} diff --git a/src/library/scala/util/parsing/combinator/lexical/Lexical.scala b/src/library/scala/util/parsing/combinator/lexical/Lexical.scala deleted file mode 100644 index d8029d068f..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/Lexical.scala +++ /dev/null @@ -1,40 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing -package combinator -package lexical - -import token._ -import input.CharArrayReader.EofCh - -/** This component complements the `Scanners` component with - * common operations for lexical parsers. - * - * Refer to [[scala.util.parsing.combinator.lexical.StdLexical]] - * for a concrete implementation for a simple, Scala-like language. - * - * @author Martin Odersky, Adriaan Moors - */ -abstract class Lexical extends Scanners with Tokens { - - /** A character-parser that matches a letter (and returns it).*/ - def letter = elem("letter", _.isLetter) - - /** A character-parser that matches a digit (and returns it).*/ - def digit = elem("digit", _.isDigit) - - /** A character-parser that matches any character except the ones given in `cs` (and returns it).*/ - def chrExcept(cs: Char*) = elem("", ch => (cs forall (ch != _))) - - /** A character-parser that matches a white-space character (and returns it).*/ - def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) -} diff --git a/src/library/scala/util/parsing/combinator/lexical/Scanners.scala b/src/library/scala/util/parsing/combinator/lexical/Scanners.scala deleted file mode 100644 index 2e12915bb8..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/Scanners.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala 
API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package lexical - -import input._ - -/** This component provides core functionality for lexical parsers. - * - * See its subclasses [[scala.util.parsing.combinator.lexical.Lexical]] and -- most interestingly - * [[scala.util.parsing.combinator.lexical.StdLexical]], for more functionality. - * - * @author Martin Odersky, Adriaan Moors - */ -trait Scanners extends Parsers { - type Elem = Char - type Token - - /** This token is produced by a scanner `Scanner` when scanning failed. */ - def errorToken(msg: String): Token - - /** A parser that produces a token (from a stream of characters). */ - def token: Parser[Token] - - /** A parser for white-space -- its result will be discarded. */ - def whitespace: Parser[Any] - - /** `Scanner` is essentially¹ a parser that produces `Token`s - * from a stream of characters. The tokens it produces are typically - * passed to parsers in `TokenParsers`. 
- * - * @note ¹ `Scanner` is really a `Reader` of `Token`s - */ - class Scanner(in: Reader[Char]) extends Reader[Token] { - /** Convenience constructor (makes a character reader out of the given string) */ - def this(in: String) = this(new CharArrayReader(in.toCharArray())) - private val (tok, rest1, rest2) = whitespace(in) match { - case Success(_, in1) => - token(in1) match { - case Success(tok, in2) => (tok, in1, in2) - case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) - } - case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) - } - private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest - - override def source: java.lang.CharSequence = in.source - override def offset: Int = in.offset - def first = tok - def rest = new Scanner(rest2) - def pos = rest1.pos - def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false }) - } -} - diff --git a/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala deleted file mode 100644 index 32d7502cda..0000000000 --- a/src/library/scala/util/parsing/combinator/lexical/StdLexical.scala +++ /dev/null @@ -1,87 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package lexical - -import token._ -import input.CharArrayReader.EofCh -import scala.collection.mutable - -/** This component provides a standard lexical parser for a simple, - * [[http://scala-lang.org Scala]]-like language. It parses keywords and - * identifiers, numeric literals (integers), strings, and delimiters. - * - * To distinguish between identifiers and keywords, it uses a set of - * reserved identifiers: every string contained in `reserved` is returned - * as a keyword token. 
(Note that `=>` is hard-coded as a keyword.) - * Additionally, the kinds of delimiters can be specified by the - * `delimiters` set. - * - * Usually this component is used to break character-based input into - * bigger tokens, which are then passed to a token-parser (see - * [[scala.util.parsing.combinator.syntactical.TokenParsers]].) - * - * @author Martin Odersky - * @author Iulian Dragos - * @author Adriaan Moors - */ -class StdLexical extends Lexical with StdTokens { - // see `token` in `Scanners` - def token: Parser[Token] = - ( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } - | digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") } - | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") } - | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") } - | EofCh ^^^ EOF - | '\'' ~> failure("unclosed string literal") - | '\"' ~> failure("unclosed string literal") - | delim - | failure("illegal character") - ) - - /** Returns the legal identifier chars, except digits. */ - def identChar = letter | elem('_') - - // see `whitespace in `Scanners` - def whitespace: Parser[Any] = rep[Any]( - whitespaceChar - | '/' ~ '*' ~ comment - | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') ) - | '/' ~ '*' ~ failure("unclosed comment") - ) - - protected def comment: Parser[Any] = ( - '*' ~ '/' ^^ { case _ => ' ' } - | chrExcept(EofCh) ~ comment - ) - - /** The set of reserved identifiers: these will be returned as `Keyword`s. */ - val reserved = new mutable.HashSet[String] - - /** The set of delimiters (ordering does not matter). 
*/ - val delimiters = new mutable.HashSet[String] - - protected def processIdent(name: String) = - if (reserved contains name) Keyword(name) else Identifier(name) - - private lazy val _delim: Parser[Token] = { - // construct parser for delimiters by |'ing together the parsers for the individual delimiters, - // starting with the longest one -- otherwise a delimiter D will never be matched if there is - // another delimiter that is a prefix of D - def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) } - - val d = new Array[String](delimiters.size) - delimiters.copyToArray(d, 0) - scala.util.Sorting.quickSort(d) - (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x) - } - protected def delim: Parser[Token] = _delim -} diff --git a/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala deleted file mode 100644 index 5b9d14c9a7..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala +++ /dev/null @@ -1,32 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing -package combinator -package syntactical - -import token._ -import lexical.StdLexical -import scala.language.implicitConversions - -/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. 
-* -* @author Martin Odersky, Adriaan Moors - */ -class StandardTokenParsers extends StdTokenParsers { - type Tokens = StdTokens - val lexical = new StdLexical - - //an implicit keyword function that gives a warning when a given word is not in the reserved/delimiters list - override implicit def keyword(chars : String): Parser[String] = - if(lexical.reserved.contains(chars) || lexical.delimiters.contains(chars)) super.keyword(chars) - else failure("You are trying to parse \""+chars+"\", but it is neither contained in the delimiters list, nor in the reserved keyword list of your lexical object") - -} diff --git a/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala deleted file mode 100644 index adcf85da7a..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing -package combinator -package syntactical - -import token._ -import scala.collection.mutable -import scala.language.implicitConversions - -/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. -* -* @author Martin Odersky, Adriaan Moors - */ -trait StdTokenParsers extends TokenParsers { - type Tokens <: StdTokens - import lexical.{Keyword, NumericLit, StringLit, Identifier} - - protected val keywordCache = mutable.HashMap[String, Parser[String]]() - - /** A parser which matches a single keyword token. - * - * @param chars The character string making up the matched keyword. 
- * @return a `Parser` that matches the given string - */ -// implicit def keyword(chars: String): Parser[String] = accept(Keyword(chars)) ^^ (_.chars) - implicit def keyword(chars: String): Parser[String] = - keywordCache.getOrElseUpdate(chars, accept(Keyword(chars)) ^^ (_.chars)) - - /** A parser which matches a numeric literal */ - def numericLit: Parser[String] = - elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars) - - /** A parser which matches a string literal */ - def stringLit: Parser[String] = - elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars) - - /** A parser which matches an identifier */ - def ident: Parser[String] = - elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars) -} - - diff --git a/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala b/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala deleted file mode 100644 index b06babcd7e..0000000000 --- a/src/library/scala/util/parsing/combinator/syntactical/TokenParsers.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing -package combinator -package syntactical - -/** This is the core component for token-based parsers. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait TokenParsers extends Parsers { - /** `Tokens` is the abstract type of the `Token`s consumed by the parsers in this component. */ - type Tokens <: token.Tokens - - /** `lexical` is the component responsible for consuming some basic kind of - * input (usually character-based) and turning it into the tokens - * understood by these parsers. 
- */ - val lexical: Tokens - - /** The input-type for these parsers*/ - type Elem = lexical.Token - -} - - diff --git a/src/library/scala/util/parsing/combinator/token/StdTokens.scala b/src/library/scala/util/parsing/combinator/token/StdTokens.scala deleted file mode 100644 index a102d1541e..0000000000 --- a/src/library/scala/util/parsing/combinator/token/StdTokens.scala +++ /dev/null @@ -1,39 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package token - -/** This component provides the standard `Token`s for a simple, Scala-like language. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait StdTokens extends Tokens { - /** The class of keyword tokens */ - case class Keyword(chars: String) extends Token { - override def toString = "`"+chars+"'" - } - - /** The class of numeric literal tokens */ - case class NumericLit(chars: String) extends Token { - override def toString = chars - } - - /** The class of string literal tokens */ - case class StringLit(chars: String) extends Token { - override def toString = "\""+chars+"\"" - } - - /** The class of identifier tokens */ - case class Identifier(chars: String) extends Token { - override def toString = "identifier "+chars - } -} diff --git a/src/library/scala/util/parsing/combinator/token/Tokens.scala b/src/library/scala/util/parsing/combinator/token/Tokens.scala deleted file mode 100644 index 5c3f1f95b5..0000000000 --- a/src/library/scala/util/parsing/combinator/token/Tokens.scala +++ /dev/null @@ -1,43 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing -package combinator -package token - -/** This 
component provides the notion of `Token`, the unit of information that is passed from lexical - * parsers in the `Lexical` component to the parsers in the `TokenParsers` component. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait Tokens { - /** Objects of this type are produced by a lexical parser or ``scanner'', and consumed by a parser. - * - * @see [[scala.util.parsing.combinator.syntactical.TokenParsers]] - */ - abstract class Token { - def chars: String - } - - /** A class of error tokens. Error tokens are used to communicate - * errors detected during lexical analysis - */ - case class ErrorToken(msg: String) extends Token { - def chars = "*** error: "+msg - } - - /** A class for end-of-file tokens */ - case object EOF extends Token { - def chars = "" - } - - /** This token is produced by a scanner `Scanner` when scanning failed. */ - def errorToken(msg: String): Token = new ErrorToken(msg) -} diff --git a/src/library/scala/util/parsing/input/CharArrayReader.scala b/src/library/scala/util/parsing/input/CharArrayReader.scala deleted file mode 100644 index 22530cb9aa..0000000000 --- a/src/library/scala/util/parsing/input/CharArrayReader.scala +++ /dev/null @@ -1,35 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -object CharArrayReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. 
- * - * @param chars an array of characters - * @param index starting offset into the array; the first element returned will be `source(index)` - * - * @author Martin Odersky - * @author Adriaan Moors - */ -class CharArrayReader(chars: Array[Char], index: Int) extends CharSequenceReader(chars, index) { - - def this(chars: Array[Char]) = this(chars, 0) - -} diff --git a/src/library/scala/util/parsing/input/CharSequenceReader.scala b/src/library/scala/util/parsing/input/CharSequenceReader.scala deleted file mode 100644 index 8e7751cc82..0000000000 --- a/src/library/scala/util/parsing/input/CharSequenceReader.scala +++ /dev/null @@ -1,66 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky, Adriaan Moors - */ -object CharSequenceReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. - * - * @param source the source sequence - * @param offset starting offset. - * - * @author Martin Odersky - */ -class CharSequenceReader(override val source: java.lang.CharSequence, - override val offset: Int) extends Reader[Char] { - import CharSequenceReader._ - - /** Construct a `CharSequenceReader` with its first element at - * `source(0)` and position `(1,1)`. - */ - def this(source: java.lang.CharSequence) = this(source, 0) - - /** Returns the first element of the reader, or EofCh if reader is at its end. - */ - def first = - if (offset < source.length) source.charAt(offset) else EofCh - - /** Returns a CharSequenceReader consisting of all elements except the first. 
- * - * @return If `atEnd` is `true`, the result will be `this`; - * otherwise, it's a `CharSequenceReader` containing the rest of input. - */ - def rest: CharSequenceReader = - if (offset < source.length) new CharSequenceReader(source, offset + 1) - else this - - /** The position of the first element in the reader. - */ - def pos: Position = new OffsetPosition(source, offset) - - /** true iff there are no more elements in this reader (except for trailing - * EofCh's) - */ - def atEnd = offset >= source.length - - /** Returns an abstract reader consisting of all elements except the first - * `n` elements. - */ - override def drop(n: Int): CharSequenceReader = - new CharSequenceReader(source, offset + n) -} diff --git a/src/library/scala/util/parsing/input/NoPosition.scala b/src/library/scala/util/parsing/input/NoPosition.scala deleted file mode 100644 index 4a32264b79..0000000000 --- a/src/library/scala/util/parsing/input/NoPosition.scala +++ /dev/null @@ -1,25 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.input - -/** Undefined position. 
- * - * @author Martin Odersky - * @author Adriaan Moors - */ -object NoPosition extends Position { - def line = 0 - def column = 0 - override def toString = "" - override def longString = toString - def lineContents = "" -} diff --git a/src/library/scala/util/parsing/input/OffsetPosition.scala b/src/library/scala/util/parsing/input/OffsetPosition.scala deleted file mode 100644 index 23f79c74d1..0000000000 --- a/src/library/scala/util/parsing/input/OffsetPosition.scala +++ /dev/null @@ -1,73 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -import scala.collection.mutable.ArrayBuffer - -/** `OffsetPosition` is a standard class for positions - * represented as offsets into a source ``document''. - * - * @param source The source document - * @param offset The offset indicating the position - * - * @author Martin Odersky - */ -case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends Position { - - /** An index that contains all line starts, including first line, and eof. */ - private lazy val index: Array[Int] = { - val lineStarts = new ArrayBuffer[Int] - lineStarts += 0 - for (i <- 0 until source.length) - if (source.charAt(i) == '\n') lineStarts += (i + 1) - lineStarts += source.length - lineStarts.toArray - } - - /** The line number referred to by the position; line numbers start at 1. */ - def line: Int = { - var lo = 0 - var hi = index.length - 1 - while (lo + 1 < hi) { - val mid = (hi + lo) / 2 - if (offset < index(mid)) hi = mid - else lo = mid - } - lo + 1 - } - - /** The column number referred to by the position; column numbers start at 1. */ - def column: Int = offset - index(line - 1) + 1 - - /** The contents of the line numbered at the current offset. 
- * - * @return the line at `offset` (not including a newline) - */ - def lineContents: String = - source.subSequence(index(line - 1), index(line)).toString - - /** Returns a string representation of the `Position`, of the form `line.column`. */ - override def toString = line+"."+column - - /** Compare this position to another, by first comparing their line numbers, - * and then -- if necessary -- using the columns to break a tie. - * - * @param that a `Position` to compare to this `Position` - * @return true if this position's line number or (in case of equal line numbers) - * column is smaller than the corresponding components of `that` - */ - override def <(that: Position) = that match { - case OffsetPosition(_, that_offset) => - this.offset < that_offset - case _ => - this.line < that.line || - this.line == that.line && this.column < that.column - } -} diff --git a/src/library/scala/util/parsing/input/PagedSeqReader.scala b/src/library/scala/util/parsing/input/PagedSeqReader.scala deleted file mode 100644 index 468f1f9a5f..0000000000 --- a/src/library/scala/util/parsing/input/PagedSeqReader.scala +++ /dev/null @@ -1,71 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - -package scala -package util.parsing.input - -import scala.collection.immutable.PagedSeq - -/** An object encapsulating basic character constants. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -object PagedSeqReader { - final val EofCh = '\032' -} - -/** A character array reader reads a stream of characters (keeping track of their positions) - * from an array. - * - * @param seq the source sequence - * @param offset starting offset. 
- * - * @author Martin Odersky - */ -class PagedSeqReader(seq: PagedSeq[Char], - override val offset: Int) extends Reader[Char] { - import PagedSeqReader._ - - override lazy val source: java.lang.CharSequence = seq - - /** Construct a `PagedSeqReader` with its first element at - * `source(0)` and position `(1,1)`. - */ - def this(seq: PagedSeq[Char]) = this(seq, 0) - - /** Returns the first element of the reader, or EofCh if reader is at its end - */ - def first = - if (seq.isDefinedAt(offset)) seq(offset) else EofCh - - /** Returns a PagedSeqReader consisting of all elements except the first - * - * @return If `atEnd` is `true`, the result will be `this`; - * otherwise, it's a `PagedSeqReader` containing the rest of input. - */ - def rest: PagedSeqReader = - if (seq.isDefinedAt(offset)) new PagedSeqReader(seq, offset + 1) - else this - - /** The position of the first element in the reader. - */ - def pos: Position = new OffsetPosition(source, offset) - - /** true iff there are no more elements in this reader (except for trailing - * EofCh's). - */ - def atEnd = !seq.isDefinedAt(offset) - - /** Returns an abstract reader consisting of all elements except the first - * `n` elements. - */ - override def drop(n: Int): PagedSeqReader = - new PagedSeqReader(seq, offset + n) -} diff --git a/src/library/scala/util/parsing/input/Position.scala b/src/library/scala/util/parsing/input/Position.scala deleted file mode 100644 index b7995a6471..0000000000 --- a/src/library/scala/util/parsing/input/Position.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -/** `Position` is the base trait for objects describing a position in a `document`. 
- * - * It provides functionality for: - * - generating a visual representation of this position (`longString`); - * - comparing two positions (`<`). - * - * To use this class for a concrete kind of `document`, implement the `lineContents` method. - * - * @author Martin Odersky - * @author Adriaan Moors - */ -trait Position { - - /** The line number referred to by the position; line numbers start at 1. */ - def line: Int - - /** The column number referred to by the position; column numbers start at 1. */ - def column: Int - - /** The contents of the line at this position. (must not contain a new-line character). - */ - protected def lineContents: String - - /** Returns a string representation of the `Position`, of the form `line.column`. */ - override def toString = ""+line+"."+column - - /** Returns a more ``visual'' representation of this position. - * More precisely, the resulting string consists of two lines: - * 1. the line in the document referred to by this position - * 2. a caret indicating the column - * - * Example: - * {{{ - * List(this, is, a, line, from, the, document) - * ^ - * }}} - */ - def longString = lineContents+"\n"+lineContents.take(column-1).map{x => if (x == '\t') x else ' ' } + "^" - - /** Compare this position to another, by first comparing their line numbers, - * and then -- if necessary -- using the columns to break a tie. 
- * - * @param `that` a `Position` to compare to this `Position` - * @return true if this position's line number or (in case of equal line numbers) - * column is smaller than the corresponding components of `that` - */ - def <(that: Position) = { - this.line < that.line || - this.line == that.line && this.column < that.column - } -} diff --git a/src/library/scala/util/parsing/input/Positional.scala b/src/library/scala/util/parsing/input/Positional.scala deleted file mode 100644 index cfde67cadd..0000000000 --- a/src/library/scala/util/parsing/input/Positional.scala +++ /dev/null @@ -1,30 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -/** A trait for objects that have a source position. - * - * @author Martin Odersky, Adriaan Moors - */ -trait Positional { - - /** The source position of this object, initially set to undefined. */ - var pos: Position = NoPosition - - /** If current source position is undefined, update it with given position `newpos` - * @return the object itself - */ - def setPos(newpos: Position): this.type = { - if (pos eq NoPosition) pos = newpos - this - } -} - - diff --git a/src/library/scala/util/parsing/input/Reader.scala b/src/library/scala/util/parsing/input/Reader.scala deleted file mode 100644 index 9dbf08a7ca..0000000000 --- a/src/library/scala/util/parsing/input/Reader.scala +++ /dev/null @@ -1,62 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.input - - -/** An interface for streams of values that have positions. 
- * - * @author Martin Odersky - * @author Adriaan Moors - */ -abstract class Reader[+T] { - - /** If this is a reader over character sequences, the underlying char sequence. - * If not, throws a `NoSuchMethodError` exception. - * - * @throws [[java.lang.NoSuchMethodError]] if this not a char sequence reader. - */ - def source: java.lang.CharSequence = - throw new NoSuchMethodError("not a char sequence reader") - - def offset: Int = - throw new NoSuchMethodError("not a char sequence reader") - - /** Returns the first element of the reader - */ - def first: T - - /** Returns an abstract reader consisting of all elements except the first - * - * @return If `atEnd` is `true`, the result will be `this'; - * otherwise, it's a `Reader` containing more elements. - */ - def rest: Reader[T] - - /** Returns an abstract reader consisting of all elements except the first `n` elements. - */ - def drop(n: Int): Reader[T] = { - var r: Reader[T] = this - var cnt = n - while (cnt > 0) { - r = r.rest; cnt -= 1 - } - r - } - - /** The position of the first element in the reader. - */ - def pos: Position - - /** `true` iff there are no more elements in this reader. - */ - def atEnd: Boolean -} diff --git a/src/library/scala/util/parsing/input/StreamReader.scala b/src/library/scala/util/parsing/input/StreamReader.scala deleted file mode 100644 index 30eb097fd7..0000000000 --- a/src/library/scala/util/parsing/input/StreamReader.scala +++ /dev/null @@ -1,76 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.input - -import java.io.BufferedReader -import scala.collection.immutable.PagedSeq - -/** An object to create a `StreamReader` from a `java.io.Reader`. 
- * - * @author Miles Sabin - */ -object StreamReader { - final val EofCh = '\032' - - /** Create a `StreamReader` from a `java.io.Reader`. - * - * @param in the `java.io.Reader` that provides the underlying - * stream of characters for this Reader. - */ - def apply(in: java.io.Reader): StreamReader = { - new StreamReader(PagedSeq.fromReader(in), 0, 1) - } -} - -/** A StreamReader reads from a character sequence, typically created as a PagedSeq - * from a java.io.Reader - * - * NOTE: - * StreamReaders do not really fulfill the new contract for readers, which - * requires a `source` CharSequence representing the full input. - * Instead source is treated line by line. - * As a consequence, regex matching cannot extend beyond a single line - * when a StreamReader are used for input. - * - * If you need to match regexes spanning several lines you should consider - * class `PagedSeqReader` instead. - * - * @author Miles Sabin - * @author Martin Odersky - */ -sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) { - import StreamReader._ - - override def rest: StreamReader = - if (off == seq.length) this - else if (seq(off) == '\n') - new StreamReader(seq.slice(off + 1), 0, lnum + 1) - else new StreamReader(seq, off + 1, lnum) - - private def nextEol = { - var i = off - while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1 - i - } - - override def drop(n: Int): StreamReader = { - val eolPos = nextEol - if (eolPos < off + n && eolPos < seq.length) - new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1)) - else - new StreamReader(seq, off + n, lnum) - } - - override def pos: Position = new Position { - def line = lnum - def column = off + 1 - def lineContents = seq.slice(0, nextEol).toString - } -} diff --git a/src/library/scala/util/parsing/json/JSON.scala b/src/library/scala/util/parsing/json/JSON.scala deleted file mode 100644 index b06dddf532..0000000000 --- 
a/src/library/scala/util/parsing/json/JSON.scala +++ /dev/null @@ -1,97 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - -package scala -package util.parsing.json - -/** - * This object provides a simple interface to the JSON parser class. - * The default conversion for numerics is into a double. If you wish to - * override this behavior at the global level, you can set the - * `globalNumberParser` property to your own `(String => Any)` function. - * If you only want to override at the per-thread level then you can set - * the `perThreadNumberParser` property to your function. For example: - * {{{ - * val myConversionFunc = {input : String => BigDecimal(input)} - * - * // Global override - * JSON.globalNumberParser = myConversionFunc - * - * // Per-thread override - * JSON.perThreadNumberParser = myConversionFunc - * }}} - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This object will be removed.", "2.11.0") -object JSON extends Parser { - - /** - * This method converts ''raw'' results back into the original, deprecated - * form. - */ - private def unRaw (in : Any) : Any = in match { - case JSONObject(obj) => obj.map({ case (k,v) => (k,unRaw(v))}).toList - case JSONArray(list) => list.map(unRaw) - case x => x - } - - /** - * Parse the given `JSON` string and return a list of elements. If the - * string is a `JSON` object it will be a `JSONObject`. If it's a `JSON` - * array it will be a `JSONArray`. - * - * @param input the given `JSON` string. - * @return an optional `JSONType` element. 
- */ - def parseRaw(input : String) : Option[JSONType] = - phrase(root)(new lexical.Scanner(input)) match { - case Success(result, _) => Some(result) - case _ => None - } - - /** - * Parse the given `JSON` string and return either a `List[Any]` - * if the `JSON` string specifies an `Array`, or a - * `Map[String,Any]` if the `JSON` string specifies an object. - * - * @param input the given `JSON` string. - * @return an optional list or map. - */ - def parseFull(input: String): Option[Any] = - parseRaw(input) match { - case Some(data) => Some(resolveType(data)) - case None => None - } - - /** - * A utility method to resolve a parsed `JSON` list into objects or - * arrays. See the `parse` method for details. - */ - def resolveType(input: Any): Any = input match { - case JSONObject(data) => data.transform { - case (k,v) => resolveType(v) - } - case JSONArray(data) => data.map(resolveType) - case x => x - } - - /** - * The global (VM) default function for converting a string to a numeric value. - */ - def globalNumberParser_=(f: NumericParser) { defaultNumberParser = f } - def globalNumberParser : NumericParser = defaultNumberParser - - /** - * Defines the function used to convert a numeric string literal into a - * numeric format on a per-thread basis. Use `globalNumberParser` for a - * global override. 
- */ - def perThreadNumberParser_=(f : NumericParser) { numberParser.set(f) } - def perThreadNumberParser : NumericParser = numberParser.get() -} diff --git a/src/library/scala/util/parsing/json/Lexer.scala b/src/library/scala/util/parsing/json/Lexer.scala deleted file mode 100644 index 7fc4e0bab6..0000000000 --- a/src/library/scala/util/parsing/json/Lexer.scala +++ /dev/null @@ -1,90 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** /____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.json - -import scala.util.parsing.combinator._ -import scala.util.parsing.combinator.lexical._ -import scala.util.parsing.input.CharArrayReader.EofCh - -/** - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -class Lexer extends StdLexical with ImplicitConversions { - - override def token: Parser[Token] = - //( '\"' ~ rep(charSeq | letter) ~ '\"' ^^ lift(StringLit) - ( string ^^ StringLit - | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) } - | '-' ~> whitespace ~ number ~ letter ^^ { case ws ~ num ~ l => ErrorToken("Invalid number format : -" + num + l) } - | '-' ~> whitespace ~ number ^^ { case ws ~ num => NumericLit("-" + num) } - | number ^^ NumericLit - | EofCh ^^^ EOF - | delim - | '\"' ~> failure("Unterminated string") - | rep(letter) ^^ checkKeyword - | failure("Illegal character") - ) - - def checkKeyword(xs : List[Any]) = { - val strRep = xs mkString "" - if (reserved contains strRep) Keyword(strRep) else ErrorToken("Not a keyword: " + strRep) - } - - /** A string is a collection of zero or more Unicode characters, wrapped in - * double quotes, using backslash escapes (cf. http://www.json.org/). 
- */ - def string = '\"' ~> rep(charSeq | chrExcept('\"', '\n', EofCh)) <~ '\"' ^^ { _ mkString "" } - - override def whitespace = rep(whitespaceChar) - - def number = intPart ~ opt(fracPart) ~ opt(expPart) ^^ { case i ~ f ~ e => - i + optString(".", f) + optString("", e) - } - def intPart = zero | intList - def intList = nonzero ~ rep(digit) ^^ {case x ~ y => (x :: y) mkString ""} - def fracPart = '.' ~> rep(digit) ^^ { _ mkString "" } - def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d => - e + optString("", s) + d.mkString("") - } - - private def optString[A](pre: String, a: Option[A]) = a match { - case Some(x) => pre + x.toString - case None => "" - } - - def zero: Parser[String] = '0' ^^^ "0" - def nonzero = elem("nonzero digit", d => d.isDigit && d != '0') - def exponent = elem("exponent character", d => d == 'e' || d == 'E') - def sign = elem("sign character", d => d == '-' || d == '+') - - def charSeq: Parser[String] = - ('\\' ~ '\"' ^^^ "\"" - |'\\' ~ '\\' ^^^ "\\" - |'\\' ~ '/' ^^^ "/" - |'\\' ~ 'b' ^^^ "\b" - |'\\' ~ 'f' ^^^ "\f" - |'\\' ~ 'n' ^^^ "\n" - |'\\' ~ 'r' ^^^ "\r" - |'\\' ~ 't' ^^^ "\t" - |'\\' ~> 'u' ~> unicodeBlock) - - val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray - def hexDigit = elem("hex digit", hexDigits.contains(_)) - - private def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ { - case a ~ b ~ c ~ d => - new String(Array(Integer.parseInt(List(a, b, c, d) mkString "", 16)), 0, 1) - } - - //private def lift[T](f: String => T)(xs: List[Any]): T = f(xs mkString "") -} diff --git a/src/library/scala/util/parsing/json/Parser.scala b/src/library/scala/util/parsing/json/Parser.scala deleted file mode 100644 index 521dfc6612..0000000000 --- a/src/library/scala/util/parsing/json/Parser.scala +++ /dev/null @@ -1,147 +0,0 @@ -/* __ *\ -** ________ ___ / / ___ Scala API ** -** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** -** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** -** 
/____/\___/_/ |_/____/_/ | | ** -** |/ ** -\* */ - - - -package scala -package util.parsing.json - -import scala.util.parsing.combinator._ -import scala.util.parsing.combinator.syntactical._ - -/** - * A marker class for the JSON result types. - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -sealed abstract class JSONType { - /** - * This version of toString allows you to provide your own value - * formatter. - */ - def toString (formatter : JSONFormat.ValueFormatter) : String - - /** - * Returns a String representation of this JSON value - * using the JSONFormat.defaultFormatter. - */ - override def toString = toString(JSONFormat.defaultFormatter) -} - -/** - * This object defines functions that are used when converting JSONType - * values into String representations. Mostly this is concerned with - * proper quoting of strings. - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This object will be removed.", "2.11.0") -object JSONFormat { - /** - * This type defines a function that can be used to - * format values into JSON format. - */ - type ValueFormatter = Any => String - - /** - * The default formatter used by the library. You can - * provide your own with the toString calls on - * JSONObject and JSONArray instances. - */ - val defaultFormatter : ValueFormatter = (x : Any) => x match { - case s : String => "\"" + quoteString(s) + "\"" - case jo : JSONObject => jo.toString(defaultFormatter) - case ja : JSONArray => ja.toString(defaultFormatter) - case other => other.toString - } - - /** - * This function can be used to properly quote Strings - * for JSON output. - */ - def quoteString (s : String) : String = - s.map { - case '"' => "\\\"" - case '\\' => "\\\\" - case '/' => "\\/" - case '\b' => "\\b" - case '\f' => "\\f" - case '\n' => "\\n" - case '\r' => "\\r" - case '\t' => "\\t" - /* We'll unicode escape any control characters. 
These include: - * 0x0 -> 0x1f : ASCII Control (C0 Control Codes) - * 0x7f : ASCII DELETE - * 0x80 -> 0x9f : C1 Control Codes - * - * Per RFC4627, section 2.5, we're not technically required to - * encode the C1 codes, but we do to be safe. - */ - case c if ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')) => "\\u%04x".format(c.toInt) - case c => c - }.mkString -} - -/** - * Represents a JSON Object (map). - * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -case class JSONObject (obj : Map[String,Any]) extends JSONType { - def toString (formatter : JSONFormat.ValueFormatter) = - "{" + obj.map({ case (k,v) => formatter(k.toString) + " : " + formatter(v) }).mkString(", ") + "}" -} - -/** - * Represents a JSON Array (list). - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -case class JSONArray (list : List[Any]) extends JSONType { - def toString (formatter : JSONFormat.ValueFormatter) = - "[" + list.map(formatter).mkString(", ") + "]" -} - -/** - * The main JSON Parser. 
- * - * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> - */ -@deprecated("This class will be removed.", "2.11.0") -class Parser extends StdTokenParsers with ImplicitConversions { - // Fill in abstract defs - type Tokens = Lexer - val lexical = new Tokens - - // Configure lexical parsing - lexical.reserved ++= List("true", "false", "null") - lexical.delimiters ++= List("{", "}", "[", "]", ":", ",") - - /** Type signature for functions that can parse numeric literals */ - type NumericParser = String => Any - - // Global default number parsing function - protected var defaultNumberParser : NumericParser = {_.toDouble} - - // Per-thread default number parsing function - protected val numberParser = new ThreadLocal[NumericParser]() { - override def initialValue() = defaultNumberParser - } - - // Define the grammar - def root = jsonObj | jsonArray - def jsonObj = "{" ~> repsep(objEntry, ",") <~ "}" ^^ { case vals : List[_] => JSONObject(Map(vals : _*)) } - def jsonArray = "[" ~> repsep(value, ",") <~ "]" ^^ { case vals : List[_] => JSONArray(vals) } - def objEntry = stringVal ~ (":" ~> value) ^^ { case x ~ y => (x, y) } - def value: Parser[Any] = (jsonObj | jsonArray | number | "true" ^^^ true | "false" ^^^ false | "null" ^^^ null | stringVal) - def stringVal = accept("string", { case lexical.StringLit(n) => n} ) - def number = accept("number", { case lexical.NumericLit(n) => numberParser.get.apply(n)} ) -} - diff --git a/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala b/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala new file mode 100644 index 0000000000..0683ea927d --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/ImplicitConversions.scala @@ -0,0 +1,43 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala 
+package util.parsing.combinator + +import scala.language.implicitConversions + +/** This object contains implicit conversions that come in handy when using the `^^` combinator. + * + * Refer to [[scala.util.parsing.combinator.Parsers]] to construct an AST from the concrete syntax. + * + * The reason for this is that the sequential composition combinator (`~`) combines its constituents + * into a ~. When several `~`s are combined, this results in nested `~`s (to the left). + * The `flatten*` coercions makes it easy to apply an `n`-argument function to a nested `~` of + * depth `n-1` + * + * The `headOptionTailToFunList` converts a function that takes a `List[A]` to a function that + * accepts a `~[A, Option[List[A]]]` (this happens when parsing something of the following + * shape: `p ~ opt("." ~ repsep(p, "."))` -- where `p` is a parser that yields an `A`). + * + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors + */ +trait ImplicitConversions { self: Parsers => + implicit def flatten2[A, B, C] (f: (A, B) => C) = + (p: ~[A, B]) => p match {case a ~ b => f(a, b)} + implicit def flatten3[A, B, C, D] (f: (A, B, C) => D) = + (p: ~[~[A, B], C]) => p match {case a ~ b ~ c => f(a, b, c)} + implicit def flatten4[A, B, C, D, E] (f: (A, B, C, D) => E) = + (p: ~[~[~[A, B], C], D]) => p match {case a ~ b ~ c ~ d => f(a, b, c, d)} + implicit def flatten5[A, B, C, D, E, F](f: (A, B, C, D, E) => F) = + (p: ~[~[~[~[A, B], C], D], E]) => p match {case a ~ b ~ c ~ d ~ e=> f(a, b, c, d, e)} + implicit def headOptionTailToFunList[A, T] (f: List[A] => T)= + (p: ~[A, Option[List[A]]]) => f(p._1 :: (p._2 match { case Some(xs) => xs case None => Nil})) +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/JavaTokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/JavaTokenParsers.scala new file mode 100644 index 0000000000..01288a182e --- /dev/null +++ 
b/src/parser-combinators/scala/util/parsing/combinator/JavaTokenParsers.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.combinator + +import scala.annotation.migration + +/** `JavaTokenParsers` differs from [[scala.util.parsing.combinator.RegexParsers]] + * by adding the following definitions: + * + * - `ident` + * - `wholeNumber` + * - `decimalNumber` + * - `stringLiteral` + * - `floatingPointNumber` + */ +trait JavaTokenParsers extends RegexParsers { + /** Anything that is a valid Java identifier, according to + * The Java Language Spec. + * Generally, this means a letter, followed by zero or more letters or numbers. + */ + def ident: Parser[String] = + """\p{javaJavaIdentifierStart}\p{javaJavaIdentifierPart}*""".r + /** An integer, without sign or with a negative sign. */ + def wholeNumber: Parser[String] = + """-?\d+""".r + /** Number following one of these rules: + * + * - An integer. For example: `13` + * - An integer followed by a decimal point. For example: `3.` + * - An integer followed by a decimal point and fractional part. For example: `3.14` + * - A decimal point followed by a fractional part. 
For example: `.1` + */ + def decimalNumber: Parser[String] = + """(\d+(\.\d*)?|\d*\.\d+)""".r + /** Double quotes (`"`) enclosing a sequence of: + * + * - Any character except double quotes, control characters or backslash (`\`) + * - A backslash followed by another backslash, a single or double quote, or one + * of the letters `b`, `f`, `n`, `r` or `t` + * - `\` followed by `u` followed by four hexadecimal digits + */ + @migration("`stringLiteral` allows escaping single and double quotes, but not forward slashes any longer.", "2.10.0") + def stringLiteral: Parser[String] = + ("\""+"""([^"\p{Cntrl}\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""+"\"").r + /** A number following the rules of `decimalNumber`, with the following + * optional additions: + * + * - Preceded by a negative sign + * - Followed by `e` or `E` and an optionally signed integer + * - Followed by `f`, `f`, `d` or `D` (after the above rule, if both are used) + */ + def floatingPointNumber: Parser[String] = + """-?(\d+(\.\d*)?|\d*\.\d+)([eE][+-]?\d+)?[fFdD]?""".r +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala new file mode 100644 index 0000000000..a11dd18e62 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/PackratParsers.scala @@ -0,0 +1,312 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.combinator + +import scala.util.parsing.input.{ Reader, Position } +import scala.collection.mutable +import scala.language.implicitConversions + +/** + * `PackratParsers` is a component that extends the parser combinators + * provided by [[scala.util.parsing.combinator.Parsers]] with a memoization + * facility (''Packrat Parsing''). 
+ * + * Packrat Parsing is a technique for implementing backtracking, + * recursive-descent parsers, with the advantage that it guarantees + * unlimited lookahead and a linear parse time. Using this technique, + * left recursive grammars can also be accepted. + * + * Using `PackratParsers` is very similar to using `Parsers`: + * - any class/trait that extends `Parsers` (directly or through a subclass) + * can mix in `PackratParsers`. + * Example: `'''object''' MyGrammar '''extends''' StandardTokenParsers '''with''' PackratParsers` + * - each grammar production previously declared as a `def` without formal + * parameters becomes a `lazy val`, and its type is changed from + * `Parser[Elem]` to `PackratParser[Elem]`. + * So, for example, `'''def''' production: Parser[Int] = {...}` + * becomes `'''lazy val''' production: PackratParser[Int] = {...}` + * - Important: using `PackratParser`s is not an ''all or nothing'' decision. + * They can be free mixed with regular `Parser`s in a single grammar. + * + * Cached parse results are attached to the ''input'', not the grammar. + * Therefore, `PackratsParser`s require a `PackratReader` as input, which + * adds memoization to an underlying `Reader`. Programmers can create + * `PackratReader` objects either manually, as in + * `production('''new''' PackratReader('''new''' lexical.Scanner("input")))`, + * but the common way should be to rely on the combinator `phrase` to wrap + * a given input with a `PackratReader` if the input is not one itself. + * + * @see Bryan Ford: "Packrat Parsing: Simple, Powerful, Lazy, Linear Time." ICFP'02 + * @see Alessandro Warth, James R. Douglass, Todd Millstein: "Packrat Parsers Can Support Left Recursion." PEPM'08 + * + * @since 2.8 + * @author Manohar Jonnalagedda + * @author Tiark Rompf + */ + +trait PackratParsers extends Parsers { + + //type Input = PackratReader[Elem] + + /** + * A specialized `Reader` class that wraps an underlying `Reader` + * and provides memoization of parse results. 
+ */ + class PackratReader[+T](underlying: Reader[T]) extends Reader[T] { outer => + + /* + * caching of intermediate parse results and information about recursion + */ + private[PackratParsers] val cache = mutable.HashMap.empty[(Parser[_], Position), MemoEntry[_]] + + private[PackratParsers] def getFromCache[T](p: Parser[T]): Option[MemoEntry[T]] = { + cache.get((p, pos)).asInstanceOf[Option[MemoEntry[T]]] + } + + private[PackratParsers] def updateCacheAndGet[T](p: Parser[T], w: MemoEntry[T]): MemoEntry[T] = { + cache.put((p, pos),w) + w + } + + /* a cache for storing parser heads: allows to know which parser is involved + in a recursion*/ + private[PackratParsers] val recursionHeads: mutable.HashMap[Position, Head] = mutable.HashMap.empty + + //a stack that keeps a list of all involved rules + private[PackratParsers] var lrStack: List[LR] = Nil + + override def source: java.lang.CharSequence = underlying.source + override def offset: Int = underlying.offset + + def first: T = underlying.first + def rest: Reader[T] = new PackratReader(underlying.rest) { + override private[PackratParsers] val cache = outer.cache + override private[PackratParsers] val recursionHeads = outer.recursionHeads + lrStack = outer.lrStack + } + + def pos: Position = underlying.pos + def atEnd: Boolean = underlying.atEnd + } + + /** + * A parser generator delimiting whole phrases (i.e. programs). + * + * Overridden to make sure any input passed to the argument parser + * is wrapped in a `PackratReader`. 
+ */ + override def phrase[T](p: Parser[T]) = { + val q = super.phrase(p) + new PackratParser[T] { + def apply(in: Input) = in match { + case in: PackratReader[_] => q(in) + case in => q(new PackratReader(in)) + } + } + } + + private def getPosFromResult(r: ParseResult[_]): Position = r.next.pos + + // auxiliary data structures + + private case class MemoEntry[+T](var r: Either[LR,ParseResult[_]]){ + def getResult: ParseResult[T] = r match { + case Left(LR(res,_,_)) => res.asInstanceOf[ParseResult[T]] + case Right(res) => res.asInstanceOf[ParseResult[T]] + } + } + + private case class LR(var seed: ParseResult[_], var rule: Parser[_], var head: Option[Head]){ + def getPos: Position = getPosFromResult(seed) + } + + private case class Head(var headParser: Parser[_], var involvedSet: List[Parser[_]], var evalSet: List[Parser[_]]){ + def getHead = headParser + } + + /** + * The root class of packrat parsers. + */ + abstract class PackratParser[+T] extends super.Parser[T] + + /** + * Implicitly convert a parser to a packrat parser. + * The conversion is triggered by giving the appropriate target type: + * {{{ + * val myParser: PackratParser[MyResult] = aParser + * }}} */ + implicit def parser2packrat[T](p: => super.Parser[T]): PackratParser[T] = { + lazy val q = p + memo(super.Parser {in => q(in)}) + } + + /* + * An unspecified function that is called when a packrat reader is applied. + * It verifies whether we are in the process of growing a parse or not. + * In the former case, it makes sure that rules involved in the recursion are evaluated. 
+ * It also prevents non-involved rules from getting evaluated further + */ + private def recall(p: super.Parser[_], in: PackratReader[Elem]): Option[MemoEntry[_]] = { + val cached = in.getFromCache(p) + val head = in.recursionHeads.get(in.pos) + + head match { + case None => /*no heads*/ cached + case Some(h@Head(hp, involved, evalSet)) => { + //heads found + if(cached == None && !(hp::involved contains p)) { + //Nothing in the cache, and p is not involved + return Some(MemoEntry(Right(Failure("dummy ",in)))) + } + if(evalSet contains p){ + //something in cache, and p is in the evalSet + //remove the rule from the evalSet of the Head + h.evalSet = h.evalSet.filterNot(_==p) + val tempRes = p(in) + //we know that cached has an entry here + val tempEntry: MemoEntry[_] = cached.get // match {case Some(x: MemoEntry[_]) => x} + //cache is modified + tempEntry.r = Right(tempRes) + } + cached + } + } + } + + /* + * setting up the left-recursion. We have the LR for the rule head + * we modify the involvedSets of all LRs in the stack, till we see + * the current parser again + */ + private def setupLR(p: Parser[_], in: PackratReader[_], recDetect: LR): Unit = { + if(recDetect.head == None) recDetect.head = Some(Head(p, Nil, Nil)) + + in.lrStack.takeWhile(_.rule != p).foreach {x => + x.head = recDetect.head + recDetect.head.map(h => h.involvedSet = x.rule::h.involvedSet) + } + } + + /* + * growing, if needed the recursion + * check whether the parser we are growing is the head of the rule. + * Not => no grow + */ + + /* + * Once the result of the recall function is known, if it is nil, then we need to store a dummy +failure into the cache (much like in the previous listings) and compute the future parse. If it +is not, however, this means we have detected a recursion, and we use the setupLR function +to update each parser involved in the recursion. 
+ */ + + private def lrAnswer[T](p: Parser[T], in: PackratReader[Elem], growable: LR): ParseResult[T] = growable match { + //growable will always be having a head, we can't enter lrAnswer otherwise + case LR(seed ,rule, Some(head)) => + if(head.getHead != p) /*not head rule, so not growing*/ seed.asInstanceOf[ParseResult[T]] + else { + in.updateCacheAndGet(p, MemoEntry(Right[LR, ParseResult[T]](seed.asInstanceOf[ParseResult[T]]))) + seed match { + case f@Failure(_,_) => f + case e@Error(_,_) => e + case s@Success(_,_) => /*growing*/ grow(p, in, head) + } + } + case _=> throw new Exception("lrAnswer with no head !!") + } + + //p here should be strict (cannot be non-strict) !! + //failing left-recursive grammars: This is done by simply storing a failure if nothing is found + + /** + * Explicitly convert a given parser to a memoizing packrat parser. + * In most cases, client code should avoid calling `memo` directly + * and rely on implicit conversion instead. + */ + def memo[T](p: super.Parser[T]): PackratParser[T] = { + new PackratParser[T] { + def apply(in: Input) = { + /* + * transformed reader + */ + val inMem = in.asInstanceOf[PackratReader[Elem]] + + //look in the global cache if in a recursion + val m = recall(p, inMem) + m match { + //nothing has been done due to recall + case None => + val base = LR(Failure("Base Failure",in), p, None) + inMem.lrStack = base::inMem.lrStack + //cache base result + inMem.updateCacheAndGet(p,MemoEntry(Left(base))) + //parse the input + val tempRes = p(in) + //the base variable has passed equality tests with the cache + inMem.lrStack = inMem.lrStack.tail + //check whether base has changed, if yes, we will have a head + base.head match { + case None => + /*simple result*/ + inMem.updateCacheAndGet(p,MemoEntry(Right(tempRes))) + tempRes + case s@Some(_) => + /*non simple result*/ + base.seed = tempRes + //the base variable has passed equality tests with the cache + val res = lrAnswer(p, inMem, base) + res + } + + case Some(mEntry) 
=> { + //entry found in cache + mEntry match { + case MemoEntry(Left(recDetect)) => { + setupLR(p, inMem, recDetect) + //all setupLR does is change the heads of the recursions, so the seed will stay the same + recDetect match {case LR(seed, _, _) => seed.asInstanceOf[ParseResult[T]]} + } + case MemoEntry(Right(res: ParseResult[_])) => res.asInstanceOf[ParseResult[T]] + } + } + } + } + } + } + + private def grow[T](p: super.Parser[T], rest: PackratReader[Elem], head: Head): ParseResult[T] = { + //store the head into the recursionHeads + rest.recursionHeads.put(rest.pos, head /*match {case Head(hp,involved,_) => Head(hp,involved,involved)}*/) + val oldRes: ParseResult[T] = rest.getFromCache(p).get match { + case MemoEntry(Right(x)) => x.asInstanceOf[ParseResult[T]] + case _ => throw new Exception("impossible match") + } + + //resetting the evalSet of the head of the recursion at each beginning of growth + head.evalSet = head.involvedSet + val tempRes = p(rest); tempRes match { + case s@Success(_,_) => + if(getPosFromResult(oldRes) < getPosFromResult(tempRes)) { + rest.updateCacheAndGet(p, MemoEntry(Right(s))) + grow(p, rest, head) + } else { + //we're done with growing, we can remove data from recursion head + rest.recursionHeads -= rest.pos + rest.getFromCache(p).get match { + case MemoEntry(Right(x: ParseResult[_])) => x.asInstanceOf[ParseResult[T]] + case _ => throw new Exception("impossible match") + } + } + case f => + rest.recursionHeads -= rest.pos + /*rest.updateCacheAndGet(p, MemoEntry(Right(f)));*/oldRes + } + } +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/Parsers.scala b/src/parser-combinators/scala/util/parsing/combinator/Parsers.scala new file mode 100644 index 0000000000..16754646fd --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/Parsers.scala @@ -0,0 +1,919 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | 
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.combinator + +import scala.util.parsing.input._ +import scala.collection.mutable.ListBuffer +import scala.annotation.tailrec +import scala.annotation.migration +import scala.language.implicitConversions +import scala.util.DynamicVariable + +// TODO: better error handling (labelling like parsec's ) + +/** `Parsers` is a component that ''provides'' generic parser combinators. + * + * There are two abstract members that must be defined in order to + * produce parsers: the type `Elem` and + * [[scala.util.parsing.combinator.Parsers.Parser]]. There are helper + * methods that produce concrete `Parser` implementations -- see ''primitive + * parser'' below. + * + * A `Parsers` may define multiple `Parser` instances, which are combined + * to produced the desired parser. + * + * The type of the elements these parsers should parse must be defined + * by declaring `Elem` + * (each parser is polymorphic in the type of result it produces). + * + * There are two aspects to the result of a parser: + * 1. success or failure + * 1. the result. + * + * A [[scala.util.parsing.combinator.Parsers.Parser]] produces both kinds of information, + * by returning a [[scala.util.parsing.combinator.Parsers.ParseResult]] when its `apply` + * method is called on an input. + * + * The term ''parser combinator'' refers to the fact that these parsers + * are constructed from primitive parsers and composition operators, such + * as sequencing, alternation, optionality, repetition, lifting, and so on. For example, + * given `p1` and `p2` of type [[scala.util.parsing.combinator.Parsers.Parser]]: + * + * {{{ + * p1 ~ p2 // sequencing: must match p1 followed by p2 + * p1 | p2 // alternation: must match either p1 or p2, with preference given to p1 + * p1.? 
// optionality: may match p1 or not + * p1.* // repetition: matches any number of repetitions of p1 + * }}} + * + * These combinators are provided as methods on [[scala.util.parsing.combinator.Parsers.Parser]], + * or as methods taking one or more `Parsers` and returning a `Parser` provided in + * this class. + * + * A ''primitive parser'' is a parser that accepts or rejects a single + * piece of input, based on a certain criterion, such as whether the + * input... + * - is equal to some given object (see method `accept`), + * - satisfies a certain predicate (see method `acceptIf`), + * - is in the domain of a given partial function (see method `acceptMatch`) + * - or other conditions, by using one of the other methods available, or subclassing `Parser` + * + * Even more primitive parsers always produce the same result, irrespective of the input. See + * methods `success`, `err` and `failure` as examples. + * + * @see [[scala.util.parsing.combinator.RegexParsers]] and other known subclasses for practical examples. + * + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors + */ +trait Parsers { + /** the type of input elements the provided parsers consume (When consuming + * invidual characters, a parser is typically called a ''scanner'', which + * produces ''tokens'' that are consumed by what is normally called a ''parser''. + * Nonetheless, the same principles apply, regardless of the input type.) */ + type Elem + + /** The parser input is an abstract reader of input elements, i.e. the type + * of input the parsers in this component expect. */ + type Input = Reader[Elem] + + /** A base class for parser results. A result is either successful or not + * (failure may be fatal, i.e., an Error, or not, i.e., a Failure). On + * success, provides a result of type `T` which consists of some result + * (and the rest of the input). */ + sealed abstract class ParseResult[+T] { + /** Functional composition of ParseResults. 
+ * + * @param f the function to be lifted over this result + * @return `f` applied to the result of this `ParseResult`, packaged up as a new `ParseResult` + */ + def map[U](f: T => U): ParseResult[U] + + /** Partial functional composition of ParseResults. + * + * @param f the partial function to be lifted over this result + * @param error a function that takes the same argument as `f` and + * produces an error message to explain why `f` wasn't applicable + * (it is called when this is the case) + * @return if `f` f is defined at the result in this `ParseResult`, `f` + * applied to the result of this `ParseResult`, packaged up as + * a new `ParseResult`. If `f` is not defined, `Failure`. + */ + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + + def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U] + + def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T] + + def append[U >: T](a: => ParseResult[U]): ParseResult[U] + + def isEmpty = !successful + + /** Returns the embedded result. */ + def get: T + + def getOrElse[B >: T](default: => B): B = + if (isEmpty) default else this.get + + val next: Input + + val successful: Boolean + } + + /** The success case of `ParseResult`: contains the result and the remaining input. 
+ * + * @param result The parser's output + * @param next The parser's remaining input + */ + case class Success[+T](result: T, override val next: Input) extends ParseResult[T] { + def map[U](f: T => U) = Success(f(result), next) + def mapPartial[U](f: PartialFunction[T, U], error: T => String): ParseResult[U] + = if(f.isDefinedAt(result)) Success(f(result), next) + else Failure(error(result), next) + + def flatMapWithNext[U](f: T => Input => ParseResult[U]): ParseResult[U] + = f(result)(next) + + def filterWithError(p: T => Boolean, error: T => String, position: Input): ParseResult[T] = + if (p(result)) this + else Failure(error(result), position) + + def append[U >: T](a: => ParseResult[U]): ParseResult[U] = this + + def get: T = result + + /** The toString method of a Success. */ + override def toString = "["+next.pos+"] parsed: "+result + + val successful = true + } + + private lazy val lastNoSuccessVar = new DynamicVariable[Option[NoSuccess]](None) + + /** A common super-class for unsuccessful parse results. */ + sealed abstract class NoSuccess(val msg: String, override val next: Input) extends ParseResult[Nothing] { // when we don't care about the difference between Failure and Error + val successful = false + + if (lastNoSuccessVar.value forall (v => !(next.pos < v.next.pos))) + lastNoSuccessVar.value = Some(this) + + def map[U](f: Nothing => U) = this + def mapPartial[U](f: PartialFunction[Nothing, U], error: Nothing => String): ParseResult[U] = this + + def flatMapWithNext[U](f: Nothing => Input => ParseResult[U]): ParseResult[U] + = this + + def filterWithError(p: Nothing => Boolean, error: Nothing => String, position: Input): ParseResult[Nothing] = this + + def get: Nothing = scala.sys.error("No result when parsing failed") + } + /** An extractor so `NoSuccess(msg, next)` can be used in matches. 
*/ + object NoSuccess { + def unapply[T](x: ParseResult[T]) = x match { + case Failure(msg, next) => Some((msg, next)) + case Error(msg, next) => Some((msg, next)) + case _ => None + } + } + + /** The failure case of `ParseResult`: contains an error-message and the remaining input. + * Parsing will back-track when a failure occurs. + * + * @param msg An error message string describing the failure. + * @param next The parser's unconsumed input at the point where the failure occurred. + */ + case class Failure(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of a Failure yields an error message. */ + override def toString = "["+next.pos+"] failure: "+msg+"\n\n"+next.pos.longString + + def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = { val alt = a; alt match { + case Success(_, _) => alt + case ns: NoSuccess => if (alt.next.pos < next.pos) this else alt + }} + } + + /** The fatal failure case of ParseResult: contains an error-message and + * the remaining input. + * No back-tracking is done when a parser returns an `Error`. + * + * @param msg An error message string describing the error. + * @param next The parser's unconsumed input at the point where the error occurred. + */ + case class Error(override val msg: String, override val next: Input) extends NoSuccess(msg, next) { + /** The toString method of an Error yields an error message. */ + override def toString = "["+next.pos+"] error: "+msg+"\n\n"+next.pos.longString + def append[U >: Nothing](a: => ParseResult[U]): ParseResult[U] = this + } + + def Parser[T](f: Input => ParseResult[T]): Parser[T] + = new Parser[T]{ def apply(in: Input) = f(in) } + + def OnceParser[T](f: Input => ParseResult[T]): Parser[T] with OnceParser[T] + = new Parser[T] with OnceParser[T] { def apply(in: Input) = f(in) } + + /** The root class of parsers. + * Parsers are functions from the Input type to ParseResult. 
+ */ + abstract class Parser[+T] extends (Input => ParseResult[T]) { + private var name: String = "" + def named(n: String): this.type = {name=n; this} + override def toString() = "Parser ("+ name +")" + + /** An unspecified method that defines the behaviour of this parser. */ + def apply(in: Input): ParseResult[T] + + def flatMap[U](f: T => Parser[U]): Parser[U] + = Parser{ in => this(in) flatMapWithNext(f)} + + def map[U](f: T => U): Parser[U] //= flatMap{x => success(f(x))} + = Parser{ in => this(in) map(f)} + + def filter(p: T => Boolean): Parser[T] + = withFilter(p) + + def withFilter(p: T => Boolean): Parser[T] + = Parser{ in => this(in) filterWithError(p, "Input doesn't match filter: "+_, in)} + + // no filter yet, dealing with zero is tricky! + + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def append[U >: T](p0: => Parser[U]): Parser[U] = { lazy val p = p0 // lazy argument + Parser{ in => this(in) append p(in)} + } + + // the operator formerly known as +++, ++, &, but now, behold the venerable ~ + // it's short, light (looks like whitespace), has few overloaded meaning (thanks to the recent change from ~ to unary_~) + // and we love it! (or do we like `,` better?) + + /** A parser combinator for sequential composition. + * + * `p ~ q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * + * @param q a parser that will be executed after `p` (this parser) + * succeeds -- evaluated at most once, and only when necessary. + * @return a `Parser` that -- on success -- returns a `~` (like a `Pair`, + * but easier to pattern match on) that contains the result of `p` and + * that of `q`. The resulting parser fails if either `p` or `q` fails. 
+ */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ~ [U](q: => Parser[U]): Parser[~[T, U]] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield new ~(a,b)).named("~") + } + + /** A parser combinator for sequential composition which keeps only the right result. + * + * `p ~> q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * + * @param q a parser that will be executed after `p` (this parser) + * succeeds -- evaluated at most once, and only when necessary. + * @return a `Parser` that -- on success -- returns the result of `q`. + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ~> [U](q: => Parser[U]): Parser[U] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield b).named("~>") + } + + /** A parser combinator for sequential composition which keeps only the left result. + * + * `p <~ q` succeeds if `p` succeeds and `q` succeeds on the input + * left over by `p`. + * + * @note <~ has lower operator precedence than ~ or ~>. + * + * @param q a parser that will be executed after `p` (this parser) succeeds -- evaluated at most once, and only when necessary + * @return a `Parser` that -- on success -- returns the result of `p`. 
+ */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def <~ [U](q: => Parser[U]): Parser[T] = { lazy val p = q // lazy argument + (for(a <- this; b <- p) yield a).named("<~") + } + + /* not really useful: V cannot be inferred because Parser is covariant in first type parameter (V is always trivially Nothing) + def ~~ [U, V](q: => Parser[U])(implicit combine: (T, U) => V): Parser[V] = new Parser[V] { + def apply(in: Input) = seq(Parser.this, q)((x, y) => combine(x,y))(in) + } */ + + /** A parser combinator for non-back-tracking sequential composition. + * + * `p ~! q` succeeds if `p` succeeds and `q` succeeds on the input left over by `p`. + * In case of failure, no back-tracking is performed (in an earlier parser produced by the `|` combinator). + * + * @param p a parser that will be executed after `p` (this parser) succeeds + * @return a `Parser` that -- on success -- returns a `~` (like a Pair, but easier to pattern match on) + * that contains the result of `p` and that of `q`. + * The resulting parser fails if either `p` or `q` fails, this failure is fatal. + */ + def ~! [U](p: => Parser[U]): Parser[~[T, U]] + = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~!") } + + /** A parser combinator for alternative composition. + * + * `p | q` succeeds if `p` succeeds or `q` succeeds. + * Note that `q` is only tried if `p`s failure is non-fatal (i.e., back-tracking is allowed). + * + * @param q a parser that will be executed if `p` (this parser) fails (and allows back-tracking) + * @return a `Parser` that returns the result of the first parser to succeed (out of `p` and `q`) + * The resulting parser succeeds if (and only if) + * - `p` succeeds, ''or'' + * - if `p` fails allowing back-tracking and `q` succeeds. 
+ */ + def | [U >: T](q: => Parser[U]): Parser[U] = append(q).named("|") + + // TODO + /** A parser combinator for alternative with longest match composition. + * + * `p ||| q` succeeds if `p` succeeds or `q` succeeds. + * If `p` and `q` both succeed, the parser that consumed the most characters accepts. + * + * @param q0 a parser that accepts if p consumes less characters. -- evaluated at most once, and only when necessary + * @return a `Parser` that returns the result of the parser consuming the most characters (out of `p` and `q`). + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ||| [U >: T](q0: => Parser[U]): Parser[U] = new Parser[U] { + lazy val q = q0 // lazy argument + def apply(in: Input) = { + val res1 = Parser.this(in) + val res2 = q(in) + + (res1, res2) match { + case (s1 @ Success(_, next1), s2 @ Success(_, next2)) => if (next2.pos < next1.pos) s1 else s2 + case (s1 @ Success(_, _), _) => s1 + case (_, s2 @ Success(_, _)) => s2 + case (e1 @ Error(_, _), _) => e1 + case (f1 @ Failure(_, next1), ns2 @ NoSuccess(_, next2)) => if (next2.pos < next1.pos) f1 else ns2 + } + } + override def toString = "|||" + } + + /** A parser combinator for function application. + * + * `p ^^ f` succeeds if `p` succeeds; it returns `f` applied to the result of `p`. + * + * @param f a function that will be applied to this parser's result (see `map` in `ParseResult`). + * @return a parser that has the same behaviour as the current parser, but whose result is + * transformed by `f`. + */ + def ^^ [U](f: T => U): Parser[U] = map(f).named(toString+"^^") + + /** A parser combinator that changes a successful result into the specified value. + * + * `p ^^^ v` succeeds if `p` succeeds; discards its result, and returns `v` instead. + * + * @param v The new result for the parser, evaluated at most once (if `p` succeeds), not evaluated at all if `p` fails. 
+ * @return a parser that has the same behaviour as the current parser, but whose successful result is `v` + */ + @migration("The call-by-name argument is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def ^^^ [U](v: => U): Parser[U] = new Parser[U] { + lazy val v0 = v // lazy argument + def apply(in: Input) = Parser.this(in) map (x => v0) + }.named(toString+"^^^") + + /** A parser combinator for partial function application. + * + * `p ^? (f, error)` succeeds if `p` succeeds AND `f` is defined at the result of `p`; + * in that case, it returns `f` applied to the result of `p`. If `f` is not applicable, + * error(the result of `p`) should explain why. + * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial` in `ParseResult`). + * @param error a function that takes the same argument as `f` and produces an error message + * to explain why `f` wasn't applicable + * @return a parser that succeeds if the current parser succeeds and `f` is applicable + * to the result. If so, the result will be transformed by `f`. + */ + def ^? [U](f: PartialFunction[T, U], error: T => String): Parser[U] = Parser{ in => + this(in).mapPartial(f, error)}.named(toString+"^?") + + /** A parser combinator for partial function application. + * + * `p ^? f` succeeds if `p` succeeds AND `f` is defined at the result of `p`; + * in that case, it returns `f` applied to the result of `p`. + * + * @param f a partial function that will be applied to this parser's result + * (see `mapPartial` in `ParseResult`). + * @return a parser that succeeds if the current parser succeeds and `f` is applicable + * to the result. If so, the result will be transformed by `f`. + */ + def ^? [U](f: PartialFunction[T, U]): Parser[U] = ^?(f, r => "Constructor function not defined at "+r) + + /** A parser combinator that parameterizes a subsequent parser with the + * result of this one. 
+ * + * Use this combinator when a parser depends on the result of a previous + * parser. `p` should be a function that takes the result from the first + * parser and returns the second parser. + * + * `p into fq` (with `fq` typically `{x => q}`) first applies `p`, and + * then, if `p` successfully returned result `r`, applies `fq(r)` to the + * rest of the input. + * + * ''From: G. Hutton. Higher-order functions for parsing. J. Funct. Program., 2(3):323--343, 1992.'' + * + * @example {{{ + * def perlRE = "m" ~> (".".r into (separator => """[^%s]*""".format(separator).r <~ separator)) + * }}} + * + * @param fq a function that, given the result from this parser, returns + * the second parser to be applied + * @return a parser that succeeds if this parser succeeds (with result `x`) + * and if then `fq(x)` succeeds + */ + def into[U](fq: T => Parser[U]): Parser[U] = flatMap(fq) + + // shortcuts for combinators: + + /** Returns `into(fq)`. */ + def >>[U](fq: T => Parser[U])=into(fq) + + /** Returns a parser that repeatedly parses what this parser parses. + * + * @return rep(this) + */ + def * = rep(this) + + /** Returns a parser that repeatedly parses what this parser parses, + * interleaved with the `sep` parser. The `sep` parser specifies how + * the results parsed by this parser should be combined. + * + * @return chainl1(this, sep) + */ + def *[U >: T](sep: => Parser[(U, U) => U]) = chainl1(this, sep) + + // TODO: improve precedence? a ~ b*(",") = a ~ (b*(",")) should be true + + /** Returns a parser that repeatedly (at least once) parses what this parser parses. + * + * @return rep1(this) + */ + def + = rep1(this) + + /** Returns a parser that optionally parses what this parser parses. + * + * @return opt(this) + */ + def ? = opt(this) + + /** Changes the failure message produced by a parser. + * + * This doesn't change the behavior of a parser on neither + * success nor error, just on failure. 
The semantics are + * slightly different than those obtained by doing `| failure(msg)`, + * in that the message produced by this method will always + * replace the message produced, which is not guaranteed + * by that idiom. + * + * For example, parser `p` below will always produce the + * designated failure message, while `q` will not produce + * it if `sign` is parsed but `number` is not. + * + * {{{ + * def p = sign.? ~ number withFailureMessage "Number expected!" + * def q = sign.? ~ number | failure("Number expected!") + * }}} + * + * @param msg The message that will replace the default failure message. + * @return A parser with the same properties and different failure message. + */ + def withFailureMessage(msg: String) = Parser{ in => + this(in) match { + case Failure(_, next) => Failure(msg, next) + case other => other + } + } + + /** Changes the error message produced by a parser. + * + * This doesn't change the behavior of a parser on neither + * success nor failure, just on error. The semantics are + * slightly different than those obtained by doing `| error(msg)`, + * in that the message produced by this method will always + * replace the message produced, which is not guaranteed + * by that idiom. + * + * For example, parser `p` below will always produce the + * designated error message, while `q` will not produce + * it if `sign` is parsed but `number` is not. + * + * {{{ + * def p = sign.? ~ number withErrorMessage "Number expected!" + * def q = sign.? ~ number | error("Number expected!") + * }}} + * + * @param msg The message that will replace the default error message. + * @return A parser with the same properties and different error message. 
+ */ + def withErrorMessage(msg: String) = Parser{ in => + this(in) match { + case Error(_, next) => Error(msg, next) + case other => other + } + } + } + + /** Wrap a parser so that its failures become errors (the `|` combinator + * will give up as soon as it encounters an error, on failure it simply + * tries the next alternative). + */ + def commit[T](p: => Parser[T]) = Parser{ in => + p(in) match{ + case s @ Success(_, _) => s + case e @ Error(_, _) => e + case f @ Failure(msg, next) => Error(msg, next) + } + } + + /** A parser matching input elements that satisfy a given predicate. + * + * `elem(kind, p)` succeeds if the input starts with an element `e` for which `p(e)` is true. + * + * @param kind The element kind, used for error messages + * @param p A predicate that determines which elements match. + * @return + */ + def elem(kind: String, p: Elem => Boolean) = acceptIf(p)(inEl => kind+" expected") + + /** A parser that matches only the given element `e`. + * + * `elem(e)` succeeds if the input starts with an element `e`. + * + * @param e the `Elem` that must be the next piece of input for the returned parser to succeed + * @return a `Parser` that succeeds if `e` is the next available input (and returns it). + */ + def elem(e: Elem): Parser[Elem] = accept(e) + + /** A parser that matches only the given element `e`. + * + * The method is implicit so that elements can automatically be lifted to their parsers. + * For example, when parsing `Token`s, `Identifier("new")` (which is a `Token`) can be used directly, + * instead of first creating a `Parser` using `accept(Identifier("new"))`. + * + * @param e the `Elem` that must be the next piece of input for the returned parser to succeed + * @return a `tParser` that succeeds if `e` is the next available input. + */ + + implicit def accept(e: Elem): Parser[Elem] = acceptIf(_ == e)("`"+e+"' expected but " + _ + " found") + + /** A parser that matches only the given list of element `es`. 
+ * + * `accept(es)` succeeds if the input subsequently provides the elements in the list `es`. + * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ + def accept[ES <% List[Elem]](es: ES): Parser[List[Elem]] = acceptSeq(es) + + /** The parser that matches an element in the domain of the partial function `f`. + * + * If `f` is defined on the first element in the input, `f` is applied + * to it to produce this parser's result. + * + * Example: The parser `accept("name", {case Identifier(n) => Name(n)})` + * accepts an `Identifier(n)` and returns a `Name(n)` + * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f` is applicable to the first element of the input, + * applying `f` to it to produce the result. + */ + def accept[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = acceptMatch(expected, f) + + /** A parser matching input elements that satisfy a given predicate. + * + * `acceptIf(p)(el => "Unexpected "+el)` succeeds if the input starts with an element `e` for which `p(e)` is true. + * + * @param err A function from the received element into an error message. + * @param p A predicate that determines which elements match. + * @return A parser for elements satisfying p(e). + */ + def acceptIf(p: Elem => Boolean)(err: Elem => String): Parser[Elem] = Parser { in => + if (in.atEnd) Failure("end of input", in) + else if (p(in.first)) Success(in.first, in.rest) + else Failure(err(in.first), in) + } + + /** The parser that matches an element in the domain of the partial function `f`. + * + * If `f` is defined on the first element in the input, `f` is applied + * to it to produce this parser's result. 
+ * + * Example: The parser `acceptMatch("name", {case Identifier(n) => Name(n)})` + * accepts an `Identifier(n)` and returns a `Name(n)` + * + * @param expected a description of the kind of element this parser expects (for error messages) + * @param f a partial function that determines when this parser is successful and what its output is + * @return A parser that succeeds if `f` is applicable to the first element of the input, + * applying `f` to it to produce the result. + */ + def acceptMatch[U](expected: String, f: PartialFunction[Elem, U]): Parser[U] = Parser{ in => + if (in.atEnd) Failure("end of input", in) + else if (f.isDefinedAt(in.first)) Success(f(in.first), in.rest) + else Failure(expected+" expected", in) + } + + /** A parser that matches only the given [[scala.collection.Iterable]] collection of elements `es`. + * + * `acceptSeq(es)` succeeds if the input subsequently provides the elements in the iterable `es`. + * + * @param es the list of expected elements + * @return a Parser that recognizes a specified list of elements + */ + def acceptSeq[ES <% Iterable[Elem]](es: ES): Parser[List[Elem]] = + es.foldRight[Parser[List[Elem]]](success(Nil)){(x, pxs) => accept(x) ~ pxs ^^ mkList} + + /** A parser that always fails. + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def failure(msg: String) = Parser{ in => Failure(msg, in) } + + /** A parser that results in an error. + * + * @param msg The error message describing the failure. + * @return A parser that always fails with the specified error message. + */ + def err(msg: String) = Parser{ in => Error(msg, in) } + + /** A parser that always succeeds. 
+ * + * @param v The result for the parser + * @return A parser that always succeeds, with the given result `v` + */ + def success[T](v: T) = Parser{ in => Success(v, in) } + + /** A helper method that turns a `Parser` into one that will + * print debugging information to stdout before and after + * being applied. + */ + def log[T](p: => Parser[T])(name: String): Parser[T] = Parser{ in => + println("trying "+ name +" at "+ in) + val r = p(in) + println(name +" --> "+ r) + r + } + + /** A parser generator for repetitions. + * + * `rep(p)` repeatedly uses `p` to parse the input until `p` fails + * (the result is a List of the consecutive results of `p`). + * + * @param p a `Parser` that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input. + */ + def rep[T](p: => Parser[T]): Parser[List[T]] = rep1(p) | success(List()) + + /** A parser generator for interleaved repetitions. + * + * `repsep(p, q)` repeatedly uses `p` interleaved with `q` to parse the input, until `p` fails. + * (The result is a `List` of the results of `p`.) + * + * Example: `repsep(term, ",")` parses a comma-separated list of term's, yielding a list of these terms. + * + * @param p a `Parser` that is to be applied successively to the input + * @param q a `Parser` that parses the elements that separate the elements parsed by `p` + * @return A parser that returns a list of results produced by repeatedly applying `p` (interleaved with `q`) to the input. + * The results of `p` are collected in a list. The results of `q` are discarded. + */ + def repsep[T](p: => Parser[T], q: => Parser[Any]): Parser[List[T]] = + rep1sep(p, q) | success(List()) + + /** A parser generator for non-empty repetitions. 
+ * + * `rep1(p)` repeatedly uses `p` to parse the input until `p` fails -- `p` must succeed at least + * once (the result is a `List` of the consecutive results of `p`) + * + * @param p a `Parser` that is to be applied successively to the input + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches at least once). + */ + def rep1[T](p: => Parser[T]): Parser[List[T]] = rep1(p, p) + + /** A parser generator for non-empty repetitions. + * + * `rep1(f, p)` first uses `f` (which must succeed) and then repeatedly + * uses `p` to parse the input until `p` fails + * (the result is a `List` of the consecutive results of `f` and `p`) + * + * @param first a `Parser` that parses the first piece of input + * @param p0 a `Parser` that is to be applied successively to the rest of the input (if any) -- evaluated at most once, and only when necessary + * @return A parser that returns a list of results produced by first applying `f` and then + * repeatedly `p` to the input (it only succeeds if `f` matches). + */ + @migration("The `p0` call-by-name arguments is evaluated at most once per constructed Parser object, instead of on every need that arises during parsing.", "2.9.0") + def rep1[T](first: => Parser[T], p0: => Parser[T]): Parser[List[T]] = Parser { in => + lazy val p = p0 // lazy argument + val elems = new ListBuffer[T] + + def continue(in: Input): ParseResult[List[T]] = { + val p0 = p // avoid repeatedly re-evaluating by-name parser + @tailrec def applyp(in0: Input): ParseResult[List[T]] = p0(in0) match { + case Success(x, rest) => elems += x ; applyp(rest) + case e @ Error(_, _) => e // still have to propagate error + case _ => Success(elems.toList, in0) + } + + applyp(in) + } + + first(in) match { + case Success(x, rest) => elems += x ; continue(rest) + case ns: NoSuccess => ns + } + } + + /** A parser generator for a specified number of repetitions. 
+ * + * `repN(n, p)` uses `p` exactly `n` time to parse the input + * (the result is a `List` of the `n` consecutive results of `p`). + * + * @param p a `Parser` that is to be applied successively to the input + * @param num the exact number of times `p` must succeed + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches exactly `n` times). + */ + def repN[T](num: Int, p: => Parser[T]): Parser[List[T]] = + if (num == 0) success(Nil) else Parser { in => + val elems = new ListBuffer[T] + val p0 = p // avoid repeatedly re-evaluating by-name parser + + @tailrec def applyp(in0: Input): ParseResult[List[T]] = + if (elems.length == num) Success(elems.toList, in0) + else p0(in0) match { + case Success(x, rest) => elems += x ; applyp(rest) + case ns: NoSuccess => ns + } + + applyp(in) + } + + /** A parser generator for non-empty repetitions. + * + * `rep1sep(p, q)` repeatedly applies `p` interleaved with `q` to parse the + * input, until `p` fails. The parser `p` must succeed at least once. + * + * @param p a `Parser` that is to be applied successively to the input + * @param q a `Parser` that parses the elements that separate the elements parsed by `p` + * (interleaved with `q`) + * @return A parser that returns a list of results produced by repeatedly applying `p` to the input + * (and that only succeeds if `p` matches at least once). + * The results of `p` are collected in a list. The results of `q` are discarded. + */ + def rep1sep[T](p : => Parser[T], q : => Parser[Any]): Parser[List[T]] = + p ~ rep(q ~> p) ^^ {case x~y => x::y} + + /** A parser generator that, roughly, generalises the rep1sep generator so + * that `q`, which parses the separator, produces a left-associative + * function that combines the elements it separates. + * + * ''From: J. Fokker. Functional parsers. In J. Jeuring and E. 
Meijer, editors, Advanced Functional Programming, + * volume 925 of Lecture Notes in Computer Science, pages 1--23. Springer, 1995.'' + * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a left-associative function that + * combines two elements into one + */ + def chainl1[T](p: => Parser[T], q: => Parser[(T, T) => T]): Parser[T] + = chainl1(p, p, q) + + /** A parser generator that, roughly, generalises the `rep1sep` generator + * so that `q`, which parses the separator, produces a left-associative + * function that combines the elements it separates. + * + * @param first a parser that parses the first element + * @param p a parser that parses the subsequent elements + * @param q a parser that parses the token(s) separating the elements, + * yielding a left-associative function that combines two elements + * into one + */ + def chainl1[T, U](first: => Parser[T], p: => Parser[U], q: => Parser[(T, U) => T]): Parser[T] + = first ~ rep(q ~ p) ^^ { + case x ~ xs => xs.foldLeft(x: T){case (a, f ~ b) => f(a, b)} // x's type annotation is needed to deal with changed type inference due to SI-5189 + } + + /** A parser generator that generalises the `rep1sep` generator so that `q`, + * which parses the separator, produces a right-associative function that + * combines the elements it separates. Additionally, the right-most (last) + * element and the left-most combining function have to be supplied. 
+ * + * rep1sep(p: Parser[T], q) corresponds to chainr1(p, q ^^ cons, cons, Nil) (where val cons = (x: T, y: List[T]) => x :: y) + * + * @param p a parser that parses the elements + * @param q a parser that parses the token(s) separating the elements, yielding a right-associative function that + * combines two elements into one + * @param combine the "last" (left-most) combination function to be applied + * @param first the "first" (right-most) element to be combined + */ + def chainr1[T, U](p: => Parser[T], q: => Parser[(T, U) => U], combine: (T, U) => U, first: U): Parser[U] + = p ~ rep(q ~ p) ^^ { + case x ~ xs => (new ~(combine, x) :: xs).foldRight(first){case (f ~ a, b) => f(a, b)} + } + + /** A parser generator for optional sub-phrases. + * + * `opt(p)` is a parser that returns `Some(x)` if `p` returns `x` and `None` if `p` fails. + * + * @param p A `Parser` that is tried on the input + * @return a `Parser` that always succeeds: either with the result provided by `p` or + * with the empty result + */ + def opt[T](p: => Parser[T]): Parser[Option[T]] = + p ^^ (x => Some(x)) | success(None) + + /** Wrap a parser so that its failures and errors become success and + * vice versa -- it never consumes any input. + */ + def not[T](p: => Parser[T]): Parser[Unit] = Parser { in => + p(in) match { + case Success(_, _) => Failure("Expected failure", in) + case _ => Success((), in) + } + } + + /** A parser generator for guard expressions. The resulting parser will + * fail or succeed just like the one given as parameter but it will not + * consume any input. + * + * @param p a `Parser` that is to be applied to the input + * @return A parser that returns success if and only if `p` succeeds but + * never consumes any input + */ + def guard[T](p: => Parser[T]): Parser[T] = Parser { in => + p(in) match{ + case s@ Success(s1,_) => Success(s1, in) + case e => e + } + } + + /** `positioned` decorates a parser's result with the start position of the + * input it consumed. 
+ * + * @param p a `Parser` whose result conforms to `Positional`. + * @return A parser that has the same behaviour as `p`, but which marks its + * result with the start position of the input it consumed, + * if it didn't already have a position. + */ + def positioned[T <: Positional](p: => Parser[T]): Parser[T] = Parser { in => + p(in) match { + case Success(t, in1) => Success(if (t.pos == NoPosition) t setPos in.pos else t, in1) + case ns: NoSuccess => ns + } + } + + /** A parser generator delimiting whole phrases (i.e. programs). + * + * `phrase(p)` succeeds if `p` succeeds and no input is left over after `p`. + * + * @param p the parser that must consume all input for the resulting parser + * to succeed. + * @return a parser that has the same result as `p`, but that only succeeds + * if `p` consumed all the input. + */ + def phrase[T](p: Parser[T]) = new Parser[T] { + def apply(in: Input) = lastNoSuccessVar.withValue(None) { + p(in) match { + case s @ Success(out, in1) => + if (in1.atEnd) + s + else + lastNoSuccessVar.value filterNot { _.next.pos < in1.pos } getOrElse Failure("end of input expected", in1) + case ns => lastNoSuccessVar.value.getOrElse(ns) + } + } + } + + /** Given a concatenation with a repetition (list), move the concatenated element into the list */ + def mkList[T] = (_: ~[T, List[T]]) match { case x ~ xs => x :: xs } + + /** A wrapper over sequence of matches. + * + * Given `p1: Parser[A]` and `p2: Parser[B]`, a parser composed with + * `p1 ~ p2` will have type `Parser[~[A, B]]`. The successful result + * of the parser can be extracted from this case class. + * + * It also enables pattern matching, so something like this is possible: + * + * {{{ + * def concat(p1: Parser[String], p2: Parser[String]): Parser[String] = + * p1 ~ p2 ^^ { case a ~ b => a + b } + * }}} + */ + case class ~[+a, +b](_1: a, _2: b) { + override def toString = "("+ _1 +"~"+ _2 +")" + } + + /** A parser whose `~` combinator disallows back-tracking. 
+ */ + trait OnceParser[+T] extends Parser[T] { + override def ~ [U](p: => Parser[U]): Parser[~[T, U]] + = OnceParser{ (for(a <- this; b <- commit(p)) yield new ~(a,b)).named("~") } + } +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala new file mode 100644 index 0000000000..8ebbc573ad --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/RegexParsers.scala @@ -0,0 +1,166 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.combinator + +import java.util.regex.Pattern +import scala.util.matching.Regex +import scala.util.parsing.input._ +import scala.collection.immutable.PagedSeq +import scala.language.implicitConversions + +/** The ''most important'' differences between `RegexParsers` and + * [[scala.util.parsing.combinator.Parsers]] are: + * + * - `Elem` is defined to be [[scala.Char]] + * - There's an implicit conversion from [[java.lang.String]] to `Parser[String]`, + * so that string literals can be used as parser combinators. + * - There's an implicit conversion from [[scala.util.matching.Regex]] to `Parser[String]`, + * so that regex expressions can be used as parser combinators. + * - The parsing methods call the method `skipWhitespace` (defaults to `true`) and, if true, + * skip any whitespace before each parser is called. + * - Protected val `whiteSpace` returns a regex that identifies whitespace. 
+ * + * For example, this creates a very simple calculator receiving `String` input: + * + * {{{ + * object Calculator extends RegexParsers { + * def number: Parser[Double] = """\d+(\.\d*)?""".r ^^ { _.toDouble } + * def factor: Parser[Double] = number | "(" ~> expr <~ ")" + * def term : Parser[Double] = factor ~ rep( "*" ~ factor | "/" ~ factor) ^^ { + * case number ~ list => (number /: list) { + * case (x, "*" ~ y) => x * y + * case (x, "/" ~ y) => x / y + * } + * } + * def expr : Parser[Double] = term ~ rep("+" ~ log(term)("Plus term") | "-" ~ log(term)("Minus term")) ^^ { + * case number ~ list => list.foldLeft(number) { // same as before, using alternate name for /: + * case (x, "+" ~ y) => x + y + * case (x, "-" ~ y) => x - y + * } + * } + * + * def apply(input: String): Double = parseAll(expr, input) match { + * case Success(result, _) => result + * case failure : NoSuccess => scala.sys.error(failure.msg) + * } + * } + * }}} + */ +trait RegexParsers extends Parsers { + + type Elem = Char + + protected val whiteSpace = """\s+""".r + + def skipWhitespace = whiteSpace.toString.length > 0 + + /** Method called to handle whitespace before parsers. + * + * It checks `skipWhitespace` and, if true, skips anything + * matching `whiteSpace` starting from the current offset. + * + * @param source The input being parsed. + * @param offset The offset into `source` from which to match. + * @return The offset to be used for the next parser. 
+ */ + protected def handleWhiteSpace(source: java.lang.CharSequence, offset: Int): Int = + if (skipWhitespace) + (whiteSpace findPrefixMatchOf (source.subSequence(offset, source.length))) match { + case Some(matched) => offset + matched.end + case None => offset + } + else + offset + + /** A parser that matches a literal string */ + implicit def literal(s: String): Parser[String] = new Parser[String] { + def apply(in: Input) = { + val source = in.source + val offset = in.offset + val start = handleWhiteSpace(source, offset) + var i = 0 + var j = start + while (i < s.length && j < source.length && s.charAt(i) == source.charAt(j)) { + i += 1 + j += 1 + } + if (i == s.length) + Success(source.subSequence(start, j).toString, in.drop(j - offset)) + else { + val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" + Failure("`"+s+"' expected but "+found+" found", in.drop(start - offset)) + } + } + } + + /** A parser that matches a regex string */ + implicit def regex(r: Regex): Parser[String] = new Parser[String] { + def apply(in: Input) = { + val source = in.source + val offset = in.offset + val start = handleWhiteSpace(source, offset) + (r findPrefixMatchOf (source.subSequence(start, source.length))) match { + case Some(matched) => + Success(source.subSequence(start, start + matched.end).toString, + in.drop(start + matched.end - offset)) + case None => + val found = if (start == source.length()) "end of source" else "`"+source.charAt(start)+"'" + Failure("string matching regex `"+r+"' expected but "+found+" found", in.drop(start - offset)) + } + } + } + + /** `positioned` decorates a parser's result with the start position of the input it consumed. + * If whitespace is being skipped, then it is skipped before the start position is recorded. + * + * @param p a `Parser` whose result conforms to `Positional`. 
+ * @return A parser that has the same behaviour as `p`, but which marks its result with the + * start position of the input it consumed after whitespace has been skipped, if it + * didn't already have a position. + */ + override def positioned[T <: Positional](p: => Parser[T]): Parser[T] = { + val pp = super.positioned(p) + new Parser[T] { + def apply(in: Input) = { + val offset = in.offset + val start = handleWhiteSpace(in.source, offset) + pp(in.drop (start - offset)) + } + } + } + + override def phrase[T](p: Parser[T]): Parser[T] = + super.phrase(p <~ opt("""\z""".r)) + + /** Parse some prefix of reader `in` with parser `p`. */ + def parse[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = + p(in) + + /** Parse some prefix of character sequence `in` with parser `p`. */ + def parse[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = + p(new CharSequenceReader(in)) + + /** Parse some prefix of reader `in` with parser `p`. */ + def parse[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = + p(new PagedSeqReader(PagedSeq.fromReader(in))) + + /** Parse all of reader `in` with parser `p`. */ + def parseAll[T](p: Parser[T], in: Reader[Char]): ParseResult[T] = + parse(phrase(p), in) + + /** Parse all of reader `in` with parser `p`. */ + def parseAll[T](p: Parser[T], in: java.io.Reader): ParseResult[T] = + parse(phrase(p), in) + + /** Parse all of character sequence `in` with parser `p`. 
*/ + def parseAll[T](p: Parser[T], in: java.lang.CharSequence): ParseResult[T] = + parse(phrase(p), in) +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala new file mode 100644 index 0000000000..d8029d068f --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/Lexical.scala @@ -0,0 +1,40 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing +package combinator +package lexical + +import token._ +import input.CharArrayReader.EofCh + +/** This component complements the `Scanners` component with + * common operations for lexical parsers. + * + * Refer to [[scala.util.parsing.combinator.lexical.StdLexical]] + * for a concrete implementation for a simple, Scala-like language. 
+ * + * @author Martin Odersky, Adriaan Moors + */ +abstract class Lexical extends Scanners with Tokens { + + /** A character-parser that matches a letter (and returns it).*/ + def letter = elem("letter", _.isLetter) + + /** A character-parser that matches a digit (and returns it).*/ + def digit = elem("digit", _.isDigit) + + /** A character-parser that matches any character except the ones given in `cs` (and returns it).*/ + def chrExcept(cs: Char*) = elem("", ch => (cs forall (ch != _))) + + /** A character-parser that matches a white-space character (and returns it).*/ + def whitespaceChar = elem("space char", ch => ch <= ' ' && ch != EofCh) +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala new file mode 100644 index 0000000000..2e12915bb8 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/Scanners.scala @@ -0,0 +1,63 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package lexical + +import input._ + +/** This component provides core functionality for lexical parsers. + * + * See its subclasses [[scala.util.parsing.combinator.lexical.Lexical]] and -- most interestingly + * [[scala.util.parsing.combinator.lexical.StdLexical]], for more functionality. + * + * @author Martin Odersky, Adriaan Moors + */ +trait Scanners extends Parsers { + type Elem = Char + type Token + + /** This token is produced by a scanner `Scanner` when scanning failed. */ + def errorToken(msg: String): Token + + /** A parser that produces a token (from a stream of characters). */ + def token: Parser[Token] + + /** A parser for white-space -- its result will be discarded. 
*/ + def whitespace: Parser[Any] + + /** `Scanner` is essentially¹ a parser that produces `Token`s + * from a stream of characters. The tokens it produces are typically + * passed to parsers in `TokenParsers`. + * + * @note ¹ `Scanner` is really a `Reader` of `Token`s + */ + class Scanner(in: Reader[Char]) extends Reader[Token] { + /** Convenience constructor (makes a character reader out of the given string) */ + def this(in: String) = this(new CharArrayReader(in.toCharArray())) + private val (tok, rest1, rest2) = whitespace(in) match { + case Success(_, in1) => + token(in1) match { + case Success(tok, in2) => (tok, in1, in2) + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + case ns: NoSuccess => (errorToken(ns.msg), ns.next, skip(ns.next)) + } + private def skip(in: Reader[Char]) = if (in.atEnd) in else in.rest + + override def source: java.lang.CharSequence = in.source + override def offset: Int = in.offset + def first = tok + def rest = new Scanner(rest2) + def pos = rest1.pos + def atEnd = in.atEnd || (whitespace(in) match { case Success(_, in1) => in1.atEnd case _ => false }) + } +} + diff --git a/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala new file mode 100644 index 0000000000..32d7502cda --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/lexical/StdLexical.scala @@ -0,0 +1,87 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package lexical + +import token._ +import input.CharArrayReader.EofCh +import scala.collection.mutable + +/** This component provides a standard lexical parser for a simple, + * [[http://scala-lang.org Scala]]-like language. 
It parses keywords and + * identifiers, numeric literals (integers), strings, and delimiters. + * + * To distinguish between identifiers and keywords, it uses a set of + * reserved identifiers: every string contained in `reserved` is returned + * as a keyword token. (Note that `=>` is hard-coded as a keyword.) + * Additionally, the kinds of delimiters can be specified by the + * `delimiters` set. + * + * Usually this component is used to break character-based input into + * bigger tokens, which are then passed to a token-parser (see + * [[scala.util.parsing.combinator.syntactical.TokenParsers]].) + * + * @author Martin Odersky + * @author Iulian Dragos + * @author Adriaan Moors + */ +class StdLexical extends Lexical with StdTokens { + // see `token` in `Scanners` + def token: Parser[Token] = + ( identChar ~ rep( identChar | digit ) ^^ { case first ~ rest => processIdent(first :: rest mkString "") } + | digit ~ rep( digit ) ^^ { case first ~ rest => NumericLit(first :: rest mkString "") } + | '\'' ~ rep( chrExcept('\'', '\n', EofCh) ) ~ '\'' ^^ { case '\'' ~ chars ~ '\'' => StringLit(chars mkString "") } + | '\"' ~ rep( chrExcept('\"', '\n', EofCh) ) ~ '\"' ^^ { case '\"' ~ chars ~ '\"' => StringLit(chars mkString "") } + | EofCh ^^^ EOF + | '\'' ~> failure("unclosed string literal") + | '\"' ~> failure("unclosed string literal") + | delim + | failure("illegal character") + ) + + /** Returns the legal identifier chars, except digits. */ + def identChar = letter | elem('_') + + // see `whitespace in `Scanners` + def whitespace: Parser[Any] = rep[Any]( + whitespaceChar + | '/' ~ '*' ~ comment + | '/' ~ '/' ~ rep( chrExcept(EofCh, '\n') ) + | '/' ~ '*' ~ failure("unclosed comment") + ) + + protected def comment: Parser[Any] = ( + '*' ~ '/' ^^ { case _ => ' ' } + | chrExcept(EofCh) ~ comment + ) + + /** The set of reserved identifiers: these will be returned as `Keyword`s. 
*/ + val reserved = new mutable.HashSet[String] + + /** The set of delimiters (ordering does not matter). */ + val delimiters = new mutable.HashSet[String] + + protected def processIdent(name: String) = + if (reserved contains name) Keyword(name) else Identifier(name) + + private lazy val _delim: Parser[Token] = { + // construct parser for delimiters by |'ing together the parsers for the individual delimiters, + // starting with the longest one -- otherwise a delimiter D will never be matched if there is + // another delimiter that is a prefix of D + def parseDelim(s: String): Parser[Token] = accept(s.toList) ^^ { x => Keyword(s) } + + val d = new Array[String](delimiters.size) + delimiters.copyToArray(d, 0) + scala.util.Sorting.quickSort(d) + (d.toList map parseDelim).foldRight(failure("no matching delimiter"): Parser[Token])((x, y) => y | x) + } + protected def delim: Parser[Token] = _delim +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala new file mode 100644 index 0000000000..5b9d14c9a7 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StandardTokenParsers.scala @@ -0,0 +1,32 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing +package combinator +package syntactical + +import token._ +import lexical.StdLexical +import scala.language.implicitConversions + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. 
+* +* @author Martin Odersky, Adriaan Moors + */ +class StandardTokenParsers extends StdTokenParsers { + type Tokens = StdTokens + val lexical = new StdLexical + + //an implicit keyword function that gives a warning when a given word is not in the reserved/delimiters list + override implicit def keyword(chars : String): Parser[String] = + if(lexical.reserved.contains(chars) || lexical.delimiters.contains(chars)) super.keyword(chars) + else failure("You are trying to parse \""+chars+"\", but it is neither contained in the delimiters list, nor in the reserved keyword list of your lexical object") + +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala new file mode 100644 index 0000000000..adcf85da7a --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/StdTokenParsers.scala @@ -0,0 +1,52 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing +package combinator +package syntactical + +import token._ +import scala.collection.mutable +import scala.language.implicitConversions + +/** This component provides primitive parsers for the standard tokens defined in `StdTokens`. +* +* @author Martin Odersky, Adriaan Moors + */ +trait StdTokenParsers extends TokenParsers { + type Tokens <: StdTokens + import lexical.{Keyword, NumericLit, StringLit, Identifier} + + protected val keywordCache = mutable.HashMap[String, Parser[String]]() + + /** A parser which matches a single keyword token. + * + * @param chars The character string making up the matched keyword. 
+ * @return a `Parser` that matches the given string + */ +// implicit def keyword(chars: String): Parser[String] = accept(Keyword(chars)) ^^ (_.chars) + implicit def keyword(chars: String): Parser[String] = + keywordCache.getOrElseUpdate(chars, accept(Keyword(chars)) ^^ (_.chars)) + + /** A parser which matches a numeric literal */ + def numericLit: Parser[String] = + elem("number", _.isInstanceOf[NumericLit]) ^^ (_.chars) + + /** A parser which matches a string literal */ + def stringLit: Parser[String] = + elem("string literal", _.isInstanceOf[StringLit]) ^^ (_.chars) + + /** A parser which matches an identifier */ + def ident: Parser[String] = + elem("identifier", _.isInstanceOf[Identifier]) ^^ (_.chars) +} + + diff --git a/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala b/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala new file mode 100644 index 0000000000..b06babcd7e --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/syntactical/TokenParsers.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing +package combinator +package syntactical + +/** This is the core component for token-based parsers. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait TokenParsers extends Parsers { + /** `Tokens` is the abstract type of the `Token`s consumed by the parsers in this component. */ + type Tokens <: token.Tokens + + /** `lexical` is the component responsible for consuming some basic kind of + * input (usually character-based) and turning it into the tokens + * understood by these parsers. 
+ */ + val lexical: Tokens + + /** The input-type for these parsers*/ + type Elem = lexical.Token + +} + + diff --git a/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala b/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala new file mode 100644 index 0000000000..a102d1541e --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/token/StdTokens.scala @@ -0,0 +1,39 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing +package combinator +package token + +/** This component provides the standard `Token`s for a simple, Scala-like language. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait StdTokens extends Tokens { + /** The class of keyword tokens */ + case class Keyword(chars: String) extends Token { + override def toString = "`"+chars+"'" + } + + /** The class of numeric literal tokens */ + case class NumericLit(chars: String) extends Token { + override def toString = chars + } + + /** The class of string literal tokens */ + case class StringLit(chars: String) extends Token { + override def toString = "\""+chars+"\"" + } + + /** The class of identifier tokens */ + case class Identifier(chars: String) extends Token { + override def toString = "identifier "+chars + } +} diff --git a/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala b/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala new file mode 100644 index 0000000000..5c3f1f95b5 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/combinator/token/Tokens.scala @@ -0,0 +1,43 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package 
util.parsing +package combinator +package token + +/** This component provides the notion of `Token`, the unit of information that is passed from lexical + * parsers in the `Lexical` component to the parsers in the `TokenParsers` component. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait Tokens { + /** Objects of this type are produced by a lexical parser or ``scanner'', and consumed by a parser. + * + * @see [[scala.util.parsing.combinator.syntactical.TokenParsers]] + */ + abstract class Token { + def chars: String + } + + /** A class of error tokens. Error tokens are used to communicate + * errors detected during lexical analysis + */ + case class ErrorToken(msg: String) extends Token { + def chars = "*** error: "+msg + } + + /** A class for end-of-file tokens */ + case object EOF extends Token { + def chars = "" + } + + /** This token is produced by a scanner `Scanner` when scanning failed. */ + def errorToken(msg: String): Token = new ErrorToken(msg) +} diff --git a/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala b/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala new file mode 100644 index 0000000000..22530cb9aa --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/CharArrayReader.scala @@ -0,0 +1,35 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +object CharArrayReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. 
+ * + * @param chars an array of characters + * @param index starting offset into the array; the first element returned will be `source(index)` + * + * @author Martin Odersky + * @author Adriaan Moors + */ +class CharArrayReader(chars: Array[Char], index: Int) extends CharSequenceReader(chars, index) { + + def this(chars: Array[Char]) = this(chars, 0) + +} diff --git a/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala b/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala new file mode 100644 index 0000000000..8e7751cc82 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/CharSequenceReader.scala @@ -0,0 +1,66 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky, Adriaan Moors + */ +object CharSequenceReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. + * + * @param source the source sequence + * @param offset starting offset. + * + * @author Martin Odersky + */ +class CharSequenceReader(override val source: java.lang.CharSequence, + override val offset: Int) extends Reader[Char] { + import CharSequenceReader._ + + /** Construct a `CharSequenceReader` with its first element at + * `source(0)` and position `(1,1)`. + */ + def this(source: java.lang.CharSequence) = this(source, 0) + + /** Returns the first element of the reader, or EofCh if reader is at its end. + */ + def first = + if (offset < source.length) source.charAt(offset) else EofCh + + /** Returns a CharSequenceReader consisting of all elements except the first. 
+ * + * @return If `atEnd` is `true`, the result will be `this`; + * otherwise, it's a `CharSequenceReader` containing the rest of input. + */ + def rest: CharSequenceReader = + if (offset < source.length) new CharSequenceReader(source, offset + 1) + else this + + /** The position of the first element in the reader. + */ + def pos: Position = new OffsetPosition(source, offset) + + /** true iff there are no more elements in this reader (except for trailing + * EofCh's) + */ + def atEnd = offset >= source.length + + /** Returns an abstract reader consisting of all elements except the first + * `n` elements. + */ + override def drop(n: Int): CharSequenceReader = + new CharSequenceReader(source, offset + n) +} diff --git a/src/parser-combinators/scala/util/parsing/input/NoPosition.scala b/src/parser-combinators/scala/util/parsing/input/NoPosition.scala new file mode 100644 index 0000000000..4a32264b79 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/NoPosition.scala @@ -0,0 +1,25 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.input + +/** Undefined position. 
+ * + * @author Martin Odersky + * @author Adriaan Moors + */ +object NoPosition extends Position { + def line = 0 + def column = 0 + override def toString = "" + override def longString = toString + def lineContents = "" +} diff --git a/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala b/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala new file mode 100644 index 0000000000..23f79c74d1 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/OffsetPosition.scala @@ -0,0 +1,73 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +import scala.collection.mutable.ArrayBuffer + +/** `OffsetPosition` is a standard class for positions + * represented as offsets into a source ``document''. + * + * @param source The source document + * @param offset The offset indicating the position + * + * @author Martin Odersky + */ +case class OffsetPosition(source: java.lang.CharSequence, offset: Int) extends Position { + + /** An index that contains all line starts, including first line, and eof. */ + private lazy val index: Array[Int] = { + val lineStarts = new ArrayBuffer[Int] + lineStarts += 0 + for (i <- 0 until source.length) + if (source.charAt(i) == '\n') lineStarts += (i + 1) + lineStarts += source.length + lineStarts.toArray + } + + /** The line number referred to by the position; line numbers start at 1. */ + def line: Int = { + var lo = 0 + var hi = index.length - 1 + while (lo + 1 < hi) { + val mid = (hi + lo) / 2 + if (offset < index(mid)) hi = mid + else lo = mid + } + lo + 1 + } + + /** The column number referred to by the position; column numbers start at 1. */ + def column: Int = offset - index(line - 1) + 1 + + /** The contents of the line numbered at the current offset. 
+ * + * @return the line at `offset` (not including a newline) + */ + def lineContents: String = + source.subSequence(index(line - 1), index(line)).toString + + /** Returns a string representation of the `Position`, of the form `line.column`. */ + override def toString = line+"."+column + + /** Compare this position to another, by first comparing their line numbers, + * and then -- if necessary -- using the columns to break a tie. + * + * @param that a `Position` to compare to this `Position` + * @return true if this position's line number or (in case of equal line numbers) + * column is smaller than the corresponding components of `that` + */ + override def <(that: Position) = that match { + case OffsetPosition(_, that_offset) => + this.offset < that_offset + case _ => + this.line < that.line || + this.line == that.line && this.column < that.column + } +} diff --git a/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala b/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala new file mode 100644 index 0000000000..468f1f9a5f --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/PagedSeqReader.scala @@ -0,0 +1,71 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + +package scala +package util.parsing.input + +import scala.collection.immutable.PagedSeq + +/** An object encapsulating basic character constants. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +object PagedSeqReader { + final val EofCh = '\032' +} + +/** A character array reader reads a stream of characters (keeping track of their positions) + * from an array. + * + * @param seq the source sequence + * @param offset starting offset. 
+ * + * @author Martin Odersky + */ +class PagedSeqReader(seq: PagedSeq[Char], + override val offset: Int) extends Reader[Char] { + import PagedSeqReader._ + + override lazy val source: java.lang.CharSequence = seq + + /** Construct a `PagedSeqReader` with its first element at + * `source(0)` and position `(1,1)`. + */ + def this(seq: PagedSeq[Char]) = this(seq, 0) + + /** Returns the first element of the reader, or EofCh if reader is at its end + */ + def first = + if (seq.isDefinedAt(offset)) seq(offset) else EofCh + + /** Returns a PagedSeqReader consisting of all elements except the first + * + * @return If `atEnd` is `true`, the result will be `this`; + * otherwise, it's a `PagedSeqReader` containing the rest of input. + */ + def rest: PagedSeqReader = + if (seq.isDefinedAt(offset)) new PagedSeqReader(seq, offset + 1) + else this + + /** The position of the first element in the reader. + */ + def pos: Position = new OffsetPosition(source, offset) + + /** true iff there are no more elements in this reader (except for trailing + * EofCh's). + */ + def atEnd = !seq.isDefinedAt(offset) + + /** Returns an abstract reader consisting of all elements except the first + * `n` elements. + */ + override def drop(n: Int): PagedSeqReader = + new PagedSeqReader(seq, offset + n) +} diff --git a/src/parser-combinators/scala/util/parsing/input/Position.scala b/src/parser-combinators/scala/util/parsing/input/Position.scala new file mode 100644 index 0000000000..b7995a6471 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Position.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +/** `Position` is the base trait for objects describing a position in a `document`. 
+ * + * It provides functionality for: + * - generating a visual representation of this position (`longString`); + * - comparing two positions (`<`). + * + * To use this class for a concrete kind of `document`, implement the `lineContents` method. + * + * @author Martin Odersky + * @author Adriaan Moors + */ +trait Position { + + /** The line number referred to by the position; line numbers start at 1. */ + def line: Int + + /** The column number referred to by the position; column numbers start at 1. */ + def column: Int + + /** The contents of the line at this position. (must not contain a new-line character). + */ + protected def lineContents: String + + /** Returns a string representation of the `Position`, of the form `line.column`. */ + override def toString = ""+line+"."+column + + /** Returns a more ``visual'' representation of this position. + * More precisely, the resulting string consists of two lines: + * 1. the line in the document referred to by this position + * 2. a caret indicating the column + * + * Example: + * {{{ + * List(this, is, a, line, from, the, document) + * ^ + * }}} + */ + def longString = lineContents+"\n"+lineContents.take(column-1).map{x => if (x == '\t') x else ' ' } + "^" + + /** Compare this position to another, by first comparing their line numbers, + * and then -- if necessary -- using the columns to break a tie. 
+ * + * @param `that` a `Position` to compare to this `Position` + * @return true if this position's line number or (in case of equal line numbers) + * column is smaller than the corresponding components of `that` + */ + def <(that: Position) = { + this.line < that.line || + this.line == that.line && this.column < that.column + } +} diff --git a/src/parser-combinators/scala/util/parsing/input/Positional.scala b/src/parser-combinators/scala/util/parsing/input/Positional.scala new file mode 100644 index 0000000000..cfde67cadd --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Positional.scala @@ -0,0 +1,30 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +/** A trait for objects that have a source position. + * + * @author Martin Odersky, Adriaan Moors + */ +trait Positional { + + /** The source position of this object, initially set to undefined. */ + var pos: Position = NoPosition + + /** If current source position is undefined, update it with given position `newpos` + * @return the object itself + */ + def setPos(newpos: Position): this.type = { + if (pos eq NoPosition) pos = newpos + this + } +} + + diff --git a/src/parser-combinators/scala/util/parsing/input/Reader.scala b/src/parser-combinators/scala/util/parsing/input/Reader.scala new file mode 100644 index 0000000000..9dbf08a7ca --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/Reader.scala @@ -0,0 +1,62 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.input + + +/** An interface for streams of values that have positions. 
+ * + * @author Martin Odersky + * @author Adriaan Moors + */ +abstract class Reader[+T] { + + /** If this is a reader over character sequences, the underlying char sequence. + * If not, throws a `NoSuchMethodError` exception. + * + * @throws [[java.lang.NoSuchMethodError]] if this not a char sequence reader. + */ + def source: java.lang.CharSequence = + throw new NoSuchMethodError("not a char sequence reader") + + def offset: Int = + throw new NoSuchMethodError("not a char sequence reader") + + /** Returns the first element of the reader + */ + def first: T + + /** Returns an abstract reader consisting of all elements except the first + * + * @return If `atEnd` is `true`, the result will be `this'; + * otherwise, it's a `Reader` containing more elements. + */ + def rest: Reader[T] + + /** Returns an abstract reader consisting of all elements except the first `n` elements. + */ + def drop(n: Int): Reader[T] = { + var r: Reader[T] = this + var cnt = n + while (cnt > 0) { + r = r.rest; cnt -= 1 + } + r + } + + /** The position of the first element in the reader. + */ + def pos: Position + + /** `true` iff there are no more elements in this reader. + */ + def atEnd: Boolean +} diff --git a/src/parser-combinators/scala/util/parsing/input/StreamReader.scala b/src/parser-combinators/scala/util/parsing/input/StreamReader.scala new file mode 100644 index 0000000000..30eb097fd7 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/input/StreamReader.scala @@ -0,0 +1,76 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.input + +import java.io.BufferedReader +import scala.collection.immutable.PagedSeq + +/** An object to create a `StreamReader` from a `java.io.Reader`. 
+ * + * @author Miles Sabin + */ +object StreamReader { + final val EofCh = '\032' + + /** Create a `StreamReader` from a `java.io.Reader`. + * + * @param in the `java.io.Reader` that provides the underlying + * stream of characters for this Reader. + */ + def apply(in: java.io.Reader): StreamReader = { + new StreamReader(PagedSeq.fromReader(in), 0, 1) + } +} + +/** A StreamReader reads from a character sequence, typically created as a PagedSeq + * from a java.io.Reader + * + * NOTE: + * StreamReaders do not really fulfill the new contract for readers, which + * requires a `source` CharSequence representing the full input. + * Instead source is treated line by line. + * As a consequence, regex matching cannot extend beyond a single line + * when a StreamReader are used for input. + * + * If you need to match regexes spanning several lines you should consider + * class `PagedSeqReader` instead. + * + * @author Miles Sabin + * @author Martin Odersky + */ +sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) { + import StreamReader._ + + override def rest: StreamReader = + if (off == seq.length) this + else if (seq(off) == '\n') + new StreamReader(seq.slice(off + 1), 0, lnum + 1) + else new StreamReader(seq, off + 1, lnum) + + private def nextEol = { + var i = off + while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1 + i + } + + override def drop(n: Int): StreamReader = { + val eolPos = nextEol + if (eolPos < off + n && eolPos < seq.length) + new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1)) + else + new StreamReader(seq, off + n, lnum) + } + + override def pos: Position = new Position { + def line = lnum + def column = off + 1 + def lineContents = seq.slice(0, nextEol).toString + } +} diff --git a/src/parser-combinators/scala/util/parsing/json/JSON.scala b/src/parser-combinators/scala/util/parsing/json/JSON.scala new file mode 100644 index 0000000000..b06dddf532 --- 
/dev/null +++ b/src/parser-combinators/scala/util/parsing/json/JSON.scala @@ -0,0 +1,97 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala +package util.parsing.json + +/** + * This object provides a simple interface to the JSON parser class. + * The default conversion for numerics is into a double. If you wish to + * override this behavior at the global level, you can set the + * `globalNumberParser` property to your own `(String => Any)` function. + * If you only want to override at the per-thread level then you can set + * the `perThreadNumberParser` property to your function. For example: + * {{{ + * val myConversionFunc = {input : String => BigDecimal(input)} + * + * // Global override + * JSON.globalNumberParser = myConversionFunc + * + * // Per-thread override + * JSON.perThreadNumberParser = myConversionFunc + * }}} + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This object will be removed.", "2.11.0") +object JSON extends Parser { + + /** + * This method converts ''raw'' results back into the original, deprecated + * form. + */ + private def unRaw (in : Any) : Any = in match { + case JSONObject(obj) => obj.map({ case (k,v) => (k,unRaw(v))}).toList + case JSONArray(list) => list.map(unRaw) + case x => x + } + + /** + * Parse the given `JSON` string and return a list of elements. If the + * string is a `JSON` object it will be a `JSONObject`. If it's a `JSON` + * array it will be a `JSONArray`. + * + * @param input the given `JSON` string. + * @return an optional `JSONType` element. 
+ */ + def parseRaw(input : String) : Option[JSONType] = + phrase(root)(new lexical.Scanner(input)) match { + case Success(result, _) => Some(result) + case _ => None + } + + /** + * Parse the given `JSON` string and return either a `List[Any]` + * if the `JSON` string specifies an `Array`, or a + * `Map[String,Any]` if the `JSON` string specifies an object. + * + * @param input the given `JSON` string. + * @return an optional list or map. + */ + def parseFull(input: String): Option[Any] = + parseRaw(input) match { + case Some(data) => Some(resolveType(data)) + case None => None + } + + /** + * A utility method to resolve a parsed `JSON` list into objects or + * arrays. See the `parse` method for details. + */ + def resolveType(input: Any): Any = input match { + case JSONObject(data) => data.transform { + case (k,v) => resolveType(v) + } + case JSONArray(data) => data.map(resolveType) + case x => x + } + + /** + * The global (VM) default function for converting a string to a numeric value. + */ + def globalNumberParser_=(f: NumericParser) { defaultNumberParser = f } + def globalNumberParser : NumericParser = defaultNumberParser + + /** + * Defines the function used to convert a numeric string literal into a + * numeric format on a per-thread basis. Use `globalNumberParser` for a + * global override. 
+ */ + def perThreadNumberParser_=(f : NumericParser) { numberParser.set(f) } + def perThreadNumberParser : NumericParser = numberParser.get() +} diff --git a/src/parser-combinators/scala/util/parsing/json/Lexer.scala b/src/parser-combinators/scala/util/parsing/json/Lexer.scala new file mode 100644 index 0000000000..7fc4e0bab6 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/json/Lexer.scala @@ -0,0 +1,90 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.json + +import scala.util.parsing.combinator._ +import scala.util.parsing.combinator.lexical._ +import scala.util.parsing.input.CharArrayReader.EofCh + +/** + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +class Lexer extends StdLexical with ImplicitConversions { + + override def token: Parser[Token] = + //( '\"' ~ rep(charSeq | letter) ~ '\"' ^^ lift(StringLit) + ( string ^^ StringLit + | number ~ letter ^^ { case n ~ l => ErrorToken("Invalid number format : " + n + l) } + | '-' ~> whitespace ~ number ~ letter ^^ { case ws ~ num ~ l => ErrorToken("Invalid number format : -" + num + l) } + | '-' ~> whitespace ~ number ^^ { case ws ~ num => NumericLit("-" + num) } + | number ^^ NumericLit + | EofCh ^^^ EOF + | delim + | '\"' ~> failure("Unterminated string") + | rep(letter) ^^ checkKeyword + | failure("Illegal character") + ) + + def checkKeyword(xs : List[Any]) = { + val strRep = xs mkString "" + if (reserved contains strRep) Keyword(strRep) else ErrorToken("Not a keyword: " + strRep) + } + + /** A string is a collection of zero or more Unicode characters, wrapped in + * double quotes, using backslash escapes (cf. http://www.json.org/). 
+ */ + def string = '\"' ~> rep(charSeq | chrExcept('\"', '\n', EofCh)) <~ '\"' ^^ { _ mkString "" } + + override def whitespace = rep(whitespaceChar) + + def number = intPart ~ opt(fracPart) ~ opt(expPart) ^^ { case i ~ f ~ e => + i + optString(".", f) + optString("", e) + } + def intPart = zero | intList + def intList = nonzero ~ rep(digit) ^^ {case x ~ y => (x :: y) mkString ""} + def fracPart = '.' ~> rep(digit) ^^ { _ mkString "" } + def expPart = exponent ~ opt(sign) ~ rep1(digit) ^^ { case e ~ s ~ d => + e + optString("", s) + d.mkString("") + } + + private def optString[A](pre: String, a: Option[A]) = a match { + case Some(x) => pre + x.toString + case None => "" + } + + def zero: Parser[String] = '0' ^^^ "0" + def nonzero = elem("nonzero digit", d => d.isDigit && d != '0') + def exponent = elem("exponent character", d => d == 'e' || d == 'E') + def sign = elem("sign character", d => d == '-' || d == '+') + + def charSeq: Parser[String] = + ('\\' ~ '\"' ^^^ "\"" + |'\\' ~ '\\' ^^^ "\\" + |'\\' ~ '/' ^^^ "/" + |'\\' ~ 'b' ^^^ "\b" + |'\\' ~ 'f' ^^^ "\f" + |'\\' ~ 'n' ^^^ "\n" + |'\\' ~ 'r' ^^^ "\r" + |'\\' ~ 't' ^^^ "\t" + |'\\' ~> 'u' ~> unicodeBlock) + + val hexDigits = Set[Char]() ++ "0123456789abcdefABCDEF".toArray + def hexDigit = elem("hex digit", hexDigits.contains(_)) + + private def unicodeBlock = hexDigit ~ hexDigit ~ hexDigit ~ hexDigit ^^ { + case a ~ b ~ c ~ d => + new String(Array(Integer.parseInt(List(a, b, c, d) mkString "", 16)), 0, 1) + } + + //private def lift[T](f: String => T)(xs: List[Any]): T = f(xs mkString "") +} diff --git a/src/parser-combinators/scala/util/parsing/json/Parser.scala b/src/parser-combinators/scala/util/parsing/json/Parser.scala new file mode 100644 index 0000000000..521dfc6612 --- /dev/null +++ b/src/parser-combinators/scala/util/parsing/json/Parser.scala @@ -0,0 +1,147 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2006-2013, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | 
http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + + + +package scala +package util.parsing.json + +import scala.util.parsing.combinator._ +import scala.util.parsing.combinator.syntactical._ + +/** + * A marker class for the JSON result types. + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +sealed abstract class JSONType { + /** + * This version of toString allows you to provide your own value + * formatter. + */ + def toString (formatter : JSONFormat.ValueFormatter) : String + + /** + * Returns a String representation of this JSON value + * using the JSONFormat.defaultFormatter. + */ + override def toString = toString(JSONFormat.defaultFormatter) +} + +/** + * This object defines functions that are used when converting JSONType + * values into String representations. Mostly this is concerned with + * proper quoting of strings. + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This object will be removed.", "2.11.0") +object JSONFormat { + /** + * This type defines a function that can be used to + * format values into JSON format. + */ + type ValueFormatter = Any => String + + /** + * The default formatter used by the library. You can + * provide your own with the toString calls on + * JSONObject and JSONArray instances. + */ + val defaultFormatter : ValueFormatter = (x : Any) => x match { + case s : String => "\"" + quoteString(s) + "\"" + case jo : JSONObject => jo.toString(defaultFormatter) + case ja : JSONArray => ja.toString(defaultFormatter) + case other => other.toString + } + + /** + * This function can be used to properly quote Strings + * for JSON output. 
+ */ + def quoteString (s : String) : String = + s.map { + case '"' => "\\\"" + case '\\' => "\\\\" + case '/' => "\\/" + case '\b' => "\\b" + case '\f' => "\\f" + case '\n' => "\\n" + case '\r' => "\\r" + case '\t' => "\\t" + /* We'll unicode escape any control characters. These include: + * 0x0 -> 0x1f : ASCII Control (C0 Control Codes) + * 0x7f : ASCII DELETE + * 0x80 -> 0x9f : C1 Control Codes + * + * Per RFC4627, section 2.5, we're not technically required to + * encode the C1 codes, but we do to be safe. + */ + case c if ((c >= '\u0000' && c <= '\u001f') || (c >= '\u007f' && c <= '\u009f')) => "\\u%04x".format(c.toInt) + case c => c + }.mkString +} + +/** + * Represents a JSON Object (map). + * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +case class JSONObject (obj : Map[String,Any]) extends JSONType { + def toString (formatter : JSONFormat.ValueFormatter) = + "{" + obj.map({ case (k,v) => formatter(k.toString) + " : " + formatter(v) }).mkString(", ") + "}" +} + +/** + * Represents a JSON Array (list). + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +case class JSONArray (list : List[Any]) extends JSONType { + def toString (formatter : JSONFormat.ValueFormatter) = + "[" + list.map(formatter).mkString(", ") + "]" +} + +/** + * The main JSON Parser. 
+ * + * @author Derek Chen-Becker <"java"+@+"chen-becker"+"."+"org"> + */ +@deprecated("This class will be removed.", "2.11.0") +class Parser extends StdTokenParsers with ImplicitConversions { + // Fill in abstract defs + type Tokens = Lexer + val lexical = new Tokens + + // Configure lexical parsing + lexical.reserved ++= List("true", "false", "null") + lexical.delimiters ++= List("{", "}", "[", "]", ":", ",") + + /** Type signature for functions that can parse numeric literals */ + type NumericParser = String => Any + + // Global default number parsing function + protected var defaultNumberParser : NumericParser = {_.toDouble} + + // Per-thread default number parsing function + protected val numberParser = new ThreadLocal[NumericParser]() { + override def initialValue() = defaultNumberParser + } + + // Define the grammar + def root = jsonObj | jsonArray + def jsonObj = "{" ~> repsep(objEntry, ",") <~ "}" ^^ { case vals : List[_] => JSONObject(Map(vals : _*)) } + def jsonArray = "[" ~> repsep(value, ",") <~ "]" ^^ { case vals : List[_] => JSONArray(vals) } + def objEntry = stringVal ~ (":" ~> value) ^^ { case x ~ y => (x, y) } + def value: Parser[Any] = (jsonObj | jsonArray | number | "true" ^^^ true | "false" ^^^ false | "null" ^^^ null | stringVal) + def stringVal = accept("string", { case lexical.StringLit(n) => n} ) + def number = accept("number", { case lexical.NumericLit(n) => numberParser.get.apply(n)} ) +} + diff --git a/src/partest/scala/tools/partest/nest/FileManager.scala b/src/partest/scala/tools/partest/nest/FileManager.scala index ee24c0b9c1..7bfa8c6e77 100644 --- a/src/partest/scala/tools/partest/nest/FileManager.scala +++ b/src/partest/scala/tools/partest/nest/FileManager.scala @@ -68,13 +68,14 @@ trait FileManager extends FileUtil { else (SFile(LATEST_LIB).parent.parent / "classes" / what).toAbsolute.path } + def latestParserCBLib = relativeToLibrary("parser-combinators") def latestXmlLib = relativeToLibrary("xml") def latestScaladoc = 
relativeToLibrary("scaladoc") def latestInteractive = relativeToLibrary("interactive") def latestScalapFile = relativeToLibrary("scalap") def latestPaths = List( LATEST_LIB, LATEST_REFLECT, LATEST_COMP, LATEST_PARTEST, LATEST_ACTORS, - latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive + latestParserCBLib, latestXmlLib, latestScalapFile, latestScaladoc, latestInteractive ) def latestFiles = latestPaths map (p => new java.io.File(p)) def latestUrls = latestFiles map (_.toURI.toURL) diff --git a/test/partest b/test/partest index d72c1026f3..0259cdb791 100755 --- a/test/partest +++ b/test/partest @@ -64,7 +64,7 @@ if [ -z "$EXT_CLASSPATH" ] ; then fi done elif [ -f "$SCALA_HOME/build/pack/lib/scala-partest.jar" ] ; then - for lib in `echo "scala-partest scala-library scala-xml scala-reflect scala-compiler diffutils"`; do + for lib in `echo "scala-partest scala-library scala-parser-combinators scala-xml scala-reflect scala-compiler diffutils"`; do ext="$SCALA_HOME/build/pack/lib/$lib.jar" if [ -z "$EXT_CLASSPATH" ] ; then EXT_CLASSPATH="$ext" -- cgit v1.2.3 From 1b0fa91aa3f4d469505e5a1ff5d9cb1db0bdf662 Mon Sep 17 00:00:00 2001 From: Adriaan Moors Date: Mon, 24 Jun 2013 15:21:08 -0700 Subject: Unfork jline: use vanilla jline 2.11 as a dependency. 
Notes: - no longer specifying terminal by class name in scripts (using 'unix') - jline doesn't need a separate jansi dependency; it includes its own version according to: http://mvnrepository.com/artifact/jline/jline/2.11 --- build.xml | 39 ++++++++----- lib/jline.jar.desired.sha1 | 1 - src/build/bnd/scala-compiler.bnd | 2 +- src/build/maven/jline-pom.xml | 68 ---------------------- src/build/maven/maven-deploy.xml | 2 +- src/build/maven/scala-compiler-pom.xml | 4 +- src/build/pack.xml | 11 ---- .../scala/tools/ant/templates/tool-unix.tmpl | 2 +- .../nsc/interpreter/ConsoleReaderHelper.scala | 2 +- .../scala/tools/nsc/interpreter/Delimited.scala | 2 +- .../scala/tools/nsc/interpreter/JLineReader.scala | 6 +- .../nsc/interpreter/session/SimpleHistory.scala | 5 ++ .../tools/nsc/interpreter/session/package.scala | 8 +-- 13 files changed, 43 insertions(+), 109 deletions(-) delete mode 100644 lib/jline.jar.desired.sha1 delete mode 100644 src/build/maven/jline-pom.xml (limited to 'src') diff --git a/build.xml b/build.xml index 6906c15a19..1282c3b45b 100755 --- a/build.xml +++ b/build.xml @@ -123,7 +123,6 @@ TODO: - @@ -153,6 +152,8 @@ TODO: + + @@ -238,6 +239,10 @@ TODO: + + + + @@ -529,7 +534,7 @@ TODO: - + @@ -610,7 +615,7 @@ TODO: - + @@ -740,7 +745,7 @@ TODO: - + @@ -1265,7 +1270,10 @@ TODO:

 
-        
+        
+          
+          
+        
         
           
             
@@ -1340,7 +1348,10 @@ TODO:
       
       
     
-    
+    
+      
+      
+    
   
 
    
@@ -1738,13 +1749,6 @@ TODO:
     
   
 
-  
-    
-      
-      
-    
-  
-
   
     
       
@@ -1800,7 +1804,7 @@ TODO:
     
   
 
-  
+  
 
 
     
diff --git a/lib/jline.jar.desired.sha1 b/lib/jline.jar.desired.sha1
deleted file mode 100644
index 1eb994cf1b..0000000000
--- a/lib/jline.jar.desired.sha1
+++ /dev/null
@@ -1 +0,0 @@
-e87ad04fdffb5cd9b7aa9293596d9fdde086eccd ?jline.jar
diff --git a/src/build/bnd/scala-compiler.bnd b/src/build/bnd/scala-compiler.bnd
index c289843447..dc30513db4 100644
--- a/src/build/bnd/scala-compiler.bnd
+++ b/src/build/bnd/scala-compiler.bnd
@@ -3,6 +3,6 @@ Bundle-SymbolicName: org.scala-lang.scala-compiler
 ver: @VERSION@
 Bundle-Version: ${ver}
 Export-Package: *;version=${ver}
-Import-Package: scala.tools.jline.*;resolution:=optional, \
+Import-Package: jline.*;resolution:=optional, \
                 org.apache.tools.ant.*;resolution:=optional, \
                 *
diff --git a/src/build/maven/jline-pom.xml b/src/build/maven/jline-pom.xml
deleted file mode 100644
index 0d6e801551..0000000000
--- a/src/build/maven/jline-pom.xml
+++ /dev/null
@@ -1,68 +0,0 @@
-
-	4.0.0
-	org.scala-lang
-	jline
-	jar
-	@VERSION@
-  jline
-  Like readline, but better
-	http://www.scala-lang.org/
-	2011
-	
-		LAMP/EPFL
-		http://lamp.epfl.ch/
-	
-	
-		
-			BSD-like
-			http://www.scala-lang.org/downloads/license.html
-			
-			repo
-		
-    
-      The BSD License
-      http://www.opensource.org/licenses/bsd-license.php
-      repo
-    
-	
-	
-                scm:git:git://github.com/scala/scala.git
-                https://github.com/scala/scala.git
-	
-	
-                JIRA
-                https://issues.scala-lang.org/
-	
-
-	
-    
-        org.fusesource.jansi
-        jansi
-        1.4
-        
-    
-	
-	
-		
-			scala-tools.org
-			@RELEASE_REPOSITORY@
-		
-		
-			scala-tools.org
-			@SNAPSHOT_REPOSITORY@
-			false
-		
-	
-  
-    
-      lamp
-      EPFL LAMP
-    
-    
-      Typesafe
-      Typesafe, Inc.
-    
-  
-
diff --git a/src/build/maven/maven-deploy.xml b/src/build/maven/maven-deploy.xml
index 7df9a8ec9a..84a12066f5 100644
--- a/src/build/maven/maven-deploy.xml
+++ b/src/build/maven/maven-deploy.xml
@@ -57,6 +57,7 @@
             
             
             
+            
           
         
         
@@ -104,7 +105,6 @@
       
 
       
-        
         
         
         
diff --git a/src/build/maven/scala-compiler-pom.xml b/src/build/maven/scala-compiler-pom.xml
index fedc34a5d5..4b90997da4 100644
--- a/src/build/maven/scala-compiler-pom.xml
+++ b/src/build/maven/scala-compiler-pom.xml
@@ -43,9 +43,9 @@
 			@VERSION@
 		
     
-      org.scala-lang
+      jline
       jline
-      @VERSION@
+      @JLINE_VERSION@
       true
     
 	
diff --git a/src/build/pack.xml b/src/build/pack.xml
index aac121f828..6b6579ce12 100644
--- a/src/build/pack.xml
+++ b/src/build/pack.xml
@@ -151,7 +151,6 @@ MAIN DISTRIBUTION PACKAGING
         
       
     
-    
     
     
     
@@ -183,12 +182,6 @@ MAIN DISTRIBUTION PACKAGING
 
   
     
-    
-      
-    
-
-
     
     
@@ -197,10 +190,6 @@ MAIN DISTRIBUTION PACKAGING
   
 
   
-    
-      
-    
     
       
diff --git a/src/compiler/scala/tools/ant/templates/tool-unix.tmpl b/src/compiler/scala/tools/ant/templates/tool-unix.tmpl
index 84ccaba749..abf9925ad9 100644
--- a/src/compiler/scala/tools/ant/templates/tool-unix.tmpl
+++ b/src/compiler/scala/tools/ant/templates/tool-unix.tmpl
@@ -115,7 +115,7 @@ if [[ -n "$cygwin$mingw" ]]; then
     case "$TERM" in
         rxvt* | xterm*)
             stty -icanon min 1 -echo
-            WINDOWS_OPT="-Djline.terminal=scala.tools.jline.UnixTerminal"
+            WINDOWS_OPT="-Djline.terminal=unix"
         ;;
     esac
 fi
diff --git a/src/repl/scala/tools/nsc/interpreter/ConsoleReaderHelper.scala b/src/repl/scala/tools/nsc/interpreter/ConsoleReaderHelper.scala
index 48af261937..cf03ecb480 100644
--- a/src/repl/scala/tools/nsc/interpreter/ConsoleReaderHelper.scala
+++ b/src/repl/scala/tools/nsc/interpreter/ConsoleReaderHelper.scala
@@ -6,7 +6,7 @@
 package scala.tools.nsc
 package interpreter
 
-import scala.tools.jline.console.{ ConsoleReader, CursorBuffer }
+import jline.console.{ ConsoleReader, CursorBuffer }
 
 trait ConsoleReaderHelper extends ConsoleReader {
   def terminal    = getTerminal()
diff --git a/src/repl/scala/tools/nsc/interpreter/Delimited.scala b/src/repl/scala/tools/nsc/interpreter/Delimited.scala
index e88a044931..b7f06f1d0a 100644
--- a/src/repl/scala/tools/nsc/interpreter/Delimited.scala
+++ b/src/repl/scala/tools/nsc/interpreter/Delimited.scala
@@ -6,7 +6,7 @@
 package scala.tools.nsc
 package interpreter
 
-import scala.tools.jline.console.completer.ArgumentCompleter.{ ArgumentDelimiter, ArgumentList }
+import jline.console.completer.ArgumentCompleter.{ ArgumentDelimiter, ArgumentList }
 
 class JLineDelimiter extends ArgumentDelimiter {
   def toJLine(args: List[String], cursor: Int) = args match {
diff --git a/src/repl/scala/tools/nsc/interpreter/JLineReader.scala b/src/repl/scala/tools/nsc/interpreter/JLineReader.scala
index 5d41f1bbb4..d8a876feb2 100644
--- a/src/repl/scala/tools/nsc/interpreter/JLineReader.scala
+++ b/src/repl/scala/tools/nsc/interpreter/JLineReader.scala
@@ -6,8 +6,8 @@
 package scala.tools.nsc
 package interpreter
 
-import scala.tools.jline.console.ConsoleReader
-import scala.tools.jline.console.completer._
+import jline.console.ConsoleReader
+import jline.console.completer._
 import session._
 import Completion._
 
@@ -42,7 +42,7 @@ class JLineReader(_completion: => Completion) extends InteractiveReader {
     def readOneKey(prompt: String) = {
       this.print(prompt)
       this.flush()
-      this.readVirtualKey()
+      this.readCharacter()
     }
     def eraseLine() = consoleReader.resetPromptLine("", "", 0)
     def redrawLineAndFlush(): Unit = { flush() ; drawLine() ; flush() }
diff --git a/src/repl/scala/tools/nsc/interpreter/session/SimpleHistory.scala b/src/repl/scala/tools/nsc/interpreter/session/SimpleHistory.scala
index 89998e438a..7c49b91296 100644
--- a/src/repl/scala/tools/nsc/interpreter/session/SimpleHistory.scala
+++ b/src/repl/scala/tools/nsc/interpreter/session/SimpleHistory.scala
@@ -46,6 +46,11 @@ class SimpleHistory extends JLineHistory {
   def entries(): JListIterator[JEntry]         = toEntries().asJava.listIterator()
   def iterator: JIterator[JEntry]              = toEntries().iterator.asJava
 
+  def remove(idx: Int): CharSequence        = buf remove idx
+  def removeFirst(): CharSequence           = buf remove 0
+  def removeLast(): CharSequence            = buf remove lastIndex
+  def set(idx: Int, to: CharSequence): Unit = buf(idx) = to
+
   def current()         = if (index >= 0 && index < buf.size) buf(index) else fail("current()")
   def previous()        = (index > 0) && minusOne
   def next()            = (index <= lastIndex) && plusOne
diff --git a/src/repl/scala/tools/nsc/interpreter/session/package.scala b/src/repl/scala/tools/nsc/interpreter/session/package.scala
index c62cf21151..a3d7312c98 100644
--- a/src/repl/scala/tools/nsc/interpreter/session/package.scala
+++ b/src/repl/scala/tools/nsc/interpreter/session/package.scala
@@ -14,10 +14,10 @@ package object session {
   type JIterator[T]       = java.util.Iterator[T]
   type JListIterator[T]   = java.util.ListIterator[T]
 
-  type JEntry             = scala.tools.jline.console.history.History.Entry
-  type JHistory           = scala.tools.jline.console.history.History
-  type JMemoryHistory     = scala.tools.jline.console.history.MemoryHistory
-  type JPersistentHistory = scala.tools.jline.console.history.PersistentHistory
+  type JEntry             = jline.console.history.History.Entry
+  type JHistory           = jline.console.history.History
+  type JMemoryHistory     = jline.console.history.MemoryHistory
+  type JPersistentHistory = jline.console.history.PersistentHistory
 
   private[interpreter] implicit def charSequenceFix(x: CharSequence): String = x.toString
 }
-- 
cgit v1.2.3


From a0a60e709fa8041a482f2389104b8c17443c5526 Mon Sep 17 00:00:00 2001
From: Adriaan Moors 
Date: Thu, 4 Jul 2013 19:40:59 -0700
Subject: Add meta-information for dbuild.

The next version of [dbuild](http://typesafehub.github.io/distributed-build/0.5.3/index.html)
will parse `dbuild-meta.json` to determine which jars are produced by a Scala build.

This way we can modularize without changing dbuild itself.

Yes, I know `dbuild-meta.json` should be generated during the build.
However, given the state of our build.xml, I think this is pointless.

My goal is to generate build.xml, dbuild-meta.json and Eclipse projects
from a higher-level description of our build. Baby steps...

Including improvements by @cunei:
  - Removing outdated field "uri" from ExtractedBuildMeta
  - Changed "partest" to "scala-partest" (the actual jar name)
---
 dbuild-meta.json                     | 250 +++++++++++++++++++++++++++++++++++
 src/build/dbuild-meta-json-gen.scala |  63 +++++++++
 2 files changed, 313 insertions(+)
 create mode 100644 dbuild-meta.json
 create mode 100644 src/build/dbuild-meta-json-gen.scala

(limited to 'src')

diff --git a/dbuild-meta.json b/dbuild-meta.json
new file mode 100644
index 0000000000..705eeeb6b6
--- /dev/null
+++ b/dbuild-meta.json
@@ -0,0 +1,250 @@
+{
+    "version": "2.11.0",
+    "subproj": [],
+    "projects": [
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [],
+            "name": "scala-library",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-reflect",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-reflect",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-compiler",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-reflect",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-compiler",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-swing",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-swing",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-actors",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-actors",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-xml",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-xml",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-parser-combinators",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-parser-combinators",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scalacheck",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-library",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-actors",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-parser-combinators",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scalacheck",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scala-partest",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-compiler",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scalap",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-xml",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scalacheck",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scala-partest",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scaladoc",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-compiler",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-partest",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-xml",
+                    "organization": "org.scala-lang"
+                },
+                {
+                    "extension": "jar",
+                    "name": "scala-parser-combinators",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scaladoc",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "scalap",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "dependencies": [
+                {
+                    "extension": "jar",
+                    "name": "scala-compiler",
+                    "organization": "org.scala-lang"
+                }
+            ],
+            "name": "scalap",
+            "organization": "org.scala-lang"
+        },
+        {
+            "artifacts": [
+                {
+                    "extension": "jar",
+                    "name": "continuations",
+                    "organization": "org.scala-lang.plugins"
+                }
+            ],
+            "dependencies": [],
+            "name": "continuations",
+            "organization": "org.scala-lang.plugins"
+        }
+    ]
+}
diff --git a/src/build/dbuild-meta-json-gen.scala b/src/build/dbuild-meta-json-gen.scala
new file mode 100644
index 0000000000..42214dd191
--- /dev/null
+++ b/src/build/dbuild-meta-json-gen.scala
@@ -0,0 +1,63 @@
+// use this script to generate dbuild-meta.json
+// make sure the version is specified correctly,
+// update the dependency structure and
+// check out distributed-build and run `sbt console`:
+// TODO: also generate build.xml and eclipse config from a similar data-structure
+
+import distributed.project.model._
+
+val meta =
+  ExtractedBuildMeta("2.11.0", Seq(
+    Project("scala-library", "org.scala-lang",
+      Seq(ProjectRef("scala-library", "org.scala-lang")),
+      Seq.empty), // TODO: forkjoin
+    Project("scala-reflect", "org.scala-lang",
+      Seq(ProjectRef("scala-reflect", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"))),
+    Project("scala-compiler", "org.scala-lang",
+      Seq(ProjectRef("scala-compiler", "org.scala-lang")),
+      Seq(ProjectRef("scala-reflect", "org.scala-lang"))), // asm
+
+    // Project("scala-repl", "org.scala-lang",
+    //   Seq(ProjectRef("scala-repl", "org.scala-lang")),
+    //   Seq(ProjectRef("scala-compiler", "org.scala-lang"))), // jline
+
+    // Project("scala-interactive", "org.scala-lang",
+    //   Seq(ProjectRef("scala-interactive", "org.scala-lang")),
+    //   Seq(ProjectRef("scala-compiler", "org.scala-lang"), ProjectRef("scaladoc", "org.scala-lang"))),
+
+    Project("scala-swing", "org.scala-lang",
+      Seq(ProjectRef("scala-swing", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"))),
+
+    Project("scala-actors", "org.scala-lang",
+      Seq(ProjectRef("scala-actors", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"))),
+    Project("scala-xml", "org.scala-lang",
+      Seq(ProjectRef("scala-xml", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"))),
+    Project("scala-parser-combinators", "org.scala-lang",
+      Seq(ProjectRef("scala-parser-combinators", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"))),
+
+    Project("scalacheck", "org.scala-lang",
+      Seq(ProjectRef("scalacheck", "org.scala-lang")),
+      Seq(ProjectRef("scala-library", "org.scala-lang"), ProjectRef("scala-actors", "org.scala-lang"), ProjectRef("scala-parser-combinators", "org.scala-lang"))),
+
+    Project("scala-partest", "org.scala-lang",
+      Seq(ProjectRef("scala-partest", "org.scala-lang")),
+      Seq(ProjectRef("scala-compiler", "org.scala-lang"), // TODO: refine to scala-repl
+          ProjectRef("scalap", "org.scala-lang"), ProjectRef("scala-xml", "org.scala-lang"), ProjectRef("scalacheck", "org.scala-lang"))),
+
+    Project("scaladoc", "org.scala-lang",
+      Seq(ProjectRef("scaladoc", "org.scala-lang")),
+      Seq(ProjectRef("scala-compiler", "org.scala-lang"),ProjectRef("scala-partest", "org.scala-lang"), ProjectRef("scala-xml", "org.scala-lang"), ProjectRef("scala-parser-combinators", "org.scala-lang"))),
+
+    Project("scalap", "org.scala-lang",
+      Seq(ProjectRef("scalap", "org.scala-lang")),
+      Seq(ProjectRef("scala-compiler", "org.scala-lang"))),
+
+    Project("continuations", "org.scala-lang.plugins", Seq(ProjectRef("continuations", "org.scala-lang.plugins")), Seq.empty)
+  ))
+
+println(Utils.writeValue(meta))
-- 
cgit v1.2.3


From 57534f9811e8f6cf482fc4a32b1c1ecb90f6d418 Mon Sep 17 00:00:00 2001
From: Adriaan Moors 
Date: Fri, 5 Jul 2013 15:40:32 -0700
Subject: Updated eclipse project files.

Set everything up so that soon-to-be independent modules are
compiled with the Eclipse plugin's compiler & library (2.11.0-M3 currently).

Most projects still compile with 2.11.0-M3, but
partest will need a nightly build of 2.11.
---
 src/eclipse/continuations-library/.classpath    |  2 +-
 src/eclipse/continuations-library/.project      |  2 +-
 src/eclipse/interactive/.classpath              |  5 +-
 src/eclipse/partest/.classpath                  | 13 +++--
 src/eclipse/reflect/.classpath                  |  2 +-
 src/eclipse/repl/.classpath                     | 15 +++---
 src/eclipse/repl/.project                       | 64 ++++++++++++-------------
 src/eclipse/scala-compiler/.classpath           |  7 ++-
 src/eclipse/scala-library/.classpath            |  2 +-
 src/eclipse/scala-parser-combinators/.classpath |  7 +++
 src/eclipse/scala-parser-combinators/.project   | 30 ++++++++++++
 src/eclipse/scala-xml/.classpath                |  7 +++
 src/eclipse/scala-xml/.project                  | 30 ++++++++++++
 src/eclipse/scaladoc/.classpath                 | 11 ++---
 src/eclipse/scalap/.classpath                   |  9 ++--
 src/eclipse/test-junit/.classpath               |  7 ++-
 16 files changed, 138 insertions(+), 75 deletions(-)
 create mode 100644 src/eclipse/scala-parser-combinators/.classpath
 create mode 100644 src/eclipse/scala-parser-combinators/.project
 create mode 100644 src/eclipse/scala-xml/.classpath
 create mode 100644 src/eclipse/scala-xml/.project

(limited to 'src')

diff --git a/src/eclipse/continuations-library/.classpath b/src/eclipse/continuations-library/.classpath
index b3ca4eeb48..61cb3f060e 100644
--- a/src/eclipse/continuations-library/.classpath
+++ b/src/eclipse/continuations-library/.classpath
@@ -1,6 +1,6 @@
 
 
-	
+	
 	
 	
 	
diff --git a/src/eclipse/continuations-library/.project b/src/eclipse/continuations-library/.project
index f3a53a3d97..33cc57d667 100644
--- a/src/eclipse/continuations-library/.project
+++ b/src/eclipse/continuations-library/.project
@@ -22,7 +22,7 @@
 			SCALA_BASEDIR/build/quick/classes/continuations/library
 		
 		
-			library
+			continuations-library
 			2
 			SCALA_BASEDIR/src/continuations/library
 		
diff --git a/src/eclipse/interactive/.classpath b/src/eclipse/interactive/.classpath
index 870cc67aec..73a67e45ed 100644
--- a/src/eclipse/interactive/.classpath
+++ b/src/eclipse/interactive/.classpath
@@ -1,10 +1,9 @@
 
 
 	
-	
-	
-	
 	
 	
+	
+	
 	
 
diff --git a/src/eclipse/partest/.classpath b/src/eclipse/partest/.classpath
index a990c5a1b3..462cbb9c94 100644
--- a/src/eclipse/partest/.classpath
+++ b/src/eclipse/partest/.classpath
@@ -1,15 +1,14 @@
 
 
 	
-	
-	
-	
 	
-	
-	
-	
 	
-	
 	
+	
+	
+	
+	
+	
+	
 	
 
diff --git a/src/eclipse/reflect/.classpath b/src/eclipse/reflect/.classpath
index 36e6b6adf1..1eb37e3f5f 100644
--- a/src/eclipse/reflect/.classpath
+++ b/src/eclipse/reflect/.classpath
@@ -2,7 +2,7 @@
 
 	
 	
-	
 	
+	
 	
 
diff --git a/src/eclipse/repl/.classpath b/src/eclipse/repl/.classpath
index 30744da306..748fa6c9c8 100644
--- a/src/eclipse/repl/.classpath
+++ b/src/eclipse/repl/.classpath
@@ -1,11 +1,10 @@
 
 
-        
-        
-        
-        
-        
-        
-        
-        
+	
+	
+	
+	
+	
+	
+	
 
diff --git a/src/eclipse/repl/.project b/src/eclipse/repl/.project
index ea188bc262..69ad08ab1a 100644
--- a/src/eclipse/repl/.project
+++ b/src/eclipse/repl/.project
@@ -1,35 +1,35 @@
 
 
-        repl
-        
-        
-        
-        
-                
-                        org.scala-ide.sdt.core.scalabuilder
-                        
-                        
-                
-        
-        
-                org.scala-ide.sdt.core.scalanature
-                org.eclipse.jdt.core.javanature
-        
-        
-                
-                        build-quick-repl
-                        2
-                        SCALA_BASEDIR/build/quick/classes/repl
-                
-                
-                        lib
-                        2
-                        SCALA_BASEDIR/lib
-                
-                
-                        repl
-                        2
-                        SCALA_BASEDIR/src/repl
-                
-        
+	repl
+	
+	
+	
+	
+		
+			org.scala-ide.sdt.core.scalabuilder
+			
+			
+		
+	
+	
+		org.scala-ide.sdt.core.scalanature
+		org.eclipse.jdt.core.javanature
+	
+	
+		
+			build-quick-repl
+			2
+			SCALA_BASEDIR/build/quick/classes/repl
+		
+		
+			lib
+			2
+			SCALA_BASEDIR/lib
+		
+		
+			repl
+			2
+			SCALA_BASEDIR/src/repl
+		
+	
 
diff --git a/src/eclipse/scala-compiler/.classpath b/src/eclipse/scala-compiler/.classpath
index 0488a0dc39..b6ef5f35bb 100644
--- a/src/eclipse/scala-compiler/.classpath
+++ b/src/eclipse/scala-compiler/.classpath
@@ -1,12 +1,11 @@
 
 
 	
+	
 	
 	
-	
-	
-	
-	
 	
+	
+	
 	
 
diff --git a/src/eclipse/scala-library/.classpath b/src/eclipse/scala-library/.classpath
index a3a4933d34..eff3c8e0b7 100644
--- a/src/eclipse/scala-library/.classpath
+++ b/src/eclipse/scala-library/.classpath
@@ -2,6 +2,6 @@
 
 	
 	
-	
+	
 	
 
diff --git a/src/eclipse/scala-parser-combinators/.classpath b/src/eclipse/scala-parser-combinators/.classpath
new file mode 100644
index 0000000000..7eab7094eb
--- /dev/null
+++ b/src/eclipse/scala-parser-combinators/.classpath
@@ -0,0 +1,7 @@
+
+
+	
+	
+	
+	
+
diff --git a/src/eclipse/scala-parser-combinators/.project b/src/eclipse/scala-parser-combinators/.project
new file mode 100644
index 0000000000..d94523f56d
--- /dev/null
+++ b/src/eclipse/scala-parser-combinators/.project
@@ -0,0 +1,30 @@
+
+
+	scala-parser-combinators
+	
+	
+	
+	
+		
+			org.scala-ide.sdt.core.scalabuilder
+			
+			
+		
+	
+	
+		org.scala-ide.sdt.core.scalanature
+		org.eclipse.jdt.core.javanature
+	
+	
+		
+			build-quick-parser-combinators
+			2
+			SCALA_BASEDIR/build/quick/classes/parser-combinators
+		
+		
+			src-parser-combinators
+			2
+			SCALA_BASEDIR/src/parser-combinators
+		
+	
+
diff --git a/src/eclipse/scala-xml/.classpath b/src/eclipse/scala-xml/.classpath
new file mode 100644
index 0000000000..b90d951640
--- /dev/null
+++ b/src/eclipse/scala-xml/.classpath
@@ -0,0 +1,7 @@
+
+
+	
+	
+	
+	
+
diff --git a/src/eclipse/scala-xml/.project b/src/eclipse/scala-xml/.project
new file mode 100644
index 0000000000..8b0f7f6864
--- /dev/null
+++ b/src/eclipse/scala-xml/.project
@@ -0,0 +1,30 @@
+
+
+	scala-xml
+	
+	
+	
+	
+		
+			org.scala-ide.sdt.core.scalabuilder
+			
+			
+		
+	
+	
+		org.scala-ide.sdt.core.scalanature
+		org.eclipse.jdt.core.javanature
+	
+	
+		
+			build-quick-xml
+			2
+			SCALA_BASEDIR/build/quick/classes/xml
+		
+		
+			src-xml
+			2
+			SCALA_BASEDIR/src/xml
+		
+	
+
diff --git a/src/eclipse/scaladoc/.classpath b/src/eclipse/scaladoc/.classpath
index f12ba4bb2c..caafcf33b0 100644
--- a/src/eclipse/scaladoc/.classpath
+++ b/src/eclipse/scaladoc/.classpath
@@ -1,13 +1,10 @@
 
 
-	
 	
-	
-	
-	
-	
+	
 	
-	
-	
+	
+	
+	
 	
 
diff --git a/src/eclipse/scalap/.classpath b/src/eclipse/scalap/.classpath
index 0a55745702..3b635cf56e 100644
--- a/src/eclipse/scalap/.classpath
+++ b/src/eclipse/scalap/.classpath
@@ -1,12 +1,9 @@
 
 
 	
-	
-	
-	
+	
+	
+	
 	
-	
-	
-	
 	
 
diff --git a/src/eclipse/test-junit/.classpath b/src/eclipse/test-junit/.classpath
index 718f7b6ece..8e4f88e0f0 100644
--- a/src/eclipse/test-junit/.classpath
+++ b/src/eclipse/test-junit/.classpath
@@ -1,12 +1,11 @@
 
 
 	
+	
+	
 	
 	
+	
 	
-	
-	
-	
-	
 	
 
-- 
cgit v1.2.3


From a07879dbc643f362ed86fe764f6386f160d75aaa Mon Sep 17 00:00:00 2001
From: Adriaan Moors 
Date: Mon, 8 Jul 2013 18:45:46 -0700
Subject: scaladoc needs xml and parser-combinators

Concretely, update scala-compiler's pom
to list scala-xml and scala-parser-combinators
on behalf of scaladoc.

NOTE: when spinning off scaladoc, move dependencies to its own pom
---
 src/build/maven/scala-compiler-pom.xml | 56 ++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 23 deletions(-)

(limited to 'src')

diff --git a/src/build/maven/scala-compiler-pom.xml b/src/build/maven/scala-compiler-pom.xml
index 4b90997da4..6e7f1a0f2c 100644
--- a/src/build/maven/scala-compiler-pom.xml
+++ b/src/build/maven/scala-compiler-pom.xml
@@ -1,13 +1,13 @@
 
 	4.0.0
 	org.scala-lang
 	scala-compiler
 	jar
 	@VERSION@
-  Scala Compiler
-  Compiler for the Scala Programming Language
+	Scala Compiler
+	Compiler for the Scala Programming Language
 	http://www.scala-lang.org/
 	2002
 	
@@ -23,12 +23,12 @@
 		
 	
 	
-                scm:git:git://github.com/scala/scala.git
-                https://github.com/scala/scala.git
+		scm:git:git://github.com/scala/scala.git
+		https://github.com/scala/scala.git
 	
 	
-                JIRA
-                https://issues.scala-lang.org/
+		JIRA
+		https://issues.scala-lang.org/
 	
 
 	
@@ -37,17 +37,27 @@
 			scala-library
 			@VERSION@
 		
+		 
+			org.scala-lang
+			scala-xml
+			@VERSION@
+		
+		 
+			org.scala-lang
+			scala-parser-combinators
+			@VERSION@
+		
 		
 			org.scala-lang
 			scala-reflect
 			@VERSION@
 		
-    
-      jline
-      jline
-      @JLINE_VERSION@
-      true
-    
+		
+			jline
+			jline
+			@JLINE_VERSION@
+			true
+		
 	
 	
 		
@@ -60,14 +70,14 @@
 			false
 		
 	
-  
-    
-      lamp
-      EPFL LAMP
-    
-    
-      Typesafe
-      Typesafe, Inc.
-    
-  
+	
+		
+			lamp
+			EPFL LAMP
+		
+		
+			Typesafe
+			Typesafe, Inc.
+		
+	
 
-- 
cgit v1.2.3