summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/library/scala/xml/include/CircularIncludeException.scala18
-rw-r--r--src/library/scala/xml/include/UnavailableResourceException.scala13
-rw-r--r--src/library/scala/xml/include/XIncludeException.scala56
-rw-r--r--src/library/scala/xml/include/sax/EncodingHeuristics.scala167
-rw-r--r--src/library/scala/xml/include/sax/Main.scala96
-rw-r--r--src/library/scala/xml/include/sax/XIncludeFilter.scala421
-rw-r--r--src/library/scala/xml/include/sax/XIncluder.scala201
7 files changed, 972 insertions, 0 deletions
diff --git a/src/library/scala/xml/include/CircularIncludeException.scala b/src/library/scala/xml/include/CircularIncludeException.scala
new file mode 100644
index 0000000000..8f284e4367
--- /dev/null
+++ b/src/library/scala/xml/include/CircularIncludeException.scala
@@ -0,0 +1,18 @@
+package scala.xml.include
+
+/**
+ * <p>
+ * A <code>CircularIncludeException</code> is thrown when
+ * an included document attempts to include itself or
+ * one of its ancestor documents.
+ * </p>
+ */
+class CircularIncludeException(message: String) extends XIncludeException(message) {
+
+  /**
+   * Constructs a <code>CircularIncludeException</code> with <code>null</code>
+   * as its error detail message. The primary constructor forwards
+   * <code>message</code> to <code>XIncludeException</code>.
+   */
+  def this() = this(null);
+}
diff --git a/src/library/scala/xml/include/UnavailableResourceException.scala b/src/library/scala/xml/include/UnavailableResourceException.scala
new file mode 100644
index 0000000000..171087d9f0
--- /dev/null
+++ b/src/library/scala/xml/include/UnavailableResourceException.scala
@@ -0,0 +1,13 @@
+package scala.xml.include
+
+/**
+ * <p>
+ * An <code>UnavailableResourceException</code> is thrown when
+ * an included document cannot be found or loaded.
+ * </p>
+ *
+ */
+class UnavailableResourceException(message: String)
+extends XIncludeException(message) {
+ def this() = this(null);
+}
diff --git a/src/library/scala/xml/include/XIncludeException.scala b/src/library/scala/xml/include/XIncludeException.scala
new file mode 100644
index 0000000000..310758f7ec
--- /dev/null
+++ b/src/library/scala/xml/include/XIncludeException.scala
@@ -0,0 +1,56 @@
+package scala.xml.include
+
+/**
+ * <p>
+ * <code>XIncludeException</code> is the generic superclass
+ * for all checked exceptions that may be thrown as a result
+ * of a violation of XInclude's rules.
+ * </p>
+ * <p>
+ * Constructs an <code>XIncludeException</code> with the specified detail
+ * message. The error message string <code>message</code> can later be
+ * retrieved by the <code>{@link java.lang.Throwable#getMessage}</code>
+ * method of class <code>java.lang.Throwable</code>.
+ * </p>
+ *
+ * @param message the detail message.
+ */
+class XIncludeException(message: String) extends Exception(message) {
+
+ /**
+ * uses <code>null</code> as its error detail message.
+ */
+ def this() = this(null);
+
+ private var rootCause: Throwable = null;
+
+ /**
+ * When an <code>IOException</code>, <code>MalformedURLException</code>
+ * or other generic exception is thrown while processing an XML document
+ * for XIncludes, it is customarily replaced
+ * by some form of <code>XIncludeException</code>.
+ * This method allows you to store the original exception.
+ *
+ * @param nestedException the underlying exception which
+ caused the XIncludeException to be thrown
+ */
+ def setRootCause(nestedException: Throwable ): Unit = {
+ this.rootCause = nestedException;
+ }
+
+ /**
+ * When an <code>IOException</code>, <code>MalformedURLException</code>
+ * or other generic exception is thrown while processing an XML document
+ * for XIncludes, it is customarily replaced
+ * by some form of <code>XIncludeException</code>.
+ * This method allows you to retrieve the original exception.
+ * It returns null if no such exception caused this <code>XIncludeException</code>.
+ *
+ * @return Throwable the underlying exception which
+ caused the XIncludeException to be thrown
+ */
+ def getRootCause(): Throwable = {
+ return this.rootCause;
+ }
+
+}
diff --git a/src/library/scala/xml/include/sax/EncodingHeuristics.scala b/src/library/scala/xml/include/sax/EncodingHeuristics.scala
new file mode 100644
index 0000000000..a04240a6f3
--- /dev/null
+++ b/src/library/scala/xml/include/sax/EncodingHeuristics.scala
@@ -0,0 +1,167 @@
+package scala.xml.include.sax;
+
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.io.InputStream;
+
+/**
+ * <p>
+ * <code>EncodingHeuristics</code> reads from a stream
+ * (which should be buffered) and attempts to guess
+ * what the encoding of the text in the stream is.
+ * Byte order marks are stripped from the stream.
+ * If it fails to determine the type of the encoding,
+ * it returns the default UTF-8.
+ * </p>
+ *
+ * translated from Elliotte Rusty Harold's Java source
+ * @author Burak Emir
+ */
+object EncodingHeuristics {
+
+ /**
+ * <p>
+ * This utility method guesses the encoding of a stream from its first bytes.
+ * </p>
+ *
+ * @param in <code>InputStream</code> to read from.
+ * @return String The name of the encoding.
+ * @throws IOException if the stream cannot be reset back to where it was when
+ * the method was invoked.
+ */
+  def readEncodingFromStream(in: InputStream): String = {
+    // Guesses the encoding from a byte order mark, from the byte pattern of a
+    // leading '<', or from the encoding pseudo-attribute of an XML or text
+    // declaration, and answers "UTF-8" when none of these gives a result.
+    //
+    // Each branch returns as soon as the answer is known. A byte order mark
+    // stays consumed and nothing after it is read, so the caller continues
+    // with the first real character; every other outcome resets the stream
+    // to where it was on entry. The stream therefore has to honour
+    // mark/reset, which is why it should be buffered.
+    //
+    // The mark only covers 1024 bytes, so this may fail if there are a lot of
+    // space characters before the end of the encoding declaration.
+    in.mark(1024);
+    try {
+      // Lots of things can go wrong here. If any do, the catch clause below
+      // resets the stream and falls back on the UTF-8 default.
+      val byte1 = in.read();
+      val byte2 = in.read();
+      // UTF-16 byte order marks. No reset: the mark is not part of the text.
+      if (byte1 == 0xFE && byte2 == 0xFF) return "UnicodeBig";
+      if (byte1 == 0xFF && byte2 == 0xFE) return "UnicodeLittle";
+
+      /* In accordance with the Character Model [Character Model],
+         when the text format is a Unicode encoding, the XInclude
+         processor must fail the inclusion when the text in the
+         selected range is non-normalized. When transcoding characters
+         to a Unicode encoding from a legacy encoding, a normalizing
+         transcoder must be used. */
+
+      val byte3 = in.read();
+      // UTF-8 byte order mark; again left consumed.
+      if (byte1 == 0xEF && byte2 == 0xBB && byte3 == 0xBF) return "UTF-8";
+
+      val byte4 = in.read();
+      // UCS-4 byte order marks, in the two byte orders this method knows.
+      // NOTE(review): confirm "UCS-4" is the charset name Java expects here.
+      if (byte1 == 0x00 && byte2 == 0x00 && byte3 == 0xFE && byte4 == 0xFF) {
+        return "UCS-4";
+      }
+      if (byte1 == 0x00 && byte2 == 0x00 && byte3 == 0xFF && byte4 == 0xFE) {
+        return "UCS-4";
+      }
+
+      // No byte order mark present; the first character must be a less-than
+      // sign or white space. Look for less-than signs first.
+      if (byte1 == 0x00 && byte2 == 0x00 && byte3 == 0x00 && byte4 == '<') {
+        in.reset();
+        return "UCS-4";
+      }
+      if (byte1 == '<' && byte2 == 0x00 && byte3 == 0x00 && byte4 == 0x00) {
+        in.reset();
+        return "UCS-4";
+      }
+      if (byte1 == 0x00 && byte2 == '<' && byte3 == 0x00 && byte4 == '?') {
+        in.reset();
+        return "UnicodeBigUnmarked";
+      }
+      if (byte1 == '<' && byte2 == 0x00 && byte3 == '?' && byte4 == 0x00) {
+        in.reset();
+        return "UnicodeLittleUnmarked";
+      }
+      if (byte1 == '<' && byte2 == '?' && byte3 == 'x' && byte4 == 'm') {
+        // ASCII compatible, must read encoding declaration.
+        // 1024 bytes will be far enough to read most XML declarations.
+        val data = new Array[Byte](1024);
+        data(0) = byte1.asInstanceOf[Byte];
+        data(1) = byte2.asInstanceOf[Byte];
+        data(2) = byte3.asInstanceOf[Byte];
+        data(3) = byte4.asInstanceOf[Byte];
+        val more = in.read(data, 4, 1020);
+        // read() answers -1 at end of stream; keep the four bytes we have.
+        val length = if (more < 0) 4 else more + 4;
+        // Everything needed is in data now, so rewind once on behalf of all
+        // the returns below.
+        in.reset();
+        // Use Latin-1 (ISO-8859-1) because it's ASCII compatible and all
+        // byte sequences are legal Latin-1 sequences, so there are no
+        // encoding errors to worry about if we slip past the end of the
+        // XML/text declaration.
+        val text = new String(data, 0, length, "8859_1");
+        // Only search inside the declaration itself, so that the word
+        // "encoding" further down the document is never mistaken for the
+        // pseudo-attribute.
+        val declEnd = text.indexOf("?>");
+        val declaration = if (declEnd < 0) text else text.substring(0, declEnd);
+        val keyword = declaration.indexOf("encoding");
+        // No encoding pseudo-attribute at all: the default applies.
+        if (keyword < 0) return "UTF-8";
+
+        def isSpace(c: Char): Boolean =
+          c == ' ' || c == '\t' || c == '\r' || c == '\n';
+
+        // If any charAt below throws a StringIndexOutOfBoundsException we
+        // fall into the catch block and answer UTF-8, since this can't be
+        // well-formed XML.
+        var position = keyword + 8;
+        // get rid of white space before equals sign
+        while (isSpace(declaration.charAt(position))) position = position + 1;
+        if (declaration.charAt(position) != '=') {
+          return "UTF-8"; // malformed
+        }
+        position = position + 1;
+        // get rid of white space after equals sign
+        while (isSpace(declaration.charAt(position))) position = position + 1;
+        val delimiter = declaration.charAt(position);
+        if (delimiter != '\'' && delimiter != '"') {
+          return "UTF-8"; // malformed
+        }
+        position = position + 1;
+        // now positioned to read encoding name, up to the closing delimiter
+        val encodingName = new StringBuffer();
+        var c = declaration.charAt(position);
+        while (c != delimiter) {
+          encodingName.append(c);
+          position = position + 1;
+          c = declaration.charAt(position);
+        }
+        return encodingName.toString();
+      }
+
+      if (byte1 == 0x4C && byte2 == 0x6F && byte3 == 0xA7 && byte4 == 0x94) {
+        // EBCDIC compatible, must read encoding declaration.
+        // Not implemented: falls through to the UTF-8 default below.
+      }
+    } catch {
+      case e: Exception =>
+        in.reset();
+        return "UTF-8";
+    }
+
+    // No byte order mark and no usable XML or text declaration.
+    in.reset();
+    return "UTF-8";
+  }
+}
diff --git a/src/library/scala/xml/include/sax/Main.scala b/src/library/scala/xml/include/sax/Main.scala
new file mode 100644
index 0000000000..b91700e4e1
--- /dev/null
+++ b/src/library/scala/xml/include/sax/Main.scala
@@ -0,0 +1,96 @@
+package scala.xml.include.sax
+
+import org.xml.sax.SAXException
+import org.xml.sax.SAXParseException
+import org.xml.sax.EntityResolver
+import org.xml.sax.helpers.XMLReaderFactory
+import org.xml.sax.XMLReader
+
+object Main {
+
+ /**
+ * <p>The driver method for xinc.
+ * Output is written to System.out via Console.
+ * </p>
+ *
+ * @param args contains the URLs and/or filenames
+ * of the documents to be processed.
+ */
+ def main(args: Array[String]): Unit = {
+ var parser: XMLReader = null;
+ var err = false;
+ try {
+ parser = XMLReaderFactory.createXMLReader();
+ }
+ catch {
+ case e:SAXException =>
+ try {
+ parser = XMLReaderFactory.createXMLReader(
+ "org.apache.xerces.parsers.SAXParser");
+ } catch {
+ case e2:SAXException =>
+ System.err.println("Could not find an XML parser");
+ err = true;
+ }
+ }
+
+ if(err) return;
+ // Need better namespace handling
+ try {
+ parser.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
+ }
+ catch {
+ case e:SAXException =>
+ System.err.println(e);
+ err = true
+ }
+ if(err) return;
+
+ if (args.length == 0) return;
+ var resolver: EntityResolver = null;
+ var arg: int = 0;
+ if (args(0).equals("-r")) {
+ try {
+ resolver = Class.forName(args(1)).newInstance().asInstanceOf[EntityResolver];
+ parser.setEntityResolver(resolver);
+ }
+ catch {
+ case ex:Exception =>
+ System.err.println("Could not load requested EntityResolver");
+ err = true;
+ }
+ arg = 2;
+ }
+ if(err) return;
+
+ while (arg < args.length) {
+ try {
+ val includer = new XIncludeFilter();
+ includer.setParent(parser);
+ val s = new XIncluder(System.out, "UTF-8");
+ includer.setContentHandler(s);
+ if (resolver != null) includer.setEntityResolver(resolver);
+ try {
+ includer.setProperty(
+ "http://xml.org/sax/properties/lexical-handler",
+ s);
+ s.setFilter(includer);
+ }
+ catch {
+ case e:SAXException => // Will not support comments
+ }
+ includer.parse(args(arg));
+ }
+ catch {
+ case e:SAXParseException =>
+ System.err.println(e);
+ System.err.println("Problem in " + e.getSystemId()
+ + " at line " + e.getLineNumber());
+ case e: Exception => // be specific about exceptions????
+ System.err.println(e);
+ e.printStackTrace();
+ }
+ arg = arg + 1;
+ }
+ }
+}
diff --git a/src/library/scala/xml/include/sax/XIncludeFilter.scala b/src/library/scala/xml/include/sax/XIncludeFilter.scala
new file mode 100644
index 0000000000..51265a8ad0
--- /dev/null
+++ b/src/library/scala/xml/include/sax/XIncludeFilter.scala
@@ -0,0 +1,421 @@
+package scala.xml.include.sax
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.Locator;
+import org.xml.sax.helpers.XMLReaderFactory;
+import org.xml.sax.helpers.XMLFilterImpl;
+import org.xml.sax.helpers.NamespaceSupport;
+import org.xml.sax.helpers.AttributesImpl;
+
+import java.net.URL;
+import java.net.URLConnection;
+import java.net.MalformedURLException;
+import java.io.UnsupportedEncodingException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.BufferedInputStream;
+import java.io.InputStreamReader;
+import java.util.Stack;
+
+/**
+ * <p>
+ * This is a SAX filter which resolves all XInclude include elements
+ * before passing them on to the client application. Currently this
+ * class has the following known deviation from the XInclude specification:
+ * </p>
+ * <ol>
+ * <li>XPointer is not supported.</li>
+ * </ol>
+ *
+ * <p>
+ * Furthermore, I would definitely use a new instance of this class
+ * for each document you want to process. I doubt it can be used
+ * successfully on multiple documents. Furthermore, I can virtually
+ * guarantee that this class is not thread safe. You have been
+ * warned.
+ * </p>
+ *
+ * <p>
+ * Since this class is not designed to be subclassed, and since
+ * I have not yet considered how that might affect the methods
+ * herein or what other protected methods might be needed to support
+ * subclasses, I have declared this class final. I may remove this
+ * restriction later, though the use-case for subclassing is weak.
+ * This class is designed to have its functionality extended via
+ * a horizontal chain of filters, not a
+ * vertical hierarchy of sub and superclasses.
+ * </p>
+ *
+ * <p>
+ * To use this class:
+ * </p>
+ * <ol>
+ * <li>Construct an <code>XIncludeFilter</code> object with a known base URL</li>
+ * <li>Pass the <code>XMLReader</code> object from which the raw document will
+ * be read to the <code>setParent()</code> method of this object. </li>
+ * <li>Pass your own <code>ContentHandler</code> object to the
+ * <code>setContentHandler()</code> method of this object. This is the
+ * object which will receive events from the parsed and included
+ * document.
+ * </li>
+ * <li>Optional: if you wish to receive comments, set your own
+ * <code>LexicalHandler</code> object as the value of this object's
+ * http://xml.org/sax/properties/lexical-handler property.
+ * Also make sure your <code>LexicalHandler</code> asks this object
+ * for the status of each comment using <code>insideIncludeElement</code>
+ * before doing anything with the comment.
+ * </li>
+ * <li>Pass the URL of the document to read to this object's
+ * <code>parse()</code> method</li>
+ * </ol>
+ *
+ * <p> e.g.</p>
+ * <pre><code>XIncludeFilter includer = new XIncludeFilter(base);
+ * includer.setParent(parser);
+ * includer.setContentHandler(new SAXXIncluder(System.out));
+ * includer.parse(args[i]);</code>
+ * </pre>
+ * </p>
+ * translated from Elliotte Rusty Harold's Java source
+ * @author Burak Emir
+ */
+class XIncludeFilter extends XMLFilterImpl {
+
+ final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude";
+
+ private val bases = new Stack();
+ private val locators = new Stack();
+
+/* private EntityResolver resolver;
+
+ public XIncludeFilter() {
+ this(null);
+ }
+
+ public XIncludeFilter(EntityResolver resolver) {
+ this.resolver = resolver;
+ } */
+
+
+ // what if this isn't called????
+ // do I need to check this in startDocument() and push something
+ // there????
+ override def setDocumentLocator(locator: Locator): Unit = {
+ locators.push(locator);
+ val base = locator.getSystemId();
+ try {
+ bases.push(new URL(base));
+ }
+ catch {
+ case e:MalformedURLException =>
+ throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + base);
+ }
+ super.setDocumentLocator(locator);
+ }
+
+
+ // necessary to throw away contents of non-empty XInclude elements
+ private var level = 0;
+
+ /**
+ * <p>
+ * This utility method returns true if and only if this reader is
+ * currently inside a non-empty include element. (This is <strong>
+ * not</strong> the same as being inside the node set which replaces
+ * the include element.) This is primarily needed for comments
+ * inside include elements. It must be checked by the actual
+ * LexicalHandler to see whether a comment is passed or not.
+ * </p>
+ *
+ * @return boolean
+ */
+ def insideIncludeElement(): boolean = {
+ return level != 0;
+ }
+
+
+  override def startElement(uri: String, localName: String, qName: String, atts1: Attributes): Unit = {
+    // Start of an element. Outside xi:include elements the element is passed
+    // on (with its base URL pushed on the bases stack), unless it is itself an
+    // xi:include element, which is replaced by the document it refers to.
+    if (level != 0) {
+      // Inside an xi:include element everything is thrown away, but the depth
+      // is counted so endElement can tell which end tag closes the include.
+      level = level + 1;
+      return;
+    }
+    var atts = atts1;
+    // Adjust bases stack by pushing either the new
+    // value of xml:base or the base of the parent
+    val base = atts.getValue(NamespaceSupport.XMLNS, "base");
+    val parentBase = bases.peek().asInstanceOf[URL];
+    var currentBase = parentBase;
+    if (base != null) {
+      try {
+        currentBase = new URL(parentBase, base);
+      }
+      catch {
+        case e: MalformedURLException =>
+          // report the offending xml:base value, not the parent's base
+          throw new SAXException("Malformed base URL: " + base, e);
+      }
+    }
+    bases.push(currentBase);
+
+    if (uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")) {
+      // include external document
+      val href = atts.getValue("href");
+      // Verify that there is an href attribute
+      if (href == null) throw new SAXException("Missing href attribute");
+
+      var parse = atts.getValue("parse");
+      if (parse == null) parse = "xml";
+
+      if (parse.equals("text")) {
+        includeTextDocument(href, atts.getValue("encoding"));
+      }
+      else if (parse.equals("xml")) {
+        includeXMLDocument(href);
+      }
+      else {
+        throw new SAXException("Illegal value for parse attribute: " + parse);
+      }
+      // from here on we are inside the xi:include element (depth 1)
+      level = level + 1;
+    }
+    else {
+      if (atRoot) {
+        // first element passed on after an include started: add xml:base
+        val attsImpl = new AttributesImpl(atts);
+        attsImpl.addAttribute(NamespaceSupport.XMLNS, "base",
+          "xml:base", "CDATA", currentBase.toExternalForm());
+        atts = attsImpl;
+        atRoot = false;
+      }
+      super.startElement(uri, localName, qName, atts);
+    }
+  }
+
+  override def endElement (uri: String , localName: String , qName: String ): Unit = {
+    // Mirrors startElement: level 0 means an ordinary, passed-on element.
+    if (level == 0) {
+      bases.pop();
+      super.endElement(uri, localName, qName);
+    }
+    else {
+      level = level - 1;
+      // closing the xi:include element itself: drop the base pushed for it
+      if (level == 0) bases.pop();
+    }
+  }
+
+ private var depth = 0;
+
+ override def startDocument(): Unit = {
+ level = 0;
+ if (depth == 0) super.startDocument();
+ depth = depth + 1;
+ }
+
+ override def endDocument(): Unit = {
+ locators.pop();
+ bases.pop(); // pop the URL for the document itself
+ depth = depth - 1;
+ if (depth == 0) super.endDocument();
+ }
+
+ // how do prefix mappings move across documents????
+ override def startPrefixMapping(prefix: String , uri: String ): Unit = {
+ if (level == 0) super.startPrefixMapping(prefix, uri);
+ }
+
+ override def endPrefixMapping(prefix: String): Unit = {
+ if (level == 0) super.endPrefixMapping(prefix);
+ }
+
+ override def characters(ch: Array[char], start: int, length: int): Unit = {
+ if (level == 0) super.characters(ch, start, length);
+ }
+
+ override def ignorableWhitespace(ch: Array[char] , start: int, length: int): Unit = {
+ if (level == 0) super.ignorableWhitespace(ch, start, length);
+ }
+
+ override def processingInstruction(target: String, data: String): Unit = {
+ if (level == 0) super.processingInstruction(target, data);
+ }
+
+ override def skippedEntity(name: String): Unit = {
+ if (level == 0) super.skippedEntity(name);
+ }
+
+ // convenience method for error messages
+  private def getLocation(): String = {
+    // Describes the locator on top of the locators stack, as a suffix for error messages.
+    val current = locators.peek().asInstanceOf[Locator];
+    val message = new StringBuffer(" in document included from ");
+    if (current == null) {
+      // no locator: empty ids and -1 for both positions
+      message.append(" at ");
+      message.append(" at line ").append(-1);
+      message.append(", column ").append(-1);
+    }
+    else {
+      val publicID = current.getPublicId();
+      val systemID = current.getSystemId();
+      val line = current.getLineNumber();
+      val column = current.getColumnNumber();
+      message.append(publicID).append(" at ").append(systemID);
+      message.append(" at line ").append(line);
+      message.append(", column ").append(column);
+    }
+    message.toString();
+  }
+
+
+ /**
+ * <p>
+ * This utility method reads a document at a specified URL
+ * and fires off calls to <code>characters()</code>.
+ * It's used to include files with <code>parse="text"</code>
+ * </p>
+ *
+ * @param url URL of the document that will be read
+ * @param encoding Encoding of the document; e.g. UTF-8,
+ * ISO-8859-1, etc.
+ * @return void
+ * @throws SAXException if the requested document cannot
+ be downloaded from the specified URL
+ or if the encoding is not recognized
+ */
+  private def includeTextDocument(url: String, encoding1: String): Unit = {
+    // Reads the text at url (resolved against the current base) and passes it
+    // on through characters(). The stream is closed again in every case.
+    var encoding = encoding1;
+    if (encoding == null || encoding.trim().equals("")) encoding = "UTF-8";
+    var source: URL = null;
+    try {
+      val base = bases.peek().asInstanceOf[URL];
+      source = new URL(base, url);
+    }
+    catch {
+      case e: MalformedURLException =>
+        val ex = new UnavailableResourceException("Unresolvable URL " + url + getLocation());
+        ex.setRootCause(e);
+        throw new SAXException("Unresolvable URL " + url + getLocation(), ex);
+    }
+
+    try {
+      val uc = source.openConnection();
+      val in = new BufferedInputStream(uc.getInputStream());
+      try {
+        // NOTE(review): getContentEncoding() is the Content-Encoding header (e.g. gzip), not a charset - verify.
+        val encodingFromHeader = uc.getContentEncoding();
+        var contentType = uc.getContentType();
+        if (encodingFromHeader != null) encoding = encodingFromHeader;
+        else if (contentType != null) {
+          // MIME types are case-insensitive
+          contentType = contentType.toLowerCase();
+          if (contentType.equals("text/xml")
+              || contentType.equals("application/xml")
+              || (contentType.startsWith("text/") && contentType.endsWith("+xml") )
+              || (contentType.startsWith("application/") && contentType.endsWith("+xml"))) {
+            encoding = EncodingHeuristics.readEncodingFromStream(in);
+          }
+        }
+        val reader = new InputStreamReader(in, encoding);
+        val c = new Array[char](1024);
+        var charsRead: Int = 0; // bogus init value
+        do {
+          charsRead = reader.read(c, 0, 1024);
+          if (charsRead > 0) this.characters(c, 0, charsRead);
+        } while (charsRead != -1) ;
+      }
+      finally {
+        in.close(); // close the stream even when reading or decoding fails
+      }
+    }
+    catch {
+      case e: UnsupportedEncodingException =>
+        throw new SAXException("Unsupported encoding: " + encoding + getLocation(), e);
+      case e: IOException =>
+        throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e);
+    }
+  }
+
+ private var atRoot = false;
+
+ /**
+ * <p>
+ * This utility method reads a document at a specified URL
+ * and fires off calls to various <code>ContentHandler</code> methods.
+ * It's used to include files with <code>parse="xml"</code>
+ * </p>
+ *
+ * @param url URL of the document that will be read
+ * @return void
+ * @throws SAXException if the requested document cannot
+ be downloaded from the specified URL.
+ */
+ private def includeXMLDocument(url: String): Unit = {
+ var source: URL = null;
+ try {
+ val base = bases.peek().asInstanceOf[URL];
+ source = new URL(base, url);
+ }
+ catch {
+ case e:MalformedURLException =>
+ val ex = new UnavailableResourceException("Unresolvable URL " + url
+ + getLocation());
+ ex.setRootCause(e);
+ throw new SAXException("Unresolvable URL " + url + getLocation(), ex);
+ }
+
+ try {
+ // make this more robust
+ var parser: XMLReader = null;
+ try {
+ parser = XMLReaderFactory.createXMLReader();
+ } catch {
+ case e:SAXException =>
+ try {
+ parser = XMLReaderFactory.createXMLReader(
+ "org.apache.xerces.parsers.SAXParser"
+ );
+ } catch {
+ case e2: SAXException =>
+ System.err.println("Could not find an XML parser");
+ }
+ }
+ if(parser != null) {
+ parser.setContentHandler(this);
+ val resolver = this.getEntityResolver();
+ if (resolver != null) parser.setEntityResolver(resolver);
+ // save old level and base
+ val previousLevel = level;
+ this.level = 0;
+ if (bases.contains(source)) {
+ val e = new CircularIncludeException(
+ "Circular XInclude Reference to " + source + getLocation()
+ );
+ throw new SAXException("Circular XInclude Reference", e);
+ }
+ bases.push(source);
+ atRoot = true;
+ parser.parse(source.toExternalForm());
+ // restore old level and base
+ this.level = previousLevel;
+ bases.pop();
+ }
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Document not found: "
+ + source.toExternalForm() + getLocation(), e);
+ }
+
+ }
+}
diff --git a/src/library/scala/xml/include/sax/XIncluder.scala b/src/library/scala/xml/include/sax/XIncluder.scala
new file mode 100644
index 0000000000..c95d9fd3a3
--- /dev/null
+++ b/src/library/scala/xml/include/sax/XIncluder.scala
@@ -0,0 +1,201 @@
+package scala.xml.include.sax
+
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.helpers.XMLReaderFactory;
+import org.xml.sax.XMLReader;
+import org.xml.sax.Locator;
+import org.xml.sax.Attributes;
+import org.xml.sax.ext.LexicalHandler;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.io.OutputStream;
+import java.io.Writer;
+import java.io.OutputStreamWriter;
+import java.io.File;
+import java.net.URL;
+import java.net.MalformedURLException;
+import java.util.Stack;
+
+/** XIncluder is a SAX <code>ContentHandler</code>
+ * that writes its XML document onto an output stream after resolving
+ * all <code>xinclude:include</code> elements.
+ *
+ * <p>
+ * based on Elliotte Rusty Harold's SAXXIncluder
+ * </p>
+ */
+class XIncluder(outs:OutputStream, encoding:String) extends Object
+with ContentHandler with LexicalHandler {
+
+ var out = new OutputStreamWriter(outs, encoding);
+
+ def setDocumentLocator(locator: Locator): Unit = {}
+
+ def startDocument(): Unit = {
+ try {
+ out.write("<?xml version='1.0' encoding='"
+ + encoding + "'?>\r\n");
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Write failed", e);
+ }
+ }
+
+ def endDocument(): Unit = {
+ try {
+ out.flush();
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Flush failed", e);
+ }
+ }
+
+ def startPrefixMapping(prefix: String , uri: String): Unit = {}
+
+ def endPrefixMapping(prefix: String): Unit = {}
+
+  def startElement(namespaceURI: String , localName: String, qualifiedName: String , atts:Attributes ): Unit = {
+    // Writes the start tag; attribute values are escaped and apostrophe-quoted.
+    try {
+      out.write("<" + qualifiedName);
+      var i = 0; while (i < atts.getLength()) {
+        out.write(" ");
+        out.write(atts.getQName(i));
+        out.write("='");
+        // @todo Need to use character references if the encoding
+        // can't support the character
+        // an apostrophe left in the value would end the quoted value early
+        out.write(xml.Utility.escape(atts.getValue(i)).replaceAll("'", "&apos;"));
+        out.write("'");
+        i = i + 1;
+      }
+      out.write(">");
+    }
+    catch {
+      case e:IOException => throw new SAXException("Write failed", e);
+    }
+  }
+
+ def endElement(namespaceURI: String , localName:String , qualifiedName: String ): Unit = {
+ try {
+ out.write("</" + qualifiedName + ">");
+ }
+ catch {
+ case e: IOException =>
+ throw new SAXException("Write failed", e);
+ }
+
+ }
+
+ // need to escape characters that are not in the given
+ // encoding using character references????
+  def characters(ch: Array[char] , start: int , length: int ): Unit = {
+    // Writes length characters of ch from index start on, escaping markup characters.
+    try {
+      var offset = 0;
+      while (offset < length) {
+        ch(start + offset) match {
+          case '&' => out.write("&amp;");
+          case '<' => out.write("&lt;");
+          // Escaping '>' is normally not necessary, but it is required
+          // when the text contains ]]> (the end CDATA section delimiter).
+          case '>' => out.write("&gt;");
+          case other => out.write(other);
+        }
+        offset = offset + 1;
+      }
+    }
+    catch {
+      case e: IOException => throw new SAXException("Write failed", e);
+    }
+  }
+
+ def ignorableWhitespace(ch: Array[char], start: int , length: int): Unit = {
+ this.characters(ch, start, length);
+ }
+
+ // do I need to escape text in PI????
+ def processingInstruction(target: String , data: String): Unit = {
+
+ try {
+ out.write("<?" + target + " " + data + "?>");
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Write failed", e);
+ }
+ }
+
+ def skippedEntity(name: String): Unit = {
+ try {
+ out.write("&" + name + ";");
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Write failed", e);
+ }
+ }
+
+ // LexicalHandler methods
+ private var inDTD: boolean = false;
+ private val entities: Stack = new Stack();
+
+ def startDTD(name: String, publicID: String, systemID: String): Unit = {
+ inDTD = true;
+ // if this is the source document, output a DOCTYPE declaration
+ if (entities.size() == 0) {
+ var id = "";
+ if (publicID != null) id = " PUBLIC \"" + publicID + "\" \"" + systemID + '"';
+ else if (systemID != null) id = " SYSTEM \"" + systemID + '"';
+ try {
+ out.write("<!DOCTYPE " + name + id + ">\r\n");
+ }
+ catch {
+ case e:IOException =>
+ throw new SAXException("Error while writing DOCTYPE", e);
+ }
+ }
+ }
+  def endDTD(): Unit = { inDTD = false; } // the DTD is over: comment() may emit comments again
+
+ def startEntity(name: String): Unit = {
+ entities.push(name);
+ }
+
+
+ def endEntity(name: String): Unit = {
+ entities.pop();
+ }
+
+ def startCDATA(): Unit = {}
+ def endCDATA(): Unit = {}
+
+ // Just need this reference so we can ask if a comment is
+ // inside an include element or not
+ private var filter: XIncludeFilter = null;
+
+ def setFilter(filter: XIncludeFilter): Unit = {
+ this.filter = filter;
+ }
+
+  def comment(ch: Array[char], start: int, length: int): Unit = {
+    // Drops comments from the DTD or from inside an xi:include element; with no filter set, none count as inside one.
+    val insideInclude = filter != null && filter.insideIncludeElement();
+    if (!inDTD && !insideInclude) {
+      try {
+        out.write("<!--");
+        out.write(ch, start, length);
+        out.write("-->");
+      }
+      catch {
+        case e:IOException => throw new SAXException("Write failed", e);
+      }
+    }
+  }
+}