summaryrefslogblamecommitdiff
path: root/src/xml/scala/xml/include/sax/XIncludeFilter.scala
blob: 3fa3beefb0e227952abdbcedb42a7285422af89e (plain) (tree)
1
2
3
4
5
6
7
8
9
10

                                                                          
                                                                          




                                                                          

             
                   
 
                          
 
                                                     
                                                                                                
 
                                                                                                                 
                      
                                              
 




                                                                            
  



                                                                             
  






                                                                              
  














                                                                              
  







                                                           
  



                                            
                                                                  
 

                                             














                                                                    
                                                     
                         
                                    
         
                               


                                     
                                                                                  
     
                                     



                                                                    
                       
 





                                                                            


                     
                                                  
 

                                                                                               



                                                               


                                                              

                         
                                                 



                                                         
                                                    

         
                            


                                                                          
                                        
                                                 
                           
                                                          

         
                                         
                                        
 

                                                 
                                             
         
                                      
                                  



                                                      
                                                          
         
                  
       







                                                                                  
         

                                                       
     

   
                                                                          

                                         
                    
     
                          

                                             
     

   
                       
 
                                

                                         
              

   
                              
                  
                                                       

                                       

   


                                                                 

   

                                                  

   

                                                                     

   

                                                                              

   

                                                                    

   

                                             



                                          





                                                       
                          



                                        


                                                              
                                                
 
                  

   

                                                                              

                                                                
                                                                





                                                               

                                                                   
                                                                          
                          
         

                                               



                                                                           


                                                                           


         

                                                           
                                                      


                                           




                                                                           
                                                 



                                                                                            

                                                                        

         


                                                      
          


                                                           



                                                       
                                                             

                                                     
                                                                            



     
                            
 


                                                              





                                                                
                                               







                                                                                              

         





                                                                                                       
         





















                                                                                                  

           

                                                                                                   
     

   
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2002-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala
package xml
package include.sax

import scala.xml.include._

import org.xml.sax.{ Attributes, XMLReader, Locator }
import org.xml.sax.helpers.{ XMLReaderFactory, XMLFilterImpl, NamespaceSupport, AttributesImpl }

import java.io.{ InputStream, BufferedInputStream, InputStreamReader, IOException, UnsupportedEncodingException }
import java.util.Stack
import java.net.{ URL, MalformedURLException }

/** This is a SAX filter which resolves all XInclude include elements before
 *  passing them on to the client application. Currently this class has the
 *  following known deviation from the XInclude specification:
 *
 *  1. XPointer is not supported.
 *
 *  Furthermore, I would definitely use a new instance of this class for each
 *  document you want to process. I doubt it can be used successfully on
 *  multiple documents. Furthermore, I can virtually guarantee that this
 *  class is not thread safe. You have been warned.
 *
 *  This class is not designed to be subclassed: how subclassing might affect
 *  the methods herein, and what other protected methods might be needed to
 *  support subclasses, has not been considered. (The text inherited from the
 *  Java original says the class is declared final; this Scala class is not,
 *  but subclassing it is still unsupported.) The use-case for subclassing is
 *  weak. This class is designed to have its functionality extended via a
 *  horizontal chain of filters, not a vertical hierarchy of sub and
 *  superclasses.
 *
 *  To use this class:
 *
 *  - Construct an `XIncludeFilter` object; the base URL is taken from the
 *    system ID of the `Locator` that the parser passes to
 *    `setDocumentLocator()`
 *  - Pass the `XMLReader` object from which the raw document will be read to
 *    the `setParent()` method of this object.
 *  - Pass your own `ContentHandler` object to the `setContentHandler()`
 *    method of this object. This is the object which will receive events
 *    from the parsed and included document.
 *  - Optional: if you wish to receive comments, set your own `LexicalHandler`
 *    object as the value of this object's
 *    `http://xml.org/sax/properties/lexical-handler` property.
 *    Also make sure your `LexicalHandler` asks this object for the status of
 *    each comment using `insideIncludeElement` before doing anything with the
 *    comment.
 *  - Pass the URL of the document to read to this object's `parse()` method
 *
 *  e.g.
 *  {{{
 *  val includer = new XIncludeFilter()
 *  includer setParent parser
 *  includer setContentHandler new SAXXIncluder(System.out)
 *  includer parse args(i)
 *  }}}
 *  translated from Elliotte Rusty Harold's Java source.
 *
 * @author Burak Emir
 */
class XIncludeFilter extends XMLFilterImpl {

  /** Namespace URI that identifies XInclude elements. */
  final val XINCLUDE_NAMESPACE = "http://www.w3.org/2001/XInclude"

  // Stack of base URLs. Entries are pushed in `setDocumentLocator`,
  // `startElement` and `includeXMLDocument`; the top entry is used to
  // resolve relative `xml:base` and `href` values.
  private val bases = new Stack[URL]()
  // Stack of document locators, pushed in `setDocumentLocator` and popped in
  // `endDocument`; the top entry is read by `getLocation` for error messages.
  private val locators = new Stack[Locator]()

/*    private EntityResolver resolver;

    public XIncludeFilter() {
        this(null);
    }

    public XIncludeFilter(EntityResolver resolver) {
        this.resolver = resolver;
    }   */


    // what if this isn't called????
    // do I need to check this in startDocument() and push something
    // there????
  /** Records the locator of the document being parsed and pushes the
    * document's system ID onto the stack of base URLs.
    *
    * @param locator locator supplied by the parser; its system ID must be
    *                parseable as a URL
    * @throws UnsupportedOperationException if the system ID is not a valid
    *                                       URL; the underlying
    *                                       `MalformedURLException` is kept
    *                                       as the cause
    */
  override def setDocumentLocator(locator: Locator): Unit = {
    val systemId = locator.getSystemId()
    // Build the URL before touching either stack, so that a bad system ID
    // cannot leave `locators` one entry ahead of `bases`.
    val documentBase =
      try new URL(systemId)
      catch {
        case e: MalformedURLException =>
          throw new UnsupportedOperationException("Unrecognized SYSTEM ID: " + systemId, e)
      }
    locators push locator
    bases push documentBase
    super.setDocumentLocator(locator)
  }


  // Counter used to throw away the contents of non-empty XInclude elements:
  // incremented when an include element starts, decremented when it ends.
  // Events are only passed on while it is zero.
  private var level: Int = 0

  /** Reports whether the reader is currently positioned inside a non-empty
    * include element. This is '''not''' the same as being inside the node
    * set that replaces the include element.
    *
    * The main client is a `LexicalHandler`, which has to ask this filter
    * whether a comment it receives should be passed on or dropped.
    *
    * @return `true` while the content of an include element is being skipped
    */
  def insideIncludeElement(): Boolean = !(level == 0)

  /** Handles the start of an element.
    *
    * Outside any `xi:include` element, the element's base URL (the parent's
    * base, adjusted by an `xml:base` attribute if present) is pushed onto
    * `bases`. An `xi:include` element is then replaced by the resource named
    * in its `href` attribute; any other element is forwarded to the parent
    * class, carrying an absolute `xml:base` attribute when it is the first
    * element forwarded after an XML inclusion started.
    *
    * Elements inside the content of an `xi:include` element are discarded;
    * nested `xi:include` elements in that content are only counted, so that
    * the matching `endElement` calls keep `level` balanced.
    *
    * @param uri       namespace URI of the element
    * @param localName local name of the element
    * @param qName     qualified name of the element
    * @param atts1     attributes attached to the element
    * @throws SAXException if `xml:base` is malformed, `href` is missing,
    *                      `parse` has an illegal value, or the included
    *                      resource cannot be processed
    */
  override def startElement(uri: String, localName: String, qName: String, atts1: Attributes): Unit = {
    val isInclude = XINCLUDE_NAMESPACE == uri && "include" == localName

    if (level != 0) {
      // Inside discarded include content. `endElement` decrements `level`
      // for every closing include element, so a nested one has to be
      // counted here as well or the counter drifts.
      if (isInclude) level += 1
    }
    else {
      // Adjust the bases stack by pushing either the new value of xml:base
      // or the base of the parent.
      val base = atts1.getValue(NamespaceSupport.XMLNS, "base")
      val parentBase = bases.peek()
      val currentBase =
        if (base == null) parentBase
        else
          try new URL(parentBase, base)
          catch {
            case e: MalformedURLException =>
              // Report the offending attribute value, not the parent base.
              throw new SAXException("Malformed base URL: " + base, e)
          }
      bases push currentBase

      if (isInclude) {
        try {
          val href = atts1 getValue "href"
          if (href == null) throw new SAXException("Missing href attribute")

          val parse = Option(atts1 getValue "parse") getOrElse "xml"
          parse match {
            case "text" => includeTextDocument(href, atts1 getValue "encoding")
            case "xml"  => includeXMLDocument(href)
            case other  =>
              throw new SAXException("Illegal value for parse attribute: " + other)
          }
        }
        finally {
          // `endElement` pops a base only for forwarded elements, so the
          // entry pushed for this include element is removed here, once it
          // has served to resolve `href`.
          bases.pop()
        }
        level += 1
      }
      else {
        val forwarded: Attributes =
          if (atRoot) {
            atRoot = false
            // Give the included root an absolute xml:base. An existing
            // attribute is overwritten rather than duplicated.
            val attsImpl = new AttributesImpl(atts1)
            val absoluteBase = currentBase.toExternalForm()
            val index = attsImpl.getIndex(NamespaceSupport.XMLNS, "base")
            if (index >= 0) attsImpl.setValue(index, absoluteBase)
            else attsImpl.addAttribute(NamespaceSupport.XMLNS, "base",
                                       "xml:base", "CDATA", absoluteBase)
            attsImpl
          }
          else atts1
        super.startElement(uri, localName, qName, forwarded)
      }
    }
  }

  /** Handles the end of an element.
    *
    * A closing include element only decrements `level`. Any other element
    * is handled only while `level` is zero: its base URL is popped from
    * `bases` and the event is forwarded to the parent class.
    *
    * @param uri       namespace URI of the element
    * @param localName local name of the element
    * @param qName     qualified name of the element
    */
  override def endElement(uri: String, localName: String, qName: String): Unit = {
    val closesInclude = uri.equals(XINCLUDE_NAMESPACE) && localName.equals("include")
    if (closesInclude) {
      level -= 1
    }
    else if (level == 0) {
      bases.pop()
      super.endElement(uri, localName, qName)
    }
  }

  // Number of documents whose startDocument has been seen without a matching
  // endDocument.
  private var depth: Int = 0

  /** Resets `level` for the document that is starting and forwards the
    * event only when no other document is currently open.
    */
  override def startDocument(): Unit = {
    level = 0
    depth match {
      case 0 => super.startDocument()
      case _ => ()
    }
    depth += 1
  }

  /** Drops the locator and base URL of the document that just ended and
    * forwards the event once the last open document has finished.
    */
  override def endDocument(): Unit = {
    locators.pop()
    bases.pop() // the entry for the document itself
    depth -= 1
    val lastDocumentClosed = depth == 0
    if (lastDocumentClosed) super.endDocument()
  }

  // how do prefix mappings move across documents????
  /** Forwards the start of a prefix mapping while `level` is zero; the
    * event is dropped otherwise.
    */
  override def startPrefixMapping(prefix: String, uri: String): Unit =
    if (level == 0) super.startPrefixMapping(prefix, uri)

  /** Forwards the end of a prefix mapping while `level` is zero; the event
    * is dropped otherwise.
    */
  override def endPrefixMapping(prefix: String): Unit =
    if (level == 0) super.endPrefixMapping(prefix)

  /** Forwards character data while `level` is zero; the data is dropped
    * otherwise.
    */
  override def characters(ch: Array[Char], start: Int, length: Int): Unit =
    if (level == 0) super.characters(ch, start, length)

  /** Forwards ignorable whitespace while `level` is zero; it is dropped
    * otherwise.
    */
  override def ignorableWhitespace(ch: Array[Char], start: Int, length: Int): Unit =
    if (level == 0) super.ignorableWhitespace(ch, start, length)

  /** Forwards a processing instruction while `level` is zero; it is dropped
    * otherwise.
    */
  override def processingInstruction(target: String, data: String): Unit =
    if (level == 0) super.processingInstruction(target, data)

  /** Forwards a skipped-entity notification while `level` is zero; it is
    * dropped otherwise.
    */
  override def skippedEntity(name: String): Unit =
    if (level == 0) super.skippedEntity(name)

  /** Describes the current position in the document being parsed, for use
    * as a suffix of error messages.
    *
    * When no locator is available the public and system IDs are reported as
    * empty strings and the line and column as -1.
    *
    * @return text of the form
    *         `" in document included from <publicId> at <systemId> at line <n>, column <m>"`
    */
  private def getLocation(): String = {
    // This runs while an error is being reported, so an empty stack or a
    // null entry must not raise a second exception that hides the first.
    val locator = if (locators.isEmpty()) None else Option(locators.peek())

    val (publicID, systemID, line, column) = locator match {
      case Some(loc) =>
        (loc.getPublicId(), loc.getSystemId(), loc.getLineNumber(), loc.getColumnNumber())
      case None =>
        ("", "", -1, -1)
    }

    (" in document included from " + publicID
    + " at " + systemID
    + " at line " + line + ", column " + column)
  }

  /** Reads the resource at `url`, resolved against the current base URL,
    * and reports its content through `characters()`. It is used to include
    * files with `parse="text"`.
    *
    * The character encoding is chosen in this order:
    *
    *  1. the `charset` parameter of the resource's Content-Type, if present;
    *  1. for XML media types (`text/xml`, `application/xml`, `text/...+xml`,
    *     `application/...+xml`), the value returned by
    *     `EncodingHeuristics.readEncodingFromStream`;
    *  1. `encoding1`, or UTF-8 when that is null or blank.
    *
    * The Content-Encoding header is deliberately not consulted: it names a
    * content coding such as `gzip`, not a character set.
    *
    * @param  url          URL of the document that will be read
    * @param  encoding1    Encoding of the document; e.g. UTF-8,
    *                      ISO-8859-1, etc. May be null or blank.
    * @throws SAXException if the requested document cannot
    *                      be downloaded from the specified URL
    *                      or if the encoding is not recognized
    */
  private def includeTextDocument(url: String, encoding1: String): Unit = {
    // A var because the catch clause below reports whichever encoding was in
    // effect when the failure happened.
    var encoding =
      if (encoding1 == null || encoding1.trim().equals("")) "UTF-8" else encoding1

    val source: URL =
      try new URL(bases.peek(), url)
      catch {
        case e: MalformedURLException =>
          val ex = new UnavailableResourceException("Unresolvable URL " + url
                                                    + getLocation())
          ex.setRootCause(e)
          throw new SAXException("Unresolvable URL " + url + getLocation(), ex)
      }

    try {
      val uc = source.openConnection()
      val in = new BufferedInputStream(uc.getInputStream())
      try {
        val contentType = uc.getContentType()
        if (contentType != null) {
          // Split "type/subtype; param=value; ..." into its pieces.
          val parts = contentType.split(";").map(_.trim)
          // MIME types are case-insensitive; Locale.ROOT keeps the lowering
          // independent of the default locale.
          val mediaType = parts.headOption.getOrElse("").toLowerCase(java.util.Locale.ROOT)
          val charset = parts.drop(1).collectFirst {
            case p if p.regionMatches(true, 0, "charset=", 0, 8) =>
              p.substring(8).trim.stripPrefix("\"").stripSuffix("\"")
          }.filter(_.nonEmpty)

          charset match {
            case Some(cs) =>
              encoding = cs
            case None =>
              val isXml =
                mediaType == "text/xml" ||
                mediaType == "application/xml" ||
                ((mediaType.startsWith("text/") || mediaType.startsWith("application/"))
                  && mediaType.endsWith("+xml"))
              if (isXml) encoding = EncodingHeuristics.readEncodingFromStream(in)
          }
        }

        val reader = new InputStreamReader(in, encoding)
        val buffer = new Array[Char](1024)
        var charsRead = reader.read(buffer, 0, buffer.length)
        while (charsRead != -1) {
          if (charsRead > 0) this.characters(buffer, 0, charsRead)
          charsRead = reader.read(buffer, 0, buffer.length)
        }
      }
      finally {
        // Always release the connection's stream. A failure while closing is
        // ignored on purpose so it cannot replace the outcome of the read.
        try in.close()
        catch { case _: IOException => () }
      }
    }
    catch {
      case e: UnsupportedEncodingException =>
        throw new SAXException("Unsupported encoding: "
                               + encoding + getLocation(), e)
      case e: IOException =>
        throw new SAXException("Document not found: "
                               + source.toExternalForm() + getLocation(), e)
    }
  }

  // Set just before an included XML document is parsed. `startElement` reads
  // it to add an `xml:base` attribute to the next element it forwards, then
  // clears it.
  private var atRoot: Boolean = false

  /** Parses the XML document at `url`, resolved against the current base
    * URL, with this filter as the content handler, so that the document's
    * events flow through the same `ContentHandler` methods. It is used to
    * include files with `parse="xml"`.
    *
    * If no XML parser can be created, a message is printed to `System.err`
    * and nothing is included.
    *
    * @param  url          URL of the document that will be read
    * @throws SAXException if the URL cannot be resolved, the document is
    *                      already on the stack of bases (circular include),
    *                      or the document cannot be read
    */
  private def includeXMLDocument(url: String): Unit = {
    val source: URL =
      try new URL(bases.peek(), url)
      catch {
        case cause: MalformedURLException =>
          val unavailable =
            new UnavailableResourceException("Unresolvable URL " + url + getLocation())
          unavailable setRootCause cause
          throw new SAXException("Unresolvable URL " + url + getLocation(), unavailable)
      }

    // Tries the default SAX parser first and the Xerces class name second.
    def obtainParser(): Option[XMLReader] =
      try Some(XMLReaderFactory.createXMLReader())
      catch {
        case _: SAXException =>
          try Some(XMLReaderFactory.createXMLReader(XercesClassName))
          catch {
            case _: SAXException =>
              System.err.println("Could not find an XML parser")
              None
          }
      }

    try {
      obtainParser() match {
        case None => ()
        case Some(parser) =>
          parser setContentHandler this
          val resolver = this.getEntityResolver()
          if (resolver != null) {
            parser setEntityResolver resolver
          }

          // Remember the current level; the included document starts at 0.
          val savedLevel = level
          this.level = 0
          if (bases contains source) {
            val circular =
              new CircularIncludeException("Circular XInclude Reference to " + source + getLocation())
            throw new SAXException("Circular XInclude Reference", circular)
          }

          bases push source
          atRoot = true
          parser parse source.toExternalForm()

          // Put the level back and drop the base pushed above.
          this.level = savedLevel
          bases.pop()
      }
    }
    catch {
      case e: IOException =>
        throw new SAXException("Document not found: " + source.toExternalForm() + getLocation(), e)
    }
  }
}