summaryrefslogtreecommitdiff
path: root/src/xml/scala/xml/parsing/FactoryAdapter.scala
blob: 2154bdf5ba47e8f151be1268e0cfc2b857ff4e2d (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*                     __                                               *\
**     ________ ___   / /  ___     Scala API                            **
**    / __/ __// _ | / /  / _ |    (c) 2003-2013, LAMP/EPFL             **
**  __\ \/ /__/ __ |/ /__/ __ |    http://scala-lang.org/               **
** /____/\___/_/ |_/____/_/ | |                                         **
**                          |/                                          **
\*                                                                      */

package scala
package xml
package parsing

import java.io.{ InputStream, Reader, File, FileDescriptor, FileInputStream }
import scala.collection.{ mutable, Iterator }
import org.xml.sax.Attributes
import org.xml.sax.helpers.DefaultHandler

// can be mixed into FactoryAdapter if desired
trait ConsoleErrorHandler extends DefaultHandler {
  /** Warnings are dropped on purpose: crimson warns even for entity resolution. */
  override def warning(ex: SAXParseException): Unit = ()

  /** Reports a recoverable parse error on the error stream. */
  override def error(ex: SAXParseException): Unit = printError("Error", ex)

  /** Reports a fatal parse error on the error stream. */
  override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex)

  /** Writes one line of the form `[errtype]:line:column: message` to
   *  `Console.err` and flushes it.
   *
   *  @param errtype label placed between the square brackets
   *  @param ex      exception supplying the line, column and message
   */
  protected def printError(errtype: String, ex: SAXParseException): Unit = {
    val report = "[%s]:%d:%d: %s".format(
      errtype, ex.getLineNumber, ex.getColumnNumber, ex.getMessage)

    // Redirect Console.out to the error stream only for the duration of the write.
    Console.withOut(Console.err) {
      Console.println(report)
      Console.flush()
    }
  }
}

/** SAX adapter class, for use with Java SAX parser. Keeps track of
 *  namespace bindings, without relying on namespace handling of the
 *  underlying SAX parser.
 */
abstract class FactoryAdapter extends DefaultHandler with factory.XMLLoader[Node] {
  /** The most recently completed element. It is reassigned by every
   *  `endElement`, so once the outermost element has been closed it holds
   *  that element.
   */
  var rootElem: Node = null

  /** Accumulates character data between two element/PI boundaries. */
  val buffer      = new StringBuilder()
  /** Attributes of the currently open elements, innermost on top. */
  val attribStack = new mutable.Stack[MetaData]
  /** Nodes built so far. A `null` entry is pushed by `startElement` and marks
   *  where the children of an open element begin.
   */
  val hStack      = new mutable.Stack[Node]   // [ element ] contains siblings
  /** Qualified names of the enclosing open elements; the bottom entry is the
   *  initial `null` value of `curTag`.
   */
  val tagStack    = new mutable.Stack[String]
  /** Namespace scopes of the currently open elements, innermost on top. */
  var scopeStack  = new mutable.Stack[NamespaceBinding]

  /** Qualified name of the innermost open element, `null` when none is open. */
  var curTag : String = null
  /** Whether character data is currently being recorded into `buffer`. */
  var capture: Boolean = false

  // abstract methods

  /** Tests if an XML element contains text.
   *
   *  @param localName the element name with any namespace prefix removed
   *  @return true if element named `localName` contains text.
   */
  def nodeContainsText(localName: String): Boolean // abstract

  /** Creates a new non-text (tree) node.
   *
   *  @param pre      namespace prefix of the element, `null` if its qualified
   *                  name has no prefix
   *  @param elemName local name of the element
   *  @param attribs  attributes collected when the element was opened
   *                  (`xmlns` declarations excluded)
   *  @param scope    namespace bindings in effect for the element
   *  @param chIter   child nodes in document order
   *  @return a new XML element.
   */
  def createNode(pre: String, elemName: String, attribs: MetaData,
                 scope: NamespaceBinding, chIter: List[Node]): Node // abstract

  /** Creates a Text node.
   *
   *  @param text the character data captured since the last boundary
   *  @return a new Text node.
   */
  def createText(text: String): Text // abstract

  /** Creates the node(s) representing a processing instruction.
   *
   *  @param target the processing instruction target
   *  @param data   the processing instruction data
   */
  def createProcInstr(target: String, data: String): Seq[ProcInstr]

  //
  // ContentHandler methods
  //

  /** When true, each run of whitespace within one `characters` call is
   *  collapsed into a single space. Not compliant, but useful.
   */
  val normalizeWhitespace = false

  /** Records character data while `capture` is set; ignores it otherwise.
   *
   *  @param ch     array holding the characters
   *  @param offset index of the first character to read
   *  @param length number of characters to read
   */
  override def characters(ch: Array[Char], offset: Int, length: Int): Unit =
    if (capture) {
      // compliant: report every character
      if (!normalizeWhitespace) buffer.appendAll(ch, offset, length)
      // normalizing whitespace is not compliant, but useful
      else {
        var it = ch.slice(offset, offset + length).iterator
        while (it.hasNext) {
          val c = it.next()
          val isSpace = c.isWhitespace
          buffer append (if (isSpace) ' ' else c)
          // skip the rest of the whitespace run, it is already represented by one ' '
          if (isSpace)
            it = it dropWhile (_.isWhitespace)
        }
      }
    }

  /** Splits a qualified name at its first ':' into (prefix, local name);
   *  the prefix is `null` when there is no ':'.
   */
  private def splitName(s: String) = {
    val idx = s indexOf ':'
    if (idx < 0) (null, s)
    else (s take idx, s drop (idx + 1))
  }

  /* ContentHandler methods */

  /** Start element. Flushes pending text, opens a new level on every stack
   *  and computes the attributes and namespace scope of the element from
   *  `qname` and `attributes` alone.
   *
   *  @param uri        not used
   *  @param _localName not used; the local name is derived from `qname`
   *  @param qname      qualified (possibly prefixed) element name
   *  @param attributes attributes as reported by the parser, including any
   *                    `xmlns` / `xmlns:*` declarations
   */
  override def startElement(
    uri: String,
    _localName: String,
    qname: String,
    attributes: Attributes): Unit =
  {
    captureText()
    tagStack push curTag
    curTag = qname

    val localName = splitName(qname)._2
    capture = nodeContainsText(localName)

    // marker separating this element's children from whatever precedes them
    hStack push null

    def nullIfEmpty(s: String) = if (s == "") null else s

    var m: MetaData = Null
    var scpe: NamespaceBinding =
      if (scopeStack.isEmpty) TopScope
      else scopeStack.top

    for (i <- 0 until attributes.getLength()) {
      val attrQName = attributes getQName i
      val value = attributes getValue i
      val (pre, key) = splitName(attrQName)

      // `xmlns:p="..."` binds prefix p, plain `xmlns="..."` binds the default namespace
      if (pre == "xmlns" || (pre == null && attrQName == "xmlns")) {
        val arg = if (pre == null) null else key
        scpe = new NamespaceBinding(arg, nullIfEmpty(value), scpe)
      }
      else
        m = Attribute(Option(pre), key, Text(value), m)
    }

    scopeStack push scpe
    attribStack push m
  }


  /** Turns the buffered character data into a text node on `hStack` (only
   *  when `capture` is set and the buffer is non-empty), then empties the
   *  buffer in every case.
   */
  def captureText(): Unit = {
    if (capture && buffer.length > 0)
      hStack push createText(buffer.toString)

    buffer.clear()
  }

  /** End element. Flushes pending text, collects the children pushed since
   *  the matching `startElement`, builds the element via `createNode` and
   *  restores the state of the enclosing element.
   *
   *  @param uri        not used
   *  @param _localName not used; the local name is derived from `qname`
   *  @param qname      qualified (possibly prefixed) element name
   */
  override def endElement(uri: String , _localName: String, qname: String): Unit = {
    captureText()
    val metaData = attribStack.pop()

    // pop children down to (and including) the null marker; they come off
    // last-first, so reverse to get document order
    val v = Iterator.continually(hStack.pop()).takeWhile(_ != null).toList.reverse
    val (pre, localName) = splitName(qname)
    val scp = scopeStack.pop()

    // create element
    rootElem = createNode(pre, localName, metaData, scp, v)
    hStack push rootElem
    curTag = tagStack.pop()
    // curTag is null at root level; pass the local name, exactly as startElement does
    capture = curTag != null && nodeContainsText(splitName(curTag)._2)
  }

  /** Processing instruction. Pending text is flushed first so that it stays
   *  in front of the instruction in document order.
   *
   *  @param target the processing instruction target
   *  @param data   the processing instruction data
   */
  override def processingInstruction(target: String, data: String): Unit = {
    captureText()
    hStack pushAll createProcInstr(target, data)
  }
}