diff options
author | Som Snytt <som.snytt@gmail.com> | 2014-12-20 02:28:44 -0800 |
---|---|---|
committer | Som Snytt <som.snytt@gmail.com> | 2015-04-08 09:20:24 -0700 |
commit | 4df81aab315e587d9c7e319c7a2ece0f0f6fbaf3 (patch) | |
tree | d83454b666d54549d88f52bd800cddb66bd46041 /src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala | |
parent | 476eef82dfed90278de45739ca72819b1b4be5a4 (diff) | |
download | scala-4df81aab315e587d9c7e319c7a2ece0f0f6fbaf3.tar.gz scala-4df81aab315e587d9c7e319c7a2ece0f0f6fbaf3.tar.bz2 scala-4df81aab315e587d9c7e319c7a2ece0f0f6fbaf3.zip |
SI-3368 CDATA gets a Node
XML Parser uses `scala.xml.PCData`.
A compiler flag `-Yxml:coalescing`, analogous to
`DocumentBuilderFactory.setCoalescing`, turns `PCData`
nodes into `Text` nodes and coalesces sibling text nodes.
This change also fixes parse errors such as rejecting a
sequence of CDATA sections.
A sequence of "top level" nodes are not coalesced.
```
scala> <a><b/>start<![CDATA[hi & bye]]><c/>world<d/>stuff<![CDATA[red & black]]></a>
res0: scala.xml.Elem = <a><b/>start<![CDATA[hi & bye]]><c/>world<d/>stuff<![CDATA[red & black]]></a>
scala> :replay -Yxml:coalescing
Replaying: <a><b/>start<![CDATA[hi & bye]]><c/>world<d/>stuff<![CDATA[red & black]]></a>
res0: scala.xml.Elem = <a><b/>starthi & bye<c/>world<d/>stuffred & black</a>
```
Diffstat (limited to 'src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala')
-rwxr-xr-x | src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala | 129 |
1 files changed, 78 insertions, 51 deletions
diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala index 96939e616c..edee1e296d 100755 --- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala @@ -6,6 +6,7 @@ package scala.tools.nsc package ast.parser +import scala.annotation.tailrec import scala.collection.mutable import mutable.{ Buffer, ArrayBuffer, ListBuffer } import scala.util.control.ControlThrowable @@ -172,20 +173,19 @@ trait MarkupParsers { } def appendText(pos: Position, ts: Buffer[Tree], txt: String): Unit = { - def append(t: String) = ts append handle.text(pos, t) - - if (preserveWS) append(txt) - else { + def append(text: String): Unit = { + val tree = handle.text(pos, text) + ts append tree + } + val clean = if (preserveWS) txt else { val sb = new StringBuilder() - txt foreach { c => if (!isSpace(c)) sb append c else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' } - - val trimmed = sb.toString.trim - if (!trimmed.isEmpty) append(trimmed) + sb.toString.trim } + if (!clean.isEmpty) append(clean) } /** adds entity/character to ts as side-effect @@ -216,44 +216,75 @@ trait MarkupParsers { if (xCheckEmbeddedBlock) ts append xEmbeddedExpr else appendText(p, ts, xText) - /** Returns true if it encounters an end tag (without consuming it), - * appends trees to ts as side-effect. + /** At an open angle-bracket, detects an end tag + * or consumes CDATA, comment, PI or element. + * Trees are appended to `ts` as a side-effect. + * @return true if an end tag (without consuming it) */ - private def content_LT(ts: ArrayBuffer[Tree]): Boolean = { - if (ch == '/') - return true // end tag - - val toAppend = ch match { - case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment - case '?' => nextch() ; xProcInstr // PI - case _ => element // child node + private def content_LT(ts: ArrayBuffer[Tree]): Boolean = + (ch == '/') || { + val toAppend = ch match { + case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment + case '?' => nextch() ; xProcInstr // PI + case _ => element // child node + } + ts append toAppend + false } - ts append toAppend - false - } - def content: Buffer[Tree] = { val ts = new ArrayBuffer[Tree] - while (true) { - if (xEmbeddedBlock) + val coalescing = settings.YxmlSettings.isCoalescing + @tailrec def loopContent(): Unit = + if (xEmbeddedBlock) { ts append xEmbeddedExpr - else { + loopContent() + } else { tmppos = o2p(curOffset) ch match { - // end tag, cdata, comment, pi or child node - case '<' => nextch() ; if (content_LT(ts)) return ts - // either the character '{' or an embedded scala block } - case '{' => content_BRACE(tmppos, ts) // } - // EntityRef or CharRef - case '&' => content_AMP(ts) - case SU => return ts - // text content - here xEmbeddedBlock might be true - case _ => appendText(tmppos, ts, xText) + case '<' => // end tag, cdata, comment, pi or child node + nextch() + if (!content_LT(ts)) loopContent() + case '{' => // } literal brace or embedded Scala block + content_BRACE(tmppos, ts) + loopContent() + case '&' => // EntityRef or CharRef + content_AMP(ts) + loopContent() + case SU => () + case _ => // text content - here xEmbeddedBlock might be true + appendText(tmppos, ts, xText) + loopContent() } } + // merge text sections and strip attachments + def coalesce(): ArrayBuffer[Tree] = { + def copy() = { + val buf = new ArrayBuffer[Tree] + var acc = new StringBuilder + var pos: Position = NoPosition + def emit() = if (acc.nonEmpty) { + appendText(pos, buf, acc.toString) + acc.clear() + } + for (t <- ts) + t.attachments.get[handle.TextAttache] match { + case Some(ta) => + if (acc.isEmpty) pos = ta.pos + acc append ta.text + case _ => + emit() + buf += t + } + emit() + buf + } + val res = if (ts.count(_.hasAttachment[handle.TextAttache]) > 1) copy() else ts + for (t <- res) t.removeAttachment[handle.TextAttache] + res } - unreachable + loopContent() + if (coalescing) coalesce() else ts } /** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag @@ -289,20 +320,16 @@ trait MarkupParsers { private def xText: String = { assert(!xEmbeddedBlock, "internal error: encountered embedded block") val buf = new StringBuilder - def done = buf.toString - - while (ch != SU) { - if (ch == '}') { - if (charComingAfter(nextch()) == '}') nextch() - else errorBraces() - } - - buf append ch - nextch() - if (xCheckEmbeddedBlock || ch == '<' || ch == '&') - return done - } - done + if (ch != SU) + do { + if (ch == '}') { + if (charComingAfter(nextch()) == '}') nextch() + else errorBraces() + } + buf append ch + nextch() + } while (!(ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&')) + buf.toString } /** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */ @@ -344,12 +371,12 @@ trait MarkupParsers { tmppos = o2p(curOffset) // Iuli: added this line, as it seems content_LT uses tmppos when creating trees content_LT(ts) - // parse more XML ? + // parse more XML? if (charComingAfter(xSpaceOpt()) == '<') { do { xSpaceOpt() nextch() - ts append element + content_LT(ts) } while (charComingAfter(xSpaceOpt()) == '<') handle.makeXMLseq(r2p(start, start, curOffset), ts) } |