From 4df81aab315e587d9c7e319c7a2ece0f0f6fbaf3 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Sat, 20 Dec 2014 02:28:44 -0800 Subject: SI-3368 CDATA gets a Node XML Parser uses `scala.xml.PCData`. A compiler flag `-Yxml:coalescing`, analogous to `DocumentBuilderFactory.setCoalescing`, turns `PCData` nodes into `Text` nodes and coalesces sibling text nodes. This change also fixes parse errors such as rejecting a sequence of CDATA sections. A sequence of "top level" nodes are not coalesced. ``` scala> startworldstuff res0: scala.xml.Elem = startworldstuff scala> :replay -Yxml:coalescing Replaying: startworldstuff res0: scala.xml.Elem = starthi & byeworldstuffred & black ``` --- .../scala/tools/nsc/ast/parser/MarkupParsers.scala | 129 +++++++++++++-------- .../tools/nsc/ast/parser/SymbolicXMLBuilder.scala | 25 +++- .../scala/tools/nsc/settings/ScalaSettings.scala | 12 ++ .../scala/tools/partest/ParserTest.scala | 21 ++++ test/files/pos/t3368.flags | 1 + test/files/pos/t3368.scala | 5 + test/files/run/t3368.check | 46 ++++++++ test/files/run/t3368.scala | 18 +++ 8 files changed, 200 insertions(+), 57 deletions(-) create mode 100644 src/partest-extras/scala/tools/partest/ParserTest.scala create mode 100644 test/files/pos/t3368.flags create mode 100644 test/files/pos/t3368.scala create mode 100644 test/files/run/t3368.check create mode 100644 test/files/run/t3368.scala diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala index 96939e616c..edee1e296d 100755 --- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala @@ -6,6 +6,7 @@ package scala.tools.nsc package ast.parser +import scala.annotation.tailrec import scala.collection.mutable import mutable.{ Buffer, ArrayBuffer, ListBuffer } import scala.util.control.ControlThrowable @@ -172,20 +173,19 @@ trait MarkupParsers { } def appendText(pos: Position, ts: Buffer[Tree], txt: String): Unit = { - def append(t: String) = ts append handle.text(pos, t) - - if (preserveWS) append(txt) - else { + def append(text: String): Unit = { + val tree = handle.text(pos, text) + ts append tree + } + val clean = if (preserveWS) txt else { val sb = new StringBuilder() - txt foreach { c => if (!isSpace(c)) sb append c else if (sb.isEmpty || !isSpace(sb.last)) sb append ' ' } - - val trimmed = sb.toString.trim - if (!trimmed.isEmpty) append(trimmed) + sb.toString.trim } + if (!clean.isEmpty) append(clean) } /** adds entity/character to ts as side-effect @@ -216,44 +216,75 @@ trait MarkupParsers { if (xCheckEmbeddedBlock) ts append xEmbeddedExpr else appendText(p, ts, xText) - /** Returns true if it encounters an end tag (without consuming it), - * appends trees to ts as side-effect. + /** At an open angle-bracket, detects an end tag + * or consumes CDATA, comment, PI or element. + * Trees are appended to `ts` as a side-effect. + * @return true if an end tag (without consuming it) */ - private def content_LT(ts: ArrayBuffer[Tree]): Boolean = { - if (ch == '/') - return true // end tag - - val toAppend = ch match { - case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment - case '?' => nextch() ; xProcInstr // PI - case _ => element // child node + private def content_LT(ts: ArrayBuffer[Tree]): Boolean = + (ch == '/') || { + val toAppend = ch match { + case '!' => nextch() ; if (ch =='[') xCharData else xComment // CDATA or Comment + case '?' => nextch() ; xProcInstr // PI + case _ => element // child node + } + ts append toAppend + false } - ts append toAppend - false - } - def content: Buffer[Tree] = { val ts = new ArrayBuffer[Tree] - while (true) { - if (xEmbeddedBlock) + val coalescing = settings.YxmlSettings.isCoalescing + @tailrec def loopContent(): Unit = + if (xEmbeddedBlock) { ts append xEmbeddedExpr - else { + loopContent() + } else { tmppos = o2p(curOffset) ch match { - // end tag, cdata, comment, pi or child node - case '<' => nextch() ; if (content_LT(ts)) return ts - // either the character '{' or an embedded scala block } - case '{' => content_BRACE(tmppos, ts) // } - // EntityRef or CharRef - case '&' => content_AMP(ts) - case SU => return ts - // text content - here xEmbeddedBlock might be true - case _ => appendText(tmppos, ts, xText) + case '<' => // end tag, cdata, comment, pi or child node + nextch() + if (!content_LT(ts)) loopContent() + case '{' => // } literal brace or embedded Scala block + content_BRACE(tmppos, ts) + loopContent() + case '&' => // EntityRef or CharRef + content_AMP(ts) + loopContent() + case SU => () + case _ => // text content - here xEmbeddedBlock might be true + appendText(tmppos, ts, xText) + loopContent() } } + // merge text sections and strip attachments + def coalesce(): ArrayBuffer[Tree] = { + def copy() = { + val buf = new ArrayBuffer[Tree] + var acc = new StringBuilder + var pos: Position = NoPosition + def emit() = if (acc.nonEmpty) { + appendText(pos, buf, acc.toString) + acc.clear() + } + for (t <- ts) + t.attachments.get[handle.TextAttache] match { + case Some(ta) => + if (acc.isEmpty) pos = ta.pos + acc append ta.text + case _ => + emit() + buf += t + } + emit() + buf + } + val res = if (ts.count(_.hasAttachment[handle.TextAttache]) > 1) copy() else ts + for (t <- res) t.removeAttachment[handle.TextAttache] + res } - unreachable + loopContent() + if (coalescing) coalesce() else ts } /** '<' element ::= xmlTag1 '>' { xmlExpr | '{' simpleExpr '}' } ETag @@ -289,20 +320,16 @@ trait MarkupParsers { private def xText: String = { assert(!xEmbeddedBlock, "internal error: encountered embedded block") val buf = new StringBuilder - def done = buf.toString - - while (ch != SU) { - if (ch == '}') { - if (charComingAfter(nextch()) == '}') nextch() - else errorBraces() - } - - buf append ch - nextch() - if (xCheckEmbeddedBlock || ch == '<' || ch == '&') - return done - } - done + if (ch != SU) + do { + if (ch == '}') { + if (charComingAfter(nextch()) == '}') nextch() + else errorBraces() + } + buf append ch + nextch() + } while (!(ch == SU || xCheckEmbeddedBlock || ch == '<' || ch == '&')) + buf.toString } /** Some try/catch/finally logic used by xLiteral and xLiteralPattern. */ @@ -344,12 +371,12 @@ trait MarkupParsers { tmppos = o2p(curOffset) // Iuli: added this line, as it seems content_LT uses tmppos when creating trees content_LT(ts) - // parse more XML ? + // parse more XML? if (charComingAfter(xSpaceOpt()) == '<') { do { xSpaceOpt() nextch() - ts append element + content_LT(ts) } while (charComingAfter(xSpaceOpt()) == '<') handle.makeXMLseq(r2p(start, start, curOffset), ts) } diff --git a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala index d2a999cdec..90610ab2e6 100755 --- a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala @@ -36,6 +36,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) { val _MetaData: NameType = "MetaData" val _NamespaceBinding: NameType = "NamespaceBinding" val _NodeBuffer: NameType = "NodeBuffer" + val _PCData: NameType = "PCData" val _PrefixedAttribute: NameType = "PrefixedAttribute" val _ProcInstr: NameType = "ProcInstr" val _Text: NameType = "Text" @@ -46,6 +47,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) { private object xmlterms extends TermNames { val _Null: NameType = "Null" val __Elem: NameType = "Elem" + val _PCData: NameType = "PCData" val __Text: NameType = "Text" val _buf: NameType = "$buf" val _md: NameType = "$md" @@ -55,10 +57,15 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) { val _xml: NameType = "xml" } - import xmltypes.{_Comment, _Elem, _EntityRef, _Group, _MetaData, _NamespaceBinding, _NodeBuffer, - _PrefixedAttribute, _ProcInstr, _Text, _Unparsed, _UnprefixedAttribute} + import xmltypes.{ + _Comment, _Elem, _EntityRef, _Group, _MetaData, _NamespaceBinding, _NodeBuffer, + _PCData, _PrefixedAttribute, _ProcInstr, _Text, _Unparsed, _UnprefixedAttribute + } + + import xmlterms.{ _Null, __Elem, __Text, _buf, _md, _plus, _scope, _tmpscope, _xml } - import xmlterms.{_Null, __Elem, __Text, _buf, _md, _plus, _scope, _tmpscope, _xml} + /** Attachment for trees deriving from text nodes (Text, CData, entities). Used for coalescing. */ + case class TextAttache(pos: Position, text: String) // convenience methods private def LL[A](x: A*): List[List[A]] = List(List(x:_*)) @@ -108,16 +115,22 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) { final def entityRef(pos: Position, n: String) = atPos(pos)( New(_scala_xml_EntityRef, LL(const(n))) ) + private def coalescing = settings.YxmlSettings.isCoalescing + // create scala.xml.Text here <: scala.xml.Node final def text(pos: Position, txt: String): Tree = atPos(pos) { - if (isPattern) makeTextPat(const(txt)) - else makeText1(const(txt)) + val t = if (isPattern) makeTextPat(const(txt)) else makeText1(const(txt)) + if (coalescing) t updateAttachment TextAttache(pos, txt) else t } def makeTextPat(txt: Tree) = Apply(_scala_xml__Text, List(txt)) def makeText1(txt: Tree) = New(_scala_xml_Text, LL(txt)) def comment(pos: Position, text: String) = atPos(pos)( Comment(const(text)) ) - def charData(pos: Position, txt: String) = atPos(pos)( makeText1(const(txt)) ) + def charData(pos: Position, txt: String) = atPos(pos) { + val t = if (isPattern) Apply(_scala_xml(xmlterms._PCData), List(const(txt))) + else New(_scala_xml(_PCData), LL(const(txt))) + if (coalescing) t updateAttachment TextAttache(pos, txt) else t + } def procInstr(pos: Position, target: String, txt: String) = atPos(pos)( ProcInstr(const(target), const(txt)) ) diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index 03fd0976e5..e10b3bc259 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -305,6 +305,18 @@ trait ScalaSettings extends AbsScalaSettings ) withPostSetHook { _ => scala.reflect.internal.util.Statistics.enabled = true } } + // XML parsing options (transitional in 2.11) + object YxmlSettings extends MultiChoiceEnumeration { + val coalescing = Value + def isCoalescing = Yxml contains coalescing + } + val Yxml = MultiChoiceSetting( + name = "-Yxml", + helpArg = "property", + descr = "Configure XML parsing", + domain = YxmlSettings + ) + def YstatisticsEnabled = Ystatistics.value.nonEmpty /** Area-specific debug output. diff --git a/src/partest-extras/scala/tools/partest/ParserTest.scala b/src/partest-extras/scala/tools/partest/ParserTest.scala new file mode 100644 index 0000000000..e4c92e3dc3 --- /dev/null +++ b/src/partest-extras/scala/tools/partest/ParserTest.scala @@ -0,0 +1,21 @@ +/* NSC -- new Scala compiler + * Copyright 2005-2014 LAMP/EPFL + */ + +package scala.tools.partest + +/** A class for testing parser output. + * Just supply the `code` and update the check file. + */ +abstract class ParserTest extends DirectTest { + + override def extraSettings: String = "-usejavacp -Ystop-after:parser -Xprint:parser" + + override def show(): Unit = { + // redirect err to out, for logging + val prevErr = System.err + System.setErr(System.out) + compile() + System.setErr(prevErr) + } +} diff --git a/test/files/pos/t3368.flags b/test/files/pos/t3368.flags new file mode 100644 index 0000000000..cb20509902 --- /dev/null +++ b/test/files/pos/t3368.flags @@ -0,0 +1 @@ +-Ystop-after:parser diff --git a/test/files/pos/t3368.scala b/test/files/pos/t3368.scala new file mode 100644 index 0000000000..c8e861a899 --- /dev/null +++ b/test/files/pos/t3368.scala @@ -0,0 +1,5 @@ + +trait X { + // error: in XML literal: name expected, but char '!' cannot start a name + def x = +} diff --git a/test/files/run/t3368.check b/test/files/run/t3368.check new file mode 100644 index 0000000000..1d9dd677f6 --- /dev/null +++ b/test/files/run/t3368.check @@ -0,0 +1,46 @@ +[[syntax trees at end of parser]] // newSource1.scala +package { + abstract trait X extends scala.AnyRef { + def $init$() = { + () + }; + def x = { + val $buf = new _root_.scala.xml.NodeBuffer(); + $buf.$amp$plus(new _root_.scala.xml.PCData("hi & bye")); + $buf.$amp$plus(new _root_.scala.xml.PCData("red & black")); + $buf + } + }; + abstract trait Y extends scala.AnyRef { + def $init$() = { + () + }; + def y = { + { + new _root_.scala.xml.Elem(null, "a", _root_.scala.xml.Null, $scope, false, ({ + val $buf = new _root_.scala.xml.NodeBuffer(); + $buf.$amp$plus({ + { + new _root_.scala.xml.Elem(null, "b", _root_.scala.xml.Null, $scope, true) + } + }); + $buf.$amp$plus(new _root_.scala.xml.Text("starthi & bye")); + $buf.$amp$plus({ + { + new _root_.scala.xml.Elem(null, "c", _root_.scala.xml.Null, $scope, true) + } + }); + $buf.$amp$plus(new _root_.scala.xml.Text("world")); + $buf.$amp$plus({ + { + new _root_.scala.xml.Elem(null, "d", _root_.scala.xml.Null, $scope, true) + } + }); + $buf.$amp$plus(new _root_.scala.xml.Text("stuffred & black")); + $buf + }: _*)) + } + } + } +} + diff --git a/test/files/run/t3368.scala b/test/files/run/t3368.scala new file mode 100644 index 0000000000..aaf34b43fb --- /dev/null +++ b/test/files/run/t3368.scala @@ -0,0 +1,18 @@ + +import scala.tools.partest.ParserTest + + +object Test extends ParserTest { + + override def code = """ + trait X { + // error: in XML literal: name expected, but char '!' cannot start a name + def x = + } + trait Y { + def y = startworldstuff + } + """ + + override def extraSettings = s"${super.extraSettings} -Yxml:coalescing" +} -- cgit v1.2.3 From b60de05a2e1257d16b7876856e26feec7bb81870 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Sun, 21 Dec 2014 12:50:51 -0800 Subject: SI-5699 Use ParserTest Update the test slightly to use the rig it inspired. --- test/files/run/t5699.scala | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/test/files/run/t5699.scala b/test/files/run/t5699.scala index ec3b1d26b4..ede6fefe48 100755 --- a/test/files/run/t5699.scala +++ b/test/files/run/t5699.scala @@ -1,7 +1,7 @@ -import scala.tools.partest.DirectTest +import scala.tools.partest.ParserTest import scala.reflect.internal.util.BatchSourceFile -object Test extends DirectTest { +object Test extends ParserTest { // Java code override def code = """ |public @interface MyAnnotation { String value(); } @@ -9,14 +9,6 @@ object Test extends DirectTest { override def extraSettings: String = "-usejavacp -Ystop-after:typer -Xprint:parser" - override def show(): Unit = { - // redirect err to out, for logging - val prevErr = System.err - System.setErr(System.out) - compile() - System.setErr(prevErr) - } - override def newSources(sourceCodes: String*) = { assert(sourceCodes.size == 1) List(new BatchSourceFile("annodef.java", sourceCodes(0))) -- cgit v1.2.3 From 996d066bb3ef0fc91acab18a5502bb649929881a Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Sun, 8 Feb 2015 15:35:43 -0800 Subject: SI-3368 Promote xml option to -Xxml As long as Scala does XML literals, there is probably parsing behavior worth configuring. Therefore, the umbrella option is promoted to `-Xxml`. It was tempting to make it `-XML`, but we resisted. --- .../scala/tools/nsc/ast/parser/MarkupParsers.scala | 2 +- .../tools/nsc/ast/parser/SymbolicXMLBuilder.scala | 2 +- .../scala/tools/nsc/settings/ScalaSettings.scala | 24 +++++++++++----------- test/files/run/t3368.scala | 2 +- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala index edee1e296d..52b8a51a79 100755 --- a/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/MarkupParsers.scala @@ -234,7 +234,7 @@ trait MarkupParsers { def content: Buffer[Tree] = { val ts = new ArrayBuffer[Tree] - val coalescing = settings.YxmlSettings.isCoalescing + val coalescing = settings.XxmlSettings.isCoalescing @tailrec def loopContent(): Unit = if (xEmbeddedBlock) { ts append xEmbeddedExpr diff --git a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala index 90610ab2e6..99399e363f 100755 --- a/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala +++ b/src/compiler/scala/tools/nsc/ast/parser/SymbolicXMLBuilder.scala @@ -115,7 +115,7 @@ abstract class SymbolicXMLBuilder(p: Parsers#Parser, preserveWS: Boolean) { final def entityRef(pos: Position, n: String) = atPos(pos)( New(_scala_xml_EntityRef, LL(const(n))) ) - private def coalescing = settings.YxmlSettings.isCoalescing + private def coalescing = settings.XxmlSettings.isCoalescing // create scala.xml.Text here <: scala.xml.Node final def text(pos: Position, txt: String): Tree = atPos(pos) { diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index e10b3bc259..b783e80db9 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -139,6 +139,18 @@ trait ScalaSettings extends AbsScalaSettings val XnoPatmatAnalysis = BooleanSetting ("-Xno-patmat-analysis", "Don't perform exhaustivity/unreachability analysis. Also, ignore @switch annotation.") val XfullLubs = BooleanSetting ("-Xfull-lubs", "Retains pre 2.10 behavior of less aggressive truncation of least upper bounds.") + // XML parsing options + object XxmlSettings extends MultiChoiceEnumeration { + val coalescing = Value + def isCoalescing = Xxml contains coalescing + } + val Xxml = MultiChoiceSetting( + name = "-Xxml", + helpArg = "property", + descr = "Configure XML parsing", + domain = XxmlSettings + ) + /** Compatibility stubs for options whose value name did * not previously match the option name. */ @@ -305,18 +317,6 @@ trait ScalaSettings extends AbsScalaSettings ) withPostSetHook { _ => scala.reflect.internal.util.Statistics.enabled = true } } - // XML parsing options (transitional in 2.11) - object YxmlSettings extends MultiChoiceEnumeration { - val coalescing = Value - def isCoalescing = Yxml contains coalescing - } - val Yxml = MultiChoiceSetting( - name = "-Yxml", - helpArg = "property", - descr = "Configure XML parsing", - domain = YxmlSettings - ) - def YstatisticsEnabled = Ystatistics.value.nonEmpty /** Area-specific debug output. diff --git a/test/files/run/t3368.scala b/test/files/run/t3368.scala index aaf34b43fb..15acba5099 100644 --- a/test/files/run/t3368.scala +++ b/test/files/run/t3368.scala @@ -14,5 +14,5 @@ object Test extends ParserTest { } """ - override def extraSettings = s"${super.extraSettings} -Yxml:coalescing" + override def extraSettings = s"${super.extraSettings} -Xxml:coalescing" } -- cgit v1.2.3 From ef7b5524a7d6a2da82c32f16632ed2304130f32a Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Wed, 8 Apr 2015 09:49:31 -0700 Subject: SI-3368 Review Verbose option help and test tweak. --- src/compiler/scala/tools/nsc/settings/ScalaSettings.scala | 2 +- test/files/run/t5699.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala index b783e80db9..f217d21c35 100644 --- a/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala +++ b/src/compiler/scala/tools/nsc/settings/ScalaSettings.scala @@ -141,7 +141,7 @@ trait ScalaSettings extends AbsScalaSettings // XML parsing options object XxmlSettings extends MultiChoiceEnumeration { - val coalescing = Value + val coalescing = Choice("coalescing", "Convert PCData to Text and coalesce sibling nodes") def isCoalescing = Xxml contains coalescing } val Xxml = MultiChoiceSetting( diff --git a/test/files/run/t5699.scala b/test/files/run/t5699.scala index ede6fefe48..409bcd250c 100755 --- a/test/files/run/t5699.scala +++ b/test/files/run/t5699.scala @@ -7,7 +7,7 @@ object Test extends ParserTest { |public @interface MyAnnotation { String value(); } """.stripMargin - override def extraSettings: String = "-usejavacp -Ystop-after:typer -Xprint:parser" + override def extraSettings: String = "-usejavacp -Ystop-after:namer -Xprint:parser" override def newSources(sourceCodes: String*) = { assert(sourceCodes.size == 1) -- cgit v1.2.3