From c88601425dc8ffd79798ec80de7dd73bc5644245 Mon Sep 17 00:00:00 2001 From: buraq Date: Wed, 5 May 2004 17:21:45 +0000 Subject: xml changes (part 1) --- doc/reference/Makefile | 2 +- doc/reference/ReferencePart.tex | 165 +++++++++++++++++++++++++++++--- doc/reference/ReferencePartAppendix.tex | 57 ++++++++++- doc/reference/Scala.bib | 7 ++ doc/reference/ScalaReference.tex | 4 +- 5 files changed, 214 insertions(+), 21 deletions(-) (limited to 'doc') diff --git a/doc/reference/Makefile b/doc/reference/Makefile index 6c9a4dd630..1f2bccf829 100644 --- a/doc/reference/Makefile +++ b/doc/reference/Makefile @@ -44,6 +44,6 @@ include $(PROJECT_SUPPORTDIR)/make/latex.mk ############################################################################## clean : - $(RM) *.out + $(RM) *.out *.pdf *.ps *.dvi *.aux *.log *.bbl *.blg *.toc ############################################################################## diff --git a/doc/reference/ReferencePart.tex b/doc/reference/ReferencePart.tex index 5dab7ca1e6..9c87b1d47e 100644 --- a/doc/reference/ReferencePart.tex +++ b/doc/reference/ReferencePart.tex @@ -21,23 +21,37 @@ \newcommand{\ifpackaging}[1]{} \newcommand{\ifnewfor}[1]{} +%\newcommand{\U}[1]{\mbox{U+$\backslash$u{#1}}} +\newcommand{\U}[1]{\mbox{U+{#1}}} + \chapter{Lexical Syntax} -This chapter defines the syntax of Scala tokens. Tokens are -constructed from characters in the following character sets: +Scala programs are written using the Unicode character set. +This chapter defines the two modes of Scala's lexical syntax, the +Scala mode and the \textsc{Xml} mode. If not otherwise mentioned, the following +descriptions of Scala tokens refer to Scala mode, and literal characters `c' refer +to the ASCII fragment \U{0000-007F}. + +In Scala mode, \textit{Unicode escapes} are replaced by the corresponding +Unicode character as in Java. 
+\begin{lstlisting} +UnicodeEscape ::= \\{\\\\}u{u} HexDigit HexDigit HexDigit HexDigit +HexDigit ::= `0' | $\ldots$ | `9' | `A' | $\ldots$ | `F' | `a' | $\ldots$ | `f' +\end{lstlisting} +To construct tokens, characters are distinguished according to the following classes +(Unicode general category given in parentheses): \begin{enumerate} -\item Whitespace characters. -\item Lower case letters ~\lstinline@`a' | $\ldots$ | `z'@~ and -upper case letters ~\lstinline@`A' | $\ldots$ | `Z' | `$\Dollar$' | `_'@. +\item Whitespace characters \U{0020} | \U{0009} | \U{000D} | \U{000A}. +\item Letters, which include lower case letters (Ll), upper case letters (Lu), titlecase letters (Lt), other letters (Lo), letter numerals (Nl) and the +two characters \U{0024} ~\lstinline@`$\Dollar$'@ and \U{005F} ~\lstinline@`_'@, which +both count as upper case letters. \item Digits ~\lstinline@`0' | $\ldots$ | `9'@. \item Parentheses ~\lstinline@`(' | `)' | `[' | `]' | `{' | `}'@. \item Delimiter characters ~\lstinline@``' | `'' | `"' | `.' | `;' | `,'@. -\item Operator characters. These include all printable ASCII characters -which are in none of the sets above. +\item Operator characters. These include all printable ASCII characters \U{0020-007F} +which are in none of the sets above, mathematical symbols (Sm) and other symbols (So). \end{enumerate} - -These sets are extended in the usual way to Unicode. - +\newpage \section{Identifiers}\label{sec:idents} \syntax\begin{lstlisting} @@ -53,7 +67,7 @@ There are three ways to form an identifier. First, an identifier can start with a letter which can be followed by an arbitrary sequence of letters and digits. This may be followed by an underscore `\lstinline@_@' character and another string of characters that by -themselves make up an identifier. Second, an identifier can be start +themselves make up an identifier. Second, an identifier can be started with a special character followed by an arbitrary sequence of special characters. 
Finally, an identifier may also be formed by an arbitrary string between back-quotes (host systems may impose some restrictions @@ -95,7 +109,7 @@ The Unicode operator `$\Rightarrow$' (\textsc{U+21D2}) has the ASCII equivalent Here are examples of identifiers: \begin{lstlisting} x Object maxIndex p2p empty_? - + +_field + + +_field $\alpha\rho\epsilon\tau\eta$ \end{lstlisting} \section{Braces and Semicolons} @@ -127,7 +141,7 @@ stringLit ::= $\mbox{\rm\em ``as in Java''}$ \section{Whitespace and Comments} -Tokens may be separated by whitespace characters (ASCII codes 0 to 32) +Tokens may be separated by whitespace characters and/or comments. Comments come in two forms: A single-line comment is a sequence of characters which starts with @@ -136,6 +150,31 @@ A single-line comment is a sequence of characters which starts with A multi-line comment is a sequence of characters between \lstinline@/*@ and \lstinline@*/@. Multi-line comments may be nested. +\section{XML mode\label{sec::xmlMode}} + +In order to allow literal inclusion of XML fragments, lexical analysis switches +from Scala mode to XML mode when encountering an opening angle bracket `<' +in the following circumstance: The `<' must be preceded either by whitespace, an +opening parenthesis or an opening brace and immediately followed by a character +starting an XML name. +\syntax\begin{lstlisting} + ( whitespace | '(' | '{' ) '<' XNameStart +\end{lstlisting} + +The scanner switches from XML mode to Scala mode if either +\begin{itemize} +\item the XML expression or the XML pattern started by the initial `<' has been +successfully parsed, or if + +\item the parser encounters an embedded Scala expression or pattern and forces the scanner +back to normal mode, until the Scala expression or pattern is +successfully parsed. In this case, since code and XML fragments can be +nested, the parser has to maintain a stack that reflects the nesting +of XML and Scala expressions adequately. 
+\end{itemize} + +Note that no Scala tokens are constructed in XML mode, and that comments are interpreted +as text. \chapter{\label{sec:names}Identifiers, Names and Scopes} @@ -2392,6 +2431,7 @@ module FileSystem { | SimpleExpr `.' id | SimpleExpr TypeArgs | SimpleExpr ArgumentExprs + | XmlExpr ArgumentExprs ::= `(' [Exprs] ')' | BlockExpr BlockExpr ::= `{' CaseClause {CaseClause} `}' @@ -3405,7 +3445,8 @@ written. % 2003 July - changed to new pattern syntax + semantic Burak % Nov - incorporated changes to grammar, avoiding empty patterns % definitions for value and sequence patterns -% Jan - revert back to original version ?! move regexp to subsect +% 2004 Jan - revert back to original version ?! move regexp to subsect +% May - XmlPattern \label{sec:patterns} @@ -3417,6 +3458,7 @@ written. | `_' | literal | StableId {`(' [Patterns] `)'} + | XmlPattern Patterns ::= Pattern {`,' Pattern} \end{verbatim} @@ -3775,6 +3817,101 @@ object HelloWord { To be completed. +\chapter{XML expressions and patterns} +This chapter describes the syntactic structure of XML expressions and patterns. +It follows the XML 1.0 specification~\cite{w3c:xml} as closely as possible, +changes being mandated by the possibility of embedding Scala code fragments. + +\section{XML expressions} +XML expressions are expressions generated by the following production, where the +opening bracket `<' of the first element must be in a position to start the lexical +XML mode (see~\ref{sec::xmlMode}). + +\syntax\begin{lstlisting} +XmlExpr ::= Element {Element} +\end{lstlisting} +Well-formedness constraints of the XML specification apply, which means for instance +that start tags and end tags must match, and attributes may only be defined once, with +the exception of constraints related to entity resolution. + +If an XML expression is a single element, its value is a runtime representation of +an XML node. 
If the XML expression consists of more than one element, then its value +is a runtime representation of a sequence of XML nodes. + +XML expressions may contain Scala expressions as attribute values or within nodes. + +\syntax\begin{lstlisting} +Element ::= EmptyElemTag + | STag Content ETag + +EmptyElemTag ::= `<' Name {S Attribute} [S] `/>' + +STag ::= `<' Name {S Attribute} [S] `>' +ETag ::= `</' Name [S] `>' +Content ::= [CharData] {Content1 [CharData]} +Content1 ::= Element + | Reference + | Comment + | ScalaExpr +\end{lstlisting} +The following productions describe Scala's extensible markup language as closely as possible +to the W3C extensible markup language standard. Only the productions for +attribute values and character data are changed. Scala supports neither +declarations, CDATA sections nor processing instructions. Entity references are not +resolved at runtime. + +\syntax\begin{lstlisting} +Attribute ::= Name Eq AttValue + +AttValue ::= `"' {CharQ | CharRef} `"' + | `'' {CharA | CharRef} `'' + | ScalaExpr + +ScalaExpr ::= `{' expr `}' + +CharData ::= { CharNoRef } $\mbox{\rm\em without}$ {CharNoRef}`{'CharB {CharNoRef} + $\mbox{\rm\em and without}$ {CharNoRef}`]]>'{CharNoRef} + +Reference, Char, NameChar, S, Comment, CombiningChar, Extender + ::= $\mbox{\rm\em ``as in W3C XML''}$ + +Char1 ::= Char $\mbox{\rm\em without}$ `<' | `&' +CharQ ::= Char1 $\mbox{\rm\em without}$ `"' +CharA ::= Char1 $\mbox{\rm\em without}$ `'' +CharB ::= Char1 $\mbox{\rm\em without}$ `{' + +Name ::= $\mbox{\rm\em ``as in W3C XML'', but without }$ `:' +\end{lstlisting} +\section{XML patterns} +XML patterns are patterns generated by the following production, where the opening +bracket `<' of the element patterns must be in a position to start the lexical +XML mode (see~\ref{sec::xmlMode}). +\syntax\begin{lstlisting} +XmlPattern ::= ElemPattern {ElemPattern} +\end{lstlisting} +Well-formedness constraints of the XML specification apply. 
+ +If an XML pattern is a single element pattern, it expects the type of runtime +representation of an XML tree, and matches exactly one instance of this type +that has the same structure as described by the pattern. If an XML pattern +consists of more than one element, then it expects the type of sequences +of runtime representations of XML trees, and matches every sequence whose elements +match the sequence pattern. + +XML patterns may contain Scala patterns~(\ref{sec:pattern-match}). + +\begin{lstlisting} +ElemPattern ::= EmptyElemTagP + | STagP ContentP ETagP + +EmptyElemTagP ::= '<' Name [S] '/>' +STagP ::= '<' Name [S] '>' +ETagP ::= '</' Name [S] '>' +ContentP ::= [CharData] {(ElemPattern|ScalaPatterns) [CharData]} +ScalaPatterns ::= '{' patterns '}' +\end{lstlisting} + + \chapter{The Scala Standard Library} The Scala standard library consists of the package \code{scala} with a diff --git a/doc/reference/ReferencePartAppendix.tex b/doc/reference/ReferencePartAppendix.tex index 085972f590..f898019272 100644 --- a/doc/reference/ReferencePartAppendix.tex +++ b/doc/reference/ReferencePartAppendix.tex @@ -5,11 +5,12 @@ The lexical syntax of Scala is given by the following grammar in EBNF form. \begin{lstlisting} - upper ::= `A' | $\ldots$ | `Z' | `$\Dollar$' | `_' - lower ::= `a' | $\ldots$ | `z' - letter ::= upper | lower + upper ::= `A' | $\ldots$ | `Z' | `$\Dollar$' | `_' $\mbox{\rm\em and Unicode Lu}$ + lower ::= `a' | $\ldots$ | `z' $\mbox{\rm\em and Unicode Ll}$ + letter ::= upper | lower $\mbox{\rm\em and Unicode categories Lo, Lt, Nl}$ digit ::= `0' | $\ldots$ | `9' - special ::= $\mbox{\rm\em ``all other characters except parentheses ([{}]) and periods''}$ + special ::= $\mbox{\rm\em ``all other characters in \U{0020-007F} and Unicode categories Sm, So}$ + $\mbox{\rm\em except parentheses ([{}]) and periods''}$ op ::= special {special} varid ::= lower {letter | digit} [`_' {digit} [id]] @@ -87,6 +88,7 @@ grammar. | SimpleExpr `.' 
id | SimpleExpr TypeArgs | SimpleExpr ArgumentExprs + | XmlExpr ArgumentExprs ::= `(' [Exprs] ')' | BlockExpr BlockExpr ::= `{' CaseClause {CaseClause} `}' @@ -122,6 +124,7 @@ grammar. | Literal | StableId [ `(' [Patterns] `)' ] | `(' [Patterns] `)' + | XmlPattern Patterns ::= Pattern {`,' Pattern} TypeParamClause ::= `[' TypeParam {`,' TypeParam} `]' @@ -197,6 +200,52 @@ grammar. | Packaging ::= package QualId `{' {TopStat `;'} TopStat `}' QualId ::= id {`.' id} + + XmlExpr ::= Element {Element} + + XmlPattern ::= ElemPattern {ElemPattern} + Element ::= EmptyElemTag + | STag Content ETag + + EmptyElemTag ::= `<' Name {S Attribute} [S] `/>' + + STag ::= `<' Name {S Attribute} [S] `>' + ETag ::= `</' Name [S] `>' + Content ::= [CharData] {Content1 [CharData]} + Content1 ::= Element + | Reference + | Comment + | ScalaExpr + Attribute ::= Name Eq AttValue + AttValue ::= `"' {CharQ | CharRef} `"' + | `'' {CharA | CharRef} `'' + | ScalaExpr + + ScalaExpr ::= `{' expr `}' + + CharData ::= { CharNoRef } + $\mbox{\rm\em without}$ {CharNoRef}`{'CharB {CharNoRef} + $\mbox{\rm\em and without}$ {CharNoRef}`]]>'{CharNoRef} + + Reference, Char, NameChar, S, Comment, CombiningChar, Extender + ::= $\mbox{\rm\em ``as in W3C XML''}$ + + Char1 ::= Char $\mbox{\rm\em without}$ `<' | `&' + CharQ ::= Char1 $\mbox{\rm\em without}$ `"' + CharA ::= Char1 $\mbox{\rm\em without}$ `'' + CharB ::= Char1 $\mbox{\rm\em without}$ `{' + + Name ::= $\mbox{\rm\em ``as in W3C XML'', but without }$ `:' + + ElemPattern ::= EmptyElemTagP + | STagP ContentP ETagP + + EmptyElemTagP ::= '<' Name [S] '/>' + STagP ::= '<' Name [S] '>' + ETagP ::= '</' Name [S] '>' + ContentP ::= [CharData] {(ElemPattern|ScalaPatterns) [CharData]} + ScalaPatterns ::= '{' patterns '}' + \end{lstlisting} \chapter{Implementation Status} diff --git a/doc/reference/Scala.bib b/doc/reference/Scala.bib index b61413558e..ca253717fc 100644 --- a/doc/reference/Scala.bib +++ b/doc/reference/Scala.bib @@ -92,3 +92,10 @@ for syntactic definitions?", howpublished 
= {\hspace*{\fill}\\ \verb@http://www.w3.org/DOM/@} } + +@Misc{w3c:xml, + author = {W3C}, + title = {Extensible Markup Language ({XML})}, + howpublished = {\hspace*{\fill}\\ + \verb@http://www.w3.org/TR/REC-xml@} +} diff --git a/doc/reference/ScalaReference.tex b/doc/reference/ScalaReference.tex index a3e657c9c1..87be986e10 100644 --- a/doc/reference/ScalaReference.tex +++ b/doc/reference/ScalaReference.tex @@ -18,7 +18,7 @@ %%% %%% extra_modules=" %%% //ul9.map -%%% //fourier_v03.map +%%% //fourier.map %%% " %%% %%% (3) Execute ./updmap (still as root) to integrate the new fonts into @@ -28,7 +28,7 @@ %%% %%% in LAMP, you can find those files here %%% /home/linuxsoft/share/texmf/dvips/config/ul9.map -%%% /home/linuxsoft/share/texmf/dvips/fourier/fourier_v03.map +%%% /home/linuxsoft/share/texmf/dvips/fourier/fourier.map %%% o last step: get normal user again and enable your tex tools to find the fonts as well -- cgit v1.2.3