From c88601425dc8ffd79798ec80de7dd73bc5644245 Mon Sep 17 00:00:00 2001
From: buraq <buraq@epfl.ch>
Date: Wed, 5 May 2004 17:21:45 +0000
Subject: xml changes (part 1)

---
 doc/reference/Makefile                  |   2 +-
 doc/reference/ReferencePart.tex         | 165 +++++++++++++++++++++++++++++---
 doc/reference/ReferencePartAppendix.tex |  57 ++++++++++-
 doc/reference/Scala.bib                 |   7 ++
 doc/reference/ScalaReference.tex        |   4 +-
 5 files changed, 214 insertions(+), 21 deletions(-)

(limited to 'doc')

diff --git a/doc/reference/Makefile b/doc/reference/Makefile
index 6c9a4dd630..1f2bccf829 100644
--- a/doc/reference/Makefile
+++ b/doc/reference/Makefile
@@ -44,6 +44,6 @@ include $(PROJECT_SUPPORTDIR)/make/latex.mk
 ##############################################################################
 
 clean			:
-	$(RM) *.out
+	$(RM) *.out *.pdf *.ps *.dvi *.aux *.log *.bbl *.blg *.toc
 
 ##############################################################################
diff --git a/doc/reference/ReferencePart.tex b/doc/reference/ReferencePart.tex
index 5dab7ca1e6..9c87b1d47e 100644
--- a/doc/reference/ReferencePart.tex
+++ b/doc/reference/ReferencePart.tex
@@ -21,23 +21,37 @@
 \newcommand{\ifpackaging}[1]{}
 \newcommand{\ifnewfor}[1]{}
 
+%\newcommand{\U}[1]{\mbox{U+$\backslash$u{#1}}}
+\newcommand{\U}[1]{\mbox{U+{#1}}}
+
 \chapter{Lexical Syntax}
 
-This chapter defines the syntax of Scala tokens. Tokens are
-constructed from characters in the following character sets:
+Scala programs are written using the Unicode character set.
+This chapter defines the two modes of Scala's lexical syntax, the
+Scala mode and the \textsc{Xml} mode. If not otherwise mentioned, the following 
+descriptions of Scala tokens refer to Scala mode, and literal characters `c' refer 
+to the ASCII fragment \U{0000-007F}. 
+
+In Scala mode, \textit{Unicode escapes} are replaced by the corresponding
+Unicode character as in Java.
+\begin{lstlisting}
+UnicodeEscape ::= \{\\}u{u} HexDigit HexDigit HexDigit HexDigit
+HexDigit      ::= `0' | $\ldots$ | `9' | `A' | $\ldots$ | `F' | `a' | $\ldots$ | `f'
+\end{lstlisting}
+To construct tokens, characters are distinguished according to the following classes 
+(Unicode general category given in parentheses):
 \begin{enumerate}
-\item Whitespace characters.
-\item Lower case letters ~\lstinline@`a' | $\ldots$ | `z'@~  and
-upper case letters ~\lstinline@`A' | $\ldots$ | `Z' | `$\Dollar$' | `_'@.
+\item Whitespace characters: \U{0020} | \U{0009} | \U{000D} | \U{000A}.
+\item Letters, which include lower case letters (Ll), upper case letters (Lu), titlecase letters (Lt), other letters (Lo), letter numerals (Nl) and the
+two characters \U{0024} ~\lstinline@`$\Dollar$'@ and \U{005F} ~\lstinline@`_'@, which
+both count as upper case letters.
 \item Digits ~\lstinline@`0' | $\ldots$ | `9'@.
 \item Parentheses ~\lstinline@`(' | `)' | `[' | `]' | `{' | `}'@.
 \item Delimiter characters ~\lstinline@``' | `'' | `"' | `.' | `;' | `,'@.
-\item Operator characters. These include all printable ASCII characters
-which are in none of the sets above.
+\item Operator characters. These include all printable ASCII characters \U{0020-007F}
+which are in none of the sets above, mathematical symbols (Sm) and other symbols (So).
 \end{enumerate}
-
-These sets are extended in the usual way to Unicode. 
-
+\newpage
 \section{Identifiers}\label{sec:idents}
 
 \syntax\begin{lstlisting}
@@ -53,7 +67,7 @@ There are three ways to form an identifier. First, an identifier can
 start with a letter which can be followed by an arbitrary sequence of
 letters and digits. This may be followed by an underscore
 `\lstinline@_@' character and another string of characters that by
-themselves make up an identifier.  Second, an identifier can be start
+themselves make up an identifier.  Second, an identifier can be started
 with a special character followed by an arbitrary sequence of special
 characters.  Finally, an identifier may also be formed by an arbitrary
 string between back-quotes (host systems may impose some restrictions
@@ -95,7 +109,7 @@ The Unicode operator `$\Rightarrow$' (\textsc{U+21D2}) has the ASCII equivalent
 Here are examples of identifiers:
 \begin{lstlisting}
     x    Object        maxIndex        p2p      empty_?
-    +    +_field
+    +    +_field       $\alpha\rho\epsilon\tau\eta$
 \end{lstlisting}
 
 \section{Braces and Semicolons}
@@ -127,7 +141,7 @@ stringLit    ::=  $\mbox{\rm\em ``as in Java''}$
 
 \section{Whitespace and Comments}
 
-Tokens may be separated by whitespace characters (ASCII codes 0 to 32)
+Tokens may be separated by whitespace characters
 and/or comments. Comments come in two forms:
 
 A single-line comment is a sequence of characters which starts with
@@ -136,6 +150,31 @@ A single-line comment is a sequence of characters which starts with
 A multi-line comment is a sequence of characters between \lstinline@/*@ and
 \lstinline@*/@. Multi-line comments may be nested.
 
+\section{XML mode\label{sec::xmlMode}}
+
+In order to allow literal inclusion of XML fragments, lexical analysis switches
+from Scala mode to XML mode when encountering an opening angle bracket `<'
+in the following circumstance: The `<' must be preceded either by whitespace, an 
+opening parenthesis or an opening brace and immediately followed by a character 
+starting an XML name.
+\syntax\begin{lstlisting}
+ ( whitespace | '(' | '{' ) '<' XNameStart
+\end{lstlisting}
+
+The scanner switches from XML mode to Scala mode if either
+\begin{itemize}
+\item the XML expression or the XML pattern started by the initial `<' has been 
+successfully parsed, or if
+
+\item the parser encounters an embedded Scala expression or pattern and forces the scanner
+back to Scala mode, until the Scala expression or pattern is
+successfully parsed. In this case, since code and XML fragments can be
+nested, the parser has to maintain a stack that reflects the nesting
+of XML and Scala expressions adequately.
+\end{itemize}
+
+Note that no Scala tokens are constructed in XML mode, and that comments are interpreted
+as text.
 
 \chapter{\label{sec:names}Identifiers, Names and Scopes}
 
@@ -2392,6 +2431,7 @@ module FileSystem {
                     |  SimpleExpr `.' id 
                     |  SimpleExpr TypeArgs
                     |  SimpleExpr ArgumentExprs
+                    |  XmlExpr
   ArgumentExprs   ::=  `(' [Exprs] ')'
                     |  BlockExpr
   BlockExpr       ::=  `{' CaseClause {CaseClause} `}'
@@ -3405,7 +3445,8 @@ written.
 % 2003 July - changed to new pattern syntax + semantic Burak
 %      Nov  - incorporated changes to grammar, avoiding empty patterns
 %             definitions for value and sequence patterns
-%      Jan  - revert back to original version ?! move regexp to subsect
+% 2004 Jan  - revert back to original version ?! move regexp to subsect
+%      May  - XmlPattern
 
 \label{sec:patterns}
 
@@ -3417,6 +3458,7 @@ written.
                  |   `_'
                  |   literal
                  |   StableId {`(' [Patterns] `)'}
+                 |   XmlPattern
   Patterns      ::=  Pattern {`,' Pattern}
 \end{verbatim}
 
@@ -3775,6 +3817,101 @@ object HelloWord {
 
 To be completed.
 
+\chapter{XML expressions and patterns}
+This chapter describes the syntactic structure of XML expressions and patterns.
+It follows as closely as possible the XML 1.0 specification~\cite{w3c:xml},
+changes being mandated by the possibility of embedding Scala code fragments.
+
+\section{XML expressions}
+XML expressions are expressions generated by the following production, where the 
+opening bracket `<' of the first element must be in a position to start the lexical
+XML mode (see \ref{sec::xmlMode}).
+
+\syntax\begin{lstlisting}
+XmlExpr ::= Element {Element}
+\end{lstlisting}
+Well-formedness constraints of the XML specification apply, which means for instance
+that start tags and end tags must match, and attributes may only be defined once, with
+the exception of constraints related to entity resolution.
+
+If an XML expression is a single element, its value is a runtime representation of
+an XML node. If the XML expression consists of more than one element, then its value
+is a runtime representation of a sequence of XML nodes.
+
+XML expressions may contain Scala expressions as attribute values or within nodes.
+
+\syntax\begin{lstlisting}
+Element       ::=    EmptyElemTag
+                |    STag Content ETag                                       
+
+EmptyElemTag  ::=    `<' Name {S Attribute} [S] `/>'                         
+
+STag          ::=    `<' Name {S Attribute} [S] `>'                          
+ETag          ::=    `</' Name [S] `>'
+Content       ::=    [CharData] {Content1 [CharData]}
+Content1      ::=    Element
+                |    Reference
+                |    Comment
+                |    ScalaExpr
+\end{lstlisting}
+The following productions describe Scala's extensible markup language, staying as close as
+possible to the W3C extensible markup language standard. Only the productions for
+attribute values and character data are changed. Scala supports neither
+declarations, CDATA sections nor processing instructions. Entity references are not
+resolved at runtime.
+
+\syntax\begin{lstlisting}
+Attribute     ::=    Name Eq AttValue	                                 
+
+AttValue      ::=    `"' {CharQ | CharRef} `"'
+                |    `'' {CharA | CharRef} `''
+                |    ScalaExpr
+
+ScalaExpr      ::=    `{' expr `}'
+
+CharData      ::=   { CharNoRef } $\mbox{\rm\em without}$ {CharNoRef}`{'CharB {CharNoRef} 
+                                  $\mbox{\rm\em and without}$ {CharNoRef}`]]>'{CharNoRef}
+
+Reference, Char, NameChar, S, Comment, CombiningChar, Extender      
+              ::=   $\mbox{\rm\em ``as in W3C XML''}$
+
+Char1         ::=   Char $\mbox{\rm\em without}$ `<' | `&'
+CharQ         ::=   Char1 $\mbox{\rm\em without}$ `"'
+CharA         ::=   Char1 $\mbox{\rm\em without}$ `''
+CharB         ::=   Char1 $\mbox{\rm\em without}$ `{'
+
+Name          ::=  $\mbox{\rm\em ``as in W3C XML'', but without }$ `:'
+\end{lstlisting}
+\section{XML patterns}
+XML patterns are patterns generated by the following production, where the opening
+bracket `<' of the element patterns must be in a position to start the lexical
+XML mode (see \ref{sec::xmlMode}).
+\syntax\begin{lstlisting}
+XmlPattern  ::= ElemPattern {ElemPattern}
+\end{lstlisting}
+Well-formedness constraints of the XML specification apply.
+
+If an XML pattern is a single element pattern, it expects the type of runtime
+representation of an XML tree, and matches exactly one instance of this type
+that has the same structure as described by the pattern. If an XML pattern 
+consists of more than one element, then it expects the type of sequences
+of runtime representations of XML trees, and matches every sequence whose elements
+match the sequence pattern.
+
+XML patterns may contain Scala patterns~(\ref{sec:pattern-match}).
+
+\syntax\begin{lstlisting}
+ElemPattern   ::=    EmptyElemTagP
+                |    STagP ContentP ETagP                                       
+
+EmptyElemTagP ::=    `<' Name [S] `/>'
+STagP         ::=    `<' Name [S] `>'
+ETagP         ::=    `</' Name [S] `>'
+ContentP      ::=    [CharData] {(ElemPattern|ScalaPatterns) [CharData]}
+ScalaPatterns ::=    `{' patterns `}'
+\end{lstlisting}
+
+
 \chapter{The Scala Standard Library}
 
 The Scala standard library consists of the package \code{scala} with a
diff --git a/doc/reference/ReferencePartAppendix.tex b/doc/reference/ReferencePartAppendix.tex
index 085972f590..f898019272 100644
--- a/doc/reference/ReferencePartAppendix.tex
+++ b/doc/reference/ReferencePartAppendix.tex
@@ -5,11 +5,12 @@ The lexical syntax of Scala is given by the following grammar in EBNF
 form.
 
 \begin{lstlisting}
-  upper          ::=  `A' | $\ldots$ | `Z' | `$\Dollar$' | `_'
-  lower          ::=  `a' | $\ldots$ | `z'
-  letter         ::=  upper | lower
+  upper          ::=  `A' | $\ldots$ | `Z' | `$\Dollar$' | `_' $\mbox{\rm\em and Unicode Lu}$
+  lower          ::=  `a' | $\ldots$ | `z' $\mbox{\rm\em and Unicode Ll}$
+  letter         ::=  upper | lower $\mbox{\rm\em and Unicode categories Lo, Lt, Nl}$
   digit          ::=  `0' | $\ldots$ | `9'
-  special        ::=  $\mbox{\rm\em ``all other characters except parentheses ([{}]) and periods''}$
+  special        ::=  $\mbox{\rm\em ``all other characters in \U{0020-007F} and Unicode categories Sm, So}$
+                      $\mbox{\rm\em except parentheses ([{}]) and periods''}$
 
   op             ::=  special {special} 
   varid          ::=  lower {letter | digit} [`_' {digit} [id]]
@@ -87,6 +88,7 @@ grammar.
                     |  SimpleExpr `.' id 
                     |  SimpleExpr TypeArgs
                     |  SimpleExpr ArgumentExprs
+                    |  XmlExpr
   ArgumentExprs    ::=  `(' [Exprs] ')'
                     |  BlockExpr
   BlockExpr       ::=  `{' CaseClause {CaseClause} `}'
@@ -122,6 +124,7 @@ grammar.
                     |  Literal
                     |  StableId [ `(' [Patterns] `)' ]
                     |  `(' [Patterns] `)'
+                    |  XmlPattern
   Patterns        ::=  Pattern {`,' Pattern}
 
   TypeParamClause ::=  `[' TypeParam {`,' TypeParam} `]'
@@ -197,6 +200,52 @@ grammar.
                     |
   Packaging       ::=  package QualId `{' {TopStat `;'} TopStat `}'
   QualId          ::=  id {`.' id}
+
+  XmlExpr         ::=  Element {Element}
+
+  XmlPattern      ::=  ElemPattern {ElemPattern}
+  Element         ::=  EmptyElemTag
+                    |  STag Content ETag                                       
+
+  EmptyElemTag    ::=  `<' Name {S Attribute} [S] `/>'                         
+
+  STag            ::=  `<' Name {S Attribute} [S] `>'                          
+  ETag            ::=  `</' Name [S] `>'
+  Content         ::=  [CharData] {Content1 [CharData]}
+  Content1        ::=  Element
+                    |  Reference
+                    |  Comment
+                    |  ScalaExpr
+  Attribute       ::=  Name Eq AttValue	                                 
+  AttValue        ::=  `"' {CharQ | CharRef} `"'
+                    |  `'' {CharA | CharRef} `''
+                    |  ScalaExpr
+
+  ScalaExpr       ::=  `{' expr `}'
+
+  CharData        ::=  { CharNoRef } 
+                       $\mbox{\rm\em without}$ {CharNoRef}`{'CharB {CharNoRef} 
+                       $\mbox{\rm\em and without}$ {CharNoRef}`]]>'{CharNoRef}
+
+  Reference, Char, NameChar, S, Comment, CombiningChar, Extender      
+                  ::=   $\mbox{\rm\em ``as in W3C XML''}$
+
+  Char1           ::=  Char $\mbox{\rm\em without}$ `<' | `&'
+  CharQ           ::=  Char1 $\mbox{\rm\em without}$ `"'
+  CharA           ::=  Char1 $\mbox{\rm\em without}$ `''
+  CharB           ::=  Char1 $\mbox{\rm\em without}$ `{'
+
+  Name            ::=  $\mbox{\rm\em ``as in W3C XML'', but without }$ `:'
+
+  ElemPattern     ::=  EmptyElemTagP
+                    |  STagP ContentP ETagP
+
+  EmptyElemTagP   ::=  `<' Name [S] `/>'
+  STagP           ::=  `<' Name [S] `>'
+  ETagP           ::=  `</' Name [S] `>'
+  ContentP        ::=  [CharData] {(ElemPattern|ScalaPatterns) [CharData]}
+  ScalaPatterns   ::=  `{' patterns `}'
+
 \end{lstlisting}
 
 \chapter{Implementation Status}
diff --git a/doc/reference/Scala.bib b/doc/reference/Scala.bib
index b61413558e..ca253717fc 100644
--- a/doc/reference/Scala.bib
+++ b/doc/reference/Scala.bib
@@ -92,3 +92,10 @@ for syntactic definitions?",
   howpublished	= {\hspace*{\fill}\\
 		  \verb@http://www.w3.org/DOM/@}
 }
+
+@Misc{w3c:xml,
+  author	= {W3C},
+  title		= {Extensible Markup Language ({XML})},
+  howpublished	= {\hspace*{\fill}\\
+		  \verb@http://www.w3.org/TR/REC-xml@}
+}
diff --git a/doc/reference/ScalaReference.tex b/doc/reference/ScalaReference.tex
index a3e657c9c1..87be986e10 100644
--- a/doc/reference/ScalaReference.tex
+++ b/doc/reference/ScalaReference.tex
@@ -18,7 +18,7 @@
 %%%
 %%%    extra_modules="
 %%%      /<full-path-to>/ul9.map
-%%%      /<full-path-to>/fourier_v03.map
+%%%      /<full-path-to>/fourier.map
 %%%    "
 %%%
 %%% (3) Execute ./updmap (still as root) to integrate the new fonts into
@@ -28,7 +28,7 @@
 %%%
 %%% in LAMP, you can find those files here 
 %%% /home/linuxsoft/share/texmf/dvips/config/ul9.map
-%%% /home/linuxsoft/share/texmf/dvips/fourier/fourier_v03.map
+%%% /home/linuxsoft/share/texmf/dvips/fourier/fourier.map
 
 %%% o last step: get normal user again and enable your tex tools to find the fonts as well
 
-- 
cgit v1.2.3