diff options
Diffstat (limited to 'doc/reference/ExamplesPart.tex')
-rw-r--r-- | doc/reference/ExamplesPart.tex | 6862 |
1 files changed, 0 insertions, 6862 deletions
diff --git a/doc/reference/ExamplesPart.tex b/doc/reference/ExamplesPart.tex deleted file mode 100644 index c90ecffa24..0000000000 --- a/doc/reference/ExamplesPart.tex +++ /dev/null @@ -1,6862 +0,0 @@ -\def\exercise{ - \def\theresult{Exercise~\thesection.\arabic{result}} - \refstepcounter{result} - \trivlist\item[\hskip - \labelsep{\bf \theresult}]} -\def\endexercise{\endtrivlist} - -\newcommand{\rewriteby}[1]{\mbox{\tab\tab\rm(#1)}} - -\chapter{\label{chap:example-one}A First Example} - -As a first example, here is an implementation of Quicksort in Scala. - -\begin{lstlisting} -def sort(xs: Array[int]): unit = { - def swap(i: int, j: int): unit = { - val t = xs(i); xs(i) = xs(j); xs(j) = t; - } - def sort1(l: int, r: int): unit = { - val pivot = xs((l + r) / 2); - var i = l, j = r; - while (i <= j) { - while (xs(i) < pivot) { i = i + 1 } - while (xs(j) > pivot) { j = j - 1 } - if (i <= j) { - swap(i, j); - i = i + 1; - j = j - 1; - } - } - if (l < j) sort1(l, j); - if (j < r) sort1(i, r); - } - sort1(0, xs.length - 1); -} -\end{lstlisting} - -The implementation looks quite similar to what one would write in Java -or C. We use the same operators and similar control structures. -There are also some minor syntactical differences. In particular: -\begin{itemize} -\item -Definitions start with a reserved word. Function definitions start -with \code{def}, variable definitions start with \code{var} and -definitions of values (i.e. read only variables) start with \code{val}. -\item -The declared type of a symbol is given after the symbol and a colon. -The declared type can often be omitted, because the compiler can infer -it from the context. -\item -We use \code{unit} instead of \code{void} to define the result type of -a procedure. -\item -Array types are written \code{Array[T]} rather than \code{T[]}, -and array selections are written \code{a(i)} rather than \code{a[i]}. -\item -Functions can be nested inside other functions. Nested functions can -access parameters and local variables of enclosing functions. For -instance, the name of the array \code{a} is visible in functions -\code{swap} and \code{sort1}, and therefore need not be passed as a -parameter to them. -\end{itemize} -So far, Scala looks like a fairly conventional language with some -syntactic peculiarities. In fact it is possible to write programs in a -conventional imperative or object-oriented style. This is important -because it is one of the things that makes it easy to combine Scala -components with components written in mainstream languages such as -Java, C\# or Visual Basic. - -However, it is also possible to write programs in a style which looks -completely different. Here is Quicksort again, this time written in -functional style. - -\begin{lstlisting} -def sort(xs: List[int]): List[int] = - if (xs.length <= 1) xs - else { - val pivot = xs(xs.length / 2); - sort(xs.filter(x => x < pivot)) - ::: xs.filter(x => x == pivot) - ::: sort(xs.filter(x => x > pivot)) - } -\end{lstlisting} - -The functional program works with lists instead of arrays.\footnote{In -a future complete implementation of Scala, we could also have used arrays -instead of lists, but at the moment arrays do not yet support -\code{filter} and \code{:::}.} -It captures the essence of the quicksort algorithm in a concise way: -\begin{itemize} -\item If the list is empty or consists of a single element, - it is already sorted, so return it immediately. -\item If the list is not empty, pick an an element in the middle of - it as a pivot. -\item Partition the lists into two sub-lists containing elements that -are less than, respectively greater than the pivot element, and a -third list which contains elements equal to pivot. -\item Sort the first two sub-lists by a recursive invocation of -the sort function.\footnote{This is not quite what the imperative algorithm does; -the latter partitions the array into two sub-arrays containing elements -less than or greater or equal to pivot.} -\item The result is obtained by appending the three sub-lists together. -\end{itemize} -Both the imperative and the functional implementation have the same -asymptotic complexity -- $O(N;log(N))$ in the average case and -$O(N^2)$ in the worst case. But where the imperative implementation -operates in place by modifying the argument array, the functional -implementation returns a new sorted list and leaves the argument -list unchanged. The functional implementation thus requires more -transient memory than the imperative one. - -The functional implementation makes it look like Scala is a language -that's specialized for functional operations on lists. In fact, it -is not; all of the operations used in the example are simple library -methods of a class \code{List[t]} which is part of the standard -Scala library, and which itself is implemented in Scala. - -In particular, there is the method \code{filter} which takes as -argument a {\em predicate function} that maps list elements to -boolean values. The result of \code{filter} is a list consisting of -all the elements of the original list for which the given predicate -function is true. The \code{filter} method of an object of type -\code{List[t]} thus has the signature - -\begin{lstlisting} -def filter(p: t => boolean): List[t] -\end{lstlisting} - -Here, \code{t => boolean} is the type of functions that take an element -of type \code{t} and return a \code{boolean}. Functions like -\code{filter} that take another function as argument or return one as -result are called {\em higher-order} functions. - -In the quicksort program, \code{filter} is applied three times to an -anonymous function argument. The first argument, -\code{x => x <= pivot} represents the function that maps its parameter -\code{x} to the boolean value \code{x <= pivot}. That is, it yields -true if \code{x} is smaller or equal than \code{pivot}, false -otherwise. The function is anonymous, i.e.\ it is not defined with a -name. The type of the \code{x} parameter is omitted because a Scala -compiler can infer it automatically from the context where the -function is used. To summarize, \code{xs.filter(x => x <= pivot)} -returns a list consisting of all elements of the list \code{xs} that are -smaller than \code{pivot}. - -\comment{ -It is also possible to apply higher-order functions such as -\code{filter} to named function arguments. Here is functional -quicksort again, where the two anonymous functions are replaced by -named auxiliary functions that compare the argument to the -\code{pivot} value. - -\begin{lstlisting} -def sort (xs: List[int]): List[int] = { - val pivot = xs(xs.length / 2); - def leqPivot(x: int) = x <= pivot; - def gtPivot(x: int) = x > pivot; - def eqPivot(x: int) = x == pivot; - sort(xs filter leqPivot) - ::: sort(xs filter eqPivot) - ::: sort(xs filter gtPivot) -} -\end{lstlisting} -} - -An object of type \code{List[t]} also has a method ``\code{:::}'' -which takes an another list and which returns the result of appending this -list to itself. This method has the signature - -\begin{lstlisting} -def :::(that: List[t]): List[t] -\end{lstlisting} - -Scala does not distinguish between identifiers and operator names. An -identifier can be either a sequence of letters and digits which begins -with a letter, or it can be a sequence of special characters, such as -``\code{+}'', ``\code{*}'', or ``\code{:}''. The last definition thus -introduced a new method identifier ``\code{:::}''. This identifier is -used in the Quicksort example as a binary infix operator that connects -the two sub-lists resulting from the partition. In fact, any method -can be used as an operator in Scala. The binary operation $E;op;E'$ -is always interpreted as the method call $E.op(E')$. This holds also -for binary infix operators which start with a letter. The recursive call -to \code{sort} in the last quicksort example is thus equivalent to -\begin{lstlisting} -sort(a.filter(x => x < pivot)) - .:::(sort(a.filter(x => x == pivot))) - .:::(sort(a.filter(x => x > pivot))) -\end{lstlisting} - -Looking again in detail at the first, imperative implementation of -Quicksort, we find that many of the language constructs used in the -second solution are also present, albeit in a disguised form. - -For instance, ``standard'' binary operators such as \code{+}, -\code{-}, or \code{<} are not treated in any special way. Like -\code{append}, they are methods of their left operand. Consequently, -the expression \code{i + 1} is regarded as the invocation -\code{i.+(1)} of the \code{+} method of the integer value \code{x}. -Of course, a compiler is free (if it is moderately smart, even expected) -to recognize the special case of calling the \code{+} method over -integer arguments and to generate efficient inline code for it. - -For efficiency and better error diagnostics the \code{while} loop is a -primitive construct in Scala. But in principle, it could have just as -well been a predefined function. Here is a possible implementation of it: -\begin{lstlisting} -def While (p: => boolean) (s: => unit): unit = - if (p) { s ; While(p)(s) } -\end{lstlisting} -The \code{While} function takes as first parameter a test function, -which takes no parameters and yields a boolean value. As second -parameter it takes a command function which also takes no parameters -and yields a trivial result. \code{While} invokes the command function -as long as the test function yields true. - - -\chapter{Programming with Actors and Messages} -\label{chap:example-auction} - -Here's an example that shows an application area for which Scala is -particularly well suited. Consider the task of implementing an -electronic auction service. We use an Erlang-style actor process -model to implement the participants of the auction. Actors are -objects to which messages are sent. Every process has a ``mailbox'' of -its incoming messages which is represented as a queue. It can work -sequentially through the messages in its mailbox, or search for -messages matching some pattern. - -\begin{lstlisting}[style=floating,label=fig:simple-auction-msgs,caption=Implementation of an Auction Service] -trait AuctionMessage; -case class Offer(bid: int, client: Actor) extends AuctionMessage; -case class Inquire(client: Actor) extends AuctionMessage; - -trait AuctionReply; -case class Status(asked: int, expire: Date) extends AuctionReply; -case object BestOffer extends AuctionReply; -case class BeatenOffer(maxBid: int) extends AuctionReply; -case class AuctionConcluded(seller: Actor, client: Actor) - extends AuctionReply; -case object AuctionFailed extends AuctionReply; -case object AuctionOver extends AuctionReply; -\end{lstlisting} - -For every traded item there is an auctioneer process that publishes -information about the traded item, that accepts offers from clients -and that communicates with the seller and winning bidder to close the -transaction. We present an overview of a simple implementation -here. - -As a first step, we define the messages that are exchanged during an -auction. There are two abstract base classes (called {\em traits}): -\code{AuctionMessage} for messages from clients to the auction -service, and \code{AuctionReply} for replies from the service to the -clients. For both base classes there exists a number of cases, which -are defined in Figure~\ref{fig:simple-auction-msgs}. - -\begin{lstlisting}[style=floating,label=fig:simple-auction,caption=Implementation of an Auction Service] -class Auction(seller: Actor, minBid: int, closing: Date) extends Actor { - val timeToShutdown = 36000000; // msec - val bidIncrement = 10; - override def run() = { - var maxBid = minBid - bidIncrement; - var maxBidder: Actor = _; - var running = true; - while (running) { - receiveWithin ((closing.getTime() - new Date().getTime())) { - case Offer(bid, client) => - if (bid >= maxBid + bidIncrement) { - if (maxBid >= minBid) maxBidder send BeatenOffer(bid); - maxBid = bid; maxBidder = client; client send BestOffer; - } else { - client send BeatenOffer(maxBid); - } - case Inquire(client) => - client send Status(maxBid, closing); - case TIMEOUT => - if (maxBid >= minBid) { - val reply = AuctionConcluded(seller, maxBidder); - maxBidder send reply; seller send reply; - } else { - seller send AuctionFailed; - } - receiveWithin(timeToShutdown) { - case Offer(_, client) => client send AuctionOver - case TIMEOUT => running = false; - } - } - } - } -} -\end{lstlisting} - -For each base class, there are a number of {\em case classes} which -define the format of particular messages in the class. These messages -might well be ultimately mapped to small XML documents. We expect -automatic tools to exist that convert between XML documents and -internal data structures like the ones defined above. - -Figure~\ref{fig:simple-auction} presents a Scala implementation of a -class \code{Auction} for auction processes that coordinate the bidding -on one item. Objects of this class are created by indicating -\begin{itemize} -\item a seller process which needs to be notified when the auction is over, -\item a minimal bid, -\item the date when the auction is to be closed. -\end{itemize} -The process behavior is defined by its \code{run} method. That method -repeatedly selects (using \code{receiveWithin}) a message and reacts to it, -until the auction is closed, which is signaled by a \code{TIMEOUT} -message. Before finally stopping, it stays active for another period -determined by the \code{timeToShutdown} constant and replies to -further offers that the auction is closed. - -Here are some further explanations of the constructs used in this -program: -\begin{itemize} -\item -The \code{receiveWithin} method of class \code{Actor} takes as -parameters a time span given in milliseconds and a function that -processes messages in the mailbox. The function is given by a sequence -of cases that each specify a pattern and an action to perform for -messages matching the pattern. The \code{receiveWithin} method selects -the first message in the mailbox which matches one of these patterns -and applies the corresponding action to it. -\item -The last case of \code{receiveWithin} is guarded by a -\code{TIMEOUT} pattern. If no other messages are received in the meantime, this -pattern is triggered after the time span which is passed as argument -to the enclosing \code{receiveWithin} method. \code{TIMEOUT} is a -particular instance of class \code{Message}, which is triggered by the -\code{Actor} implementation itself. -\item -Reply messages are sent using syntax of the form -\code{destination send SomeMessage}. \code{send} is used here as a -binary operator with a process and a message as arguments. This is -equivalent in Scala to the method call -\code{destination.send(SomeMessage)}, i.e. the invocation of -the \code{send} of the destination process with the given message as -parameter. -\end{itemize} -The preceding discussion gave a flavor of distributed programming in -Scala. It might seem that Scala has a rich set of language constructs -that support actor processes, message sending and receiving, -programming with timeouts, etc. In fact, the opposite is true. All the -constructs discussed above are offered as methods in the library class -\code{Actor}. That class is itself implemented in Scala, based on the underlying -thread model of the host language (e.g. Java, or .NET). -The implementation of all features of class \code{Actor} used here is -given in Section~\ref{sec:actors}. - -The advantages of the library-based approach are relative simplicity -of the core language and flexibility for library designers. Because -the core language need not specify details of high-level process -communication, it can be kept simpler and more general. Because the -particular model of messages in a mailbox is a library module, it can -be freely modified if a different model is needed in some -applications. The approach requires however that the core language is -expressive enough to provide the necessary language abstractions in a -convenient way. Scala has been designed with this in mind; one of its -major design goals was that it should be flexible enough to act as a -convenient host language for domain specific languages implemented by -library modules. For instance, the actor communication constructs -presented above can be regarded as one such domain specific language, -which conceptually extends the Scala core. - -\chapter{\label{chap:simple-funs}Expressions and Simple Functions} - -The previous examples gave an impression of what can be done with -Scala. We now introduce its constructs one by one in a more -systematic fashion. We start with the smallest level, expressions and -functions. - -\section{Expressions And Simple Functions} - -A Scala system comes with an interpreter which can be seen as a fancy -calculator. A user interacts with the calculator by typing in -expressions. The calculator returns the evaluation results and their -types. Example: - -\begin{lstlisting} -> 87 + 145 -232: scala.Int - -> 5 + 2 * 3 -11: scala.Int - -> "hello" + " world!" -hello world: scala.String -\end{lstlisting} -It is also possible to name a sub-expression and use the name instead -of the expression afterwards: -\begin{lstlisting} -> def scale = 5 -def scale: int - -> 7 * scale -35: scala.Int -\end{lstlisting} -\begin{lstlisting} -> def pi = 3.141592653589793 -def pi: scala.Double - -> def radius = 10 -def radius: scala.Int - -> 2 * pi * radius -62.83185307179586: scala.Double -\end{lstlisting} -Definitions start with the reserved word \code{def}; they introduce a -name which stands for the expression following the \code{=} sign. The -interpreter will answer with the introduced name and its type. - -Executing a definition such as \code{def x = e} will not evaluate the -expression \code{e}. Instead \code{e} is evaluated whenever \code{x} -is used. Alternatively, Scala offers a value definition -\code{val x = e}, which does evaluate the right-hand-side \code{e} as part of the -evaluation of the definition. If \code{x} is then used subsequently, -it is immediately replaced by the pre-computed value of -\code{e}, so that the expression need not be evaluated again. - -How are expressions evaluated? An expression consisting of operators -and operands is evaluated by repeatedly applying the following -simplification steps. -\begin{itemize} -\item pick the left-most operation -\item evaluate its operands -\item apply the operator to the operand values. -\end{itemize} -A name defined by \code{def}\ is evaluated by replacing the name by the -(unevaluated) definition's right hand side. A name defined by \code{val} is -evaluated by replacing the name by the value of the definitions's -right-hand side. The evaluation process stops once we have reached a -value. A value is some data item such as a string, a number, an array, -or a list. - -\example -Here is an evaluation of an arithmetic expression. -\begin{lstlisting} -$\,\,\,$ (2 * pi) * radius -$\rightarrow$ (2 * 3.141592653589793) * radius -$\rightarrow$ 6.283185307179586 * radius -$\rightarrow$ 6.283185307179586 * 10 -$\rightarrow$ 62.83185307179586 -\end{lstlisting} -The process of stepwise simplification of expressions to values is -called {\em reduction}. - -\section{Parameters} - -Using \code{def}, one can also define functions with parameters. Example: -\begin{lstlisting} -> def square(x: double) = x * x -def (x: double): scala.Double - -> square(2) -4.0: scala.Double - -> square(5 + 3) -64.0: scala.Double - -> square(square(4)) -256.0: scala.Double - -> def sumOfSquares(x: double, y: double) = square(x) + square(y) -def sumOfSquares(scala.Double,scala.Double): scala.Double - -> sumOfSquares(3, 2 + 2) -25.0: scala.Double -\end{lstlisting} - -Function parameters follow the function name and are always enclosed -in parentheses. Every parameter comes with a type, which is indicated -following the parameter name and a colon. At the present time, we -only need basic numeric types such as the type \code{scala.Double} of -double precision numbers. Scala defines {\em type aliases} for some -standard types, so we can write numeric types as in Java. For instance -\code{double} is a type alias of \code{scala.Double} and \code{int} is -a type alias for \code{scala.Int}. - -Functions with parameters are evaluated analogously to operators in -expressions. First, the arguments of the function are evaluated (in -left-to-right order). Then, the function application is replaced by -the function's right hand side, and at the same time all formal -parameters of the function are replaced by their corresponding actual -arguments. - -\example\ - -\begin{lstlisting} -$\,\,\,$ sumOfSquares(3, 2+2) -$\rightarrow$ sumOfSquares(3, 4) -$\rightarrow$ square(3) + square(4) -$\rightarrow$ 3 * 3 + square(4) -$\rightarrow$ 9 + square(4) -$\rightarrow$ 9 + 4 * 4 -$\rightarrow$ 9 + 16 -$\rightarrow$ 25 -\end{lstlisting} - -The example shows that the interpreter reduces function arguments to -values before rewriting the function application. One could instead -have chosen to apply the function to unreduced arguments. This would -have yielded the following reduction sequence: -\begin{lstlisting} -$\,\,\,$ sumOfSquares(3, 2+2) -$\rightarrow$ square(3) + square(2+2) -$\rightarrow$ 3 * 3 + square(2+2) -$\rightarrow$ 9 + square(2+2) -$\rightarrow$ 9 + (2+2) * (2+2) -$\rightarrow$ 9 + 4 * (2+2) -$\rightarrow$ 9 + 4 * 4 -$\rightarrow$ 9 + 16 -$\rightarrow$ 25 -\end{lstlisting} - -The second evaluation order is known as \emph{call-by-name}, -whereas the first one is known as \emph{call-by-value}. For -expressions that use only pure functions and that therefore can be -reduced with the substitution model, both schemes yield the same final -values. - -Call-by-value has the advantage that it avoids repeated evaluation of -arguments. Call-by-name has the advantage that it avoids evaluation of -arguments when the parameter is not used at all by the function. -Call-by-value is usually more efficient than call-by-name, but a -call-by-value evaluation might loop where a call-by-name evaluation -would terminate. Consider: -\begin{lstlisting} -> def loop: int = loop -def loop: scala.Int - -> def first(x: int, y: int) = x -def first(x: scala.Int, y: scala.Int): scala.Int -\end{lstlisting} -Then \code{first(1, loop)} reduces with call-by-name to \code{1}, -whereas the same term reduces with call-by-value repeatedly to itself, -hence evaluation does not terminate. -\begin{lstlisting} -$\,\,\,$ first(1, loop) -$\rightarrow$ first(1, loop) -$\rightarrow$ first(1, loop) -$\rightarrow$ ... -\end{lstlisting} -Scala uses call-by-value by default, but it switches to call-by-name evaluation -if the parameter type is preceded by \code{=>}. - -\example\ - -\begin{lstlisting} -> def constOne(x: int, y: => int) = 1 -constOne(x: scala.Int, y: => scala.Int): scala.Int - -> constOne(1, loop) -1: scala.Int - -> constOne(loop, 2) // gives an infinite loop. -^C -\end{lstlisting} - -\section{Conditional Expressions} - -Scala's \code{if-else} lets one choose between two alternatives. Its -syntax is like Java's \code{if-else}. But where Java's \code{if-else} -can be used only as an alternative of statements, Scala allows the -same syntax to choose between two expressions. That's why Scala's -\code{if-else} serves also as a substitute for Java's conditional -expression \code{ ... ? ... : ...}. - -\example\ - -\begin{lstlisting} -> def abs(x: double) = if (x >= 0) x else -x -abs(x: double): double -\end{lstlisting} -Scala's boolean expressions are similar to Java's; they are formed -from the constants -\code{true} and -\code{false}, comparison operators, boolean negation \code{!} and the -boolean operators $\,$\code{&&}$\,$ and $\,$\code{||}. - -\section{\label{sec:sqrt}Example: Square Roots by Newton's Method} - -We now illustrate the language elements introduced so far in the -construction of a more interesting program. The task is to write a -function -\begin{lstlisting} -def sqrt(x: double): double = ... -\end{lstlisting} -which computes the square root of \code{x}. - -A common way to compute square roots is by Newton's method of -successive approximations. One starts with an initial guess \code{y} -(say: \code{y = 1}). One then repeatedly improves the current guess -\code{y} by taking the average of \code{y} and \code{x/y}. As an -example, the next three columns indicate the guess \code{y}, the -quotient \code{x/y}, and their average for the first approximations of -$\sqrt 2$. -\begin{lstlisting} -1 2/1 = 2 1.5 -1.5 2/1.5 = 1.3333 1.4167 -1.4167 2/1.4167 = 1.4118 1.4142 -1.4142 ... ... - -$y$ $x/y$ $(y + x/y)/2$ -\end{lstlisting} -One can implement this algorithm in Scala by a set of small functions, -which each represent one of the elements of the algorithm. - -We first define a function for iterating from a guess to the result: -\begin{lstlisting} -def sqrtIter(guess: double, x: double): double = - if (isGoodEnough(guess, x)) guess - else sqrtIter(improve(guess, x), x); -\end{lstlisting} -Note that \code{sqrtIter} calls itself recursively. Loops in -imperative programs can always be modeled by recursion in functional -programs. - -Note also that the definition of \code{sqrtIter} contains a return -type, which follows the parameter section. Such return types are -mandatory for recursive functions. For a non-recursive function, the -return type is optional; if it is missing the type checker will -compute it from the type of the function's right-hand side. However, -even for non-recursive functions it is often a good idea to include a -return type for better documentation. - -As a second step, we define the two functions called by -\code{sqrtIter}: a function to \code{improve} the guess and a -termination test \code{isGoodEnough}. Here is their definition. -\begin{lstlisting} -def improve(guess: double, x: double) = - (guess + x / guess) / 2; - -def isGoodEnough(guess: double, x: double) = - abs(square(guess) - x) < 0.001; -\end{lstlisting} - -Finally, the \code{sqrt} function itself is defined by an application -of \code{sqrtIter}. -\begin{lstlisting} -def sqrt(x: double) = sqrtIter(1.0, x); -\end{lstlisting} - -\begin{exercise} The \code{isGoodEnough} test is not very precise for small -numbers and might lead to non-termination for very large ones (why?). -Design a different version of \code{isGoodEnough} which does not have -these problems. -\end{exercise} - -\begin{exercise} Trace the execution of the \code{sqrt(4)} expression. -\end{exercise} - -\section{Nested Functions} - -The functional programming style encourages the construction of many -small helper functions. In the last example, the implementation -of \code{sqrt} made use of the helper functions \code{sqrtIter}, -\code{improve} and \code{isGoodEnough}. The names of these functions -are relevant only for the implementation of \code{sqrt}. We normally -do not want users of \code{sqrt} to access these functions directly. - -We can enforce this (and avoid name-space pollution) by including -the helper functions within the calling function itself: -\begin{lstlisting} -def sqrt(x: double) = { - def sqrtIter(guess: double, x: double): double = - if (isGoodEnough(guess, x)) guess - else sqrtIter(improve(guess, x), x); - def improve(guess: double, x: double) = - (guess + x / guess) / 2; - def isGoodEnough(guess: double, x: double) = - abs(square(guess) - x) < 0.001; - sqrtIter(1.0, x) -} -\end{lstlisting} -In this program, the braces \code{\{ ... \}} enclose a {\em block}. -Blocks in Scala are themselves expressions. Every block ends in a -result expression which defines its value. The result expression may -be preceded by auxiliary definitions, which are visible only in the -block itself. - -Every definition in a block must be followed by a semicolon, which -separates this definition from subsequent definitions or the result -expression. However, a semicolon is inserted implicitly if the -definition ends in a right brace and is followed by a new line. -Therefore, the following are all legal: -\begin{lstlisting} -def f(x) = x + 1; /* `;' mandatory */ -f(1) + f(2) - -def g(x) = {x + 1} -g(1) + g(2) - -def h(x) = {x + 1}; /* `;' mandatory */ h(1) + h(2) -\end{lstlisting} -Scala uses the usual block-structured scoping rules. A name defined in -some outer block is visible also in some inner block, provided it is -not redefined there. This rule permits us to simplify our -\code{sqrt} example. We need not pass \code{x} around as an additional parameter of -the nested functions, since it is always visible in them as a -parameter of the outer function \code{sqrt}. Here is the simplified code: -\begin{lstlisting} -def sqrt(x: double) = { - def sqrtIter(guess: double): double = - if (isGoodEnough(guess)) guess - else sqrtIter(improve(guess)); - def improve(guess: double) = - (guess + x / guess) / 2; - def isGoodEnough(guess: double) = - abs(square(guess) - x) < 0.001; - sqrtIter(1.0) -} -\end{lstlisting} - -\section{Tail Recursion} - -Consider the following function to compute the greatest common divisor -of two given numbers. - -\begin{lstlisting} -def gcd(a: int, b: int): int = if (b == 0) a else gcd(b, a % b) -\end{lstlisting} - -Using our substitution model of function evaluation, -\code{gcd(14, 21)} evaluates as follows: - -\begin{lstlisting} -$\,\,$ gcd(14, 21) -$\rightarrow\!$ if (21 == 0) 14 else gcd(21, 14 % 21) -$\rightarrow\!$ if (false) 14 else gcd(21, 14 % 21) -$\rightarrow\!$ gcd(21, 14 % 21) -$\rightarrow\!$ gcd(21, 14) -$\rightarrow\!$ if (14 == 0) 21 else gcd(14, 21 % 14) -$\rightarrow$ $\rightarrow$ gcd(14, 21 % 14) -$\rightarrow\!$ gcd(14, 7) -$\rightarrow\!$ if (7 == 0) 14 else gcd(7, 14 % 7) -$\rightarrow$ $\rightarrow$ gcd(7, 14 % 7) -$\rightarrow\!$ gcd(7, 0) -$\rightarrow\!$ if (0 == 0) 7 else gcd(0, 7 % 0) -$\rightarrow$ $\rightarrow$ 7 -\end{lstlisting} - -Contrast this with the evaluation of another recursive function, -\code{factorial}: - -\begin{lstlisting} -def factorial(n: int): int = if (n == 0) 1 else n * factorial(n - 1) -\end{lstlisting} - -The application \code{factorial(5)} rewrites as follows: -\begin{lstlisting} -$\,\,\,$ factorial(5) -$\rightarrow$ if (5 == 0) 1 else 5 * factorial(5 - 1) -$\rightarrow$ 5 * factorial(5 - 1) -$\rightarrow$ 5 * factorial(4) -$\rightarrow\ldots\rightarrow$ 5 * (4 * factorial(3)) -$\rightarrow\ldots\rightarrow$ 5 * (4 * (3 * factorial(2))) -$\rightarrow\ldots\rightarrow$ 5 * (4 * (3 * (2 * factorial(1)))) -$\rightarrow\ldots\rightarrow$ 5 * (4 * (3 * (2 * (1 * factorial(0)))) -$\rightarrow\ldots\rightarrow$ 5 * (4 * (3 * (2 * (1 * 1)))) -$\rightarrow\ldots\rightarrow$ 120 -\end{lstlisting} -There is an important difference between the two rewrite sequences: -The terms in the rewrite sequence of \code{gcd} have again and again -the same form. As evaluation proceeds, their size is bounded by a -constant. By contrast, in the evaluation of factorial we get longer -and longer chains of operands which are then multiplied in the last -part of the evaluation sequence. - -Even though actual implementations of Scala do not work by rewriting -terms, they nevertheless should have the same space behavior as in the -rewrite sequences. In the implementation of \code{gcd}, one notes that -the recursive call to \code{gcd} is the last action performed in the -evaluation of its body. One also says that \code{gcd} is -``tail-recursive''. The final call in a tail-recursive function can be -implemented by a jump back to the beginning of that function. The -arguments of that call can overwrite the parameters of the current -instantiation of \code{gcd}, so that no new stack space is needed. -Hence, tail recursive functions are iterative processes, which can be -executed in constant space. - -By contrast, the recursive call in \code{factorial} is followed by a -multiplication. Hence, a new stack frame is allocated for the -recursive instance of factorial, and is deallocated after that -instance has finished. The given formulation of the factorial function -is not tail-recursive; it needs space proportional to its input -parameter for its execution. - -More generally, if the last action of a function is a call to another -(possibly the same) function, only a single stack frame is needed for -both functions. Such calls are called ``tail calls''. In principle, -tail calls can always re-use the stack frame of the calling function. -However, some run-time environments (such as the Java VM) lack the -primitives to make stack frame re-use for tail calls efficient. A -production quality Scala implementation is therefore only required to -re-use the stack frame of a directly tail-recursive function whose -last action is a call to itself. Other tail calls might be optimized -also, but one should not rely on this across implementations. - -\begin{exercise} Design a tail-recursive version of -\code{factorial}. -\end{exercise} - -\chapter{\label{chap:first-class-funs}First-Class Functions} - -A function in Scala is a ``first-class value''. Like any other value, -it may be passed as a parameter or returned as a result. Functions -which take other functions as parameters or return them as results are -called {\em higher-order} functions. This chapter introduces -higher-order functions and shows how they provide a flexible mechanism -for program composition. - -As a motivating example, consider the following three related tasks: -\begin{enumerate} -\item -Write a function to sum all integers between two given numbers \code{a} and \code{b}: -\begin{lstlisting} -def sumInts(a: int, b: int): int = - if (a > b) 0 else a + sumInts(a + 1, b); -\end{lstlisting} -\item -Write a function to sum the squares of all integers between two given numbers -\code{a} and \code{b}: -\begin{lstlisting} -def square(x: int): int = x * x; -def sumSquares(a: int, b: int): int = - if (a > b) 0 else square(a) + sumSquares(a + 1, b); -\end{lstlisting} -\item -Write a function to sum the powers $2^n$ of all integers $n$ between -two given numbers \code{a} and \code{b}: -\begin{lstlisting} -def powerOfTwo(x: int): int = if (x == 0) 1 else x * powerOfTwo(x - 1); -def sumPowersOfTwo(a: int, b: int): int = - if (a > b) 0 else powerOfTwo(a) + sumPowersOfTwo(a + 1, b); -\end{lstlisting} -\end{enumerate} -These functions are all instances of -\(\sum^b_a f(n)\) for different values of $f$. -We can factor out the common pattern by defining a function \code{sum}: -\begin{lstlisting} -def sum(f: int => int, a: int, b: int): double = - if (a > b) 0 else f(a) + sum(f, a + 1, b); -\end{lstlisting} -The type \code{int => int} is the type of functions that -take arguments of type \code{int} and return results of type -\code{int}. So \code{sum} is a function which takes another function as -a parameter. In other words, \code{sum} is a {\em higher-order} -function. - -Using \code{sum}, we can formulate the three summing functions as -follows. -\begin{lstlisting} -def sumInts(a: int, b: int): int = sum(id, a, b); -def sumSquares(a: int, b: int): int = sum(square, a, b); -def sumPowersOfTwo(a: int, b: int): int = sum(powerOfTwo, a, b); -\end{lstlisting} -where -\begin{lstlisting} -def id(x: int): int = x; -def square(x: int): int = x * x; -def powerOfTwo(x: int): int = if (x == 0) 1 else x * powerOfTwo(x - 1); -\end{lstlisting} - -\section{Anonymous Functions} - -Parameterization by functions tends to create many small functions. In -the previous example, we defined \code{id}, \code{square} and -\code{power} as separate functions, so that they could be -passed as arguments to \code{sum}. - -Instead of using named function definitions for these small argument -functions, we can formulate them in a shorter way as {\em anonymous -functions}. An anonymous function is an expression that evaluates to a -function; the function is defined without giving it a name. As an -example consider the anonymous square function: -\begin{lstlisting} - x: int => x * x -\end{lstlisting} -The part before the arrow `\code{=>}' is the parameter of the function, -whereas the part following the `\code{=>}' is its body. If there are -several parameters, we need to enclose them in parentheses. For -instance, here is an anonymous function which multiples its two arguments. -\begin{lstlisting} - (x: int, y: int) => x * y -\end{lstlisting} -Using anonymous functions, we can reformulate the first two summation -functions without named auxiliary functions: -\begin{lstlisting} -def sumInts(a: int, b: int): int = sum(x: int => x, a, b); -def sumSquares(a: int, b: int): int = sum(x: int => x * x, a, b); -\end{lstlisting} -Often, the Scala compiler can deduce the parameter type(s) from the -context of the anonymous function in which case they can be omitted. -For instance, in the case of \code{sumInts} or \code{sumSquares}, one -knows from the type of \code{sum} that the first parameter must be a -function of type \code{int => int}. Hence, the parameter type -\code{int} is redundant and may be omitted: -\begin{lstlisting} -def sumInts(a: int, b: int): int = sum(x => x, a, b); -def sumSquares(a: int, b: int): int = sum(x => x * x, a, b); -\end{lstlisting} - -Generally, the Scala term -\code{(x}$_1$\code{: T}$_1$\code{, ..., x}$_n$\code{: T}$_n$\code{) => E} -defines a function which maps its parameters -\code{x}$_1$\code{, ..., x}$_n$ to the result of the expression \code{E} -(where \code{E} may refer to \code{x}$_1$\code{, ..., x}$_n$). Anonymous -functions are not essential language elements of Scala, as they can -always be expressed in terms of named functions. Indeed, the -anonymous function -\begin{lstlisting} -(x$_1$: T$_1$, ..., x$_n$: T$_n$) => E -\end{lstlisting} -is equivalent to the block -\begin{lstlisting} -{ def f (x$_1$: T$_1$, ..., x$_n$: T$_n$) = E ; f } -\end{lstlisting} -where \code{f} is fresh name which is used nowhere else in the program. -We also say, anonymous functions are ``syntactic sugar''. - -\section{Currying} - -The latest formulation of the summing functions is already quite -compact. But we can do even better. Note that -\code{a} and \code{b} appear as parameters and arguments of every function -but they do not seem to take part in interesting combinations. Is -there a way to get rid of them? - -Let's try to rewrite \code{sum} so that it does not take the bounds -\code{a} and \code{b} as parameters: -\begin{lstlisting} -def sum(f: int => int) = { - def sumF(a: int, b: int): int = - if (a > b) 0 else f(a) + sumF(a + 1, b); - sumF -} -\end{lstlisting} -In this formulation, \code{sum} is a function which returns another -function, namely the specialized summing function \code{sumF}. This -latter function does all the work; it takes the bounds \code{a} and -\code{b} as parameters, applies \code{sum}'s function parameter \code{f} to all -integers between them, and sums up the results. - -Using this new formulation of \code{sum}, we can now define: -\begin{lstlisting} -def sumInts = sum(x => x); -def sumSquares = sum(x => x * x); -def sumPowersOfTwo = sum(powerOfTwo); -\end{lstlisting} -Or, equivalently, with value definitions: -\begin{lstlisting} -val sumInts = sum(x => x); -val sumSquares = sum(x => x * x); -val sumPowersOfTwo = sum(powerOfTwo); -\end{lstlisting} -These functions can be applied like other functions. For instance, -\begin{lstlisting} -> sumSquares(1, 10) + sumPowersOfTwo(10, 20) -267632001: scala.Int -\end{lstlisting} -How are function-returning functions applied? As an example, in the expression -\begin{lstlisting} -sum(x => x * x)(1, 10) , -\end{lstlisting} -the function \code{sum} is applied to the squaring function -\code{(x => x * x)}. The resulting function is then -applied to the second argument list, \code{(1, 10)}. - -This notation is possible because function application associates to the left. -That is, if $\mbox{args}_1$ and $\mbox{args}_2$ are argument lists, then -\bda{lcl} -f(\mbox{args}_1)(\mbox{args}_2) & \ \ \mbox{is equivalent to}\ \ & (f(\mbox{args}_1))(\mbox{args}_2) -\eda -In our example, \code{sum(x => x * x)(1, 10)} is equivalent to the -following expression: -\code{(sum(x => x * x))(1, 10)}. - -The style of function-returning functions is so useful that Scala has -special syntax for it. For instance, the next definition of \code{sum} -is equivalent to the previous one, but is shorter: -\begin{lstlisting} -def sum(f: int => int)(a: int, b: int): int = - if (a > b) 0 else f(a) + sum(f)(a + 1, b); -\end{lstlisting} -Generally, a curried function definition -\begin{lstlisting} -def f (args$_1$) ... (args$_n$) = E -\end{lstlisting} -where $n > 1$ expands to -\begin{lstlisting} -def f (args$_1$) ... (args$_{n-1}$) = { def g (args$_n$) = E ; g } -\end{lstlisting} -where \code{g} is a fresh identifier. Or, shorter, using an anonymous function: -\begin{lstlisting} -def f (args$_1$) ... (args$_{n-1}$) = ( args$_n$ ) => E . -\end{lstlisting} -Performing this step $n$ times yields that -\begin{lstlisting} -def f (args$_1$) ... (args$_n$) = E -\end{lstlisting} -is equivalent to -\begin{lstlisting} -def f = (args$_1$) => ... => (args$_n$) => E . -\end{lstlisting} -Or, equivalently, using a value definition: -\begin{lstlisting} -val f = (args$_1$) => ... => (args$_n$) => E . -\end{lstlisting} -This style of function definition and application is called {\em -currying} after its promoter, Haskell B.\ Curry, a logician of the -20th century, even though the idea goes back further to Moses -Sch\"onfinkel and Gottlob Frege. - -The type of a function-returning function is expressed analogously to -its parameter list. Taking the last formulation of \code{sum} as an example, -the type of \code{sum} is \code{(int => int) => (int, int) => int}. -This is possible because function types associate to the right. I.e. -\begin{lstlisting} -T$_1$ => T$_2$ => T$_3$ $\mbox{is equivalent to}$ T$_1$ => (T$_2$ => T$_3$) -\end{lstlisting} - - -\begin{exercise} -1. The \code{sum} function uses a linear recursion. Can you write a -tail-recursive one by filling in the ??'s? - -\begin{lstlisting} -def sum(f: int => double)(a: int, b: int): double = { - def iter(a, result) = { - if (??) ?? - else iter(??, ??) - } - iter(??, ??) -} -\end{lstlisting} -\end{exercise} - -\begin{exercise} -Write a function \code{product} that computes the product of the -values of functions at points over a given range. -\end{exercise} - -\begin{exercise} -Write \code{factorial} in terms of \code{product}. -\end{exercise} - -\begin{exercise} -Can you write an even more general function which generalizes both -\code{sum} and \code{product}? -\end{exercise} - -\section{Example: Finding Fixed Points of Functions} - -A number \code{x} is called a {\em fixed point} of a function \code{f} if -\begin{lstlisting} -f(x) = x . -\end{lstlisting} -For some functions \code{f} we can locate the fixed point by beginning -with an initial guess and then applying \code{f} repeatedly, until the -value does not change anymore (or the change is within a small -tolerance). This is possible if the sequence -\begin{lstlisting} -x, f(x), f(f(x)), f(f(f(x))), ... -\end{lstlisting} -converges to fixed point of $f$. This idea is captured in -the following ``fixed-point finding function'': -\begin{lstlisting} -val tolerance = 0.0001; -def isCloseEnough(x: double, y: double) = abs((x - y) / x) < tolerance; -def fixedPoint(f: double => double)(firstGuess: double) = { - def iterate(guess: double): double = { - val next = f(guess); - if (isCloseEnough(guess, next)) next - else iterate(next) - } - iterate(firstGuess) -} -\end{lstlisting} -We now apply this idea in a reformulation of the square root function. -Let's start with a specification of \code{sqrt}: -\begin{lstlisting} -sqrt(x) = $\mbox{the {\sl y} such that}$ y * y = x - = $\mbox{the {\sl y} such that}$ y = x / y -\end{lstlisting} -Hence, \code{sqrt(x)} is a fixed point of the function \code{y => x / y}. -This suggests that \code{sqrt(x)} can be computed by fixed point iteration: -\begin{lstlisting} -def sqrt(x: double) = fixedPoint(y => x / y)(1.0) -\end{lstlisting} -But if we try this, we find that the computation does not -converge. Let's instrument the fixed point function with a print -statement which keeps track of the current \code{guess} value: -\begin{lstlisting} -def fixedPoint(f: double => double)(firstGuess: double) = { - def iterate(guess: double): double = { - val next = f(guess); - System.out.println(next); - if (isCloseEnough(guess, next)) next - else iterate(next) - } - iterate(firstGuess) -} -\end{lstlisting} -Then, \code{sqrt(2)} yields: -\begin{lstlisting} - 2.0 - 1.0 - 2.0 - 1.0 - 2.0 - ... -\end{lstlisting} -One way to control such oscillations is to prevent the guess from changing too much. -This can be achieved by {\em averaging} successive values of the original sequence: -\begin{lstlisting} -> def sqrt(x: double) = fixedPoint(y => (y + x/y) / 2)(1.0) -def sqrt(x: scala.Double): scala.Double -> sqrt(2.0) - 1.5 - 1.4166666666666665 - 1.4142156862745097 - 1.4142135623746899 - 1.4142135623746899 -\end{lstlisting} -In fact, expanding the \code{fixedPoint} function yields exactly our -previous definition of fixed point from Section~\ref{sec:sqrt}. - -The previous examples showed that the expressive power of a language -is considerably enhanced if functions can be passed as arguments. The -next example shows that functions which return functions can also be -very useful. - -Consider again fixed point iterations. We started with the observation -that $\sqrt(x)$ is a fixed point of the function \code{y => x / y}. -Then we made the iteration converge by averaging successive values. -This technique of {\em average damping} is so general that it -can be wrapped in another function. -\begin{lstlisting} -def averageDamp(f: double => double)(x: double) = (x + f(x)) / 2; -\end{lstlisting} -Using \code{averageDamp}, we can reformulate the square root function -as follows. -\begin{lstlisting} -def sqrt(x: double) = fixedPoint(averageDamp(y => x/y))(1.0); -\end{lstlisting} -This expresses the elements of the algorithm as clearly as possible. - -\begin{exercise} Write a function for cube roots using \code{fixedPoint} and -\code{averageDamp}. -\end{exercise} - -\section{Summary} - -We have seen in the previous chapter that functions are essential -abstractions, because they permit us to introduce general methods of -computing as explicit, named elements in our programming language. -The present chapter has shown that these abstractions can be combined -by higher-order functions to create further abstractions. As -programmers, we should look out for opportunities to abstract and to -reuse. The highest possible level of abstraction is not always the -best, but it is important to know abstraction techniques, so that one -can use abstractions where appropriate. - -\section{Language Elements Seen So Far} - -Chapters~\ref{chap:simple-funs} and \ref{chap:first-class-funs} have -covered Scala's language elements to express expressions and types -comprising of primitive data and functions. The context-free syntax -of these language elements is given below in extended Backus-Naur -form, where `\code{|}' denotes alternatives, \code{[...]} denotes -option (0 or 1 occurrence), and \lstinline@{...}@ denotes repetition -(0 or more occurrences). - -\subsection*{Characters} - -Scala programs are sequences of (Unicode) characters. We distinguish the -following character sets: -\begin{itemize} -\item -whitespace, such as `\code{ }', tabulator, or newline characters, -\item -letters `\code{a}' to `\code{z}', `\code{A}' to `\code{Z}', -\item -digits \code{`0'} to `\code{9}', -\item -the delimiter characters - -\begin{lstlisting} -. , ; ( ) { } [ ] \ $\mbox{\tt "}$ ' -\end{lstlisting} - -\item -operator characters, such as `\code{#}' `\code{+}', -`\code{:}'. Essentially, these are printable characters which are -in none of the character sets above. -\end{itemize} - -\subsection*{Lexemes:} - -\begin{lstlisting} -ident = letter {letter | digit} - | operator { operator } - | ident '_' ident -literal = $\mbox{``as in Java''}$ -\end{lstlisting} - -Literals are as in Java. They define numbers, characters, strings, or -boolean values. Examples of literals as \code{0}, \code{1.0d10}, \code{'x'}, -\code{"he said \"hi!\""}, or \code{true}. - -Identifiers can be of two forms. They either start with a letter, -which is followed by a (possibly empty) sequence of letters or -symbols, or they start with an operator character, which is followed -by a (possibly empty) sequence of operator characters. Both forms of -identifiers may contain underscore characters `\code{_}'. Furthermore, -an underscore character may be followed by either sort of -identifier. Hence, the following are all legal identifiers: -\begin{lstlisting} -x Room10a + -- foldl_: +_vector -\end{lstlisting} -It follows from this rule that subsequent operator-identifiers need to -be separated by whitespace. For instance, the input -\code{x+-y} is parsed as the three token sequence \code{x}, \code{+-}, -\code{y}. If we want to express the sum of \code{x} with the -negated value of \code{y}, we need to add at least one space, -e.g. \code{x+ -y}. - -The \verb@$@ character is reserved for compiler-generated -identifiers; it should not be used in source programs. %$ - -The following are reserved words, they may not be used as identifiers: -\begin{lstlisting}[keywordstyle=] -abstract case catch class def -do else extends false final -finally for if import new -null object override package private -protected return sealed super this -trait try true type val -var while with yield -_ : = => <- <: >: # @ -\end{lstlisting} - -\subsection*{Types:} - -\begin{lstlisting} -Type = SimpleType | FunctionType -FunctionType = SimpleType '=>' Type | '(' [Types] ')' '=>' Type -SimpleType = byte | short | char | int | long | double | float | - boolean | unit | String -Types = Type {`,' Type} -\end{lstlisting} - -Types can be: -\begin{itemize} -\item number types \code{byte}, \code{short}, \code{char}, \code{int}, \code{long}, \code{float} and \code{double} (these are as in Java), -\item the type \code{boolean} with values \code{true} and \code{false}, -\item the type \code{unit} with the only value \code{()}, -\item the type \code{String}, -\item function types such as \code{(int, int) => int} or \code{String => Int => String}. -\end{itemize} - -\subsection*{Expressions:} - -\begin{lstlisting} -Expr = InfixExpr | FunctionExpr | if '(' Expr ')' Expr else Expr -InfixExpr = PrefixExpr | InfixExpr Operator InfixExpr -Operator = ident -PrefixExpr = ['+' | '-' | '!' | '~' ] SimpleExpr -SimpleExpr = ident | literal | SimpleExpr '.' ident | Block -FunctionExpr = Bindings '=>' Expr -Bindings = ident [':' SimpleType] | '(' [Binding {',' Binding}] ')' -Binding = ident [':' Type] -Block = '{' {Def ';'} Expr '}' -\end{lstlisting} - -Expressions can be: -\begin{itemize} -\item -identifiers such as \code{x}, \code{isGoodEnough}, \code{*}, or \code{+-}, -\item -literals, such as \code{0}, \code{1.0}, or \code{"abc"}, -\item -field and method selections, such as \code{System.out.println}, -\item -function applications, such as \code{sqrt(x)}, -\item -operator applications, such as \code{-x} or \code{y + x}, -\item -conditionals, such as \code{if (x < 0) -x else x}, -\item -blocks, such as \lstinline@{ val x = abs(y) ; x * 2 }@, -\item -anonymous functions, such as \code{x => x + 1} or \code{(x: int, y: int) => x + y}. -\end{itemize} - -\subsection*{Definitions:} - -\begin{lstlisting} -Def = FunDef | ValDef -FunDef = 'def' ident {'(' [Parameters] ')'} [':' Type] '=' Expr -ValDef = 'val' ident [':' Type] '=' Expr -Parameters = Parameter {',' Parameter} -Parameter = ['def'] ident ':' Type -\end{lstlisting} -Definitions can be: -\begin{itemize} -\item -function definitions such as \code{def square(x: int): int = x * x}, -\item -value definitions such as \code{val y = square(2)}. -\end{itemize} - -\chapter{Classes and Objects} -\label{chap:classes} - -Scala does not have a built-in type of rational numbers, but it is -easy to define one, using a class. Here's a possible implementation. - -\begin{lstlisting} -class Rational(n: int, d: int) { - private def gcd(x: int, y: int): int = { - if (x == 0) y - else if (x < 0) gcd(-x, y) - else if (y < 0) -gcd(x, -y) - else gcd(y % x, x); - } - private val g = gcd(n, d); - - val numer: int = n/g; - val denom: int = d/g; - def +(that: Rational) = - new Rational(numer * that.denom + that.numer * denom, - denom * that.denom); - def -(that: Rational) = - new Rational(numer * that.denom - that.numer * denom, - denom * that.denom); - def *(that: Rational) = - new Rational(numer * that.numer, denom * that.denom); - def /(that: Rational) = - new Rational(numer * that.denom, denom * that.numer); -} -\end{lstlisting} -This defines \code{Rational} as a class which takes two constructor -arguments \code{n} and \code{d}, containing the number's numerator and -denominator parts. The class provides fields which return these parts -as well as methods for arithmetic over rational numbers. Each -arithmetic method takes as parameter the right operand of the -operation. The left operand of the operation is always the rational -number of which the method is a member. - -\paragraph{Private members} -The implementation of rational numbers defines a private method -\code{gcd} which computes the greatest common denominator of two -integers, as well as a private field \code{g} which contains the -\code{gcd} of the constructor arguments. These members are inaccessible -outside class \code{Rational}. They are used in the implementation of -the class to eliminate common factors in the constructor arguments in -order to ensure that numerator and denominator are always in -normalized form. - -\paragraph{Creating and Accessing Objects} -As an example of how rational numbers can be used, here's a program -that prints the sum of all numbers $1/i$ where $i$ ranges from 1 to 10. -\begin{lstlisting} -var i = 1; -var x = new Rational(0, 1); -while (i <= 10) { - x = x + new Rational(1,i); - i = i + 1; -} -System.out.println("" + x.numer + "/" + x.denom); -\end{lstlisting} -The \code{+} takes as left operand a string and as right operand a -value of arbitrary type. It returns the result of converting its right -operand to a string and appending it to its left operand. - -\paragraph{Inheritance and Overriding} -Every class in Scala has a superclass which it extends. -\comment{Excepted is -only the root class \code{Object}, which does not have a superclass, -and which is indirectly extended by every other class. } -If a class -does not mention a superclass in its definition, the root type -\code{scala.AnyRef} is implicitly assumed (for Java implementations, -this type is an alias for \code{java.lang.Object}. For instance, class -\code{Rational} could equivalently be defined as -\begin{lstlisting} -class Rational(n: int, d: int) extends AnyRef { - ... // as before -} -\end{lstlisting} -A class inherits all members from its superclass. It may also redefine -(or: {\em override}) some inherited members. For instance, class -\code{java.lang.Object} defines -a method -\code{toString} which returns a representation of the object as a string: -\begin{lstlisting} -class Object { - ... - def toString(): String = ... -} -\end{lstlisting} -The implementation of \code{toString} in \code{Object} -forms a string consisting of the object's class name and a number. It -makes sense to redefine this method for objects that are rational -numbers: -\begin{lstlisting} -class Rational(n: int, d: int) extends AnyRef { - ... // as before - override def toString() = "" + numer + "/" + denom; -} -\end{lstlisting} -Note that, unlike in Java, redefining definitions need to be preceded -by an \code{override} modifier. - -If class $A$ extends class $B$, then objects of type $A$ may be used -wherever objects of type $B$ are expected. We say in this case that -type $A$ {\em conforms} to type $B$. For instance, \code{Rational} -conforms to \code{AnyRef}, so it is legal to assign a \code{Rational} -value to a variable of type \code{AnyRef}: -\begin{lstlisting} -var x: AnyRef = new Rational(1,2); -\end{lstlisting} - -\paragraph{Parameterless Methods} -%Also unlike in Java, methods in Scala do not necessarily take a -%parameter list. An example is \code{toString}; the method is invoked -%by simply mentioning its name. For instance: -%\begin{lstlisting} -%val r = new Rational(1,2); -%System.out.println(r.toString()); // prints``1/2'' -%\end{lstlisting} -Unlike in Java, methods in Scala do not necessarily take a -parameter list. An example is the \code{square} method below. This -method is invoked by simply mentioning its name. -\begin{lstlisting} -class Rational(n: int, d: int) extends AnyRef { - ... // as before - def square = new Rational(numer*numer, denom*denom); -} -val r = new Rational(3,4); -System.out.println(r.square); // prints``9/16''* -\end{lstlisting} -That is, parameterless methods are accessed just as value fields such -as \code{numer} are. The difference between values and parameterless -methods lies in their definition. The right-hand side of a value is -evaluated when the object is created, and the value does not change -afterwards. A right-hand side of a parameterless method, on the other -hand, is evaluated each time the method is called. The uniform access -of fields and parameterless methods gives increased flexibility for -the implementer of a class. Often, a field in one version of a class -becomes a computed value in the next version. Uniform access ensures -that clients do not have to be rewritten because of that change. - -\paragraph{Abstract Classes} - -Consider the task of writing a class for sets of integer numbers with -two operations, \code{incl} and \code{contains}. \code{(s incl x)} -should return a new set which contains the element \code{x} together -with all the elements of set \code{s}. \code{(s contains x)} should -return true if the set \code{s} contains the element \code{x}, and -should return \code{false} otherwise. The interface of such sets is -given by: -\begin{lstlisting} -abstract class IntSet { - def incl(x: int): IntSet; - def contains(x: int): boolean; -} -\end{lstlisting} -\code{IntSet} is labeled as an \emph{abstract class}. This has two -consequences. First, abstract classes may have {\em deferred} members -which are declared but which do not have an implementation. In our -case, both \code{incl} and \code{contains} are such members. Second, -because an abstract class might have unimplemented members, no objects -of that class may be created using \code{new}. By contrast, an -abstract class may be used as a base class of some other class, which -implements the deferred members. - -\paragraph{Traits} - -Instead of \code{abstract class} one also often uses the keyword -\code{trait} in Scala. A trait is an abstract class with no state, no -constructor arguments, and no side effects during object -initialization. Since \code{IntSet}'s fall in this category, one can -alternatively define them as traits: -\begin{lstlisting} -trait IntSet { - def incl(x: int): IntSet; - def contains(x: int): boolean; -} -\end{lstlisting} -A trait corresponds to an interface in Java, except -that a trait can also define implemented methods. - -\paragraph{Implementing Abstract Classes} - -Let's say, we plan to implement sets as binary trees. There are two -possible forms of trees. A tree for the empty set, and a tree -consisting of an integer and two subtrees. Here are their -implementations. - -\begin{lstlisting} -class EmptySet extends IntSet { - def contains(x: int): boolean = false; - def incl(x: int): IntSet = new NonEmptySet(x, new EmptySet, new EmptySet); -} -\end{lstlisting} - -\begin{lstlisting} -class NonEmptySet(elem:int, left:IntSet, right:IntSet) extends IntSet { - def contains(x: int): boolean = - if (x < elem) left contains x - else if (x > elem) right contains x - else true; - def incl(x: int): IntSet = - if (x < elem) new NonEmptySet(elem, left incl x, right) - else if (x > elem) new NonEmptySet(elem, left, right incl x) - else this; -} -\end{lstlisting} -Both \code{EmptySet} and \code{NonEmptySet} extend class -\code{IntSet}. This implies that types \code{EmptySet} and -\code{NonEmptySet} conform to type \code{IntSet} -- a value of type \code{EmptySet} or \code{NonEmptySet} may be used wherever a value of type \code{IntSet} is required. - -\begin{exercise} Write methods \code{union} and \code{intersection} to form -the union and intersection between two sets. -\end{exercise} - -\begin{exercise} Add a method -\begin{lstlisting} -def excl(x: int) -\end{lstlisting} -to return the given set without the element \code{x}. To accomplish this, -it is useful to also implement a test method -\begin{lstlisting} -def isEmpty: boolean -\end{lstlisting} -for sets. -\end{exercise} - -\paragraph{Dynamic Binding} - -Object-oriented languages (Scala included) use \emph{dynamic dispatch} -for method invocations. That is, the code invoked for a method call -depends on the run-time type of the object which contains the method. -For example, consider the expression \code{s contains 7} where -\code{s} is a value of declared type \code{s: IntSet}. Which code for -\code{contains} is executed depends on the type of value of \code{s} at run-time. -If it is an \code{EmptySet} value, it is the implementation of \code{contains} in class \code{EmptySet} that is executed, and analogously for \code{NonEmptySet} values. -This behavior is a direct consequence of our substitution model of evaluation. -For instance, -\begin{lstlisting} - (new EmptySet).contains(7) - --> $\rewriteby{by replacing {\sl contains} by its body in class {\sl EmptySet}}$ - - false -\end{lstlisting} -Or, -\begin{lstlisting} - new NonEmptySet(7, new EmptySet, new EmptySet).contains(1) - --> $\rewriteby{by replacing {\sl contains} by its body in class {\sl NonEmptySet}}$ - - if (1 < 7) new EmptySet contains 1 - else if (1 > 7) new EmptySet contains 1 - else true - --> $\rewriteby{by rewriting the conditional}$ - - new EmptySet contains 1 - --> $\rewriteby{by replacing {\sl contains} by its body in class {\sl EmptySet}}$ - - false . -\end{lstlisting} - -Dynamic method dispatch is analogous to higher-order function -calls. In both cases, the identity of code to be executed is known -only at run-time. This similarity is not just superficial. Indeed, -Scala represents every function value as an object (see -Section~\ref{sec:functions}). - - -\paragraph{Objects} - -In the previous implementation of integer sets, empty sets were -expressed with \code{new EmptySet}; so a new object was created every time -an empty set value was required. We could have avoided unnecessary -object creations by defining a value \code{empty} once and then using -this value instead of every occurrence of \code{new EmptySet}. E.g. -\begin{lstlisting} -val EmptySetVal = new EmptySet; -\end{lstlisting} -One problem with this approach is that a value definition such as the -one above is not a legal top-level definition in Scala; it has to be -part of another class or object. Also, the definition of class -\code{EmptySet} now seems a bit of an overkill -- why define a class of objects, -if we are only interested in a single object of this class? A more -direct approach is to use an {\em object definition}. Here is -a more streamlined alternative definition of the empty set: -\begin{lstlisting} -object EmptySet extends IntSet { - def contains(x: int): boolean = false; - def incl(x: int): IntSet = new NonEmptySet(x, EmptySet, EmptySet); -} -\end{lstlisting} -The syntax of an object definition follows the syntax of a class -definition; it has an optional extends clause as well as an optional -body. As is the case for classes, the extends clause defines inherited -members of the object whereas the body defines overriding or new -members. However, an object definition defines a single object only; -it is not possible to create other objects with the same structure -using \code{new}. Therefore, object definitions also lack constructor -parameters, which might be present in class definitions. - -Object definitions can appear anywhere in a Scala program; including -at top-level. Since there is no fixed execution order of top-level -entities in Scala, one might ask exactly when the object defined by an -object definition is created and initialized. The answer is that the -object is created the first time one of its members is accessed. This -strategy is called {\em lazy evaluation}. - -\paragraph{Standard Classes} - -\todo{include picture} - -Scala is a pure object-oriented language. This means that every value -in Scala can be regarded as an object. In fact, even primitive types -such as \code{int} or \code{boolean} are not treated specially. They -are defined as type aliases of Scala classes in module \code{Predef}: -\begin{lstlisting} -type boolean = scala.Boolean; -type int = scala.Int; -type long = scala.Long; -... -\end{lstlisting} -For efficiency, the compiler usually represents values of type -\code{scala.Int} by 32 bit integers, values of type -\code{scala.Boolean} by Java's booleans, etc. But it converts these -specialized representations to objects when required, for instance -when a primitive \code{int} value is passed to a function with a -parameter of type \code{AnyRef}. Hence, the special representation of -primitive values is just an optimization, it does not change the -meaning of a program. - -Here is a specification of class \code{Boolean}. -\begin{lstlisting} -package scala; -trait Boolean { - def && (x: => Boolean): Boolean; - def || (x: => Boolean): Boolean; - def ! : Boolean; - - def == (x: Boolean) : Boolean; - def != (x: Boolean) : Boolean; - def < (x: Boolean) : Boolean; - def > (x: Boolean) : Boolean; - def <= (x: Boolean) : Boolean; - def >= (x: Boolean) : Boolean; -} -\end{lstlisting} -Booleans can be defined using only classes and objects, without -reference to a built-in type of booleans or numbers. A possible -implementation of class \code{Boolean} is given below. This is not -the actual implementation in the standard Scala library. For -efficiency reasons the standard implementation uses built-in -booleans. -\begin{lstlisting} -package scala; -trait Boolean { - def ifThenElse(thenpart: => Boolean, elsepart: => Boolean); - - def && (x: => Boolean): Boolean = ifThenElse(x, false); - def || (x: => Boolean): Boolean = ifThenElse(true, x); - def ! : Boolean = ifThenElse(false, true); - - def == (x: Boolean) : Boolean = ifThenElse(x, x.!); - def != (x: Boolean) : Boolean = ifThenElse(x.!, x); - def < (x: Boolean) : Boolean = ifThenElse(false, x); - def > (x: Boolean) : Boolean = ifThenElse(x.!, false); - def <= (x: Boolean) : Boolean = ifThenElse(x, true); - def >= (x: Boolean) : Boolean = ifThenElse(true, x.!); -} -case object True extends Boolean { - def ifThenElse(t: => Boolean, e: => Boolean) = t; -} -case object False extends Boolean { - def ifThenElse(t: => Boolean, e: => Boolean) = e; -} -\end{lstlisting} -Here is a partial specification of class \code{Int}. - -\begin{lstlisting} -package scala; -trait Int extends AnyVal { - def coerce: Long; - def coerce: Float; - def coerce: Double; - - def + (that: Double): Double; - def + (that: Float): Float; - def + (that: Long): Long; - def + (that: Int): Int; // analogous for -, *, /, % - - def << (cnt: Int): Int; // analogous for >>, >>> - - def & (that: Long): Long; - def & (that: Int): Int; // analogous for |, ^ - - def == (that: Double): Boolean; - def == (that: Float): Boolean; - def == (that: Long): Boolean; // analogous for !=, <, >, <=, >= -} -\end{lstlisting} - -Class \code{Int} can in principle also be implemented using just -objects and classes, without reference to a built in type of -integers. To see how, we consider a slightly simpler problem, namely -how to implement a type \code{Nat} of natural (i.e. non-negative) -numbers. Here is the definition of a trait \code{Nat}: -\begin{lstlisting} -trait Nat { - def isZero: Boolean; - def predecessor: Nat; - def successor: Nat; - def + (that: Nat): Nat; - def - (that: Nat): Nat; -} -\end{lstlisting} -To implement the operations of class \code{Nat}, we define a sub-object -\code{Zero} and a subclass \code{Succ} (for successor). Each number -\code{N} is represented as \code{N} applications of the \code{Succ} -constructor to \code{Zero}: -\[ -\underbrace{\mbox{\sl new Succ( ... new Succ}}_{\mbox{$N$ times}}\mbox{\sl (Zero) ... )} -\] -The implementation of the \code{Zero} object is straightforward: -\begin{lstlisting} -object Zero extends Nat { - def isZero: Boolean = true; - def predecessor: Nat = throw new Error("negative number"); - def successor: Nat = new Succ(Zero); - def + (that: Nat): Nat = that; - def - (that: Nat): Nat = if (that.isZero) Zero - else throw new Error("negative number") -} -\end{lstlisting} - -The implementation of the predecessor and subtraction functions on -\code{Zero} throws an \code{Error} exception, which aborts the program -with the given error message. - -Here is the implementation of the successor class: -\begin{lstlisting} -class Succ(x: Nat) extends Nat { - def isZero: Boolean = false; - def predecessor: Nat = x; - def successor: Nat = new Succ(this); - def + (that: Nat): Nat = x + that.successor; - def - (that: Nat): Nat = x - that.predecessor; -} -\end{lstlisting} -Note the implementation of method \code{successor}. To create the -successor of a number, we need to pass the object itself as an -argument to the \code{Succ} constructor. The object itself is -referenced by the reserved name \code{this}. - -The implementations of \code{+} and \code{-} each contain a recursive -call with the constructor argument as receiver. The recursion will -terminate once the receiver is the \code{Zero} object (which is -guaranteed to happen eventually because of the way numbers are formed). - -\begin{exercise} Write an implementation \code{Integer} of integer numbers -The implementation should support all operations of class \code{Nat} -while adding two methods -\begin{lstlisting} -def isPositive: Boolean; -def negate: Integer; -\end{lstlisting} -The first method should return \code{true} if the number is positive. The second method should negate the number. -Do not use any of Scala's standard numeric classes in your -implementation. (Hint: There are two possible ways to implement -\code{Integer}. One can either make use the existing implementation of -\code{Nat}, representing an integer as a natural number and a sign. -Or one can generalize the given implementation of \code{Nat} to -\code{Integer}, using the three subclasses \code{Zero} for 0, -\code{Succ} for positive numbers and \code{Pred} for negative numbers.) -\end{exercise} - - - -\subsection*{Language Elements Introduced In This Chapter} - -\textbf{Types:} -\begin{lstlisting} -Type = ... | ident -\end{lstlisting} - -Types can now be arbitrary identifiers which represent classes. - -\textbf{Expressions:} -\begin{lstlisting} -Expr = ... | Expr '.' ident | 'new' Expr | 'this' -\end{lstlisting} - -An expression can now be an object creation, or -a selection \code{E.m} of a member \code{m} -from an object-valued expression \code{E}, or it can be the reserved name \code{this}. - -\textbf{Definitions and Declarations:} -\begin{lstlisting} -Def = FunDef | ValDef | ClassDef | TraitDef | ObjectDef -ClassDef = ['abstract'] 'class' ident ['(' [Parameters] ')'] - ['extends' Expr] [`{' {TemplateDef} `}'] -TraitDef = 'trait' ident ['extends' Expr] ['{' {TemplateDef} '}'] -ObjectDef = 'object' ident ['extends' Expr] ['{' {ObjectDef} '}'] -TemplateDef = [Modifier] (Def | Dcl) -ObjectDef = [Modifier] Def -Modifier = 'private' | 'override' -Dcl = FunDcl | ValDcl -FunDcl = 'def' ident {'(' [Parameters] ')'} ':' Type -ValDcl = 'val' ident ':' Type -\end{lstlisting} - -A definition can now be a class, trait or object definition such as -\begin{lstlisting} -class C(params) extends B { defs } -trait T extends B { defs } -object O extends B { defs } -\end{lstlisting} -The definitions \code{defs} in a class, trait or object may be -preceded by modifiers \code{private} or \code{override}. - -Abstract classes and traits may also contain declarations. These -introduce {\em deferred} functions or values with their types, but do -not give an implementation. Deferred members have to be implemented in -subclasses before objects of an abstract class or trait can be created. - -\chapter{Case Classes and Pattern Matching} - -Say, we want to write an interpreter for arithmetic expressions. To -keep things simple initially, we restrict ourselves to just numbers -and \code{+} operations. Such expressions can be represented as a class hierarchy, with an abstract base class \code{Expr} as the root, and two subclasses \code{Number} and -\code{Sum}. Then, an expression \code{1 + (3 + 7)} would be represented as -\begin{lstlisting} -new Sum(new Number(1), new Sum(new Number(3), new Number(7))) -\end{lstlisting} -Now, an evaluator of an expression like this needs to know of what -form it is (either \code{Sum} or \code{Number}) and also needs to -access the components of the expression. The following -implementation provides all necessary methods. -\begin{lstlisting} -trait Expr { - def isNumber: boolean; - def isSum: boolean; - def numValue: int; - def leftOp: Expr; - def rightOp: Expr; -} -class Number(n: int) extends Expr { - def isNumber: boolean = true; - def isSum: boolean = false; - def numValue: int = n; - def leftOp: Expr = throw new Error("Number.leftOp"); - def rightOp: Expr = throw new Error("Number.rightOp"); -} -class Sum(e1: Expr, e2: Expr) extends Expr { - def isNumber: boolean = false; - def isSum: boolean = true; - def numValue: int = throw new Error("Sum.numValue"); - def leftOp: Expr = e1; - def rightOp: Expr = e2; -} -\end{lstlisting} -With these classification and access methods, writing an evaluator function is simple: -\begin{lstlisting} -def eval(e: Expr): int = { - if (e.isNumber) e.numValue - else if (e.isSum) eval(e.leftOp) + eval(e.rightOp) - else throw new Error("unrecognized expression kind") -} -\end{lstlisting} -However, defining all these methods in classes \code{Sum} and -\code{Number} is rather tedious. Furthermore, the problem becomes worse -when we want to add new forms of expressions. For instance, consider -adding a new expression form -\code{Prod} for products. Not only do we have to implement a new class \code{Prod}, with all previous classification and access methods; we also have to introduce a -new abstract method \code{isProduct} in class \code{Expr} and -implement that method in subclasses \code{Number}, \code{Sum}, and -\code{Prod}. Having to modify existing code when a system grows is always problematic, since it introduces versioning and maintenance problems. - -The promise of object-oriented programming is that such modifications -should be unnecessary, because they can be avoided by re-using -existing, unmodified code through inheritance. Indeed, a more -object-oriented decomposition of our problem solves the problem. The -idea is to make the ``high-level'' operation \code{eval} a method of -each expression class, instead of implementing it as a function -outside the expression class hierarchy, as we have done -before. Because \code{eval} is now a member of all expression nodes, -all classification and access methods become superfluous, and the implementation is simplified considerably: -\begin{lstlisting} -trait Expr { - def eval: int; -} -class Number(n: int) extends Expr { - def eval: int = n; -} -class Sum(e1: Expr, e2: Expr) extends Expr { - def eval: int = e1.eval + e2.eval; -} -\end{lstlisting} -Furthermore, adding a new \code{Prod} class does not entail any changes to existing code: -\begin{lstlisting} -class Prod(e1: Expr, e2: Expr) extends Expr { - def eval: int = e1.eval * e2.eval; -} -\end{lstlisting} - -The conclusion we can draw from this example is that object-oriented -decomposition is the technique of choice for constructing systems that -should be extensible with new types of data. But there is also another -possible way we might want to extend the expression example. We might -want to add new {\em operations} on expressions. For instance, we might -want to add an operation that pretty-prints an expression tree to standard output. - -If we have defined all classification and access methods, such an -operation can easily be written as an external function. Here is an -implementation: -\begin{lstlisting} -def print(e: Expr): unit = - if (e.isNumber) System.out.print(e.numValue) - else if (e.isSum) { - System.out.print("("); - print(e.leftOp); - System.out.print("+"); - print(e.rightOp); - System.out.print(")"); - } else throw new Error("unrecognized expression kind"); -\end{lstlisting} -However, if we had opted for an object-oriented decomposition of -expressions, we would need to add a new \code{print} method -to each class: -\begin{lstlisting} -trait Expr { - def eval: int; - def print: unit; -} -class Number(n: int) extends Expr { - def eval: int = n; - def print: unit = System.out.print(n); -} -class Sum(e1: Expr, e2: Expr) extends Expr { - def eval: int = e1.eval + e2.eval; - def print: unit = { - System.out.print("("); - print(e1); - System.out.print("+"); - print(e2); - System.out.print(")"); -} -\end{lstlisting} -Hence, classical object-oriented decomposition requires modification -of all existing classes when a system is extended with new operations. - -As yet another way we might want to extend the interpreter, consider -expression simplification. For instance, we might want to write a -function which rewrites expressions of the form -\code{a * b + a * c} to \code{a * (b + c)}. This operation requires inspection of -more than a single node of the expression tree at the same -time. Hence, it cannot be implemented by a method in each expression -kind, unless that method can also inspect other nodes. So we are -forced to have classification and access methods in this case. This -seems to bring us back to square one, with all the problems of -verbosity and extensibility. - -Taking a closer look, one observers that the only purpose of the -classification and access functions is to {\em reverse} the data -construction process. They let us determine, first, which sub-class -of an abstract base class was used and, second, what were the -constructor arguments. Since this situation is quite common, Scala has -a way to automate it with case classes. - -\section{Case Classes and Case Objects} - -{\em Case classes} and {\em case objects} are defined like a normal -classes or objects, except that the definition is prefixed with the modifier -\code{case}. For instance, the definitions -\begin{lstlisting} -trait Expr; -case class Number(n: int) extends Expr; -case class Sum(e1: Expr, e2: Expr) extends Expr; -\end{lstlisting} -introduce \code{Number} and \code{Sum} as case classes. -The \code{case} modifier in front of a class or object -definition has the following effects. -\begin{enumerate} -\item Case classes implicitly come with a constructor function, with the same name as the class. In our example, the two functions -\begin{lstlisting} -def Number(n: int) = new Number(n); -def Sum(e1: Expr, e2: Expr) = new Sum(e1, e2); -\end{lstlisting} -would be added. Hence, one can now construct expression trees a bit more concisely, as in -\begin{lstlisting} -Sum(Sum(Number(1), Number(2)), Number(3)) -\end{lstlisting} -\item Case classes and case objects -implicitly come with implementations of methods -\code{toString}, \code{equals} and \code{hashCode}, which override the -methods with the same name in class \code{AnyRef}. The implementation -of these methods takes in each case the structure of a member of a -case class into account. The \code{toString} method represents an -expression tree the way it was constructed. So, -\begin{lstlisting} -Sum(Sum(Number(1), Number(2)), Number(3)) -\end{lstlisting} -would be converted to exactly that string, whereas the default -implementation in class \code{AnyRef} would return a string consisting -of the outermost constructor name \code{Sum} and a number. The -\code{equals} methods treats two case members of a case class as equal -if they have been constructed with the same constructor and with -arguments which are themselves pairwise equal. This also affects the -implementation of \code{==} and \code{!=}, which are implemented in -terms of \code{equals} in Scala. So, -\begin{lstlisting} -Sum(Number(1), Number(2)) == Sum(Number(1), Number(2)) -\end{lstlisting} -will yield \code{true}. If \code{Sum} or \code{Number} were not case -classes, the same expression would be \code{false}, since the standard -implementation of \code{equals} in class \code{AnyRef} always treats -objects created by different constructor calls as being different. -The \code{hashCode} method follows the same principle as other two -methods. It computes a hash code from the case class constructor name -and the hash codes of the constructor arguments, instead of from the object's -address, which is what the as the default implementation of \code{hashCode} does. -\item -Case classes implicitly come with nullary accessor methods which -retrieve the constructor arguments. -In our example, \code{Number} would obtain an accessor method -\begin{lstlisting} -def n: int; -\end{lstlisting} -which returns the constructor parameter \code{n}, whereas \code{Sum} would obtain two accessor methods -\begin{lstlisting} -def e1: Expr, e2: Expr; -\end{lstlisting} -Hence, if for a value \code{s} of type \code{Sum}, say, one can now -write \code{s.e1}, to access the left operand. However, for a value -\code{e} of type \code{Expr}, the term \code{e.e1} would be illegal -since \code{e1} is defined in \code{Sum}; it is not a member of the -base class \code{Expr}. -So, how do we determine the constructor and access constructor -arguments for values whose static type is the base class \code{Expr}? -This is solved by the fourth and final particularity of case classes. -\item -Case classes allow the constructions of {\em patterns} which refer to -the case class constructor. -\end{enumerate} - -\section{Pattern Matching} - -Pattern matching is a generalization of C or Java's \code{switch} -statement to class hierarchies. Instead of a \code{switch} statement, -there is a standard method \code{match}, which is defined in Scala's -root class \code{Any}, and therefore is available for all objects. -The \code{match} method takes as argument a number of cases. -For instance, here is an implementation of \code{eval} using -pattern matching. -\begin{lstlisting} -def eval(e: Expr): int = e match { - case Number(x) => x - case Sum(l, r) => eval(l) + eval(r) -} -\end{lstlisting} -In this example, there are two cases. Each case associates a pattern -with an expression. Patterns are matched against the selector -values \code{e}. The first pattern in our example, -\code{Number(n)}, matches all values of the form \code{Number(v)}, -where \code{v} is an arbitrary value. In that case, the {\em pattern -variable} \code{n} is bound to the value \code{v}. Similarly, the -pattern \code{Sum(l, r)} matches all selector values of form -\code{Sum(v}$_1$\code{, v}$_2$\code{)} and binds the pattern variables -\code{l} and \code{r} -to \code{v}$_1$ and \code{v}$_2$, respectively. - -In general, patterns are built from -\begin{itemize} -\item Case class constructors, e.g. \code{Number}, \code{Sum}, whose arguments - are again patterns, -\item pattern variables, e.g. \code{n}, \code{e1}, \code{e2}, -\item the ``wildcard'' pattern \code{_}, -\item literals, e.g. \code{1}, \code{true}, "abc", -\item constant identifiers, e.g. \code{MAXINT}, \code{EmptySet}. -\end{itemize} -Pattern variables always start with a lower-case letter, so that they -can be distinguished from constant identifiers, which start with an -upper case letter. Each variable name may occur only once in a -pattern. For instance, \code{Sum(x, x)} would be illegal as a pattern, -since the pattern variable \code{x} occurs twice in it. - -\paragraph{Meaning of Pattern Matching} -A pattern matching expression -\begin{lstlisting} -e match { case p$_1$ => e$_1$ ... case p$_n$ => e$_n$ } -\end{lstlisting} -matches the patterns $p_1 \commadots p_n$ in the order they -are written against the selector value \code{e}. -\begin{itemize} -\item -A constructor pattern $C(p_1 \commadots p_n)$ matches all values that -are of type \code{C} (or a subtype thereof) and that have been constructed with -\code{C}-arguments matching patterns $p_1 \commadots p_n$. -\item -A variable pattern \code{x} matches any value and binds the variable -name to that value. -\item -The wildcard pattern `\code{_}' matches any value but does not bind a name to that value. -\item A constant pattern \code{C} matches a value which is -equal (in terms of \code{==}) to \code{C}. -\end{itemize} -The pattern matching expression rewrites to the right-hand-side of the -first case whose pattern matches the selector value. References to -pattern variables are replaced by corresponding constructor arguments. -If none of the patterns matches, the pattern matching expression is -aborted with a \code{MatchError} exception. - -\example Our substitution model of program evaluation extends quite naturally to pattern matching, For instance, here is how \code{eval} applied to a simple expression is re-written: -\begin{lstlisting} - eval(Sum(Number(1), Number(2))) - --> $\mbox{\tab\tab\rm(by rewriting the application)}$ - - Sum(Number(1), Number(2)) match { - case Number(n) => n - case Sum(e1, e2) => eval(e1) + eval(e2) - } - --> $\mbox{\tab\tab\rm(by rewriting the pattern match)}$ - - eval(Number(1)) + eval(Number(2)) - --> $\mbox{\tab\tab\rm(by rewriting the first application)}$ - - Number(1) match { - case Number(n) => n - case Sum(e1, e2) => eval(e1) + eval(e2) - } + eval(Number(2)) - --> $\mbox{\tab\tab\rm(by rewriting the pattern match)}$ - - 1 + eval(Number(2)) - -->$^*$ 1 + 2 -> 3 -\end{lstlisting} - -\paragraph{Pattern Matching and Methods} -In the previous example, we have used pattern -matching in a function which was defined outside the class hierarchy -over which it matches. Of course, it is also possible to define a -pattern matching function in that class hierarchy itself. For -instance, we could have defined -\code{eval} is a method of the base class \code{Expr}, and still have used pattern matching in its implementation: -\begin{lstlisting} -trait Expr { - def eval: int = this match { - case Number(n) => n - case Sum(e1, e2) => e1.eval + e2.eval - } -} -\end{lstlisting} - -\begin{exercise} Consider the following definitions representing trees -of integers. These definitions can be seen as an alternative -representation of \code{IntSet}: -\begin{lstlisting} -trait IntTree; -case object EmptyTree extends IntTree; -case class Node(elem: int, left: IntTree, right: IntTree) extends IntTree; -\end{lstlisting} -Complete the following implementations of function \code{contains} and \code{insert} for -\code{IntTree}'s. -\begin{lstlisting} -def contains(t: IntTree, v: int): boolean = t match { ... - ... -} -def insert(t: IntTree, v: int): IntTree = t match { ... - ... -} -\end{lstlisting} -\end{exercise} - -\paragraph{Pattern Matching Anonymous Functions} - -So far, case-expressions always appeared in conjunction with a -\verb@match@ operation. But it is also possible to use -case-expressions by themselves. A block of case-expressions such as -\begin{lstlisting} -{ case $P_1$ => $E_1$ ... case $P_n$ => $E_n$ } -\end{lstlisting} -is seen by itself as a function which matches its arguments -against the patterns $P_1 \commadots P_n$, and produces the result of -one of $E_1 \commadots E_n$. (If no pattern matches, the function -would throw a \code{MatchError} exception instead). -In other words, the expression above is seen as a shorthand for the anonymous function -\begin{lstlisting} -(x => x match { case $P_1$ => $E_1$ ... case $P_n$ => $E_n$ }) -\end{lstlisting} -where \code{x} is a fresh variable which is not used -otherwise in the expression. - -\chapter{Generic Types and Methods} - -Classes in Scala can have type parameters. We demonstrate the use of -type parameters with functional stacks as an example. Say, we want to -write a data type of stacks of integers, with methods \code{push}, -\code{top}, \code{pop}, and \code{isEmpty}. This is achieved by the -following class hierarchy: -\begin{lstlisting} -trait IntStack { - def push(x: int): IntStack = new IntNonEmptyStack(x, this); - def isEmpty: boolean; - def top: int; - def pop: IntStack; -} -class IntEmptyStack extends IntStack { - def isEmpty = true; - def top = throw new Error("EmptyStack.top"); - def pop = throw new Error("EmptyStack.pop"); -} -class IntNonEmptyStack(elem: int, rest: IntStack) { - def isEmpty = false; - def top = elem; - def pop = rest; -} -\end{lstlisting} -Of course, it would also make sense to define an abstraction for a -stack of Strings. To do that, one could take the existing abstraction -for \code{IntStack}, rename it to \code{StringStack} and at the same -time rename all occurrences of type \code{int} to \code{String}. - -A better way, which does not entail code duplication, is to -parameterize the stack definitions with the element type. -Parameterization lets us generalize from a specific instance of a -problem to a more general one. So far, we have used parameterization -only for values, but it is available also for types. To arrive at a -{\em generic} version of \code{Stack}, we equip it with a type -parameter. -\begin{lstlisting} -trait Stack[a] { - def push(x: a): Stack[a] = new NonEmptyStack[a](x, this); - def isEmpty: boolean - def top: a; - def pop: Stack[a]; -} -class EmptyStack[a] extends Stack[a] { - def isEmpty = true; - def top = throw new Error("EmptyStack.top"); - def pop = throw new Error("EmptyStack.pop"); -} -class NonEmptyStack[a](elem: a, rest: Stack[a]) extends Stack[a] { - def isEmpty = false; - def top = elem; - def pop = rest; -} -\end{lstlisting} -In the definitions above, `\code{a}' is a {\em type parameter} of -class \code{Stack} and its subclasses. Type parameters are arbitrary -names; they are enclosed in brackets instead of parentheses, so that -they can be easily distinguished from value parameters. Here is an -example how the generic classes are used: -\begin{lstlisting} -val x = new EmptyStack[int]; -val y = x.push(1).push(2); -System.out.println(y.pop.top); -\end{lstlisting} -The first line creates a new empty stack of \code{int}'s. Note the -actual type argument \code{[int]} which replaces the formal type -parameter \code{a}. - -It is also possible to parameterize methods with types. As an example, -here is a generic method which determines whether one stack is a -prefix of another. -\begin{lstlisting} -def isPrefix[a](p: Stack[a], s: Stack[a]): boolean = { - p.isEmpty || - p.top == s.top && isPrefix[a](p.pop, s.pop); -} -\end{lstlisting} -parameters are called {\em polymorphic}. Generic methods are also -called {\em polymorphic}. The term comes from the Greek, where it -means ``having many forms''. To apply a polymorphic method such as -\code{isPrefix}, we pass type parameters as well as value parameters -to it. For instance, -\begin{lstlisting} -val s1 = new EmptyStack[String].push("abc"); -val s2 = new EmptyStack[String].push("abx").push(s.pop) -System.out.println(isPrefix[String](s1, s2)); -\end{lstlisting} - -\paragraph{Local Type Inference} -Passing type parameters such as \code{[int]} or \code{[String]} all -the time can become tedious in applications where generic functions -are used a lot. Quite often, the information in a type parameter is -redundant, because the correct parameter type can also be determined -by inspecting the function's value parameters or expected result type. -Taking the expression \code{isPrefix[String](s1, s2)} as an -example, we know that its value parameters are both of type -\code{Stack[String]}, so we can deduce that the type parameter must -be \code{String}. Scala has a fairly powerful type inferencer which -allows one to omit type parameters to polymorphic functions and -constructors in situations like these. In the example above, one -could have written \code{isPrefix(s1, s2)} and the missing type argument -\code{[String]} would have been inserted by the type inferencer. - -\section{Type Parameter Bounds} - -Now that we know how to make classes generic it is natural to -generalize some of the earlier classes we have written. For instance -class \code{IntSet} could be generalized to sets with arbitrary -element types. Let's try. The trait for generic sets is easily -written. -\begin{lstlisting} -trait Set[a] { - def incl(x: a): Set[a]; - def contains(x: a): boolean; -} -\end{lstlisting} -However, if we still want to implement sets as binary search trees, we -encounter a problem. The \code{contains} and \code{incl} methods both -compare elements using methods \code{<} and \code{>}. For -\code{IntSet} this was OK, since type \code{int} has these two -methods. But for an arbitrary type parameter \code{a}, we cannot -guarantee this. Therefore, the previous implementation of, say, -\code{contains} would generate a compiler error. -\begin{lstlisting} - def contains(x: int): boolean = - if (x < elem) left contains x - ^ < $\mbox{\sl not a member of type}$ a. -\end{lstlisting} -One way to solve the problem is to restrict the legal types that can -be substituted for type \code{a} to only those types that contain methods -\code{<} and \code{>} of the correct types. There is a trait -\code{Ord[a]} in the standard class library Scala which represents -values which are comparable (via \code{<} and \code{>}) to values of -type \code{a}. We can enforce the comparability of a type by demanding -that the type is a subtype of \code{Ord}. This is done by giving an -upper bound to the type parameter of \code{Set}: -\begin{lstlisting} -trait Set[a <: Ord[a]] { - def incl(x: a): Set[a]; - def contains(x: a): boolean; -} -\end{lstlisting} -The parameter declaration \code{a <: Ord[a]} introduces \code{a} as a -type parameter which must be a subtype of \code{Ord[a]}, i.e.\ its values -must be comparable to values of the same type. - -With this restriction, we can now implement the rest of the generic -set abstraction as we did in the case of \code{IntSet}s before. - -\begin{lstlisting} -class EmptySet[a <: Ord[a]] extends Set[a] { - def contains(x: a): boolean = false; - def incl(x: a): Set[a] = new NonEmptySet(x, new EmptySet[a], new EmptySet[a]); -} -\end{lstlisting} - -\begin{lstlisting} -class NonEmptySet[a <: Ord[a]] - (elem:a, left: Set[a], right: Set[a]) extends Set[a] { - def contains(x: a): boolean = - if (x < elem) left contains x - else if (x > elem) right contains x - else true; - def incl(x: a): Set[a] = - if (x < elem) new NonEmptySet(elem, left incl x, right) - else if (x > elem) new NonEmptySet(elem, left, right incl x) - else this; -} -\end{lstlisting} -Note that we have left out the type argument in the object creations -\code{new NonEmptySet(...)}. In the same way as for polymorphic methods, -missing type arguments in constructor calls are inferred from value -arguments and/or the expected result type. - -Here is an example that uses the generic set abstraction. -\begin{lstlisting} -val s = new EmptySet[double].incl(1.0).incl(2.0); -s.contains(1.5) -\end{lstlisting} -This is OK, as type \code{double} implements trait \code{Ord[double]}. -However, the following example is in error. -\begin{lstlisting} -val s = new EmptySet[java.io.File] - ^ java.io.File $\mbox{\sl does not conform to type}$ - $\mbox{\sl parameter bound}$ Ord[java.io.File]. -\end{lstlisting} -To conclude the discussion of type parameter -bounds, here is the definition of trait \code{Ord} in scala. -\begin{lstlisting} -package scala; -trait Ord[t <: Ord[t]]: t { - def < (that: t): Boolean; - def <=(that: t): Boolean = this < that || this == that; - def > (that: t): Boolean = that < this; - def >=(that: t): Boolean = that <= this; -} -\end{lstlisting} - -\section{Variance Annotations}\label{sec:first-arrays} - -The combination of type parameters and subtyping poses some -interesting questions. For instance, should \code{Stack[String]} be a -subtype of \code{Stack[AnyRef]}? Intuitively, this seems OK, since a -stack of \code{String}s is a special case of a stack of -\code{AnyRef}s. More generally, if \code{T} is a subtype of type \code{S} -then \code{Stack[T]} should be a subtype of \code{Stack[S]}. -This property is called {\em co-variant} subtyping. - -In Scala, generic types have by default non-variant subtyping. That -is, with \code{Stack} defined as above, stacks with different element -types would never be in a subtype relation. However, we can enforce -co-variant subtyping of stacks by changing the first line of the -definition of class \code{Stack} as follows. -\begin{lstlisting} -class Stack[+a] { -\end{lstlisting} -Prefixing a formal type parameter with a \code{+} indicates that -subtyping is covariant in that parameter. -Besides \code{+}, there is also a prefix \code{-} which indicates -contra-variant subtyping. If \code{Stack} was defined \code{class -Stack[-a] ...}, then \code{T} a subtype of type \code{S} would imply -that \code{Stack[S]} is a subtype of \code{Stack[T]} (which in the -case of stacks would be rather surprising!). - -In a purely functional world, all types could be co-variant. However, -the situation changes once we introduce mutable data. Consider the -case of arrays in Java or .NET. Such arrays are represented in Scala -by a generic class \code{Array}. Here is a partial definition of this -class. -\begin{lstlisting} -class Array[a] { - def apply(index: int): a; - def update(index: int, elem: a): unit; -} -\end{lstlisting} -The class above defines the way Scala arrays are seen from Scala user -programs. The Scala compiler will map this abstraction to the -underlying arrays of the host system in most cases where this -possible. - -In Java, arrays are indeed covariant; that is, for reference types -\code{T} and \code{S}, if \code{T} is a subtype of \code{S}, then also -\code{Array[T]} is a subtype of \code{Array[S]}. This might seem -natural but leads to safety problems that require special runtime -checks. Here is an example: -\begin{lstlisting} -val x = new Array[String](1); -val y: Array[Any] = x; -y(0) = new Rational(1, 2); // this is syntactic sugar for - // y.update(0, new Rational(1, 2)); -\end{lstlisting} -In the first line, a new array of strings is created. In the second -line, this array is bound to a variable \code{y}, of type -\code{Array[Any]}. Assuming arrays are covariant, this is OK, since -\code{Array[String]} is a subtype of \code{Array[Any]}. Finally, in -the last line a rational number is stored in the array. This is also -OK, since type \code{Rational} is a subtype of the element type -\code{Any} of the array \code{y}. We thus end up storing a rational -number in an array of strings, which clearly violates type soundness. - -Java solves this problem by introducing a run-time check in the third -line which tests whether the stored element is compatible with the -element type with which the array was created. We have seen in the -example that this element type is not necessarily the static element -type of the array being updated. If the test fails, an -\code{ArrayStoreException} is raised. - -Scala solves this problem instead statically, by disallowing the -second line at compile-time, because arrays in Scala have non-variant -subtyping. This raises the question how a Scala compiler verifies that -variance annotations are correct. If we had simply declared arrays -co-variant, how would the potential problem have been detected? - -Scala uses a conservative approximation to verify soundness of -variance annotations. A covariant type parameter of a class may only -appear in co-variant positions inside the class. Among the co-variant -positions are the types of values in the class, the result types of -methods in the class, and type arguments to other covariant types. Not -co-variant are types of formal method parameters. Hence, the following -class definition would have been rejected -\begin{lstlisting} -class Array[+a] { - def apply(index: int): a; - def update(index: int, elem: a): unit; - ^ $\mbox{\sl covariant type parameter}$ a - $\mbox{\sl appears in contravariant position.}$ -} -\end{lstlisting} -So far, so good. Intuitively, the compiler was correct in rejecting -the \code{update} method in a co-variant class because \code{update} -potentially changes state, and therefore undermines the soundness of -co-variant subtyping. - -However, there are also methods which do not mutate state, but where a -type parameter still appears contra-variantly. An example is -\code{push} in type \code{Stack}. Again the Scala compiler will reject -the definition of this method for co-variant stacks. -\begin{lstlisting} -class Stack[+a] { - def push(x: a): Stack[a] = - ^ $\mbox{\sl covariant type parameter}$ a - $\mbox{\sl appears in contravariant position.}$ -\end{lstlisting} -This is a pity, because, unlike arrays, stacks are purely functional data -structures and therefore should enable co-variant subtyping. However, -there is a a way to solve the problem by using a polymorphic method -with a lower type parameter bound. - -\section{Lower Bounds} - -We have seen upper bounds for type parameters. In a type parameter -declaration such as \code{t <: U}, the type parameter \code{t} is -restricted to range only over subtypes of type \code{U}. Symmetrical -to this are lower bounds in Scala. In a type parameter declaration -\code{t >: L}, the type parameter \code{t} is restricted to range only -over {\em supertypes} of type \code{L}. (One can also combine lower and -upper bounds, as in \code{t >: L <: U}.) - -Using lower bounds, we can generalize the \code{push} method in -\code{Stack} as follows. -\begin{lstlisting} -class Stack[+a] { - def push[b >: a](x: b): Stack[b] = new NonEmptyStack(x, this); -\end{lstlisting} -Technically, this solves our variance problem since now the type -parameter \code{a} appears no longer as a parameter type of method -\code{push}. Instead, it appears as lower bound for another type -parameter of a method, which is classified as a co-variant position. -Hence, the Scala compiler accepts the new definition of \code{push}. - -In fact, we have not only solved the technical variance problem but -also have generalized the definition of \code{push}. Before, we were -required to push only elements with types that conform to the declared -element type of the stack. Now, we can push also elements of a -supertype of this type, but the type of the returned stack will change -accordingly. For instance, we can now push an \code{AnyRef} onto a -stack of \code{String}s, but the resulting stack will be a stack of -\code{AnyRef}s instead of a stack of \code{String}s! - -In summary, one should not hesitate to add variance annotations to -your data structures, as this yields rich natural subtyping -relationships. The compiler will detect potential soundness -problems. Even if the compiler's approximation is too conservative, as -in the case of method \code{push} of class \code{Stack}, this will -often suggest a useful generalization of the contested method. - -\section{Least Types} - -Scala does not allow one to parameterize objects with types. That's -why we originally defined a generic class \code{EmptyStack[a]}, even -though a single value denoting empty stacks of arbitrary type would -do. For co-variant stacks, however, one can use the following idiom: -\begin{lstlisting} -object EmptyStack extends Stack[All] { ... } -\end{lstlisting} -The identifier \code{All} refers to the bottom type \code{scala.All}, -which is a subtype of all other types. Hence, for co-variant stacks, -\code{Stack[All]} is a subtype of \code{Stack[T]}, for any other type -\code{T}. This makes it possible to use a single empty stack object -in user code. For instance: -\begin{lstlisting} -val s = EmptyStack.push("abc").push(new AnyRef()); -\end{lstlisting} -Let's analyze the type assignment for this expression in detail. The -\code{EmptyStack} object is of type \code{Stack[All]}, which has a -method -\begin{lstlisting} -push[b >: All](elem: b): Stack[b] . -\end{lstlisting} -Local type inference will determine that the type parameter \code{b} -should be instantiated to \code{String} in the application -\code{EmptyStack.push("abc")}. The result type of that application is hence -\code{Stack[String]}, which in turn has a method -\begin{lstlisting} -push[b >: String](elem: b): Stack[b] . -\end{lstlisting} -The final part of the value definition above is the application of -this method to \code{new AnyRef()}. Local type inference will -determine that the type parameter \code{b} should this time be -instantiated to \code{AnyRef}, with result type \code{Stack[AnyRef]}. -Hence, the type assigned to value \code{s} is \code{Stack[AnyRef]}. - -Besides \code{scala.All}, which is a subtype of every other type, -there is also the type \code{scala.AllRef}, which is a subtype of -\code{scala.AnyRef}, and every type derived from it. The \code{null} -literal in Scala is of that type. This makes \code{null} compatible -with every reference type, but not with a value type such as -\code{int}. - -We conclude this section with the complete improved definition of -stacks. Stacks have now co-variant subtyping, the \code{push} method -has been generalized, and the empty stack is represented by a single -object. -\begin{lstlisting} -trait Stack[+a] { - def push[b >: a](x: b): Stack[b] = new NonEmptyStack(x, this); - def isEmpty: boolean; - def top: a; - def pop: Stack[a]; -} -object EmptyStack extends Stack[All] { - def isEmpty = true; - def top = throw new Error("EmptyStack.top"); - def pop = throw new Error("EmptyStack.pop"); -} -class NonEmptyStack[+a](elem: a, rest: Stack[a]) extends Stack[a] { - def isEmpty = false; - def top = elem; - def pop = rest; -} -\end{lstlisting} -Many classes in the Scala library are generic. We now present two -commonly used families of generic classes, tuples and functions. The -discussion of another common class, lists, is deferred to the next -chapter. - -\section{Tuples} - -Sometimes, a function needs to return more than one result. For -instance, take the function \code{divmod} which returns the integer quotient -and rest of two given integer arguments. Of course, one can define a -class to hold the two results of \code{divmod}, as in: -\begin{lstlisting} -case class TwoInts(first: int, second: int); -def divmod(x: int, y: int): TwoInts = new TwoInts(x / y, x % y); -\end{lstlisting} -However, having to define a new class for every possible pair of -result types is very tedious. In Scala one can use instead a -the generic classes \lstinline@Tuple$n$@, for each $n$ between -2 and 9. As an example, here is the definition of Tuple2. -\begin{lstlisting} -package scala; -case class Tuple2[a, b](_1: a, _2: b); -\end{lstlisting} -With \code{Tuple2}, the \code{divmod} method can be written as follows. -\begin{lstlisting} -def divmod(x: int, y: int) = new Tuple2[int, int](x / y, x % y); -\end{lstlisting} -As usual, type parameters to constructors can be omitted if they are -deducible from value arguments. Also, Scala defines an alias -\code{Pair} for \code{Tuple2} (as well as \code{Triple} for \code{Tuple3}). -With these conventions, \code{divmod} can equivalently be written as -follows. -\begin{lstlisting} -def divmod(x: int, y: int) = Pair(x / y, x % y); -\end{lstlisting} -How are elements of tuples accessed? Since tuples are case classes, -there are two possibilities. One can either access a tuple's fields -using the names of the constructor parameters \lstinline@_$i$@, as in the following example: -\begin{lstlisting} -val xy = divmod(x, y); -System.out.println("quotient: " + x._1 + ", rest: " + x._2); -\end{lstlisting} -Or one uses pattern matching on tuples, as in the following example: -\begin{lstlisting} -divmod(x, y) match { - case Pair(n, d) => - System.out.println("quotient: " + n + ", rest: " + d); -} -\end{lstlisting} -Note that type parameters are never used in patterns; it would have -been illegal to write case \code{Pair[int, int](n, d)}. - -\section{Functions}\label{sec:functions} - -Scala is a functional language in that functions are first-class -values. Scala is also an object-oriented language in that every value -is an object. It follows that functions are objects in Scala. For -instance, a function from type \code{String} to type \code{int} is -represented as an instance of the trait \code{Function1[String, int]}. -The \code{Function1} trait is defined as follows. -\begin{lstlisting} -package scala; -trait Function1[-a, +b] { - def apply(x: a): b; -} -\end{lstlisting} -Besides \code{Function1}, there are also definitions of -\code{Function0} and \code{Function2} up to \code{Function9} in the -standard Scala library. That is, there is one definition for each -possible number of function parameters between 0 and 9. Scala's -function type syntax ~\lstinline@$T_1 \commadots T_n$ => $S$@~ is -simply an abbreviation for the parameterized type -~\lstinline@Function$n$[$T_1 \commadots T_n, S$]@~. - -Scala uses the same syntax $f(x)$ for function application, no matter -whether $f$ is a method or a function object. This is made possible by -the following convention: A function application $f(x)$ where $f$ is -an object (as opposed to a method) is taken to be a shorthand for -\lstinline@$f$.apply($x$)@. Hence, the \code{apply} method of a -function type is inserted automatically where this is necessary. - -That's also why we defined array subscripting in -Section~\ref{sec:first-arrays} by an \code{apply} method. For any -array \code{a}, the subscript operation \code{a(i)} is taken to be a -shorthand for \code{a.apply(i)}. - -Functions are an example where a contra-variant type parameter -declaration is useful. For example, consider the following code: -\begin{lstlisting} -val f: (AnyRef => int) = x => x.hashCode(); -val g: (String => int) = f; -g("abc") -\end{lstlisting} -It's sound to bind the value \code{g} of type \code{String => int} to -\code{f}, which is of type \code{AnyRef => int}. Indeed, all one can -do with function of type \code{String => int} is pass it a string in -order to obtain an integer. Clearly, the same works for function -\code{f}: If we pass it a string (or any other object), we obtain an -integer. This demonstrates that function subtyping is contra-variant -in its argument type whereas it is covariant in its result type. -In short, $S \Rightarrow T$ is a subtype of $S' \Rightarrow T'$, provided -$S'$ is a subtype of $S$ and $T$ is a subtype of $T'$. - -\example Consider the Scala code -\begin{lstlisting} -val plus1: (int => int) = (x: int) => x + 1; -plus1(2) -\end{lstlisting} -This is expanded into the following object code. -\begin{lstlisting} -val plus1: Function1[int, int] = new Function1[int, int] { - def apply(x: int): int = x + 1; -} -plus1.apply(2) -\end{lstlisting} -Here, the object creation \lstinline@new Function1[int, int]{ ... }@ -represents an instance of an {\em anonymous class}. It combines the -creation of a new \code{Function1} object with an implementation of -the \code{apply} method (which is abstract in \code{Function1}). -Equivalently, but more verbosely, one could have used a local class: -\begin{lstlisting} -val plus1: Function1[int, int] = { - class Local extends Function1[int, int] { - def apply(x: int): int = x + 1; - } - new Local: Function1[int, int] -} -plus1.apply(2) -\end{lstlisting} - -\chapter{Lists} - -Lists are an important data structure in many Scala programs. -A list containing the elements \code{x}$_1$, \ldots, \code{x}$_n$ is written -\code{List(x}$_1$\code{, ..., x}$_n$\code{)}. Examples are: -\begin{lstlisting} -val fruit = List("apples", "oranges", "pears"); -val nums = List(1, 2, 3, 4); -val diag3 = List(List(1, 0, 0), List(0, 1, 0), List(0, 0, 1)); -val empty = List(); -\end{lstlisting} -Lists are similar to arrays in languages such as C or Java, but there -are also three important differences. First, lists are immutable. That -is, elements of a list cannot be changed by assignment. Second, -lists have a recursive structure, whereas arrays are flat. Third, -lists support a much richer set of operations than arrays usually do. - -\section{Using Lists} - -\paragraph{The List type} -Like arrays, lists are {\em homogeneous}. That is, the elements of a -list all have the same type. The type of a list with elements of type -\code{T} is written \code{List[T]} (compare to \code{T[]} in Java). -\begin{lstlisting} -val fruit: List[String] = List("apples", "oranges", "pears"); -val nums : List[int] = List(1, 2, 3, 4); -val diag3: List[List[int]] = List(List(1, 0, 0), List(0, 1, 0), List(0, 0, 1)); -val empty: List[int] = List(); -\end{lstlisting} - -\paragraph{List constructors} -All lists are built from two more fundamental constructors, \code{Nil} -and \code{::} (pronounced ``cons''). \code{Nil} represents an empty -list. The infix operator \code{::} expresses list extension. That is, -\code{x :: xs} represents a list whose first element is \code{x}, -which is followed by (the elements of) list \code{xs}. Hence, the -list values above could also have been defined as follows (in fact -their previous definition is simply syntactic sugar for the definitions below). -\begin{lstlisting} -val fruit = "apples" :: ("oranges" :: ("pears" :: Nil)); -val nums = 1 :: (2 :: (3 :: (4 :: Nil))); -val diag3 = (1 :: (0 :: (0 :: Nil))) :: - (0 :: (1 :: (0 :: Nil))) :: - (0 :: (0 :: (1 :: Nil))) :: Nil; -val empty = Nil; -\end{lstlisting} -The `\code{::}' operation associates to the right: \code{A :: B :: C} is -interpreted as \code{A :: (B :: C)}. Therefore, we can drop the -parentheses in the definitions above. For instance, we can write -shorter -\begin{lstlisting} -val nums = 1 :: 2 :: 3 :: 4 :: Nil; -\end{lstlisting} - -\paragraph{Basic operations on lists} -All operations on lists can be expressed in terms of the following three: - -\begin{tabular}{ll} -\code{head} & returns the first element of a list,\\ -\code{tail} & returns the list consisting of all elements except the\\ -& first element,\\ -\code{isEmpty} & returns \code{true} iff the list is empty -\end{tabular} - -These operations are defined as methods of list objects. So we invoke -them by selecting from the list that's operated on. Examples: -\begin{lstlisting} -empty.isEmpty = true -fruit.isEmpty = false -fruit.head = "apples" -fruit.tail.head = "oranges" -diag3.head = List(1, 0, 0) -\end{lstlisting} -The \code{head} and \code{tail} methods are defined only for non-empty -lists. When selected from an empty list, they throw an exception. - -As an example of how lists can be processed, consider sorting the -elements of a list of numbers into ascending order. One simple way to -do so is {\em insertion sort}, which works as follows: To sort a -non-empty list with first element \code{x} and rest \code{xs}, sort -the remainder \code{xs} and insert the element \code{x} at the right -position in the result. Sorting an empty list will yield the -empty list. Expressed as Scala code: -\begin{lstlisting} -def isort(xs: List[int]): List[int] = - if (xs.isEmpty) Nil - else insert(xs.head, isort(xs.tail)); -\end{lstlisting} - -\begin{exercise} Provide an implementation of the missing function -\code{insert}. -\end{exercise} - -\paragraph{List patterns} In fact, \code{::} is defined as a case -class in Scala's standard library. Hence, it is possible to decompose -lists by pattern matching, using patterns composed from the \code{Nil} -and \code{::} constructors. For instance, \code{isort} can be written -alternatively as follows. -\begin{lstlisting} -def isort(xs: List[int]): List[int] = xs match { - case List() => List() - case x :: xs1 => insert(x, isort(xs1)) -} -\end{lstlisting} -where -\begin{lstlisting} -def insert(x: int, xs: List[int]): List[int] = xs match { - case List() => List(x) - case y :: ys => if (x <= y) x :: xs else y :: insert(x, ys) -} -\end{lstlisting} - -\section{Definition of class List I: First Order Methods} -\label{sec:list-first-order} - -Lists are not built in in Scala; they are defined by an abstract class -\code{List}, which comes with two subclasses for \code{::} and \code{Nil}. -In the following we present a tour through class \code{List}. -\begin{lstlisting} -package scala; -abstract class List[+a] { -\end{lstlisting} -\code{List} is an abstract class, so one cannot define elements by -calling the empty \code{List} constructor (e.g. by -\code{new List}). The class has a type parameter \code{a}. It is -co-variant in this parameter, which means that -\code{List[S] <: List[T]} for all types \code{S} and \code{T} such that -\code{S <: T}. The class is situated in the package -\code{scala}. This is a package containing the most important standard -classes of Scala. - \code{List} defines a number of methods, which are -explained in the following. - -\paragraph{Decomposing lists} -First, there are the three basic methods \code{isEmpty}, -\code{head}, \code{tail}. Their implementation in terms of pattern -matching is straightforward: -\begin{lstlisting} -def isEmpty: boolean = match { - case Nil => true - case x :: xs => false -} -def head: a = match { - case Nil => throw new Error("Nil.head") - case x :: xs => x -} -def tail: List[a] = match { - case Nil => throw new Error("Nil.tail") - case x :: xs => x -} -\end{lstlisting} - -The next function computes the length of a list. -\begin{lstlisting} -def length = match { - case Nil => 0 - case x :: xs => 1 + xs.length -} -\end{lstlisting} -\begin{exercise} Design a tail-recursive version of \code{length}. -\end{exercise} - -The next two functions are the complements of \code{head} and -\code{tail}. -\begin{lstlisting} -def last: a; -def init: List[a]; -\end{lstlisting} -\code{xs.last} returns the last element of list \code{xs}, whereas -\code{xs.init} returns all elements of \code{xs} except the last. -Both functions have to traverse the entire list, and are thus less -efficient than their \code{head} and \code{tail} analogues. -Here is the implementation of \code{last}. -\begin{lstlisting} -def last: a = match { - case Nil => throw new Error("Nil.last") - case x :: Nil => x - case x :: xs => xs.last -} -\end{lstlisting} -The implementation of \code{init} is analogous. - -The next three functions return a prefix of the list, or a suffix, or -both. -\begin{lstlisting} -def take(n: int): List[a] = - if (n == 0 || isEmpty) Nil else head :: tail.take(n-1); - -def drop(n: int): List[a] = - if (n == 0 || isEmpty) this else tail.drop(n-1); - -def split(n: int): Pair[List[a], List[a]] = Pair(take(n), drop(n)); -\end{lstlisting} -\code{(xs take n)} returns the first \code{n} elements of list -\code{xs}, or the whole list, if its length is smaller than \code{n}. -\code{(xs drop n)} returns all elements of \code{xs} except the -\code{n} first ones. Finally, \code{(xs split n)} returns a pair -consisting of the lists resulting from \code{xs take n} and -\code{xs drop n}. - -The next function returns an element at a given index in a list. -It is thus analogous to array subscripting. Indices start at 0. -\begin{lstlisting} -def apply(n: int): a = drop(n).head; -\end{lstlisting} -The \code{apply} method has a special meaning in Scala. An object with -an \code{apply} method can be applied to arguments as if it was a -function. For instance, to pick the 3'rd element of a list \code{xs}, -one can write either \code{xs.apply(3)} or \code{xs(3)} -- the latter -expression expands into the first. - -With \code{take} and \code{drop}, we can extract sublists consisting -of consecutive elements of the original list. To extract the sublist -$xs_m \commadots xs_{n-1}$ of a list \code{xs}, use: - -\begin{lstlisting} -xs.drop(m).take(n - m) -\end{lstlisting} - -\paragraph{Zipping lists} The next function combines two lists into a list of pairs. -Given two lists -\begin{lstlisting} -xs = List(x$_1$, ..., x$_n$) $\mbox{\rm, and}$ -ys = List(y$_1$, ..., y$_n$) , -\end{lstlisting} -\code{xs zip ys} constructs the list -\code{List(Pair(x}$_1$\code{, y}$_1$\code{), ..., Pair(x}$_n$\code{, y}$_n$\code{))}. -If the two lists have different lengths, the longer one of the two is -truncated. Here is the definition of \code{zip} -- note that it is a -polymorphic method. -\begin{lstlisting} -def zip[b](that: List[b]): List[Pair[a,b]] = - if (this.isEmpty || that.isEmpty) Nil - else Pair(this.head, that.head) :: (this.tail zip that.tail); -\end{lstlisting} - -\paragraph{Consing lists.} -Like any infix operator, \code{::} -is also implemented as a method of an object. In this case, the object -is the list that is extended. This is possible, because operators -ending with a `\code{:}' character are treated specially in Scala. -All such operators are treated as methods of their right operand. E.g., -\begin{lstlisting} - x :: y = y.::(x) $\mbox{\rm whereas}$ x + y = x.+(y) -\end{lstlisting} -Note, however, that operands of a binary operation are in each case -evaluated from left to right. So, if \code{D} and \code{E} are -expressions with possible side-effects, \code{D :: E} is translated to -\lstinline@{val x = D; E.::(x)}@ in order to maintain the left-to-right -order of operand evaluation. - -Another difference between operators ending in a `\code{:}' and other -operators concerns their associativity. Operators ending in -`\code{:}' are right-associative, whereas other operators are -left-associative. E.g., -\begin{lstlisting} - x :: y :: z = x :: (y :: z) $\mbox{\rm whereas}$ x + y + z = (x + y) + z -\end{lstlisting} -The definition of \code{::} as a method in -class \code{List} is as follows: -\begin{lstlisting} -def ::[b >: a](x: b): List[b] = new scala.::(x, this); -\end{lstlisting} -Note that \code{::} is defined for all elements \code{x} of type -\code{B} and lists of type \code{List[A]} such that the type \code{B} -of \code{x} is a supertype of the list's element type \code{A}. The result -is in this case a list of \code{B}'s. This -is expressed by the type parameter \code{b} with lower bound \code{a} -in the signature of \code{::}. - -\paragraph{Concatenating lists} -An operation similar to \code{::} is list concatenation, written -`\code{:::}'. The result of \code{(xs ::: ys)} is a list consisting of -all elements of \code{xs}, followed by all elements of \code{ys}. -Because it ends in a colon, \code{:::} is right-associative and is -considered as a method of its right-hand operand. Therefore, -\begin{lstlisting} -xs ::: ys ::: zs = xs ::: (ys ::: zs) - = zs.:::(ys).:::(xs) -\end{lstlisting} -Here is the implementation of the \code{:::} method: -\begin{lstlisting} - def :::[b >: a](prefix: List[b]): List[b] = prefix match { - case Nil => this - case p :: ps => this.:::(ps).::(p) - } -\end{lstlisting} - -\paragraph{Reversing lists} Another useful operation -is list reversal. There is a method \code{reverse} in \code{List} to -that effect. Let's try to give its implementation: -\begin{lstlisting} -def reverse[a](xs: List[a]): List[a] = xs match { - case Nil => Nil - case x :: xs => reverse(xs) ::: List(x) -} -\end{lstlisting} -This implementation has the advantage of being simple, but it is not -very efficient. Indeed, one concatenation is executed for every -element in the list. List concatenation takes time proportional to the -length of its first operand. Therefore, the complexity of -\code{reverse(xs)} is -\[ -n + (n - 1) + ... + 1 = n(n+1)/2 -\] -where $n$ is the length of \code{xs}. Can \code{reverse} be -implemented more efficiently? We will see later that there exists -another implementation which has only linear complexity. - -\section{Example: Merge sort} - -The insertion sort presented earlier in this chapter is simple to -formulate, but also not very efficient. It's average complexity is -proportional to the square of the length of the input list. We now -design a program to sort the elements of a list which is more -efficient than insertion sort. A good algorithm for this is {\em merge -sort}, which works as follows. - -First, if the list has zero or one elements, it is already sorted, so -one returns the list unchanged. Longer lists are split into two -sub-lists, each containing about half the elements of the original -list. Each sub-list is sorted by a recursive call to the sort -function, and the resulting two sorted lists are then combined in a -merge operation. - -For a general implementation of merge sort, we still have to specify -the type of list elements to be sorted, as well as the function to be -used for the comparison of elements. We obtain a function of maximal -generality by passing these two items as parameters. This leads to the -following implementation. -\begin{lstlisting} -def msort[a](less: (a, a) => boolean)(xs: List[a]): List[a] = { - def merge(xs1: List[a], xs2: List[a]): List[a] = - if (xs1.isEmpty) xs2 - else if (xs2.isEmpty) xs1 - else if (less(xs1.head, xs2.head)) xs1.head :: merge(xs1.tail, xs2) - else xs2.head :: merge(xs1, xs2.tail); - val n = xs.length/2; - if (n == 0) xs - else merge(msort(less)(xs take n), msort(less)(xs drop n)) -} -\end{lstlisting} -The complexity of \code{msort} is $O(N;log(N))$, where $N$ is the -length of the input list. To see why, note that splitting a list in -two and merging two sorted lists each take time proportional to the -length of the argument list(s). Each recursive call of \code{msort} -halves the number of elements in its input, so there are $O(log(N))$ -consecutive recursive calls until the base case of lists of length 1 -is reached. However, for longer lists each call spawns off two -further calls. Adding everything up we obtain that at each of the -$O(log(N))$ call levels, every element of the original lists takes -part in one split operation and in one merge operation. Hence, every -call level has a total cost proportional to $O(N)$. Since there are -$O(log(N))$ call levels, we obtain an overall cost of -$O(N;log(N))$. That cost does not depend on the initial distribution -of elements in the list, so the worst case cost is the same as the -average case cost. This makes merge sort an attractive algorithm for -sorting lists. - -Here is an example how \code{msort} is used. -\begin{lstlisting} -msort(x: int, y: int => x < y)(List(5, 7, 1, 3)) -\end{lstlisting} -The definition of \code{msort} is curried, to make it easy to specialize it with particular -comparison functions. For instance, -\begin{lstlisting} - -val intSort = msort(x: int, y: int => x < y) -val reverseSort = msort(x: int, y: int => x > y) -\end{lstlisting} - -\section{Definition of class List II: Higher-Order Methods} - -The examples encountered so far show that functions over lists often -have similar structures. We can identify several patterns of -computation over lists, like: -\begin{itemize} - \item transforming every element of a list in some way. - \item extracting from a list all elements satisfying a criterion. - \item combine the elements of a list using some operator. -\end{itemize} -Functional programming languages enable programmers to write general -functions which implement patterns like this by means of higher order -functions. We now discuss a set of commonly used higher-order -functions, which are implemented as methods in class \code{List}. - -\paragraph{Mapping over lists} -A common operation is to transform each element of a list and then -return the lists of results. For instance, to scale each element of a -list by a given factor. -\begin{lstlisting} -def scaleList(xs: List[double], factor: double): List[double] = xs match { - case Nil => xs - case x :: xs1 => x * factor :: scaleList(xs1, factor) -} -\end{lstlisting} -This pattern can be generalized to the \code{map} method of class \code{List}: -\begin{lstlisting} -abstract class List[a] { ... - def map[b](f: a => b): List[b] = this match { - case Nil => this - case x :: xs => f(x) :: xs.map(f) - } -\end{lstlisting} -Using \code{map}, \code{scaleList} can be more concisely written as follows. -\begin{lstlisting} -def scaleList(xs: List[double], factor: double) = - xs map (x => x * factor); -\end{lstlisting} - -As another example, consider the problem of returning a given column -of a matrix which is represented as a list of rows, where each row is -again a list. This is done by the following function \code{column}. - -\begin{lstlisting} -def column[a](xs: List[List[a[]], index: int): List[a] = - xs map (row => row at index); -\end{lstlisting} - -Closely related to \code{map} is the \code{foreach} method, which -applies a given function to all elements of a list, but does not -construct a list of results. The function is thus applied only for its -side effect. \code{foreach} is defined as follows. -\begin{lstlisting} - def foreach(f: a => unit): unit = this match { - case Nil => () - case x :: xs => f(x) ; xs.foreach(f) - } -\end{lstlisting} -This function can be used for printing all elements of a list, for instance: -\begin{lstlisting} - xs foreach (x => System.out.println(x)) -\end{lstlisting} - -\begin{exercise} Consider a function which squares all elements of a list and -returns a list with the results. Complete the following two equivalent -definitions of \code{squareList}. - -\begin{lstlisting} -def squareList(xs: List[int]): List[int] = xs match { - case List() => ?? - case y :: ys => ?? -} -def squareList(xs: List[int]): List[int] = - xs map ?? -\end{lstlisting} -\end{exercise} - -\paragraph{Filtering Lists} -Another common operation selects from a list all elements fulfilling a -given criterion. For instance, to return a list of all positive -elements in some given lists of integers: -\begin{lstlisting} -def posElems(xs: List[int]): List[int] = xs match { - case Nil => xs - case x :: xs1 => if (x > 0) x :: posElems(xs1) else posElems(xs1) -} -\end{lstlisting} -This pattern is generalized to the \code{filter} method of class \code{List}: -\begin{lstlisting} - def filter(p: a => boolean): List[a] = this match { - case Nil => this - case x :: xs => if (p(x)) x :: xs.filter(p) else xs.filter(p) - } -\end{lstlisting} -Using \code{filter}, \code{posElems} can be more concisely written as -follows. -\begin{lstlisting} -def posElems(xs: List[int]): List[int] = - xs filter (x => x > 0); -\end{lstlisting} - -An operation related to filtering is testing whether all elements of a -list satisfy a certain condition. Dually, one might also be interested -in the question whether there exists an element in a list that -satisfies a certain condition. These operations are embodied in the -higher-order functions \code{forall} and \code{exists} of class -\code{List}. -\begin{lstlisting} -def forall(p: a => Boolean): Boolean = - isEmpty || (p(head) && (tail forall p)); -def exists(p: a => Boolean): Boolean = - !isEmpty && (p(head) || (tail exists p)); -\end{lstlisting} -To illustrate the use of \code{forall}, consider the question whether -a number if prime. Remember that a number $n$ is prime of it can be -divided without remainder only by one and itself. The most direct -translation of this definition would test that $n$ divided by all -numbers from 2 up to and excluding itself gives a non-zero -remainder. This list of numbers can be generated using a function -\code{List.range} which is defined in object \code{List} as follows. -\begin{lstlisting} -package scala; -object List { ... - def range(from: int, end: int): List[int] = - if (from >= end) Nil else from :: range(from + 1, end); -\end{lstlisting} -For example, \code{List.range(2, n)} -generates the list of all integers from 2 up to and excluding $n$. -The function \code{isPrime} can now simply be defined as follows. -\begin{lstlisting} -def isPrime(n: int) = - List.range(2, n) forall (x => n % x != 0); -\end{lstlisting} -We see that the mathematical definition of prime-ness has been -translated directly into Scala code. - -Exercise: Define \code{forall} and \code{exists} in terms of \code{filter}. - - -\paragraph{Folding and Reducing Lists} -Another common operation is to combine the elements of a list with -some operator. For instance: -\begin{lstlisting} -sum(List(x$_1$, ..., x$_n$)) = 0 + x$_1$ + ... + x$_n$ -product(List(x$_1$, ..., x$_n$)) = 1 * x$_1$ * ... * x$_n$ -\end{lstlisting} -Of course, we can implement both functions with a -recursive scheme: -\begin{lstlisting} -def sum(xs: List[int]): int = xs match { - case Nil => 0 - case y :: ys => y + sum(ys) -} -def product(xs: List[int]): int = xs match { - case Nil => 1 - case y :: ys => y * product(ys) -} -\end{lstlisting} -But we can also use the generalization of this program scheme embodied -in the \code{reduceLeft} method of class \code{List}. This method -inserts a given binary operator between adjacent elements of a given list. -E.g.\ -\begin{lstlisting} -List(x$_1$, ..., x$_n$).reduceLeft(op) = (...(x$_1$ op x$_2$) op ... ) op x$_n$ -\end{lstlisting} -Using \code{reduceLeft}, we can make the common pattern -in \code{sum} and \code{product} apparent: -\begin{lstlisting} -def sum(xs: List[int]) = (0 :: xs) reduceLeft {(x, y) => x + y}; -def product(xs: List[int]) = (1 :: xs) reduceLeft {(x, y) => x * y}; -\end{lstlisting} -Here is the implementation of \code{reduceLeft}. -\begin{lstlisting} - def reduceLeft(op: (a, a) => a): a = this match { - case Nil => throw new Error("Nil.reduceLeft") - case x :: xs => (xs foldLeft x)(op) - } - def foldLeft[b](z: b)(op: (b, a) => b): b = this match { - case Nil => z - case x :: xs => (xs foldLeft op(z, x))(op) - } -} -\end{lstlisting} -We see that the \code{reduceLeft} method is defined in terms of -another generally useful method, \code{foldLeft}. The latter takes as -additional parameter an {\em accumulator} \code{z}, which is returned -when \code{foldLeft} is applied on an empty list. That is, -\begin{lstlisting} -(List(x$_1$, ..., x$_n$) foldLeft z)(op) = (...(z op x$_1$) op ... ) op x$_n$ -\end{lstlisting} -The \code{sum} and \code{product} methods can be defined alternatively -using \code{foldLeft}: -\begin{lstlisting} -def sum(xs: List[int]) = (xs foldLeft 0) {(x, y) => x + y}; -def product(xs: List[int]) = (xs foldLeft 1) {(x, y) => x * y}; -\end{lstlisting} - -\paragraph{FoldRight and ReduceRight} -Applications of \code{foldLeft} and \code{reduceLeft} expand to -left-leaning trees. \todo{insert pictures}. They have duals -\code{foldRight} and \code{reduceRight}, which produce right-leaning -trees. -\begin{lstlisting} -List(x$_1$, ..., x$_n$).reduceRight(op) = x$_1$ op ( ... (x$_{n-1}$ op x$_n$)...) -(List(x$_1$, ..., x$_n$) foldRight acc)(op) = x$_1$ op ( ... (x$_n$ op acc)...) -\end{lstlisting} -These are defined as follows. -\begin{lstlisting} - def reduceRight(op: (a, a) => a): a = match - case Nil => throw new Error("Nil.reduceRight") - case x :: Nil => x - case x :: xs => op(x, xs.reduceRight(op)) - } - def foldRight[b](z: b)(op: (a, b) => b): b = match { - case Nil => z - case x :: xs => op(x, (xs foldRight z)(op)) - } -\end{lstlisting} - -Class \code{List} defines also two symbolic abbreviations for -\code{foldLeft} and \code{foldRight}: -\begin{lstlisting} - def /:[b](z: b)(f: (b, a) => b): b = foldLeft(z)(f); - def :\[b](z: b)(f: (a, b) => b): b = foldRight(z)(f); -\end{lstlisting} -The method names picture the left/right leaning trees of the fold -operations by forward or backward slashes. The \code{:} points in each -case to the list argument whereas the end of the slash points to the -accumulator (or: zero) argument \code{z}. -That is, -\begin{lstlisting} -(z /: List(x$_1$, ..., x$_n$))(op) = (...(z op x$_1$) op ... ) op x$_n$ -(List(x$_1$, ..., x$_n$) :\ z)(op) = x$_1$ op ( ... (x$_n$ op acc)...) -\end{lstlisting} -For associative and commutative operators, \code{/:} and -\code{:\\} are equivalent (even though there may be a difference -in efficiency). -%But sometimes, only one of the two operators is -%appropriate or has the right type: - -\begin{exercise} Consider the problem of writing a function \code{flatten}, -which takes a list of element lists as arguments. The result of -\code{flatten} should be the concatenation of all element lists into a -single list. Here is the an implementation of this method in terms of -\code{:\\}. -\begin{lstlisting} -def flatten[a](xs: List[List[a]]): List[a] = - (xs :\ (Nil: List[a])) {(x, xs) => x ::: xs}; -\end{lstlisting} -Consider replacing the first part of the body of \lstinline@flatten@ -by \lstinline@(Nil /: xs)@. What would be the difference in asymptotoc -complexity between the two versions of \lstinline@flatten@? - -In fact \code{flatten} is predefined together with a set of other -userful function in an object called \code{List} in the standatd Scala -library. It can be accessed from user program by calling -\code{List.flatten}. Note that \code{flatten} is not a method of class -\code{List} -- it would not make sense there, since it applies only -to lists of lists, not to all lists in general. -\end{exercise} - -\paragraph{List Reversal Again} We have seen in -Section~\ref{sec:list-first-order} an implementation of method -\code{reverse} whose run-time was quadratic in the length of the list -to be reversed. We now develop a new implementation of \code{reverse}, -which has linear cost. The idea is to use a \code{foldLeft} -operation based on the following program scheme. -\begin{lstlisting} -class List[+a] { ... - def reverse: List[a] = (z? /: this)(op?); -\end{lstlisting} -It only remains to fill in the \code{z?} and \code{op?} parts. Let's -try to deduce them from examples. -\begin{lstlisting} - Nil -= Nil.reverse // by specification -= (z /: Nil)(op) // by the template for reverse -= (Nil foldLeft z)(op) // by the definition of /: -= z // by definition of foldLeft -\end{lstlisting} -Hence, \code{z?} must be \code{Nil}. To deduce the second operand, -let's study reversal of a list of length one. -\begin{lstlisting} - List(x) -= List(x).reverse // by specification -= (Nil /: List(x))(op) // by the template for reverse, with z = Nil -= (List(x) foldLeft Nil)(op) // by the definition of /: -= op(Nil, x) // by definition of foldLeft -\end{lstlisting} -Hence, \code{op(Nil, x)} equals \code{List(x)}, which is the same -as \code{x :: Nil}. This suggests to take as \code{op} the -\code{::} operator with its operands exchanged. Hence, we arrive at -the following implementation for \code{reverse}, which has linear complexity. -\begin{lstlisting} -def reverse: List[a] = - ((Nil: List[a]) /: this) {(xs, x) => x :: xs}; -\end{lstlisting} -(Remark: The type annotation of \code{Nil} is necessary -to make the type inferencer work.) - -\begin{exercise} Fill in the missing expressions to complete the following -definitions of some basic list-manipulation operations as fold -operations. -\begin{lstlisting} -def mapFun[a, b](xs: List[a], f: a => b): List[b] = - (xs :\ List[b]()){ ?? }; - -def lengthFun[a](xs: List[a]): int = - (0 /: xs){ ?? }; -\end{lstlisting} -\end{exercise} - -\paragraph{Nested Mappings} - -We can employ higher-order list processing functions to express many -computations that are normally expressed as nested loops in imperative -languages. - -As an example, consider the following problem: Given a positive -integer $n$, find all pairs of positive integers $i$ and $j$, where -$1 \leq j < i < n$ such that $i + j$ is prime. For instance, if $n = 7$, -the pairs are -\bda{c|lllllll} -i & 2 & 3 & 4 & 4 & 5 & 6 & 6\\ -j & 1 & 2 & 1 & 3 & 2 & 1 & 5\\ \hline -i + j & 3 & 5 & 5 & 7 & 7 & 7 & 11 -\eda - -A natural way to solve this problem consists of two steps. In a first step, -one generates the sequence of all pairs $(i, j)$ of integers such that -$1 \leq j < i < n$. In a second step one then filters from this sequence -all pairs $(i, j)$ such that $i + j$ is prime. - -Looking at the first step in more detail, a natural way to generate -the sequence of pairs consists of three sub-steps. First, generate -all integers between $1$ and $n$ for $i$. -\item -Second, for each integer $i$ between $1$ and $n$, generate the list of -pairs $(i, 1)$ up to $(i, i-1)$. This can be achieved by a -combination of \code{range} and \code{map}: -\begin{lstlisting} - List.range(1, i) map (x => Pair(i, x)) -\end{lstlisting} -Finally, combine all sublists using \code{foldRight} with \code{:::}. -Putting everything together gives the following expression: -\begin{lstlisting} -List.range(1, n) - .map(i => List.range(1, i).map(x => Pair(i, x))) - .foldRight(List[Pair[int, int]]()) {(xs, ys) => xs ::: ys} - .filter(pair => isPrime(pair._1 + pair._2)) -\end{lstlisting} - -\paragraph{Flattening Maps} -The combination of mapping and then concatenating sublists -resulting from the map -is so common that we there is a special method -for it in class \code{List}: -\begin{lstlisting} -abstract class List[+a] { ... - def flatMap[b](f: a => List[b]): List[b] = match { - case Nil => Nil - case x :: xs => f(x) ::: (xs flatMap f) - } -} -\end{lstlisting} -With \code{flatMap}, the pairs-whose-sum-is-prime expression -could have been written more concisely as follows. -\begin{lstlisting} -List.range(1, n) - .flatMap(i => List.range(1, i).map(x => Pair(i, x))) - .filter(pair => isPrime(pair._1 + pair._2)) -\end{lstlisting} - - - -\section{Summary} - -This chapter has ingtroduced lists as a fundamental data structure in -programming. Since lists are immutable, they are a common data type in -functional programming languages. They play there a role comparable to -arrays in imperative languages. However, the access patterns between -arrays and lists are quite different. Where array accessing is always -done by indexing, this is much less common for lists. We have seen -that \code{scala.List} defines a method called \code{apply} for indexing; -however this operation is much more costly than in the case of arrays -(linear as opposed to constant time). Instead of indexing, lists are -usually traversed recursively, where recursion steps are usually based -on a pattern match over the traversed list. There is also a rich set of -higher-order combinators which allow one to instantiate a set of -predefined patterns of computations over lists. - -\comment{ -\bsh{Reasoning About Lists} - -Recall the concatenation operation for lists: - -\begin{lstlisting} -class List[+a] { - ... - def ::: (that: List[a]): List[a] = - if (isEmpty) that - else head :: (tail ::: that) -} -\end{lstlisting} - -We would like to verify that concatenation is associative, with the -empty list \code{List()} as left and right identity: -\bda{lcl} - (xs ::: ys) ::: zs &=& xs ::: (ys ::: zs) \\ - xs ::: List() &=& xs \gap =\ List() ::: xs -\eda -\emph{Q}: How can we prove statements like the one above? - -\emph{A}: By \emph{structural induction} over lists. -\es -\bsh{Reminder: Natural Induction} - -Recall the proof principle of \emph{natural induction}: - -To show a property \mathtext{P(n)} for all numbers \mathtext{n \geq b}: -\be -\item Show that \mathtext{P(b)} holds (\emph{base case}). -\item For arbitrary \mathtext{n \geq b} show: -\begin{quote} - if \mathtext{P(n)} holds, then \mathtext{P(n+1)} holds as well -\end{quote} -(\emph{induction step}). -\ee -%\es\bs -\emph{Example}: Given -\begin{lstlisting} -def factorial(n: int): int = - if (n == 0) 1 - else n * factorial(n-1) -\end{lstlisting} -show that, for all \code{n >= 4}, -\begin{lstlisting} - factorial(n) >= 2$^n$ -\end{lstlisting} -\es\bs -\Case{\code{4}} -is established by simple calculation of \code{factorial(4) = 24} and \code{2$^4$ = 16}. - -\Case{\code{n+1}} -We have for \code{n >= 4}: -\begin{lstlisting} - \= factorial(n + 1) - = \> $\expl{by the second clause of factorial(*)}$ - \> (n + 1) * factorial(n) - >= \> $\expl{by calculation}$ - \> 2 * factorial(n) - >= \> $\expl{by the induction hypothesis}$ - \> 2 * 2$^n$. -\end{lstlisting} -Note that in our proof we can freely apply reduction steps such as in (*) -anywhere in a term. - - -This works because purely functional programs do not have side -effects; so a term is equivalent to the term it reduces to. - -The principle is called {\em\emph{referential transparency}}. -\es -\bsh{Structural Induction} - -The principle of structural induction is analogous to natural induction: - -In the case of lists, it is as follows: - -To prove a property \mathtext{P(xs)} for all lists \mathtext{xs}, -\be -\item Show that \code{P(List())} holds (\emph{base case}). -\item For arbitrary lists \mathtext{xs} and elements \mathtext{x} - show: -\begin{quote} - if \mathtext{P(xs)} holds, then \mathtext{P(x :: xs)} holds as well -\end{quote} -(\emph{induction step}). -\ee - -\es -\bsh{Example} - -We show \code{(xs ::: ys) ::: zs = xs ::: (ys ::: zs)} by structural induction -on \code{xs}. - -\Case{\code{List()}} -For the left-hand side, we have: -\begin{lstlisting} - \= (List() ::: ys) ::: zs - = \> $\expl{by first clause of \prog{:::}}$ - \> ys ::: zs -\end{lstlisting} -For the right-hand side, we have: -\begin{lstlisting} - \= List() ::: (ys ::: zs) - = \> $\expl{by first clause of \prog{:::}}$ - \> ys ::: zs -\end{lstlisting} -So the case is established. - -\es -\bs -\Case{\code{x :: xs}} - -For the left-hand side, we have: -\begin{lstlisting} - \= ((x :: xs) ::: ys) ::: zs - = \> $\expl{by second clause of \prog{:::}}$ - \> (x :: (xs ::: ys)) ::: zs - = \> $\expl{by second clause of \prog{:::}}$ - \> x :: ((xs ::: ys) ::: zs) - = \> $\expl{by the induction hypothesis}$ - \> x :: (xs ::: (ys ::: zs)) -\end{lstlisting} - -For the right-hand side, we have: -\begin{lstlisting} - \= (x :: xs) ::: (ys ::: zs) - = \> $\expl{by second clause of \prog{:::}}$ - \> x :: (xs ::: (ys ::: zs)) -\end{lstlisting} -So the case (and with it the property) is established. - -\begin{exercise} -Show by induction on \code{xs} that \code{xs ::: List() = xs}. -\es -\bsh{Example (2)} -\end{exercise} - -As a more difficult example, consider function -\begin{lstlisting} -abstract class List[a] { ... - def reverse: List[a] = match { - case List() => List() - case x :: xs => xs.reverse ::: List(x) - } -} -\end{lstlisting} -We would like to prove the proposition that -\begin{lstlisting} - xs.reverse.reverse = xs . -\end{lstlisting} -We proceed by induction over \code{xs}. The base case is easy to establish: -\begin{lstlisting} - \= List().reverse.reverse - = \> $\expl{by first clause of \prog{reverse}}$ - \> List().reverse - = \> $\expl{by first clause of \prog{reverse}}$ - \> List() -\end{lstlisting} -\es\bs -For the induction step, we try: -\begin{lstlisting} - \= (x :: xs).reverse.reverse - = \> $\expl{by second clause of \prog{reverse}}$ - \> (xs.reverse ::: List(x)).reverse -\end{lstlisting} -There's nothing more we can do to this expression, so we turn to the right side: -\begin{lstlisting} - \= x :: xs - = \> $\expl{by induction hypothesis}$ - \> x :: xs.reverse.reverse -\end{lstlisting} -The two sides have simplified to different expressions. - -So we still have to show that -\begin{lstlisting} - (xs.reverse ::: List(x)).reverse = x :: xs.reverse.reverse -\end{lstlisting} -Trying to prove this directly by induction does not work. - -Instead we have to {\em generalize} the equation to: -\begin{lstlisting} - (ys ::: List(x)).reverse = x :: ys.reverse -\end{lstlisting} -\es\bs -This equation can be proved by a second induction argument over \code{ys}. -(See blackboard). - -\begin{exercise} -Is it the case that \code{(xs drop m) at n = xs at (m + n)} for all -natural numbers \code{m}, \code{n} and all lists \code{xs}? -\end{exercise} - -\es -\bsh{Structural Induction on Trees} - -Structural induction is not restricted to lists; it works for arbitrary -trees. - -The general induction principle is as follows. - -To show that property \code{P(t)} holds for all trees of a certain type, -\begin{itemize} -\item Show \code{P(l)} for all leaf trees \code{$l$}. -\item For every interior node \code{t} with subtrees \code{s$_1$, ..., s$_n$}, - show that \code{P(s$_1$) $\wedge$ ... $\wedge$ P(s$_n$) => P(t)}. -\end{itemize} - -\example Recall our definition of \code{IntSet} with -operations \code{contains} and \code{incl}: - -\begin{lstlisting} -abstract class IntSet { - abstract def incl(x: int): IntSet - abstract def contains(x: int): boolean -} -\end{lstlisting} -\es\bs -\begin{lstlisting} -case class Empty extends IntSet { - def contains(x: int): boolean = false - def incl(x: int): IntSet = NonEmpty(x, Empty, Empty) -} -case class NonEmpty(elem: int, left: Set, right: Set) extends IntSet { - def contains(x: int): boolean = - if (x < elem) left contains x - else if (x > elem) right contains x - else true - def incl(x: int): IntSet = - if (x < elem) NonEmpty(elem, left incl x, right) - else if (x > elem) NonEmpty(elem, left, right incl x) - else this -} -\end{lstlisting} -(With \code{case} added, so that we can use factory methods instead of \code{new}). - -What does it mean to prove the correctness of this implementation? -\es -\bsh{Laws of IntSet} - -One way to state and prove the correctness of an implementation is -to prove laws that hold for it. - -In the case of \code{IntSet}, three such laws would be: - -For all sets \code{s}, elements \code{x}, \code{y}: - -\begin{lstlisting} -Empty contains x \= = false -(s incl x) contains x \> = true -(s incl x) contains y \> = s contains y if x $\neq$ y -\end{lstlisting} - -(In fact, one can show that these laws characterize the desired data -type completely). - -How can we establish that these laws hold? - -\emph{Proposition 1}: \code{Empty contains x = false}. - -\emph{Proof}: By the definition of \code{contains} in \code{Empty}. -\es\bs -\emph{Proposition 2}: \code{(xs incl x) contains x = true} - -\emph{Proof:} - -\Case{\code{Empty}} -\begin{lstlisting} - \= (Empty incl x) contains x - = \> $\expl{by definition of \prog{incl} in \prog{Empty}}$ - \> NonEmpty(x, Empty, Empty) contains x - = \> $\expl{by definition of \prog{contains} in \prog{NonEmpty}}$ - \> true -\end{lstlisting} - -\Case{\code{NonEmpty(x, l, r)}} -\begin{lstlisting} - \= (NonEmpty(x, l, r) incl x) contains x - = \> $\expl{by definition of \prog{incl} in \prog{NonEmpty}}$ - \> NonEmpty(x, l, r) contains x - = \> $\expl{by definition of \prog{contains} in \prog{Empty}}$ - \> true -\end{lstlisting} -\es\bs -\Case{\code{NonEmpty(y, l, r)} where \code{y < x}} -\begin{lstlisting} - \= (NonEmpty(y, l, r) incl x) contains x - = \> $\expl{by definition of \prog{incl} in \prog{NonEmpty}}$ - \> NonEmpty(y, l, r incl x) contains x - = \> $\expl{by definition of \prog{contains} in \prog{NonEmpty}}$ - \> (r incl x) contains x - = \> $\expl{by the induction hypothesis}$ - \> true -\end{lstlisting} - -\Case{\code{NonEmpty(y, l, r)} where \code{y > x}} is analogous. - -\bigskip - -\emph{Proposition 3}: If \code{x $\neq$ y} then -\code{xs incl y contains x = xs contains x}. - -\emph{Proof:} See blackboard. -\es -\bsh{Exercise} - -Say we add a \code{union} function to \code{IntSet}: - -\begin{lstlisting} -class IntSet { ... - def union(other: IntSet): IntSet -} -class Expty extends IntSet { ... - def union(other: IntSet) = other -} -class NonEmpty(x: int, l: IntSet, r: IntSet) extends IntSet { ... - def union(other: IntSet): IntSet = l union r union other incl x -} -\end{lstlisting} - -The correctness of \code{union} can be subsumed with the following -law: - -\emph{Proposition 4}: -\code{(xs union ys) contains x = xs contains x || ys contains x}. -Is that true ? What hypothesis is missing ? Show a counterexample. - -Show Proposition 4 using structural induction on \code{xs}. -\es -\comment{ - -\emph{Proof:} By induction on \code{xs}. - -\Case{\code{Empty}} - -\Case{\code{NonEmpty(x, l, r)}} - -\Case{\code{NonEmpty(y, l, r)} where \code{y < x}} - -\begin{lstlisting} - \= (Empty union ys) contains x - = \> $\expl{by definition of \prog{union} in \prog{Empty}}$ - \> ys contains x - = \> $\expl{Boolean algebra}$ - \> false || ys contains x - = \> $\expl{by definition of \prog{contains} in \prog{Empty} (reverse)}$ - \> (Empty contains x) || (ys contains x) -\end{lstlisting} - -\begin{lstlisting} - \= (NonEmpty(x, l, r) union ys) contains x - = \> $\expl{by definition of \prog{union} in \prog{NonEmpty}}$ - \> (l union r union ys incl x) contains x - = \> $\expl{by Proposition 2}$ - \> true - = \> $\expl{Boolean algebra}$ - \> true || (ys contains x) - = \> $\expl{by definition of \prog{contains} in \prog{NonEmpty} (reverse)}$ - \> (NonEmpty(x, l, r) contains x) || (ys contains x) -\end{lstlisting} - -\begin{lstlisting} - \= (NonEmpty(y, l, r) union ys) contains x - = \> $\expl{by definition of \prog{union} in \prog{NonEmpty}}$ - \> (l union r union ys incl y) contains x - = \> $\expl{by Proposition 3}$ - \> (l union r union ys) contains x - = \> $\expl{by the induction hypothesis}$ - \> ((l union r) contains x) || (ys contains x) - = \> $\expl{by Proposition 3}$ - \> ((l union r incl y) contains x) || (ys contains x) -\end{lstlisting} - -\Case{\code{NonEmpty(y, l, r)} where \code{y < x}} - ... is analogous. - -\es -}} -\chapter{\label{sec:for-notation}For-Comprehensions} - -The last chapter demonstrated that higher-order functions such as -\verb@map@, \verb@flatMap@, \verb@filter@ provide powerful -constructions for dealing with lists. But sometimes the level of -abstraction required by these functions makes a program hard to -understand. - -To help understandability, Scala has a special notation which -simplifies common patterns of applications of higher-order functions. -This notation builds a bridge between set-comprehensions in -mathematics and for-loops in imperative languages such as C or -Java. It also closely resembles the query notation of relational -databases. - -As a first example, say we are given a list \code{persons} of persons -with \code{name} and \code{age} fields. To print the names of all -persons in the sequence which are aged over 20, one can write: -\begin{lstlisting} -for (val p <- persons; p.age > 20) yield p.name -\end{lstlisting} -This is equivalent to the following expression , which uses -higher-order functions \code{filter} and \code{map}: -\begin{lstlisting} -persons filter (p => p.age > 20) map (p => p.name) -\end{lstlisting} -The for-comprehension looks a bit like a for-loop in imperative languages, -except that it constructs a list of the results of all iterations. - -Generally, a for-comprehension is of the form -\begin{lstlisting} -for ( $s$ ) yield $e$ -\end{lstlisting} -Here, $s$ is a sequence of {\em generators} and {\em filters}. A {\em -generator} is of the form \code{val x <- e}, where \code{e} is a -list-valued expression. It binds \code{x} to successive values in the -list. A {\em filter} is an expression \code{f} of type -\code{boolean}. It omits from consideration all bindings for which -\code{f} is \code{false}. The sequence $s$ starts in each case with a -generator. If there are several generators in a sequence, later -generators vary more rapidly than earlier ones. - -Here are two examples that show how for-comprehensions are used. -First, let's redo an example of the previous chapter: Given a positive -integer $n$, find all pairs of positive integers $i$ and $j$, where $1 -\leq j < i < n$ such that $i + j$ is prime. With a for-comprehension -this problem is solved as follows: -\begin{lstlisting} -for (val i <- List.range(1, n); - val j <- List.range(1, i); - isPrime(i+j)) yield Pair(i, j) -\end{lstlisting} -This is arguably much clearer than the solution using \code{map}, -\code{flatMap} and \code{filter} that we have developed previously. - -As a second example, consider computing the scalar product of two -vectors \code{xs} and \code{ys}. Using a for-comprehension, this can -be written as follows. -\begin{lstlisting} - sum (for(val (x, y) <- xs zip ys) yield x * y) -\end{lstlisting} - -\section{The N-Queens Problem} - -For-comprehensions are especially useful for solving combinatorial -puzzles. An example of such a puzzle is the 8-queens problem: Given a -standard chess-board, place 8 queens such that no queen is in check from any -other (a queen can check another piece if they are on the same -column, row, or diagonal). We will now develop a solution to this -problem, generalizing it to chess-boards of arbitrary size. Hence, the -problem is to place $n$ queens on a chess-board of size $n \times n$. - -To solve this problem, note that we need to place a queen in each row. -So we could place queens in successive rows, each time checking that a -newly placed queen is not in check from any other queens that have -already been placed. In the course of this search, it might arrive -that a queen to be placed in row $k$ would be in check in all fields -of that row from queens in row $1$ to $k-1$. In that case, we need to -abort that part of the search in order to continue with a different -configuration of queens in columns $1$ to $k-1$. - -This suggests a recursive algorithm. Assume that we have already -generated all solutions of placing $k-1$ queens on a board of size $n -\times n$. We can represent each such solution by a list of length -$k-1$ of column numbers (which can range from $1$ to $n$). We treat -these partial solution lists as stacks, where the column number of the -queen in row $k-1$ comes first in the list, followed by the column -number of the queen in row $k-2$, etc. The bottom of the stack is the -column number of the queen placed in the first row of the board. All -solutions together are then represented as a list of lists, with one -element for each solution. - -Now, to place the $k$'the queen, we generate all possible extensions -of each previous solution by one more queen. This yields another list -of solution lists, this time of length $k$. We continue the process -until we have reached solutions of the size of the chess-board $n$. -This algorithmic idea is embodied in function \code{placeQueens} below: -\begin{lstlisting} -def queens(n: int): List[List[int]] = { - def placeQueens(k: int): List[List[int]] = - if (k == 0) List(List()) - else for (val queens <- placeQueens(k - 1); - val column <- List.range(1, n + 1); - isSafe(column, queens, 1)) yield col :: queens; - placeQueens(n); -} -\end{lstlisting} - -\begin{exercise} Write the function -\begin{lstlisting} - def isSafe(col: int, queens: List[int], delta: int): boolean -\end{lstlisting} -which tests whether a queen in the given column \verb@col@ is safe with -respect to the \verb@queens@ already placed. Here, \verb@delta@ is the difference between the row of the queen to be -placed and the row of the first queen in the list. -\end{exercise} - -\section{Querying with For-Comprehensions} - -The for-notation is essentially equivalent to common operations of -database query languages. For instance, say we are given a -database \code{books}, represented as a list of books, where -\code{Book} is defined as follows. -\begin{lstlisting} -case class Book(title: String, authors: List[String]); -\end{lstlisting} -Here is a small example database: -\begin{lstlisting} -val books: List[Book] = List( - Book("Structure and Interpretation of Computer Programs", - List("Abelson, Harold", "Sussman, Gerald J.")), - Book("Principles of Compiler Design", - List("Aho, Alfred", "Ullman, Jeffrey")), - Book("Programming in Modula-2", - List("Wirth, Niklaus")), - Book("Introduction to Functional Programming"), - List("Bird, Richard")), - Book("The Java Language Specification", - List("Gosling, James", "Joy, Bill", "Steele, Guy", "Bracha, Gilad"))); -\end{lstlisting} -Then, to find the titles of all books whose author's last name is ``Ullman'': -\begin{lstlisting} -for (val b <- books; val a <- b.authors; a startsWith "Ullman") -yield b.title -\end{lstlisting} -(Here, \code{startsWith} is a method in \code{java.lang.String}). Or, -to find the titles of all books that have the string ``Program'' in -their title: -\begin{lstlisting} -for (val b <- books; (b.title indexOf "Program") >= 0) -yield b.title -\end{lstlisting} -Or, to find the names of all authors that have written at least two -books in the database. -\begin{lstlisting} -for (val b1 <- books; val b2 <- books; b1 != b2; - val a1 <- b1.authors; val a2 <- b2.authors; a1 == a2) -yield a1 -\end{lstlisting} -The last solution is not yet perfect, because authors will appear -several times in the list of results. We still need to remove -duplicate authors from result lists. This can be achieved with the -following function. -\begin{lstlisting} -def removeDuplicates[a](xs: List[a]): List[a] = - if (xs.isEmpty) xs - else xs.head :: removeDuplicates(xs.tail filter (x => x != xs.head)); -\end{lstlisting} -Note that the last expression in method \code{removeDuplicates} -can be equivalently expressed using a for-comprehension. -\begin{lstlisting} -xs.head :: removeDuplicates(for (val x <- xs.tail; x != xs.head) yield x) -\end{lstlisting} - -\section{Translation of For-Comprehensions} - -Every for-comprehension can be expressed in terms of the three -higher-order functions \code{map}, \code{flatMap} and \code{filter}. -Here is the translation scheme, which is also used by the Scala compiler. -\begin{itemize} -\item -A simple for-comprehension -\begin{lstlisting} -for (val x <- e) yield e' -\end{lstlisting} -is translated to -\begin{lstlisting} -e.map(x => e') -\end{lstlisting} -\item -A for-comprehension -\begin{lstlisting} -for (val x <- e; f; s) yield e' -\end{lstlisting} -where \code{f} is a filter and \code{s} is a (possibly empty) -sequence of generators or filters -is translated to -\begin{lstlisting} -for (val x <- e.filter(x => f); s) yield e' -\end{lstlisting} -and then translation continues with the latter expression. -\item -A for-comprehension -\begin{lstlisting} -for (val x <- e; y <- e'; s) yield e'' -\end{lstlisting} -where \code{s} is a (possibly empty) -sequence of generators or filters -is translated to -\begin{lstlisting} -e.flatMap(x => for (y <- e'; s) yield e'') -\end{lstlisting} -and then translation continues with the latter expression. -\end{itemize} -For instance, taking our "pairs of integers whose sum is prime" example: -\begin{lstlisting} -for ( val i <- range(1, n); - val j <- range(1, i); - isPrime(i+j) -) yield (i, j) -\end{lstlisting} -Here is what we get when we translate this expression: -\begin{lstlisting} -range(1, n) - .flatMap(i => - range(1, i) - .filter(j => isPrime(i+j)) - .map(j => (i, j))) -\end{lstlisting} - -Conversely, it would also be possible to express functions \code{map}, -\code{flatMap}{ and \code{filter} using for-comprehensions. Here are the -three functions again, this time implemented using for-comprehensions. -\begin{lstlisting} -object Demo { - def map[a, b](xs: List[a], f: a => b): List[b] = - for (val x <- xs) yield f(x); - - def flatMap[a, b](xs: List[a], f: a => List[b]): List[b] = - for (val x <- xs; val y <- f(x)) yield y; - - def filter[a](xs: List[a], p: a => boolean): List[a] = - for (val x <- xs; p(x)) yield x; -} -\end{lstlisting} -Not surprisingly, the translation of the for-comprehension in the body of -\code{Demo.map} will produce a call to \code{map} in class \code{List}. -Similarly, \code{Demo.flatMap} and \code{Demo.filter} translate to -\code{flatMap} and \code{filter} in class \code{List}. - -\begin{exercise} -Define the following function in terms of \code{for}. -\begin{lstlisting} -def flatten(xss: List[List[a]]): List[a] = - (xss :\ List()) ((xs, ys) => xs ::: ys) -\end{lstlisting} -\end{exercise} - -\begin{exercise} -Translate -\begin{lstlisting} -for ( val b <- books; val a <- b.authors; a startsWith "Bird" ) yield b.title -for ( val b <- books; (b.title indexOf "Program") >= 0 ) yield b.title -\end{lstlisting} -to higher-order functions. -\end{exercise} - -\section{For-Loops}\label{sec:for-loops} - -For-comprehensions resemble for-loops in imperative languages, except -that they produce a list of results. Sometimes, a list of results is -not needed but we would still like the flexibility of generators and -filters in iterations over lists. This is made possible by a variant -of the for-comprehension syntax, which expresses for-loops: -\begin{lstlisting} -for ( $s$ ) $e$ -\end{lstlisting} -This construct is the same as the standard for-comprehension syntax -except that the keyword \code{yield} is missing. The for-loop is -executed by executing the expression $e$ for each element generated -from the sequence of generators and filters $s$. - -As an example, the following expression prints out all elements of a -matrix represented as a list of lists: - \begin{lstlisting} -for (xs <- xss) { - for (x <- xs) System.out.print(x + "\t") - System.out.println() -} -\end{lstlisting} -The translation of for-loops to higher-order methods of class -\code{List} is similar to the translation of for-comprehensions, but -is simpler. Where for-comprehensions translate to \code{map} and -\code{flatMap}, for-loops translate in each case to \code{foreach}. - -\section{Generalizing For} - -We have seen that the translation of for-comprehensions only relies on -the presence of methods \code{map}, \code{flatMap}, and -\code{filter}. Therefore it is possible to apply the same notation to -generators that produce objects other than lists; these objects only -have to support the three key functions \code{map}, \code{flatMap}, -and \code{filter}. - -The standard Scala library has several other abstractions that support -these three methods and with them support for-comprehensions. We will -encounter some of them in the following chapters. As a programmer you -can also use this principle to enable for-comprehensions for types you -define -- these types just need to support methods \code{map}, -\code{flatMap}, and \code{filter}. - -There are many examples where this is useful: Examples are database -interfaces, XML trees, or optional values. We will see in -Chapter~\ref{sec:parsers-results} how for-comprehensions can be used -in the definition of parsers for context-free grammars that construct -abstract syntax trees. - -One caveat: It is not assured automatically that the result -translating a for-comprehension is well-typed. To ensure this, the -types of \code{map}, \code{flatMap} and \code{filter} have to be -essentially similar to the types of these methods in class \code{List}. - -To make this precise, assume you have a parameterized class - \code{C[a]} for which you want to enable for-comprehensions. Then - \code{C} should define \code{map}, \code{flatMap} and \code{filter} - with the following types: -\begin{lstlisting} -def map[b](f: a => b): C[b] -def flatMap[b](f: a => C[b]): C[b] -def filter(p: a => boolean): C[a] -\end{lstlisting} -It would be attractive to enforce these types statically in the Scala -compiler, for instance by requiring that any type supporting -for-comprehensions implements a standard trait with these methods -\footnote{In the programming language Haskell, which has similar -constructs, this abstraction is called a ``monad with zero''}. The -problem is that such a standard trait would have to abstract over the -identity of the class \code{C}, for instance by taking \code{C} as a -type parameter. Note that this parameter would be a type constructor, -which gets applied to {\em several different} types in the signatures of -methods \code{map} and \code{flatMap}. Unfortunately, the Scala type -system is too weak to express this construct, since it can handle only -type parameters which are fully applied types. - -\chapter{Mutable State} - -Most programs we have presented so for did not have side-effects -\footnote{We ignore here the fact that some of our program printed to -standard output, which technically is a side effect.}. Therefore, the -notion of {\em time} did not matter. For a program that terminates, -any sequence of actions would have led to the same result! This is -also reflected by the substitution model of computation, where a -rewrite step can be applied anywhere in a term, and all rewritings -that terminate lead to the same solution. In fact, this {\em -confluence} property is a deep result in $\lambda$-calculus, the -theory underlying functional programming. - -In this chapter, we introduce functions with side effects and study -their behavior. We will see that as a consequence we have to -fundamentally modify up the substitution model of computation which we -employed so far. - -\section{Stateful Objects} - -We normally view the world as a set of objects, some of which have -state that {\em changes} over time. Normally, state is associated -with a set of variables that can be changed in the course of a -computation. There is also a more abstract notion of state, which -does not refer to particular constructs of a programming language: An -object {\em has state} (or: {\em is stateful}) if its behavior is -influenced by its history. - -For instance, a bank account object has state, because the question -``can I withdraw 100 CHF?'' -might have different answers during the lifetime of the account. - -In Scala, all mutable state is ultimately built from variables. A -variable definition is written like a value definition, but starts -with \verb@var@ instead of \verb@val@. For instance, the following two -definitions introduce and initialize two variables \code{x} and -\code{count}. -\begin{lstlisting} -var x: String = "abc"; -var count = 111; -\end{lstlisting} -Like a value definition, a variable definition associates a name with -a value. But in the case of a variable definition, this association -may be changed later by an assignment. Such assignments are written -as in C or Java. Examples: -\begin{lstlisting} -x = "hello"; -count = count + 1; -\end{lstlisting} -In Scala, every defined variable has to be initialized at the point of -its definition. For instance, the statement ~\code{var x: int;}~ is -{\em not} regarded as a variable definition, because the initializer -is missing\footnote{If a statement like this appears in a class, it is -instead regarded as a variable declaration, which introduces -abstract access methods for the variable, but does not associate these -methods with a piece of state.}. If one does not know, or does not -care about, the appropriate initializer, one can use a wildcard -instead. I.e. -\begin{lstlisting} -val x: T = _; -\end{lstlisting} -will initialize \code{x} to some default value (\code{null} for -reference types, \code{false} for booleans, and the appropriate -version of \code{0} for numeric value types). - -Real-world objects with state are represented in Scala by objects that -have variables as members. For instance, here is a class that -represents bank accounts. -\begin{lstlisting} -class BankAccount { - private var balance = 0; - def deposit(amount: int): unit = - if (amount > 0) balance = balance + amount; - - def withdraw(amount: int): int = - if (0 < amount && amount <= balance) { - balance = balance - amount; - balance - } else throw new Error("insufficient funds"); -} -\end{lstlisting} -The class defines a variable \code{balance} which contains the current -balance of an account. Methods \code{deposit} and \code{withdraw} -change the value of this variable through assignments. Note that -\code{balance} is \code{private} in class \code{BankAccount} -- hence -it can not be accessed directly outside the class. - -To create bank-accounts, we use the usual object creation notation: -\begin{lstlisting} -val myAccount = new BankAccount -\end{lstlisting} - -\example Here is a \code{scalaint} session that deals with bank -accounts. - -\begin{lstlisting} -> :l bankaccount.scala -loading file 'bankaccount.scala' -> val account = new BankAccount -val account : BankAccount = BankAccount$\Dollar$class@1797795 -> account deposit 50 -(): scala.Unit -> account withdraw 20 -30: scala.Int -> account withdraw 20 -10: scala.Int -> account withdraw 15 -java.lang.RuntimeException: insufficient funds - at BankAccount$\Dollar$class.withdraw(bankaccount.scala:13) - at <top-level>(console:1) -> -\end{lstlisting} -The example shows that applying the same operation (\code{withdraw -20}) twice to an account yields different results. So, clearly, -accounts are stateful objects. - -\paragraph{Sameness and Change} -Assignments pose new problems in deciding when two expressions are -``the same''. -If assignments are excluded, and one writes -\begin{lstlisting} -val x = E; val y = E; -\end{lstlisting} -where \code{E} is some arbitrary expression, -then \code{x} and \code{y} can reasonably be assumed to be the same. -I.e. one could have equivalently written -\begin{lstlisting} -val x = E; val y = x; -\end{lstlisting} -(This property is usually called {\em referential transparency}). But -once we admit assignments, the two definition sequences are different. -Consider: -\begin{lstlisting} -val x = new BankAccount; val y = new BankAccount; -\end{lstlisting} -To answer the question whether \code{x} and \code{y} are the same, we -need to be more precise what ``sameness'' means. This meaning is -captured in the notion of {\em operational equivalence}, which, -somewhat informally, is stated as follows. - -Suppose we have two definitions of \code{x} and \code{y}. -To test whether \code{x} and \code{y} define the same value, proceed -as follows. -\begin{itemize} -\item -Execute the definitions followed by an -arbitrary sequence \code{S} of operations that involve \code{x} and -\code{y}. Observe the results (if any). -\item -Then, execute the definitions with another sequence \code{S'} which -results from \code{S} by renaming all occurrences of \code{y} in -\code{S} to \code{x}. -\item -If the results of running \code{S'} are different, then surely -\code{x} and \code{y} are different. -\item -On the other hand, if all possible pairs of sequences \code{(S, S')} -yield the same results, then \code{x} and \code{y} are the same. -\end{itemize} -In other words, operational equivalence regards two definitions -\code{x} and \code{y} as defining the same value, if no possible -experiment can distinguish between \code{x} and \code{y}. An -experiment in this context are two version of an arbitrary program which use either -\code{x} or \code{y}. - -Given this definition, let's test whether -\begin{lstlisting} -val x = new BankAccount; val y = new BankAccount; -\end{lstlisting} -defines values \code{x} and \code{y} which are the same. -Here are the definitions again, followed by a test sequence: - -\begin{lstlisting} -> val x = new BankAccount -> val y = new BankAccount -> x deposit 30 -30 -> y withdraw 20 -java.lang.RuntimeException: insufficient funds -\end{lstlisting} - -Now, rename all occurrences of \code{y} in that sequence to -\code{x}. We get: -\begin{lstlisting} -> val x = new BankAccount -> val y = new BankAccount -> x deposit 30 -30 -> x withdraw 20 -10 -\end{lstlisting} -Since the final results are different, we have established that -\code{x} and \code{y} are not the same. -On the other hand, if we define -\begin{lstlisting} -val x = new BankAccount; val y = x -\end{lstlisting} -then no sequence of operations can distinguish between \code{x} and -\code{y}, so \code{x} and \code{y} are the same in this case. - -\paragraph{Assignment and the Substitution Model} -These examples show that our previous substitution model of -computation cannot be used anymore. After all, under this -model we could always replace a value name by its -defining expression. -For instance in -\begin{lstlisting} -val x = new BankAccount; val y = x -\end{lstlisting} -the \code{x} in the definition of \code{y} could -be replaced by \code{new BankAccount}. -But we have seen that this change leads to a different program. -So the substitution model must be invalid, once we add assignments. - -\section{Imperative Control Structures} - -Scala has the \code{while} and \code{do-while} loop constructs known -from the C and Java languages. There is also a single branch \code{if} -which leaves out the else-part as well as a \code{return} statement which -aborts a function prematurely. This makes it possible to program in a -conventional imperative style. For instance, the following function, -which computes the \code{n}'th power of a given parameter \code{x}, is -implemented using \code{while} and single-branch \code{if}. -\begin{lstlisting} -def power (x: double, n: int): double = { - var r = 1.0; - var i = n; - while (i > 0) { - if ((i & 1) == 1) { r = r * x } - if (i > 1) r = r * r; - i = i >> 1; - } - r -} -\end{lstlisting} -These imperative control constructs are in the language for -convenience. They could have been left out, as the same constructs can -be implemented using just functions. As an example, let's develop a -functional implementation of the while loop. \code{whileLoop} should -be a function that takes two parameters: a condition, of type -\code{boolean}, and a command, of type \code{unit}. Both condition and -command need to be passed by-name, so that they are evaluated -repeatedly for each loop iteration. This leads to the following -definition of \code{whileLoop}. -\begin{lstlisting} -def whileLoop(condition: => boolean)(command: => unit): unit = - if (condition) { - command; whileLoop(condition)(command) - } else {} -\end{lstlisting} -Note that \code{whileLoop} is tail recursive, so it operates in -constant stack space. - -\begin{exercise} Write a function \code{repeatLoop}, which should be -applied as follows: -\begin{lstlisting} -repeatLoop { command } ( condition ) -\end{lstlisting} -Is there also a way to obtain a loop syntax like the following? -\begin{lstlisting} -repeatLoop { command } until ( condition ) -\end{lstlisting} -\end{exercise} - -Some other control constructs known from C and Java are missing in -Scala: There are no \code{break} and \code{continue} jumps for loops. -There are also no for-loops in the Java sense -- these have been -replaced by the more general for-loop construct discussed in -Section~\ref{sec:for-loops}. - -\section{Extended Example: Discrete Event Simulation} - -We now discuss an example that demonstrates how assignments and -higher-order functions can be combined in interesting ways. -We will build a simulator for digital circuits. - -The example is taken from Abelson and Sussman's book -\cite{abelson-sussman:structure}. We augment their basic (Scheme-) -code by an object-oriented structure which allows code-reuse through -inheritance. The example also shows how discrete event simulation programs -in general are structured and built. - -We start with a little language to describe digital circuits. -A digital circuit is built from {\em wires} and {\em function boxes}. -Wires carry signals which are transformed by function boxes. -We will represent signals by the booleans \code{true} and -\code{false}. - -Basic function boxes (or: {\em gates}) are: -\begin{itemize} -\item An \emph{inverter}, which negates its signal -\item An \emph{and-gate}, which sets its output to the conjunction of its input. -\item An \emph{or-gate}, which sets its output to the disjunction of its -input. -\end{itemize} -Other function boxes can be built by combining basic ones. - -Gates have {\em delays}, so an output of a gate will change only some -time after its inputs change. - -\paragraph{A Language for Digital Circuits} - -We describe the elements of a digital circuit by the following set of -Scala classes and functions. - -First, there is a class \code{Wire} for wires. -We can construct wires as follows. -\begin{lstlisting} -val a = new Wire; -val b = new Wire; -val c = new Wire; -\end{lstlisting} -Second, there are functions -\begin{lstlisting} -def inverter(input: Wire, output: Wire): unit -def andGate(a1: Wire, a2: Wire, output: Wire): unit -def orGate(o1: Wire, o2: Wire, output: Wire): unit -\end{lstlisting} -which ``make'' the basic gates we need (as side-effects). -More complicated function boxes can now be built from these. -For instance, to construct a half-adder, we can define: - -\begin{lstlisting} - def halfAdder(a: Wire, b: Wire, s: Wire, c: Wire): unit = { - val d = new Wire; - val e = new Wire; - orGate(a, b, d); - andGate(a, b, c); - inverter(c, e); - andGate(d, e, s); - } -\end{lstlisting} -This abstraction can itself be used, for instance in defining a full -adder: -\begin{lstlisting} - def fullAdder(a: Wire, b: Wire, cin: Wire, sum: Wire, cout: Wire) = { - val s = new Wire; - val c1 = new Wire; - val c2 = new Wire; - halfAdder(a, cin, s, c1); - halfAdder(b, s, sum, c2); - orGate(c1, c2, cout); - } -\end{lstlisting} -Class \code{Wire} and functions \code{inverter}, \code{andGate}, and -\code{orGate} represent thus a little language in which users can -define digital circuits. We now give implementations of this class -and these functions, which allow one to simulate circuits. -These implementations are based on a simple and general API for -discrete event simulation. - -\paragraph{The Simulation API} - -Discrete event simulation performs user-defined \emph{actions} at -specified \emph{times}. -An {\em action} is represented as a function which takes no parameters and -returns a \code{unit} result: -\begin{lstlisting} -type Action = () => unit; -\end{lstlisting} -The \emph{time} is simulated; it is not the actual ``wall-clock'' time. - -A concrete simulation will be done inside an object which inherits -from the abstract \code{Simulation} class. This class has the following -signature: - -\begin{lstlisting} -abstract class Simulation { - def currentTime: int; - def afterDelay(delay: int, action: => Action): unit; - def run: unit; -} -\end{lstlisting} -Here, -\code{currentTime} returns the current simulated time as an integer -number, -\code{afterDelay} schedules an action to be performed at a specified -delay after \code{currentTime}, and -\code{run} runs the simulation until there are no further actions to be -performed. - -\paragraph{The Wire Class} -A wire needs to support three basic actions. -\begin{itemize} -\item[] -\code{getSignal: boolean}~~ returns the current signal on the wire. -\item[] -\code{setSignal(sig: boolean): unit}~~ sets the wire's signal to \code{sig}. -\item[] -\code{addAction(p: Action): unit}~~ attaches the specified procedure -\code{p} to the {\em actions} of the wire. All attached action -procedures will be executed every time the signal of a wire changes. -\end{itemize} -Here is an implementation of the \code{Wire} class: -\begin{lstlisting} -class Wire { - private var sigVal = false; - private var actions: List[Action] = List(); - def getSignal = sigVal; - def setSignal(s: boolean) = - if (s != sigVal) { - sigVal = s; - actions.foreach(action => action()); - } - def addAction(a: Action) = { - actions = a :: actions; a() - } -} -\end{lstlisting} -Two private variables make up the state of a wire. The variable -\code{sigVal} represents the current signal, and the variable -\code{actions} represents the action procedures currently attached to -the wire. - -\paragraph{The Inverter Class} -We implement an inverter by installing an action on its input wire, -namely the action which puts the negated input signal onto the output -signal. The action needs to take effect at \code{InverterDelay} -simulated time units after the input changes. This suggests the -following implementation: -\begin{lstlisting} -def inverter(input: Wire, output: Wire) = { - def invertAction() = { - val inputSig = input.getSignal; - afterDelay(InverterDelay, () => output.setSignal(!inputSig)) - } - input addAction invertAction -} -\end{lstlisting} - -\paragraph{The And-Gate Class} -And-gates are implemented analogously to inverters. The action of an -\code{andGate} is to output the conjunction of its input signals. -This should happen at \code{AndGateDelay} simulated time units after -any one of its two inputs changes. Hence, the following implementation: -\begin{lstlisting} -def andGate(a1: Wire, a2: Wire, output: Wire) = { - def andAction() = { - val a1Sig = a1.getSignal; - val a2Sig = a2.getSignal; - afterDelay(AndGateDelay, () => output.setSignal(a1Sig & a2Sig)); - } - a1 addAction andAction; - a2 addAction andAction; -} -\end{lstlisting} - -\begin{exercise} Write the implementation of \code{orGate}. -\end{exercise} - -\begin{exercise} Another way is to define an or-gate by a combination of -inverters and and gates. Define a function \code{orGate} in terms of -\code{andGate} and \code{inverter}. What is the delay time of this function? -\end{exercise} - -\paragraph{The Simulation Class} - -Now, we just need to implement class \code{Simulation}, and we are -done. The idea is that we maintain inside a \code{Simulation} object -an \emph{agenda} of actions to perform. The agenda is represented as -a list of pairs of actions and the times they need to be run. The -agenda list is sorted, so that earlier actions come before later ones. -\begin{lstlisting} -class Simulation { - private type Agenda = List[Pair[int, Action]]; - private var agenda: Agenda = List(); -\end{lstlisting} -There is also a private variable \code{curtime} to keep track of the -current simulated time. -\begin{lstlisting} - private var curtime = 0; -\end{lstlisting} -An application of the method \code{afterDelay(delay, action)} -inserts the pair \code{(curtime + delay, action)} into the -\code{agenda} list at the appropriate place. -\begin{lstlisting} - def afterDelay(int delay)(action: => Action): unit = { - val actiontime = curtime + delay; - def insertAction(ag: Agenda): Agenda = ag match { - case List() => - Pair(actiontime, action) :: ag - case (first @ Pair(time, act)) :: ag1 => - if (actiontime < time) Pair(actiontime, action) :: ag - else first :: insert(ag1) - } - agenda = insert(agenda) - } -\end{lstlisting} -An application of the \code{run} method removes successive elements -from the \code{agenda} and performs their actions. -It continues until the agenda is empty: -\begin{lstlisting} -def run = { - afterDelay(0, () => System.out.println("*** simulation started ***")); - agenda match { - case List() => - case Pair(_, action) :: agenda1 => - agenda = agenda1; action(); run - } -} -\end{lstlisting} - - -\paragraph{Running the Simulator} -To run the simulator, we still need a way to inspect changes of -signals on wires. To this purpose, we write a function \code{probe}. -\begin{lstlisting} -def probe(name: String, wire: Wire): unit = { - wire addAction (() => - System.out.println( - name + " " + currentTime + " new_value = " + wire.getSignal); - ) -} -\end{lstlisting} -Now, to see the simulator in action, let's define four wires, and place -probes on two of them: -\begin{lstlisting} -> val input1 = new Wire -> val input2 = new Wire -> val sum = new Wire -> val carry = new Wire - -> probe("sum", sum) -sum 0 new_value = false -> probe("carry", carry) -carry 0 new_value = false -\end{lstlisting} -Now let's define a half-adder connecting the wires: -\begin{lstlisting} -> halfAdder(input1, input2, sum, carry); -\end{lstlisting} -Finally, set one after another the signals on the two input wires to -\code{true} and run the simulation. -\begin{lstlisting} -> input1 setSignal true; run -*** simulation started *** -sum 8 new_value = true -> input2 setSignal true; run -carry 11 new_value = true -sum 15 new_value = false -\end{lstlisting} - -\section{Summary} - -We have seen in this chapter the constructs that let us model state in -Scala -- these are variables, assignments, and imperative control -structures. State and Assignment complicate our mental model of -computation. In particular, referential transparency is lost. On the -other hand, assignment gives us new ways to formulate programs -elegantly. As always, it depends on the situation whether purely -functional programming or programming with assignments works best. - -\chapter{Computing with Streams} - -The previous chapters have introduced variables, assignment and -stateful objects. We have seen how real-world objects that change -with time can be modeled by changing the state of variables in a -computation. Time changes in the real world thus are modeled by time -changes in program execution. Of course, such time changes are usually -stretched out or compressed, but their relative order is the same. -This seems quite natural, but there is a also price to pay: Our simple -and powerful substitution model for functional computation is no -longer applicable once we introduce variables and assignment. - -Is there another way? Can we model state change in the real world -using only immutable functions? Taking mathematics as a guide, the -answer is clearly yes: A time-changing quantity is simply modeled by -a function \code{f(t)} with a time parameter \code{t}. The same can be -done in computation. Instead of overwriting a variable with successive -values, we represent all these values as successive elements in a -list. So, a mutable variable \code{var x: T} gets replaced by an -immutable value \code{val x: List[T]}. In a sense, we trade space for -time -- the different values of the variable now all exit concurrently -as different elements of the list. One advantage of the list-based -view is that we can ``time-travel'', i.e. view several successive -values of the variable at the same time. Another advantage is that we -can make use of the powerful library of list processing functions, -which often simplifies computation. For instance, consider the -imperative way to compute the sum of all prime numbers in an interval: -\begin{lstlisting} -def sumPrimes(start: int, end: int): int = { - var i = start; - var acc = 0; - while (i < end) { - if (isPrime(i)) acc = acc + i; - i = i + 1; - } - acc -} -\end{lstlisting} -Note that the variable \code{i} ``steps through'' all values of the interval -\code{[start .. end-1]}. - -A more functional way is to represent the list of values of variable \code{i} directly as \code{range(start, end)}. Then the function can be rewritten as follows. -\begin{lstlisting} -def sumPrimes(start: int, end: int) = - sum(range(start, end) filter isPrime); -\end{lstlisting} - -No contest which program is shorter and clearer! However, the -functional program is also considerably less efficient since it -constructs a list of all numbers in the interval, and then another one -for the prime numbers. Even worse from an efficiency point of view is -the following example: - -To find the second prime number between \code{1000} and \code{10000}: -\begin{lstlisting} - range(1000, 10000) filter isPrime at 1 -\end{lstlisting} -Here, the list of all numbers between \code{1000} and \code{10000} is -constructed. But most of that list is never inspected! - -However, we can obtain efficient execution for examples like these by -a trick: -\begin{quote} -%\red - Avoid computing the tail of a sequence unless that tail is actually - necessary for the computation. -\end{quote} -We define a new class for such sequences, which is called \code{Stream}. - -Streams are created using the constant \code{empty} and the constructor \code{cons}, -which are both defined in module \code{scala.Stream}. For instance, the following -expression constructs a stream with elements \code{1} and \code{2}: -\begin{lstlisting} -Stream.cons(1, Stream.cons(2, Stream.empty)) -\end{lstlisting} -As another example, here is the analogue of \code{List.range}, -but returning a stream instead of a list: -\begin{lstlisting} -def range(start: Int, end: Int): Stream[Int] = - if (start >= end) Stream.empty - else Stream.cons(start, range(start + 1, end)); -\end{lstlisting} -(This function is also defined as given above in module -\code{Stream}). Even though \code{Stream.range} and \code{List.range} -look similar, their execution behavior is completely different: - -\code{Stream.range} immediately returns with a \code{Stream} object -whose first element is \code{start}. All other elements are computed -only when they are \emph{demanded} by calling the \code{tail} method -(which might be never at all). - -Streams are accessed just as lists. as for lists, the basic access -methods are \code{isEmpty}, \code{head} and \code{tail}. For instance, -we can print all elements of a stream as follows. -\begin{lstlisting} -def print(xs: Stream[a]): unit = - if (!xs.isEmpty) { System.out.println(xs.head); print(xs.tail) }; -\end{lstlisting} -Streams also support almost all other methods defined on lists (see -below for where their methods sets differ). For instance, we can find -the second prime number between \code{1000} and \code{10000} by applying methods -\code{filter} and \code{apply} on an interval stream: -\begin{lstlisting} - Stream.range(1000, 10000) filter isPrime at 1 -\end{lstlisting} -The difference to the previous list-based implementation is that now -we do not needlessly construct and test for primality any numbers -beyond 3. - -\paragraph{Consing and appending streams} Two methods in class \code{List} -which are not supported by class \code{Stream} are \code{::} and -\code{:::}. The reason is that these methods are dispatched on their -right-hand side argument, which means that this argument needs to be -evaluated before the method is called. For instance, in the case of -\code{x :: xs} on lists, the tail \code{xs} needs to be evaluated -before \code{::} can be called and the new list can be constructed. -This does not work for streams, where we require that the tail of a -stream should not be evaluated until it is demanded by a \code{tail} operation. -The argument why list-append \code{:::} cannot be adapted to streams is analogous. - -Instead of \code{x :: xs}, one uses \code{Stream.cons(x, xs)} for -constructing a stream with first element \code{x} and (unevaluated) -rest \code{xs}. Instead of \code{xs ::: ys}, one uses the operation -\code{xs append ys}. - -\chapter{Iterators} - -Iterators are the imperative version of streams. Like streams, -iterators describe potentially infinite lists. However, there is no -data-structure which contains the elements of an iterator. Instead, -iterators allow one to step through the sequence, using two abstract methods \code{next} and \code{hasNext}. -\begin{lstlisting} -trait Iterator[+a] { - def hasNext: boolean; - def next: a; -\end{lstlisting} -Method \code{next} returns successive elements. Method \code{hasNext} -indicates whether there are still more elements to be returned by -\code{next}. Iterators also support some other methods, which are -explained later. - -As an example, here is an application which prints the squares of all -numbers from 1 to 100. -\begin{lstlisting} -var it: Iterator[int] = Iterator.range(1, 100); -while (it.hasNext) { - val x = it.next; - System.out.println(x * x) -} -\end{lstlisting} - -\section{Iterator Methods} - -Iterators support a rich set of methods besides \code{next} and -\code{hasNext}, which is described in the following. Many of these -methods mimic a corresponding functionality in lists. - -\paragraph{Append} -Method \code{append} constructs an iterator which resumes with the -given iterator \code{it} after the current iterator has finished. -\begin{lstlisting} - def append[b >: a](that: Iterator[b]): Iterator[b] = new Iterator[b] { - def hasNext = Iterator.this.hasNext || that.hasNext; - def next = if (Iterator.this.hasNext) Iterator.this.next else that.next; - } -\end{lstlisting} -The terms \code{Iterator.this.next} and \code{Iterator.this.hasNext} -in the definition of \code{append} call the corresponding methods as -they are defined in the enclosing \code{Iterator} class. If the -\code{Iterator} prefix to \code{this} would have been missing, -\code{hasNext} and \code{next} would have called recursively the -methods being defined in the result of \code{append}, which is not -what we want. - -\paragraph{Map, FlatMap, Foreach} Method \code{map} -constructs an iterator which returns all elements of the original -iterator transformed by a given function \code{f}. -\begin{lstlisting} - def map[b](f: a => b): Iterator[b] = new Iterator[b] { - def hasNext = Iterator.this.hasNext; - def next = f(Iterator.this.next); - } -\end{lstlisting} -Method \code{flatMap} is like method \code{map}, except that the -transformation function \code{f} now returns an iterator. -The result of \code{flatMap} is the iterator resulting from appending -together all iterators returned from successive calls of \code{f}. -\begin{lstlisting} - def flatMap[b](f: a => Iterator[b]): Iterator[b] = new Iterator[b] { - private var cur: Iterator[b] = Iterator.empty; - def hasNext: Boolean = - if (cur.hasNext) true - else if (Iterator.this.hasNext) { cur = f(Iterator.this.next); hasNext } - else false; - def next: b = - if (cur.hasNext) cur.next - else if (Iterator.this.hasNext) { cur = f(Iterator.this.next); next } - else throw new Error("next on empty iterator"); - } -\end{lstlisting} -Closely related to \code{map} is the \code{foreach} method, which -applies a given function to all elements of an iterator, but does not -construct a list of results -\begin{lstlisting} - def foreach(f: a => Unit): Unit = - while (hasNext) { f(next) } -\end{lstlisting} - -\paragraph{Filter} Method \code{filter} constructs an iterator which -returns all elements of the original iterator that satisfy a criterion -\code{p}. -\begin{lstlisting} - def filter(p: a => Boolean) = new BufferedIterator[a] { - private val source = - Iterator.this.buffered; - private def skip: Unit = - while (source.hasNext && !p(source.head)) { source.next; () } - def hasNext: Boolean = - { skip; source.hasNext } - def next: a = - { skip; source.next } - def head: a = - { skip; source.head; } - } -\end{lstlisting} -In fact, \code{filter} returns instances of a subclass of iterators -which are ``buffered''. A \code{BufferedIterator} object is an -iterator which has in addition a method \code{head}. This method -returns the element which would otherwise have been returned by -\code{head}, but does not advance beyond that element. Hence, the -element returned by \code{head} is returned again by the next call to -\code{head} or \code{next}. Here is the definition of the -\code{BufferedIterator} trait. -\begin{lstlisting} -trait BufferedIterator[+a] extends Iterator[a] { - def head: a; -} -\end{lstlisting} -Since \code{map}, \code{flatMap}, \code{filter}, and \code{foreach} -exist for iterators, it follows that for-comprehensions and for-loops -can also be used on iterators. For instance, the application which prints the squares of numbers between 1 and 100 could have equivalently been expressed as follows. -\begin{lstlisting} -for (val i <- Iterator.range(1, 100)) - System.out.println(i * i); -\end{lstlisting} - -\paragraph{Zip} Method \code{zip} takes another iterator and -returns an iterator consisting of pairs of corresponding elements -returned by the two iterators. -\begin{lstlisting} - def zip[b](that: Iterator[b]) = new Iterator[Pair[a, b]] { - def hasNext = Iterator.this.hasNext && that.hasNext; - def next = Pair(Iterator.this.next, that.next); - } -} -\end{lstlisting} - -\section{Constructing Iterators} - -Concrete iterators need to provide implementations for the two -abstract methods \code{next} and \code{hasNext} in class -\code{Iterator}. The simplest iterator is \code{Iterator.empty} which -always returns an empty sequence: -\begin{lstlisting} -object Iterator { - object empty extends Iterator[All] { - def hasNext = false; - def next: a = throw new Error("next on empty iterator"); - } -\end{lstlisting} -A more interesting iterator enumerates all elements of an array. This -iterator is constructed by the \code{fromArray} method, which is also defined in the object \code{Iterator} -\begin{lstlisting} - def fromArray[a](xs: Array[a]) = new Iterator[a] { - private var i = 0; - def hasNext: Boolean = - i < xs.length; - def next: a = - if (i < xs.length) { val x = xs(i) ; i = i + 1 ; x } - else throw new Error("next on empty iterator"); - } -\end{lstlisting} -Another iterator enumerates an integer interval. The -\code{Iterator.range} function returns an iterator which traverses a -given interval of integer values. It is defined as follows. -\begin{lstlisting} -object Iterator { - def range(start: int, end: int) = new Iterator[int] { - private var current = start; - def hasNext = current < end; - def next = { - val r = current; - if (current < end) current = current + 1 - else throw new Error("end of iterator"); - r - } - } -} -\end{lstlisting} -All iterators seen so far terminate eventually. It is also possible to -define iterators that go on forever. For instance, the following -iterator returns successive integers from some start -value\footnote{Due to the finite representation of type \prog{int}, -numbers will wrap around at $2^31$.}. -\begin{lstlisting} -def from(start: int) = new Iterator[int] { - private var last = start - 1; - def hasNext = true; - def next = { last = last + 1; last } -} -\end{lstlisting} - -\section{Using Iterators} - -Here are two more examples how iterators are used. First, to print all -elements of an array \code{xs: Array[int]}, one can write: -\begin{lstlisting} - Iterator.fromArray(xs) foreach (x => - System.out.println(x)) -\end{lstlisting} -Or, using a for-comprehension: -\begin{lstlisting} - for (val x <- Iterator.fromArray(xs)) - System.out.println(x) -\end{lstlisting} -As a second example, consider the problem of finding the indices of -all the elements in an array of \code{double}s greater than some -\code{limit}. The indices should be returned as an iterator. -This is achieved by the following expression. -\begin{lstlisting} -import Iterator._; -fromArray(xs) -.zip(from(0)) -.filter(case Pair(x, i) => x > limit) -.map(case Pair(x, i) => i) -\end{lstlisting} -Or, using a for-comprehension: -\begin{lstlisting} -import Iterator._; -for (val Pair(x, i) <- fromArray(xs) zip from(0); x > limit) -yield i -\end{lstlisting} - - - - - - - -\chapter{Combinator Parsing}\label{sec:combinator-parsing} - -In this chapter we describe how to write combinator parsers in -Scala. Such parsers are constructed from predefined higher-order -functions, so called {\em parser combinators}, that closely model the -constructions of an EBNF grammar \cite{wirth:ebnf}. - -As running example, we consider parsers for possibly nested -lists of identifiers and numbers, which -are described by the following context-free grammar. -\bda{p{3cm}cp{10cm}} -letter &::=& /* all letters */ \\ -digit &::=& /* all digits */ \\[0.5em] -ident &::=& letter \{letter $|$ digit \}\\ -number &::=& digit \{digit\}\\[0.5em] -list &::=& `(' [listElems] `)' \\ -listElems &::=& expr [`,' listElems] \\ -expr &::=& ident | number | list - -\eda - -\section{Simple Combinator Parsing} - -In this section we will only be concerned with the task of recognizing -input strings, not with processing them. So we can describe parsers -by the sets of input strings they accept. There are two -fundamental operators over parsers: -\code{&&&} expresses the sequential composition of a parser with -another, while \code{|||} expresses an alternative. These operations -will both be defined as methods of a \code{Parser} class. We will -also define constructors for the following primitive parsers: - -\begin{tabular}{ll} -\code{empty} & The parser that accepts the empty string -\\ -\code{fail} & The parser that accepts no string -\\ -\code{chr(c: char)} - & The parser that accepts the single-character string ``$c$''. -\\ -\code{chr(p: char => boolean)} - & The parser that accepts single-character strings - ``$c$'' \\ - & for which $p(c)$ is true. -\end{tabular} - -There are also the two higher-order parser combinators \code{opt}, -expressing optionality and \code{rep}, expressing repetition. -For any parser $p$, \code{opt(}$p$\code{)} yields a parser that -accepts the strings accepted by $p$ or else the empty string, while -\code{rep(}$p$\code{)} accepts arbitrary sequences of the strings accepted by -$p$. In EBNF, \code{opt(}$p$\code{)} corresponds to $[p]$ and -\code{rep(}$p$\code{)} corresponds to $\{p\}$. - -The central idea of parser combinators is that parsers can be produced -by a straightforward rewrite of the grammar, replacing \code{::=} with -\code{=}, sequencing with -\code{&&&}, choice -\code{|} with \code{|||}, repetition \code{\{...\}} with -\code{rep(...)} and optional occurrence \code{[...]} with \code{opt(...)}. -Applying this process to the grammar of lists -yields the following class. -\begin{lstlisting} -abstract class ListParsers extends Parsers { - def chr(p: char => boolean): Parser; - def chr(c: char): Parser = chr(d: char => d == c); - - def letter : Parser = chr(Character.isLetter); - def digit : Parser = chr(Character.isDigit); - - def ident : Parser = letter &&& rep(letter ||| digit); - def number : Parser = digit &&& rep(digit); - def list : Parser = chr('(') &&& opt(listElems) &&& chr(')'); - def listElems : Parser = expr &&& (chr(',') &&& listElems ||| empty); - def expr : Parser = ident ||| number ||| list; -} -\end{lstlisting} -This class isolates the grammar from other aspects of parsing. It -abstracts over the type of input -and over the method used to parse a single character -(represented by the abstract method \code{chr(p: char => -boolean))}. The missing bits of information need to be supplied by code -applying the parser class. - -It remains to explain how to implement a library with the combinators -described above. We will pack combinators and their underlying -implementation in a base class \code{Parsers}, which is inherited by -\code{ListParsers}. The first question to decide is which underlying -representation type to use for a parser. We treat parsers here -essentially as functions that take a datum of the input type -\code{intype} and that yield a parse result of type -\code{Option[intype]}. The \code{Option} type is predefined as -follows. -\begin{lstlisting} -trait Option[+a]; -case object None extends Option[All]; -case class Some[a](x: a) extends Option[a]; -\end{lstlisting} -A parser applied to some input either succeeds or fails. If it fails, -it returns the constant \code{None}. If it succeeds, it returns a -value of the form \code{Some(in1)} where \code{in1} represents the -input that remains to be parsed. -\begin{lstlisting} -abstract class Parsers { - type intype; - abstract class Parser { - type Result = Option[intype]; - def apply(in: intype): Result; -\end{lstlisting} -A parser also implements the combinators -for sequence and alternative: -\begin{lstlisting} - /*** p &&& q applies first p, and if that succeeds, then q - */ - def &&& (q: => Parser) = new Parser { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => None - case Some(in1) => q(in1) - } - } - - /*** p ||| q applies first p, and, if that fails, then q. - */ - def ||| (q: => Parser) = new Parser { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => q(in) - case s => s - } - } -\end{lstlisting} -The implementations of the primitive parsers \code{empty} and \code{fail} -are trivial: -\begin{lstlisting} - val empty = new Parser { def apply(in: intype): Result = Some(in) } - val fail = new Parser { def apply(in: intype): Result = None } -\end{lstlisting} -The higher-order parser combinators \code{opt} and \code{rep} can be -defined in terms of the combinators for sequence and alternative: -\begin{lstlisting} - def opt(p: Parser): Parser = p ||| empty; // p? = (p | <empty>) - def rep(p: Parser): Parser = opt(rep1(p)); // p* = [p+] - def rep1(p: Parser): Parser = p &&& rep(p); // p+ = p p* -} // end Parser -\end{lstlisting} -To run combinator parsers, we still need to decide on a way to handle -parser input. Several possibilities exist: The input could be -represented as a list, as an array, or as a random access file. Note -that the presented combinator parsers use backtracking to change from -one alternative to another. Therefore, it must be possible to reset -input to a point that was previously parsed. If one restricted the -focus to LL(1) grammars, a non-backtracking implementation of the -parser combinators in class \code{Parsers} would also be possible. In -that case sequential input methods based on (say) iterators or -sequential files would also be possible. - -In our example, we represent the input by a pair of a string, which -contains the input phrase as a whole, and an index, which represents -the portion of the input which has not yet been parsed. Since the -input string does not change, just the index needs to be passed around -as a result of individual parse steps. This leads to the following -class of parsers that read strings: -\begin{lstlisting} -class ParseString(s: String) extends Parsers { - type intype = int; - def chr(p: char => boolean) = new Parser { - def apply(in: int): Parser#Result = - if (in < s.length() && p(s charAt in)) Some(in + 1); - else None; - } - val input = 0; -} -\end{lstlisting} -This class implements a method \code{chr(p: char => boolean)} and a -value \code{input}. The \code{chr} method builds a parser that either -reads a single character satisfying the given predicate \code{p} or -fails. All other parsers over strings are ultimately implemented in -terms of that method. The \code{input} value represents the input as a -whole. In out case, it is simply value \code{0}, the start index of -the string to be read. - -Note \code{apply}'s result type, \code{Parser#Result}. This syntax -selects the type element \code{Result} of the type \code{Parser}. It -thus corresponds roughly to selecting a static inner class from some -outer class in Java. Note that we could {\em not} have written -\code{Parser.Result}, as the latter would express selection of the -\code{Result} element from a {\em value} named \code{Parser}. - -We have now extended the root class \code{Parsers} in two different -directions: Class \code{ListParsers} defines a grammar of phrases to -be parsed, whereas class \code{ParseString} defines a method by which -such phrases are input. To write a concrete parsing application, we -need to define both grammar and input method. We do this by combining -two extensions of \code{Parsers} using a {\em mixin composition}. -Here is the start of a sample application: -\begin{lstlisting} -object Test { - def main(args: Array[String]): unit = { - val ps = new ListParsers with ParseString(args(0)); -\end{lstlisting} -The last line above creates a new family of parsers by composing class -\code{ListParsers} with class \code{ParseString}. The two classes -share the common superclass \code{Parsers}. The abstract method -\code{chr} in \code{ListParsers} is implemented by class \code{ParseString}. - -To run the parser, we apply the start symbol of the grammar -\code{expr} the argument code{input} and observe the result: -\begin{lstlisting} - ps.expr(input) match { - case Some(n) => - System.out.println("parsed: " + args(0).substring(0, n)); - case None => - System.out.println("nothing parsed"); - } - } -}// end Test -\end{lstlisting} -Note the syntax ~\code{ps.expr(input)}, which treats the \code{expr} -parser as if it was a function. In Scala, objects with \code{apply} -methods can be applied directly to arguments as if they were functions. - -Here is an example run of the program above: -\begin{lstlisting} -> java examples.Test "(x,1,(y,z))" -parsed: (x,1,(y,z)) -> java examples.Test "(x,,1,(y,z))" -nothing parsed -\end{lstlisting} - -\section{\label{sec:parsers-results}Parsers that Produce Results} - -The combinator library of the previous section does not support the -generation of output from parsing. But usually one does not just want -to check whether a given string belongs to the defined language, one -also wants to convert the input string into some internal -representation such as an abstract syntax tree. - -In this section, we modify our parser library to build parsers that -produce results. We will make use of the for-comprehensions introduced -in Chapter~\ref{sec:for-notation}. The basic combinator of sequential -composition, formerly ~\code{p &&& q}, now becomes -\begin{lstlisting} -for (val x <- p; val y <- q) yield e . -\end{lstlisting} -Here, the names \code{x} and \code{y} are bound to the results of -executing the parsers \code{p} and \code{q}. \code{e} is an expression -that uses these results to build the tree returned by the composed -parser. - -Before describing the implementation of the new parser combinators, we -explain how the new building blocks are used. Say we want to modify -our list parser so that it returns an abstract syntax tree of the -parsed expression. Syntax trees are given by the following class hierarchy: -\begin{lstlisting} -abstract class Tree{} -case class Id (s: String) extends Tree {} -case class Num(n: int) extends Tree {} -case class Lst(elems: List[Tree]) extends Tree {} -\end{lstlisting} -That is, a syntax tree is an identifier, an integer number, or a -\code{Lst} node with a list of trees as descendants. - -As a first step towards parsers that produce results we define three -little parsers that return a single read character as result. -\begin{lstlisting} -abstract class CharParsers extends Parsers { - def any: Parser[char]; - def chr(ch: char): Parser[char] = - for (val c <- any; c == ch) yield c; - def chr(p: char => boolean): Parser[char] = - for (val c <- any; p(c)) yield c; -} -\end{lstlisting} -The \code{any} parser succeeds with the first character of remaining -input as long as input is nonempty. It is abstract in class -\code{ListParsers} since we want to abstract in this class from the -concrete input method used. The two \code{chr} parsers return as before -the first input character if it equals a given character or matches a -given predicate. They are now implemented in terms of \code{any}. - -The next level is represented by parsers reading identifiers, numbers -and lists. Here is a parser for identifiers. -\begin{lstlisting} -abstract class ListParsers extends CharParsers { - def ident: Parser[Tree] = - for ( - val c: char <- chr(Character.isLetter); - val cs: List[char] <- rep(chr(Character.isLetterOrDigit)) - ) yield Id((c :: cs).mkString("", "", "")); -\end{lstlisting} -Remark: Because \code{chr(...)} returns a single character, its -repetition \code{rep(chr(...))} returns a list of characters. The -\code{yield} part of the for-comprehension converts all intermediate -results into an \code{Id} node with a string as element. To convert -the read characters into a string, it conses them into a single list, -and invokes the \code{mkString} method on the result. - -Here is a parser for numbers: -\begin{lstlisting} - def number: Parser[Tree] = - for ( - val d: char <- chr(Character.isDigit); - val ds: List[char] <- rep(chr(Character.isDigit)) - ) yield Num(((d - '0') /: ds) ((x, digit) => x * 10 + digit - '0')); -\end{lstlisting} -Intermediate results are in this case the leading digit of -the read number, followed by a list of remaining digits. The -\code{yield} part of the for-comprehension reduces these to a number -by a fold-left operation. - -Here is a parser for lists: -\begin{lstlisting} - def list: Parser[Tree] = - for ( - val _ <- chr('('); - val es <- listElems ||| succeed(List()); - val _ <- chr(')') - ) yield Lst(es); - - def listElems: Parser[List[Tree]] = - for ( - val x <- expr; - val xs <- chr(',') &&& listElems ||| succeed(List()) - ) yield x :: xs; -\end{lstlisting} -The \code{list} parser returns a \code{Lst} node with a list of trees -as elements. That list is either the result of \code{listElems}, or, -if that fails, the empty list (expressed here as: the result of a -parser which always succeeds with the empty list as result). - -The highest level of our grammar is represented by function -\code{expr}: -\begin{lstlisting} - def expr: Parser[Tree] = - ident ||| number ||| list -}// end ListParsers. -\end{lstlisting} -We now present the parser combinators that support the new -scheme. Parsers that succeed now return a parse result besides the -un-consumed input. -\begin{lstlisting} -abstract class Parsers { - type intype; - trait Parser[a] { - type Result = Option[Pair[a, intype]]; - def apply(in: intype): Result; -\end{lstlisting} -Parsers are parameterized with the type of their result. The class -\code{Parser[a]} now defines new methods \code{map}, \code{flatMap} -and \code{filter}. The \code{for} expressions are mapped by the -compiler to calls of these functions using the scheme described in -Chapter~\ref{sec:for-notation}. For parsers, these methods are -implemented as follows. -\begin{lstlisting} - def filter(pred: a => boolean) = new Parser[a] { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => None - case Some(Pair(x, in1)) => if (pred(x)) Some(Pair(x, in1)) else None - } - } - def map[b](f: a => b) = new Parser[b] { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => None - case Some(Pair(x, in1)) => Some(Pair(f(x), in1)) - } - } - def flatMap[b](f: a => Parser[b]) = new Parser[b] { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => None - case Some(Pair(x, in1)) => f(x).apply(in1) - } - } -\end{lstlisting} -The \code{filter} method takes as parameter a predicate $p$ which it -applies to the results of the current parser. If the predicate is -false, the parser fails by returning \code{None}; otherwise it returns -the result of the current parser. The \code{map} method takes as -parameter a function $f$ which it applies to the results of the -current parser. The \code{flatMap} takes as parameter a function -\code{f} which returns a parser. It applies \code{f} to the result of -the current parser and then continues with the resulting parser. The -\code{|||} method is essentially defined as before. The -\code{&&&} method can now be defined in terms of \code{for}. -\begin{lstlisting} - def ||| (p: => Parser[a]) = new Parser[a] { - def apply(in: intype): Result = Parser.this.apply(in) match { - case None => p(in) - case s => s - } - } - - def &&& [b](p: => Parser[b]): Parser[b] = - for (val _ <- this; val x <- p) yield x; - }// end Parser -\end{lstlisting} - -The primitive parser \code{succeed} replaces \code{empty}. It consumes -no input and returns its parameter as result. -\begin{lstlisting} - def succeed[a](x: a) = new Parser[a] { - def apply(in: intype) = Some(Pair(x, in)) - } -\end{lstlisting} - -The parser combinators \code{rep} and \code{opt} now also return -results. \code{rep} returns a list which contains as elements the -results of each iteration of its sub-parser. \code{opt} returns a list -which is either empty or returns as single element the result of the -optional parser. -\begin{lstlisting} - def rep[a](p: Parser[a]): Parser[List[a]] = - rep1(p) ||| succeed(List()); - - def rep1[a](p: Parser[a]): Parser[List[a]] = - for (val x <- p; val xs <- rep(p)) yield x :: xs; - - def opt[a](p: Parser[a]): Parser[List[a]] = - (for (val x <- p) yield List(x)) ||| succeed(List()); -} // end Parsers -\end{lstlisting} -The root class \code{Parsers} abstracts over which kind of -input is parsed. As before, we determine the input method by a separate class. -Here is \code{ParseString}, this time adapted to parsers that return results. -It defines now the method \code{any}, which returns the first input character. -\begin{lstlisting} -class ParseString(s: String) extends Parsers { - type intype = int; - val input = 0; - def any = new Parser[char] { - def apply(in: int): Parser[char]#Result = - if (in < s.length()) Some(Pair(s charAt in, in + 1)) else None; - } -} -\end{lstlisting} -The rest of the application is as before. Here is a test program which -constructs a list parser over strings and prints out the result of -applying it to the command line argument. -\begin{lstlisting} -object Test { - def main(args: Array[String]): unit = { - val ps = new ListParsers with ParseString(args(0)); - ps.expr(ps.input) match { - case Some(Pair(list, _)) => System.out.println("parsed: " + list); - case None => "nothing parsed" - } - } -} -\end{lstlisting} - -\begin{exercise}\label{exercise:end-marker} The parsers we have defined so -far can succeed even if there is some input beyond the parsed text. To -prevent this, one needs a parser which recognizes the end of input. -Redesign the parser library so that such a parser can be introduced. -Which classes need to be modified? -\end{exercise} - -\chapter{\label{sec:hm}Hindley/Milner Type Inference} - -This chapter demonstrates Scala's data types and pattern matching by -developing a type inference system in the Hindley/Milner style -\cite{milner:polymorphism}. The source language for the type inferencer is -lambda calculus with a let construct called Mini-ML. Abstract syntax -trees for the Mini-ML are represented by the following data type of -\code{Terms}. -\begin{lstlisting} -trait Term {} -case class Var(x: String) extends Term { - override def toString() = x; -} -case class Lam(x: String, e: Term) extends Term { - override def toString() = "(\\" + x + "." + e + ")"; -} -case class App(f: Term, e: Term) extends Term { - override def toString() = "(" + f + " " + e + ")"; -} -case class Let(x: String, e: Term, f: Term) extends Term { - override def toString() = "let " + x + " = " + e + " in " + f; -} -\end{lstlisting} -There are four tree constructors: \code{Var} for variables, \code{Lam} -for function abstractions, \code{App} for function applications, and -\code{Let} for let expressions. Each case class overrides the -\code{toString()} method of class \code{Any}, so that terms can be -printed in legible form. - -We next define the types that are -computed by the inference system. -\begin{lstlisting} -sealed trait Type {} -case class Tyvar(a: String) extends Type { - override def toString() = a; -} -case class Arrow(t1: Type, t2: Type) extends Type { - override def toString() = "(" + t1 + "->" + t2 + ")"; -} -case class Tycon(k: String, ts: List[Type]) extends Type { - override def toString() = - k + (if (ts.isEmpty) "" else ts.mkString("[", ",", "]")); -} -\end{lstlisting} -There are three type constructors: \code{Tyvar} for type variables, -\code{Arrow} for function types and \code{Tycon} for type constructors -such as \code{boolean} or \code{List}. Type constructors have as -component a list of their type parameters. This list is empty for type -constants such as \code{boolean}. Again, the type constructors -implement the \code{toString} method in order to display types legibly. - -Note that \code{Type} is a \code{sealed} class. This means that no -subclasses or data constructors that extend \code{Type} can be formed -outside the sequence of definitions in which \code{Type} is defined. -This makes \code{Type} a {\em closed} algebraic data type with exactly -three alternatives. By contrast, type \code{Term} is an {\em open} -algebraic type for which further alternatives can be defined. - -The main parts of the type inferencer are contained in object -\code{typeInfer}. We start with a utility function which creates -fresh type variables: -\begin{lstlisting} -object typeInfer { - private var n: Int = 0; - def newTyvar(): Type = { n = n + 1 ; Tyvar("a" + n) } -\end{lstlisting} -We next define a class for substitutions. A substitution is an -idempotent function from type variables to types. It maps a finite -number of type variables to some types, and leaves all other type -variables unchanged. The meaning of a substitution is extended -point-wise to a mapping from types to types. -\begin{lstlisting} - trait Subst extends Any with Function1[Type,Type] { - - def lookup(x: Tyvar): Type; - - def apply(t: Type): Type = t match { - case tv @ Tyvar(a) => val u = lookup(tv); if (t == u) t else apply(u); - case Arrow(t1, t2) => Arrow(apply(t1), apply(t2)) - case Tycon(k, ts) => Tycon(k, ts map apply) - } - - def extend(x: Tyvar, t: Type) = new Subst { - def lookup(y: Tyvar): Type = if (x == y) t else Subst.this.lookup(y); - } - } - val emptySubst = new Subst { def lookup(t: Tyvar): Type = t } -\end{lstlisting} -We represent substitutions as functions, of type \code{Type => -Type}. This is achieved by making class \code{Subst} inherit from the -unary function type \code{Function1[Type, Type]}\footnote{ -The class inherits the function type as a mixin rather than as a direct -superclass. This is because in the current Scala implementation, the -\code{Function1} type is a Java interface, which cannot be used as a direct -superclass of some other class.}. -To be an instance -of this type, a substitution \code{s} has to implement an \code{apply} -method that takes a \code{Type} as argument and yields another -\code{Type} as result. A function application \code{s(t)} is then -interpreted as \code{s.apply(t)}. - -The \code{lookup} method is abstract in class \code{Subst}. There are -two concrete forms of substitutions which differ in how they -implement this method. One form is defined by the \code{emptySubst} value, -the other is defined by the \code{extend} method in class -\code{Subst}. - -The next data type describes type schemes, which consist of a type and -a list of names of type variables which appear universally quantified -in the type scheme. -For instance, the type scheme $\forall a\forall b.a \!\arrow\! b$ would be represented in the type checker as: -\begin{lstlisting} -TypeScheme(List(TyVar("a"), TyVar("b")), Arrow(Tyvar("a"), Tyvar("b"))) . -\end{lstlisting} -The class definition of type schemes does not carry an extends -clause; this means that type schemes extend directly class -\code{AnyRef}. Even though there is only one possible way to -construct a type scheme, a case class representation was chosen -since it offers convenient ways to decompose an instance of this type into its -parts. -\begin{lstlisting} -case class TypeScheme(tyvars: List[String], tpe: Type) { - def newInstance: Type = { - (emptySubst /: tyvars) ((s, tv) => s.extend(tv, newTyvar())) (tpe); - } -} -\end{lstlisting} -Type scheme objects come with a method \code{newInstance}, which -returns the type contained in the scheme after all universally type -variables have been renamed to fresh variables. The implementation of -this method folds (with \code{/:}) the type scheme's type variables -with an operation which extends a given substitution \code{s} by -renaming a given type variable \code{tv} to a fresh type -variable. The resulting substitution renames all type variables of the -scheme to fresh ones. This substitution is then applied to the type -part of the type scheme. - -The last type we need in the type inferencer is -\code{Env}, a type for environments, which associate variable names -with type schemes. They are represented by a type alias \code{Env} in -module \code{typeInfer}: -\begin{lstlisting} -type Env = List[Pair[String, TypeScheme]]; -\end{lstlisting} -There are two operations on environments. The \code{lookup} function -returns the type scheme associated with a given name, or \code{null} -if the name is not recorded in the environment. -\begin{lstlisting} - def lookup(env: Env, x: String): TypeScheme = env match { - case List() => null - case Pair(y, t) :: env1 => if (x == y) t else lookup(env1, x) - } -\end{lstlisting} -The \code{gen} function turns a given type into a type scheme, -quantifying over all type variables that are free in the type, but -not in the environment. -\begin{lstlisting} - def gen(env: Env, t: Type): TypeScheme = - TypeScheme(tyvars(t) diff tyvars(env), t); -\end{lstlisting} -The set of free type variables of a type is simply the set of all type -variables which occur in the type. It is represented here as a list of -type variables, which is constructed as follows. -\begin{lstlisting} - def tyvars(t: Type): List[Tyvar] = t match { - case tv @ Tyvar(a) => - List(tv) - case Arrow(t1, t2) => - tyvars(t1) union tyvars(t2) - case Tycon(k, ts) => - (List[Tyvar]() /: ts) ((tvs, t) => tvs union tyvars(t)); - } -\end{lstlisting} -Note that the syntax \code{tv @ ...} in the first pattern introduces a variable -which is bound to the pattern that follows. Note also that the explicit type parameter \code{[Tyvar]} in the expression of the third -clause is needed to make local type inference work. - -The set of free type variables of a type scheme is the set of free -type variables of its type component, excluding any quantified type variables: -\begin{lstlisting} - def tyvars(ts: TypeScheme): List[Tyvar] = - tyvars(ts.tpe) diff ts.tyvars; -\end{lstlisting} -Finally, the set of free type variables of an environment is the union -of the free type variables of all type schemes recorded in it. -\begin{lstlisting} - def tyvars(env: Env): List[Tyvar] = - (List[Tyvar]() /: env) ((tvs, nt) => tvs union tyvars(nt._2)); -\end{lstlisting} -A central operation of Hindley/Milner type checking is unification, -which computes a substitution to make two given types equal (such a -substitution is called a {\em unifier}). Function \code{mgu} computes -the most general unifier of two given types $t$ and $u$ under a -pre-existing substitution $s$. That is, it returns the most general -substitution $s'$ which extends $s$, and which makes $s'(t)$ and -$s'(u)$ equal types. -\begin{lstlisting} - def mgu(t: Type, u: Type, s: Subst): Subst = Pair(s(t), s(u)) match { - case Pair(Tyvar(a), Tyvar(b)) if (a == b) => - s - case Pair(Tyvar(a), _) if !(tyvars(u) contains a) => - s.extend(Tyvar(a), u) - case Pair(_, Tyvar(a)) => - mgu(u, t, s) - case Pair(Arrow(t1, t2), Arrow(u1, u2)) => - mgu(t1, u1, mgu(t2, u2, s)) - case Pair(Tycon(k1, ts), Tycon(k2, us)) if (k1 == k2) => - (s /: (ts zip us)) ((s, tu) => mgu(tu._1, tu._2, s)) - case _ => - throw new TypeError("cannot unify " + s(t) + " with " + s(u)) - } -\end{lstlisting} -The \code{mgu} function throws a \code{TypeError} exception if no -unifier substitution exists. This can happen because the two types -have different type constructors at corresponding places, or because a -type variable is unified with a type that contains the type variable -itself. Such exceptions are modeled here as instances of case classes -that inherit from the predefined \code{Exception} class. -\begin{lstlisting} - case class TypeError(s: String) extends Exception(s) {} -\end{lstlisting} -The main task of the type checker is implemented by function -\code{tp}. This function takes as parameters an environment $env$, a -term $e$, a proto-type $t$, and a -pre-existing substitution $s$. The function yields a substitution -$s'$ that extends $s$ and that -turns $s'(env) \ts e: s'(t)$ into a derivable type judgment according -to the derivation rules of the Hindley/Milner type system \cite{milner:polymorphism}. A -\code{TypeError} exception is thrown if no such substitution exists. -\begin{lstlisting} - def tp(env: Env, e: Term, t: Type, s: Subst): Subst = { - current = e; - e match { - case Var(x) => - val u = lookup(env, x); - if (u == null) throw new TypeError("undefined: " + x); - else mgu(u.newInstance, t, s) - - case Lam(x, e1) => - val a = newTyvar(), b = newTyvar(); - val s1 = mgu(t, Arrow(a, b), s); - val env1 = Pair(x, TypeScheme(List(), a)) :: env; - tp(env1, e1, b, s1) - - case App(e1, e2) => - val a = newTyvar(); - val s1 = tp(env, e1, Arrow(a, t), s); - tp(env, e2, a, s1) - - case Let(x, e1, e2) => - val a = newTyvar(); - val s1 = tp(env, e1, a, s); - tp(Pair(x, gen(env, s1(a))) :: env, e2, t, s1) - } - } - var current: Term = null; -\end{lstlisting} -To aid error diagnostics, the \code{tp} function stores the currently -analyzed sub-term in variable \code{current}. Thus, if type checking -is aborted with a \code{TypeError} exception, this variable will -contain the subterm that caused the problem. - -The last function of the type inference module, \code{typeOf}, is a -simplified facade for \code{tp}. It computes the type of a given term -$e$ in a given environment $env$. It does so by creating a fresh type -variable $a$, computing a typing substitution that makes $env \ts e: a$ -into a derivable type judgment, and returning -the result of applying the substitution to $a$. -\begin{lstlisting} - def typeOf(env: Env, e: Term): Type = { - val a = newTyvar(); - tp(env, e, a, emptySubst)(a) - } -}// end typeInfer -\end{lstlisting} -To apply the type inferencer, it is convenient to have a predefined -environment that contains bindings for commonly used constants. The -module \code{predefined} defines an environment \code{env} that -contains bindings for the types of booleans, numbers and lists -together with some primitive operations over them. It also -defines a fixed point operator \code{fix}, which can be used to -represent recursion. -\begin{lstlisting} -object predefined { - val booleanType = Tycon("Boolean", List()); - val intType = Tycon("Int", List()); - def listType(t: Type) = Tycon("List", List(t)); - - private def gen(t: Type): typeInfer.TypeScheme = typeInfer.gen(List(), t); - private val a = typeInfer.newTyvar(); - val env = List( - Pair("true", gen(booleanType)), - Pair("false", gen(booleanType)), - Pair("if", gen(Arrow(booleanType, Arrow(a, Arrow(a, a))))), - Pair("zero", gen(intType)), - Pair("succ", gen(Arrow(intType, intType))), - Pair("nil", gen(listType(a))), - Pair("cons", gen(Arrow(a, Arrow(listType(a), listType(a))))), - Pair("isEmpty", gen(Arrow(listType(a), booleanType))), - Pair("head", gen(Arrow(listType(a), a))), - Pair("tail", gen(Arrow(listType(a), listType(a)))), - Pair("fix", gen(Arrow(Arrow(a, a), a))) - ) -} -\end{lstlisting} -Here's an example how the type inferencer can be used. -Let's define a function \code{showType} which returns the type of -a given term computed in the predefined environment -\code{Predefined.env}: -\begin{lstlisting} -object testInfer { - def showType(e: Term): String = - try { - typeInfer.typeOf(predefined.env, e).toString(); - } catch { - case typeInfer.TypeError(msg) => - "\n cannot type: " + typeInfer.current + - "\n reason: " + msg; - } -\end{lstlisting} -Then the application -\begin{lstlisting} -> testInfer.showType(Lam("x", App(App(Var("cons"), Var("x")), Var("nil")))); -\end{lstlisting} -would give the response -\begin{lstlisting} -> (a6->List[a6]) -\end{lstlisting} -To make the type inferencer more useful, we complete it with a -parser. -Function \code{main} of module \code{testInfer} -parses and typechecks a Mini-ML expression which is given as the first -command line argument. -\begin{lstlisting} - def main(args: Array[String]): unit = { - val ps = new MiniMLParsers with ParseString(args(0)); - ps.all(ps.input) match { - case Some(Pair(term, _)) => - System.out.println("" + term + ": " + showType(term)); - case None => - System.out.println("syntax error"); - } - } -}// typeInf -\end{lstlisting} -To do the parsing, method \code{main} uses the combinator parser -scheme of Chapter~\ref{sec:combinator-parsing}. It creates a parser -family \code{ps} as a mixin composition of parsers -that understand MiniML (but do not know where input comes from) and -parsers that read input from a given string. The \code{MiniMLParsers} -object implements parsers for the following grammar. -\begin{lstlisting} -term ::= "\" ident "." term - | term1 {term1} - | "let" ident "=" term "in" term -term1 ::= ident - | "(" term ")" -all ::= term ";" -\end{lstlisting} -Input as a whole is described by the production \code{all}; it -consists of a term followed by a semicolon. We allow ``whitespace'' -consisting of one or more space, tabulator or newline characters -between any two lexemes (this is not reflected in the grammar -above). Identifiers are defined as in -Chapter~\ref{sec:combinator-parsing} except that an identifier cannot -be one of the two reserved words "let" and "in". -\begin{lstlisting} -abstract class MiniMLParsers[intype] extends CharParsers[intype] { - - /** whitespace */ - def whitespace = rep{chr(' ') ||| chr('\t') ||| chr('\n')}; - - /** A given character, possible preceded by whitespace */ - def wschr(ch: char) = whitespace &&& chr(ch); - - /** identifiers or keywords */ - def id: Parser[String] = - for ( - val c: char <- whitespace &&& chr(Character.isLetter); - val cs: List[char] <- rep(chr(Character.isLetterOrDigit)) - ) yield (c :: cs).mkString("", "", ""); - - /** Non-keyword identifiers */ - def ident: Parser[String] = - for (val s <- id; s != "let" && s != "in") yield s; - - /** term = '\' ident '.' term | term1 {term1} | let ident "=" term in term */ - def term: Parser[Term] = - ( for ( - val _ <- wschr('\\'); - val x <- ident; - val _ <- wschr('.'); - val t <- term) - yield Lam(x, t): Term ) - ||| - ( for ( - val letid <- id; letid == "let"; - val x <- ident; - val _ <- wschr('='); - val t <- term; - val inid <- id; inid == "in"; - val c <- term) - yield Let(x, t, c) ) - ||| - ( for ( - val t <- term1; - val ts <- rep(term1)) - yield (t /: ts)((f, arg) => App(f, arg)) ); - - /** term1 = ident | '(' term ')' */ - def term1: Parser[Term] = - ( for (val s <- ident) - yield Var(s): Term ) - ||| - ( for ( - val _ <- wschr('('); - val t <- term; - val _ <- wschr(')')) - yield t ); - - /** all = term ';' */ - def all: Parser[Term] = - for ( - val t <- term; - val _ <- wschr(';')) - yield t; -} -\end{lstlisting} -Here are some sample MiniML programs and the output the type inferencer gives for each of them: -\begin{lstlisting} -> java testInfer -| "\x.\f.f(f x);" -(\x.(\f.(f (f x)))): (a8->((a8->a8)->a8)) - -> java testInfer -| "let id = \x.x -| in if (id true) (id nil) (id (cons zero nil));" -let id = (\x.x) in (((if (id true)) (id nil)) (id ((cons zero) nil))): List[Int] - -> java testInfer -| "let id = \x.x -| in if (id true) (id nil);" -let id = (\x.x) in ((if (id true)) (id nil)): (List[a13]->List[a13]) - -> java testInfer -| "let length = fix (\len.\xs. -| if (isEmpty xs) -| zero -| (succ (len (tail xs)))) -| in (length nil);" -let length = (fix (\len.(\xs.(((if (isEmpty xs)) zero) -(succ (len (tail xs))))))) in (length nil): Int - -> java testInfer -| "let id = \x.x -| in if (id true) (id nil) zero;" -let id = (\x.x) in (((if (id true)) (id nil)) zero): - cannot type: zero - reason: cannot unify Int with List[a14] -\end{lstlisting} - -\begin{exercise}\label{exercise:hm-parse} Using the parser library constructed in -Exercise~\ref{exercise:end-marker}, modify the MiniML parser library -so that no marker ``;'' is necessary for indicating the end of input. -\end{exercise} - -\begin{exercise}\label{execcise:hm-extend} Extend the Mini-ML parser and type -inferencer with a \code{letrec} construct which allows the definition of -recursive functions. Syntax: -\begin{lstlisting} -letrec ident "=" term in term . -\end{lstlisting} -The typing of \code{letrec} is as for \code{let}, -except that the defined identifier is visible in the defining expression. Using \code{letrec}, the \code{length} function for lists can now be defined as follows. -\begin{lstlisting} -letrec length = \xs. - if (isEmpty xs) - zero - (succ (length (tail xs))) -in ... -\end{lstlisting} -\end{exercise} - -\chapter{Abstractions for Concurrency}\label{sec:ex-concurrency} - -This section reviews common concurrent programming patterns and shows -how they can be implemented in Scala. - -\section{Signals and Monitors} - -\example -The {\em monitor} provides the basic means for mutual exclusion -of processes in Scala. Every instance of class \code{AnyRef} can be -used as a monitor by calling one or more of the methods below. -\begin{lstlisting} - def synchronized[a] (e: => a): a; - def wait(): unit; - def wait(msec: long): unit; - def notify(): unit; - def notifyAll(): unit; -\end{lstlisting} -The \code{synchronized} method executes its argument computation -\code{e} in mutual exclusive mode -- at any one time, only one thread -can execute a \code{synchronized} argument of a given monitor. - -Threads can suspend inside a monitor by waiting on a signal. Threads -that call the \code{wait} method wait until a \code{notify} method of -the same object is called subsequently by some other thread. Calls to -\code{notify} with no threads waiting for the signal are ignored. - -There is also a timed form of \code{wait}, which blocks only as long -as no signal was received or the specified amount of time (given in -milliseconds) has elapsed. Furthermore, there is a \code{notifyAll} -method which unblocks all threads which wait for the signal. These -methods, as well as class \code{Monitor} are primitive in Scala; they -are implemented in terms of the underlying runtime system. - -Typically, a thread waits for some condition to be established. If the -condition does not hold at the time of the wait call, the thread -blocks until some other thread has established the condition. It is -the responsibility of this other thread to wake up waiting processes -by issuing a \code{notify} or \code{notifyAll}. Note however, that -there is no guarantee that a waiting process gets to run immediately -after the call to notify is issued. It could be that other processes -get to run first which invalidate the condition again. Therefore, the -correct form of waiting for a condition $C$ uses a while loop: -\begin{lstlisting} -while (!$C$) wait(); -\end{lstlisting} - -As an example of how monitors are used, here is is an implementation -of a bounded buffer class. -\begin{lstlisting} -class BoundedBuffer[a](N: Int) { - var in = 0, out = 0, n = 0; - val elems = new Array[a](N); - - def put(x: a) = synchronized { - while (n >= N) wait(); - elems(in) = x ; in = (in + 1) % N ; n = n + 1; - if (n == 1) notifyAll(); - } - - def get: a = synchronized { - while (n == 0) wait(); - val x = elems(out) ; out = (out + 1) % N ; n = n - 1; - if (n == N - 1) notifyAll(); - x - } -} -\end{lstlisting} -And here is a program using a bounded buffer to communicate between a -producer and a consumer process. -\begin{lstlisting} -import scala.concurrent.ops._; -... -val buf = new BoundedBuffer[String](10); -spawn { while (true) { val s = produceString ; buf.put(s) } } -spawn { while (true) { val s = buf.get ; consumeString(s) } } -} -\end{lstlisting} -The \code{spawn} method spawns a new thread which executes the -expression given in the parameter. It is defined in object \code{concurrent.ops} -as follows. -\begin{lstlisting} -def spawn(p: => unit) = { - val t = new Thread() { override def run() = p; } - t.start() -} -\end{lstlisting} - -\comment{ -\section{Logic Variable} - -A logic variable (or lvar for short) offers operations \code{:=} -and \code{value} to define the variable and to retrieve its value. -Variables can be \code{define}d only once. A call to \code{value} -blocks until the variable has been defined. - -Logic variables can be implemented as follows. - -\begin{lstlisting} -class LVar[a] { - private val defined = new Signal - private var isDefined: boolean = false - private var v: a - def value = synchronized { - if (!isDefined) defined.wait - v - } - def :=(x: a) = synchronized { - v = x ; isDefined = true ; defined.send - } -} -\end{lstlisting} -} - -\section{SyncVars} - -A synchronized variable (or syncvar for short) offers \code{get} and -\code{put} operations to read and set the variable. \code{get} operations -block until the variable has been defined. An \code{unset} operation -resets the variable to undefined state. - -Here's the standard implementation of synchronized variables. -\begin{lstlisting} -package scala.concurrent; -class SyncVar[a] { - private var isDefined: Boolean = false; - private var value: a = _; - def get = synchronized { - if (!isDefined) wait(); - value - } - def set(x: a) = synchronized { - value = x ; isDefined = true ; notifyAll(); - } - def isSet: Boolean = synchronized { - isDefined; - } - def unset = synchronized { - isDefined = false; - } -} -\end{lstlisting} - -\section{Futures} -\label{sec:futures} - -A {\em future} is a value which is computed in parallel to some other -client thread, to be used by the client thread at some future time. -Futures are used in order to make good use of parallel processing -resources. A typical usage is: - -\begin{lstlisting} -import scala.concurrent.ops._; -... -val x = future(someLengthyComputation); -anotherLengthyComputation; -val y = f(x()) + g(x()); -\end{lstlisting} - -The \code{future} method is defined in object -\code{scala.concurrent.ops} as follows. -\begin{lstlisting} -def future[a](p: => a): unit => a = { - val result = new SyncVar[a]; - fork { result.set(p) } - (() => result.get) -} -\end{lstlisting} - -The \code{future} method gets as parameter a computation \code{p} to -be performed. The type of the computation is arbitrary; it is -represented by \code{future}'s type parameter \code{a}. The -\code{future} method defines a guard \code{result}, which takes a -parameter representing the result of the computation. It then forks -off a new thread that computes the result and invokes the -\code{result} guard when it is finished. In parallel to this thread, -the function returns an anonymous function of type \code{a}. -When called, this functions waits on the result guard to be -invoked, and, once this happens returns the result argument. -At the same time, the function reinvokes the \code{result} guard with -the same argument, so that future invocations of the function can -return the result immediately. - -\section{Parallel Computations} - -The next example presents a function \code{par} which takes a pair of -computations as parameters and which returns the results of the computations -in another pair. The two computations are performed in parallel. - -The function is defined in object -\code{scala.concurrent.ops} as follows. -\begin{lstlisting} - def par[a, b](xp: => a, yp: => b): Pair[a, b] = { - val y = new SyncVar[b]; - spawn { y set yp } - Pair(xp, y.get) - } -\end{lstlisting} -Defined in the same place is a function \code{replicate} which performs a -number of replicates of a computation in parallel. Each -replication instance is passed an integer number which identifies it. -\begin{lstlisting} - def replicate(start: Int, end: Int)(p: Int => Unit): Unit = { - if (start == end) - () - else if (start + 1 == end) - p(start) - else { - val mid = (start + end) / 2; - spawn { replicate(start, mid)(p) } - replicate(mid, end)(p) - } - } -\end{lstlisting} - -The next function uses \code{replicate} to perform parallel -computations on all elements of an array. - -\begin{lstlisting} -def parMap[a,b](f: a => b, xs: Array[a]): Array[b] = { - val results = new Array[b](xs.length); - replicate(0, xs.length) { i => results(i) = f(xs(i)) } - results -} -\end{lstlisting} - -\section{Semaphores} - -A common mechanism for process synchronization is a {\em lock} (or: -{\em semaphore}). A lock offers two atomic actions: \prog{acquire} and -\prog{release}. Here's the implementation of a lock in Scala: - -\begin{lstlisting} -package scala.concurrent; - -class Lock { - var available = true; - def acquire = synchronized { - if (!available) wait(); - available = false - } - def release = synchronized { - available = true; - notify() - } -} -\end{lstlisting} - -\section{Readers/Writers} - -A more complex form of synchronization distinguishes between {\em -readers} which access a common resource without modifying it and {\em -writers} which can both access and modify it. To synchronize readers -and writers we need to implement operations \prog{startRead}, \prog{startWrite}, -\prog{endRead}, \prog{endWrite}, such that: -\begin{itemize} -\item there can be multiple concurrent readers, -\item there can only be one writer at one time, -\item pending write requests have priority over pending read requests, -but don't preempt ongoing read operations. -\end{itemize} -The following implementation of a readers/writers lock is based on the -{\em mailbox} concept (see Section~\ref{sec:mailbox}). - -\begin{lstlisting} -import scala.concurrent._; - -class ReadersWriters { - val m = new MailBox; - private case class Writers(n: int), Readers(n: int) { m send this; }; - Writers(0); Readers(0); - def startRead = m receive { - case Writers(n) if n == 0 => m receive { - case Readers(n) => Writers(0) ; Readers(n+1); - } - } - def startWrite = m receive { - case Writers(n) => - Writers(n+1); - m receive { case Readers(n) if n == 0 => } - } - def endRead = m receive { - case Readers(n) => Readers(n-1) - } - def endWrite = m receive { - case Writers(n) => Writers(n-1) ; if (n == 0) Readers(0) - } -} -\end{lstlisting} - -\section{Asynchronous Channels} - -A fundamental way of interprocess communication is the asynchronous -channel. Its implementation makes use the following simple class for linked -lists: -\begin{lstlisting} -class LinkedList[a] { - var elem: a = _; - var next: LinkedList[a] = null; -} -\end{lstlisting} -To facilitate insertion and deletion of elements into linked lists, -every reference into a linked list points to the node which precedes -the node which conceptually forms the top of the list. -Empty linked lists start with a dummy node, whose successor is \code{null}. - -The channel class uses a linked list to store data that has been sent -but not read yet. At the opposite end, threads that -wish to read from an empty channel, register their presence by -incrementing the \code{nreaders} field and waiting to be notified. -\begin{lstlisting} -package scala.concurrent; - -class Channel[a] { - class LinkedList[a] { - var elem: a = _; - var next: LinkedList[a] = null; - } - private var written = new LinkedList[a]; - private var lastWritten = written; - private var nreaders = 0; - - def write(x: a) = synchronized { - lastWritten.elem = x; - lastWritten.next = new LinkedList[a]; - lastWritten = lastWritten.next; - if (nreaders > 0) notify(); - } - - def read: a = synchronized { - if (written.next == null) { - nreaders = nreaders + 1; wait(); nreaders = nreaders - 1; - } - val x = written.elem; - written = written.next; - x - } -} -\end{lstlisting} - -\section{Synchronous Channels} - -Here's an implementation of synchronous channels, where the sender of -a message blocks until that message has been received. Synchronous -channels only need a single variable to store messages in transit, but -three signals are used to coordinate reader and writer processes. -\begin{lstlisting} -package scala.concurrent; - -class SyncChannel[a] { - private var data: a = _; - private var reading = false; - private var writing = false; - - def write(x: a) = synchronized { - while (writing) wait(); - data = x; - writing = true; - if (reading) notifyAll(); - else while (!reading) wait(); - } - - def read: a = synchronized { - while (reading) wait(); - reading = true; - while (!writing) wait(); - val x = data; - writing = false; - reading = false; - notifyAll(); - x - } -} -\end{lstlisting} - -\section{Workers} - -Here's an implementation of a {\em compute server} in Scala. The -server implements a \code{future} method which evaluates a given -expression in parallel with its caller. Unlike the implementation in -Section~\ref{sec:futures} the server computes futures only with a -predefined number of threads. A possible implementation of the server -could run each thread on a separate processor, and could hence avoid -the overhead inherent in context-switching several threads on a single -processor. - -\begin{lstlisting} -import scala.concurrent._, scala.concurrent.ops._; - -class ComputeServer(n: Int) { - - private trait Job { - type t; - def task: t; - def ret(x: t): Unit; - } - - private val openJobs = new Channel[Job](); - - private def processor(i: Int): Unit = { - while (true) { - val job = openJobs.read; - job.ret(job.task) - } - } - - def future[a](p: => a): () => a = { - val reply = new SyncVar[a](); - openJobs.write{ - new Job { - type t = a; - def task = p; - def ret(x: a) = reply.set(x); - } - } - () => reply.get - } - - spawn(replicate(0, n) { processor }) -} -\end{lstlisting} -Expressions to be computed (i.e. arguments -to calls of \code{future}) are written to the \code{openJobs} -channel. A {\em job} is an object with -\begin{itemize} -\item -An abstract type \code{t} which describes the result of the compute -job. -\item -A parameterless \code{task} method of type \code{t} which denotes -the expression to be computed. -\item -A \code{return} method which consumes the result once it is -computed. -\end{itemize} -The compute server creates $n$ \code{processor} processes as part of -its initialization. Every such process repeatedly consumes an open -job, evaluates the job's \code{task} method and passes the result on -to the job's -\code{return} method. The polymorphic \code{future} method creates -a new job where the \code{return} method is implemented by a guard -named \code{reply} and inserts this job into the set of open jobs by -calling the \code{isOpen} guard. It then waits until the corresponding -\code{reply} guard is called. - -The example demonstrates the use of abstract types. The abstract type -\code{t} keeps track of the result type of a job, which can vary -between different jobs. Without abstract types it would be impossible -to implement the same class to the user in a statically type-safe -way, without relying on dynamic type tests and type casts. - - -Here is some code which uses the compute server to evaluate -the expression \code{41 + 1}. -\begin{lstlisting} -object Test with Executable { - val server = new ComputeServer(1); - val f = server.future(41 + 1); - Console.println(f()) -} -\end{lstlisting} - -\section{Mailboxes} -\label{sec:mailbox} - -Mailboxes are high-level, flexible constructs for process -synchronization and communication. They allow sending and receiving of -messages. A {\em message} in this context is an arbitrary object. -There is a special message \code{TIMEOUT} which is used to signal a -time-out. -\begin{lstlisting} -case object TIMEOUT; -\end{lstlisting} -Mailboxes implement the following signature. -\begin{lstlisting} -class MailBox { - def send(msg: Any): unit; - def receive[a](f: PartialFunction[Any, a]): a; - def receiveWithin[a](msec: long)(f: PartialFunction[Any, a]): a; -} -\end{lstlisting} -The state of a mailbox consists of a multi-set of messages. -Messages are added to the mailbox the \code{send} method. Messages -are removed using the \code{receive} method, which is passed a message -processor \code{f} as argument, which is a partial function from -messages to some arbitrary result type. Typically, this function is -implemented as a pattern matching expression. The \code{receive} -method blocks until there is a message in the mailbox for which its -message processor is defined. The matching message is then removed -from the mailbox and the blocked thread is restarted by applying the -message processor to the message. Both sent messages and receivers are -ordered in time. A receiver $r$ is applied to a matching message $m$ -only if there is no other (message, receiver) pair which precedes $(m, -r)$ in the partial ordering on pairs that orders each component in -time. - -As a simple example of how mailboxes are used, consider a -one-place buffer: -\begin{lstlisting} -class OnePlaceBuffer { - private val m = new MailBox; // An internal mailbox - private case class Empty, Full(x: int); // Types of messages we deal with - m send Empty; // Initialization - def write(x: int): unit = - m receive { case Empty => m send Full(x) } - def read: int = - m receive { case Full(x) => m send Empty ; x } -} -\end{lstlisting} -Here's how the mailbox class can be implemented: -\begin{lstlisting} -class MailBox { - private abstract class Receiver extends Signal { - def isDefined(msg: Any): boolean; - var msg = null; - } -\end{lstlisting} -We define an internal class for receivers with a test method -\code{isDefined}, which indicates whether the receiver is -defined for a given message. The receiver inherits from class -\code{Signal} a \code{notify} method which is used to wake up a -receiver thread. When the receiver thread is woken up, the message it -needs to be applied to is stored in the \code{msg} variable of -\code{Receiver}. -\begin{lstlisting} - private val sent = new LinkedList[Any]; - private var lastSent = sent; - private val receivers = new LinkedList[Receiver]; - private var lastReceiver = receivers; -\end{lstlisting} -The mailbox class maintains two linked lists, -one for sent but unconsumed messages, the other for waiting receivers. -\begin{lstlisting} - def send(msg: Any): unit = synchronized { - var r = receivers, r1 = r.next; - while (r1 != null && !r1.elem.isDefined(msg)) { - r = r1; r1 = r1.next; - } - if (r1 != null) { - r.next = r1.next; r1.elem.msg = msg; r1.elem.notify; - } else { - lastSent = insert(lastSent, msg); - } - } -\end{lstlisting} -The \code{send} method first checks whether a waiting receiver is -applicable to the sent message. If yes, the receiver is notified. -Otherwise, the message is appended to the linked list of sent messages. -\begin{lstlisting} - def receive[a](f: PartialFunction[Any, a]): a = { - val msg: Any = synchronized { - var s = sent, s1 = s.next; - while (s1 != null && !f.isDefinedAt(s1.elem)) { - s = s1; s1 = s1.next - } - if (s1 != null) { - s.next = s1.next; s1.elem - } else { - val r = insert(lastReceiver, new Receiver { - def isDefined(msg: Any) = f.isDefinedAt(msg); - }); - lastReceiver = r; - r.elem.wait(); - r.elem.msg - } - } - f(msg) - } -\end{lstlisting} -The \code{receive} method first checks whether the message processor function -\code{f} can be applied to a message that has already been sent but that -was not yet consumed. If yes, the thread continues immediately by -applying \code{f} to the message. Otherwise, a new receiver is created -and linked into the \code{receivers} list, and the thread waits for a -notification on this receiver. Once the thread is woken up again, it -continues by applying \code{f} to the message that was stored in the -receiver. The insert method on linked lists is defined as follows. -\begin{lstlisting} - def insert(l: LinkedList[a], x: a): LinkedList[a] = { - l.next = new LinkedList[a]; - l.next.elem = x; - l.next.next = l.next; - l - } -\end{lstlisting} -The mailbox class also offers a method \code{receiveWithin} -which blocks for only a specified maximal amount of time. If no -message is received within the specified time interval (given in -milliseconds), the message processor argument $f$ will be unblocked -with the special \code{TIMEOUT} message. The implementation of -\code{receiveWithin} is quite similar to \code{receive}: -\begin{lstlisting} - def receiveWithin[a](msec: long)(f: PartialFunction[Any, a]): a = { - val msg: Any = synchronized { - var s = sent, s1 = s.next; - while (s1 != null && !f.isDefinedAt(s1.elem)) { - s = s1; s1 = s1.next ; - } - if (s1 != null) { - s.next = s1.next; s1.elem - } else { - val r = insert(lastReceiver, new Receiver { - def isDefined(msg: Any) = f.isDefinedAt(msg); - }); - lastReceiver = r; - r.elem.wait(msec); - if (r.elem.msg == null) r.elem.msg = TIMEOUT; - r.elem.msg - } - } - f(msg) - } -} // end MailBox -\end{lstlisting} -The only differences are the timed call to \code{wait}, and the -statement following it. - -\section{Actors} -\label{sec:actors} - -Chapter~\ref{chap:example-auction} sketched as a program example the -implementation of an electronic auction service. This service was -based on high-level actor processes, that work by inspecting messages -in their mailbox using pattern matching. An actor is simply a thread -whose communication primitives are those of a mailbox. Actors are -hence defined as a mixin composition extension of Java's standard -\code{Thread} class with the \code{MailBox} class. -\begin{lstlisting} -abstract class Actor extends Thread with MailBox; -\end{lstlisting} - -\comment{ -As an extended example of an application that uses actors, we come -back to the auction server example of Section~\ref{sec:ex-auction}. -The following code implements: - -\begin{figure}[thb] -\begin{lstlisting} -class AuctionMessage; -case class - Offer(bid: int, client: Process), // make a bid - Inquire(client: Process) extends AuctionMessage // inquire status - -class AuctionReply; -case class - Status(asked; int, expiration: Date), // asked sum, expiration date - BestOffer, // yours is the best offer - BeatenOffer(maxBid: int), // offer beaten by maxBid - AuctionConcluded(seller: Process, client: Process),// auction concluded - AuctionFailed // failed with no bids - AuctionOver extends AuctionReply // bidding is closed -\end{lstlisting} -\end{figure} - -\begin{lstlisting} -class Auction(seller: Process, minBid: int, closing: Date) - extends Process { - - val timeToShutdown = 36000000 // msec - val delta = 10 // bid increment -\end{lstlisting} -\begin{lstlisting} - override def run = { - var askedBid = minBid - var maxBidder: Process = null - while (true) { - receiveWithin ((closing - Date.currentDate).msec) { - case Offer(bid, client) => { - if (bid >= askedBid) { - if (maxBidder != null && maxBidder != client) { - maxBidder send BeatenOffer(bid) - } - maxBidder = client - askedBid = bid + delta - client send BestOffer - } else client send BeatenOffer(maxBid) - } -\end{lstlisting} -\begin{lstlisting} - case Inquire(client) => { - client send Status(askedBid, closing) - } -\end{lstlisting} -\begin{lstlisting} - case TIMEOUT => { - if (maxBidder != null) { - val reply = AuctionConcluded(seller, maxBidder) - maxBidder send reply - seller send reply - } else seller send AuctionFailed - receiveWithin (timeToShutdown) { - case Offer(_, client) => client send AuctionOver ; discardAndContinue - case _ => discardAndContinue - case TIMEOUT => stop - } - } -\end{lstlisting} -\begin{lstlisting} - case _ => discardAndContinue - } - } - } -\end{lstlisting} -\begin{lstlisting} - def houseKeeping: int = { - val Limit = 100 - var nWaiting: int = 0 - receiveWithin(0) { - case _ => - nWaiting = nWaiting + 1 - if (nWaiting > Limit) { - receiveWithin(0) { - case Offer(_, _) => continue - case TIMEOUT => - case _ => discardAndContinue - } - } else continue - case TIMEOUT => - } - } -} -\end{lstlisting} -\begin{lstlisting} -class Bidder (auction: Process, minBid: int, maxBid: int) - extends Process { - val MaxTries = 3 - val Unknown = -1 - - var nextBid = Unknown -\end{lstlisting} -\begin{lstlisting} - def getAuctionStatus = { - var nTries = 0 - while (nextBid == Unknown && nTries < MaxTries) { - auction send Inquiry(this) - nTries = nTries + 1 - receiveWithin(waitTime) { - case Status(bid, _) => bid match { - case None => nextBid = minBid - case Some(curBid) => nextBid = curBid + Delta - } - case TIMEOUT => - case _ => continue - } - } - status - } -\end{lstlisting} -\begin{lstlisting} - def bid: unit = { - if (nextBid < maxBid) { - auction send Offer(nextBid, this) - receive { - case BestOffer => - receive { - case BeatenOffer(bestBid) => - nextBid = bestBid + Delta - bid - case AuctionConcluded(seller, client) => - transferPayment(seller, nextBid) - case _ => continue - } - - case BeatenOffer(bestBid) => - nextBid = nextBid + Delta - bid - - case AuctionOver => - - case _ => continue - } - } - } -\end{lstlisting} -\begin{lstlisting} - override def run = { - getAuctionStatus - if (nextBid != Unknown) bid - } - - def transferPayment(seller: Process, amount: int) -} -\end{lstlisting} -} |