From 6060a2984382ae52374c8a656af29fe88ef1d359 Mon Sep 17 00:00:00 2001 From: Paul Phillips Date: Tue, 23 Jun 2009 22:27:58 +0000 Subject: Some incremental steps on overhauling IO. scala.io.Codec for encapsulating charset encoding/decoding info, and then uses that class as the world's first default/implicit parameter: see BufferedSource.fromInputStream. --- src/dotnet-library/scala/io/Codec.scala | 1 + src/library/scala/io/BufferedSource.scala | 105 +++++++++++++----------------- src/library/scala/io/Codec.scala | 38 +++++++++++ src/library/scala/io/Source.scala | 22 +++---- 4 files changed, 94 insertions(+), 72 deletions(-) create mode 100644 src/dotnet-library/scala/io/Codec.scala create mode 100644 src/library/scala/io/Codec.scala diff --git a/src/dotnet-library/scala/io/Codec.scala b/src/dotnet-library/scala/io/Codec.scala new file mode 100644 index 0000000000..e291d7a90d --- /dev/null +++ b/src/dotnet-library/scala/io/Codec.scala @@ -0,0 +1 @@ +/* Codec.scala does not exist for the dotnet target */ \ No newline at end of file diff --git a/src/library/scala/io/BufferedSource.scala b/src/library/scala/io/BufferedSource.scala index 5926e9bd53..b6543688f8 100644 --- a/src/library/scala/io/BufferedSource.scala +++ b/src/library/scala/io/BufferedSource.scala @@ -11,29 +11,34 @@ package scala.io -import java.io.InputStream -import java.nio.{ByteBuffer, CharBuffer} -import java.nio.channels.{ByteChannel, Channels, ReadableByteChannel} -import java.nio.charset.{Charset, CharsetDecoder} +import java.io.{ InputStream, Reader, BufferedReader, InputStreamReader, IOException } +import java.nio.charset.{ Charset, CharsetDecoder, CodingErrorAction, CharacterCodingException, MalformedInputException } +import java.nio.channels.Channels +import Source._ -object BufferedSource { - - /** same as fromInputStream(inpStream, Charset.forName(enc), buffer_size, do_reset) */ - def fromInputStream(inpStream: InputStream, enc: String, buffer_size: Int, do_reset: () => Source): BufferedSource = - fromInputStream(inpStream, Charset.forName(enc), buffer_size, do_reset) - - /** same as fromInputStream(inpStream, charSet.newDecoder(), buffer_size, do_reset) */ - def fromInputStream(inpStream: InputStream, charSet: Charset, buffer_size: Int, do_reset: () => Source): BufferedSource = - fromInputStream(inpStream, charSet.newDecoder(), buffer_size, do_reset) - - /** constructs a BufferedSource instance from an input stream, using given decoder */ - def fromInputStream(inpStream: InputStream, decoder: CharsetDecoder, buffer_size: Int, do_reset: () => Source): BufferedSource = { - val byteChannel = Channels.newChannel(inpStream) - return new { - val buf_size = buffer_size - } with BufferedSource(byteChannel, decoder) { - override def reset = do_reset() - def close { inpStream.close } +object BufferedSource +{ + /** Reads data from inputStream with a buffered reader, + * using encoding in implicit parameter codec. + * + * @param inputStream the input stream from which to read + * @param bufferSize buffer size (defaults to BufferedSource.DefaultBufSize) + * @param reset a () => Source which resets the stream (defaults to Source.NoReset) + * @param codec (implicit) a scala.io.Codec specifying behavior (defaults to BufferedSource.defaultCodec) + * @return the buffered source + */ + def fromInputStream( + inputStream: InputStream, + bufferSize: Int = DefaultBufSize, + reset: () => Source = null + )(implicit codec: Codec = defaultCodec) = + { + if (reset == null) new BufferedSource(inputStream, bufferSize, codec) + else { + def _reset = reset + new BufferedSource(inputStream, bufferSize, codec) { + override def reset = _reset() + } } } } @@ -44,49 +49,31 @@ object BufferedSource { * @author Burak Emir * @version 1.0, 19/08/2004 */ -abstract class BufferedSource(byteChannel: ReadableByteChannel, decoder: CharsetDecoder) extends Source { - - val buf_size: Int - - def close: Unit - - val byteBuffer = ByteBuffer.allocate(buf_size) - var charBuffer = CharBuffer.allocate(buf_size) - byteBuffer.position(byteBuffer.limit()) - charBuffer.position(charBuffer.limit()) - decoder.reset() - var endOfInput = false +class BufferedSource( + inputStream: InputStream, + bufferSize: Int, + codec: Codec) +extends Source +{ + val decoder = codec.decoder + decoder.reset + decoder onMalformedInput codec.malformedAction + val reader = new BufferedReader(new InputStreamReader(inputStream, decoder), bufferSize) - def fillBuffer() = { - byteBuffer.compact() - charBuffer.compact() - var num_bytes = byteChannel.read(byteBuffer) - while (0 == num_bytes) { - Thread.sleep(1); // wait 1 ms for new data - num_bytes = byteChannel.read(byteBuffer) - } - endOfInput = (num_bytes == -1) - byteBuffer.flip() - decoder.decode(byteBuffer, charBuffer, endOfInput) - if (endOfInput) decoder.flush(charBuffer) - charBuffer.flip() - } override val iter = new Iterator[Char] { - var buf_char = { - fillBuffer() - if (endOfInput) ' ' else charBuffer.get() - } - def hasNext = { charBuffer.remaining() > 0 || !endOfInput} + private def getc(): Char = + try { reader.read().toChar } + catch { case e: CharacterCodingException => codec receivedMalformedInput e } + + var buf_char = getc + def hasNext = reader.ready() def next = { val c = buf_char - if (charBuffer.remaining() == 0) { - fillBuffer() - } - if (!endOfInput) { - buf_char = charBuffer.get() - } + if (hasNext) buf_char = getc c } } + def close: Unit = reader.close + def reset(): Source = NoReset() } diff --git a/src/library/scala/io/Codec.scala b/src/library/scala/io/Codec.scala new file mode 100644 index 0000000000..9ecf3f0b1b --- /dev/null +++ b/src/library/scala/io/Codec.scala @@ -0,0 +1,38 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2009, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id$ + +package scala.io + +import java.nio.charset.{ Charset, CharsetDecoder, CodingErrorAction } + +/** A class for character encoding/decoding preferences. + * + */ +class Codec(charSet: Charset) { + def decoder = charSet.newDecoder() + + // by default we replace bad chars with the decoder's replacement value (e.g. "?") + // this behavior can be altered by overriding these two methods + def malformedAction(): CodingErrorAction = CodingErrorAction.IGNORE + def receivedMalformedInput(e: Exception): Char = decoder.replacement()(0) +} + +object Codec { + def apply(encoding: String): Codec = new Codec(Charset forName encoding) + def apply(charSet: Charset): Codec = new Codec(charSet) + def apply(decoder: CharsetDecoder): Codec = { + val _decoder = decoder + new Codec(decoder.charset()) { override def decoder = _decoder } + } + + implicit def string2codec(s: String) = apply(s) + implicit def charset2codec(c: Charset) = apply(c) + implicit def decoder2codec(cd: CharsetDecoder) = apply(cd) +} \ No newline at end of file diff --git a/src/library/scala/io/Source.scala b/src/library/scala/io/Source.scala index b79de507f6..756cb66e8b 100644 --- a/src/library/scala/io/Source.scala +++ b/src/library/scala/io/Source.scala @@ -24,12 +24,13 @@ import java.nio.charset.Charset * @author Burak Emir * @version 1.0, 19/08/2004 */ -object Source { - +object Source +{ val DefaultBufSize = 2048 - val NoReset: () => Source = () => throw new UnsupportedOperationException() + def defaultCodec = Codec(util.Properties.encodingString) + /** Creates a Source instance from the given array of bytes, * with empty description. * @@ -129,7 +130,7 @@ object Source { val inpStream = new FileInputStream(file) val size = if (bufferSize > 0) bufferSize else Source.DefaultBufSize setFileDescriptor(file, - BufferedSource.fromInputStream(inpStream, enc, size, { () => fromFile(file, enc, size)})) + BufferedSource.fromInputStream(inpStream, size, () => fromFile(file, enc, size))(Codec(enc))) } /** This method sets the descr property of the given source to a string of the form "file:"+path @@ -195,7 +196,7 @@ object Source { @deprecated def fromInputStream(istream: InputStream, enc: String, maxlen: Option[Int]): Source = { val limit = maxlen match { case Some(i) => i; case None => 0 } - val bi = new BufferedInputStream(istream, Source.DefaultBufSize) + val bi = new BufferedInputStream(istream, DefaultBufSize) val bytes = new collection.mutable.ArrayBuffer[Byte]() var b = 0 var i = 0 @@ -206,15 +207,10 @@ object Source { fromBytes(bytes.toArray, enc) } - /** same as BufferedSource.fromInputStream(is, enc, Source.DefaultBufSize) + /** same as BufferedSource.fromInputStream(is) */ - def fromInputStream(is: InputStream, enc: String): Source = - BufferedSource.fromInputStream(is, enc, Source.DefaultBufSize, { () => fromInputStream(is, enc) }) - - /** same as BufferedSource.fromInputStream(is, "utf-8", Source.DefaultBufSize) */ - def fromInputStream(is: InputStream): Source = - BufferedSource.fromInputStream(is, "utf-8", Source.DefaultBufSize, { () => fromInputStream(is) }) - + def fromInputStream(is: InputStream, codec: Codec = defaultCodec): Source = + BufferedSource.fromInputStream(is, DefaultBufSize, () => fromInputStream(is, codec))(codec) } /** The class Source implements an iterable representation -- cgit v1.2.3