From eb71465d1d584685f360929d2fe083f7b72e3000 Mon Sep 17 00:00:00 2001 From: Burak Emir Date: Thu, 31 Jan 2008 00:05:44 +0000 Subject: buffering when reading from an inputstream --- src/library/scala/io/BufferedSource.scala | 93 +++++++++++++++++++++++++++++++ src/library/scala/io/Source.scala | 77 ++++++++++++++++--------- test/files/run/unittest_io.scala | 19 ++++++- 3 files changed, 162 insertions(+), 27 deletions(-) create mode 100644 src/library/scala/io/BufferedSource.scala diff --git a/src/library/scala/io/BufferedSource.scala b/src/library/scala/io/BufferedSource.scala new file mode 100644 index 0000000000..f2e766ab20 --- /dev/null +++ b/src/library/scala/io/BufferedSource.scala @@ -0,0 +1,93 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2008, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +// $Id$ +package scala.io + +import java.io.{BufferedInputStream, File, FileInputStream, InputStream, + PrintStream} +import java.nio.{ByteBuffer, CharBuffer} +import java.nio.channels.{ByteChannel, Channels, ReadableByteChannel} +import java.nio.charset.{Charset, CharsetDecoder} +import java.net.{URI, URL} + +object BufferedSource { + + /** same as fromInputStream(inpStream, Charset.forName(enc), buffer_size, do_reset) */ + def fromInputStream(inpStream: InputStream, enc: String, buffer_size: Int, do_reset: () => Source): BufferedSource = + fromInputStream(inpStream, Charset.forName(enc), buffer_size, do_reset) + + /** same as fromInputStream(inpStream, charSet.newDecoder(), buffer_size, do_reset) */ + def fromInputStream(inpStream: InputStream, charSet: Charset, buffer_size: Int, do_reset: () => Source): BufferedSource = + fromInputStream(inpStream, charSet.newDecoder(), buffer_size, do_reset) + + /** constructs a BufferedSource instance from an input stream, using given decoder */ + def fromInputStream(inpStream: InputStream, decoder: CharsetDecoder, buffer_size: Int, do_reset: () => Source): BufferedSource = { + val byteChannel = Channels.newChannel(inpStream) + return new BufferedSource(byteChannel, decoder) { + val buf_size = buffer_size + override def reset = do_reset() + def close { inpStream.close } + } + } +} + +/** This object provides convenience methods to create an iterable + * representation of a source file. + * + * @author Burak Emir + * @version 1.0, 19/08/2004 + */ +abstract class BufferedSource(byteChannel: ReadableByteChannel, decoder: CharsetDecoder) extends Source { + + val buf_size: Int + + def close: Unit + + val byteBuffer = ByteBuffer.allocate(buf_size) + var charBuffer = CharBuffer.allocate(buf_size) + byteBuffer.position(byteBuffer.limit()) + charBuffer.position(charBuffer.limit()) + decoder.reset() + var endOfInput = false + + def fillBuffer() = { + byteBuffer.compact() + charBuffer.position(0) + byteChannel.read(byteBuffer) match { + case -1 => + endOfInput = true; + byteBuffer.position(0) + decoder.decode(byteBuffer, charBuffer, true) + decoder.flush(charBuffer) + case num_bytes => + endOfInput = false + byteBuffer.flip() + decoder.decode(byteBuffer, charBuffer, false) + charBuffer.flip() + } + } + override val iter = new Iterator[Char] { + var buf_char = { + fillBuffer() + if (endOfInput) ' ' else charBuffer.get() + } + def hasNext = { charBuffer.remaining() > 0 || !endOfInput} + def next = { + val c = buf_char + if (charBuffer.remaining() == 0) { + fillBuffer() + } + if (!endOfInput) { + buf_char = charBuffer.get() + } + c + } + } +} + diff --git a/src/library/scala/io/Source.scala b/src/library/scala/io/Source.scala index a9b5b1f495..083e401348 100644 --- a/src/library/scala/io/Source.scala +++ b/src/library/scala/io/Source.scala @@ -14,6 +14,8 @@ package scala.io import java.io.{BufferedInputStream, File, FileInputStream, InputStream, PrintStream} +import java.nio.{ByteBuffer, CharBuffer} +import java.nio.charset.Charset import java.net.{URI, URL} /** This object provides convenience methods to create an iterable @@ -24,6 +26,10 @@ import java.net.{URI, URL} */ object Source { + val DefaultBufSize = 2048 + + val NoReset: () => Source = () => throw new UnsupportedOperationException() + /** Creates a Source instance from the given array of bytes, * with empty description. * @@ -84,8 +90,9 @@ object Source { /** creates Source from file with given name, setting its description to * filename. + * @deprecated use fromFile(name, enc) instead. */ - def fromFile(name: String): Source = + @deprecated def fromFile(name: String): Source = fromFile(new File(name)) /** creates Source from file with given name, using given encoding, setting @@ -95,14 +102,21 @@ object Source { fromFile(new File(name), enc) /** creates Source from file with given file: URI + * @deprecated use fromFile(uri, enc) instead. */ - def fromFile(uri: URI): Source = + @deprecated def fromFile(uri: URI): Source = fromFile(new File(uri)) + /** creates Source from file with given file: URI + */ + def fromFile(uri: URI, enc: String): Source = + fromFile(new File(uri), enc) + /** creates Source from file, using default character encoding, setting its * description to filename. + * @deprecated use fromFile(file, enc) instead. */ - def fromFile(file: File): Source = { + @deprecated def fromFile(file: File): Source = { val arr: Array[Byte] = new Array[Byte](file.length().asInstanceOf[Int]) val is = new FileInputStream(file) is.read(arr) @@ -110,26 +124,25 @@ object Source { return setFileDescriptor(file, s) } + /** same as fromFile(file, enc, Source.DefaultBufSize) + */ + def fromFile(file: File, enc: String): Source = + fromFile(file, enc, Source.DefaultBufSize) + /** Creates Source from file, using given character encoding, setting its - * description to filename. - * - * @param file ... - * @param enc ... - * @return ... + * description to filename. Input is buffered in a buffer of size + * buffer_size. */ - def fromFile(file: File, enc: String): Source = { - val arr: Array[Byte] = new Array[Byte](file.length().asInstanceOf[Int]) - val is = new FileInputStream(file) - is.read(arr) - val s = fromBytes(arr, enc) - s.descr = file.getName() - return setFileDescriptor(file, s) + def fromFile(file: File, enc: String, buffer_size: Int): Source = { + val inpStream = new FileInputStream(file) + return setFileDescriptor(file, + BufferedSource.fromInputStream(inpStream, enc, buffer_size, { () => fromFile(file, enc, buffer_size)})) } - /** - * @param file ... - * @param s ... - * @return ... + /** This method sets the descr property of the given source to a string of the form "file:"+path + * @param file the file whose path we want to describe + * @param s the source whose property we set + * @return s */ def setFileDescriptor(file: File, s: Source): Source = { s.descr = new StringBuilder("file:").append(file.getAbsolutePath()).toString(); @@ -139,13 +152,20 @@ object Source { /** * @param s ... * @return ... + * @deprecated use fromURL(s, enc) */ def fromURL(s: String): Source = fromURL(new URL(s)) + /** same as fromURL(new URL(s), enc) + */ + def fromURL(s: String, enc:String): Source = + fromURL(new URL(s), enc) + /** * @param url ... * @return ... + * @deprecated use fromURL(url, enc) */ def fromURL(url: URL): Source = { val it = new Iterator[Char] { @@ -164,6 +184,11 @@ object Source { s } + /** same as fromInputStream(url.openStream(), enc) + */ + def fromURL(url: URL, enc:String): Source = + fromInputStream(url.openStream(), enc) + /** reads data from inputstream into a byte array, and calls fromBytes with given encoding. * If maxlen is given, reads not more bytes than maxlen. if maxlen was not given, or was <= 0, then * whole inputstream is read and closed afterwards. @@ -172,10 +197,9 @@ object Source { * @param enc the encoding to apply to the bytes * @param maxlen optionally, a positive int specifying maximum number of bytes to read */ - def fromInputStream(istream: InputStream, enc: String, maxlen: Option[Int]): Source = { - val BUFSIZE = 1024 + @deprecated def fromInputStream(istream: InputStream, enc: String, maxlen: Option[Int]): Source = { val limit = maxlen match { case Some(i) => i; case None => 0 } - val bi = new BufferedInputStream(istream, BUFSIZE) + val bi = new BufferedInputStream(istream, Source.DefaultBufSize) val bytes = new collection.mutable.ArrayBuffer[Byte]() var b = 0 var i = 0 @@ -186,13 +210,14 @@ object Source { fromBytes(bytes.toArray, enc) } - /** same as fromInputStream(is, enc, None) */ + /** same as BufferedSource.fromInputStream(is, enc, Source.DefaultBufSize) + */ def fromInputStream(is: InputStream, enc: String): Source = - fromInputStream(is, enc, None) + BufferedSource.fromInputStream(is, enc, Source.DefaultBufSize, { () => fromInputStream(is, enc) }) - /** same as fromInputStream(is, "utf-8", None) */ + /** same as BufferedSource.fromInputStream(is, "utf-8", Source.DefaultBufSize) */ def fromInputStream(is: InputStream): Source = - fromInputStream(is, "utf-8", None) + BufferedSource.fromInputStream(is, "utf-8", Source.DefaultBufSize, { () => fromInputStream(is) }) } diff --git a/test/files/run/unittest_io.scala b/test/files/run/unittest_io.scala index 183f989df8..53f9b3b87c 100644 --- a/test/files/run/unittest_io.scala +++ b/test/files/run/unittest_io.scala @@ -1,7 +1,7 @@ import testing.SUnit._ object Test extends TestConsoleMain { - def suite = new TestSuite(new UTF8Tests()) + def suite = new TestSuite(new UTF8Tests, new SourceTest) class UTF8Tests extends TestCase("UTF8Codec") { import io.UTF8Codec.encode @@ -22,4 +22,21 @@ object Test extends TestConsoleMain { }) } } + + class SourceTest extends TestCase("Source") { + def runTest { + val s = "Here is a test string" + val f = io.Source.fromBytes(s.getBytes("utf-8")) + val b = new collection.mutable.ArrayBuffer[Char]() + f.copyToBuffer(b) + assertEquals(s, new String(b.toArray)) + + /* todo: same factories for BufferedSource and Source + val g = io.BufferedSource.fromBytes(s.getBytes("utf-8")) + val c = new collection.mutable.ArrayBuffer[Char]() + g.copyToBuffer(c) + assertEquals(s, new String(c.toArray)) + */ + } + } } -- cgit v1.2.3