From 4c4d3245252f4a86e9fe93fb37573b838098dd68 Mon Sep 17 00:00:00 2001
From: Rex Kerr
Date: Tue, 10 Sep 2013 07:49:53 -0700
Subject: SI-7356 - Source.mkString performs painfully slow (...)

1. Wrote a custom mkString for BufferedSource.
2. Moved the logic for rescuing the iterator-buffered char out of
   BufferedLineIterator and into a private method in BufferedSource.

Speed test based on the one in the issue tracker (but written correctly)
indicates that performance is equal or better to getLines.

This resolves SI-7356 in a minimal fashion.
---
 src/library/scala/io/BufferedSource.scala | 48 ++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/library/scala/io/BufferedSource.scala b/src/library/scala/io/BufferedSource.scala
index c170d28127..832c7b23f9 100644
--- a/src/library/scala/io/BufferedSource.scala
+++ b/src/library/scala/io/BufferedSource.scala
@@ -8,9 +8,11 @@
 
 package scala.io
 
+import java.util.Arrays
 import java.io.{ InputStream, BufferedReader, InputStreamReader, PushbackReader }
 import Source.DefaultBufSize
 import scala.collection.{ Iterator, AbstractIterator }
+import scala.collection.mutable.ArrayBuffer
 
 /** This object provides convenience methods to create an iterable
  *  representation of a source file.
@@ -39,8 +41,8 @@ class BufferedSource(inputStream: InputStream, bufferSize: Int)(implicit val cod
     takeWhile (_ != -1)
     map (_.toChar)
   )
-
-  class BufferedLineIterator extends AbstractIterator[String] with Iterator[String] {
+  
+  private def decachedReader: BufferedReader = {
     // Don't want to lose a buffered char sitting in iter either. Yes,
     // this is ridiculous, but if I can't get rid of Source, and all the
     // Iterator bits are designed into Source, and people create Sources
@@ -48,18 +50,21 @@ class BufferedSource(inputStream: InputStream, bufferSize: Int)(implicit val cod
     // that calls hasNext to find out if they're empty, and that leads
     // to chars being buffered, and no, I don't work here, they left a
     // door unlocked.
-    private val lineReader: BufferedReader = {
-      // To avoid inflicting this silliness indiscriminately, we can
-      // skip it if the char reader was never created: and almost always
-      // it will not have been created, since getLines will be called
-      // immediately on the source.
-      if (charReaderCreated && iter.hasNext) {
-        val pb = new PushbackReader(charReader)
-        pb unread iter.next().toInt
-        new BufferedReader(pb, bufferSize)
-      }
-      else charReader
+    // To avoid inflicting this silliness indiscriminately, we can
+    // skip it if the char reader was never created: and almost always
+    // it will not have been created, since getLines will be called
+    // immediately on the source.
+    if (charReaderCreated && iter.hasNext) {
+      val pb = new PushbackReader(charReader)
+      pb unread iter.next().toInt
+      new BufferedReader(pb, bufferSize)
     }
+    else charReader
+  }
+  
+  
+  class BufferedLineIterator extends AbstractIterator[String] with Iterator[String] {
+    private val lineReader = decachedReader
     var nextLine: String = null
 
     override def hasNext = {
@@ -79,5 +84,20 @@ class BufferedSource(inputStream: InputStream, bufferSize: Int)(implicit val cod
   }
 
   override def getLines(): Iterator[String] = new BufferedLineIterator
+  
+  /** Efficiently converts the entire remaining input into a string. */
+  override def mkString = {
+    // Speed up slurping of whole data set in the simplest cases.
+    // Read through allReader, not charReader: decachedReader may have pushed
+    // a char that iter had already buffered back in front of the stream.
+    val allReader = decachedReader
+    val sb = new StringBuilder
+    val buf = new Array[Char](bufferSize)
+    var n = 0
+    while (n != -1) {
+      n = allReader.read(buf)
+      if (n>0) sb.appendAll(buf, 0, n)
+    }
+    sb.result
+  }
 }
-
-- 
cgit v1.2.3