From ab0e09bb44567a19690529c03cb388295ce5d338 Mon Sep 17 00:00:00 2001 From: Alexander Clare Date: Thu, 12 Jul 2012 07:59:42 -0500 Subject: SI-5906 Search for sorted sequences Augments sequence classes with search functionality, using binary search (comparable to that found in java.util.Collections) for indexed sequences and linear search for others. --- src/library/scala/collection/Searching.scala | 100 +++++++++++++++++++++ .../scala/collection/generic/IsSeqLike.scala | 38 ++++++++ test/files/run/search.check | 6 ++ test/files/run/search.scala | 14 +++ 4 files changed, 158 insertions(+) create mode 100644 src/library/scala/collection/Searching.scala create mode 100644 src/library/scala/collection/generic/IsSeqLike.scala create mode 100644 test/files/run/search.check create mode 100644 test/files/run/search.scala diff --git a/src/library/scala/collection/Searching.scala b/src/library/scala/collection/Searching.scala new file mode 100644 index 0000000000..d62421b486 --- /dev/null +++ b/src/library/scala/collection/Searching.scala @@ -0,0 +1,100 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2012, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.collection + +import scala.annotation.tailrec +import scala.collection.generic.IsSeqLike +import scala.math.Ordering + +/** A collection of wrappers that provide sequence classes with search functionality. + * + * Example usage: + * {{{ + * import scala.collection.Searching._ + * val l = List(1, 2, 3, 4, 5) + * l.search(3) + * // == 2 + * }}} + */ +object Searching { + class SearchImpl[A, Repr](val coll: SeqLike[A, Repr]) { + /** Search the sorted sequence for a specific element. + * + * The sequence should be sorted with the same `Ordering` before calling; otherwise, + * the results are undefined. + * + * @see [[scala.math.Ordering]] + * @see [[scala.collection.SeqLike]], method `sorted` + * + * @param elem the element to find. + * @param ord the ordering to be used to compare elements. + * @return a `Right` value containing the index corresponding to the element in the + * $coll, or a `Left` value containing the index where the element would be + * inserted if the element is not in the $coll. + */ + final def search[B >: A](elem: B)(implicit ord: Ordering[B]): Either[Int, Int] = + coll match { + case _: IndexedSeqLike[A, Repr] => binarySearch(elem, -1, coll.length)(ord) + case _ => linearSearch(coll.view, elem, 0)(ord) + } + + /** Search within an interval in the sorted sequence for a specific element. + * + * The sequence should be sorted with the same `Ordering` before calling; otherwise, + * the results are undefined. + * + * @see [[scala.math.Ordering]] + * @see [[scala.collection.SeqLike]], method `sorted` + * + * @param elem the element to find. + * @param from the index where the search starts. + * @param to the index following where the search ends. + * @param ord the ordering to be used to compare elements. + * @return a `Right` value containing the index corresponding to the element in the + * $coll, or a `Left` value containing the index where the element would be + * inserted if the element is not in the $coll. + */ + final def search[B >: A](elem: B, from: Int, to: Int) + (implicit ord: Ordering[B]): Either[Int, Int] = + coll match { + case _: IndexedSeqLike[A, Repr] => binarySearch(elem, from-1, to)(ord) + case _ => linearSearch(coll.view(from, to), elem, from)(ord) + } + + @tailrec + private def binarySearch[B >: A](elem: B, from: Int, to: Int) + (implicit ord: Ordering[B]): Either[Int, Int] = { + if ((to-from) == 1) Left(from) else { + val idx = (to+from)/2 + math.signum(ord.compare(elem, coll(idx))) match { + case -1 => binarySearch(elem, from, idx)(ord) + case 1 => binarySearch(elem, idx, to)(ord) + case _ => Right(idx) + } + } + } + + private def linearSearch[B >: A](c: SeqView[A, Repr], elem: B, offset: Int) + (implicit ord: Ordering[B]): Either[Int, Int] = { + var idx = offset + val it = c.iterator + while (it.hasNext) { + val cur = it.next() + if (ord.equiv(elem, cur)) return Right(idx) + else if (ord.lt(elem, cur)) return Left(idx-1) + idx += 1 + } + Left(idx) + } + + } + + implicit def search[Repr, A](coll: Repr) + (implicit fr: IsSeqLike[Repr]): SearchImpl[fr.A, Repr] = new SearchImpl(fr.conversion(coll)) +} diff --git a/src/library/scala/collection/generic/IsSeqLike.scala b/src/library/scala/collection/generic/IsSeqLike.scala new file mode 100644 index 0000000000..47e2924d34 --- /dev/null +++ b/src/library/scala/collection/generic/IsSeqLike.scala @@ -0,0 +1,38 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003-2012, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +\* */ + +package scala.collection +package generic + +/** Type class witnessing that a collection representation type `Repr` has + * elements of type `A` and has a conversion to `SeqLike[A, Repr]`. + * + * @see [[scala.collection.generic.IsTraversableLike]] + */ +trait IsSeqLike[Repr] { + /** The type of elements we can traverse over. */ + type A + /** A conversion from the representation type `Repr` to a `SeqLike[A,Repr]`. */ + val conversion: Repr => SeqLike[A, Repr] +} + +object IsSeqLike { + import language.higherKinds + + implicit val stringRepr: IsSeqLike[String] { type A = Char } = + new IsSeqLike[String] { + type A = Char + val conversion = implicitly[String => SeqLike[Char, String]] + } + + implicit def seqLikeRepr[C[_], A0](implicit conv: C[A0] => SeqLike[A0,C[A0]]): IsSeqLike[C[A0]] { type A = A0 } = + new IsSeqLike[C[A0]] { + type A = A0 + val conversion = conv + } +} diff --git a/test/files/run/search.check b/test/files/run/search.check new file mode 100644 index 0000000000..3dc3c9d369 --- /dev/null +++ b/test/files/run/search.check @@ -0,0 +1,6 @@ +Right(2) +Right(4) +Left(9) +Right(2) +Right(4) +Left(9) diff --git a/test/files/run/search.scala b/test/files/run/search.scala new file mode 100644 index 0000000000..1e57fa2bf1 --- /dev/null +++ b/test/files/run/search.scala @@ -0,0 +1,14 @@ +object Test extends App { + import scala.collection.{LinearSeq, IndexedSeq} + import scala.collection.Searching._ + + val ls = LinearSeq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13) + println(ls.search(3)) + println(ls.search(5, 3, 8)) + println(ls.search(12)) + + val is = IndexedSeq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13) + println(is.search(3)) + println(is.search(5, 3, 8)) + println(is.search(12)) +} -- cgit v1.2.3 From 41869c39ba68ef574595533777d2a99fcabdbdc3 Mon Sep 17 00:00:00 2001 From: Alexander Clare Date: Mon, 16 Jul 2012 13:35:43 -0500 Subject: Changes suggested by @retronym and @jsuereth Change return type to case classes, branch between functions depending on IndexedSeq instead of IndexedSeqLike, and alter tests accordingly. Clean up doc comments and reflect changes in them. --- src/library/scala/collection/Searching.scala | 58 ++++++++++++++-------- .../scala/collection/generic/IsSeqLike.scala | 21 +++++++- test/files/run/search.check | 12 ++--- test/files/run/search.scala | 2 +- 4 files changed, 64 insertions(+), 29 deletions(-) diff --git a/src/library/scala/collection/Searching.scala b/src/library/scala/collection/Searching.scala index d62421b486..c1f7f4cae6 100644 --- a/src/library/scala/collection/Searching.scala +++ b/src/library/scala/collection/Searching.scala @@ -19,36 +19,51 @@ import scala.math.Ordering * import scala.collection.Searching._ * val l = List(1, 2, 3, 4, 5) * l.search(3) - * // == 2 + * // == Found(2) * }}} */ object Searching { + sealed abstract class SearchResult { + def insertionPoint: Int + } + + case class Found(foundIndex: Int) extends SearchResult { + override def insertionPoint = foundIndex + } + case class InsertionPoint(insertionPoint: Int) extends SearchResult + class SearchImpl[A, Repr](val coll: SeqLike[A, Repr]) { - /** Search the sorted sequence for a specific element. + /** Search the sorted sequence for a specific element. If the sequence is an + * `IndexedSeq`, a binary search is used. Otherwise, a linear search is used. * * The sequence should be sorted with the same `Ordering` before calling; otherwise, * the results are undefined. * + * @see [[scala.math.IndexedSeq]] * @see [[scala.math.Ordering]] * @see [[scala.collection.SeqLike]], method `sorted` * * @param elem the element to find. * @param ord the ordering to be used to compare elements. - * @return a `Right` value containing the index corresponding to the element in the - * $coll, or a `Left` value containing the index where the element would be - * inserted if the element is not in the $coll. + * + * @return a `Found` value containing the index corresponding to the element in the + * sequence, or the `InsertionPoint` where the element would be inserted if + * the element is not in the sequence. */ - final def search[B >: A](elem: B)(implicit ord: Ordering[B]): Either[Int, Int] = + final def search[B >: A](elem: B)(implicit ord: Ordering[B]): SearchResult = coll match { - case _: IndexedSeqLike[A, Repr] => binarySearch(elem, -1, coll.length)(ord) + case _: IndexedSeq[A] => binarySearch(elem, -1, coll.length)(ord) case _ => linearSearch(coll.view, elem, 0)(ord) } - /** Search within an interval in the sorted sequence for a specific element. + /** Search within an interval in the sorted sequence for a specific element. If the + * sequence is an IndexedSeq, a binary search is used. Otherwise, a linear search + * is used. * * The sequence should be sorted with the same `Ordering` before calling; otherwise, * the results are undefined. * + * @see [[scala.math.IndexedSeq]] * @see [[scala.math.Ordering]] * @see [[scala.collection.SeqLike]], method `sorted` * @@ -56,41 +71,42 @@ object Searching { * @param from the index where the search starts. * @param to the index following where the search ends. * @param ord the ordering to be used to compare elements. - * @return a `Right` value containing the index corresponding to the element in the - * $coll, or a `Left` value containing the index where the element would be - * inserted if the element is not in the $coll. + * + * @return a `Found` value containing the index corresponding to the element in the + * sequence, or the `InsertionPoint` where the element would be inserted if + * the element is not in the sequence. */ final def search[B >: A](elem: B, from: Int, to: Int) - (implicit ord: Ordering[B]): Either[Int, Int] = + (implicit ord: Ordering[B]): SearchResult = coll match { - case _: IndexedSeqLike[A, Repr] => binarySearch(elem, from-1, to)(ord) + case _: IndexedSeq[A] => binarySearch(elem, from-1, to)(ord) case _ => linearSearch(coll.view(from, to), elem, from)(ord) } @tailrec private def binarySearch[B >: A](elem: B, from: Int, to: Int) - (implicit ord: Ordering[B]): Either[Int, Int] = { - if ((to-from) == 1) Left(from) else { - val idx = (to+from)/2 + (implicit ord: Ordering[B]): SearchResult = { + if ((to-from) == 1) InsertionPoint(from) else { + val idx = from+(to-from)/2 math.signum(ord.compare(elem, coll(idx))) match { case -1 => binarySearch(elem, from, idx)(ord) case 1 => binarySearch(elem, idx, to)(ord) - case _ => Right(idx) + case _ => Found(idx) } } } private def linearSearch[B >: A](c: SeqView[A, Repr], elem: B, offset: Int) - (implicit ord: Ordering[B]): Either[Int, Int] = { + (implicit ord: Ordering[B]): SearchResult = { var idx = offset val it = c.iterator while (it.hasNext) { val cur = it.next() - if (ord.equiv(elem, cur)) return Right(idx) - else if (ord.lt(elem, cur)) return Left(idx-1) + if (ord.equiv(elem, cur)) return Found(idx) + else if (ord.lt(elem, cur)) return InsertionPoint(idx-1) idx += 1 } - Left(idx) + InsertionPoint(idx) } } diff --git a/src/library/scala/collection/generic/IsSeqLike.scala b/src/library/scala/collection/generic/IsSeqLike.scala index 47e2924d34..8eac025ed6 100644 --- a/src/library/scala/collection/generic/IsSeqLike.scala +++ b/src/library/scala/collection/generic/IsSeqLike.scala @@ -10,8 +10,27 @@ package scala.collection package generic /** Type class witnessing that a collection representation type `Repr` has - * elements of type `A` and has a conversion to `SeqLike[A, Repr]`. + * elements of type `A` and has a conversion to `SeqLike[A, Repr]`. * + * This type enables simple enrichment of `Seq`s with extension methods which + * can make full use of the mechanics of the Scala collections framework in + * their implementation. + * + * Example usage: + * {{{ + * class FilterMapImpl[A, Repr](val r: SeqLike[A, Repr]) { + * final def filterMap[B, That](f: A => Option[B])(implicit cbf: CanBuildFrom[Repr, B, That]): That = + * r.flatMap(f(_)) + * } + * implicit def filterMap[Repr, A](r: Repr)(implicit fr: IsSeqLike[Repr]): FilterMapImpl[fr.A,Repr] = + * new FilterMapImpl(fr.conversion(r)) + * + * val l = List(1, 2, 3, 4, 5) + * List(1, 2, 3, 4, 5) filterMap (i => if(i % 2 == 0) Some(i) else None) + * // == List(2, 4) + * }}} + * + * @see [[scala.collection.generic.Seq]] * @see [[scala.collection.generic.IsTraversableLike]] */ trait IsSeqLike[Repr] { diff --git a/test/files/run/search.check b/test/files/run/search.check index 3dc3c9d369..a885696509 100644 --- a/test/files/run/search.check +++ b/test/files/run/search.check @@ -1,6 +1,6 @@ -Right(2) -Right(4) -Left(9) -Right(2) -Right(4) -Left(9) +Found(2) +Found(4) +InsertionPoint(9) +Found(2) +Found(4) +InsertionPoint(9) diff --git a/test/files/run/search.scala b/test/files/run/search.scala index 1e57fa2bf1..ed7fed54a7 100644 --- a/test/files/run/search.scala +++ b/test/files/run/search.scala @@ -1,6 +1,6 @@ object Test extends App { import scala.collection.{LinearSeq, IndexedSeq} - import scala.collection.Searching._ + import scala.collection.Searching.search val ls = LinearSeq(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 13) println(ls.search(3)) -- cgit v1.2.3