diff options
author | stenman <stenman@epfl.ch> | 2003-07-22 10:20:25 +0000 |
---|---|---|
committer | stenman <stenman@epfl.ch> | 2003-07-22 10:20:25 +0000 |
commit | 4ff4623f2ee14d974237d5524c72163f6cb0992e (patch) | |
tree | 7211f318b526f20325da7867237a594e90d5c09a | |
parent | e570d189e04924a7f2b59f4f859f642c43939a21 (diff) | |
download | scala-4ff4623f2ee14d974237d5524c72163f6cb0992e.tar.gz scala-4ff4623f2ee14d974237d5524c72163f6cb0992e.tar.bz2 scala-4ff4623f2ee14d974237d5524c72163f6cb0992e.zip |
GBTrees added to library
-rw-r--r-- | config/list/library.lst | 1 | ||||
-rwxr-xr-x | sources/scala/collection/immutable/GBTree.scala | 517 |
2 files changed, 518 insertions, 0 deletions
diff --git a/config/list/library.lst b/config/list/library.lst index 8e2557bd61..9ac97509bd 100644 --- a/config/list/library.lst +++ b/config/list/library.lst @@ -68,6 +68,7 @@ collection/immutable/ListSet.scala collection/immutable/Map.scala collection/immutable/Set.scala collection/immutable/Queue.scala +collection/immutable/GBTree.scala concurrent/Actor.scala concurrent/Channel.scala diff --git a/sources/scala/collection/immutable/GBTree.scala b/sources/scala/collection/immutable/GBTree.scala new file mode 100755 index 0000000000..ab80c655c2 --- /dev/null +++ b/sources/scala/collection/immutable/GBTree.scala @@ -0,0 +1,517 @@ +/* __ *\ +** ________ ___ / / ___ Scala API ** +** / __/ __// _ | / / / _ | (c) 2003, LAMP/EPFL ** +** __\ \/ /__/ __ |/ /__/ __ | ** +** /____/\___/_/ |_/____/_/ | | ** +** |/ ** +** $Id$ +** @author Erik Stenman +** @version 1.0, 10/07/2003 +** +** =====================================================================*/ +/** General Balanced Trees - highly efficient functional dictionaries. +** +** This is a scala version of gb_trees.erl which is +** copyrighted (C) 1999-2001 by Sven-Olof Nyström, and Richard Carlsson +** +** An efficient implementation of Prof. Arne Andersson's General +** Balanced Trees. These have no storage overhead compared to plain +** unbalanced binary trees, and their performance is in general better +** than AVL trees. +** --------------------------------------------------------------------- +** This library is free software; you can redistribute it and/or modify +** it under the terms of the GNU Lesser General Public License as +** published by the Free Software Foundation; either version 2 of the +** License, or (at your option) any later version. +** +** This library is distributed in the hope that it will be useful, but +** WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +** Lesser General Public License for more details. +** +** You should have received a copy of the GNU Lesser General Public +** License along with this library; if not, write to the Free Software +** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +** USA +** +** Author contact: erik.stenman@epfl.ch +** (svenolof@csd.uu.se, richardc@csd.uu.se) +** --------------------------------------------------------------------- +** +** +** - update(X, V): updates key X to value V in the tree; returns the +** new tree. Assumes that the key is present in the tree. +** +** - enter(X, V): inserts key X with value V into the tree if the key +** is not present in the tree, otherwise updates key X to value V in +** the tree. Returns the new tree. +** +** - delete(X): removes key X from the tree; returns new tree. Assumes +** that the key is present in the tree. +** +** - delete_any(X): removes key X from the tree if the key is present +** in the tree, otherwise does nothing; returns new tree. +** +** - balance: rebalances the tree. Note that this is rarely necessary, +** but may be motivated when a large number of entries have been +** deleted from the tree without further insertions. Rebalancing could +** then be forced in order to minimise lookup times, since deletion +** only does not rebalance the tree. +** +** - contains(X): returns `true' if key X is present in the tree, and +** `false' otherwise. +** +** - keys: returns an ordered list of all keys in the tree. +** +** - values: returns the list of values for all keys in the tree, +** sorted by their corresponding keys. Duplicates are not removed. +** +** - to_list: returns an ordered list of Pair(Key, Value) for all +** keys in the tree. +** +** +** - iterator: returns an iterator that can be used for traversing +** the entries of the tree; see `next'. The implementation of this is +** very efficient; traversing the whole tree using `next' is only +** slightly slower than getting the list of all elements using +** `to_list' and traversing that. The main advantage of the iterator +** approach is that it does not require the complete list of all +** elements to be built in memory at one time. +** +** - next: returns Tuple3(X, V, S1) where X is the smallest key referred to +** by the iterator, and S1 is the new iterator to be used for +** traversing the remaining entries, or the atom `none' if no entries +** remain. +*/ + +package scala.collection.immutable; + +object GBTree { + def Empty[A <: Ord[A], B] = new GBTree[A, B]; +} + +/** %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +** Data structure: +** - Size - the number of elements in the tree. +** - Tree, which is composed of nodes of the form: +** - Node(Key, Value, Smaller, Bigger), and the "empty tree" node: +** - Nil. +** +** I make no attempt to balance trees after deletions. Since deletions +** don't increase the height of a tree, I figure this is OK. +** +** Original balance condition h(T) <= ceil(c * log(|T|)) has been +** changed to the similar (but not quite equivalent) condition 2 ^ h(T) +** <= |T| ^ c. I figure this should also be OK. +** +** %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +*/ + +class GBTree[A <: Ord[A], B]() with Map[A, B, GBTree[A,B]] { + type TREE = Tree[A,B]; + + /** - size: returns the number of nodes in the tree as an integer. + ** Returns 0 (zero) if the tree is empty. + **/ + def size = 0; + protected val tree:TREE = Nil[A,B](); + + /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + * Factory method to create new GB-trees. + */ + private def mkGBTree[C >: A <: Ord[C],D>:B](sz:int,t:Tree[C,D]) = + new GBTree[A,B](){ + override def size=sz; + override protected val tree:this.Tree[A,B]=t as this.Tree[A,B]; + }; + + + /* %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ + + + /** Checks if the tree is empty. + * + * @returns true, iff there is no element in the tree. + */ + override def isEmpty = size == 0; + + /** Looks up the key in the tree; + ** @returns Option.Some(v), + ** or Option.None if the key is not present. + ** + **/ + def get(key:A) = get_1(key, tree); + + /* The term order is an arithmetic total order, so we should not + ** test exact equality for the keys. (If we do, then it becomes + ** possible that neither `>', `<', nor `=:=' matches.) Testing '<' + ** and '>' first is statistically better than testing for + ** equality, and also allows us to skip the test completely in the + ** remaining case. + */ + private def get_1(key:A,t:Tree[A,B]):Option[B] = + t.match { + case Node(key1,_,smaller,_) if (key < key1) => + get_1(key, smaller); + case Node(key1, _, _, bigger) if (key > key1) => + get_1(key, bigger); + case Node(_, value, _, _) => Some(value); + case Nil() => None; + } + + /* This is a specialized version of `get'. + */ + override def contains(key:A) = is_defined_1(key, tree); + + private def is_defined_1(key:A, t:Tree[A,B]):Boolean = + t.match { + case Node(key1, _, smaller, _) if(key < key1) => + is_defined_1(key, smaller); + case Node(key1, _, _, bigger) if(key > key1) => + is_defined_1(key, bigger); + case Node(_, _, _, _) => true; + case Nil() => false + } + + + + /** Retreives the value stored with the <code>key</code> + ** in the tree. Assumes that the key is present in the tree. + ** @returns the value stored with the <code>key</code>. + ** @throws "key not found". + **/ + override def apply(key:A):B = apply_1(key, tree); + + private def apply_1(key:A,t:Tree[A,B]):B = + t.match { + case Node(key1, _, smaller, _) if(key < key1) => + apply_1(key, smaller); + case Node(key1, _, _, bigger) if(key > key1) => + apply_1(key, bigger); + case Node(_, value, _, _) => value; + case Nil() => error("key not found") + }; + + def update(key:A, value:B):GBTree[A,B] = + if(contains(key)) update_1(key, value) + else insert(key, value); + + private def update_1(key:A, value:B):GBTree[A,B] = { + val t1:Tree[A,B] = update_1(key, value, tree); + mkGBTree(size,t1); + } + /** See `get' for notes on the term comparison order. + **/ + private def update_1(key:A, value:B, t:Tree[A,B]):Tree[A,B] = + t.match{ + case Node(key1, v, smaller, bigger) if(key < key1) => + Node(key1, v, update_1(key, value, smaller), bigger); + case Node(key1, v, smaller, bigger) if(key > key1) => + Node(key1, v, smaller, update_1(key, value, bigger)); + case Node(_, _, smaller, bigger) => + Node(key, value, smaller, bigger); + }; + + // %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% */ + abstract class InsertTree[C <: Ord[C],D](); + case class ITree[C <: Ord[C],D](t:Tree[C,D]) extends InsertTree[C,D]; + case class INode[C,D](t:Tree[A,B],height:int,size:int) extends InsertTree[A,B]; + + + + /** Inserts the key <code>key</code> + ** with value <code>value</code> into the tree; returns + ** the new tree. + ** Assumes that the key is *not* present in the tree. + ** @returns tree:GBTree[A,B]. + ** @throws GBTree.KeyExists(key) + **/ + def insert(key:A, value:B):GBTree[A,B] = { + val s1 = size+1; + + val ITree(t1) = insert_1(key, value, tree, pow(s1, p)); + mkGBTree(s1,t1); + } + + private def insert_1(key:A, + value:B, + t0:Tree[A,B], s:int):InsertTree[A,B] = { + t0.match { + case Node(key1, v, smaller, bigger) if(key < key1) => + insert_1(key, value, smaller, div2(s)).match { + case ITree(t1) => + ITree(Node(key1, v, t1, bigger)); + case INode(t1, h1, s1) => + val t = Node(key1, v, t1, bigger); + val iBigger = bigger.count; + val h = mul2(max(h1, iBigger.height)); + val ss = s1 + iBigger.size + 1; + val pp = pow(ss, p); + if(h > pp) ITree(balance_t(t, ss)); + else INode(t,h,ss); + }; + case Node(key1, v, smaller, bigger) if(key > key1) => + insert_1(key, value, bigger, div2(s)).match { + case ITree(t1) => + ITree(Node(key1, v, smaller, t1)); + + case INode(t1, h1, s1) => + val t = Node(key1, v, smaller, t1); + val iSmaller = smaller.count; + val h = mul2(max(h1, iSmaller.height)); + val ss = s1 + iSmaller.size + 1; + val pp = pow(ss, p); + if(h > pp) ITree(balance_t(t, ss)); + else INode(t, h, ss); + }; + case Nil() if(s == 0) => + INode(Node(key, value, Nil(), Nil()), 1, 1); + case Nil() => + ITree(Node(key, value, Nil(), Nil())); + case _ => + (new KeyExists(key)).throw; + } + } + + + private def balance_t(t:Tree[A,B], s:int) = + balance_list(to_list_1(t), s); + + + def balance = mkGBTree(size,balance_t(tree,size)); + + + private def balance_list(list:List[Pair[A,B]], s:int) = { + val Pair(t, _) = balance_list_1(list, s); + t; + } + + private def balance_list_1(list:List[Pair[A,B]], s:int): + Pair[Tree[A,B],List[Pair[A,B]]] = { + if(s > 1) { + val sm = s - 1; + val s2 = div2(sm); + val s1 = sm - s2; + val Pair(t1,Pair(k, v)::l1) = balance_list_1(list, s1); + val Pair(t2, l2) = balance_list_1(l1, s2); + val t = Node(k, v, t1, t2); + Pair(t, l2); + } else + if(s == 1) { + val Pair(key,v) = list.head; + Pair(Node(key,v,Nil(),Nil()), list.tail); + } else + Pair(Nil(),list); + } + + + private def to_list_1(t:Tree[A,B]) = to_list(t, scala.Nil); + + private def to_list(t:Tree[A,B], list:List[Pair[A,B]]):List[Pair[A,B]] = { + t.match { + case Node(key, value, small, big) => + to_list(small, Pair(key, value)::to_list(big, list)); + case Nil() => list; + } + } + + + + override def toList = + to_list(tree, scala.Nil); + + def -(key:A):GBTree[A,B] = delete_any(key); + + // The iterator structure is really just a list corresponding to + // the call stack of an in-order traversal. This is quite fast. + // + // Note: The iterator has a state, i.e., it is not functional. + def elements:Iterator[Pair[A,B]] = + new Iterator[Pair[A,B]] { + var iter = mk_iter(tree,scala.Nil); + def hasNext = !iter.isEmpty; + def next = + iter.match { + case (Node(x,v,_,t)::iter_tail) => + iter= iter_tail; + Pair(x,v); + case scala.Nil => + error("next on empty iterator"); + } + } + + + private def mk_iter(t:TREE,iter_tail:List[TREE]):List[TREE] = { + t.match { + case Node(_,_,Nil(),_) => t::iter_tail; + case Node(_,_,l,_) => mk_iter(l,t::iter_tail); + case Nil() => iter_tail; + } + } + + + + + def delete_any(key:A):GBTree[A,B] = + if(contains(key)) delete(key) + else this; + + + // delete. Assumes that key is present. + + def delete(key:A):GBTree[A,B] = + mkGBTree(size - 1, delete_1(key,tree)); + + // See `get' for notes on the term comparison order. + def delete_1(key:A,t:TREE):TREE = { + t.match { + case Node(key1, value, smaller, larger) if key < key1 => + val smaller1 = delete_1(key, smaller); + Node(key1, value, smaller1, larger); + case Node(key1, value, smaller, bigger) if key > key1 => + val bigger1 = delete_1(key, bigger); + Node(key1, value, smaller, bigger1); + case Node(_,_,smaller,larger) => + merge(smaller, larger) + } + } + + private def merge(smaller:TREE,larger:TREE) = { + if(larger==Nil()) smaller; + else + if(smaller==Nil()) larger; + else { + val Tuple3(key,value,larger1) = take_smallest1(larger); + Node(key,value,smaller, larger1); + } + } + + private def take_smallest1(t:TREE):Tuple3[A,B,TREE] = { + t.match { + case Node(key, value, Nil(), larger) => + Tuple3(key, value, larger); + case Node(key, value, smaller, larger) => + val Tuple3(key1, value1, smaller1) = take_smallest1(smaller); + Tuple3(key1, value1, Node(key, value, smaller1, larger)); + } + } + + + abstract class Tree[A <: Ord[A],B]() { + def count:Info; + } + + case class Node(key:A,value:B,smaller:TREE,bigger:TREE) + extends Tree[A,B] { + def count:Info = { + if (smaller == Nil() && bigger == Nil()) + new Info(1,1); + else { + val sInfo = smaller.count; + val bInfo = bigger.count; + new Info(mul2(max(sInfo.height,bInfo.height)), + sInfo.size + bInfo.size +1); + } + } + } + + case class Nil[A <: Ord[A],B]() extends Tree[A,B] { + def count = new Info(1,0); + } + + class KeyExists(key:Any) extends java.lang.Exception(); + + class Info(h:int, s:int) { + val height = h; + val size = s; + } + + /* ----------------------------------------------------------- + // Some helpful definitions. + */ + final val p = 2; // It seems that p = 2 is optimal for sorted keys */ + final def pow(a:int, b:int):int = + a.match { + case 2 => a * a; + case 1 => a; + case x if x > 0 => a * pow(a, b-1); + }; + final def div2(x:int) = x >> 1; + final def mul2(x:int) = x << 1; + def max(x:int, y:int) = if(x < y) y; else x; + + + +} + +/* +** - from_orddict(L): turns an ordered list L of Pair(Key, Value) pairs into +** a tree. The list must not contain duplicate keys. +** +** - smallest: returns Pair(X, V), where X is the smallest key in the tree, +** and V is the value associated with X in the tree. Assumes that the tree +** is nonempty. +** +** - largest: returns Pair(X, V), where X is the largest key in tree the tree, +** and V is the value associated with X in the tree. Assumes that the tree +** is nonempty. +** +** - take_smallest: returns Tuple3(X, V, T1), where X is the smallest key +** in the tree, V is the value associated with X in the tree, and T1 is the +** tree with key X deleted. Assumes that the tree is nonempty. +** +** - take_largest: returns Tuple3(X, V, T1), where X is the largest key +** in the tree, V is the value associated with X, and T1 is the +** tree with key X deleted. Assumes that the tree is nonempty. + + +from_orddict(L) => + S = length(L), + {S, balance_list(L, S)}. + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +take_smallest({Size, Tree}) => + {Key, Value, Larger} = take_smallest1(Tree), + {Key, Value, {Size - 1, Larger}}. + + + +smallest({_, Tree}) => + smallest_1(Tree). + +smallest_1({Key, Value, nil, _Larger}) => + {Key, Value}; +smallest_1({_Key, _Value, Smaller, _Larger}) => + smallest_1(Smaller). + +take_largest({Size, Tree}) => + {Key, Value, Smaller} = take_largest1(Tree), + {Key, Value, {Size - 1, Smaller}}. + +take_largest1({Key, Value, Smaller, nil}) => + {Key, Value, Smaller}; +take_largest1({Key, Value, Smaller, Larger}) => + {Key1, Value1, Larger1} = take_largest1(Larger), + {Key1, Value1, {Key, Value, Smaller, Larger1}}. + +largest({_, Tree}) => + largest_1(Tree). + +largest_1({Key, Value, _Smaller, nil}) => + {Key, Value}; +largest_1({_Key, _Value, _Smaller, Larger}) => + largest_1(Larger). + +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + + + + + +*/ |