Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Copyright (C) 2009 Matthew Fluet. |
2 | * Copyright (C) 1999-2006, 2008 Henry Cejtin, Matthew Fluet, Suresh | |
3 | * Jagannathan, and Stephen Weeks. | |
4 | * | |
5 | * MLton is released under a BSD-style license. | |
6 | * See the file MLton-LICENSE for details. | |
7 | *) | |
8 | ||
9 | structure HashSet: HASH_SET = | |
10 | struct | |
11 | ||
12 | datatype 'a t = | |
13 | T of {buckets: 'a list array ref, | |
14 | hash: 'a -> word, | |
15 | mask: word ref, | |
16 | numItems: int ref} | |
17 | ||
18 | fun 'a newWithBuckets {hash, numBuckets: int}: 'a t = | |
19 | let | |
20 | val mask: word = Word.fromInt numBuckets - 0w1 | |
21 | in | |
22 | T {buckets = ref (Array.new (numBuckets, [])), | |
23 | hash = hash, | |
24 | numItems = ref 0, | |
25 | mask = ref mask} | |
26 | end | |
27 | ||
28 | val initialSize: int = Int.pow (2, 6) | |
29 | ||
30 | fun new {hash} = newWithBuckets {hash = hash, | |
31 | numBuckets = initialSize} | |
32 | ||
33 | fun newOfSize {hash, size} = | |
34 | newWithBuckets {hash = hash, | |
35 | numBuckets = 4 * Int.roundUpToPowerOfTwo size} | |
36 | ||
37 | fun size (T {numItems, ...}) = !numItems | |
38 | ||
39 | fun index (w: word, mask: word): int = | |
40 | Word.toInt (Word.andb (w, mask)) | |
41 | ||
42 | val numPeeks: Int64.int ref = ref 0 | |
43 | val numLinks: Int64.int ref = ref 0 | |
44 | ||
45 | fun stats () = | |
46 | let open Layout | |
47 | in align | |
48 | [seq [str "hash set numPeeks = ", str (Int64.toString (!numPeeks))], | |
49 | (* seq [str "hash set numLinks = ", str (Int64.toString (!numLinks))], *) | |
50 | seq [str "hash set average position = ", | |
51 | str let open Real | |
52 | val fromInt = fromIntInf o Int64.toLarge | |
53 | in format (fromInt (!numLinks) / fromInt (!numPeeks), | |
54 | Format.fix (SOME 3)) | |
55 | end]] | |
56 | end | |
57 | ||
58 | fun stats' (T {buckets, numItems, ...}) = | |
59 | let open Layout | |
60 | val numi = !numItems | |
61 | val numb = Array.length (!buckets) | |
62 | val numb' = numb - 1 | |
63 | val avg = let open Real in (fromInt numi / fromInt numb) end | |
64 | val (min,max,total) | |
65 | = Array.fold | |
66 | (!buckets, | |
67 | (NONE, NONE, 0.0), | |
68 | fn (l,(min,max,total)) | |
69 | => let | |
70 | val n = List.length l | |
71 | val d = (Real.fromInt n) - avg | |
72 | in | |
73 | (SOME (Option.fold(min,n,Int.min)), | |
74 | SOME (Option.fold(max,n,Int.max)), | |
75 | total + d * d) | |
76 | end) | |
77 | val stdd = let open Real in Math.sqrt(total / (fromInt numb')) end | |
78 | val rfmt = fn r => Real.format (r, Real.Format.fix (SOME 3)) | |
79 | in align | |
80 | [seq [str "numItems = ", Int.layout numi], | |
81 | seq [str "numBuckets = ", Int.layout numb], | |
82 | seq [str "avg = ", str (rfmt avg), | |
83 | str " stdd = ", str (rfmt stdd), | |
84 | str " min = ", Option.layout Int.layout min, | |
85 | str " max = ", Option.layout Int.layout max]] | |
86 | end | |
87 | ||
88 | fun resize (T {buckets, hash, mask, ...}, size: int, newMask: word): unit = | |
89 | let | |
90 | val newBuckets = Array.new (size, []) | |
91 | in Array.foreach (!buckets, fn r => | |
92 | List.foreach (r, fn a => | |
93 | let val j = index (hash a, newMask) | |
94 | in Array.update | |
95 | (newBuckets, j, | |
96 | a :: Array.sub (newBuckets, j)) | |
97 | end)) | |
98 | ; buckets := newBuckets | |
99 | ; mask := newMask | |
100 | end | |
101 | ||
102 | fun maybeGrow (s as T {buckets, mask, numItems, ...}): unit = | |
103 | let | |
104 | val n = Array.length (!buckets) | |
105 | in if !numItems * 4 > n | |
106 | then resize (s, | |
107 | n * 2, | |
108 | (* The new mask depends on growFactor being 2. *) | |
109 | Word.orb (0w1, Word.<< (!mask, 0w1))) | |
110 | else () | |
111 | end | |
112 | ||
113 | fun removeAll (T {buckets, numItems, ...}, p) = | |
114 | Array.modify (!buckets, fn elts => | |
115 | List.fold (elts, [], fn (a, ac) => | |
116 | if p a | |
117 | then (Int.dec numItems; ac) | |
118 | else a :: ac)) | |
119 | ||
120 | fun remove (T {buckets, mask, numItems, ...}, w, p) = | |
121 | let | |
122 | val i = index (w, !mask) | |
123 | val b = !buckets | |
124 | val _ = Array.update (b, i, List.removeFirst (Array.sub (b, i), p)) | |
125 | val _ = Int.dec numItems | |
126 | in | |
127 | () | |
128 | end | |
129 | ||
130 | fun peekGen (T {buckets = ref buckets, mask, ...}, w, p, no, yes) = | |
131 | let | |
132 | val _ = | |
133 | numPeeks := 1 + !numPeeks | |
134 | handle Overflow => Error.bug "HashSet: numPeeks overflow" | |
135 | val j = index (w, !mask) | |
136 | val b = Array.sub (buckets, j) | |
137 | fun update () = | |
138 | numLinks := !numLinks + 1 | |
139 | handle Overflow => Error.bug "HashSet: numLinks overflow" | |
140 | in case List.peek (b, fn a => (update (); p a)) of | |
141 | NONE => no (j, b) | |
142 | | SOME a => yes a | |
143 | end | |
144 | ||
145 | fun peek (t, w, p) = peekGen (t, w, p, fn _ => NONE, SOME) | |
146 | ||
147 | (* fun update (T {buckets = ref buckets, equals, hash, mask, ...}, a) = | |
148 | * let | |
149 | * val j = index (hash a, !mask) | |
150 | * val _ = | |
151 | * Array.update (buckets, j, | |
152 | * a :: (List.remove (Array.sub (buckets, j), | |
153 | * fn a' => equals (a, a')))) | |
154 | * in () | |
155 | * end | |
156 | *) | |
157 | ||
158 | fun insertIfNew (table as T {buckets, numItems, ...}, w, p, f, | |
159 | g: 'a -> unit) = | |
160 | let | |
161 | fun no (j, b) = | |
162 | let val a = f () | |
163 | val _ = Int.inc numItems | |
164 | val _ = Array.update (!buckets, j, a :: b) | |
165 | val _ = maybeGrow table | |
166 | in a | |
167 | end | |
168 | fun yes x = (g x; x) | |
169 | in peekGen (table, w, p, no, yes) | |
170 | end | |
171 | ||
172 | fun lookupOrInsert (table, w, p, f) = | |
173 | insertIfNew (table, w, p, f, ignore) | |
174 | ||
175 | fun fold (T {buckets, ...}, b, f) = | |
176 | Array.fold (!buckets, b, fn (r, b) => List.fold (r, b, f)) | |
177 | ||
178 | local | |
179 | structure F = Fold (type 'a t = 'a t | |
180 | type 'a elt = 'a | |
181 | val fold = fold) | |
182 | open F | |
183 | in | |
184 | val foreach = foreach | |
185 | end | |
186 | ||
187 | fun forall (T {buckets, ...}, f) = | |
188 | Array.forall (!buckets, fn r => List.forall (r, f)) | |
189 | ||
190 | fun toList t = fold (t, [], fn (a, l) => a :: l) | |
191 | ||
192 | fun layout lay t = List.layout lay (toList t) | |
193 | ||
194 | fun fromList (l, {hash, equals}) = | |
195 | let | |
196 | val s = new {hash = hash} | |
197 | val () = | |
198 | List.foreach (l, fn a => | |
199 | ignore (lookupOrInsert (s, hash a, | |
200 | fn b => equals (a, b), | |
201 | fn _ => a))) | |
202 | in | |
203 | s | |
204 | end | |
205 | ||
206 | end |