Import Upstream version 20180207
[hcoop/debian/mlton.git] / lib / mlton / basic / hash-set.sml
CommitLineData
7f918cf1
CE
1(* Copyright (C) 2009 Matthew Fluet.
2 * Copyright (C) 1999-2006, 2008 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 *
5 * MLton is released under a BSD-style license.
6 * See the file MLton-LICENSE for details.
7 *)
8
9structure HashSet: HASH_SET =
10struct
11
12datatype 'a t =
13 T of {buckets: 'a list array ref,
14 hash: 'a -> word,
15 mask: word ref,
16 numItems: int ref}
17
18fun 'a newWithBuckets {hash, numBuckets: int}: 'a t =
19 let
20 val mask: word = Word.fromInt numBuckets - 0w1
21 in
22 T {buckets = ref (Array.new (numBuckets, [])),
23 hash = hash,
24 numItems = ref 0,
25 mask = ref mask}
26 end
27
28val initialSize: int = Int.pow (2, 6)
29
30fun new {hash} = newWithBuckets {hash = hash,
31 numBuckets = initialSize}
32
33fun newOfSize {hash, size} =
34 newWithBuckets {hash = hash,
35 numBuckets = 4 * Int.roundUpToPowerOfTwo size}
36
37fun size (T {numItems, ...}) = !numItems
38
39fun index (w: word, mask: word): int =
40 Word.toInt (Word.andb (w, mask))
41
42val numPeeks: Int64.int ref = ref 0
43val numLinks: Int64.int ref = ref 0
44
45fun stats () =
46 let open Layout
47 in align
48 [seq [str "hash set numPeeks = ", str (Int64.toString (!numPeeks))],
49 (* seq [str "hash set numLinks = ", str (Int64.toString (!numLinks))], *)
50 seq [str "hash set average position = ",
51 str let open Real
52 val fromInt = fromIntInf o Int64.toLarge
53 in format (fromInt (!numLinks) / fromInt (!numPeeks),
54 Format.fix (SOME 3))
55 end]]
56 end
57
58fun stats' (T {buckets, numItems, ...}) =
59 let open Layout
60 val numi = !numItems
61 val numb = Array.length (!buckets)
62 val numb' = numb - 1
63 val avg = let open Real in (fromInt numi / fromInt numb) end
64 val (min,max,total)
65 = Array.fold
66 (!buckets,
67 (NONE, NONE, 0.0),
68 fn (l,(min,max,total))
69 => let
70 val n = List.length l
71 val d = (Real.fromInt n) - avg
72 in
73 (SOME (Option.fold(min,n,Int.min)),
74 SOME (Option.fold(max,n,Int.max)),
75 total + d * d)
76 end)
77 val stdd = let open Real in Math.sqrt(total / (fromInt numb')) end
78 val rfmt = fn r => Real.format (r, Real.Format.fix (SOME 3))
79 in align
80 [seq [str "numItems = ", Int.layout numi],
81 seq [str "numBuckets = ", Int.layout numb],
82 seq [str "avg = ", str (rfmt avg),
83 str " stdd = ", str (rfmt stdd),
84 str " min = ", Option.layout Int.layout min,
85 str " max = ", Option.layout Int.layout max]]
86 end
87
88fun resize (T {buckets, hash, mask, ...}, size: int, newMask: word): unit =
89 let
90 val newBuckets = Array.new (size, [])
91 in Array.foreach (!buckets, fn r =>
92 List.foreach (r, fn a =>
93 let val j = index (hash a, newMask)
94 in Array.update
95 (newBuckets, j,
96 a :: Array.sub (newBuckets, j))
97 end))
98 ; buckets := newBuckets
99 ; mask := newMask
100 end
101
102fun maybeGrow (s as T {buckets, mask, numItems, ...}): unit =
103 let
104 val n = Array.length (!buckets)
105 in if !numItems * 4 > n
106 then resize (s,
107 n * 2,
108 (* The new mask depends on growFactor being 2. *)
109 Word.orb (0w1, Word.<< (!mask, 0w1)))
110 else ()
111 end
112
113fun removeAll (T {buckets, numItems, ...}, p) =
114 Array.modify (!buckets, fn elts =>
115 List.fold (elts, [], fn (a, ac) =>
116 if p a
117 then (Int.dec numItems; ac)
118 else a :: ac))
119
120fun remove (T {buckets, mask, numItems, ...}, w, p) =
121 let
122 val i = index (w, !mask)
123 val b = !buckets
124 val _ = Array.update (b, i, List.removeFirst (Array.sub (b, i), p))
125 val _ = Int.dec numItems
126 in
127 ()
128 end
129
130fun peekGen (T {buckets = ref buckets, mask, ...}, w, p, no, yes) =
131 let
132 val _ =
133 numPeeks := 1 + !numPeeks
134 handle Overflow => Error.bug "HashSet: numPeeks overflow"
135 val j = index (w, !mask)
136 val b = Array.sub (buckets, j)
137 fun update () =
138 numLinks := !numLinks + 1
139 handle Overflow => Error.bug "HashSet: numLinks overflow"
140 in case List.peek (b, fn a => (update (); p a)) of
141 NONE => no (j, b)
142 | SOME a => yes a
143 end
144
145fun peek (t, w, p) = peekGen (t, w, p, fn _ => NONE, SOME)
146
147(* fun update (T {buckets = ref buckets, equals, hash, mask, ...}, a) =
148 * let
149 * val j = index (hash a, !mask)
150 * val _ =
151 * Array.update (buckets, j,
152 * a :: (List.remove (Array.sub (buckets, j),
153 * fn a' => equals (a, a'))))
154 * in ()
155 * end
156 *)
157
158fun insertIfNew (table as T {buckets, numItems, ...}, w, p, f,
159 g: 'a -> unit) =
160 let
161 fun no (j, b) =
162 let val a = f ()
163 val _ = Int.inc numItems
164 val _ = Array.update (!buckets, j, a :: b)
165 val _ = maybeGrow table
166 in a
167 end
168 fun yes x = (g x; x)
169 in peekGen (table, w, p, no, yes)
170 end
171
172fun lookupOrInsert (table, w, p, f) =
173 insertIfNew (table, w, p, f, ignore)
174
175fun fold (T {buckets, ...}, b, f) =
176 Array.fold (!buckets, b, fn (r, b) => List.fold (r, b, f))
177
178local
179 structure F = Fold (type 'a t = 'a t
180 type 'a elt = 'a
181 val fold = fold)
182 open F
183in
184 val foreach = foreach
185end
186
187fun forall (T {buckets, ...}, f) =
188 Array.forall (!buckets, fn r => List.forall (r, f))
189
190fun toList t = fold (t, [], fn (a, l) => a :: l)
191
192fun layout lay t = List.layout lay (toList t)
193
194fun fromList (l, {hash, equals}) =
195 let
196 val s = new {hash = hash}
197 val () =
198 List.foreach (l, fn a =>
199 ignore (lookupOrInsert (s, hash a,
200 fn b => equals (a, b),
201 fn _ => a)))
202 in
203 s
204 end
205
206end