Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | /* Copyright (C) 2012,2016 Matthew Fluet. |
2 | * Copyright (C) 1999-2008 Henry Cejtin, Matthew Fluet, Suresh | |
3 | * Jagannathan, and Stephen Weeks. | |
4 | * Copyright (C) 1997-2000 NEC Research Institute. | |
5 | * | |
6 | * MLton is released under a BSD-style license. | |
7 | * See the file MLton-LICENSE for details. | |
8 | */ | |
9 | ||
10 | /* ---------------------------------------------------------------- */ | |
11 | /* Object hash consing */ | |
12 | /* ---------------------------------------------------------------- */ | |
13 | ||
14 | /* Hashing based on Introduction to Algorithms by Cormen, Leiserson, and Rivest. | |
15 | * Section numbers in parens. | |
16 | * k is key to be hashed. | |
17 | * table is of size 2^p (it must be a power of two) | |
18 | * Open addressing (12.4), meaning that we stick the entries directly in the | |
19 | * table and probe until we find what we want. | |
20 | * Multiplication method (12.3.2), meaning that we compute the hash by | |
21 | * multiplying by a magic number, chosen by Knuth, and take the high-order p | |
22 | * bits of the low order 32 bits. | |
23 | * Double hashing (12.4), meaning that we use two hash functions, the first to | |
24 | * decide where to start looking and a second to decide at what offset to | |
25 | * probe. The second hash must be relatively prime to the table size, which | |
26 | * we ensure by making it odd and keeping the table size as a power of 2. | |
27 | */ | |
28 | ||
29 | GC_objectHashTable allocHashTable (GC_state s) { | |
30 | uint32_t elementsLengthMax; | |
31 | pointer regionStart; | |
32 | pointer regionEnd; | |
33 | GC_objectHashTable t; | |
34 | ||
35 | t = (GC_objectHashTable)(malloc_safe (sizeof(*t))); | |
36 | // Try to use space in the heap for the elements. | |
37 | if (not (isHeapInit (&s->secondaryHeap))) { | |
38 | if (DEBUG_SHARE) | |
39 | fprintf (stderr, "using secondaryHeap\n"); | |
40 | regionStart = s->secondaryHeap.start; | |
41 | regionEnd = s->secondaryHeap.start + s->secondaryHeap.size; | |
42 | } else if (s->amInGC or not s->canMinor) { | |
43 | if (DEBUG_SHARE) | |
44 | fprintf (stderr, "using end of heap\n"); | |
45 | regionStart = s->frontier; | |
46 | regionEnd = s->limitPlusSlop; | |
47 | } else { | |
48 | if (DEBUG_SHARE) | |
49 | fprintf (stderr, "using minor space\n"); | |
50 | assert (s->canMinor); | |
51 | regionStart = s->heap.start + s->heap.oldGenSize; | |
52 | regionEnd = s->heap.nursery; | |
53 | } | |
54 | elementsLengthMax = (uint32_t)((size_t)(regionEnd - regionStart) / sizeof (*(t->elements))); | |
55 | if (DEBUG_SHARE) | |
56 | fprintf (stderr, "elementsLengthMax = %"PRIu32"\n", elementsLengthMax); | |
57 | t->elementsLengthMax = 64; // some small power of two | |
58 | t->elementsLengthMaxLog2 = 6; // and its log base 2 | |
59 | if (elementsLengthMax < t->elementsLengthMax) { | |
60 | if (DEBUG_SHARE) | |
61 | fprintf (stderr, "elementsLengthMax too small -- using calloc\n"); | |
62 | t->elementsIsInHeap = FALSE; | |
63 | t->elements = | |
64 | (struct GC_objectHashElement *) | |
65 | (calloc_safe(t->elementsLengthMax, sizeof(*(t->elements)))); | |
66 | } else { | |
67 | if (DEBUG_SHARE) | |
68 | fprintf (stderr, "elementsLengthMax big enough -- using heap\n"); | |
69 | t->elementsIsInHeap = TRUE; | |
70 | t->elements = (struct GC_objectHashElement*)regionStart; | |
71 | // Find the largest power of two that fits. | |
72 | for ( ; | |
73 | t->elementsLengthMax <= elementsLengthMax; | |
74 | t->elementsLengthMax <<= 1, t->elementsLengthMaxLog2++) | |
75 | ; // nothing | |
76 | t->elementsLengthMax >>= 1; | |
77 | t->elementsLengthMaxLog2--; | |
78 | assert (t->elementsLengthMax <= elementsLengthMax); | |
79 | for (unsigned int i = 0; i < t->elementsLengthMax; ++i) | |
80 | t->elements[i].object = NULL; | |
81 | } | |
82 | t->elementsLengthCur = 0; | |
83 | t->mayInsert = TRUE; | |
84 | if (DEBUG_SHARE) { | |
85 | fprintf (stderr, "elementsIsInHeap = %s\n", | |
86 | boolToString (t->elementsIsInHeap)); | |
87 | fprintf (stderr, "elementsLengthMax = %"PRIu32"\n", t->elementsLengthMax); | |
88 | fprintf (stderr, FMTPTR" = allocHashTable ()\n", (uintptr_t)t); | |
89 | } | |
90 | return t; | |
91 | } | |
92 | ||
93 | void freeHashTable (GC_objectHashTable t) { | |
94 | unless (t->elementsIsInHeap) | |
95 | free (t->elements); | |
96 | free (t); | |
97 | } | |
98 | ||
99 | pointer insertHashTableElem (GC_state s, | |
100 | GC_objectHashTable t, | |
101 | GC_hash hash, pointer object, | |
102 | pointer max, bool mightBeThere) { | |
103 | static bool init = FALSE; | |
104 | static uint64_t mult; // magic multiplier for hashing | |
105 | static uint32_t maxNumProbes = 0; | |
106 | ||
107 | GC_objectHashElement e; | |
108 | uint32_t numProbes; | |
109 | uint32_t probe; | |
110 | uint32_t slot; // slot in the hash table we are considering | |
111 | unsigned int *p1; | |
112 | unsigned int *p2; | |
113 | ||
114 | if (DEBUG_SHARE) | |
115 | fprintf (stderr, "insertHashTableElem ("FMTHASH", "FMTPTR", "FMTPTR", %s)\n", | |
116 | hash, | |
117 | (uintptr_t)object, | |
118 | (uintptr_t)max, | |
119 | boolToString (mightBeThere)); | |
120 | if (! init) { | |
121 | init = TRUE; | |
122 | double dmult = floor (((sqrt (5.0) - 1.0) / 2.0) * (double)0x100000000llu); | |
123 | mult = (uint64_t)dmult; | |
124 | } | |
125 | slot = (uint32_t)(mult * (uint64_t)hash) >> (32 - t->elementsLengthMaxLog2); | |
126 | probe = (1 == slot % 2) ? slot : slot - 1; | |
127 | if (DEBUG_SHARE) | |
128 | fprintf (stderr, "probe = 0x%"PRIx32"\n", probe); | |
129 | assert (1 == probe % 2); | |
130 | numProbes = 0; | |
131 | look: | |
132 | if (DEBUG_SHARE) | |
133 | fprintf (stderr, "slot = 0x%"PRIx32"\n", slot); | |
134 | assert (slot < t->elementsLengthMax); | |
135 | numProbes++; | |
136 | e = &t->elements[slot]; | |
137 | if (NULL == e->object) { | |
138 | /* It's not in the table. Add it. */ | |
139 | unless (t->mayInsert) { | |
140 | if (DEBUG_SHARE) | |
141 | fprintf (stderr, "not inserting\n"); | |
142 | return object; | |
143 | } | |
144 | e->hash = hash; | |
145 | e->object = object; | |
146 | t->elementsLengthCur++; | |
147 | if (numProbes > maxNumProbes) { | |
148 | maxNumProbes = numProbes; | |
149 | if (DEBUG_SHARE) | |
150 | fprintf (stderr, "numProbes = %"PRIu32"\n", numProbes); | |
151 | } | |
152 | return object; | |
153 | } | |
154 | unless (hash == e->hash) { | |
155 | lookNext: | |
156 | slot = (slot + probe) % t->elementsLengthMax; | |
157 | goto look; | |
158 | } | |
159 | unless (mightBeThere) | |
160 | goto lookNext; | |
161 | if (DEBUG_SHARE) | |
162 | fprintf (stderr, "comparing "FMTPTR" to "FMTPTR"\n", | |
163 | (uintptr_t)object, (uintptr_t)e->object); | |
164 | /* Compare object to e->object. */ | |
165 | unless (object == e->object) { | |
166 | GC_header header; | |
167 | GC_objectTypeTag tag; | |
168 | ||
169 | header = getHeader (object); | |
170 | unless (header == getHeader (e->object)) | |
171 | goto lookNext; | |
172 | for (p1 = (unsigned int*)object, | |
173 | p2 = (unsigned int*)e->object; | |
174 | p1 < (unsigned int*)max; | |
175 | ++p1, ++p2) | |
176 | unless (*p1 == *p2) | |
177 | goto lookNext; | |
178 | splitHeader (s, header, &tag, NULL, NULL, NULL); | |
179 | if (ARRAY_TAG == tag | |
180 | and (getArrayLength (object) != getArrayLength (e->object))) | |
181 | goto lookNext; | |
182 | } | |
183 | /* object is equal to e->object. */ | |
184 | return e->object; | |
185 | } | |
186 | ||
187 | void growHashTableMaybe (GC_state s, GC_objectHashTable t) { | |
188 | GC_objectHashElement oldElement; | |
189 | struct GC_objectHashElement *oldElements; | |
190 | uint32_t oldElementsLengthMax; | |
191 | uint32_t newElementsLengthMax; | |
192 | ||
193 | if (not t->mayInsert or t->elementsLengthCur * 2 <= t->elementsLengthMax) | |
194 | return; | |
195 | oldElements = t->elements; | |
196 | oldElementsLengthMax = t->elementsLengthMax; | |
197 | newElementsLengthMax = oldElementsLengthMax * 2; | |
198 | if (DEBUG_SHARE) | |
199 | fprintf (stderr, | |
200 | "trying to grow table to cardinality %"PRIu32"\n", | |
201 | newElementsLengthMax); | |
202 | // Try to alocate the new table. | |
203 | t->elements = | |
204 | (struct GC_objectHashElement *) | |
205 | (calloc(newElementsLengthMax, sizeof(*(t->elements)))); | |
206 | if (NULL == t->elements) { | |
207 | t->mayInsert = FALSE; | |
208 | t->elements = oldElements; | |
209 | if (DEBUG_SHARE) | |
210 | fprintf (stderr, "unable to grow table\n"); | |
211 | return; | |
212 | } | |
213 | t->elementsLengthMax = newElementsLengthMax; | |
214 | t->elementsLengthMaxLog2++; | |
215 | for (unsigned int i = 0; i < oldElementsLengthMax; ++i) { | |
216 | oldElement = &oldElements[i]; | |
217 | unless (NULL == oldElement->object) | |
218 | insertHashTableElem | |
219 | (s, t, oldElement->hash, oldElement->object, NULL, FALSE); | |
220 | } | |
221 | if (t->elementsIsInHeap) | |
222 | t->elementsIsInHeap = FALSE; | |
223 | else | |
224 | free (oldElements); | |
225 | if (DEBUG_SHARE) | |
226 | fprintf (stderr, "done growing table\n"); | |
227 | } | |
228 | ||
229 | pointer hashConsPointer (GC_state s, pointer object, bool countBytesHashConsed) { | |
230 | GC_objectHashTable t; | |
231 | GC_header header; | |
232 | uint16_t bytesNonObjptrs; | |
233 | uint16_t numObjptrs; | |
234 | bool hasIdentity; | |
235 | GC_objectTypeTag tag; | |
236 | pointer max; | |
237 | GC_hash hash; | |
238 | GC_hash* p; | |
239 | pointer res; | |
240 | ||
241 | if (DEBUG_SHARE) | |
242 | fprintf (stderr, "hashConsPointer ("FMTPTR")\n", (uintptr_t)object); | |
243 | t = s->objectHashTable; | |
244 | header = getHeader (object); | |
245 | splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, &numObjptrs); | |
246 | if (hasIdentity) { | |
247 | /* Don't hash cons. */ | |
248 | res = object; | |
249 | goto done; | |
250 | } | |
251 | assert ((ARRAY_TAG == tag) or (NORMAL_TAG == tag)); | |
252 | max = | |
253 | object | |
254 | + (ARRAY_TAG == tag | |
255 | ? (sizeofArrayNoMetaData (s, getArrayLength (object), | |
256 | bytesNonObjptrs, numObjptrs)) | |
257 | : (bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE))); | |
258 | // Compute the hash. | |
259 | hash = (GC_hash)header; | |
260 | for (p = (GC_hash*)object; p < (GC_hash*)max; ++p) | |
261 | hash = hash * 31 + *p; | |
262 | /* Insert into table. */ | |
263 | res = insertHashTableElem (s, t, hash, object, max, TRUE); | |
264 | growHashTableMaybe (s, t); | |
265 | if (countBytesHashConsed and res != object) { | |
266 | size_t amount; | |
267 | ||
268 | amount = (size_t)(max - object); | |
269 | if (ARRAY_TAG == tag) | |
270 | amount += GC_ARRAY_METADATA_SIZE; | |
271 | else | |
272 | amount += GC_NORMAL_METADATA_SIZE; | |
273 | s->lastMajorStatistics.bytesHashConsed += amount; | |
274 | } | |
275 | done: | |
276 | if (DEBUG_SHARE) | |
277 | fprintf (stderr, FMTPTR" = hashConsPointer ("FMTPTR")\n", | |
278 | (uintptr_t)res, (uintptr_t)object); | |
279 | return res; | |
280 | } | |
281 | ||
282 | void shareObjptr (GC_state s, objptr *opp) { | |
283 | pointer p; | |
284 | ||
285 | p = objptrToPointer (*opp, s->heap.start); | |
286 | if (DEBUG_SHARE) | |
287 | fprintf (stderr, "shareObjptr opp = "FMTPTR" *opp = "FMTOBJPTR"\n", | |
288 | (uintptr_t)opp, *opp); | |
289 | p = hashConsPointer (s, p, FALSE); | |
290 | *opp = pointerToObjptr (p, s->heap.start); | |
291 | markIntergenerationalObjptr (s, opp); | |
292 | } | |
293 | ||
294 | void printBytesHashConsedMessage (size_t bytesHashConsed, size_t bytesExamined) { | |
295 | fprintf (stderr, "[GC: hash-consed %s bytes (%.1f%% of bytes examined).]\n", | |
296 | uintmaxToCommaString(bytesHashConsed), | |
297 | 100.0 * ((double)bytesHashConsed / (double)bytesExamined)); | |
298 | } |