Import Upstream version 20180207
[hcoop/debian/mlton.git] / runtime / gc / hash-cons.c
CommitLineData
7f918cf1
CE
1/* Copyright (C) 2012,2016 Matthew Fluet.
2 * Copyright (C) 1999-2008 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 * Copyright (C) 1997-2000 NEC Research Institute.
5 *
6 * MLton is released under a BSD-style license.
7 * See the file MLton-LICENSE for details.
8 */
9
10/* ---------------------------------------------------------------- */
11/* Object hash consing */
12/* ---------------------------------------------------------------- */
13
14/* Hashing based on Introduction to Algorithms by Cormen, Leiserson, and Rivest.
15 * Section numbers in parens.
16 * k is key to be hashed.
17 * table is of size 2^p (it must be a power of two)
18 * Open addressing (12.4), meaning that we stick the entries directly in the
19 * table and probe until we find what we want.
20 * Multiplication method (12.3.2), meaning that we compute the hash by
21 * multiplying by a magic number, chosen by Knuth, and take the high-order p
22 * bits of the low order 32 bits.
23 * Double hashing (12.4), meaning that we use two hash functions, the first to
24 * decide where to start looking and a second to decide at what offset to
25 * probe. The second hash must be relatively prime to the table size, which
26 * we ensure by making it odd and keeping the table size as a power of 2.
27 */
28
29GC_objectHashTable allocHashTable (GC_state s) {
30 uint32_t elementsLengthMax;
31 pointer regionStart;
32 pointer regionEnd;
33 GC_objectHashTable t;
34
35 t = (GC_objectHashTable)(malloc_safe (sizeof(*t)));
36 // Try to use space in the heap for the elements.
37 if (not (isHeapInit (&s->secondaryHeap))) {
38 if (DEBUG_SHARE)
39 fprintf (stderr, "using secondaryHeap\n");
40 regionStart = s->secondaryHeap.start;
41 regionEnd = s->secondaryHeap.start + s->secondaryHeap.size;
42 } else if (s->amInGC or not s->canMinor) {
43 if (DEBUG_SHARE)
44 fprintf (stderr, "using end of heap\n");
45 regionStart = s->frontier;
46 regionEnd = s->limitPlusSlop;
47 } else {
48 if (DEBUG_SHARE)
49 fprintf (stderr, "using minor space\n");
50 assert (s->canMinor);
51 regionStart = s->heap.start + s->heap.oldGenSize;
52 regionEnd = s->heap.nursery;
53 }
54 elementsLengthMax = (uint32_t)((size_t)(regionEnd - regionStart) / sizeof (*(t->elements)));
55 if (DEBUG_SHARE)
56 fprintf (stderr, "elementsLengthMax = %"PRIu32"\n", elementsLengthMax);
57 t->elementsLengthMax = 64; // some small power of two
58 t->elementsLengthMaxLog2 = 6; // and its log base 2
59 if (elementsLengthMax < t->elementsLengthMax) {
60 if (DEBUG_SHARE)
61 fprintf (stderr, "elementsLengthMax too small -- using calloc\n");
62 t->elementsIsInHeap = FALSE;
63 t->elements =
64 (struct GC_objectHashElement *)
65 (calloc_safe(t->elementsLengthMax, sizeof(*(t->elements))));
66 } else {
67 if (DEBUG_SHARE)
68 fprintf (stderr, "elementsLengthMax big enough -- using heap\n");
69 t->elementsIsInHeap = TRUE;
70 t->elements = (struct GC_objectHashElement*)regionStart;
71 // Find the largest power of two that fits.
72 for ( ;
73 t->elementsLengthMax <= elementsLengthMax;
74 t->elementsLengthMax <<= 1, t->elementsLengthMaxLog2++)
75 ; // nothing
76 t->elementsLengthMax >>= 1;
77 t->elementsLengthMaxLog2--;
78 assert (t->elementsLengthMax <= elementsLengthMax);
79 for (unsigned int i = 0; i < t->elementsLengthMax; ++i)
80 t->elements[i].object = NULL;
81 }
82 t->elementsLengthCur = 0;
83 t->mayInsert = TRUE;
84 if (DEBUG_SHARE) {
85 fprintf (stderr, "elementsIsInHeap = %s\n",
86 boolToString (t->elementsIsInHeap));
87 fprintf (stderr, "elementsLengthMax = %"PRIu32"\n", t->elementsLengthMax);
88 fprintf (stderr, FMTPTR" = allocHashTable ()\n", (uintptr_t)t);
89 }
90 return t;
91}
92
93void freeHashTable (GC_objectHashTable t) {
94 unless (t->elementsIsInHeap)
95 free (t->elements);
96 free (t);
97}
98
99pointer insertHashTableElem (GC_state s,
100 GC_objectHashTable t,
101 GC_hash hash, pointer object,
102 pointer max, bool mightBeThere) {
103 static bool init = FALSE;
104 static uint64_t mult; // magic multiplier for hashing
105 static uint32_t maxNumProbes = 0;
106
107 GC_objectHashElement e;
108 uint32_t numProbes;
109 uint32_t probe;
110 uint32_t slot; // slot in the hash table we are considering
111 unsigned int *p1;
112 unsigned int *p2;
113
114 if (DEBUG_SHARE)
115 fprintf (stderr, "insertHashTableElem ("FMTHASH", "FMTPTR", "FMTPTR", %s)\n",
116 hash,
117 (uintptr_t)object,
118 (uintptr_t)max,
119 boolToString (mightBeThere));
120 if (! init) {
121 init = TRUE;
122 double dmult = floor (((sqrt (5.0) - 1.0) / 2.0) * (double)0x100000000llu);
123 mult = (uint64_t)dmult;
124 }
125 slot = (uint32_t)(mult * (uint64_t)hash) >> (32 - t->elementsLengthMaxLog2);
126 probe = (1 == slot % 2) ? slot : slot - 1;
127 if (DEBUG_SHARE)
128 fprintf (stderr, "probe = 0x%"PRIx32"\n", probe);
129 assert (1 == probe % 2);
130 numProbes = 0;
131look:
132 if (DEBUG_SHARE)
133 fprintf (stderr, "slot = 0x%"PRIx32"\n", slot);
134 assert (slot < t->elementsLengthMax);
135 numProbes++;
136 e = &t->elements[slot];
137 if (NULL == e->object) {
138 /* It's not in the table. Add it. */
139 unless (t->mayInsert) {
140 if (DEBUG_SHARE)
141 fprintf (stderr, "not inserting\n");
142 return object;
143 }
144 e->hash = hash;
145 e->object = object;
146 t->elementsLengthCur++;
147 if (numProbes > maxNumProbes) {
148 maxNumProbes = numProbes;
149 if (DEBUG_SHARE)
150 fprintf (stderr, "numProbes = %"PRIu32"\n", numProbes);
151 }
152 return object;
153 }
154 unless (hash == e->hash) {
155lookNext:
156 slot = (slot + probe) % t->elementsLengthMax;
157 goto look;
158 }
159 unless (mightBeThere)
160 goto lookNext;
161 if (DEBUG_SHARE)
162 fprintf (stderr, "comparing "FMTPTR" to "FMTPTR"\n",
163 (uintptr_t)object, (uintptr_t)e->object);
164 /* Compare object to e->object. */
165 unless (object == e->object) {
166 GC_header header;
167 GC_objectTypeTag tag;
168
169 header = getHeader (object);
170 unless (header == getHeader (e->object))
171 goto lookNext;
172 for (p1 = (unsigned int*)object,
173 p2 = (unsigned int*)e->object;
174 p1 < (unsigned int*)max;
175 ++p1, ++p2)
176 unless (*p1 == *p2)
177 goto lookNext;
178 splitHeader (s, header, &tag, NULL, NULL, NULL);
179 if (ARRAY_TAG == tag
180 and (getArrayLength (object) != getArrayLength (e->object)))
181 goto lookNext;
182 }
183 /* object is equal to e->object. */
184 return e->object;
185}
186
187void growHashTableMaybe (GC_state s, GC_objectHashTable t) {
188 GC_objectHashElement oldElement;
189 struct GC_objectHashElement *oldElements;
190 uint32_t oldElementsLengthMax;
191 uint32_t newElementsLengthMax;
192
193 if (not t->mayInsert or t->elementsLengthCur * 2 <= t->elementsLengthMax)
194 return;
195 oldElements = t->elements;
196 oldElementsLengthMax = t->elementsLengthMax;
197 newElementsLengthMax = oldElementsLengthMax * 2;
198 if (DEBUG_SHARE)
199 fprintf (stderr,
200 "trying to grow table to cardinality %"PRIu32"\n",
201 newElementsLengthMax);
202 // Try to alocate the new table.
203 t->elements =
204 (struct GC_objectHashElement *)
205 (calloc(newElementsLengthMax, sizeof(*(t->elements))));
206 if (NULL == t->elements) {
207 t->mayInsert = FALSE;
208 t->elements = oldElements;
209 if (DEBUG_SHARE)
210 fprintf (stderr, "unable to grow table\n");
211 return;
212 }
213 t->elementsLengthMax = newElementsLengthMax;
214 t->elementsLengthMaxLog2++;
215 for (unsigned int i = 0; i < oldElementsLengthMax; ++i) {
216 oldElement = &oldElements[i];
217 unless (NULL == oldElement->object)
218 insertHashTableElem
219 (s, t, oldElement->hash, oldElement->object, NULL, FALSE);
220 }
221 if (t->elementsIsInHeap)
222 t->elementsIsInHeap = FALSE;
223 else
224 free (oldElements);
225 if (DEBUG_SHARE)
226 fprintf (stderr, "done growing table\n");
227}
228
229pointer hashConsPointer (GC_state s, pointer object, bool countBytesHashConsed) {
230 GC_objectHashTable t;
231 GC_header header;
232 uint16_t bytesNonObjptrs;
233 uint16_t numObjptrs;
234 bool hasIdentity;
235 GC_objectTypeTag tag;
236 pointer max;
237 GC_hash hash;
238 GC_hash* p;
239 pointer res;
240
241 if (DEBUG_SHARE)
242 fprintf (stderr, "hashConsPointer ("FMTPTR")\n", (uintptr_t)object);
243 t = s->objectHashTable;
244 header = getHeader (object);
245 splitHeader(s, header, &tag, &hasIdentity, &bytesNonObjptrs, &numObjptrs);
246 if (hasIdentity) {
247 /* Don't hash cons. */
248 res = object;
249 goto done;
250 }
251 assert ((ARRAY_TAG == tag) or (NORMAL_TAG == tag));
252 max =
253 object
254 + (ARRAY_TAG == tag
255 ? (sizeofArrayNoMetaData (s, getArrayLength (object),
256 bytesNonObjptrs, numObjptrs))
257 : (bytesNonObjptrs + (numObjptrs * OBJPTR_SIZE)));
258 // Compute the hash.
259 hash = (GC_hash)header;
260 for (p = (GC_hash*)object; p < (GC_hash*)max; ++p)
261 hash = hash * 31 + *p;
262 /* Insert into table. */
263 res = insertHashTableElem (s, t, hash, object, max, TRUE);
264 growHashTableMaybe (s, t);
265 if (countBytesHashConsed and res != object) {
266 size_t amount;
267
268 amount = (size_t)(max - object);
269 if (ARRAY_TAG == tag)
270 amount += GC_ARRAY_METADATA_SIZE;
271 else
272 amount += GC_NORMAL_METADATA_SIZE;
273 s->lastMajorStatistics.bytesHashConsed += amount;
274 }
275done:
276 if (DEBUG_SHARE)
277 fprintf (stderr, FMTPTR" = hashConsPointer ("FMTPTR")\n",
278 (uintptr_t)res, (uintptr_t)object);
279 return res;
280}
281
282void shareObjptr (GC_state s, objptr *opp) {
283 pointer p;
284
285 p = objptrToPointer (*opp, s->heap.start);
286 if (DEBUG_SHARE)
287 fprintf (stderr, "shareObjptr opp = "FMTPTR" *opp = "FMTOBJPTR"\n",
288 (uintptr_t)opp, *opp);
289 p = hashConsPointer (s, p, FALSE);
290 *opp = pointerToObjptr (p, s->heap.start);
291 markIntergenerationalObjptr (s, opp);
292}
293
294void printBytesHashConsedMessage (size_t bytesHashConsed, size_t bytesExamined) {
295 fprintf (stderr, "[GC: hash-consed %s bytes (%.1f%% of bytes examined).]\n",
296 uintmaxToCommaString(bytesHashConsed),
297 100.0 * ((double)bytesHashConsed / (double)bytesExamined));
298}