Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | /* Copyright (C) 2005-2007 Henry Cejtin, Matthew Fluet, Suresh |
2 | * Jagannathan, and Stephen Weeks. | |
3 | * | |
4 | * MLton is released under a BSD-style license. | |
5 | * See the file MLton-LICENSE for details. | |
6 | */ | |
7 | ||
8 | #if (defined (MLTON_GC_INTERNAL_TYPES)) | |
9 | ||
10 | /* | |
11 | Consider the following schemes for representing object pointers and | |
12 | mapping them to 64-bit native pointers. | |
13 | ||
14 | A. 32 bits, with bottom two bits zero. | |
15 | B. 32 bits, with bottom bit zero, shift left by one. | |
16 | C. 32 bits, with bottom bit zero, shift left by two. | |
17 | D. 32 bits, shift left by two. | |
18 | E. 32 bits, shift left by three. | |
19 | F. 40 bits, with bottom two bits zero. | |
20 | G. 64 bits, with bottom two bits zero. | |
21 | ||
22 | These schemes vary in the number of bits to represent a pointer in an | |
23 | object, the time to load a pointer from memory into a register, the | |
24 | amount of addressable memory, and the object alignment. | |
25 | ||
26 | bits time mem(G) align | |
27 | A 32 fast 4 4 | |
28 | B 32 slow 8 4 | |
29 | C 32 slow 16 8 | |
30 | D 32 slow 16 4 | |
31 | E 32 slow 32 8 | |
32 | F 40 slow 256 4 | |
33 | G 64 fast 4G 4 | |
34 | ||
35 | Each of the (A-F) has a variant (AX-FX) in which pointers are added to | |
36 | some constant base address. This gives access to any region in the | |
37 | virtual address space instead of just the low addresses. | |
38 | ||
39 | The following diagram demonstrates what portion of the native pointer | |
40 | to which the object pointer corresponds. | |
41 | ||
42 | 64 32 0 | |
43 | | | | | |
44 | ----------------------------------------------------------------- | |
45 | ||
46 | ==============================00 A | |
47 | ||
48 | ===============================0 B | |
49 | ||
50 | ===============================0 C | |
51 | ||
52 | ================================ D | |
53 | ||
54 | ================================= E | |
55 | ||
56 | ======================================00 F | |
57 | ||
58 | ==============================================================00 G | |
59 | ||
60 | Algorithmically, we can compute the native pointer (P) from the object | |
61 | pointer (O) (with bitsize Z), given a shift (S) and a base (B): | |
62 | ||
63 | P = %add64(%shl64(%zxZ_64(O),S),B) | |
64 | ||
65 | Likewise, we can compute the object pointer (O) from the native | |
66 | pointer (P), given a shift (S) and a base (B): | |
67 | ||
68 | O = %lobits64_Z(%shr64(%sub64(P,B),S)) | |
69 | ||
70 | Hence, each of the schemes may be characterized by the size Z of the | |
71 | object pointer, the shift S, and whether or not the base B is zero. | |
72 | Noting that | |
73 | %zx64_64(x) = x | |
74 | %shl64(x, 0) = x | |
75 | %add64(x, 0) = x | |
76 | %lobits64_64(x) = x | |
77 | %shr64(x, 0) = x | |
78 | %sub64(x, 0) = x | |
79 | it is easy to compute the number of ALU operations required by each | |
80 | scheme: | |
81 | ||
82 | A :: Z = 32, S == 0, B == 0 ops = 1 | |
83 | AX :: Z = 32, S == 0, B != 0 ops = 2 | |
84 | B :: Z = 32, S == 1, B == 0 ops = 2 | |
85 | BX :: Z = 32, S == 1, B != 0 ops = 3 | |
86 | C :: Z = 32, S == 2, B == 0 ops = 2 | |
87 | CX :: Z = 32, S == 2, B != 0 ops = 3 | |
88 | D :: Z = 32, S == 2, B == 0 ops = 2 | |
89 | DX :: Z = 32, S == 2, B != 0 ops = 3 | |
90 | E :: Z = 32, S == 3, B == 0 ops = 2 | |
91 | EX :: Z = 32, S == 3, B != 0 ops = 3 | |
92 | F :: Z = 40, S == 0, B == 0 ops = 1 (#) | |
93 | FX :: Z = 40, S == 0, B != 0 ops = 2 (#) | |
94 | G :: Z = 64, S == 0, B == 0 ops = 0 | |
95 | ||
96 | #: In schemes F and FX, the conversion from object pointer to native | |
97 | pointer requires logical-shift-right, rather than zero-extend, since | |
98 | the object pointer would be fetched from memory as a 64-bit value. | |
99 | The cost may actually be higher, as storing an object pointer in | |
100 | memory requires some care so as not to overwrite neighboring data. | |
101 | ||
102 | It is not clear that any of the thirteen schemes dominates another. | |
103 | Here are some thoughts. | |
104 | ||
105 | (A) This is is what we have now, but is still useful on 64-bit | |
106 | machines where the bottom 4G may be less cluttered than on a 32-bit | |
107 | machine. | |
108 | ||
109 | (AX) seems like a nice cost/benefit tradeoff for a program that only | |
110 | needs 4G of memory, since the base can be used to find a contiguous 4G | |
111 | somewhere in the address space. | |
112 | ||
113 | (B) and (C) are similar, the tradeoff being to increase object | |
114 | alignment requirements in order to allow more memory. Importantly, | |
115 | pointers having a bottom zero bit means that we can still set the | |
116 | bottom bit to one to represent small values in sum types. | |
117 | ||
118 | (D) and (E) are problematic because they leave no room to represent | |
119 | small objects in sum types with pointers. I think that really rules | |
120 | them out. | |
121 | ||
122 | (F) costs some in object alignment because a sequence of pointers in | |
123 | an object may have to be padded to meet 4-byte alignment. Loading a | |
124 | pointer from memory into a register may be slightly faster than in | |
125 | (B) or (C) because we don't have to shift, but I wonder if that | |
126 | matters. | |
127 | ||
128 | (G) costs the most in space, but has the fastest load time for | |
129 | pointers of the schemes that allow access to 4G of memory. | |
130 | ||
131 | A reasonable tradeoff in implementation complexity vs allowing our | |
132 | users enough flexibility might be to provide: | |
133 | ||
134 | A, AX, B, BX, C, CX, G | |
135 | ||
136 | After some experiments on those, we might be able to find a more | |
137 | manageable set for users. | |
138 | */ | |
139 | ||
140 | #if defined (GC_MODEL_NATIVE32) | |
141 | #define GC_MODEL_OBJPTR_SIZE 32 | |
142 | #define GC_MODEL_OBJPTR_SHIFT 0 | |
143 | #define GC_MODEL_OBJPTR_BASE 0 | |
144 | #define GC_MODEL_HEADER_SIZE 32 | |
145 | #define GC_MODEL_ARRLEN_SIZE 32 | |
146 | #elif defined (GC_MODEL_NATIVE64) | |
147 | #define GC_MODEL_OBJPTR_SIZE 64 | |
148 | #define GC_MODEL_OBJPTR_SHIFT 0 | |
149 | #define GC_MODEL_OBJPTR_BASE 0 | |
150 | #define GC_MODEL_HEADER_SIZE 64 | |
151 | #define GC_MODEL_ARRLEN_SIZE 64 | |
152 | #else | |
153 | #error GC_MODEL_* undefined | |
154 | #endif | |
155 | ||
156 | #define GC_MODEL_MINALIGN_SHIFT max(2, GC_MODEL_OBJPTR_SHIFT + 1) | |
157 | #define GC_MODEL_MINALIGN TWOPOWER(GC_MODEL_MINALIGN_SHIFT) | |
158 | ||
159 | #endif /* (defined (MLTON_GC_INTERNAL_TYPES)) */ |