| 1 | /* Copyright (C) 2005-2007 Henry Cejtin, Matthew Fluet, Suresh |
| 2 | * Jagannathan, and Stephen Weeks. |
| 3 | * |
| 4 | * MLton is released under a BSD-style license. |
| 5 | * See the file MLton-LICENSE for details. |
| 6 | */ |
| 7 | |
| 8 | #if (defined (MLTON_GC_INTERNAL_TYPES)) |
| 9 | |
| 10 | /* |
| 11 | Consider the following schemes for representing object pointers and |
| 12 | mapping them to 64-bit native pointers. |
| 13 | |
| 14 | A. 32 bits, with bottom two bits zero. |
| 15 | B. 32 bits, with bottom bit zero, shift left by one. |
| 16 | C. 32 bits, with bottom bit zero, shift left by two. |
| 17 | D. 32 bits, shift left by two. |
| 18 | E. 32 bits, shift left by three. |
| 19 | F. 40 bits, with bottom two bits zero. |
| 20 | G. 64 bits, with bottom two bits zero. |
| 21 | |
| 22 | These schemes vary in the number of bits to represent a pointer in an |
| 23 | object, the time to load a pointer from memory into a register, the |
| 24 | amount of addressable memory, and the object alignment. |
| 25 | |
| 26 | bits time mem(G) align |
| 27 | A 32 fast 4 4 |
| 28 | B 32 slow 8 4 |
| 29 | C 32 slow 16 8 |
| 30 | D 32 slow 16 4 |
| 31 | E 32 slow 32 8 |
| 32 | F 40 slow 256 4 |
| 33 | G 64 fast 4G 4 |
| 34 | |
| 35 | Each of the (A-F) has a variant (AX-FX) in which pointers are added to |
| 36 | some constant base address. This gives access to any region in the |
| 37 | virtual address space instead of just the low addresses. |
| 38 | |
| 39 | The following diagram demonstrates what portion of the native pointer |
| 40 | to which the object pointer corresponds. |
| 41 | |
| 42 | 64 32 0 |
| 43 | | | | |
| 44 | ----------------------------------------------------------------- |
| 45 | |
| 46 | ==============================00 A |
| 47 | |
| 48 | ===============================0 B |
| 49 | |
| 50 | ===============================0 C |
| 51 | |
| 52 | ================================ D |
| 53 | |
| 54 | ================================= E |
| 55 | |
| 56 | ======================================00 F |
| 57 | |
| 58 | ==============================================================00 G |
| 59 | |
| 60 | Algorithmically, we can compute the native pointer (P) from the object |
| 61 | pointer (O) (with bitsize Z), given a shift (S) and a base (B): |
| 62 | |
| 63 | P = %add64(%shl64(%zxZ_64(O),S),B) |
| 64 | |
| 65 | Likewise, we can compute the object pointer (O) from the native |
| 66 | pointer (P), given a shift (S) and a base (B): |
| 67 | |
| 68 | O = %lobits64_Z(%shr64(%sub64(P,B),S)) |
| 69 | |
| 70 | Hence, each of the schemes may be characterized by the size Z of the |
| 71 | object pointer, the shift S, and whether or not the base B is zero. |
| 72 | Noting that |
| 73 | %zx64_64(x) = x |
| 74 | %shl64(x, 0) = x |
| 75 | %add64(x, 0) = x |
| 76 | %lobits64_64(x) = x |
| 77 | %shr64(x, 0) = x |
| 78 | %sub64(x, 0) = x |
| 79 | it is easy to compute the number of ALU operations required by each |
| 80 | scheme: |
| 81 | |
| 82 | A :: Z = 32, S == 0, B == 0 ops = 1 |
| 83 | AX :: Z = 32, S == 0, B != 0 ops = 2 |
| 84 | B :: Z = 32, S == 1, B == 0 ops = 2 |
| 85 | BX :: Z = 32, S == 1, B != 0 ops = 3 |
| 86 | C :: Z = 32, S == 2, B == 0 ops = 2 |
| 87 | CX :: Z = 32, S == 2, B != 0 ops = 3 |
| 88 | D :: Z = 32, S == 2, B == 0 ops = 2 |
| 89 | DX :: Z = 32, S == 2, B != 0 ops = 3 |
| 90 | E :: Z = 32, S == 3, B == 0 ops = 2 |
| 91 | EX :: Z = 32, S == 3, B != 0 ops = 3 |
| 92 | F :: Z = 40, S == 0, B == 0 ops = 1 (#) |
| 93 | FX :: Z = 40, S == 0, B != 0 ops = 2 (#) |
| 94 | G :: Z = 64, S == 0, B == 0 ops = 0 |
| 95 | |
| 96 | #: In schemes F and FX, the conversion from object pointer to native |
| 97 | pointer requires logical-shift-right, rather than zero-extend, since |
| 98 | the object pointer would be fetched from memory as a 64-bit value. |
| 99 | The cost may actually be higher, as storing an object pointer in |
| 100 | memory requires some care so as not to overwrite neighboring data. |
| 101 | |
| 102 | It is not clear that any of the thirteen schemes dominates another. |
| 103 | Here are some thoughts. |
| 104 | |
| 105 | (A) This is is what we have now, but is still useful on 64-bit |
| 106 | machines where the bottom 4G may be less cluttered than on a 32-bit |
| 107 | machine. |
| 108 | |
| 109 | (AX) seems like a nice cost/benefit tradeoff for a program that only |
| 110 | needs 4G of memory, since the base can be used to find a contiguous 4G |
| 111 | somewhere in the address space. |
| 112 | |
| 113 | (B) and (C) are similar, the tradeoff being to increase object |
| 114 | alignment requirements in order to allow more memory. Importantly, |
| 115 | pointers having a bottom zero bit means that we can still set the |
| 116 | bottom bit to one to represent small values in sum types. |
| 117 | |
| 118 | (D) and (E) are problematic because they leave no room to represent |
| 119 | small objects in sum types with pointers. I think that really rules |
| 120 | them out. |
| 121 | |
| 122 | (F) costs some in object alignment because a sequence of pointers in |
| 123 | an object may have to be padded to meet 4-byte alignment. Loading a |
| 124 | pointer from memory into a register may be slightly faster than in |
| 125 | (B) or (C) because we don't have to shift, but I wonder if that |
| 126 | matters. |
| 127 | |
| 128 | (G) costs the most in space, but has the fastest load time for |
| 129 | pointers of the schemes that allow access to 4G of memory. |
| 130 | |
| 131 | A reasonable tradeoff in implementation complexity vs allowing our |
| 132 | users enough flexibility might be to provide: |
| 133 | |
| 134 | A, AX, B, BX, C, CX, G |
| 135 | |
| 136 | After some experiments on those, we might be able to find a more |
| 137 | manageable set for users. |
| 138 | */ |
| 139 | |
| 140 | #if defined (GC_MODEL_NATIVE32) |
| 141 | #define GC_MODEL_OBJPTR_SIZE 32 |
| 142 | #define GC_MODEL_OBJPTR_SHIFT 0 |
| 143 | #define GC_MODEL_OBJPTR_BASE 0 |
| 144 | #define GC_MODEL_HEADER_SIZE 32 |
| 145 | #define GC_MODEL_ARRLEN_SIZE 32 |
| 146 | #elif defined (GC_MODEL_NATIVE64) |
| 147 | #define GC_MODEL_OBJPTR_SIZE 64 |
| 148 | #define GC_MODEL_OBJPTR_SHIFT 0 |
| 149 | #define GC_MODEL_OBJPTR_BASE 0 |
| 150 | #define GC_MODEL_HEADER_SIZE 64 |
| 151 | #define GC_MODEL_ARRLEN_SIZE 64 |
| 152 | #else |
| 153 | #error GC_MODEL_* undefined |
| 154 | #endif |
| 155 | |
| 156 | #define GC_MODEL_MINALIGN_SHIFT max(2, GC_MODEL_OBJPTR_SHIFT + 1) |
| 157 | #define GC_MODEL_MINALIGN TWOPOWER(GC_MODEL_MINALIGN_SHIFT) |
| 158 | |
| 159 | #endif /* (defined (MLTON_GC_INTERNAL_TYPES)) */ |