| 1 | ;;; Guile bytecode assembler |
| 2 | |
| 3 | ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014 Free Software Foundation, Inc. |
| 4 | ;;; |
| 5 | ;;; This library is free software; you can redistribute it and/or |
| 6 | ;;; modify it under the terms of the GNU Lesser General Public |
| 7 | ;;; License as published by the Free Software Foundation; either |
| 8 | ;;; version 3 of the License, or (at your option) any later version. |
| 9 | ;;; |
| 10 | ;;; This library is distributed in the hope that it will be useful, |
| 11 | ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 13 | ;;; Lesser General Public License for more details. |
| 14 | ;;; |
| 15 | ;;; You should have received a copy of the GNU Lesser General Public |
| 16 | ;;; License along with this library; if not, write to the Free Software |
| 17 | ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 18 | |
| 19 | ;;; Commentary: |
| 20 | ;;; |
| 21 | ;;; This module implements an assembler that creates an ELF image from |
| 22 | ;;; bytecode assembly and macro-assembly. The input can be given in |
| 23 | ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a |
| 24 | ;;; procedural interface, the emit-OP procedures, but that is not |
| 25 | ;;; currently exported. |
| 26 | ;;; |
| 27 | ;;; "Primitive instructions" correspond to VM operations. Assemblers |
| 28 | ;;; for primitive instructions are generated programmatically from |
| 29 | ;;; (instruction-list), which itself is derived from the VM sources. |
| 30 | ;;; There are also "macro-instructions" like "label" or "load-constant" |
| 31 | ;;; that expand to 0 or more primitive instructions. |
| 32 | ;;; |
| 33 | ;;; The assembler also handles some higher-level tasks, like creating |
| 34 | ;;; the symbol table, other metadata sections, creating a constant table |
| 35 | ;;; for the whole compilation unit, and writing the dynamic section of |
| 36 | ;;; the ELF file along with the appropriate initialization routines. |
| 37 | ;;; |
| 38 | ;;; Most compilers will want to use the trio of make-assembler, |
| 39 | ;;; emit-text, and link-assembly. That will result in the creation of |
| 40 | ;;; an ELF image as a bytevector, which can then be loaded using |
| 41 | ;;; load-thunk-from-memory, or written to disk as a .go file. |
| 42 | ;;; |
| 43 | ;;; Code: |
| 44 | |
| 45 | (define-module (system vm assembler) |
| 46 | #:use-module (system base target) |
| 47 | #:use-module (system vm dwarf) |
| 48 | #:use-module (system vm elf) |
| 49 | #:use-module (system vm linker) |
| 50 | #:use-module (language bytecode) |
| 51 | #:use-module (rnrs bytevectors) |
| 52 | #:use-module (ice-9 binary-ports) |
| 53 | #:use-module (ice-9 vlist) |
| 54 | #:use-module (ice-9 match) |
| 55 | #:use-module (srfi srfi-1) |
| 56 | #:use-module (srfi srfi-4) |
| 57 | #:use-module (srfi srfi-9) |
| 58 | #:use-module (srfi srfi-11) |
| 59 | #:export (make-assembler |
| 60 | emit-text |
| 61 | link-assembly)) |
| 62 | |
| 63 | |
| 64 | \f |
| 65 | |
| 66 | ;;; Bytecode consists of 32-bit units, often subdivided in some way. |
| 67 | ;;; These helpers create one 32-bit unit from multiple components. |
| 68 | |
;; Pack X into bits 0-7 and Y into bits 8-31 of one 32-bit unit.
;; Both operands are unsigned; an "out of range" error is signalled
;; for X outside [0, 255] or Y outside [0, #xffffff].  Checking Y here
;; reports the offending operand directly instead of failing later when
;; the oversized or negative word is stored.
(define-inlinable (pack-u8-u24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y #xffffff)
    (error "out of range" y))
  (logior x (ash y 8)))
| 73 | |
;; Pack X into bits 0-7 and the signed 24-bit value Y into bits 8-31.
;; Negative Y is encoded in two's complement by adding #x1000000.
;; The negative range now includes -#x800000, the smallest value a
;; signed 24-bit field can hold; it used to be rejected by an
;; off-by-one in the range test.  Non-negative values up to #xffffff
;; are still accepted unchanged, as before.
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ((<= (- #x800000) y -1)
                   (+ y #x1000000))
                  ((<= 0 y #xffffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
| 84 | |
;; Pack a 1-bit X (bit 0), a 7-bit Y (bits 1-7) and a 24-bit Z
;; (bits 8-31) into one 32-bit unit.  All three operands are range
;; checked; Z was previously unchecked.
(define-inlinable (pack-u1-u7-u24 x y z)
  (unless (<= 0 x 1)
    (error "out of range" x))
  (unless (<= 0 y 127)
    (error "out of range" y))
  (unless (<= 0 z #xffffff)
    (error "out of range" z))
  (logior x (ash y 1) (ash z 8)))
| 91 | |
;; Pack an 8-bit X (bits 0-7) and two 12-bit fields, Y (bits 8-19) and
;; Z (bits 20-31), into one 32-bit unit.  All three operands are range
;; checked; Z was previously unchecked.
(define-inlinable (pack-u8-u12-u12 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 4095)
    (error "out of range" y))
  (unless (<= 0 z 4095)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 20)))
| 98 | |
;; Pack two 8-bit fields, X (bits 0-7) and Y (bits 8-15), and a 16-bit
;; Z (bits 16-31) into one 32-bit unit.  All three operands are range
;; checked; Z was previously unchecked.
(define-inlinable (pack-u8-u8-u16 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z #xffff)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 16)))
| 105 | |
;; Pack four 8-bit fields into one 32-bit unit: X in bits 0-7, Y in
;; bits 8-15, Z in bits 16-23 and W in bits 24-31.  All four operands
;; are range checked; W was previously unchecked.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z 255)
    (error "out of range" z))
  (unless (<= 0 w 255)
    (error "out of range" w))
  (logior x (ash y 8) (ash z 16) (ash w 24)))
| 114 | |
;; Combine boolean flags into an integer bit mask: the first flag
;; contributes the value 1, the second the value 2.
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ flag0 flag1)
     (logior (if flag0 1 0)
             (if flag1 2 0)))))
| 120 | |
| 121 | ;;; Helpers to read and write 32-bit units in a buffer. |
| 122 | |
;; Read the N-th unsigned 32-bit unit of BUF, in native byte order.
;; N counts 32-bit units, so the byte offset is N * 4.
(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))
| 125 | |
;; Store VAL as the N-th unsigned 32-bit unit of BUF, in native byte
;; order.  N counts 32-bit units, so the byte offset is N * 4.
(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))
| 128 | |
;; Read the N-th signed 32-bit unit of BUF, in native byte order.
;; N counts 32-bit units, so the byte offset is N * 4.
(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))
| 131 | |
;; Store VAL as the N-th signed 32-bit unit of BUF, in native byte
;; order.  N counts 32-bit units, so the byte offset is N * 4.
(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
| 134 | |
| 135 | |
| 136 | \f |
| 137 | |
| 138 | ;;; A <meta> entry collects metadata for one procedure. Procedures are |
| 139 | ;;; written as contiguous ranges of bytecode. |
| 140 | ;;; |
;; Evaluate ARG once and check it against the `match' pattern PATTERN.
;; If it does not match, signal an error whose message is "expected "
;; followed by KIND, with the offending value as irritant.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (unless (match val
                 (pattern #t)
                 (_ #f))
         (error (string-append "expected " kind) val))))))
| 145 | |
;; Metadata record for one procedure.
;;   label      - the procedure's label
;;   properties - alist of procedure properties
;;   low-pc     - position at which the procedure starts
;;   high-pc    - position at which it ends; settable after creation
;;   arities    - list of <arity> records; settable after creation
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  (label      meta-label)
  (properties meta-properties set-meta-properties!)
  (low-pc     meta-low-pc)
  (high-pc    meta-high-pc    set-meta-high-pc!)
  (arities    meta-arities    set-meta-arities!))
| 154 | |
;; Create a <meta> for the procedure labelled LABEL that starts at
;; LOW-PC.  LABEL must be an exact integer or a symbol, and PROPERTIES
;; an alist whose keys are symbols; otherwise an error is signalled.
;; The high-pc starts out as #f and the arity list as empty.
(define (make-meta label properties low-pc)
  ;; The diagnostic names both accepted label kinds; it used to claim
  ;; that only symbols were expected.
  (assert-match label (or (? exact-integer?) (? symbol?))
                "exact integer or symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (%make-meta label properties low-pc #f '()))
| 159 | |
;; Return the value stored under the `name' key of META's property
;; alist, or #f if there is no such key.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
| 162 | |
| 163 | ;; Metadata for one <lambda-case>. |
;; Arity metadata for one <lambda-case>: the argument specification
;; (req, opt, rest, kw-indices, allow-other-keys?), the low-pc and
;; high-pc positions, and a list of definitions.  Only high-pc and
;; definitions can be updated after construction.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc definitions)
  arity?
  (req               arity-req)
  (opt               arity-opt)
  (rest              arity-rest)
  (kw-indices        arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  (low-pc            arity-low-pc)
  (high-pc           arity-high-pc     set-arity-high-pc!)
  (definitions       arity-definitions set-arity-definitions!))
| 176 | |
;; Number of 32-bit units in one code block.  Defined as identifier
;; syntax so that every reference expands to the literal 32.
(define-syntax *block-size*
  (identifier-syntax 32))
| 178 | |
| 179 | ;;; An assembler collects all of the words emitted during assembly, and |
| 180 | ;;; also maintains ancillary information such as the constant table, a |
| 181 | ;;; relocation list, and so on. |
| 182 | ;;; |
| 183 | ;;; Bytecode consists of 32-bit units. We emit bytecode using native |
| 184 | ;;; endianness. If we're targeting a foreign endianness, we byte-swap |
| 185 | ;;; the bytevector as a whole instead of conditionalizing each access. |
| 186 | ;;; |
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources
            dead-slot-maps)
  asm?

  ;; Bytecode is written into a logically growable vector that is
  ;; implemented as a list of blocks.  `cur' is the block currently
  ;; being filled and `idx' is the index of the next free slot in it,
  ;; counted in 32-bit units.
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; Absolute offset, in u32 units, of the start of the instruction
  ;; being assembled.  It is updated once all words of a primitive
  ;; instruction have been written.  This mirrors the VM, which only
  ;; advances its instruction pointer after executing an instruction,
  ;; and makes `start' the right base for offsets between two points in
  ;; a program.
  (start asm-start set-asm-start!)

  ;; The blocks that have already been filled.
  (prev asm-prev set-asm-prev!)

  ;; How many u32 words are stored in `prev'; equivalently, the
  ;; absolute offset of the current block.
  (written asm-written set-asm-written!)

  ;; A hash table mapping each label defined in this compilation unit
  ;; to its position.
  (labels asm-labels set-asm-labels!)

  ;; Relocations required by the program text, in an internal
  ;; representation.  Textual, relative relocations are handled by the
  ;; assembler itself; the other kinds are later reified as linker
  ;; relocations and resolved by the linker.
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table: a vhash of object -> label.  Constants are
  ;; de-duplicated and written to their own sections: .rodata for
  ;; read-only data, .data for constants that must be initialized at
  ;; load time (symbols, for example).  A constant may depend on other
  ;; constants (e.g. a symbol on its stringbuf), so the order of this
  ;; table matters.
  (constants asm-constants set-asm-constants!)

  ;; Instructions that initialize the constants.  They will run in a
  ;; thunk with 2 local variables.
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, holding section names.
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; Section number to assign to the next section that gets written.
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; List of <meta> records, one per procedure.
  (meta asm-meta set-asm-meta!)

  ;; List of (pos . source) pairs.  POS is relative to the start of the
  ;; text section; SOURCE has the format that source-properties
  ;; returns.
  (sources asm-sources set-asm-sources!)

  ;; List of (pos . dead-slot-map) pairs.  POS is relative to the start
  ;; of the text section; DEAD-SLOT-MAP is an integer bitfield of the
  ;; slots that are dead at a call site.
  (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
| 278 | |
;; Allocate a new code block: a u32vector with room for *block-size*
;; 32-bit units.
(define-inlinable (fresh-block)
  (make-u32vector *block-size*))
| 281 | |
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a fresh assembler for a target with the given @var{word-size}
and @var{endianness}.  Both default to the values of the configured
target."
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (shstrtab (make-string-table)))
    (make-asm first-block 0 0 '() 0     ; cur idx start prev written
              labels '()                ; labels relocs
              word-size endianness
              vlist-null '()            ; constants inits
              shstrtab 1                ; shstrtab next-section-number
              '() '() '())))            ; meta sources dead-slot-maps
| 293 | |
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
| 297 | |
(define-inlinable (asm-pos asm)
  "Return the absolute offset, in 32-bit units, at which the next word
will be written into the code buffer: the words held in previous blocks
plus the index into the current one."
  (+ (asm-written asm) (asm-idx asm)))
| 302 | |
(define (allocate-new-block asm)
  "Retire the current block onto the list of previous blocks and make a
fresh, empty block current, so that the next word lands at index 0 of
the new block."
  (let ((block (fresh-block))
        ;; Compute the new `written' count before idx is reset.
        (end (asm-pos asm))
        (filled (cons (asm-cur asm) (asm-prev asm))))
    (set-asm-prev! asm filled)
    (set-asm-written! asm end)
    (set-asm-cur! asm block)
    (set-asm-idx! asm 0)))
| 311 | |
(define-inlinable (emit asm u32)
  "Append the 32-bit word @var{u32} to the instruction stream.  The
current block must have room for it; if the word fills the block, a new
block is allocated so that the next call also has room."
  (let ((next (1+ (asm-idx asm))))
    (u32-set! (asm-cur asm) (asm-idx asm) u32)
    (set-asm-idx! asm next)
    (when (= next *block-size*)
      (allocate-new-block asm))))
| 320 | |
(define-inlinable (make-reloc type label base word)
  "Build an internal relocation of type @var{type} that references the
symbol @var{label} and sits @var{word} words after position
@var{base}.  @var{type} is either @code{x8-s24}, a 24-bit relative
label reference that the assembler can fix up itself, or @code{s32}, a
32-bit relative reference that the linker must fix up.  The relocation
is represented as the list @code{(type label base word)}."
  `(,type ,label ,base ,word))
| 328 | |
(define-inlinable (reset-asm-start! asm)
  "Move asm-start to the current write position.  Called once all the
words of an instruction have been written."
  (let ((pos (asm-pos asm)))
    (set-asm-start! asm pos)))
| 332 | |
(define (record-label-reference asm label)
  "Record an x8-s24 reference to the local label @var{label} for the
word about to be written.  The assembler patches the value in later."
  (let* ((base (asm-start asm))
         ;; Distance, in words, from the instruction start to the word
         ;; that holds the reference.
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label base word)
                           (asm-relocs asm)))))
| 340 | |
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Record an s32 far reference to @var{label} for the word about to be
written.  The relocation base is the start of the current instruction
minus @var{offset}.  The linker patches the value in later."
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base word)
                           (asm-relocs asm)))))
| 348 | |
| 349 | |
| 350 | \f |
| 351 | |
| 352 | ;;; |
| 353 | ;;; Primitive assemblers are defined by expanding `assembler' for each |
| 354 | ;;; opcode in `(instruction-list)'. |
| 355 | ;;; |
| 356 | |
;; Available at expansion time because the macros below call it.
(eval-when (expand compile load eval)
  ;; Return an identifier, in the lexical context of CTX, whose name is
  ;; the names of the identifiers A and B concatenated.
  (define (id-append ctx a b)
    (let ((name (symbol-append (syntax->datum a)
                               (syntax->datum b))))
      (datum->syntax ctx name))))
| 360 | |
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure that
;; takes an assembler followed by the operands of one primitive
;; instruction and emits its encoding.  WORD0 names the layout of the
;; first word, which carries OPCODE; each WORD* names the layout of one
;; following word.  After emitting all words the procedure resets
;; asm-start.
(define-syntax assembler
  (lambda (stx)
    ;; (op-case ASM NAME ((TYPE ARG ...) CODE ...) ...) selects, at
    ;; expansion time, the clause whose TYPE is `eq?' to the value of
    ;; NAME.  It yields a syntax object of the form
    ;; ((FORMAL ...) CODE ...) in which every ARG has been replaced by
    ;; a generated temporary.  An unknown NAME is an error.
    (define-syntax op-case
      (lambda (form)
        (syntax-case form ()
          ((_ asm name)
           #'(error "unmatched name" name))
          ((_ asm name ((type arg ...) code ...) clause ...)
           #'(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...))))))

    ;; Formals and code for the first word of an instruction.  The low
    ;; 8 bits of that word always hold the opcode.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          ;; The label field is patched in later via the relocation.
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for one of the words after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; Emits two words: the high 32 bits, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) #xffffffff)))
       ;; B32 emits nothing.
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        ;; OFFSET is scaled by the number of 32-bit units per target
        ;; word.
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case stx ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...) code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...) code* ...) ...)
                      (map (lambda (word)
                             (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
| 459 | |
;; Table mapping an instruction name (a symbol) to the procedure that
;; assembles it.  Filled in as assemblers are defined; consulted by
;; emit-text.
(define assemblers
  (make-hash-table))
| 461 | |
;; (define-assembler NAME OPCODE KIND ARG ...) defines and exports the
;; procedure emit-NAME, built by `assembler' from NAME, OPCODE and the
;; word layouts ARG ..., and registers it under NAME in `assemblers'.
;; KIND is accepted but not used.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((emit-name (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
| 473 | |
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form for every entry INST of (instruction-list), which is read at
;; expansion time.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (entry)
                             (datum->syntax #'macro entry))
                           (instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))

;; Define an emit-OP assembler for every primitive instruction.
(visit-opcodes define-assembler)
| 486 | |
(define (emit-text asm instructions)
  "Assemble @var{instructions} into @var{asm}.  @var{instructions} is a
list of instructions, each itself a list whose first element names the
instruction and whose remaining elements are its operands.  An
instruction with no registered assembler signals an error.  This
procedure may be called any number of times before
@code{link-assembly}."
  (define (assemble-one inst)
    (let ((proc (hashq-ref assemblers (car inst))))
      (unless proc
        (error 'bad-instruction inst))
      (apply proc asm (cdr inst))))
  (for-each assemble-one instructions))
| 498 | |
| 499 | \f |
| 500 | |
| 501 | ;;; |
| 502 | ;;; The constant table records a topologically sorted set of literal |
| 503 | ;;; constants used by a program. For example, a pair uses its car and |
| 504 | ;;; cdr, a string uses its stringbuf, etc. |
| 505 | ;;; |
| 506 | ;;; Some things we want to add to the constant table are not actually |
| 507 | ;;; Scheme objects: for example, stringbufs, cache cells for toplevel |
| 508 | ;;; references, or cache cells for non-closure procedures. For these we |
| 509 | ;;; define special record types and add instances of those record types |
| 510 | ;;; to the table. |
| 511 | ;;; |
| 512 | |
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is an immediate value and @code{#f}
otherwise.  The test is whether bit 1 or bit 2 (mask 6) is set in the
object's address word."
  (logtest 6 (object-address x)))
| 516 | |
;; Constant-table entry that wraps a string; it stands for that
;; string's stringbuf.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string stringbuf-string))
| 521 | |
;; Constant-table entry for a statically allocated procedure; `code'
;; is the value that gets patched into the procedure object.
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code static-procedure-code))
| 526 | |
;; Constant-table entry for the raw contents of a uniform vector:
;; `bytes' holds the data and `element-size' the width, in bytes, of
;; the units it is made of.
(define-record-type <uniform-vector-backing-store>
  (make-uniform-vector-backing-store bytes element-size)
  uniform-vector-backing-store?
  (bytes        uniform-vector-backing-store-bytes)
  (element-size uniform-vector-backing-store-element-size))
| 532 | |
;; Constant-table entry for a cache cell, identified by a scope and a
;; key.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key   cache-cell-key))
| 538 | |
;; True if OBJ is a vector whose array shape is exactly one dimension
;; running from 0 to length - 1.
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last (1- (vector-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
| 542 | |
;; True if OBJ is an array whose element type is named by a symbol and
;; whose shape is exactly one dimension running from 0 to length - 1.
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last (1- (array-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
| 547 | |
(define (statically-allocatable? x)
  "Return @code{#t} if the non-immediate constant @var{x} can be
allocated statically, and @code{#f} if it needs some kind of runtime
allocation.  Pairs, strings, stringbufs, static procedures and arrays
are statically allocatable."
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
| 553 | |
(define (intern-constant asm obj)
  "Intern @var{obj} in the constant table of @var{asm} and return a
label that refers to it.  If @var{obj} is already in the table, the
label it was given before is returned.  Immediates are not interned;
for them the result is @code{#f}."
  (define (recur obj)
    (intern-constant asm obj))
  ;; Initialization code that stores OBJ into field N of the object
  ;; labelled DST.  Nothing is needed when OBJ has no label, i.e. when
  ;; it is an immediate.
  (define (field dst n obj)
    (let ((src (recur obj)))
      (cond
       ((not src) '())
       ((statically-allocatable? obj)
        `((static-patch! ,dst ,n ,src)))
       (else
        `((static-ref 1 ,src)
          (static-set! 1 ,dst ,n))))))
  ;; Width in bytes of the units in which the contents of the uniform
  ;; vector OBJ are handled.
  (define (unit-width obj)
    (case (array-type obj)
      ((vu8 u8 s8) 1)
      ((u16 s16) 2)
      ;; Bitvectors are addressed in 32-bit units.
      ;; Although a complex number is 8 or 16 bytes wide,
      ;; it should be byteswapped in 4 or 8 byte units.
      ((u32 s32 f32 c32 b) 4)
      ((u64 s64 f64 c64) 8)
      (else
       (error "unhandled array type" obj))))
  ;; Initialization code for OBJ itself, which will live at LABEL.
  ;; Interns whatever OBJ depends on along the way.
  (define (intern obj label)
    (cond
     ((pair? obj)
      (append (field label 0 (car obj))
              (field label 1 (cdr obj))))
     ((simple-vector? obj)
      ;; Element I goes into field I + 1.
      (let ((len (vector-length obj)))
        (let lp ((i 0) (acc '()))
          (if (= i len)
              (reverse acc)
              (lp (1+ i)
                  (append-reverse (field label (1+ i) (vector-ref obj i))
                                  acc))))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((static-patch! ,label 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      `((make-non-immediate 1 ,(recur (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
     ((keyword? obj)
      `((static-ref 1 ,(recur (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      `((make-non-immediate 1 ,(recur (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? obj) '())
     ((simple-uniform-vector? obj)
      (let ((width (unit-width obj)))
        `((static-patch! ,label 2
                         ,(recur (make-uniform-vector-backing-store
                                  (uniform-array->bytevector obj)
                                  width))))))
     (else
      (error "don't know how to intern" obj))))
  (cond
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; `intern' may itself add entries to asm-constants and asm-inits,
    ;; so both are re-read after it returns.  The dependencies of OBJ
    ;; therefore enter the table before OBJ does.
    (let* ((label (gensym "constant"))
           (obj-inits (intern obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse obj-inits (asm-inits asm)))
      label))))
| 627 | |
(define (intern-non-immediate asm obj)
  "Intern @var{obj}, which must not be an immediate, in the constant
table and return its label.  Signals an error for an immediate."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
| 634 | |
(define (intern-cache-cell asm scope key)
  "Intern a cache cell for @var{scope} and @var{key} in the constant
table and return its label.  If a cache cell with the same scope and
key is already in the table, its existing label is returned."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
| 640 | |
;; The module cell of a scope is the cache cell whose key is #t.
(define (intern-module-cache-cell asm scope)
  "Intern the cache cell that holds the module for @var{scope}, and
return its label."
  (intern-cache-cell asm scope #t))
| 645 | |
| 646 | |
| 647 | \f |
| 648 | |
| 649 | ;;; |
| 650 | ;;; Macro assemblers bridge the gap between primitive instructions and |
| 651 | ;;; some higher-level operations. |
| 652 | ;;; |
| 653 | |
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; the procedure emit-NAME, with formals ARG ... and body BODY ..., and
;; registers it under NAME in `assemblers' so that emit-text can
;; dispatch to it like a primitive instruction.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((emit-name (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
| 665 | |
;; Load the constant OBJ into slot DST.  Immediates use the shortest
;; make-*-immediate instruction that fits; the short form also
;; requires DST to be below 256.  Non-immediates are interned in the
;; constant table and then loaded with make-non-immediate if they are
;; statically allocatable, or with static-ref otherwise.
(define-macro-assembler (load-constant asm dst obj)
  (define (fits-in-bits? bits width)
    (zero? (ash bits (- width))))
  (cond
   ((not (immediate? obj))
    (if (statically-allocatable? obj)
        (emit-make-non-immediate asm dst (intern-non-immediate asm obj))
        (emit-static-ref asm dst (intern-non-immediate asm obj))))
   (else
    (let ((bits (object-address obj)))
      (cond
       ((and (< dst 256) (fits-in-bits? bits 16))
        (emit-make-short-immediate asm dst obj))
       ((fits-in-bits? bits 32)
        (emit-make-long-immediate asm dst obj))
       (else
        (emit-make-long-long-immediate asm dst obj)))))))
| 681 | |
;; Load into slot DST a statically allocated procedure whose code is
;; at LABEL.  The procedure object is interned in the constant table.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate
   asm dst
   (intern-constant asm (make-static-procedure label))))
| 685 | |
;; (define-tc7-macro-assembler NAME TC7) defines the macro-assembler
;; NAME, taking (asm slot invert? label), as br-if-tc7 with its type
;; code argument fixed to TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
| 689 | |
;; These type codes are part of Guile's ABI and must be kept in sync
;; with tags.h.  Macro assemblers that are currently unused are
;; commented out.  See also *branching-primcall-arities* in (language
;; cps primitives), the set of macro-instructions in assembly.scm, and
;; disassembler.scm:code-annotation.
;;
;; FIXME: Define all tc7 values in Scheme in one place, derived from
;; tags.h.
;;
;; NOTE(review): the commented-out br-if-weak-vector entry carries the
;; same value (13) as br-if-vector; check it against tags.h before
;; enabling it.
(define-tc7-macro-assembler br-if-symbol 5)
(define-tc7-macro-assembler br-if-variable 7)
(define-tc7-macro-assembler br-if-vector 13)
;(define-tc7-macro-assembler br-if-weak-vector 13)
(define-tc7-macro-assembler br-if-string 21)
;(define-tc7-macro-assembler br-if-heap-number 23)
;(define-tc7-macro-assembler br-if-stringbuf 39)
(define-tc7-macro-assembler br-if-bytevector 77)
;(define-tc7-macro-assembler br-if-pointer 31)
;(define-tc7-macro-assembler br-if-hashtable 29)
;(define-tc7-macro-assembler br-if-fluid 37)
;(define-tc7-macro-assembler br-if-dynamic-state 45)
;(define-tc7-macro-assembler br-if-frame 47)
;(define-tc7-macro-assembler br-if-vm 55)
;(define-tc7-macro-assembler br-if-vm-cont 71)
;(define-tc7-macro-assembler br-if-rtl-program 69)
;(define-tc7-macro-assembler br-if-weak-set 85)
;(define-tc7-macro-assembler br-if-weak-table 87)
;(define-tc7-macro-assembler br-if-array 93)
(define-tc7-macro-assembler br-if-bitvector 95)
;(define-tc7-macro-assembler br-if-port 125)
;(define-tc7-macro-assembler br-if-smob 127)
| 720 | |
;; Start a procedure: define LABEL at the current position and push a
;; new <meta> with PROPERTIES, starting at that position, onto the
;; assembler's meta list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
| 725 | |
;; Finish the procedure most recently begun: record the current
;; position as its high-pc and put its arities, which were accumulated
;; in reverse, back into definition order.
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (arities (reverse (meta-arities current))))
    (set-meta-high-pc! current (asm-start asm))
    (set-meta-arities! current arities)))
| 730 | |
;; Begin an arity that has only required arguments: no optionals and
;; no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
| 733 | |
;; Begin an arity without keyword arguments: an empty keyword index
;; list, and other keys not allowed.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
| 736 | |
;; Begin an arity of the current procedure.  Validates the argument
;; specification, pushes a new <arity> starting at the current position
;; onto the current <meta>, and emits the cheapest prelude that can
;; handle it: kw-prelude if keywords are involved, opt-prelude if there
;; are optional or rest arguments, standard-prelude otherwise.
;;
;;   req, opt           - lists of symbols
;;   rest               - #f or a symbol
;;   kw-indices         - alist of keyword -> integer
;;   allow-other-keys?  - boolean
;;   nlocals            - integer
;;   alternate          - #f, or the label (exact integer or symbol)
;;                        passed on to the prelude
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  ;; The diagnostic lists every accepted kind; it used to omit exact
  ;; integers.
  (assert-match alternate (or #f (? exact-integer?) (? symbol?))
                "#f, exact integer, or symbol")
  (let* ((meta (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f '()))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (rest? (->bool rest)))
    (set-meta-arities! meta (cons arity (meta-arities meta)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
| 765 | |
;; Finish the arity most recently begun in the current procedure: put
;; its definitions, accumulated in reverse, back in order, and record
;; the current position as its high-pc.
(define-macro-assembler (end-arity asm)
  (let* ((meta (car (asm-meta asm)))
         (current (car (meta-arities meta))))
    (set-arity-definitions! current (reverse (arity-definitions current)))
    (set-arity-high-pc! current (asm-start asm))))
| 770 | |
;; Emit the prelude for an arity that has only required arguments.
;; With an ALTERNATE, branch there when the argument count differs from
;; NREQ and then allocate the frame.  Without one, use the combined
;; assert-nargs-ee/locals instruction when both NREQ and the number of
;; extra locals are below 2^12, and the two separate instructions
;; otherwise.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (let ((field-limit (ash 1 12)))
        (if (and (< nreq field-limit)
                 (< (- nlocals nreq) field-limit))
            (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
            (begin
              (emit-assert-nargs-ee asm nreq)
              (emit-alloc-frame asm nlocals))))))
| 781 | |
;; Emit the prelude for an arity with optional and/or rest arguments
;; but no keywords.  The lower bound on the argument count is checked
;; first; then either the rest argument is bound or the upper bound
;; (NREQ + NOPT) is checked; finally the frame is allocated.  When
;; ALTERNATE is given the checks branch to it, otherwise they are
;; assertions.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (let ((npositional (+ nreq nopt)))
    (cond
     (alternate (emit-br-if-nargs-lt asm nreq alternate))
     (else (emit-assert-nargs-ge asm nreq)))
    (if rest?
        (emit-bind-rest asm npositional)
        (if alternate
            (emit-br-if-nargs-gt asm npositional alternate)
            (emit-assert-nargs-le asm npositional)))
    (emit-alloc-frame asm nlocals)))
| 794 | |
;; Emit the prelude for an arity that accepts keyword arguments.  The
;; argument count is checked (branching to ALTERNATE if there is one,
;; asserting otherwise), then bind-kwargs is emitted, then the frame is
;; allocated.  NTOTAL, passed to bind-kwargs, is the larger of
;; NREQ + NOPT and one more than the highest keyword index.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   ((not alternate)
    (emit-assert-nargs-ge asm nreq))
   (else
    (emit-br-if-nargs-lt asm nreq alternate)
    (when (not rest?)
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate))))
  (let* ((npositional (+ nreq nopt))
         ;; Grow the running total so that it covers this entry's index.
         (widen (lambda (entry needed)
                  (match entry
                    (((? keyword?) . idx)
                     (max (1+ idx) needed)))))
         (ntotal (fold widen npositional kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
| 815 | |
;; Bind the label SYM to the current assembler position in the label
;; table.
(define-macro-assembler (label asm sym)
  (let ((table (asm-labels asm))
        (here (asm-start asm)))
    (hashq-set! table sym here)))
| 818 | |
;; Record SOURCE as the source information for the current assembler
;; position, by pushing a (position . source) pair onto the sources
;; list.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
| 821 | |
;; Record that NAME is defined in SLOT from the current position
;; onwards, within the most recently begun arity.  Each entry is a
;; vector #(name slot offset), where offset is the distance from the
;; arity's low PC to the current position, multiplied by 4.
(define-macro-assembler (definition asm name slot)
  (let* ((current-meta (car (asm-meta asm)))
         (current-arity (car (meta-arities current-meta)))
         (distance (- (asm-start asm) (arity-low-pc current-arity)))
         (entry (vector name slot (* distance 4))))
    (set-arity-definitions! current-arity
                            (cons entry (arity-definitions current-arity)))))
| 828 | |
;; Store MODULE into the module cache cell interned for SCOPE, using
;; static-set! with an offset of 0.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))
| 832 | |
;; Emit toplevel-box for SYM into DST, after interning the three
;; constants it refers to: the symbol itself, the module cache cell for
;; SCOPE, and the cache cell for the (SCOPE, SYM) pair.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let* ((name-label (intern-non-immediate asm sym))
         (module-label (intern-module-cache-cell asm scope))
         (box-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst box-label module-label name-label bound?)))
| 838 | |
;; Emit module-box for SYM in the module named MODULE-NAME into DST.
;; The pair (PUBLIC? . MODULE-NAME) is interned as a constant and is
;; also used as the scope of the cache cell for SYM.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((name-label (intern-non-immediate asm sym))
         (scope-key (cons public? module-name))
         (scope-label (intern-constant asm scope-key))
         (box-label (intern-cache-cell asm scope-key sym)))
    (emit-module-box asm dst box-label scope-label name-label bound?)))
| 845 | |
;; Remember a non-zero DEAD-SLOT-MAP for the current position.  Each
;; entry pushed onto the assembler's list has the shape
;; (position proc-slot . dead-slot-map).  A zero map is not recorded.
(define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
  (when (not (zero? dead-slot-map))
    (let ((entry (cons (asm-start asm)
                       (cons proc-slot dead-slot-map))))
      (set-asm-dead-slot-maps! asm (cons entry (asm-dead-slot-maps asm))))))
| 852 | |
| 853 | \f |
| 854 | |
| 855 | ;;; |
| 856 | ;;; Helper for linking objects. |
| 857 | ;;; |
| 858 | |
(define (make-object asm name bv relocs labels . kwargs)
  "Build a linker object for the section called @var{name} whose
contents are @var{bv}.  The section name is interned in the shstrtab,
the section size is taken from the length of @var{bv}, the next free
section index is claimed, and a linker symbol named @var{name} at
offset 0 is added in front of @var{labels}.  Any @var{kwargs} are passed
on to @code{make-elf-section}."
  (let* ((name-idx (intern-section-name! asm (symbol->string name)))
         (index (asm-next-section-number asm)))
    ;; Claim this index before building the section.
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
| 873 | |
| 874 | |
| 875 | \f |
| 876 | |
| 877 | ;;; |
| 878 | ;;; Linking the constant table. This code is somewhat intertwingled |
| 879 | ;;; with the intern-constant code above, as that procedure also |
| 880 | ;;; residualizes instructions to initialize constants at load time. |
| 881 | ;;; |
| 882 | |
(define (write-immediate asm buf pos x)
  "Write the value returned by @code{object-address} for @var{x} into
@var{buf} at byte offset @var{pos}, as one word of the size and
endianness configured in @var{asm}."
  (let* ((bits (object-address x))
         (order (asm-endianness asm))
         ;; Pick the setter that matches the target word size.
         (store! (case (asm-word-size asm)
                   ((4) bytevector-u32-set!)
                   ((8) bytevector-u64-set!)
                   (else (error "bad word size" asm)))))
    (store! buf pos bits order)))
| 890 | |
(define (emit-init-constants asm)
  "When @var{asm} has accumulated initialization instructions for
writable data, emit a procedure that runs them and return the label of
that procedure.  Return @code{#f} when there is nothing to initialize."
  (let ((inits (asm-inits asm)))
    (if (null? inits)
        #f
        (let* ((label (gensym "init-constants"))
               ;; The inits were accumulated newest first, so reverse
               ;; them to run in emission order.
               (program (append
                         (list (list 'begin-program label '())
                               '(assert-nargs-ee/locals 1 1))
                         (reverse inits)
                         (list (list 'load-constant 1 *unspecified*)
                               '(return 1)
                               '(end-program)))))
          (emit-text asm program)
          label))))
| 906 | |
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels.

Each object is laid out at an offset aligned to 8 bytes, and a linker
symbol for its label is defined at that offset."
  ;; Round ADDRESS up to the next multiple of ALIGNMENT.
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Tags written into the first word of the objects laid out below.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Number of bytes that `write' below uses for X.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Serialize OBJ into BUF at byte offset POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          ;; Header: tag word, then length word.
          (case word-size
            ((4)
             (bytevector-u32-set! buf pos tag endianness)
             (bytevector-u32-set! buf (+ pos 4) len endianness))
            ((8)
             (bytevector-u64-set! buf pos tag endianness)
             (bytevector-u64-set! buf (+ pos 8) len endianness))
            (else
             (error "bad word size" asm)))
          ;; Characters, followed by a zero terminator.
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (case word-size
          ((4)
           (bytevector-u32-set! buf pos tc7-program endianness)
           (bytevector-u32-set! buf (+ pos 4) 0 endianness))
          ((8)
           (bytevector-u64-set! buf pos tc7-program endianness)
           (bytevector-u64-set! buf (+ pos 8) 0 endianness))
          (else (error "bad word size"))))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; The tag word is the bare tc7-ro-string; the string length is
        ;; stored separately, in the fourth word.
        (case word-size
          ((4)
           (bytevector-u32-set! buf pos tc7-ro-string endianness)
           (write-immediate asm buf (+ pos 4) #f) ; stringbuf
           (bytevector-u32-set! buf (+ pos 8) 0 endianness)
           (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
          ((8)
           (bytevector-u64-set! buf pos tc7-ro-string endianness)
           (write-immediate asm buf (+ pos 8) #f) ; stringbuf
           (bytevector-u64-set! buf (+ pos 16) 0 endianness)
           (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
          (else (error "bad word size"))))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        ;; One tag word carrying the length, then one word per element.
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (case word-size
            ((4) (bytevector-u32-set! buf pos tag endianness))
            ((8) (bytevector-u64-set! buf pos tag endianness))
            (else (error "bad word size")))
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (array-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7))))))
          (case word-size
            ((4)
             (bytevector-u32-set! buf pos tag endianness)
             (bytevector-u32-set! buf (+ pos 4)
                                  (if (bitvector? obj)
                                      (bitvector-length obj)
                                      (bytevector-length obj))
                                  endianness) ; length
             (bytevector-u32-set! buf (+ pos 8) 0 endianness) ; pointer
             (write-immediate asm buf (+ pos 12) #f)) ; owner
            ((8)
             (bytevector-u64-set! buf pos tag endianness)
             (bytevector-u64-set! buf (+ pos 8)
                                  (if (bitvector? obj)
                                      (bitvector-length obj)
                                      (bytevector-length obj))
                                  endianness) ; length
             (bytevector-u64-set! buf (+ pos 16) 0 endianness) ; pointer
             (write-immediate asm buf (+ pos 24) #f)) ; owner
            (else (error "bad word size")))))

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; First pass: total size, aligning the start of every object to
      ;; 8 bytes.  Second pass: write each object and record a symbol
      ;; for its label at the offset where it was written.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
| 1105 | |
(define (link-constants asm)
  "Partition the constants interned in @var{asm} into read-only and
writable data and link each group into its own section.

Returns three values: an object for the .rodata section, an object for
the .data section, and a label for an initialization procedure.  Any of
these may be @code{#f}."
  ;; True if every element of VEC is an immediate.
  (define (all-immediate? vec)
    (let ((n (vector-length vec)))
      (let lp ((i 0))
        (cond
         ((= i n) #t)
         ((immediate? (vector-ref vec i)) (lp (1+ i)))
         (else #f)))))
  ;; Objects for which this returns true are placed in .rodata.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (and (immediate? (car x)) (immediate? (cdr x))))
     ((simple-vector? x) (all-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let lp ((i 0)
             (ro vlist-null)
             (rw vlist-null))
      (cond
       ((= i count)
        (values (link-data asm ro '.rodata)
                (link-data asm rw '.data)
                (emit-init-constants asm)))
       (else
        (match (vlist-ref constants i)
          ((obj . obj-label)
           (if (shareable? obj)
               (lp (1+ i) (vhash-consq obj obj-label ro) rw)
               (lp (1+ i) ro (vhash-consq obj obj-label rw))))))))))
| 1138 | |
| 1139 | \f |
| 1140 | |
| 1141 | ;;; |
| 1142 | ;;; Linking program text. |
| 1143 | ;;; |
| 1144 | |
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label table
@var{labels}, patching @var{buf} in place.  An x8-s24 relocation must
refer to a known label.  An s32 relocation whose label is unknown is
turned into a rel32/4 linker relocation instead.  Return the list of
linker relocations produced this way."
  ;; Handle one relocation; PENDING is the list of linker relocations
  ;; collected so far.
  (define (resolve reloc pending)
    (match reloc
      ((type label base word)
       (let ((target (hashq-ref labels label))
             (dst (+ base word)))
         (cond
          ((eq? type 's32)
           (cond
            (target
             (s32-set! buf dst (- target base))
             pending)
            (else
             (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
                   pending))))
          ((eq? type 'x8-s24)
           (unless target
             (error "unbound near relocation" reloc))
           ;; Keep the low byte of the existing word and fill in the
           ;; relative offset.
           (let ((low-byte (logand (u32-ref buf dst) #xff)))
             (u32-set! buf dst (pack-u8-s24 low-byte (- target base)))
             pending))
          (else (error "bad relocation kind" reloc)))))))
  (fold resolve '() relocs))
| 1173 | |
(define (process-labels labels)
  "Return a list with one linker symbol per entry of the label-offset
table @var{labels}.  The offsets in the table are expected to be in
words; each symbol is placed at four times its offset."
  (hash-fold (lambda (label loc symbols)
               (cons (make-linker-symbol label (* loc 4)) symbols))
             '()
             labels))
| 1180 | |
(define (swap-bytes! buf)
  "Reverse the byte order of every 32-bit unit of the text buffer
@var{buf}, in place.  The length of @var{buf} must be a multiple of 4."
  (let ((total (bytevector-length buf)))
    (unless (zero? (modulo total 4))
      (error "unexpected length"))
    ;; Reading a unit as big-endian and writing it back as
    ;; little-endian reverses its four bytes.
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      (let ((unit (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset unit (endianness little))))))
| 1194 | |
(define (link-text-object asm)
  "Link the .rtl-text section.  The saved blocks of @var{asm} are copied
into one buffer in the reverse of their stored order, followed by the
used part of the current block.  The bytes are swapped if the target
endianness differs from the native one, relocations are processed, and
the label table is turned into linker symbols."
  (let* ((buf (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; Copy the full blocks; the result is the byte offset just
         ;; past the last one.
         (tail-pos (fold (lambda (block pos)
                           (bytevector-copy! block 0 buf pos block-bytes)
                           (+ pos block-bytes))
                         0
                         (reverse (asm-prev asm)))))
    (bytevector-copy! (asm-cur asm) 0 buf tail-pos (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! buf))
    (let ((relocs (process-relocs buf (asm-relocs asm) (asm-labels asm)))
          (symbols (process-labels (asm-labels asm))))
      (make-object asm '.rtl-text buf relocs symbols))))
| 1213 | |
| 1214 | |
| 1215 | \f |
| 1216 | |
| 1217 | ;;; |
| 1218 | ;;; Create the frame maps. These maps are used by GC to identify dead |
| 1219 | ;;; slots in pending call frames, to avoid marking them. We only do |
;;; this when a frame makes a non-tail call, as that is the common case.
| 1221 | ;;; Only the topmost frame will see a GC at any other point, but we mark |
| 1222 | ;;; top frames conservatively as serializing live slot maps at every |
| 1223 | ;;; instruction would take up too much space in the object file. |
| 1224 | ;;; |
| 1225 | |
;; The .guile.frame-maps section starts with two packed u32 values: one
;; indicating the offset of the first byte of the .rtl-text section, and
;; another indicating the relative offset in bytes of the slots data.
;; The length is in bytes: two u32 values take 8 bytes.
(define frame-maps-prefix-len 8)

;; Each header is 8 bytes: 4 for the offset from .rtl-text, and 4 for
;; the offset of the slot map from the beginning of the
;; .guile.frame-maps section.  The length of a frame map depends on the
;; frame size at the call site, and is not encoded into this section as
;; it is available at run-time.
(define frame-map-header-len 8)
| 1237 | |
(define (link-frame-maps asm)
  "Link the .guile.frame-maps section from the dead slot maps recorded
in @var{asm}, or return @code{#f} if none were recorded.  The section
consists of the prefix, one header per map, and then the map bytes."
  ;; Number of bytes used to store the map of a call with PROC-SLOT.
  (define (map-byte-length proc-slot)
    (ceiling-quotient (- proc-slot 2) 8))
  ;; Lay out ENTRIES, of which there are COUNT, with MAP-LEN bytes of
  ;; map data in total.
  (define (make-frame-maps entries count map-len)
    (let* ((order (asm-endianness asm))
           (maps-start (+ frame-maps-prefix-len
                          (* count frame-map-header-len)))
           (bv (make-bytevector (+ maps-start map-len) 0)))
      ;; Write REMAINING bytes of BITS at AT, low byte first, and
      ;; return the position just past them.
      (define (write-map! at bits remaining)
        (if (zero? remaining)
            at
            (begin
              (bytevector-u8-set! bv at (logand bits #xff))
              (write-map! (1+ at) (ash bits -8) (1- remaining)))))
      ;; Second word of the prefix: where the map data begins.
      (bytevector-u32-set! bv 4 maps-start order)
      (let lp ((entries entries)
               (header-at frame-maps-prefix-len)
               (map-at maps-start))
        (match entries
          (()
           (make-object asm '.guile.frame-maps bv
                        (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
                        '() #:type SHT_PROGBITS #:flags SHF_ALLOC))
          (((pos proc-slot . bits) . entries)
           (bytevector-u32-set! bv header-at (* pos 4) order)
           (bytevector-u32-set! bv (+ header-at 4) map-at order)
           (lp entries
               (+ header-at frame-map-header-len)
               (write-map! map-at bits (map-byte-length proc-slot))))))))
  (match (asm-dead-slot-maps asm)
    (() #f)
    (recorded
     ;; Reverse the recorded list while counting the entries and
     ;; adding up their map sizes.
     (let lp ((recorded recorded) (entries '()) (count 0) (map-len 0))
       (match recorded
         (() (make-frame-maps entries count map-len))
         (((and entry (pos proc-slot . bits)) . recorded)
          (lp recorded
              (cons entry entries)
              (1+ count)
              (+ (map-byte-length proc-slot) map-len))))))))
| 1275 | |
| 1276 | \f |
| 1277 | |
| 1278 | ;;; |
| 1279 | ;;; Linking other sections of the ELF file, like the dynamic segment, |
| 1280 | ;;; the symbol table, etc. |
| 1281 | ;;; |
| 1282 | |
;; FIXME: Define these somewhere central, shared with C.
;;
;; The two values are packed as (major << 16) | minor into the word
;; that follows DT_GUILE_VM_VERSION in the .dynamic section; see
;; link-dynamic-section.
(define *bytecode-major-version* #x0202)
(define *bytecode-minor-version* 5)
| 1286 | |
(define (link-dynamic-section asm text rw rw-init frame-maps)
  "Link the dynamic section for an ELF image.  The section always holds
the VM version, the entry point (the start of .rtl-text) and a
terminating DT_NULL pair.  If the writable data section @var{rw} is
true, it is registered as a GC root, and if in addition @var{rw-init}
is true, the procedure with that label is recorded under DT_INIT.  If
@var{frame-maps} is true, an entry referring to .guile.frame-maps is
added.  @var{text} is not used by this procedure."
  ;; Build the section for one word size.  STORE-WORD! is the
  ;; bytevector setter for that size and RELOC-TYPE is the matching
  ;; absolute relocation kind.
  (define (emit-dynamic-section word-size store-word! reloc-type)
    (let* ((order (asm-endianness asm))
           ;; Six words are always present: version, entry and null
           ;; pairs.
           (nwords (+ 6
                      (if rw 4 0)
                      (if rw-init 2 0)
                      (if frame-maps 2 0)))
           (bv (make-bytevector (* word-size nwords) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (store-word! bv (* i word-size) uword order))
      ;; A label is written as zero plus a relocation for the linker.
      (define (set-label! i label)
        (set! relocs
              (cons (make-linker-reloc reloc-type (* i word-size) 0 label)
                    relocs))
        (store-word! bv (* i word-size) 0 order))
      (set-uword! 0 DT_GUILE_VM_VERSION)
      (set-uword! 1 (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (when rw
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (when rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)))
      ;; The frame maps entry sits just before the final null pair.
      (when frame-maps
        (set-uword! (- nwords 4) DT_GUILE_FRAME_MAPS)
        (set-label! (- nwords 3) '.guile.frame-maps))
      (set-uword! (- nwords 2) DT_NULL)
      (set-uword! (- nwords 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! 'abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! 'abs64/1))
    (else (error "bad word size" asm))))
| 1334 | |
(define (link-shstrtab asm)
  "Link the section header string table.  The name of the table's own
section is interned before the table is linked."
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table-bytes '() '()
                 #:type SHT_STRTAB #:flags 0)))
| 1342 | |
(define (link-symtab text-section asm)
  "Link the symbol table, with one function symbol per meta of
@var{asm}, each referring to @var{text-section}.  Return two values: the
.symtab linker object and the .strtab linker object holding the symbol
names."
  (let* ((order (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (bv (make-bytevector (* (length metas) entry-size) 0)))
    ;; Write the symbols one after another; OFFSET is the byte
    ;; position of the current entry.
    (let lp ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (label (meta-name meta))
               ;; A meta without a name gets the empty string.
               (name-idx (string-table-intern!
                          names
                          (if label (symbol->string label) ""))))
          (write-elf-symbol bv offset order word-size
                            (make-elf-symbol
                             #:name name-idx
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 (meta-low-pc meta))
                             #:size (* 4 (- (meta-high-pc meta)
                                            (meta-low-pc meta)))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (lp (cdr metas) (+ offset entry-size)))))
    ;; The string table object is made first, as the symbol table
    ;; links to its section index.
    (let* ((strtab-object (make-object asm '.strtab
                                       (link-string-table! names)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (symtab-object (make-object asm '.symtab
                                       bv
                                       '() '()
                                       #:type SHT_SYMTAB #:flags 0
                                       #:entsize entry-size
                                       #:link (elf-section-index
                                               (linker-object-section
                                                strtab-object)))))
      (values symtab-object strtab-object))))
| 1379 | |
| 1380 | ;;; The .guile.arities section describes the arities that a function can |
| 1381 | ;;; have. It is in two parts: a sorted array of headers describing |
| 1382 | ;;; basic arities, and an array of links out to a string table (and in |
| 1383 | ;;; the case of keyword arguments, to the data section) for argument |
| 1384 | ;;; names. The whole thing is prefixed by a uint32 indicating the |
| 1385 | ;;; offset of the end of the headers array. |
| 1386 | ;;; |
| 1387 | ;;; The arity headers array is a packed array of structures of the form: |
| 1388 | ;;; |
| 1389 | ;;; struct arity_header { |
| 1390 | ;;; uint32_t low_pc; |
| 1391 | ;;; uint32_t high_pc; |
| 1392 | ;;; uint32_t offset; |
| 1393 | ;;; uint32_t flags; |
| 1394 | ;;; uint32_t nreq; |
| 1395 | ;;; uint32_t nopt; |
| 1396 | ;;; uint32_t nlocals; |
| 1397 | ;;; } |
| 1398 | ;;; |
| 1399 | ;;; All of the offsets and addresses are 32 bits. We can expand in the |
| 1400 | ;;; future to use 64-bit offsets if appropriate, but there are other |
| 1401 | ;;; aspects of bytecode that constrain us to a total image that fits in |
| 1402 | ;;; 32 bits, so for the moment we'll simplify the problem space. |
| 1403 | ;;; |
| 1404 | ;;; The following flags values are defined: |
| 1405 | ;;; |
| 1406 | ;;; #x1: has-rest? |
| 1407 | ;;; #x2: allow-other-keys? |
| 1408 | ;;; #x4: has-keyword-args? |
| 1409 | ;;; #x8: is-case-lambda? |
| 1410 | ;;; #x10: is-in-case-lambda? |
| 1411 | ;;; |
| 1412 | ;;; Functions with a single arity specify their number of required and |
| 1413 | ;;; optional arguments in nreq and nopt, and do not have the |
| 1414 | ;;; is-case-lambda? flag set. Their "offset" member links to an array |
| 1415 | ;;; of pointers into the associated .guile.arities.strtab string table, |
| 1416 | ;;; identifying the argument names. This offset is relative to the |
| 1417 | ;;; start of the .guile.arities section. |
| 1418 | ;;; |
| 1419 | ;;; If the arity has keyword arguments -- if has-keyword-args? is set in |
| 1420 | ;;; the flags -- the first uint32 pointed to by offset encodes a link to |
| 1421 | ;;; the "keyword indices" literal, in the data section. Then follow the |
| 1422 | ;;; names for all locals, in order, as uleb128 values. The required |
| 1423 | ;;; arguments will be the first locals, followed by the optionals, |
;;; followed by the rest argument if has-rest? is set.  The names
| 1425 | ;;; point into the associated string table section. |
| 1426 | ;;; |
| 1427 | ;;; Functions with no arities have no arities information present in the |
| 1428 | ;;; .guile.arities section. |
| 1429 | ;;; |
| 1430 | ;;; Functions with multiple arities are preceded by a header with |
| 1431 | ;;; is-case-lambda? set. All other fields are 0, except low-pc and |
| 1432 | ;;; high-pc which should be the bounds of the whole function. Headers |
| 1433 | ;;; for the individual arities follow, with the is-in-case-lambda? flag |
| 1434 | ;;; set. In this way the whole headers array is sorted in increasing |
| 1435 | ;;; low-pc order, and case-lambda clauses are contained within the |
| 1436 | ;;; [low-pc, high-pc] of the case-lambda header. |
| 1437 | |
;; Length of the prefix to the arities section, in bytes.  The prefix
;; is the single uint32 described above, giving the offset of the end
;; of the headers array.
(define arities-prefix-len 4)

;; Length of an arity header, in bytes: the seven uint32 fields of
;; struct arity_header.
(define arity-header-len (* 7 4))
| 1443 | |
| 1444 | ;; Some helpers. |
(define (put-uleb128 port val)
  "Write the non-negative exact integer @var{val} to the binary output
port @var{port} in unsigned LEB128 form: seven bits per byte, least
significant group first, with the high bit set on every byte except the
last.  An error is signalled for any other kind of value."
  ;; Shifting a negative number right never reaches zero, so without
  ;; this check a negative value would make the loop below run forever.
  (unless (and (exact-integer? val) (not (negative? val)))
    (error "ULEB128 value must be a non-negative exact integer" val))
  (let lp ((val val))
    (let ((next (ash val -7)))
      (if (zero? next)
          ;; Last group: VAL is below 128 here, so the high bit is clear.
          (put-u8 port val)
          (begin
            (put-u8 port (logior #x80 (logand val #x7f)))
            (lp next))))))
| 1453 | |
(define (put-sleb128 port val)
  "Write the integer @var{val} to the binary output port @var{port} in
signed LEB128 form: seven bits per byte, least significant group first,
with the high bit set on every byte except the last."
  (let lp ((val val))
    (let ((low-bits (logand val #x7f)))
      (cond
       ;; VAL fits in one signed 7-bit group: this is the last byte.
       ((<= -64 val 63)
        (put-u8 port low-bits))
       (else
        (put-u8 port (logior #x80 low-bits))
        (lp (ash val -7)))))))
| 1461 | |
(define (port-position port)
  "Return the current position of @var{port}, obtained by seeking zero
bytes relative to the current position."
  (let ((offset 0)
        (whence SEEK_CUR))
    (seek port offset whence)))
| 1464 | |
;; Pack five booleans into the flags word of an arity header.  Each
;; flag owns one bit, so adding the values of the flags that are set
;; gives the same result as or-ing them together:
;;
;;   #x1: has-rest?   #x2: allow-other-keys?   #x4: has-keyword-args?
;;   #x8: is-case-lambda?   #x10: is-in-case-lambda?
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?
                                      is-in-case-lambda?)
  (+ (if has-rest? #x1 0)
     (if allow-other-keys? #x2 0)
     (if has-keyword-args? #x4 0)
     (if is-case-lambda? #x8 0)
     (if is-in-case-lambda? #x10 0)))
| 1473 | |
(define (write-arities asm metas headers names-port strtab)
  "Fill in the arity headers bytevector @var{headers} for all of
@var{metas}, and write the per-arity data (an optional link to the
keyword indices, then the names of the locals, then their definitions)
to @var{names-port}.  Names are interned in @var{strtab}.  Return the
list of linker relocations needed for the keyword index links."
  ;; Offset at which the next piece of per-arity data will land: the
  ;; data written to NAMES-PORT follows the headers.
  (define (names-offset)
    (+ (bytevector-length headers) (port-position names-port)))
  ;; Write the seven u32 fields of one header at POS.
  (define (write-header pos low-pc high-pc offset flags nreq nopt nlocals)
    (unless (<= (+ nreq nopt) nlocals)
      (error "forgot to emit definition instructions?"))
    (let ((order (asm-endianness asm)))
      (define (put-field! field-offset value)
        (bytevector-u32-set! headers (+ pos field-offset) value order))
      (put-field! 0 (* low-pc 4))
      (put-field! 4 (* high-pc 4))
      (put-field! 8 offset)
      (put-field! 12 flags)
      (put-field! 16 nreq)
      (put-field! 20 nopt)
      (put-field! 24 nlocals)))
  ;; For a non-empty KW-INDICES, reserve four zero bytes and add a
  ;; relocation that points them at the interned constant.
  (define (write-kw-indices kw-indices relocs)
    ;; FIXME: Assert that kw-indices is already interned.
    (cond
     ((pair? kw-indices)
      (let ((pos (names-offset))
            (label (intern-constant asm kw-indices)))
        (put-bytevector names-port #vu8(0 0 0 0))
        (cons (make-linker-reloc 'abs32/1 pos 0 label) relocs)))
     (else relocs)))
  ;; Write the header of ARITY at POS and its data to NAMES-PORT, and
  ;; return the updated relocation list.
  (define (write-arity pos arity in-case-lambda? relocs)
    (let ((definitions (arity-definitions arity)))
      (write-header pos (arity-low-pc arity)
                    (arity-high-pc arity)
                    ;; FIXME: Seems silly to add on bytevector-length of
                    ;; headers, given the arities-prefix.
                    (names-offset)
                    (pack-arity-flags (arity-rest arity)
                                      (arity-allow-other-keys? arity)
                                      (pair? (arity-kw-indices arity))
                                      #f
                                      in-case-lambda?)
                    (length (arity-req arity))
                    (length (arity-opt arity))
                    (length definitions))
      (let ((relocs (write-kw-indices (arity-kw-indices arity) relocs)))
        ;; Write local names.  A name that is not a symbol is written
        ;; as 0.
        (for-each
         (lambda (definition)
           (match definition
             (#(name slot def)
              (put-uleb128 names-port
                           (if (symbol? name)
                               (string-table-intern! strtab
                                                     (symbol->string name))
                               0)))))
         definitions)
        ;; Now write their definitions.
        (for-each
         (lambda (definition)
           (match definition
             (#(name slot def)
              (put-uleb128 names-port def)
              (put-uleb128 names-port slot))))
         definitions)
        relocs)))
  (let lp ((metas metas) (pos arities-prefix-len) (relocs '()))
    (match metas
      (()
       (unless (= pos (bytevector-length headers))
         (error "expected to fully fill the bytevector"
                pos (bytevector-length headers)))
       relocs)
      ((meta . metas)
       (match (meta-arities meta)
         (() (lp metas pos relocs))
         ((arity)
          (lp metas
              (+ pos arity-header-len)
              (write-arity pos arity #f relocs)))
         (arities
          ;; Write a case-lambda header, then individual arities.
          ;; The case-lambda header's offset link is 0.
          (write-header pos (meta-low-pc meta) (meta-high-pc meta) 0
                        (pack-arity-flags #f #f #f #t #f) 0 0 0)
          (call-with-values
              (lambda ()
                (let clauses ((arities arities)
                              (pos (+ pos arity-header-len))
                              (relocs relocs))
                  (if (null? arities)
                      (values pos relocs)
                      (clauses (cdr arities)
                               (+ pos arity-header-len)
                               (write-arity pos (car arities) #t relocs)))))
            (lambda (pos relocs)
              (lp metas pos relocs)))))))))
| 1554 | |
(define (link-arities asm)
  ;; Build the .guile.arities section and its string table for ASM.
  ;; Returns two values: the .guile.arities linker object and the
  ;; .guile.arities.strtab linker object it links to.

  ;; Number of header bytes that META contributes: nothing when it has
  ;; no arities, one header for a single arity, and otherwise one
  ;; case-lambda header followed by one header per clause.
  (define (arities-header-bytes meta)
    (match (meta-arities meta)
      (() 0)
      ((_) arity-header-len)
      (clauses (* arity-header-len (1+ (length clauses))))))

  ;; Return a fresh bytevector holding HEAD followed by TAIL.
  (define (concat-bytevectors head tail)
    (let* ((head-len (bytevector-length head))
           (tail-len (bytevector-length tail))
           (joined (make-bytevector (+ head-len tail-len))))
      (bytevector-copy! head 0 joined 0 head-len)
      (bytevector-copy! tail 0 joined head-len tail-len)
      joined))

  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         ;; The prefix plus every per-meta header.
         (header-size (let sum ((rest metas) (total arities-prefix-len))
                        (if (null? rest)
                            total
                            (sum (cdr rest)
                                 (+ total
                                    (arities-header-bytes (car rest)))))))
         (strtab (make-string-table))
         (headers (make-bytevector header-size 0)))
    ;; The first word of the section records the size of the headers.
    (bytevector-u32-set! headers 0 header-size endianness)
    (call-with-values open-bytevector-output-port
      (lambda (names-port get-names-bv)
        (let* ((relocs (write-arities asm metas headers names-port strtab))
               ;; The string table object has to exist before the
               ;; arities object, which links to its section index.
               (strtab-object (make-object asm '.guile.arities.strtab
                                           (link-string-table! strtab)
                                           '() '()
                                           #:type SHT_STRTAB #:flags 0))
               (strtab-index (elf-section-index
                              (linker-object-section strtab-object)))
               (contents (concat-bytevectors headers (get-names-bv)))
               (arities-object (make-object asm '.guile.arities
                                            contents
                                            relocs '()
                                            #:type SHT_PROGBITS #:flags 0
                                            #:link strtab-index)))
          (values arities-object strtab-object))))))
| 1597 | |
| 1598 | ;;; |
| 1599 | ;;; The .guile.docstrs section is a packed, sorted array of (pc, str) |
| 1600 | ;;; values. Pc and str are both 32 bits wide. (Either could change to |
| 1601 | ;;; 64 bits if appropriate in the future.) Pc is the address of the |
| 1602 | ;;; entry to a program, relative to the start of the text section, in |
| 1603 | ;;; bytes, and str is an index into the associated .guile.docstrs.strtab |
| 1604 | ;;; string table section. |
| 1605 | ;;; |
| 1606 | |
;; The size of a docstrs entry, in bytes: a 32-bit pc followed by a
;; 32-bit index into the .guile.docstrs.strtab string table.
(define docstr-size 8)
| 1609 | |
(define (link-docstrs asm)
  ;; Build the .guile.docstrs section and its string table for ASM.
  ;; Returns two values: the .guile.docstrs linker object and the
  ;; .guile.docstrs.strtab linker object it links to.

  (define (documentation-entry? entry)
    (eq? (car entry) 'documentation))

  ;; Return (BYTE-PC . DOCSTRING) for META, or #f unless META has
  ;; exactly one `documentation' property and its value is a string.
  (define (meta->docstring-entry meta)
    (let ((tail (find-tail documentation-entry? (meta-properties meta))))
      (and tail
           (not (find-tail documentation-entry? (cdr tail)))
           (let ((doc (cdar tail)))
             (and (string? doc)
                  ;; meta-low-pc counts 32-bit words; entries use bytes.
                  (cons (* 4 (meta-low-pc meta)) doc))))))

  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->docstring-entry
                                 (reverse (asm-meta asm))))
         (strtab (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    ;; Fill in one (pc, string index) record per docstring, in order.
    (let fill ((entries docstrings) (pos 0))
      (unless (null? entries)
        (match (car entries)
          ((pc . string)
           (bytevector-u32-set! bv pos pc endianness)
           (bytevector-u32-set! bv (+ pos 4)
                                (string-table-intern! strtab string)
                                endianness)))
        (fill (cdr entries) (+ pos docstr-size))))
    (let* ((strtab-object (make-object asm '.guile.docstrs.strtab
                                       (link-string-table! strtab)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (strtab-index (elf-section-index
                          (linker-object-section strtab-object)))
           (docstrs-object (make-object asm '.guile.docstrs
                                        bv
                                        '() '()
                                        #:type SHT_PROGBITS #:flags 0
                                        #:link strtab-index)))
      (values docstrs-object strtab-object))))
| 1647 | |
| 1648 | ;;; |
;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
;;; values.  Pc and addr are both 32 bits wide.  (Either could change to
;;; 64 bits if appropriate in the future.)  Pc is the address of the
;;; entry to a program, relative to the start of the text section, in
;;; bytes, and addr is the address of the associated properties alist,
;;; relative to the start of the ELF image.
| 1655 | ;;; |
;;; Since procedure properties are stored in the data sections, we need
;;; to link the procedure properties section before the constants are
;;; linked.  (Note that this constraint does not apply to the arities
;;; section, which may reference the data sections via the kw-indices
;;; literal, because assembling the text section already makes sure
;;; that the kw-indices are interned.)
| 1662 | ;;; |
| 1663 | |
;; The size of a procprops entry, in bytes: a 32-bit pc followed by a
;; 32-bit address that is filled in through an abs32/1 relocation.
(define procprops-size 8)
| 1666 | |
(define (link-procprops asm)
  ;; Build the .guile.procprops section for ASM and return it as a
  ;; single linker object.  Each entry pairs a procedure's byte pc with
  ;; a relocation to its interned properties alist.

  ;; Return ALIST without its first entry for KEY, provided that
  ;; entry's value satisfies VALUE-PRED.  If the first entry for KEY
  ;; does not satisfy VALUE-PRED, the rest of ALIST is kept untouched.
  (define (assoc-remove-one alist key value-pred)
    (match alist
      (() '())
      (((k . v) . rest)
       (cond
        ((not (eq? k key))
         (acons k v (assoc-remove-one rest key value-pred)))
        ((value-pred v) rest)
        (else (acons key v rest))))))

  ;; The name and a string docstring are left out of the properties
  ;; kept here.
  (define (props-without-name-or-docstring meta)
    (let* ((always (lambda (x) #t))
           (unnamed (assoc-remove-one (meta-properties meta) 'name always)))
      (assoc-remove-one unnamed 'documentation string?)))

  ;; Return (BYTE-PC . PROPS) for META, or #f when nothing is left.
  (define (meta->procprops-entry meta)
    (let ((props (props-without-name-or-docstring meta)))
      (and (pair? props)
           (cons (* 4 (meta-low-pc meta)) props))))

  (let* ((endianness (asm-endianness asm))
         (procprops (filter-map meta->procprops-entry
                                (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length procprops) procprops-size) 0)))
    (let fill ((remaining procprops) (offset 0) (relocs '()))
      (if (null? remaining)
          (make-object asm '.guile.procprops
                       bv
                       relocs '()
                       #:type SHT_PROGBITS #:flags 0)
          (match (car remaining)
            ((pc . props)
             (bytevector-u32-set! bv offset pc endianness)
             ;; The address word is left zero and is filled in through
             ;; the relocation against the interned alist.
             (let ((reloc (make-linker-reloc 'abs32/1 (+ offset 4) 0
                                             (intern-constant asm props))))
               (fill (cdr remaining)
                     (+ offset procprops-size)
                     (cons reloc relocs)))))))))
| 1705 | |
| 1706 | ;;; |
;;; The DWARF .debug_info, .debug_abbrev, .debug_str, .debug_loc, and
;;; .debug_line sections provide line number and local variable
;;; liveness information.  Their format is defined by the DWARF
;;; specifications.
| 1711 | ;;; |
| 1712 | |
(define (asm-language asm)
  ;; Return the source language symbol for ASM.  The argument is
  ;; currently ignored and the answer is always `scheme'.
  ;; FIXME: Plumb language through to the assembler.
  'scheme)
| 1716 | |
;; Build the DWARF debugging sections for ASM.
;;
;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
;;
;; Each value is a linker object.  The .debug_loc object is currently
;; always empty.
(define (link-debug asm)
  ;; Fixed-width writers.  Multi-byte values use the endianness of ASM.
  (define (put-s8 port val)
    (let ((bv (make-bytevector 1)))
      (bytevector-s8-set! bv 0 val)
      (put-bytevector port bv)))

  (define (put-u16 port val)
    (let ((bv (make-bytevector 2)))
      (bytevector-u16-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u32 port val)
    (let ((bv (make-bytevector 4)))
      (bytevector-u32-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u64 port val)
    (let ((bv (make-bytevector 8)))
      (bytevector-u64-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  ;; Describe META as a `subprogram' DIE in s-expression form.  The
  ;; name attribute is omitted when META has no name; high-pc is the
  ;; size of the procedure in bytes.
  (define (meta->subprogram-die meta)
    `(subprogram
      (@ ,@(cond
            ((meta-name meta)
             => (lambda (name) `((name ,(symbol->string name)))))
            (else
             '()))
         (low-pc ,(meta-label meta))
         (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

  ;; Describe the whole compilation unit as a `compile-unit' DIE whose
  ;; children are the subprogram DIEs, in emission order.
  (define (make-compile-unit-die asm)
    `(compile-unit
      (@ (producer ,(string-append "Guile " (version)))
         (language ,(asm-language asm))
         (low-pc .rtl-text)
         (high-pc ,(* 4 (asm-pos asm)))
         (stmt-list 0))
      ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

  (let-values (((die-port get-die-bv) (open-bytevector-output-port))
               ((die-relocs) '())
               ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
               ;; (tag has-kids? attrs forms) -> code
               ((abbrevs) vlist-null)
               ((strtab) (make-string-table))
               ((line-port get-line-bv) (open-bytevector-output-port))
               ((line-relocs) '())
               ;; file -> code
               ((files) vlist-null))

    ;; Emit one abbreviation declaration: its code, tag, children flag,
    ;; the (attribute, form) pairs, and a terminating (0, 0) pair.
    (define (write-abbrev code tag has-children? attrs forms)
      (put-uleb128 abbrev-port code)
      (put-uleb128 abbrev-port (tag-name->code tag))
      (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
      (for-each (lambda (attr form)
                  (put-uleb128 abbrev-port (attribute-name->code attr))
                  (put-uleb128 abbrev-port (form-name->code form)))
                attrs forms)
      (put-uleb128 abbrev-port 0)
      (put-uleb128 abbrev-port 0))

    ;; Return the abbreviation code for this DIE shape, writing a new
    ;; abbreviation the first time the shape is seen.  Codes start at 1.
    (define (intern-abbrev tag has-children? attrs forms)
      (let ((key (list tag has-children? attrs forms)))
        (match (vhash-assoc key abbrevs)
          ((_ . code) code)
          (#f (let ((code (1+ (vlist-length abbrevs))))
                (set! abbrevs (vhash-cons key code abbrevs))
                (write-abbrev code tag has-children? attrs forms)
                code)))))

    ;; Return the file-table code for FILE, allocating the next code
    ;; (starting at 1) the first time FILE is seen.
    (define (intern-file file)
      (match (vhash-assoc file files)
        ((_ . code) code)
        (#f (let ((code (1+ (vlist-length files))))
              (set! files (vhash-cons file code files))
              code))))

    ;; Write the .debug_line contents to line-port: the header, the
    ;; file table, and the line number program.
    (define (write-sources)
      ;; Choose line base and line range values that will allow for an
      ;; address advance range of 16 words.  The special opcode range is
      ;; from 10 to 255, so 246 values.
      (define base -4)
      (define range 15)

      ;; First pass: walk the recorded sources, interning file names and
      ;; consing up (pc file line col) rows in the opposite order.
      (let lp ((sources (asm-sources asm)) (out '()))
        (match sources
          (((pc . s) . sources)
           (let ((file (assq-ref s 'filename))
                 (line (assq-ref s 'line))
                 (col (assq-ref s 'column)))
             (lp sources
                 ;; Guile line and column numbers are 0-indexed, but
                 ;; they are 1-indexed for DWARF.  A missing line or
                 ;; column is recorded as #f.
                 (cons (list pc
                             (if file (intern-file file) 0)
                             (and line (1+ line))
                             (and col (1+ col)))
                       out))))
          (()
           ;; Compilation unit header for .debug_line.  We write in
           ;; DWARF 2 format because more tools understand it than DWARF
           ;; 4, which incompatibly adds another field to this header.

           (put-u32 line-port 0) ; Length; will patch later.
           (put-u16 line-port 2) ; DWARF 2 format.
           (put-u32 line-port 0) ; Prologue length; will patch later.
           (put-u8 line-port 4)  ; Minimum instruction length: 4 bytes.
           (put-u8 line-port 1)  ; Default is-stmt: true.

           (put-s8 line-port base)  ; Line base.  See the DWARF standard.
           (put-u8 line-port range) ; Line range.  See the DWARF standard.
           (put-u8 line-port 10)    ; Opcode base: the first "special" opcode.

           ;; A table of the number of uleb128 arguments taken by each
           ;; of the standard opcodes.
           (put-u8 line-port 0) ; 1: copy
           (put-u8 line-port 1) ; 2: advance-pc
           (put-u8 line-port 1) ; 3: advance-line
           (put-u8 line-port 1) ; 4: set-file
           (put-u8 line-port 1) ; 5: set-column
           (put-u8 line-port 0) ; 6: negate-stmt
           (put-u8 line-port 0) ; 7: set-basic-block
           (put-u8 line-port 0) ; 8: const-add-pc
           (put-u8 line-port 1) ; 9: fixed-advance-pc

           ;; Include directories, as a zero-terminated sequence of
           ;; nul-terminated strings.  Nothing, for the moment.
           (put-u8 line-port 0)

           ;; File table.  For each file that contributes to this
           ;; compilation unit, a nul-terminated file name string, and a
           ;; uleb128 for each of directory the file was found in, the
           ;; modification time, and the file's size in bytes.  We pass
           ;; zero for the latter three fields.  Folding from the right
           ;; visits the files in the order they were interned, so the
           ;; table position matches each file's code.
           (vlist-fold-right
            (lambda (pair seed)
              (match pair
                ((file . code)
                 (put-bytevector line-port (string->utf8 file))
                 (put-u8 line-port 0)
                 (put-uleb128 line-port 0)   ; directory
                 (put-uleb128 line-port 0)   ; mtime
                 (put-uleb128 line-port 0))) ; size
              seed)
            #f
            files)
           (put-u8 line-port 0) ; 0 byte terminating file list.

           ;; Patch prologue length.  The field lives at offset 6 and
           ;; counts the bytes that follow it, hence the 10 (4 + 2 + 4).
           (let ((offset (port-position line-port)))
             (seek line-port 6 SEEK_SET)
             (put-u32 line-port (- offset 10))
             (seek line-port offset SEEK_SET))

           ;; Now write the statement program.
           (let ()
             (define (extended-op opcode payload-len)
               (put-u8 line-port 0)                     ; extended op
               (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
               (put-uleb128 line-port opcode))
             ;; Extended opcode 2 (set-address) with a word-sized
             ;; operand that is left zero and relocated against SYM.
             (define (set-address sym)
               (define (add-reloc! kind)
                 (set! line-relocs
                       (cons (make-linker-reloc kind
                                                (port-position line-port)
                                                0
                                                sym)
                             line-relocs)))
               (match (asm-word-size asm)
                 (4
                  (extended-op 2 4)
                  (add-reloc! 'abs32/1)
                  (put-u32 line-port 0))
                 (8
                  (extended-op 2 8)
                  (add-reloc! 'abs64/1)
                  (put-u64 line-port 0))))
             ;; Advance to the end of the text, then emit extended
             ;; opcode 1 (end-sequence).
             (define (end-sequence pc)
               (let ((pc-inc (- (asm-pos asm) pc)))
                 (put-u8 line-port 2)   ; advance-pc
                 (put-uleb128 line-port pc-inc))
               (extended-op 1 0))
             ;; Advance the pc by PC-INC words and the line by LINE-INC,
             ;; appending a row.  A single special opcode is used when
             ;; both increments fit; otherwise the line or the pc is
             ;; advanced separately first and the remainder retried.
             (define (advance-pc pc-inc line-inc)
               (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
                 (cond
                  ((or (< line-inc base) (>= line-inc (+ base range)))
                   (advance-line line-inc)
                   (advance-pc pc-inc 0))
                  ((<= spec 255)
                   (put-u8 line-port spec))
                  ((< spec 500)
                   (put-u8 line-port 8) ; const-add-pc
                   ;; const-add-pc advances the pc by as much as special
                   ;; opcode 255 would.  Only the quotient is wanted, so
                   ;; use floor-quotient rather than the two-valued
                   ;; floor/.
                   (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                               line-inc))
                  (else
                   (put-u8 line-port 2) ; advance-pc
                   (put-uleb128 line-port pc-inc)
                   (advance-pc 0 line-inc)))))
             (define (advance-line inc)
               (put-u8 line-port 3)
               (put-sleb128 line-port inc))
             (define (set-file file)
               (put-u8 line-port 4)
               (put-uleb128 line-port file))
             (define (set-column col)
               (put-u8 line-port 5)
               (put-uleb128 line-port col))

             (set-address '.rtl-text)

             ;; The loop variables start at pc 0, file 1, line 1 and
             ;; column 0; only changes relative to them are emitted.
             (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
               (match in
                 (()
                  (when (null? out)
                    ;; There was no source info in the first place.  Set
                    ;; file register to 0 before adding final row.
                    (set-file 0))
                  (end-sequence pc))
                 (((pc* file* line* col*) . in*)
                  (cond
                   ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                    ;; Same location as the previous row: nothing to emit.
                    (lp in* pc file line col))
                   (else
                    (unless (eqv? col col*)
                      (set-column col*))
                    (unless (eqv? file file*)
                      (set-file file*))
                    (advance-pc (- pc* pc) (- line* line))
                    (lp in* pc* file* line* col*)))))))))))

    ;; Map an attribute value to what gets written: string attributes
    ;; become string-table offsets, the language becomes its numeric
    ;; code, and the rest pass through unchanged.
    (define (compute-code attr val)
      (match attr
        ('name (string-table-intern! strtab val))
        ('low-pc val)
        ('high-pc val)
        ('producer (string-table-intern! strtab val))
        ('language (language-name->code val))
        ('stmt-list val)))

    ;; Pick the form used to encode an attribute.  Integer codes get
    ;; the narrowest fixed-width form that holds them.
    (define (choose-form attr val code)
      (cond
       ((string? val) 'strp)
       ((eq? attr 'stmt-list) 'sec-offset)
       ((eq? attr 'low-pc) 'addr)
       ((exact-integer? code)
        (cond
         ((< code 0) 'sleb128)
         ((<= code #xff) 'data1)
         ((<= code #xffff) 'data2)
         ((<= code #xffffffff) 'data4)
         ((<= code #xffffffffffffffff) 'data8)
         (else 'uleb128)))
       (else (error "unhandled case" attr val code))))

    ;; Record a relocation of KIND against SYM at the current position
    ;; of the DIE port.
    (define (add-die-relocation! kind sym)
      (set! die-relocs
            (cons (make-linker-reloc kind (port-position die-port) 0 sym)
                  die-relocs)))

    ;; Write CODE to the DIE port encoded as FORM.  For `addr', CODE is
    ;; a symbol: a zero word is written and relocated against it.
    (define (write-value code form)
      (match form
        ('data1 (put-u8 die-port code))
        ('data2 (put-u16 die-port code))
        ('data4 (put-u32 die-port code))
        ('data8 (put-u64 die-port code))
        ('uleb128 (put-uleb128 die-port code))
        ('sleb128 (put-sleb128 die-port code))
        ('addr
         (match (asm-word-size asm)
           (4
            (add-die-relocation! 'abs32/1 code)
            (put-u32 die-port 0))
           (8
            (add-die-relocation! 'abs64/1 code)
            (put-u64 die-port 0))))
        ('sec-offset (put-u32 die-port code))
        ('strp (put-u32 die-port code))))

    ;; Write DIE and, recursively, its children.  A DIE that has
    ;; children is followed by a terminating 0.
    (define (write-die die)
      (match die
        ((tag ('@ (attrs vals) ...) children ...)
         (let* ((codes (map compute-code attrs vals))
                (forms (map choose-form attrs vals codes))
                (has-children? (not (null? children)))
                (abbrev-code (intern-abbrev tag has-children? attrs forms)))
           (put-uleb128 die-port abbrev-code)
           (for-each write-value codes forms)
           (when has-children?
             (for-each write-die children)
             (put-uleb128 die-port 0))))))

    ;; Compilation unit header.
    (put-u32 die-port 0) ; Length; will patch later.
    (put-u16 die-port 4) ; DWARF 4.
    (put-u32 die-port 0) ; Abbrevs offset.
    (put-u8 die-port (asm-word-size asm)) ; Address size.

    (write-die (make-compile-unit-die asm))

    ;; Terminate the abbrevs list.
    (put-uleb128 abbrev-port 0)

    (write-sources)

    (values (let ((bv (get-die-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_info bv die-relocs '()
                           #:type SHT_PROGBITS #:flags 0))
            (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_str (link-string-table! strtab) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_loc #vu8() '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (let ((bv (get-line-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_line bv line-relocs '()
                           #:type SHT_PROGBITS #:flags 0)))))
| 2041 | |
(define (link-objects asm)
  ;; Link every section of ASM, in dependency order, and return the
  ;; list of resulting linker objects with any #f entries removed.
  ;;
  ;; Link procprops before constants, because it probably interns more
  ;; constants.
  (let ((procprops (link-procprops asm)))
    (call-with-values (lambda () (link-constants asm))
      (lambda (ro rw rw-init)
        ;; Link text object after constants, so that the constants
        ;; initializer gets included.
        (let* ((text (link-text-object asm))
               (frame-maps (link-frame-maps asm))
               (dt (link-dynamic-section asm text rw rw-init frame-maps)))
          (call-with-values
              (lambda () (link-symtab (linker-object-section text) asm))
            (lambda (symtab strtab)
              (call-with-values (lambda () (link-arities asm))
                (lambda (arities arities-strtab)
                  (call-with-values (lambda () (link-docstrs asm))
                    (lambda (docstrs docstrs-strtab)
                      (call-with-values (lambda () (link-debug asm))
                        (lambda (dinfo dabbrev dstrtab dloc dline)
                          ;; This needs to be linked last, because
                          ;; linking other sections adds entries to the
                          ;; string table.
                          (let* ((shstrtab (link-shstrtab asm))
                                 (sections
                                  (list text ro frame-maps rw dt
                                        symtab strtab
                                        arities arities-strtab
                                        docstrs docstrs-strtab procprops
                                        dinfo dabbrev dstrtab dloc dline
                                        shstrtab)))
                            (filter identity sections)))))))))))))))
| 2065 | |
| 2066 | |
| 2067 | \f |
| 2068 | |
| 2069 | ;;; |
| 2070 | ;;; High-level public interfaces. |
| 2071 | ;;; |
| 2072 | |
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages.  Pass @code{#:page-aligned? #f} to
disable this behavior."
  ;; Link the individual sections first, then lay them out as ELF.
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))