check symbols constants uninterned
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile bytecode assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014, 2015 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; bytecode assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
26 ;;;
27 ;;; "Primitive instructions" correspond to VM operations. Assemblers
28 ;;; for primitive instructions are generated programmatically from
29 ;;; (instruction-list), which itself is derived from the VM sources.
30 ;;; There are also "macro-instructions" like "label" or "load-constant"
31 ;;; that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm dwarf)
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
50 #:use-module (language bytecode)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 binary-ports)
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
60
61 emit-call
62 emit-call-label
63 emit-tail-call
64 emit-tail-call-label
65 (emit-receive* . emit-receive)
66 emit-receive-values
67 emit-return
68 emit-return-values
69 emit-call/cc
70 emit-abort
71 (emit-builtin-ref* . emit-builtin-ref)
72 emit-br-if-nargs-ne
73 emit-br-if-nargs-lt
74 emit-br-if-nargs-gt
75 emit-assert-nargs-ee
76 emit-assert-nargs-ge
77 emit-assert-nargs-le
78 emit-alloc-frame
79 emit-reset-frame
80 emit-assert-nargs-ee/locals
81 emit-br-if-npos-gt
82 emit-bind-kwargs
83 emit-bind-rest
84 emit-br
85 emit-br-if-true
86 emit-br-if-null
87 emit-br-if-nil
88 emit-br-if-pair
89 emit-br-if-struct
90 emit-br-if-char
91 emit-br-if-tc7
92 (emit-br-if-eq* . emit-br-if-eq)
93 (emit-br-if-eqv* . emit-br-if-eqv)
94 (emit-br-if-equal* . emit-br-if-equal)
95 (emit-br-if-=* . emit-br-if-=)
96 (emit-br-if-<* . emit-br-if-<)
97 (emit-br-if-<=* . emit-br-if-<=)
98 (emit-br-if-logtest* . emit-br-if-logtest)
99 (emit-mov* . emit-mov)
100 (emit-box* . emit-box)
101 (emit-box-ref* . emit-box-ref)
102 (emit-box-set!* . emit-box-set!)
103 emit-make-closure
104 (emit-free-ref* . emit-free-ref)
105 (emit-free-set!* . emit-free-set!)
106 emit-current-module
107 emit-resolve
108 (emit-define!* . emit-define!)
109 emit-toplevel-box
110 emit-module-box
111 emit-prompt
112 (emit-wind* . emit-wind)
113 emit-unwind
114 (emit-push-fluid* . emit-push-fluid)
115 emit-pop-fluid
116 (emit-fluid-ref* . emit-fluid-ref)
117 (emit-fluid-set* . emit-fluid-set)
118 (emit-string-length* . emit-string-length)
119 (emit-string-ref* . emit-string-ref)
120 (emit-string->number* . emit-string->number)
121 (emit-string->symbol* . emit-string->symbol)
122 (emit-symbol->keyword* . emit-symbol->keyword)
123 (emit-cons* . emit-cons)
124 (emit-car* . emit-car)
125 (emit-cdr* . emit-cdr)
126 (emit-set-car!* . emit-set-car!)
127 (emit-set-cdr!* . emit-set-cdr!)
128 (emit-add* . emit-add)
129 (emit-add1* . emit-add1)
130 (emit-sub* . emit-sub)
131 (emit-sub1* . emit-sub1)
132 (emit-mul* . emit-mul)
133 (emit-div* . emit-div)
134 (emit-quo* . emit-quo)
135 (emit-rem* . emit-rem)
136 (emit-mod* . emit-mod)
137 (emit-ash* . emit-ash)
138 (emit-logand* . emit-logand)
139 (emit-logior* . emit-logior)
140 (emit-logxor* . emit-logxor)
141 (emit-make-vector* . emit-make-vector)
142 (emit-make-vector/immediate* . emit-make-vector/immediate)
143 (emit-vector-length* . emit-vector-length)
144 (emit-vector-ref* . emit-vector-ref)
145 (emit-vector-ref/immediate* . emit-vector-ref/immediate)
146 (emit-vector-set!* . emit-vector-set!)
147 (emit-vector-set!/immediate* . emit-vector-set!/immediate)
148 (emit-struct-vtable* . emit-struct-vtable)
149 (emit-allocate-struct/immediate* . emit-allocate-struct/immediate)
150 (emit-struct-ref/immediate* . emit-struct-ref/immediate)
151 (emit-struct-set!/immediate* . emit-struct-set!/immediate)
152 (emit-allocate-struct* . emit-allocate-struct)
153 (emit-struct-ref* . emit-struct-ref)
154 (emit-struct-set!* . emit-struct-set!)
155 (emit-class-of* . emit-class-of)
156 (emit-make-array* . emit-make-array)
157 (emit-bv-u8-ref* . emit-bv-u8-ref)
158 (emit-bv-s8-ref* . emit-bv-s8-ref)
159 (emit-bv-u16-ref* . emit-bv-u16-ref)
160 (emit-bv-s16-ref* . emit-bv-s16-ref)
161 (emit-bv-u32-ref* . emit-bv-u32-ref)
162 (emit-bv-s32-ref* . emit-bv-s32-ref)
163 (emit-bv-u64-ref* . emit-bv-u64-ref)
164 (emit-bv-s64-ref* . emit-bv-s64-ref)
165 (emit-bv-f32-ref* . emit-bv-f32-ref)
166 (emit-bv-f64-ref* . emit-bv-f64-ref)
167 (emit-bv-u8-set!* . emit-bv-u8-set!)
168 (emit-bv-s8-set!* . emit-bv-s8-set!)
169 (emit-bv-u16-set!* . emit-bv-u16-set!)
170 (emit-bv-s16-set!* . emit-bv-s16-set!)
171 (emit-bv-u32-set!* . emit-bv-u32-set!)
172 (emit-bv-s32-set!* . emit-bv-s32-set!)
173 (emit-bv-u64-set!* . emit-bv-u64-set!)
174 (emit-bv-s64-set!* . emit-bv-s64-set!)
175 (emit-bv-f32-set!* . emit-bv-f32-set!)
176 (emit-bv-f64-set!* . emit-bv-f64-set!)
177
178 emit-text
179 link-assembly))
180
181
182 \f
183
184 ;; Like define-inlinable, but only for first-order uses of the defined
185 ;; routine. Should residualize less code.
186 (eval-when (expand)
187 (define-syntax define-inline
188 (lambda (x)
189 (syntax-case x ()
190 ((_ (name arg ...) body ...)
191 (with-syntax (((temp ...) (generate-temporaries #'(arg ...))))
192 #`(eval-when (expand)
193 (define-syntax-rule (name temp ...)
194 (let ((arg temp) ...)
195 body ...)))))))))
196
197 ;;; Bytecode consists of 32-bit units, often subdivided in some way.
198 ;;; These helpers create one 32-bit unit from multiple components.
199
200 (define-inline (pack-u8-u24 x y)
201 (unless (<= 0 x 255)
202 (error "out of range" x))
203 (logior x (ash y 8)))
204
205 (define-inline (pack-u8-s24 x y)
206 (unless (<= 0 x 255)
207 (error "out of range" x))
208 (logior x (ash (cond
209 ((< 0 (- y) #x800000)
210 (+ y #x1000000))
211 ((<= 0 y #xffffff)
212 y)
213 (else (error "out of range" y)))
214 8)))
215
216 (define-inline (pack-u1-u7-u24 x y z)
217 (unless (<= 0 x 1)
218 (error "out of range" x))
219 (unless (<= 0 y 127)
220 (error "out of range" y))
221 (logior x (ash y 1) (ash z 8)))
222
223 (define-inline (pack-u8-u12-u12 x y z)
224 (unless (<= 0 x 255)
225 (error "out of range" x))
226 (unless (<= 0 y 4095)
227 (error "out of range" y))
228 (logior x (ash y 8) (ash z 20)))
229
230 (define-inline (pack-u8-u8-u16 x y z)
231 (unless (<= 0 x 255)
232 (error "out of range" x))
233 (unless (<= 0 y 255)
234 (error "out of range" y))
235 (logior x (ash y 8) (ash z 16)))
236
237 (define-inline (pack-u8-u8-u8-u8 x y z w)
238 (unless (<= 0 x 255)
239 (error "out of range" x))
240 (unless (<= 0 y 255)
241 (error "out of range" y))
242 (unless (<= 0 z 255)
243 (error "out of range" z))
244 (logior x (ash y 8) (ash z 16) (ash w 24)))
245
246 (eval-when (expand)
247 (define-syntax pack-flags
248 (syntax-rules ()
249 ;; Add clauses as needed.
250 ((pack-flags f1 f2) (logior (if f1 (ash 1 0) 0)
251 (if f2 (ash 2 0) 0))))))
252
253 ;;; Helpers to read and write 32-bit units in a buffer.
254
255 (define-inline (u32-ref buf n)
256 (bytevector-u32-native-ref buf (* n 4)))
257
258 (define-inline (u32-set! buf n val)
259 (bytevector-u32-native-set! buf (* n 4) val))
260
261 (define-inline (s32-ref buf n)
262 (bytevector-s32-native-ref buf (* n 4)))
263
264 (define-inline (s32-set! buf n val)
265 (bytevector-s32-native-set! buf (* n 4) val))
266
267
268 \f
269
270 ;;; A <meta> entry collects metadata for one procedure. Procedures are
271 ;;; written as contiguous ranges of bytecode.
272 ;;;
273 (eval-when (expand)
274 (define-syntax-rule (assert-match arg pattern kind)
275 (let ((x arg))
276 (unless (match x (pattern #t) (_ #f))
277 (error (string-append "expected " kind) x)))))
278
279 (define-record-type <meta>
280 (%make-meta label properties low-pc high-pc arities)
281 meta?
282 (label meta-label)
283 (properties meta-properties set-meta-properties!)
284 (low-pc meta-low-pc)
285 (high-pc meta-high-pc set-meta-high-pc!)
286 (arities meta-arities set-meta-arities!))
287
288 (define (make-meta label properties low-pc)
289 (assert-match label (or (? exact-integer?) (? symbol?)) "symbol")
290 (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
291 (%make-meta label properties low-pc #f '()))
292
293 (define (meta-name meta)
294 (assq-ref (meta-properties meta) 'name))
295
296 ;; Metadata for one <lambda-case>.
297 (define-record-type <arity>
298 (make-arity req opt rest kw-indices allow-other-keys?
299 low-pc high-pc definitions)
300 arity?
301 (req arity-req)
302 (opt arity-opt)
303 (rest arity-rest)
304 (kw-indices arity-kw-indices)
305 (allow-other-keys? arity-allow-other-keys?)
306 (low-pc arity-low-pc)
307 (high-pc arity-high-pc set-arity-high-pc!)
308 (definitions arity-definitions set-arity-definitions!))
309
310 (eval-when (expand)
311 (define-syntax *block-size* (identifier-syntax 32)))
312
313 ;;; An assembler collects all of the words emitted during assembly, and
314 ;;; also maintains ancillary information such as the constant table, a
315 ;;; relocation list, and so on.
316 ;;;
317 ;;; Bytecode consists of 32-bit units. We emit bytecode using native
318 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
319 ;;; the bytevector as a whole instead of conditionalizing each access.
320 ;;;
321 (define-record-type <asm>
322 (make-asm cur idx start prev written
323 labels relocs
324 word-size endianness
325 constants inits
326 shstrtab next-section-number
327 meta sources
328 dead-slot-maps
329 to-file?)
330 asm?
331
332 ;; We write bytecode into what is logically a growable vector,
333 ;; implemented as a list of blocks. asm-cur is the current block, and
334 ;; asm-idx is the current index into that block, in 32-bit units.
335 ;;
336 (cur asm-cur set-asm-cur!)
337 (idx asm-idx set-asm-idx!)
338
339 ;; asm-start is an absolute position, indicating the offset of the
340 ;; beginning of an instruction (in u32 units). It is updated after
341 ;; writing all the words for one primitive instruction. It models the
342 ;; position of the instruction pointer during execution, given that
343 ;; the VM updates the IP only at the end of executing the instruction,
344 ;; and is thus useful for computing offsets between two points in a
345 ;; program.
346 ;;
347 (start asm-start set-asm-start!)
348
349 ;; The list of previously written blocks.
350 ;;
351 (prev asm-prev set-asm-prev!)
352
353 ;; The number of u32 words written in asm-prev, which is the same as
354 ;; the offset of the current block.
355 ;;
356 (written asm-written set-asm-written!)
357
358 ;; An alist of symbol -> position pairs, indicating the labels defined
359 ;; in this compilation unit.
360 ;;
361 (labels asm-labels set-asm-labels!)
362
363 ;; A list of relocations needed by the program text. We use an
364 ;; internal representation for relocations, and handle textualn
365 ;; relative relocations in the assembler. Other kinds of relocations
366 ;; are later reified as linker relocations and resolved by the linker.
367 ;;
368 (relocs asm-relocs set-asm-relocs!)
369
370 ;; Target information.
371 ;;
372 (word-size asm-word-size)
373 (endianness asm-endianness)
374
375 ;; The constant table, as a vhash of object -> label. All constants
376 ;; get de-duplicated and written into separate sections -- either the
377 ;; .rodata section, for read-only data, or .data, for constants that
378 ;; need initialization at load-time (like symbols). Constants can
379 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
380 ;; so order in this table is important.
381 ;;
382 (constants asm-constants set-asm-constants!)
383
384 ;; A list of instructions needed to initialize the constants. Will
385 ;; run in a thunk with 2 local variables.
386 ;;
387 (inits asm-inits set-asm-inits!)
388
389 ;; The shstrtab, for section names.
390 ;;
391 (shstrtab asm-shstrtab set-asm-shstrtab!)
392
393 ;; The section number for the next section to be written.
394 ;;
395 (next-section-number asm-next-section-number set-asm-next-section-number!)
396
397 ;; A list of <meta>, corresponding to procedure metadata.
398 ;;
399 (meta asm-meta set-asm-meta!)
400
401 ;; A list of (pos . source) pairs, indicating source information. POS
402 ;; is relative to the beginning of the text section, and SOURCE is in
403 ;; the same format that source-properties returns.
404 ;;
405 (sources asm-sources set-asm-sources!)
406
407 ;; A list of (pos . dead-slot-map) pairs, indicating dead slot maps.
408 ;; POS is relative to the beginning of the text section.
409 ;; DEAD-SLOT-MAP is a bitfield of slots that are dead at call sites,
410 ;; as an integer.
411 ;;
412 (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!)
413 (to-file? asm-to-file?))
414
415 (define-inline (fresh-block)
416 (make-u32vector *block-size*))
417
418 (define* (make-assembler #:key (word-size (target-word-size))
419 (endianness (target-endianness))
420 (to-file? #t))
421 "Create an assembler for a given target @var{word-size} and
422 @var{endianness}, falling back to appropriate values for the configured
423 target."
424 (make-asm (fresh-block) 0 0 '() 0
425 (make-hash-table) '()
426 word-size endianness
427 vlist-null '()
428 (make-string-table) 1
429 '() '() '() to-file?))
430
431 (define (intern-section-name! asm string)
432 "Add a string to the section name table (shstrtab)."
433 (string-table-intern! (asm-shstrtab asm) string))
434
435 (define-inline (asm-pos asm)
436 "The offset of the next word to be written into the code buffer, in
437 32-bit units."
438 (+ (asm-idx asm) (asm-written asm)))
439
440 (define (allocate-new-block asm)
441 "Close off the current block, and arrange for the next word to be
442 written to a fresh block."
443 (let ((new (fresh-block)))
444 (set-asm-prev! asm (cons (asm-cur asm) (asm-prev asm)))
445 (set-asm-written! asm (asm-pos asm))
446 (set-asm-cur! asm new)
447 (set-asm-idx! asm 0)))
448
449 (define-inline (emit asm u32)
450 "Emit one 32-bit word into the instruction stream. Assumes that there
451 is space for the word, and ensures that there is space for the next
452 word."
453 (u32-set! (asm-cur asm) (asm-idx asm) u32)
454 (set-asm-idx! asm (1+ (asm-idx asm)))
455 (if (= (asm-idx asm) *block-size*)
456 (allocate-new-block asm)))
457
458 (define-inline (make-reloc type label base word)
459 "Make an internal relocation of type @var{type} referencing symbol
460 @var{label}, @var{word} words after position @var{start}. @var{type}
461 may be x8-s24, indicating a 24-bit relative label reference that can be
462 fixed up by the assembler, or s32, indicating a 32-bit relative
463 reference that needs to be fixed up by the linker."
464 (list type label base word))
465
466 (define-inline (reset-asm-start! asm)
467 "Reset the asm-start after writing the words for one instruction."
468 (set-asm-start! asm (asm-pos asm)))
469
470 (define (record-label-reference asm label)
471 "Record an x8-s24 local label reference. This value will get patched
472 up later by the assembler."
473 (let* ((start (asm-start asm))
474 (pos (asm-pos asm))
475 (reloc (make-reloc 'x8-s24 label start (- pos start))))
476 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
477
478 (define* (record-far-label-reference asm label #:optional (offset 0))
479 "Record an s32 far label reference. This value will get patched up
480 later by the linker."
481 (let* ((start (- (asm-start asm) offset))
482 (pos (asm-pos asm))
483 (reloc (make-reloc 's32 label start (- pos start))))
484 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
485
486
487 \f
488
489 ;;;
490 ;;; Primitive assemblers are defined by expanding `assembler' for each
491 ;;; opcode in `(instruction-list)'.
492 ;;;
493
494 (eval-when (expand)
495 (define (id-append ctx a b)
496 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b))))
497
498 (define-syntax assembler
499 (lambda (x)
500 (define-syntax op-case
501 (lambda (x)
502 (syntax-case x ()
503 ((_ asm name ((type arg ...) code ...) clause ...)
504 #`(if (eq? name 'type)
505 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
506 #'((arg ...)
507 code ...))
508 (op-case asm name clause ...)))
509 ((_ asm name)
510 #'(error "unmatched name" name)))))
511
512 (define (pack-first-word asm opcode type)
513 (with-syntax ((opcode opcode))
514 (op-case
515 asm type
516 ((U8_X24)
517 (emit asm opcode))
518 ((U8_U24 arg)
519 (emit asm (pack-u8-u24 opcode arg)))
520 ((U8_L24 label)
521 (record-label-reference asm label)
522 (emit asm opcode))
523 ((U8_U8_I16 a imm)
524 (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
525 ((U8_U12_U12 a b)
526 (emit asm (pack-u8-u12-u12 opcode a b)))
527 ((U8_U8_U8_U8 a b c)
528 (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
529
530 (define (pack-tail-word asm type)
531 (op-case
532 asm type
533 ((U8_U24 a b)
534 (emit asm (pack-u8-u24 a b)))
535 ((U8_L24 a label)
536 (record-label-reference asm label)
537 (emit asm a))
538 ((U32 a)
539 (emit asm a))
540 ((I32 imm)
541 (let ((val (object-address imm)))
542 (unless (zero? (ash val -32))
543 (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
544 (emit asm val)))
545 ((A32 imm)
546 (unless (= (asm-word-size asm) 8)
547 (error "make-long-immediate unavailable for this target"))
548 (emit asm (ash (object-address imm) -32))
549 (emit asm (logand (object-address imm) (1- (ash 1 32)))))
550 ((B32))
551 ((N32 label)
552 (record-far-label-reference asm label)
553 (emit asm 0))
554 ((S32 label)
555 (record-far-label-reference asm label)
556 (emit asm 0))
557 ((L32 label)
558 (record-far-label-reference asm label)
559 (emit asm 0))
560 ((LO32 label offset)
561 (record-far-label-reference asm label
562 (* offset (/ (asm-word-size asm) 4)))
563 (emit asm 0))
564 ((X8_U24 a)
565 (emit asm (pack-u8-u24 0 a)))
566 ((X8_L24 label)
567 (record-label-reference asm label)
568 (emit asm 0))
569 ((B1_X7_L24 a label)
570 (record-label-reference asm label)
571 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
572 ((B1_U7_L24 a b label)
573 (record-label-reference asm label)
574 (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
575 ((B1_X31 a)
576 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
577 ((B1_X7_U24 a b)
578 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
579
580 (syntax-case x ()
581 ((_ name opcode word0 word* ...)
582 (with-syntax ((((formal0 ...)
583 code0 ...)
584 (pack-first-word #'asm
585 (syntax->datum #'opcode)
586 (syntax->datum #'word0)))
587 ((((formal* ...)
588 code* ...) ...)
589 (map (lambda (word) (pack-tail-word #'asm word))
590 (syntax->datum #'(word* ...)))))
591 #'(lambda (asm formal0 ... formal* ... ...)
592 (unless (asm? asm) (error "not an asm"))
593 code0 ...
594 code* ... ...
595 (reset-asm-start! asm))))))))
596
597 (define assemblers (make-hash-table))
598
599 (eval-when (expand)
600 (define-syntax define-assembler
601 (lambda (x)
602 (syntax-case x ()
603 ((_ name opcode kind arg ...)
604 (with-syntax ((emit (id-append #'name #'emit- #'name)))
605 #'(define emit
606 (let ((emit (assembler name opcode arg ...)))
607 (hashq-set! assemblers 'name emit)
608 emit)))))))
609
610 (define-syntax visit-opcodes
611 (lambda (x)
612 (syntax-case x ()
613 ((visit-opcodes macro arg ...)
614 (with-syntax (((inst ...)
615 (map (lambda (x) (datum->syntax #'macro x))
616 (instruction-list))))
617 #'(begin
618 (macro arg ... . inst)
619 ...)))))))
620
621 (visit-opcodes define-assembler)
622
623 (eval-when (expand)
624
625 ;; Some operands are encoded using a restricted subset of the full
626 ;; 24-bit local address space, in order to make the bytecode more
627 ;; dense in the usual case that there are few live locals. Here we
628 ;; define wrapper emitters that shuffle out-of-range operands into and
629 ;; out of the reserved range of locals [233,255]. This range is
630 ;; sufficient because these restricted operands are only present in
631 ;; the first word of an instruction. Since 8 bits is the smallest
632 ;; slot-addressing operand size, that means we can fit 3 operands in
633 ;; the 24 bits of payload of the first word (the lower 8 bits being
634 ;; taken by the opcode).
635 ;;
636 ;; The result are wrapper emitters with the same arity,
637 ;; e.g. emit-cons* that wraps emit-cons. We expose these wrappers as
638 ;; the public interface for emitting `cons' instructions. That way we
639 ;; solve the problem fully and in just one place. The only manual
640 ;; care that need be taken is in the exports list at the top of the
641 ;; file -- to be sure that we export the wrapper and not the wrapped
642 ;; emitter.
643
644 (define (shuffling-assembler name kind word0 word*)
645 (define (analyze-first-word)
646 (define-syntax op-case
647 (syntax-rules ()
648 ((_ type ((%type %kind arg ...) values) clause ...)
649 (if (and (eq? type '%type) (eq? kind '%kind))
650 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
651 #'((arg ...) values))
652 (op-case type clause ...)))
653 ((_ type)
654 #f)))
655 (op-case
656 word0
657 ((U8_U8_I16 ! a imm)
658 (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
659 imm))
660 ((U8_U8_I16 <- a imm)
661 (values (if (< a (ash 1 8)) a 253)
662 imm))
663 ((U8_U12_U12 ! a b)
664 (values (if (< a (ash 1 12)) a (begin (emit-mov* asm 253 a) 253))
665 (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
666 ((U8_U12_U12 <- a b)
667 (values (if (< a (ash 1 12)) a 253)
668 (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
669 ((U8_U8_U8_U8 ! a b c)
670 (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
671 (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
672 (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))
673 ((U8_U8_U8_U8 <- a b c)
674 (values (if (< a (ash 1 8)) a 253)
675 (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
676 (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))))
677
678 (define (tail-formals type)
679 (define-syntax op-case
680 (syntax-rules ()
681 ((op-case type (%type arg ...) clause ...)
682 (if (eq? type '%type)
683 (generate-temporaries #'(arg ...))
684 (op-case type clause ...)))
685 ((op-case type)
686 (error "unmatched type" type))))
687 (op-case type
688 (U8_U24 a b)
689 (U8_L24 a label)
690 (U32 a)
691 (I32 imm)
692 (A32 imm)
693 (B32)
694 (N32 label)
695 (S32 label)
696 (L32 label)
697 (LO32 label offset)
698 (X8_U24 a)
699 (X8_L24 label)
700 (B1_X7_L24 a label)
701 (B1_U7_L24 a b label)
702 (B1_X31 a)
703 (B1_X7_U24 a b)))
704
705 (define (shuffle-up dst)
706 (define-syntax op-case
707 (syntax-rules ()
708 ((_ type ((%type ...) exp) clause ...)
709 (if (memq type '(%type ...))
710 #'exp
711 (op-case type clause ...)))
712 ((_ type)
713 (error "unexpected type" type))))
714 (with-syntax ((dst dst))
715 (op-case
716 word0
717 ((U8_U8_I16 U8_U8_U8_U8)
718 (unless (< dst (ash 1 8))
719 (emit-mov* asm dst 253)))
720 ((U8_U12_U12)
721 (unless (< dst (ash 1 12))
722 (emit-mov* asm dst 253))))))
723
724 (and=>
725 (analyze-first-word)
726 (lambda (formals+shuffle)
727 (with-syntax ((emit-name (id-append name #'emit- name))
728 (((formal0 ...) shuffle) formals+shuffle)
729 (((formal* ...) ...) (map tail-formals word*)))
730 (with-syntax (((shuffle-up-dst ...)
731 (if (eq? kind '<-)
732 (syntax-case #'(formal0 ...) ()
733 ((dst . _)
734 (list (shuffle-up #'dst))))
735 '())))
736 #'(lambda (asm formal0 ... formal* ... ...)
737 (call-with-values (lambda () shuffle)
738 (lambda (formal0 ...)
739 (emit-name asm formal0 ... formal* ... ...)))
740 shuffle-up-dst ...))))))
741
742 (define-syntax define-shuffling-assembler
743 (lambda (stx)
744 (syntax-case stx ()
745 ((_ #:except (except ...) name opcode kind word0 word* ...)
746 (cond
747 ((or-map (lambda (op) (eq? (syntax->datum #'name) op))
748 (map syntax->datum #'(except ...)))
749 #'(begin))
750 ((shuffling-assembler #'name (syntax->datum #'kind)
751 (syntax->datum #'word0)
752 (map syntax->datum #'(word* ...)))
753 => (lambda (proc)
754 (with-syntax ((emit (id-append #'name
755 (id-append #'name #'emit- #'name)
756 #'*))
757 (proc proc))
758 #'(define emit
759 (let ((emit proc))
760 (hashq-set! assemblers 'name emit)
761 emit)))))
762 (else #'(begin))))))))
763
764 (visit-opcodes define-shuffling-assembler #:except (receive mov))
765
766 ;; Mov and receive are two special cases that can work without wrappers.
767 ;; Indeed it is important that they do so.
768
769 (define (emit-mov* asm dst src)
770 (if (and (< dst (ash 1 12)) (< src (ash 1 12)))
771 (emit-mov asm dst src)
772 (emit-long-mov asm dst src)))
773
774 (define (emit-receive* asm dst proc nlocals)
775 (if (and (< dst (ash 1 12)) (< proc (ash 1 12)))
776 (emit-receive asm dst proc nlocals)
777 (begin
778 (emit-receive-values asm proc #t 1)
779 (emit-mov* asm dst (1+ proc))
780 (emit-reset-frame asm nlocals))))
781
782 (define (emit-text asm instructions)
783 "Assemble @var{instructions} using the assembler @var{asm}.
784 @var{instructions} is a sequence of instructions, expressed as a list of
785 lists. This procedure can be called many times before calling
786 @code{link-assembly}."
787 (for-each (lambda (inst)
788 (apply (or (hashq-ref assemblers (car inst))
789 (error 'bad-instruction inst))
790 asm
791 (cdr inst)))
792 instructions))
793
794 \f
795
796 ;;;
797 ;;; The constant table records a topologically sorted set of literal
798 ;;; constants used by a program. For example, a pair uses its car and
799 ;;; cdr, a string uses its stringbuf, etc.
800 ;;;
801 ;;; Some things we want to add to the constant table are not actually
802 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
803 ;;; references, or cache cells for non-closure procedures. For these we
804 ;;; define special record types and add instances of those record types
805 ;;; to the table.
806 ;;;
807
808 (define-inline (immediate? x)
809 "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
810 (not (zero? (logand (object-address x) 6))))
811
812 (define-record-type <stringbuf>
813 (make-stringbuf string)
814 stringbuf?
815 (string stringbuf-string))
816
817 (define-record-type <static-procedure>
818 (make-static-procedure code)
819 static-procedure?
820 (code static-procedure-code))
821
822 (define-record-type <uniform-vector-backing-store>
823 (make-uniform-vector-backing-store bytes element-size)
824 uniform-vector-backing-store?
825 (bytes uniform-vector-backing-store-bytes)
826 (element-size uniform-vector-backing-store-element-size))
827
828 (define-record-type <cache-cell>
829 (make-cache-cell scope key)
830 cache-cell?
831 (scope cache-cell-scope)
832 (key cache-cell-key))
833
834 (define (simple-vector? obj)
835 (and (vector? obj)
836 (equal? (array-shape obj) (list (list 0 (1- (vector-length obj)))))))
837
838 (define (simple-uniform-vector? obj)
839 (and (array? obj)
840 (symbol? (array-type obj))
841 (equal? (array-shape obj) (list (list 0 (1- (array-length obj)))))))
842
843 (define (statically-allocatable? x)
844 "Return @code{#t} if a non-immediate constant can be allocated
845 statically, and @code{#f} if it would need some kind of runtime
846 allocation."
847 (or (pair? x) (string? x) (stringbuf? x) (static-procedure? x) (array? x)))
848
849 (define (intern-constant asm obj)
850 "Add an object to the constant table, and return a label that can be
851 used to reference it. If the object is already present in the constant
852 table, its existing label is used directly."
853 (define (recur obj)
854 (intern-constant asm obj))
855 (define (field dst n obj)
856 (let ((src (recur obj)))
857 (if src
858 (if (statically-allocatable? obj)
859 `((static-patch! ,dst ,n ,src))
860 `((static-ref 1 ,src)
861 (static-set! 1 ,dst ,n)))
862 '())))
863 (define (intern obj label)
864 (cond
865 ((pair? obj)
866 (append (field label 0 (car obj))
867 (field label 1 (cdr obj))))
868 ((simple-vector? obj)
869 (let lp ((i 0) (inits '()))
870 (if (< i (vector-length obj))
871 (lp (1+ i)
872 (append-reverse (field label (1+ i) (vector-ref obj i))
873 inits))
874 (reverse inits))))
875 ((stringbuf? obj) '())
876 ((static-procedure? obj)
877 `((static-patch! ,label 1 ,(static-procedure-code obj))))
878 ((cache-cell? obj) '())
879 ((and (symbol? obj) (symbol-interned? obj))
880 `((make-non-immediate 1 ,(recur (symbol->string obj)))
881 (string->symbol 1 1)
882 (static-set! 1 ,label 0)))
883 ((string? obj)
884 `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
885 ((keyword? obj)
886 `((static-ref 1 ,(recur (keyword->symbol obj)))
887 (symbol->keyword 1 1)
888 (static-set! 1 ,label 0)))
889 ((number? obj)
890 `((make-non-immediate 1 ,(recur (number->string obj)))
891 (string->number 1 1)
892 (static-set! 1 ,label 0)))
893 ((uniform-vector-backing-store? obj) '())
894 ((simple-uniform-vector? obj)
895 (let ((width (case (array-type obj)
896 ((vu8 u8 s8) 1)
897 ((u16 s16) 2)
898 ;; Bitvectors are addressed in 32-bit units.
899 ;; Although a complex number is 8 or 16 bytes wide,
900 ;; it should be byteswapped in 4 or 8 byte units.
901 ((u32 s32 f32 c32 b) 4)
902 ((u64 s64 f64 c64) 8)
903 (else
904 (error "unhandled array type" obj)))))
905 `((static-patch! ,label 2
906 ,(recur (make-uniform-vector-backing-store
907 (uniform-array->bytevector obj)
908 width))))))
909 ((array? obj)
910 `((static-patch! ,label 1 ,(recur (shared-array-root obj)))))
911 (else
912 (if (asm-to-file? asm)
913 (error "don't know how to intern" obj)
914 `((vector-ref/immediate 1 0 ,(vlist-length (asm-constants asm)))
915 (static-set! 1 ,label 0))))))
916 (cond
917 ((immediate? obj) #f)
918 ((vhash-assoc obj (asm-constants asm)) => cdr)
919 (else
920 ;; Note that calling intern may mutate asm-constants and asm-inits.
921 (let* ((label (gensym "constant"))
922 (inits (intern obj label)))
923 (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
924 (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
925 label))))
926
927 (define (intern-non-immediate asm obj)
928 "Intern a non-immediate into the constant table, and return its
929 label."
930 (when (immediate? obj)
931 (error "expected a non-immediate" obj))
932 (intern-constant asm obj))
933
934 (define (intern-cache-cell asm scope key)
935 "Intern a cache cell into the constant table, and return its label.
936 If there is already a cache cell with the given scope and key, it is
937 returned instead."
938 (intern-constant asm (make-cache-cell scope key)))
939
940 ;; Return the label of the cell that holds the module for a scope.
941 (define (intern-module-cache-cell asm scope)
942 "Intern a cache cell for a module, and return its label."
943 (intern-cache-cell asm scope #t))
944
945
946 \f
947
948 ;;;
949 ;;; Macro assemblers bridge the gap between primitive instructions and
950 ;;; some higher-level operations.
951 ;;;
952
953 (eval-when (expand)
954 (define-syntax define-macro-assembler
955 (lambda (x)
956 (syntax-case x ()
957 ((_ (name arg ...) body body* ...)
958 (with-syntax ((emit (id-append #'name #'emit- #'name)))
959 #'(begin
960 (define emit
961 (let ((emit (lambda (arg ...) body body* ...)))
962 (hashq-set! assemblers 'name emit)
963 emit))
964 (export emit))))))))
965
966 (define-macro-assembler (load-constant asm dst obj)
967 (cond
968 ((immediate? obj)
969 (let ((bits (object-address obj)))
970 (cond
971 ((and (< dst 256) (zero? (ash bits -16)))
972 (emit-make-short-immediate asm dst obj))
973 ((zero? (ash bits -32))
974 (emit-make-long-immediate asm dst obj))
975 (else
976 (emit-make-long-long-immediate asm dst obj)))))
977 ((statically-allocatable? obj)
978 (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
979 (else
980 (emit-static-ref asm dst (intern-non-immediate asm obj)))))
981
982 (define-macro-assembler (load-static-procedure asm dst label)
983 (let ((loc (intern-constant asm (make-static-procedure label))))
984 (emit-make-non-immediate asm dst loc)))
985
986 (define-syntax-rule (define-tc7-macro-assembler name tc7)
987 (define-macro-assembler (name asm slot invert? label)
988 (emit-br-if-tc7 asm slot invert? tc7 label)))
989
990 ;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
991 ;; macro assemblers are commented out. See also
992 ;; *branching-primcall-arities* in (language cps primitives), the set of
993 ;; macro-instructions in assembly.scm, and
994 ;; disassembler.scm:code-annotation.
995 ;;
996 ;; FIXME: Define all tc7 values in Scheme in one place, derived from
997 ;; tags.h.
998 (define-tc7-macro-assembler br-if-symbol 5)
999 (define-tc7-macro-assembler br-if-variable 7)
1000 (define-tc7-macro-assembler br-if-vector 13)
1001 ;(define-tc7-macro-assembler br-if-weak-vector 13)
1002 (define-tc7-macro-assembler br-if-string 21)
1003 ;(define-tc7-macro-assembler br-if-heap-number 23)
1004 ;(define-tc7-macro-assembler br-if-stringbuf 39)
1005 (define-tc7-macro-assembler br-if-bytevector 77)
1006 ;(define-tc7-macro-assembler br-if-pointer 31)
1007 ;(define-tc7-macro-assembler br-if-hashtable 29)
1008 ;(define-tc7-macro-assembler br-if-fluid 37)
1009 ;(define-tc7-macro-assembler br-if-dynamic-state 45)
1010 ;(define-tc7-macro-assembler br-if-frame 47)
1011 (define-tc7-macro-assembler br-if-keyword 53)
1012 ;(define-tc7-macro-assembler br-if-vm 55)
1013 ;(define-tc7-macro-assembler br-if-vm-cont 71)
1014 ;(define-tc7-macro-assembler br-if-rtl-program 69)
1015 ;(define-tc7-macro-assembler br-if-weak-set 85)
1016 ;(define-tc7-macro-assembler br-if-weak-table 87)
1017 ;(define-tc7-macro-assembler br-if-array 93)
1018 (define-tc7-macro-assembler br-if-bitvector 95)
1019 ;(define-tc7-macro-assembler br-if-port 125)
1020 ;(define-tc7-macro-assembler br-if-smob 127)
1021
1022 (define-macro-assembler (begin-program asm label properties)
1023 (emit-label asm label)
1024 (let ((meta (make-meta label properties (asm-start asm))))
1025 (set-asm-meta! asm (cons meta (asm-meta asm)))))
1026
1027 (define-macro-assembler (end-program asm)
1028 (let ((meta (car (asm-meta asm))))
1029 (set-meta-high-pc! meta (asm-start asm))
1030 (set-meta-arities! meta (reverse (meta-arities meta)))))
1031
1032 (define-macro-assembler (begin-standard-arity asm req nlocals alternate)
1033 (emit-begin-opt-arity asm req '() #f nlocals alternate))
1034
1035 (define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
1036 (emit-begin-kw-arity asm req opt rest '() #f nlocals alternate))
1037
1038 (define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
1039 allow-other-keys? nlocals alternate)
1040 (assert-match req ((? symbol?) ...) "list of symbols")
1041 (assert-match opt ((? symbol?) ...) "list of symbols")
1042 (assert-match rest (or #f (? symbol?)) "#f or symbol")
1043 (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
1044 "alist of keyword -> integer")
1045 (assert-match allow-other-keys? (? boolean?) "boolean")
1046 (assert-match nlocals (? integer?) "integer")
1047 (assert-match alternate (or #f (? exact-integer?) (? symbol?)) "#f or symbol")
1048 (let* ((meta (car (asm-meta asm)))
1049 (arity (make-arity req opt rest kw-indices allow-other-keys?
1050 (asm-start asm) #f '()))
1051 ;; The procedure itself is in slot 0, in the standard calling
1052 ;; convention. For procedure prologues, nreq includes the
1053 ;; procedure, so here we add 1.
1054 (nreq (1+ (length req)))
1055 (nopt (length opt))
1056 (rest? (->bool rest)))
1057 (set-meta-arities! meta (cons arity (meta-arities meta)))
1058 (cond
1059 ((or allow-other-keys? (pair? kw-indices))
1060 (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
1061 nlocals alternate))
1062 ((or rest? (pair? opt))
1063 (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
1064 (else
1065 (emit-standard-prelude asm nreq nlocals alternate)))))
1066
1067 (define-macro-assembler (end-arity asm)
1068 (let ((arity (car (meta-arities (car (asm-meta asm))))))
1069 (set-arity-definitions! arity (reverse (arity-definitions arity)))
1070 (set-arity-high-pc! arity (asm-start asm))))
1071
1072 ;; As noted above, we reserve locals 253 through 255 for shuffling large
1073 ;; operands. However the calling convention has all arguments passed in
1074 ;; a contiguous block. This helper, called after the clause has been
1075 ;; chosen and the keyword/optional/rest arguments have been processed,
1076 ;; shuffles up arguments from slot 253 and higher into their final
1077 ;; allocations.
1078 ;;
1079 (define (shuffle-up-args asm nargs)
1080 (when (> nargs 253)
1081 (let ((slot (1- nargs)))
1082 (emit-mov asm (+ slot 3) slot)
1083 (shuffle-up-args asm (1- nargs)))))
1084
1085 (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
1086 (cond
1087 (alternate
1088 (emit-br-if-nargs-ne asm nreq alternate)
1089 (emit-alloc-frame asm nlocals))
1090 ((and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
1091 (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
1092 (else
1093 (emit-assert-nargs-ee asm nreq)
1094 (emit-alloc-frame asm nlocals)))
1095 (shuffle-up-args asm nreq))
1096
1097 (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
1098 (if alternate
1099 (emit-br-if-nargs-lt asm nreq alternate)
1100 (emit-assert-nargs-ge asm nreq))
1101 (cond
1102 (rest?
1103 (emit-bind-rest asm (+ nreq nopt)))
1104 (alternate
1105 (emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
1106 (else
1107 (emit-assert-nargs-le asm (+ nreq nopt))))
1108 (emit-alloc-frame asm nlocals)
1109 (shuffle-up-args asm (+ nreq nopt (if rest? 1 0))))
1110
1111 (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
1112 allow-other-keys? nlocals alternate)
1113 (if alternate
1114 (begin
1115 (emit-br-if-nargs-lt asm nreq alternate)
1116 (unless rest?
1117 (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
1118 (emit-assert-nargs-ge asm nreq))
1119 (let ((ntotal (fold (lambda (kw ntotal)
1120 (match kw
1121 (((? keyword?) . idx)
1122 (max (1+ idx) ntotal))))
1123 (+ nreq nopt) kw-indices)))
1124 ;; FIXME: port 581f410f
1125 (emit-bind-kwargs asm nreq
1126 (pack-flags allow-other-keys? rest?)
1127 (+ nreq nopt)
1128 ntotal
1129 (intern-constant asm kw-indices))
1130 (emit-alloc-frame asm nlocals)
1131 (shuffle-up-args asm ntotal)))
1132
1133 (define-macro-assembler (label asm sym)
1134 (hashq-set! (asm-labels asm) sym (asm-start asm)))
1135
1136 (define-macro-assembler (source asm source)
1137 (set-asm-sources! asm (acons (asm-start asm) source (asm-sources asm))))
1138
1139 (define-macro-assembler (definition asm name slot)
1140 (let* ((arity (car (meta-arities (car (asm-meta asm)))))
1141 (def (vector name
1142 slot
1143 (* (- (asm-start asm) (arity-low-pc arity)) 4))))
1144 (set-arity-definitions! arity (cons def (arity-definitions arity)))))
1145
1146 (define-macro-assembler (cache-current-module! asm module scope)
1147 (let ((mod-label (intern-module-cache-cell asm scope)))
1148 (emit-static-set! asm module mod-label 0)))
1149
1150 (define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
1151 (let ((sym-label (intern-non-immediate asm sym))
1152 (mod-label (intern-module-cache-cell asm scope))
1153 (cell-label (intern-cache-cell asm scope sym)))
1154 (emit-toplevel-box asm dst cell-label mod-label sym-label bound?)))
1155
1156 (define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
1157 (let* ((sym-label (intern-non-immediate asm sym))
1158 (key (cons public? module-name))
1159 (mod-name-label (intern-constant asm key))
1160 (cell-label (intern-cache-cell asm key sym)))
1161 (emit-module-box asm dst cell-label mod-name-label sym-label bound?)))
1162
1163 (define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
1164 (unless (zero? dead-slot-map)
1165 (set-asm-dead-slot-maps! asm
1166 (cons
1167 (cons* (asm-start asm) proc-slot dead-slot-map)
1168 (asm-dead-slot-maps asm)))))
1169
1170 \f
1171
1172 ;;;
1173 ;;; Helper for linking objects.
1174 ;;;
1175
1176 (define (make-object asm name bv relocs labels . kwargs)
1177 "Make a linker object. This helper handles interning the name in the
1178 shstrtab, assigning the size, allocating a fresh index, and defining a
1179 corresponding linker symbol for the start of the section."
1180 (let ((name-idx (intern-section-name! asm (symbol->string name)))
1181 (index (asm-next-section-number asm)))
1182 (set-asm-next-section-number! asm (1+ index))
1183 (make-linker-object (apply make-elf-section
1184 #:index index
1185 #:name name-idx
1186 #:size (bytevector-length bv)
1187 kwargs)
1188 bv relocs
1189 (cons (make-linker-symbol name 0) labels))))
1190
1191
1192 \f
1193
1194 ;;;
1195 ;;; Linking the constant table. This code is somewhat intertwingled
1196 ;;; with the intern-constant code above, as that procedure also
1197 ;;; residualizes instructions to initialize constants at load time.
1198 ;;;
1199
1200 (define (write-immediate asm buf pos x)
1201 (let ((val (object-address x))
1202 (endianness (asm-endianness asm)))
1203 (case (asm-word-size asm)
1204 ((4) (bytevector-u32-set! buf pos val endianness))
1205 ((8) (bytevector-u64-set! buf pos val endianness))
1206 (else (error "bad word size" asm)))))
1207
1208 (define (emit-init-constants asm)
1209 "If there is writable data that needs initialization at runtime, emit
1210 a procedure to do that and return its label. Otherwise return
1211 @code{#f}."
1212 (let ((inits (asm-inits asm)))
1213 (and (not (null? inits))
1214 (let ((label (gensym "init-constants")))
1215 (emit-text asm
1216 `((begin-program ,label ())
1217 ,@(if (asm-to-file? asm)
1218 '((assert-nargs-ee/locals 1 1))
1219 '((assert-nargs-ee/locals 2 0)
1220 (mov 0 1)))
1221 ,@(reverse inits)
1222 (load-constant 1 ,*unspecified*)
1223 (return 1)
1224 (end-program)))
1225 label))))
1226
1227 (define (link-data asm data name)
1228 "Link the static data for a program into the @var{name} section (which
1229 should be .data or .rodata), and return the resulting linker object.
1230 @var{data} should be a vhash mapping objects to labels."
1231 (define (align address alignment)
1232 (+ address
1233 (modulo (- alignment (modulo address alignment)) alignment)))
1234
1235 (define tc7-vector 13)
1236 (define stringbuf-shared-flag #x100)
1237 (define stringbuf-wide-flag #x400)
1238 (define tc7-stringbuf 39)
1239 (define tc7-narrow-stringbuf
1240 (+ tc7-stringbuf stringbuf-shared-flag))
1241 (define tc7-wide-stringbuf
1242 (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
1243 (define tc7-ro-string (+ 21 #x200))
1244 (define tc7-program 69)
1245 (define tc7-bytevector 77)
1246 (define tc7-bitvector 95)
1247 (define tc7-array 93)
1248
1249 (let ((word-size (asm-word-size asm))
1250 (endianness (asm-endianness asm)))
1251 (define (byte-length x)
1252 (cond
1253 ((stringbuf? x)
1254 (let ((x (stringbuf-string x)))
1255 (+ (* 2 word-size)
1256 (case (string-bytes-per-char x)
1257 ((1) (1+ (string-length x)))
1258 ((4) (* (1+ (string-length x)) 4))
1259 (else (error "bad string bytes per char" x))))))
1260 ((static-procedure? x)
1261 (* 2 word-size))
1262 ((string? x)
1263 (* 4 word-size))
1264 ((pair? x)
1265 (* 2 word-size))
1266 ((simple-vector? x)
1267 (* (1+ (vector-length x)) word-size))
1268 ((simple-uniform-vector? x)
1269 (* 4 word-size))
1270 ((uniform-vector-backing-store? x)
1271 (bytevector-length (uniform-vector-backing-store-bytes x)))
1272 ((array? x)
1273 (* word-size (+ 3 (* 3 (array-rank x)))))
1274 (else
1275 word-size)))
1276
1277 (define (write-constant-reference buf pos x)
1278 ;; The asm-inits will fix up any reference to a non-immediate.
1279 (write-immediate asm buf pos (if (immediate? x) x #f)))
1280
1281 (define (write buf pos obj)
1282 (cond
1283 ((stringbuf? obj)
1284 (let* ((x (stringbuf-string obj))
1285 (len (string-length x))
1286 (tag (if (= (string-bytes-per-char x) 1)
1287 tc7-narrow-stringbuf
1288 tc7-wide-stringbuf)))
1289 (case word-size
1290 ((4)
1291 (bytevector-u32-set! buf pos tag endianness)
1292 (bytevector-u32-set! buf (+ pos 4) len endianness))
1293 ((8)
1294 (bytevector-u64-set! buf pos tag endianness)
1295 (bytevector-u64-set! buf (+ pos 8) len endianness))
1296 (else
1297 (error "bad word size" asm)))
1298 (let ((pos (+ pos (* word-size 2))))
1299 (case (string-bytes-per-char x)
1300 ((1)
1301 (let lp ((i 0))
1302 (if (< i len)
1303 (let ((u8 (char->integer (string-ref x i))))
1304 (bytevector-u8-set! buf (+ pos i) u8)
1305 (lp (1+ i)))
1306 (bytevector-u8-set! buf (+ pos i) 0))))
1307 ((4)
1308 (let lp ((i 0))
1309 (if (< i len)
1310 (let ((u32 (char->integer (string-ref x i))))
1311 (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
1312 (lp (1+ i)))
1313 (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
1314 (else (error "bad string bytes per char" x))))))
1315
1316 ((static-procedure? obj)
1317 (case word-size
1318 ((4)
1319 (bytevector-u32-set! buf pos tc7-program endianness)
1320 (bytevector-u32-set! buf (+ pos 4) 0 endianness))
1321 ((8)
1322 (bytevector-u64-set! buf pos tc7-program endianness)
1323 (bytevector-u64-set! buf (+ pos 8) 0 endianness))
1324 (else (error "bad word size"))))
1325
1326 ((cache-cell? obj)
1327 (write-immediate asm buf pos #f))
1328
1329 ((string? obj)
1330 (let ((tag (logior tc7-ro-string (ash (string-length obj) 8)))) ; FIXME: unused?
1331 (case word-size
1332 ((4)
1333 (bytevector-u32-set! buf pos tc7-ro-string endianness)
1334 (write-immediate asm buf (+ pos 4) #f) ; stringbuf
1335 (bytevector-u32-set! buf (+ pos 8) 0 endianness)
1336 (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
1337 ((8)
1338 (bytevector-u64-set! buf pos tc7-ro-string endianness)
1339 (write-immediate asm buf (+ pos 8) #f) ; stringbuf
1340 (bytevector-u64-set! buf (+ pos 16) 0 endianness)
1341 (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
1342 (else (error "bad word size")))))
1343
1344 ((pair? obj)
1345 (write-constant-reference buf pos (car obj))
1346 (write-constant-reference buf (+ pos word-size) (cdr obj)))
1347
1348 ((simple-vector? obj)
1349 (let* ((len (vector-length obj))
1350 (tag (logior tc7-vector (ash len 8))))
1351 (case word-size
1352 ((4) (bytevector-u32-set! buf pos tag endianness))
1353 ((8) (bytevector-u64-set! buf pos tag endianness))
1354 (else (error "bad word size")))
1355 (let lp ((i 0))
1356 (when (< i (vector-length obj))
1357 (let ((pos (+ pos word-size (* i word-size)))
1358 (elt (vector-ref obj i)))
1359 (write-constant-reference buf pos elt)
1360 (lp (1+ i)))))))
1361
1362 ((and (symbol? obj) (symbol-interned? obj))
1363 (write-immediate asm buf pos #f))
1364
1365 ((keyword? obj)
1366 (write-immediate asm buf pos #f))
1367
1368 ((number? obj)
1369 (write-immediate asm buf pos #f))
1370
1371 ((simple-uniform-vector? obj)
1372 (let ((tag (if (bitvector? obj)
1373 tc7-bitvector
1374 (let ((type-code (array-type-code obj)))
1375 (logior tc7-bytevector (ash type-code 7))))))
1376 (case word-size
1377 ((4)
1378 (bytevector-u32-set! buf pos tag endianness)
1379 (bytevector-u32-set! buf (+ pos 4)
1380 (if (bitvector? obj)
1381 (bitvector-length obj)
1382 (bytevector-length obj))
1383 endianness) ; length
1384 (bytevector-u32-set! buf (+ pos 8) 0 endianness) ; pointer
1385 (write-immediate asm buf (+ pos 12) #f)) ; owner
1386 ((8)
1387 (bytevector-u64-set! buf pos tag endianness)
1388 (bytevector-u64-set! buf (+ pos 8)
1389 (if (bitvector? obj)
1390 (bitvector-length obj)
1391 (bytevector-length obj))
1392 endianness) ; length
1393 (bytevector-u64-set! buf (+ pos 16) 0 endianness) ; pointer
1394 (write-immediate asm buf (+ pos 24) #f)) ; owner
1395 (else (error "bad word size")))))
1396
1397 ((uniform-vector-backing-store? obj)
1398 (let ((bv (uniform-vector-backing-store-bytes obj)))
1399 (bytevector-copy! bv 0 buf pos (bytevector-length bv))
1400 (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
1401 (eq? endianness (native-endianness)))
1402 ;; Need to swap units of element-size bytes
1403 (error "FIXME: Implement byte order swap"))))
1404
1405 ((array? obj)
1406 (let-values
1407 ;; array tag + rank + contp flag: see libguile/arrays.h .
1408 (((tag) (logior tc7-array (ash (array-rank obj) 17) (ash 1 16)))
1409 ((bv-set! bvs-set!)
1410 (case word-size
1411 ((4) (values bytevector-u32-set! bytevector-s32-set!))
1412 ((8) (values bytevector-u64-set! bytevector-s64-set!))
1413 (else (error "bad word size")))))
1414 (bv-set! buf pos tag endianness)
1415 (write-immediate asm buf (+ pos word-size) #f) ; root vector (fixed later)
1416 (bv-set! buf (+ pos (* word-size 2)) 0 endianness) ; base
1417 (let lp ((pos (+ pos (* word-size 3)))
1418 (bounds (array-shape obj))
1419 (incs (shared-array-increments obj)))
1420 (when (pair? bounds)
1421 (bvs-set! buf pos (first (first bounds)) endianness)
1422 (bvs-set! buf (+ pos word-size) (second (first bounds)) endianness)
1423 (bvs-set! buf (+ pos (* word-size 2)) (first incs) endianness)
1424 (lp (+ pos (* 3 word-size)) (cdr bounds) (cdr incs))))))
1425
1426 (else
1427 (if (asm-to-file? asm)
1428 (error "unrecognized object" obj)
1429 (write-constant-reference buf pos obj)))))
1430
1431 (cond
1432 ((vlist-null? data) #f)
1433 (else
1434 (let* ((byte-len (vhash-fold (lambda (k v len)
1435 (+ (byte-length k) (align len 8)))
1436 0 data))
1437 (buf (make-bytevector byte-len 0)))
1438 (let lp ((i 0) (pos 0) (symbols '()))
1439 (if (< i (vlist-length data))
1440 (let* ((pair (vlist-ref data i))
1441 (obj (car pair))
1442 (obj-label (cdr pair)))
1443 (write buf pos obj)
1444 (lp (1+ i)
1445 (align (+ (byte-length obj) pos) 8)
1446 (cons (make-linker-symbol obj-label pos) symbols)))
1447 (make-object asm name buf '() symbols
1448 #:flags (match name
1449 ('.data (logior SHF_ALLOC SHF_WRITE))
1450 ('.rodata SHF_ALLOC))))))))))
1451
1452 (define (link-constants asm)
1453 "Link sections to hold constants needed by the program text emitted
1454 using @var{asm}.
1455
1456 Returns three values: an object for the .rodata section, an object for
1457 the .data section, and a label for an initialization procedure. Any of
1458 these may be @code{#f}."
1459 (define (shareable? x)
1460 (cond
1461 ((stringbuf? x) #t)
1462 ((pair? x)
1463 (and (immediate? (car x)) (immediate? (cdr x))))
1464 ((simple-vector? x)
1465 (let lp ((i 0))
1466 (or (= i (vector-length x))
1467 (and (immediate? (vector-ref x i))
1468 (lp (1+ i))))))
1469 ((uniform-vector-backing-store? x) #t)
1470 (else #f)))
1471 (let* ((constants (asm-constants asm))
1472 (len (vlist-length constants)))
1473 (let lp ((i 0)
1474 (ro vlist-null)
1475 (rw vlist-null))
1476 (if (= i len)
1477 (values (link-data asm ro '.rodata)
1478 (link-data asm rw '.data)
1479 (emit-init-constants asm))
1480 (let ((pair (vlist-ref constants i)))
1481 (if (shareable? (car pair))
1482 (lp (1+ i) (vhash-consq (car pair) (cdr pair) ro) rw)
1483 (lp (1+ i) ro (vhash-consq (car pair) (cdr pair) rw))))))))
1484
1485 \f
1486
1487 ;;;
1488 ;;; Linking program text.
1489 ;;;
1490
1491 (define (process-relocs buf relocs labels)
1492 "Patch up internal x8-s24 relocations, and any s32 relocations that
1493 reference symbols in the text section. Return a list of linker
1494 relocations for references to symbols defined outside the text section."
1495 (fold
1496 (lambda (reloc tail)
1497 (match reloc
1498 ((type label base word)
1499 (let ((abs (hashq-ref labels label))
1500 (dst (+ base word)))
1501 (case type
1502 ((s32)
1503 (if abs
1504 (let ((rel (- abs base)))
1505 (s32-set! buf dst rel)
1506 tail)
1507 (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
1508 tail)))
1509 ((x8-s24)
1510 (unless abs
1511 (error "unbound near relocation" reloc))
1512 (let ((rel (- abs base))
1513 (u32 (u32-ref buf dst)))
1514 (u32-set! buf dst (pack-u8-s24 (logand u32 #xff) rel))
1515 tail))
1516 (else (error "bad relocation kind" reloc)))))))
1517 '()
1518 relocs))
1519
1520 (define (process-labels labels)
1521 "Define linker symbols for the label-offset map in @var{labels}.
1522 The offsets are expected to be expressed in words."
1523 (hash-map->list (lambda (label loc)
1524 (make-linker-symbol label (* loc 4)))
1525 labels))
1526
1527 (define (swap-bytes! buf)
1528 "Patch up the text buffer @var{buf}, swapping the endianness of each
1529 32-bit unit."
1530 (unless (zero? (modulo (bytevector-length buf) 4))
1531 (error "unexpected length"))
1532 (let ((byte-len (bytevector-length buf)))
1533 (let lp ((pos 0))
1534 (unless (= pos byte-len)
1535 (bytevector-u32-set!
1536 buf pos
1537 (bytevector-u32-ref buf pos (endianness big))
1538 (endianness little))
1539 (lp (+ pos 4))))))
1540
1541 (define (link-text-object asm)
1542 "Link the .rtl-text section, swapping the endianness of the bytes if
1543 needed."
1544 (let ((buf (make-u32vector (asm-pos asm))))
1545 (let lp ((pos 0) (prev (reverse (asm-prev asm))))
1546 (if (null? prev)
1547 (let ((byte-size (* (asm-idx asm) 4)))
1548 (bytevector-copy! (asm-cur asm) 0 buf pos byte-size)
1549 (unless (eq? (asm-endianness asm) (native-endianness))
1550 (swap-bytes! buf))
1551 (make-object asm '.rtl-text
1552 buf
1553 (process-relocs buf (asm-relocs asm)
1554 (asm-labels asm))
1555 (process-labels (asm-labels asm))))
1556 (let ((len (* *block-size* 4)))
1557 (bytevector-copy! (car prev) 0 buf pos len)
1558 (lp (+ pos len) (cdr prev)))))))
1559
1560
1561 \f
1562
1563 ;;;
1564 ;;; Create the frame maps. These maps are used by GC to identify dead
1565 ;;; slots in pending call frames, to avoid marking them. We only do
1566 ;;; this when frame makes a non-tail call, as that is the common case.
1567 ;;; Only the topmost frame will see a GC at any other point, but we mark
1568 ;;; top frames conservatively as serializing live slot maps at every
1569 ;;; instruction would take up too much space in the object file.
1570 ;;;
1571
1572 ;; The .guile.frame-maps section starts with two packed u32 values: one
1573 ;; indicating the offset of the first byte of the .rtl-text section, and
1574 ;; another indicating the relative offset in bytes of the slots data.
1575 (define frame-maps-prefix-len 8)
1576
1577 ;; Each header is 8 bytes: 4 for the offset from .rtl_text, and 4 for
1578 ;; the offset of the slot map from the beginning of the
1579 ;; .guile.frame-maps section. The length of a frame map depends on the
1580 ;; frame size at the call site, and is not encoded into this section as
1581 ;; it is available at run-time.
1582 (define frame-map-header-len 8)
1583
1584 (define (link-frame-maps asm)
1585 (define (map-byte-length proc-slot)
1586 (ceiling-quotient (- proc-slot 2) 8))
1587 (define (make-frame-maps maps count map-len)
1588 (let* ((endianness (asm-endianness asm))
1589 (header-pos frame-maps-prefix-len)
1590 (map-pos (+ header-pos (* count frame-map-header-len)))
1591 (bv (make-bytevector (+ map-pos map-len) 0)))
1592 (bytevector-u32-set! bv 4 map-pos endianness)
1593 (let lp ((maps maps) (header-pos header-pos) (map-pos map-pos))
1594 (match maps
1595 (()
1596 (make-object asm '.guile.frame-maps bv
1597 (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
1598 '() #:type SHT_PROGBITS #:flags SHF_ALLOC))
1599 (((pos proc-slot . map) . maps)
1600 (bytevector-u32-set! bv header-pos (* pos 4) endianness)
1601 (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
1602 (let write-bytes ((map-pos map-pos)
1603 (map map)
1604 (byte-length (map-byte-length proc-slot)))
1605 (if (zero? byte-length)
1606 (lp maps (+ header-pos frame-map-header-len) map-pos)
1607 (begin
1608 (bytevector-u8-set! bv map-pos (logand map #xff))
1609 (write-bytes (1+ map-pos) (ash map -8)
1610 (1- byte-length))))))))))
1611 (match (asm-dead-slot-maps asm)
1612 (() #f)
1613 (in
1614 (let lp ((in in) (out '()) (count 0) (map-len 0))
1615 (match in
1616 (() (make-frame-maps out count map-len))
1617 (((and head (pos proc-slot . map)) . in)
1618 (lp in (cons head out)
1619 (1+ count)
1620 (+ (map-byte-length proc-slot) map-len))))))))
1621
1622 \f
1623
1624 ;;;
1625 ;;; Linking other sections of the ELF file, like the dynamic segment,
1626 ;;; the symbol table, etc.
1627 ;;;
1628
1629 ;; FIXME: Define these somewhere central, shared with C.
1630 (define *bytecode-major-version* #x0202)
1631 (define *bytecode-minor-version* 6)
1632
1633 (define (link-dynamic-section asm text rw rw-init frame-maps)
1634 "Link the dynamic section for an ELF image with bytecode @var{text},
1635 given the writable data section @var{rw} needing fixup from the
1636 procedure with label @var{rw-init}. @var{rw-init} may be false. If
1637 @var{rw} is true, it will be added to the GC roots at runtime."
1638 (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
1639 (let* ((endianness (asm-endianness asm))
1640 (words 6)
1641 (words (if rw (+ words 4) words))
1642 (words (if rw-init (+ words 2) words))
1643 (words (if frame-maps (+ words 2) words))
1644 (bv (make-bytevector (* word-size words) 0))
1645 (set-uword!
1646 (lambda (i uword)
1647 (%set-uword! bv (* i word-size) uword endianness)))
1648 (relocs '())
1649 (set-label!
1650 (lambda (i label)
1651 (set! relocs (cons (make-linker-reloc 'reloc-type
1652 (* i word-size) 0 label)
1653 relocs))
1654 (%set-uword! bv (* i word-size) 0 endianness))))
1655 (set-uword! 0 DT_GUILE_VM_VERSION)
1656 (set-uword! 1 (logior (ash *bytecode-major-version* 16)
1657 *bytecode-minor-version*))
1658 (set-uword! 2 DT_GUILE_ENTRY)
1659 (set-label! 3 '.rtl-text)
1660 (when rw
1661 ;; Add roots to GC.
1662 (set-uword! 4 DT_GUILE_GC_ROOT)
1663 (set-label! 5 '.data)
1664 (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
1665 (set-uword! 7 (bytevector-length (linker-object-bv rw)))
1666 (when rw-init
1667 (set-uword! 8 DT_INIT) ; constants
1668 (set-label! 9 rw-init)))
1669 (when frame-maps
1670 (set-uword! (- words 4) DT_GUILE_FRAME_MAPS)
1671 (set-label! (- words 3) '.guile.frame-maps))
1672 (set-uword! (- words 2) DT_NULL)
1673 (set-uword! (- words 1) 0)
1674 (make-object asm '.dynamic bv relocs '()
1675 #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
1676 (case (asm-word-size asm)
1677 ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
1678 ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
1679 (else (error "bad word size" asm))))
1680
1681 (define (link-shstrtab asm)
1682 "Link the string table for the section headers."
1683 (intern-section-name! asm ".shstrtab")
1684 (make-object asm '.shstrtab
1685 (link-string-table! (asm-shstrtab asm))
1686 '() '()
1687 #:type SHT_STRTAB #:flags 0))
1688
1689 (define (link-symtab text-section asm)
1690 (let* ((endianness (asm-endianness asm))
1691 (word-size (asm-word-size asm))
1692 (size (elf-symbol-len word-size))
1693 (meta (reverse (asm-meta asm)))
1694 (n (length meta))
1695 (strtab (make-string-table))
1696 (bv (make-bytevector (* n size) 0)))
1697 (define (intern-string! name)
1698 (string-table-intern! strtab (if name (symbol->string name) "")))
1699 (for-each
1700 (lambda (meta n)
1701 (let ((name (intern-string! (meta-name meta))))
1702 (write-elf-symbol bv (* n size) endianness word-size
1703 (make-elf-symbol
1704 #:name name
1705 ;; Symbol value and size are measured in
1706 ;; bytes, not u32s.
1707 #:value (* 4 (meta-low-pc meta))
1708 #:size (* 4 (- (meta-high-pc meta)
1709 (meta-low-pc meta)))
1710 #:type STT_FUNC
1711 #:visibility STV_HIDDEN
1712 #:shndx (elf-section-index text-section)))))
1713 meta (iota n))
1714 (let ((strtab (make-object asm '.strtab
1715 (link-string-table! strtab)
1716 '() '()
1717 #:type SHT_STRTAB #:flags 0)))
1718 (values (make-object asm '.symtab
1719 bv
1720 '() '()
1721 #:type SHT_SYMTAB #:flags 0 #:entsize size
1722 #:link (elf-section-index
1723 (linker-object-section strtab)))
1724 strtab))))
1725
1726 ;;; The .guile.arities section describes the arities that a function can
1727 ;;; have. It is in two parts: a sorted array of headers describing
1728 ;;; basic arities, and an array of links out to a string table (and in
1729 ;;; the case of keyword arguments, to the data section) for argument
1730 ;;; names. The whole thing is prefixed by a uint32 indicating the
1731 ;;; offset of the end of the headers array.
1732 ;;;
1733 ;;; The arity headers array is a packed array of structures of the form:
1734 ;;;
1735 ;;; struct arity_header {
1736 ;;; uint32_t low_pc;
1737 ;;; uint32_t high_pc;
1738 ;;; uint32_t offset;
1739 ;;; uint32_t flags;
1740 ;;; uint32_t nreq;
1741 ;;; uint32_t nopt;
1742 ;;; uint32_t nlocals;
1743 ;;; }
1744 ;;;
1745 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1746 ;;; future to use 64-bit offsets if appropriate, but there are other
1747 ;;; aspects of bytecode that constrain us to a total image that fits in
1748 ;;; 32 bits, so for the moment we'll simplify the problem space.
1749 ;;;
1750 ;;; The following flags values are defined:
1751 ;;;
1752 ;;; #x1: has-rest?
1753 ;;; #x2: allow-other-keys?
1754 ;;; #x4: has-keyword-args?
1755 ;;; #x8: is-case-lambda?
1756 ;;; #x10: is-in-case-lambda?
1757 ;;;
1758 ;;; Functions with a single arity specify their number of required and
1759 ;;; optional arguments in nreq and nopt, and do not have the
1760 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1761 ;;; of pointers into the associated .guile.arities.strtab string table,
1762 ;;; identifying the argument names. This offset is relative to the
1763 ;;; start of the .guile.arities section.
1764 ;;;
1765 ;;; If the arity has keyword arguments -- if has-keyword-args? is set in
1766 ;;; the flags -- the first uint32 pointed to by offset encodes a link to
1767 ;;; the "keyword indices" literal, in the data section. Then follow the
1768 ;;; names for all locals, in order, as uleb128 values. The required
1769 ;;; arguments will be the first locals, followed by the optionals,
1770 ;;; followed by the rest argument if if has-rest? is set. The names
1771 ;;; point into the associated string table section.
1772 ;;;
1773 ;;; Functions with no arities have no arities information present in the
1774 ;;; .guile.arities section.
1775 ;;;
1776 ;;; Functions with multiple arities are preceded by a header with
1777 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1778 ;;; high-pc which should be the bounds of the whole function. Headers
1779 ;;; for the individual arities follow, with the is-in-case-lambda? flag
1780 ;;; set. In this way the whole headers array is sorted in increasing
1781 ;;; low-pc order, and case-lambda clauses are contained within the
1782 ;;; [low-pc, high-pc] of the case-lambda header.
1783
1784 ;; Length of the prefix to the arities section, in bytes.
1785 (define arities-prefix-len 4)
1786
1787 ;; Length of an arity header, in bytes.
1788 (define arity-header-len (* 7 4))
1789
1790 ;; Some helpers.
1791 (define (put-uleb128 port val)
1792 (let lp ((val val))
1793 (let ((next (ash val -7)))
1794 (if (zero? next)
1795 (put-u8 port val)
1796 (begin
1797 (put-u8 port (logior #x80 (logand val #x7f)))
1798 (lp next))))))
1799
1800 (define (put-sleb128 port val)
1801 (let lp ((val val))
1802 (if (<= 0 (+ val 64) 127)
1803 (put-u8 port (logand val #x7f))
1804 (begin
1805 (put-u8 port (logior #x80 (logand val #x7f)))
1806 (lp (ash val -7))))))
1807
1808 (define (port-position port)
1809 (seek port 0 SEEK_CUR))
1810
1811 (define-inline (pack-arity-flags has-rest? allow-other-keys?
1812 has-keyword-args? is-case-lambda?
1813 is-in-case-lambda?)
1814 (logior (if has-rest? (ash 1 0) 0)
1815 (if allow-other-keys? (ash 1 1) 0)
1816 (if has-keyword-args? (ash 1 2) 0)
1817 (if is-case-lambda? (ash 1 3) 0)
1818 (if is-in-case-lambda? (ash 1 4) 0)))
1819
1820 (define (write-arities asm metas headers names-port strtab)
1821 (define (write-header pos low-pc high-pc offset flags nreq nopt nlocals)
1822 (unless (<= (+ nreq nopt) nlocals)
1823 (error "forgot to emit definition instructions?"))
1824 (bytevector-u32-set! headers pos (* low-pc 4) (asm-endianness asm))
1825 (bytevector-u32-set! headers (+ pos 4) (* high-pc 4) (asm-endianness asm))
1826 (bytevector-u32-set! headers (+ pos 8) offset (asm-endianness asm))
1827 (bytevector-u32-set! headers (+ pos 12) flags (asm-endianness asm))
1828 (bytevector-u32-set! headers (+ pos 16) nreq (asm-endianness asm))
1829 (bytevector-u32-set! headers (+ pos 20) nopt (asm-endianness asm))
1830 (bytevector-u32-set! headers (+ pos 24) nlocals (asm-endianness asm)))
1831 (define (write-kw-indices kw-indices relocs)
1832 ;; FIXME: Assert that kw-indices is already interned.
1833 (if (pair? kw-indices)
1834 (let ((pos (+ (bytevector-length headers)
1835 (port-position names-port)))
1836 (label (intern-constant asm kw-indices)))
1837 (put-bytevector names-port #vu8(0 0 0 0))
1838 (cons (make-linker-reloc 'abs32/1 pos 0 label) relocs))
1839 relocs))
1840 (define (write-arity pos arity in-case-lambda? relocs)
1841 (write-header pos (arity-low-pc arity)
1842 (arity-high-pc arity)
1843 ;; FIXME: Seems silly to add on bytevector-length of
1844 ;; headers, given the arities-prefix.
1845 (+ (bytevector-length headers) (port-position names-port))
1846 (pack-arity-flags (arity-rest arity)
1847 (arity-allow-other-keys? arity)
1848 (pair? (arity-kw-indices arity))
1849 #f
1850 in-case-lambda?)
1851 (length (arity-req arity))
1852 (length (arity-opt arity))
1853 (length (arity-definitions arity)))
1854 (let ((relocs (write-kw-indices (arity-kw-indices arity) relocs)))
1855 ;; Write local names.
1856 (let lp ((definitions (arity-definitions arity)))
1857 (match definitions
1858 (() relocs)
1859 ((#(name slot def) . definitions)
1860 (let ((sym (if (symbol? name)
1861 (string-table-intern! strtab (symbol->string name))
1862 0)))
1863 (put-uleb128 names-port sym)
1864 (lp definitions)))))
1865 ;; Now write their definitions.
1866 (let lp ((definitions (arity-definitions arity)))
1867 (match definitions
1868 (() relocs)
1869 ((#(name slot def) . definitions)
1870 (put-uleb128 names-port def)
1871 (put-uleb128 names-port slot)
1872 (lp definitions))))))
1873 (let lp ((metas metas) (pos arities-prefix-len) (relocs '()))
1874 (match metas
1875 (()
1876 (unless (= pos (bytevector-length headers))
1877 (error "expected to fully fill the bytevector"
1878 pos (bytevector-length headers)))
1879 relocs)
1880 ((meta . metas)
1881 (match (meta-arities meta)
1882 (() (lp metas pos relocs))
1883 ((arity)
1884 (lp metas
1885 (+ pos arity-header-len)
1886 (write-arity pos arity #f relocs)))
1887 (arities
1888 ;; Write a case-lambda header, then individual arities.
1889 ;; The case-lambda header's offset link is 0.
1890 (write-header pos (meta-low-pc meta) (meta-high-pc meta) 0
1891 (pack-arity-flags #f #f #f #t #f) 0 0 0)
1892 (let lp* ((arities arities) (pos (+ pos arity-header-len))
1893 (relocs relocs))
1894 (match arities
1895 (() (lp metas pos relocs))
1896 ((arity . arities)
1897 (lp* arities
1898 (+ pos arity-header-len)
1899 (write-arity pos arity #t relocs)))))))))))
1900
1901 (define (link-arities asm)
1902 (define (meta-arities-header-size meta)
1903 (define (lambda-size arity)
1904 arity-header-len)
1905 (define (case-lambda-size arities)
1906 (fold +
1907 arity-header-len ;; case-lambda header
1908 (map lambda-size arities))) ;; the cases
1909 (match (meta-arities meta)
1910 (() 0)
1911 ((arity) (lambda-size arity))
1912 (arities (case-lambda-size arities))))
1913
1914 (define (bytevector-append a b)
1915 (let ((out (make-bytevector (+ (bytevector-length a)
1916 (bytevector-length b)))))
1917 (bytevector-copy! a 0 out 0 (bytevector-length a))
1918 (bytevector-copy! b 0 out (bytevector-length a) (bytevector-length b))
1919 out))
1920
1921 (let* ((endianness (asm-endianness asm))
1922 (metas (reverse (asm-meta asm)))
1923 (header-size (fold (lambda (meta size)
1924 (+ size (meta-arities-header-size meta)))
1925 arities-prefix-len
1926 metas))
1927 (strtab (make-string-table))
1928 (headers (make-bytevector header-size 0)))
1929 (bytevector-u32-set! headers 0 (bytevector-length headers) endianness)
1930 (let-values (((names-port get-name-bv) (open-bytevector-output-port)))
1931 (let* ((relocs (write-arities asm metas headers names-port strtab))
1932 (strtab (make-object asm '.guile.arities.strtab
1933 (link-string-table! strtab)
1934 '() '()
1935 #:type SHT_STRTAB #:flags 0)))
1936 (values (make-object asm '.guile.arities
1937 (bytevector-append headers (get-name-bv))
1938 relocs '()
1939 #:type SHT_PROGBITS #:flags 0
1940 #:link (elf-section-index
1941 (linker-object-section strtab)))
1942 strtab)))))
1943
1944 ;;;
1945 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1946 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1947 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1948 ;;; entry to a program, relative to the start of the text section, in
1949 ;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1950 ;;; string table section.
1951 ;;;
1952
1953 ;; The size of a docstrs entry, in bytes.
1954 (define docstr-size 8)
1955
1956 (define (link-docstrs asm)
1957 (define (find-docstrings)
1958 (filter-map (lambda (meta)
1959 (define (is-documentation? pair)
1960 (eq? (car pair) 'documentation))
1961 (let* ((props (meta-properties meta))
1962 (tail (find-tail is-documentation? props)))
1963 (and tail
1964 (not (find-tail is-documentation? (cdr tail)))
1965 (string? (cdar tail))
1966 (cons (* 4 (meta-low-pc meta)) (cdar tail)))))
1967 (reverse (asm-meta asm))))
1968 (let* ((endianness (asm-endianness asm))
1969 (docstrings (find-docstrings))
1970 (strtab (make-string-table))
1971 (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
1972 (fold (lambda (pair pos)
1973 (match pair
1974 ((pc . string)
1975 (bytevector-u32-set! bv pos pc endianness)
1976 (bytevector-u32-set! bv (+ pos 4)
1977 (string-table-intern! strtab string)
1978 endianness)
1979 (+ pos docstr-size))))
1980 0
1981 docstrings)
1982 (let ((strtab (make-object asm '.guile.docstrs.strtab
1983 (link-string-table! strtab)
1984 '() '()
1985 #:type SHT_STRTAB #:flags 0)))
1986 (values (make-object asm '.guile.docstrs
1987 bv
1988 '() '()
1989 #:type SHT_PROGBITS #:flags 0
1990 #:link (elf-section-index
1991 (linker-object-section strtab)))
1992 strtab))))
1993
1994 ;;;
1995 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1996 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1997 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1998 ;;; entry to a program, relative to the start of the text section, and
1999 ;;; addr is the address of the associated properties alist, relative to
2000 ;;; the start of the ELF image.
2001 ;;;
2002 ;;; Since procedure properties are stored in the data sections, we need
2003 ;;; to link the procedures property section first. (Note that this
2004 ;;; constraint does not apply to the arities section, which may
2005 ;;; reference the data sections via the kw-indices literal, because
2006 ;;; assembling the text section already makes sure that the kw-indices
2007 ;;; are interned.)
2008 ;;;
2009
2010 ;; The size of a procprops entry, in bytes.
2011 (define procprops-size 8)
2012
2013 (define (link-procprops asm)
2014 (define (assoc-remove-one alist key value-pred)
2015 (match alist
2016 (() '())
2017 ((((? (lambda (x) (eq? x key))) . value) . alist)
2018 (if (value-pred value)
2019 alist
2020 (acons key value alist)))
2021 (((k . v) . alist)
2022 (acons k v (assoc-remove-one alist key value-pred)))))
2023 (define (props-without-name-or-docstring meta)
2024 (assoc-remove-one
2025 (assoc-remove-one (meta-properties meta) 'name (lambda (x) #t))
2026 'documentation
2027 string?))
2028 (define (find-procprops)
2029 (filter-map (lambda (meta)
2030 (let ((props (props-without-name-or-docstring meta)))
2031 (and (pair? props)
2032 (cons (* 4 (meta-low-pc meta)) props))))
2033 (reverse (asm-meta asm))))
2034 (let* ((endianness (asm-endianness asm))
2035 (procprops (find-procprops))
2036 (bv (make-bytevector (* (length procprops) procprops-size) 0)))
2037 (let lp ((procprops procprops) (pos 0) (relocs '()))
2038 (match procprops
2039 (()
2040 (make-object asm '.guile.procprops
2041 bv
2042 relocs '()
2043 #:type SHT_PROGBITS #:flags 0))
2044 (((pc . props) . procprops)
2045 (bytevector-u32-set! bv pos pc endianness)
2046 (lp procprops
2047 (+ pos procprops-size)
2048 (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
2049 (intern-constant asm props))
2050 relocs)))))))
2051
2052 ;;;
2053 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
2054 ;;; sections provide line number and local variable liveness
2055 ;;; information. Their format is defined by the DWARF
2056 ;;; specifications.
2057 ;;;
2058
2059 (define (asm-language asm)
2060 ;; FIXME: Plumb language through to the assembler.
2061 'scheme)
2062
2063 ;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
2064 (define (link-debug asm)
2065 (define (put-s8 port val)
2066 (let ((bv (make-bytevector 1)))
2067 (bytevector-s8-set! bv 0 val)
2068 (put-bytevector port bv)))
2069
2070 (define (put-u16 port val)
2071 (let ((bv (make-bytevector 2)))
2072 (bytevector-u16-set! bv 0 val (asm-endianness asm))
2073 (put-bytevector port bv)))
2074
2075 (define (put-u32 port val)
2076 (let ((bv (make-bytevector 4)))
2077 (bytevector-u32-set! bv 0 val (asm-endianness asm))
2078 (put-bytevector port bv)))
2079
2080 (define (put-u64 port val)
2081 (let ((bv (make-bytevector 8)))
2082 (bytevector-u64-set! bv 0 val (asm-endianness asm))
2083 (put-bytevector port bv)))
2084
2085 (define (meta->subprogram-die meta)
2086 `(subprogram
2087 (@ ,@(cond
2088 ((meta-name meta)
2089 => (lambda (name) `((name ,(symbol->string name)))))
2090 (else
2091 '()))
2092 (low-pc ,(meta-label meta))
2093 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
2094
2095 (define (make-compile-unit-die asm)
2096 `(compile-unit
2097 (@ (producer ,(string-append "Guile " (version)))
2098 (language ,(asm-language asm))
2099 (low-pc .rtl-text)
2100 (high-pc ,(* 4 (asm-pos asm)))
2101 (stmt-list 0))
2102 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
2103
2104 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
2105 ((die-relocs) '())
2106 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
2107 ;; (tag has-kids? attrs forms) -> code
2108 ((abbrevs) vlist-null)
2109 ((strtab) (make-string-table))
2110 ((line-port get-line-bv) (open-bytevector-output-port))
2111 ((line-relocs) '())
2112 ;; file -> code
2113 ((files) vlist-null))
2114
2115 (define (write-abbrev code tag has-children? attrs forms)
2116 (put-uleb128 abbrev-port code)
2117 (put-uleb128 abbrev-port (tag-name->code tag))
2118 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
2119 (for-each (lambda (attr form)
2120 (put-uleb128 abbrev-port (attribute-name->code attr))
2121 (put-uleb128 abbrev-port (form-name->code form)))
2122 attrs forms)
2123 (put-uleb128 abbrev-port 0)
2124 (put-uleb128 abbrev-port 0))
2125
2126 (define (intern-abbrev tag has-children? attrs forms)
2127 (let ((key (list tag has-children? attrs forms)))
2128 (match (vhash-assoc key abbrevs)
2129 ((_ . code) code)
2130 (#f (let ((code (1+ (vlist-length abbrevs))))
2131 (set! abbrevs (vhash-cons key code abbrevs))
2132 (write-abbrev code tag has-children? attrs forms)
2133 code)))))
2134
2135 (define (intern-file file)
2136 (match (vhash-assoc file files)
2137 ((_ . code) code)
2138 (#f (let ((code (1+ (vlist-length files))))
2139 (set! files (vhash-cons file code files))
2140 code))))
2141
2142 (define (write-sources)
2143 ;; Choose line base and line range values that will allow for an
2144 ;; address advance range of 16 words. The special opcode range is
2145 ;; from 10 to 255, so 246 values.
2146 (define base -4)
2147 (define range 15)
2148
2149 (let lp ((sources (asm-sources asm)) (out '()))
2150 (match sources
2151 (((pc . s) . sources)
2152 (let ((file (assq-ref s 'filename))
2153 (line (assq-ref s 'line))
2154 (col (assq-ref s 'column)))
2155 (lp sources
2156 ;; Guile line and column numbers are 0-indexed, but
2157 ;; they are 1-indexed for DWARF.
2158 (if (and line col)
2159 (cons (list pc
2160 (if (string? file) (intern-file file) 0)
2161 (1+ line)
2162 (1+ col))
2163 out)
2164 out))))
2165 (()
2166 ;; Compilation unit header for .debug_line. We write in
2167 ;; DWARF 2 format because more tools understand it than DWARF
2168 ;; 4, which incompatibly adds another field to this header.
2169
2170 (put-u32 line-port 0) ; Length; will patch later.
2171 (put-u16 line-port 2) ; DWARF 2 format.
2172 (put-u32 line-port 0) ; Prologue length; will patch later.
2173 (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
2174 (put-u8 line-port 1) ; Default is-stmt: true.
2175
2176 (put-s8 line-port base) ; Line base. See the DWARF standard.
2177 (put-u8 line-port range) ; Line range. See the DWARF standard.
2178 (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
2179
2180 ;; A table of the number of uleb128 arguments taken by each
2181 ;; of the standard opcodes.
2182 (put-u8 line-port 0) ; 1: copy
2183 (put-u8 line-port 1) ; 2: advance-pc
2184 (put-u8 line-port 1) ; 3: advance-line
2185 (put-u8 line-port 1) ; 4: set-file
2186 (put-u8 line-port 1) ; 5: set-column
2187 (put-u8 line-port 0) ; 6: negate-stmt
2188 (put-u8 line-port 0) ; 7: set-basic-block
2189 (put-u8 line-port 0) ; 8: const-add-pc
2190 (put-u8 line-port 1) ; 9: fixed-advance-pc
2191
2192 ;; Include directories, as a zero-terminated sequence of
2193 ;; nul-terminated strings. Nothing, for the moment.
2194 (put-u8 line-port 0)
2195
2196 ;; File table. For each file that contributes to this
2197 ;; compilation unit, a nul-terminated file name string, and a
2198 ;; uleb128 for each of directory the file was found in, the
2199 ;; modification time, and the file's size in bytes. We pass
2200 ;; zero for the latter three fields.
2201 (vlist-fold-right
2202 (lambda (pair seed)
2203 (match pair
2204 ((file . code)
2205 (put-bytevector line-port (string->utf8 file))
2206 (put-u8 line-port 0)
2207 (put-uleb128 line-port 0) ; directory
2208 (put-uleb128 line-port 0) ; mtime
2209 (put-uleb128 line-port 0))) ; size
2210 seed)
2211 #f
2212 files)
2213 (put-u8 line-port 0) ; 0 byte terminating file list.
2214
2215 ;; Patch prologue length.
2216 (let ((offset (port-position line-port)))
2217 (seek line-port 6 SEEK_SET)
2218 (put-u32 line-port (- offset 10))
2219 (seek line-port offset SEEK_SET))
2220
2221 ;; Now write the statement program.
2222 (let ()
2223 (define (extended-op opcode payload-len)
2224 (put-u8 line-port 0) ; extended op
2225 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
2226 (put-uleb128 line-port opcode))
2227 (define (set-address sym)
2228 (define (add-reloc! kind)
2229 (set! line-relocs
2230 (cons (make-linker-reloc kind
2231 (port-position line-port)
2232 0
2233 sym)
2234 line-relocs)))
2235 (match (asm-word-size asm)
2236 (4
2237 (extended-op 2 4)
2238 (add-reloc! 'abs32/1)
2239 (put-u32 line-port 0))
2240 (8
2241 (extended-op 2 8)
2242 (add-reloc! 'abs64/1)
2243 (put-u64 line-port 0))))
2244 (define (end-sequence pc)
2245 (let ((pc-inc (- (asm-pos asm) pc)))
2246 (put-u8 line-port 2) ; advance-pc
2247 (put-uleb128 line-port pc-inc))
2248 (extended-op 1 0))
2249 (define (advance-pc pc-inc line-inc)
2250 (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
2251 (cond
2252 ((or (< line-inc base) (>= line-inc (+ base range)))
2253 (advance-line line-inc)
2254 (advance-pc pc-inc 0))
2255 ((<= spec 255)
2256 (put-u8 line-port spec))
2257 ((< spec 500)
2258 (put-u8 line-port 8) ; const-advance-pc
2259 (advance-pc (- pc-inc (floor/ (- 255 10) range))
2260 line-inc))
2261 (else
2262 (put-u8 line-port 2) ; advance-pc
2263 (put-uleb128 line-port pc-inc)
2264 (advance-pc 0 line-inc)))))
2265 (define (advance-line inc)
2266 (put-u8 line-port 3)
2267 (put-sleb128 line-port inc))
2268 (define (set-file file)
2269 (put-u8 line-port 4)
2270 (put-uleb128 line-port file))
2271 (define (set-column col)
2272 (put-u8 line-port 5)
2273 (put-uleb128 line-port col))
2274
2275 (set-address '.rtl-text)
2276
2277 (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
2278 (match in
2279 (()
2280 (when (null? out)
2281 ;; There was no source info in the first place. Set
2282 ;; file register to 0 before adding final row.
2283 (set-file 0))
2284 (end-sequence pc))
2285 (((pc* file* line* col*) . in*)
2286 (cond
2287 ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
2288 (lp in* pc file line col))
2289 (else
2290 (unless (eqv? col col*)
2291 (set-column col*))
2292 (unless (eqv? file file*)
2293 (set-file file*))
2294 (advance-pc (- pc* pc) (- line* line))
2295 (lp in* pc* file* line* col*)))))))))))
2296
2297 (define (compute-code attr val)
2298 (match attr
2299 ('name (string-table-intern! strtab val))
2300 ('low-pc val)
2301 ('high-pc val)
2302 ('producer (string-table-intern! strtab val))
2303 ('language (language-name->code val))
2304 ('stmt-list val)))
2305
2306 (define (choose-form attr val code)
2307 (cond
2308 ((string? val) 'strp)
2309 ((eq? attr 'stmt-list) 'sec-offset)
2310 ((eq? attr 'low-pc) 'addr)
2311 ((exact-integer? code)
2312 (cond
2313 ((< code 0) 'sleb128)
2314 ((<= code #xff) 'data1)
2315 ((<= code #xffff) 'data2)
2316 ((<= code #xffffffff) 'data4)
2317 ((<= code #xffffffffffffffff) 'data8)
2318 (else 'uleb128)))
2319 (else (error "unhandled case" attr val code))))
2320
2321 (define (add-die-relocation! kind sym)
2322 (set! die-relocs
2323 (cons (make-linker-reloc kind (port-position die-port) 0 sym)
2324 die-relocs)))
2325
2326 (define (write-value code form)
2327 (match form
2328 ('data1 (put-u8 die-port code))
2329 ('data2 (put-u16 die-port code))
2330 ('data4 (put-u32 die-port code))
2331 ('data8 (put-u64 die-port code))
2332 ('uleb128 (put-uleb128 die-port code))
2333 ('sleb128 (put-sleb128 die-port code))
2334 ('addr
2335 (match (asm-word-size asm)
2336 (4
2337 (add-die-relocation! 'abs32/1 code)
2338 (put-u32 die-port 0))
2339 (8
2340 (add-die-relocation! 'abs64/1 code)
2341 (put-u64 die-port 0))))
2342 ('sec-offset (put-u32 die-port code))
2343 ('strp (put-u32 die-port code))))
2344
2345 (define (write-die die)
2346 (match die
2347 ((tag ('@ (attrs vals) ...) children ...)
2348 (let* ((codes (map compute-code attrs vals))
2349 (forms (map choose-form attrs vals codes))
2350 (has-children? (not (null? children)))
2351 (abbrev-code (intern-abbrev tag has-children? attrs forms)))
2352 (put-uleb128 die-port abbrev-code)
2353 (for-each write-value codes forms)
2354 (when has-children?
2355 (for-each write-die children)
2356 (put-uleb128 die-port 0))))))
2357
2358 ;; Compilation unit header.
2359 (put-u32 die-port 0) ; Length; will patch later.
2360 (put-u16 die-port 4) ; DWARF 4.
2361 (put-u32 die-port 0) ; Abbrevs offset.
2362 (put-u8 die-port (asm-word-size asm)) ; Address size.
2363
2364 (write-die (make-compile-unit-die asm))
2365
2366 ;; Terminate the abbrevs list.
2367 (put-uleb128 abbrev-port 0)
2368
2369 (write-sources)
2370
2371 (values (let ((bv (get-die-bv)))
2372 ;; Patch DWARF32 length.
2373 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2374 (asm-endianness asm))
2375 (make-object asm '.debug_info bv die-relocs '()
2376 #:type SHT_PROGBITS #:flags 0))
2377 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
2378 #:type SHT_PROGBITS #:flags 0)
2379 (make-object asm '.debug_str (link-string-table! strtab) '() '()
2380 #:type SHT_PROGBITS #:flags 0)
2381 (make-object asm '.debug_loc #vu8() '() '()
2382 #:type SHT_PROGBITS #:flags 0)
2383 (let ((bv (get-line-bv)))
2384 ;; Patch DWARF32 length.
2385 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2386 (asm-endianness asm))
2387 (make-object asm '.debug_line bv line-relocs '()
2388 #:type SHT_PROGBITS #:flags 0)))))
2389
2390 (define (link-objects asm)
2391 (let*-values (;; Link procprops before constants, because it probably
2392 ;; interns more constants.
2393 ((procprops) (link-procprops asm))
2394 ((ro rw rw-init) (link-constants asm))
2395 ;; Link text object after constants, so that the
2396 ;; constants initializer gets included.
2397 ((text) (link-text-object asm))
2398 ((frame-maps) (link-frame-maps asm))
2399 ((dt) (link-dynamic-section asm text rw rw-init frame-maps))
2400 ((symtab strtab) (link-symtab (linker-object-section text) asm))
2401 ((arities arities-strtab) (link-arities asm))
2402 ((docstrs docstrs-strtab) (link-docstrs asm))
2403 ((dinfo dabbrev dstrtab dloc dline) (link-debug asm))
2404 ;; This needs to be linked last, because linking other
2405 ;; sections adds entries to the string table.
2406 ((shstrtab) (link-shstrtab asm)))
2407 (filter identity
2408 (list text ro frame-maps rw dt symtab strtab
2409 arities arities-strtab
2410 docstrs docstrs-strtab procprops
2411 dinfo dabbrev dstrtab dloc dline
2412 shstrtab))))
2413
2414
2415 \f
2416
2417 ;;;
2418 ;;; High-level public interfaces.
2419 ;;;
2420
2421 (define* (link-assembly asm #:key (page-aligned? #t))
2422 "Produce an ELF image from the code and data emitted into @var{asm}.
2423 The result is a bytevector, by default linked so that read-only and
2424 writable data are on separate pages. Pass @code{#:page-aligned? #f} to
2425 disable this behavior."
2426 (define (asm-constant-vector asm)
2427 (list->vector (reverse (map car (vlist->list (asm-constants asm))))))
2428 (let ((bv (link-elf (link-objects asm) #:page-aligned? page-aligned?)))
2429 (cons bv (if (asm-to-file? asm) #f (asm-constant-vector asm)))))