1 ;;; Guile bytecode assembler
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014 Free Software Foundation, Inc.
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; bytecode assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
27 ;;; "Primitive instructions" correspond to VM operations. Assemblers
28 ;;; for primitive instructions are generated programmatically from
29 ;;; (instruction-list), which itself is derived from the VM sources.
30 ;;; There are also "macro-instructions" like "label" or "load-constant"
31 ;;; that expand to 0 or more primitive instructions.
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm dwarf)
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
50 #:use-module (language bytecode)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 binary-ports)
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
66 ;;; Bytecode consists of 32-bit units, often subdivided in some way.
67 ;;; These helpers create one 32-bit unit from multiple components.
69 (define-inlinable (pack-u8-u24 x y)
71 (error "out of range" x))
74 (define-inlinable (pack-u8-s24 x y)
76 (error "out of range" x))
82 (else (error "out of range" y)))
85 (define-inlinable (pack-u1-u7-u24 x y z)
87 (error "out of range" x))
89 (error "out of range" y))
90 (logior x (ash y 1) (ash z 8)))
92 (define-inlinable (pack-u8-u12-u12 x y z)
94 (error "out of range" x))
96 (error "out of range" y))
97 (logior x (ash y 8) (ash z 20)))
99 (define-inlinable (pack-u8-u8-u16 x y z)
101 (error "out of range" x))
103 (error "out of range" y))
104 (logior x (ash y 8) (ash z 16)))
106 (define-inlinable (pack-u8-u8-u8-u8 x y z w)
108 (error "out of range" x))
110 (error "out of range" y))
112 (error "out of range" z))
113 (logior x (ash y 8) (ash z 16) (ash w 24)))
115 (define-syntax pack-flags
117 ;; Add clauses as needed.
118 ((pack-flags f1 f2) (logior (if f1 (ash 1 0) 0)
119 (if f2 (ash 2 0) 0)))))
121 ;;; Helpers to read and write 32-bit units in a buffer.
123 (define-syntax-rule (u32-ref buf n)
124 (bytevector-u32-native-ref buf (* n 4)))
126 (define-syntax-rule (u32-set! buf n val)
127 (bytevector-u32-native-set! buf (* n 4) val))
129 (define-syntax-rule (s32-ref buf n)
130 (bytevector-s32-native-ref buf (* n 4)))
132 (define-syntax-rule (s32-set! buf n val)
133 (bytevector-s32-native-set! buf (* n 4) val))
138 ;;; A <meta> entry collects metadata for one procedure. Procedures are
139 ;;; written as contiguous ranges of bytecode.
141 (define-syntax-rule (assert-match arg pattern kind)
143 (unless (match x (pattern #t) (_ #f))
144 (error (string-append "expected " kind) x))))
146 (define-record-type <meta>
147 (%make-meta label properties low-pc high-pc arities)
150 (properties meta-properties set-meta-properties!)
152 (high-pc meta-high-pc set-meta-high-pc!)
153 (arities meta-arities set-meta-arities!))
155 (define (make-meta label properties low-pc)
156 (assert-match label (? symbol?) "symbol")
157 (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
158 (%make-meta label properties low-pc #f '()))
160 (define (meta-name meta)
161 (assq-ref (meta-properties meta) 'name))
163 ;; Metadata for one <lambda-case>.
164 (define-record-type <arity>
165 (make-arity req opt rest kw-indices allow-other-keys?
171 (kw-indices arity-kw-indices)
172 (allow-other-keys? arity-allow-other-keys?)
173 (low-pc arity-low-pc)
174 (high-pc arity-high-pc set-arity-high-pc!))
176 (define-syntax *block-size* (identifier-syntax 32))
178 ;;; An assembler collects all of the words emitted during assembly, and
179 ;;; also maintains ancillary information such as the constant table, a
180 ;;; relocation list, and so on.
182 ;;; Bytecode consists of 32-bit units. We emit bytecode using native
183 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
184 ;;; the bytevector as a whole instead of conditionalizing each access.
186 (define-record-type <asm>
187 (make-asm cur idx start prev written
191 shstrtab next-section-number
196 ;; We write bytecode into what is logically a growable vector,
197 ;; implemented as a list of blocks. asm-cur is the current block, and
198 ;; asm-idx is the current index into that block, in 32-bit units.
200 (cur asm-cur set-asm-cur!)
201 (idx asm-idx set-asm-idx!)
203 ;; asm-start is an absolute position, indicating the offset of the
204 ;; beginning of an instruction (in u32 units). It is updated after
205 ;; writing all the words for one primitive instruction. It models the
206 ;; position of the instruction pointer during execution, given that
207 ;; the VM updates the IP only at the end of executing the instruction,
208 ;; and is thus useful for computing offsets between two points in a
211 (start asm-start set-asm-start!)
213 ;; The list of previously written blocks.
215 (prev asm-prev set-asm-prev!)
217 ;; The number of u32 words written in asm-prev, which is the same as
218 ;; the offset of the current block.
220 (written asm-written set-asm-written!)
222 ;; An alist of symbol -> position pairs, indicating the labels defined
223 ;; in this compilation unit.
225 (labels asm-labels set-asm-labels!)
227 ;; A list of relocations needed by the program text. We use an
228 ;; internal representation for relocations, and handle textualn
229 ;; relative relocations in the assembler. Other kinds of relocations
230 ;; are later reified as linker relocations and resolved by the linker.
232 (relocs asm-relocs set-asm-relocs!)
234 ;; Target information.
236 (word-size asm-word-size)
237 (endianness asm-endianness)
239 ;; The constant table, as a vhash of object -> label. All constants
240 ;; get de-duplicated and written into separate sections -- either the
241 ;; .rodata section, for read-only data, or .data, for constants that
242 ;; need initialization at load-time (like symbols). Constants can
243 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
244 ;; so order in this table is important.
246 (constants asm-constants set-asm-constants!)
248 ;; A list of instructions needed to initialize the constants. Will
249 ;; run in a thunk with 2 local variables.
251 (inits asm-inits set-asm-inits!)
253 ;; The shstrtab, for section names.
255 (shstrtab asm-shstrtab set-asm-shstrtab!)
257 ;; The section number for the next section to be written.
259 (next-section-number asm-next-section-number set-asm-next-section-number!)
261 ;; A list of <meta>, corresponding to procedure metadata.
263 (meta asm-meta set-asm-meta!)
265 ;; A list of (pos . source) pairs, indicating source information. POS
266 ;; is relative to the beginning of the text section, and SOURCE is in
267 ;; the same format that source-properties returns.
269 (sources asm-sources set-asm-sources!)
271 ;; A list of (pos . dead-slot-map) pairs, indicating dead slot maps.
272 ;; POS is relative to the beginning of the text section.
273 ;; DEAD-SLOT-MAP is a bitfield of slots that are dead at call sites,
276 (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
278 (define-inlinable (fresh-block)
279 (make-u32vector *block-size*))
281 (define* (make-assembler #:key (word-size (target-word-size))
282 (endianness (target-endianness)))
283 "Create an assembler for a given target @var{word-size} and
284 @var{endianness}, falling back to appropriate values for the configured
286 (make-asm (fresh-block) 0 0 '() 0
287 (make-hash-table) '()
290 (make-string-table) 1
293 (define (intern-section-name! asm string)
294 "Add a string to the section name table (shstrtab)."
295 (string-table-intern! (asm-shstrtab asm) string))
297 (define-inlinable (asm-pos asm)
298 "The offset of the next word to be written into the code buffer, in
300 (+ (asm-idx asm) (asm-written asm)))
302 (define (allocate-new-block asm)
303 "Close off the current block, and arrange for the next word to be
304 written to a fresh block."
305 (let ((new (fresh-block)))
306 (set-asm-prev! asm (cons (asm-cur asm) (asm-prev asm)))
307 (set-asm-written! asm (asm-pos asm))
308 (set-asm-cur! asm new)
309 (set-asm-idx! asm 0)))
311 (define-inlinable (emit asm u32)
312 "Emit one 32-bit word into the instruction stream. Assumes that there
313 is space for the word, and ensures that there is space for the next
315 (u32-set! (asm-cur asm) (asm-idx asm) u32)
316 (set-asm-idx! asm (1+ (asm-idx asm)))
317 (if (= (asm-idx asm) *block-size*)
318 (allocate-new-block asm)))
320 (define-inlinable (make-reloc type label base word)
321 "Make an internal relocation of type @var{type} referencing symbol
322 @var{label}, @var{word} words after position @var{start}. @var{type}
323 may be x8-s24, indicating a 24-bit relative label reference that can be
324 fixed up by the assembler, or s32, indicating a 32-bit relative
325 reference that needs to be fixed up by the linker."
326 (list type label base word))
328 (define-inlinable (reset-asm-start! asm)
329 "Reset the asm-start after writing the words for one instruction."
330 (set-asm-start! asm (asm-pos asm)))
332 (define (record-label-reference asm label)
333 "Record an x8-s24 local label reference. This value will get patched
334 up later by the assembler."
335 (let* ((start (asm-start asm))
337 (reloc (make-reloc 'x8-s24 label start (- pos start))))
338 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
340 (define* (record-far-label-reference asm label #:optional (offset 0))
341 "Record an s32 far label reference. This value will get patched up
342 later by the linker."
343 (let* ((start (- (asm-start asm) offset))
345 (reloc (make-reloc 's32 label start (- pos start))))
346 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
352 ;;; Primitive assemblers are defined by expanding `assembler' for each
353 ;;; opcode in `(instruction-list)'.
356 (eval-when (expand compile load eval)
357 (define (id-append ctx a b)
358 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
360 (define-syntax assembler
362 (define-syntax op-case
365 ((_ asm name ((type arg ...) code ...) clause ...)
366 #`(if (eq? name 'type)
367 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
370 (op-case asm name clause ...)))
372 #'(error "unmatched name" name)))))
374 (define (pack-first-word asm opcode type)
375 (with-syntax ((opcode opcode))
381 (emit asm (pack-u8-u24 opcode arg)))
383 (record-label-reference asm label)
386 (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
388 (emit asm (pack-u8-u12-u12 opcode a b)))
390 (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
392 (define (pack-tail-word asm type)
396 (emit asm (pack-u8-u24 a b)))
398 (record-label-reference asm label)
401 (emit asm (pack-u8-u8-u16 a b (object-address imm))))
403 (emit asm (pack-u8-u12-u12 a b c)))
404 ((U8_U8_U8_U8 a b c d)
405 (emit asm (pack-u8-u8-u8-u8 a b c d)))
409 (let ((val (object-address imm)))
410 (unless (zero? (ash val -32))
411 (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
414 (unless (= (asm-word-size asm) 8)
415 (error "make-long-immediate unavailable for this target"))
416 (emit asm (ash (object-address imm) -32))
417 (emit asm (logand (object-address imm) (1- (ash 1 32)))))
420 (record-far-label-reference asm label)
423 (record-far-label-reference asm label)
426 (record-far-label-reference asm label)
429 (record-far-label-reference asm label
430 (* offset (/ (asm-word-size asm) 4)))
433 (emit asm (pack-u8-u24 0 a)))
435 (emit asm (pack-u8-u12-u12 0 a b)))
437 (record-label-reference asm label)
440 (record-label-reference asm label)
441 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
442 ((B1_U7_L24 a b label)
443 (record-label-reference asm label)
444 (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
446 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
448 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
451 ((_ name opcode word0 word* ...)
452 (with-syntax ((((formal0 ...)
454 (pack-first-word #'asm
455 (syntax->datum #'opcode)
456 (syntax->datum #'word0)))
459 (map (lambda (word) (pack-tail-word #'asm word))
460 (syntax->datum #'(word* ...)))))
461 #'(lambda (asm formal0 ... formal* ... ...)
462 (unless (asm? asm) (error "not an asm"))
465 (reset-asm-start! asm)))))))
467 (define assemblers (make-hash-table))
469 (define-syntax define-assembler
472 ((_ name opcode kind arg ...)
473 (with-syntax ((emit (id-append #'name #'emit- #'name)))
476 (let ((emit (assembler name opcode arg ...)))
477 (hashq-set! assemblers 'name emit)
481 (define-syntax visit-opcodes
484 ((visit-opcodes macro arg ...)
485 (with-syntax (((inst ...)
486 (map (lambda (x) (datum->syntax #'macro x))
487 (instruction-list))))
489 (macro arg ... . inst)
492 (visit-opcodes define-assembler)
494 (define (emit-text asm instructions)
495 "Assemble @var{instructions} using the assembler @var{asm}.
496 @var{instructions} is a sequence of instructions, expressed as a list of
497 lists. This procedure can be called many times before calling
498 @code{link-assembly}."
499 (for-each (lambda (inst)
500 (apply (or (hashq-ref assemblers (car inst))
501 (error 'bad-instruction inst))
509 ;;; The constant table records a topologically sorted set of literal
510 ;;; constants used by a program. For example, a pair uses its car and
511 ;;; cdr, a string uses its stringbuf, etc.
513 ;;; Some things we want to add to the constant table are not actually
514 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
515 ;;; references, or cache cells for non-closure procedures. For these we
516 ;;; define special record types and add instances of those record types
520 (define-inlinable (immediate? x)
521 "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
522 (not (zero? (logand (object-address x) 6))))
524 (define-record-type <stringbuf>
525 (make-stringbuf string)
527 (string stringbuf-string))
529 (define-record-type <static-procedure>
530 (make-static-procedure code)
532 (code static-procedure-code))
534 (define-record-type <uniform-vector-backing-store>
535 (make-uniform-vector-backing-store bytes element-size)
536 uniform-vector-backing-store?
537 (bytes uniform-vector-backing-store-bytes)
538 (element-size uniform-vector-backing-store-element-size))
540 (define-record-type <cache-cell>
541 (make-cache-cell scope key)
543 (scope cache-cell-scope)
544 (key cache-cell-key))
546 (define (simple-vector? obj)
548 (equal? (array-shape obj) (list (list 0 (1- (vector-length obj)))))))
550 (define (simple-uniform-vector? obj)
552 (symbol? (array-type obj))
553 (equal? (array-shape obj) (list (list 0 (1- (array-length obj)))))))
555 (define (statically-allocatable? x)
556 "Return @code{#t} if a non-immediate constant can be allocated
557 statically, and @code{#f} if it would need some kind of runtime
559 (or (pair? x) (string? x) (stringbuf? x) (static-procedure? x) (array? x)))
561 (define (intern-constant asm obj)
562 "Add an object to the constant table, and return a label that can be
563 used to reference it. If the object is already present in the constant
564 table, its existing label is used directly."
566 (intern-constant asm obj))
567 (define (field dst n obj)
568 (let ((src (recur obj)))
570 (if (statically-allocatable? obj)
571 `((static-patch! ,dst ,n ,src))
572 `((static-ref 1 ,src)
573 (static-set! 1 ,dst ,n)))
575 (define (intern obj label)
578 (append (field label 0 (car obj))
579 (field label 1 (cdr obj))))
580 ((simple-vector? obj)
581 (let lp ((i 0) (inits '()))
582 (if (< i (vector-length obj))
584 (append-reverse (field label (1+ i) (vector-ref obj i))
587 ((stringbuf? obj) '())
588 ((static-procedure? obj)
589 `((static-patch! ,label 1 ,(static-procedure-code obj))))
590 ((cache-cell? obj) '())
592 `((make-non-immediate 1 ,(recur (symbol->string obj)))
594 (static-set! 1 ,label 0)))
596 `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
598 `((static-ref 1 ,(recur (keyword->symbol obj)))
599 (symbol->keyword 1 1)
600 (static-set! 1 ,label 0)))
602 `((make-non-immediate 1 ,(recur (number->string obj)))
604 (static-set! 1 ,label 0)))
605 ((uniform-vector-backing-store? obj) '())
606 ((simple-uniform-vector? obj)
607 `((static-patch! ,label 2
608 ,(recur (make-uniform-vector-backing-store
609 (uniform-array->bytevector obj)
611 ;; Bitvectors are addressed in
614 (uniform-vector-element-size obj)))))))
616 (error "don't know how to intern" obj))))
618 ((immediate? obj) #f)
619 ((vhash-assoc obj (asm-constants asm)) => cdr)
621 ;; Note that calling intern may mutate asm-constants and
622 ;; asm-constant-inits.
623 (let* ((label (gensym "constant"))
624 (inits (intern obj label)))
625 (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
626 (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
629 (define (intern-non-immediate asm obj)
630 "Intern a non-immediate into the constant table, and return its
632 (when (immediate? obj)
633 (error "expected a non-immediate" obj))
634 (intern-constant asm obj))
636 (define (intern-cache-cell asm scope key)
637 "Intern a cache cell into the constant table, and return its label.
638 If there is already a cache cell with the given scope and key, it is
640 (intern-constant asm (make-cache-cell scope key)))
642 ;; Return the label of the cell that holds the module for a scope.
643 (define (intern-module-cache-cell asm scope)
644 "Intern a cache cell for a module, and return its label."
645 (intern-cache-cell asm scope #t))
651 ;;; Macro assemblers bridge the gap between primitive instructions and
652 ;;; some higher-level operations.
655 (define-syntax define-macro-assembler
658 ((_ (name arg ...) body body* ...)
659 (with-syntax ((emit (id-append #'name #'emit- #'name)))
662 (let ((emit (lambda (arg ...) body body* ...)))
663 (hashq-set! assemblers 'name emit)
667 (define-macro-assembler (load-constant asm dst obj)
670 (let ((bits (object-address obj)))
672 ((and (< dst 256) (zero? (ash bits -16)))
673 (emit-make-short-immediate asm dst obj))
674 ((zero? (ash bits -32))
675 (emit-make-long-immediate asm dst obj))
677 (emit-make-long-long-immediate asm dst obj)))))
678 ((statically-allocatable? obj)
679 (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
681 (emit-static-ref asm dst (intern-non-immediate asm obj)))))
683 (define-macro-assembler (load-static-procedure asm dst label)
684 (let ((loc (intern-constant asm (make-static-procedure label))))
685 (emit-make-non-immediate asm dst loc)))
687 (define-syntax-rule (define-tc7-macro-assembler name tc7)
688 (define-macro-assembler (name asm slot invert? label)
689 (emit-br-if-tc7 asm slot invert? tc7 label)))
691 ;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
692 ;; macro assemblers are commented out. See also
693 ;; *branching-primcall-arities* in (language cps primitives), the set of
694 ;; macro-instructions in assembly.scm, and
695 ;; disassembler.scm:code-annotation.
697 ;; FIXME: Define all tc7 values in Scheme in one place, derived from
699 (define-tc7-macro-assembler br-if-symbol 5)
700 (define-tc7-macro-assembler br-if-variable 7)
701 (define-tc7-macro-assembler br-if-vector 13)
702 ;(define-tc7-macro-assembler br-if-weak-vector 13)
703 (define-tc7-macro-assembler br-if-string 21)
704 ;(define-tc7-macro-assembler br-if-heap-number 23)
705 ;(define-tc7-macro-assembler br-if-stringbuf 39)
706 (define-tc7-macro-assembler br-if-bytevector 77)
707 ;(define-tc7-macro-assembler br-if-pointer 31)
708 ;(define-tc7-macro-assembler br-if-hashtable 29)
709 ;(define-tc7-macro-assembler br-if-fluid 37)
710 ;(define-tc7-macro-assembler br-if-dynamic-state 45)
711 ;(define-tc7-macro-assembler br-if-frame 47)
712 ;(define-tc7-macro-assembler br-if-vm 55)
713 ;(define-tc7-macro-assembler br-if-vm-cont 71)
714 ;(define-tc7-macro-assembler br-if-rtl-program 69)
715 ;(define-tc7-macro-assembler br-if-weak-set 85)
716 ;(define-tc7-macro-assembler br-if-weak-table 87)
717 ;(define-tc7-macro-assembler br-if-array 93)
718 (define-tc7-macro-assembler br-if-bitvector 95)
719 ;(define-tc7-macro-assembler br-if-port 125)
720 ;(define-tc7-macro-assembler br-if-smob 127)
722 (define-macro-assembler (begin-program asm label properties)
723 (emit-label asm label)
724 (let ((meta (make-meta label properties (asm-start asm))))
725 (set-asm-meta! asm (cons meta (asm-meta asm)))))
727 (define-macro-assembler (end-program asm)
728 (let ((meta (car (asm-meta asm))))
729 (set-meta-high-pc! meta (asm-start asm))
730 (set-meta-arities! meta (reverse (meta-arities meta)))))
732 (define-macro-assembler (begin-standard-arity asm req nlocals alternate)
733 (emit-begin-opt-arity asm req '() #f nlocals alternate))
735 (define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
736 (emit-begin-kw-arity asm req opt rest '() #f nlocals alternate))
738 (define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
739 allow-other-keys? nlocals alternate)
740 (assert-match req ((? symbol?) ...) "list of symbols")
741 (assert-match opt ((? symbol?) ...) "list of symbols")
742 (assert-match rest (or #f (? symbol?)) "#f or symbol")
743 (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
744 "alist of keyword -> integer")
745 (assert-match allow-other-keys? (? boolean?) "boolean")
746 (assert-match nlocals (? integer?) "integer")
747 (assert-match alternate (or #f (? symbol?)) "#f or symbol")
748 (let* ((meta (car (asm-meta asm)))
749 (arity (make-arity req opt rest kw-indices allow-other-keys?
751 ;; The procedure itself is in slot 0, in the standard calling
752 ;; convention. For procedure prologues, nreq includes the
753 ;; procedure, so here we add 1.
754 (nreq (1+ (length req)))
756 (rest? (->bool rest)))
757 (set-meta-arities! meta (cons arity (meta-arities meta)))
759 ((or allow-other-keys? (pair? kw-indices))
760 (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
762 ((or rest? (pair? opt))
763 (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
765 (emit-standard-prelude asm nreq nlocals alternate)))))
767 (define-macro-assembler (end-arity asm)
768 (let ((arity (car (meta-arities (car (asm-meta asm))))))
769 (set-arity-high-pc! arity (asm-start asm))))
771 (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
774 (emit-br-if-nargs-ne asm nreq alternate)
775 (emit-alloc-frame asm nlocals))
776 ((and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
777 (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
779 (emit-assert-nargs-ee asm nreq)
780 (emit-alloc-frame asm nlocals))))
782 (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
784 (emit-br-if-nargs-lt asm nreq alternate)
785 (emit-assert-nargs-ge asm nreq))
788 (emit-bind-rest asm (+ nreq nopt)))
790 (emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
792 (emit-assert-nargs-le asm (+ nreq nopt))))
793 (emit-alloc-frame asm nlocals))
795 (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
796 allow-other-keys? nlocals alternate)
799 (emit-br-if-nargs-lt asm nreq alternate)
801 (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
802 (emit-assert-nargs-ge asm nreq))
803 (let ((ntotal (fold (lambda (kw ntotal)
805 (((? keyword?) . idx)
806 (max (1+ idx) ntotal))))
807 (+ nreq nopt) kw-indices)))
808 ;; FIXME: port 581f410f
809 (emit-bind-kwargs asm nreq
810 (pack-flags allow-other-keys? rest?)
813 (intern-constant asm kw-indices))
814 (emit-alloc-frame asm nlocals)))
816 (define-macro-assembler (label asm sym)
817 (hashq-set! (asm-labels asm) sym (asm-start asm)))
819 (define-macro-assembler (source asm source)
820 (set-asm-sources! asm (acons (asm-start asm) source (asm-sources asm))))
822 (define-macro-assembler (cache-current-module! asm module scope)
823 (let ((mod-label (intern-module-cache-cell asm scope)))
824 (emit-static-set! asm module mod-label 0)))
826 (define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
827 (let ((sym-label (intern-non-immediate asm sym))
828 (mod-label (intern-module-cache-cell asm scope))
829 (cell-label (intern-cache-cell asm scope sym)))
830 (emit-toplevel-box asm dst cell-label mod-label sym-label bound?)))
832 (define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
833 (let* ((sym-label (intern-non-immediate asm sym))
834 (key (cons public? module-name))
835 (mod-name-label (intern-constant asm key))
836 (cell-label (intern-cache-cell asm key sym)))
837 (emit-module-box asm dst cell-label mod-name-label sym-label bound?)))
839 (define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
840 (unless (zero? dead-slot-map)
841 (set-asm-dead-slot-maps! asm
843 (cons* (asm-start asm) proc-slot dead-slot-map)
844 (asm-dead-slot-maps asm)))))
849 ;;; Helper for linking objects.
852 (define (make-object asm name bv relocs labels . kwargs)
853 "Make a linker object. This helper handles interning the name in the
854 shstrtab, assigning the size, allocating a fresh index, and defining a
855 corresponding linker symbol for the start of the section."
856 (let ((name-idx (intern-section-name! asm (symbol->string name)))
857 (index (asm-next-section-number asm)))
858 (set-asm-next-section-number! asm (1+ index))
859 (make-linker-object (apply make-elf-section
862 #:size (bytevector-length bv)
865 (cons (make-linker-symbol name 0) labels))))
871 ;;; Linking the constant table. This code is somewhat intertwingled
872 ;;; with the intern-constant code above, as that procedure also
873 ;;; residualizes instructions to initialize constants at load time.
876 (define (write-immediate asm buf pos x)
877 (let ((val (object-address x))
878 (endianness (asm-endianness asm)))
879 (case (asm-word-size asm)
880 ((4) (bytevector-u32-set! buf pos val endianness))
881 ((8) (bytevector-u64-set! buf pos val endianness))
882 (else (error "bad word size" asm)))))
884 (define (emit-init-constants asm)
885 "If there is writable data that needs initialization at runtime, emit
886 a procedure to do that and return its label. Otherwise return
888 (let ((inits (asm-inits asm)))
889 (and (not (null? inits))
890 (let ((label (gensym "init-constants")))
892 `((begin-program ,label ())
893 (assert-nargs-ee/locals 1 1)
895 (load-constant 1 ,*unspecified*)
900 (define (link-data asm data name)
901 "Link the static data for a program into the @var{name} section (which
902 should be .data or .rodata), and return the resulting linker object.
903 @var{data} should be a vhash mapping objects to labels."
904 (define (align address alignment)
906 (modulo (- alignment (modulo address alignment)) alignment)))
908 (define tc7-vector 13)
909 (define stringbuf-shared-flag #x100)
910 (define stringbuf-wide-flag #x400)
911 (define tc7-stringbuf 39)
912 (define tc7-narrow-stringbuf
913 (+ tc7-stringbuf stringbuf-shared-flag))
914 (define tc7-wide-stringbuf
915 (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
916 (define tc7-ro-string (+ 21 #x200))
917 (define tc7-program 69)
918 (define tc7-bytevector 77)
919 (define tc7-bitvector 95)
921 (let ((word-size (asm-word-size asm))
922 (endianness (asm-endianness asm)))
923 (define (byte-length x)
926 (let ((x (stringbuf-string x)))
928 (case (string-bytes-per-char x)
929 ((1) (1+ (string-length x)))
930 ((4) (* (1+ (string-length x)) 4))
931 (else (error "bad string bytes per char" x))))))
932 ((static-procedure? x)
939 (* (1+ (vector-length x)) word-size))
940 ((simple-uniform-vector? x)
942 ((uniform-vector-backing-store? x)
943 (bytevector-length (uniform-vector-backing-store-bytes x)))
947 (define (write-constant-reference buf pos x)
948 ;; The asm-inits will fix up any reference to a non-immediate.
949 (write-immediate asm buf pos (if (immediate? x) x #f)))
951 (define (write buf pos obj)
954 (let* ((x (stringbuf-string obj))
955 (len (string-length x))
956 (tag (if (= (string-bytes-per-char x) 1)
958 tc7-wide-stringbuf)))
961 (bytevector-u32-set! buf pos tag endianness)
962 (bytevector-u32-set! buf (+ pos 4) len endianness))
964 (bytevector-u64-set! buf pos tag endianness)
965 (bytevector-u64-set! buf (+ pos 8) len endianness))
967 (error "bad word size" asm)))
968 (let ((pos (+ pos (* word-size 2))))
969 (case (string-bytes-per-char x)
973 (let ((u8 (char->integer (string-ref x i))))
974 (bytevector-u8-set! buf (+ pos i) u8)
976 (bytevector-u8-set! buf (+ pos i) 0))))
980 (let ((u32 (char->integer (string-ref x i))))
981 (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
983 (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
984 (else (error "bad string bytes per char" x))))))
986 ((static-procedure? obj)
989 (bytevector-u32-set! buf pos tc7-program endianness)
990 (bytevector-u32-set! buf (+ pos 4) 0 endianness))
992 (bytevector-u64-set! buf pos tc7-program endianness)
993 (bytevector-u64-set! buf (+ pos 8) 0 endianness))
994 (else (error "bad word size"))))
997 (write-immediate asm buf pos #f))
1000 (let ((tag (logior tc7-ro-string (ash (string-length obj) 8))))
1003 (bytevector-u32-set! buf pos tc7-ro-string endianness)
1004 (write-immediate asm buf (+ pos 4) #f) ; stringbuf
1005 (bytevector-u32-set! buf (+ pos 8) 0 endianness)
1006 (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
1008 (bytevector-u64-set! buf pos tc7-ro-string endianness)
1009 (write-immediate asm buf (+ pos 8) #f) ; stringbuf
1010 (bytevector-u64-set! buf (+ pos 16) 0 endianness)
1011 (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
1012 (else (error "bad word size")))))
1015 (write-constant-reference buf pos (car obj))
1016 (write-constant-reference buf (+ pos word-size) (cdr obj)))
1018 ((simple-vector? obj)
1019 (let* ((len (vector-length obj))
1020 (tag (logior tc7-vector (ash len 8))))
1022 ((4) (bytevector-u32-set! buf pos tag endianness))
1023 ((8) (bytevector-u64-set! buf pos tag endianness))
1024 (else (error "bad word size")))
1026 (when (< i (vector-length obj))
1027 (let ((pos (+ pos word-size (* i word-size)))
1028 (elt (vector-ref obj i)))
1029 (write-constant-reference buf pos elt)
1033 (write-immediate asm buf pos #f))
1036 (write-immediate asm buf pos #f))
1039 (write-immediate asm buf pos #f))
1041 ((simple-uniform-vector? obj)
1042 (let ((tag (if (bitvector? obj)
1044 (let ((type-code (uniform-vector-element-type-code obj)))
1045 (logior tc7-bytevector (ash type-code 7))))))
1048 (bytevector-u32-set! buf pos tag endianness)
1049 (bytevector-u32-set! buf (+ pos 4)
1050 (if (bitvector? obj)
1051 (bitvector-length obj)
1052 (bytevector-length obj))
1053 endianness) ; length
1054 (bytevector-u32-set! buf (+ pos 8) 0 endianness) ; pointer
1055 (write-immediate asm buf (+ pos 12) #f)) ; owner
1057 (bytevector-u64-set! buf pos tag endianness)
1058 (bytevector-u64-set! buf (+ pos 8)
1059 (if (bitvector? obj)
1060 (bitvector-length obj)
1061 (bytevector-length obj))
1062 endianness) ; length
1063 (bytevector-u64-set! buf (+ pos 16) 0 endianness) ; pointer
1064 (write-immediate asm buf (+ pos 24) #f)) ; owner
1065 (else (error "bad word size")))))
1067 ((uniform-vector-backing-store? obj)
1068 (let ((bv (uniform-vector-backing-store-bytes obj)))
1069 (bytevector-copy! bv 0 buf pos (bytevector-length bv))
1070 (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
1071 (eq? endianness (native-endianness)))
1072 ;; Need to swap units of element-size bytes
1073 (error "FIXME: Implement byte order swap"))))
1076 (error "unrecognized object" obj))))
1079 ((vlist-null? data) #f)
1081 (let* ((byte-len (vhash-fold (lambda (k v len)
1082 (+ (byte-length k) (align len 8)))
1084 (buf (make-bytevector byte-len 0)))
1085 (let lp ((i 0) (pos 0) (symbols '()))
1086 (if (< i (vlist-length data))
1087 (let* ((pair (vlist-ref data i))
1089 (obj-label (cdr pair)))
1092 (align (+ (byte-length obj) pos) 8)
1093 (cons (make-linker-symbol obj-label pos) symbols)))
1094 (make-object asm name buf '() symbols
1096 ('.data (logior SHF_ALLOC SHF_WRITE))
1097 ('.rodata SHF_ALLOC))))))))))
1099 (define (link-constants asm)
1100 "Link sections to hold constants needed by the program text emitted
1103 Returns three values: an object for the .rodata section, an object for
1104 the .data section, and a label for an initialization procedure. Any of
1105 these may be @code{#f}."
1106 (define (shareable? x)
1110 (and (immediate? (car x)) (immediate? (cdr x))))
1113 (or (= i (vector-length x))
1114 (and (immediate? (vector-ref x i))
1116 ((uniform-vector-backing-store? x) #t)
1118 (let* ((constants (asm-constants asm))
1119 (len (vlist-length constants)))
1124 (values (link-data asm ro '.rodata)
1125 (link-data asm rw '.data)
1126 (emit-init-constants asm))
1127 (let ((pair (vlist-ref constants i)))
1128 (if (shareable? (car pair))
1129 (lp (1+ i) (vhash-consq (car pair) (cdr pair) ro) rw)
1130 (lp (1+ i) ro (vhash-consq (car pair) (cdr pair) rw))))))))
1135 ;;; Linking program text.
1138 (define (process-relocs buf relocs labels)
1139 "Patch up internal x8-s24 relocations, and any s32 relocations that
1140 reference symbols in the text section. Return a list of linker
1141 relocations for references to symbols defined outside the text section."
1143 (lambda (reloc tail)
1145 ((type label base word)
1146 (let ((abs (hashq-ref labels label))
1147 (dst (+ base word)))
1151 (let ((rel (- abs base)))
1152 (s32-set! buf dst rel)
1154 (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
1158 (error "unbound near relocation" reloc))
1159 (let ((rel (- abs base))
1160 (u32 (u32-ref buf dst)))
1161 (u32-set! buf dst (pack-u8-s24 (logand u32 #xff) rel))
1163 (else (error "bad relocation kind" reloc)))))))
1167 (define (process-labels labels)
1168 "Define linker symbols for the label-offset map in @var{labels}.
1169 The offsets are expected to be expressed in words."
1170 (hash-map->list (lambda (label loc)
1171 (make-linker-symbol label (* loc 4)))
1174 (define (swap-bytes! buf)
1175 "Patch up the text buffer @var{buf}, swapping the endianness of each
1177 (unless (zero? (modulo (bytevector-length buf) 4))
1178 (error "unexpected length"))
1179 (let ((byte-len (bytevector-length buf)))
1181 (unless (= pos byte-len)
1182 (bytevector-u32-set!
1184 (bytevector-u32-ref buf pos (endianness big))
1185 (endianness little))
1188 (define (link-text-object asm)
1189 "Link the .rtl-text section, swapping the endianness of the bytes if
1191 (let ((buf (make-u32vector (asm-pos asm))))
1192 (let lp ((pos 0) (prev (reverse (asm-prev asm))))
1194 (let ((byte-size (* (asm-idx asm) 4)))
1195 (bytevector-copy! (asm-cur asm) 0 buf pos byte-size)
1196 (unless (eq? (asm-endianness asm) (native-endianness))
1198 (make-object asm '.rtl-text
1200 (process-relocs buf (asm-relocs asm)
1202 (process-labels (asm-labels asm))))
1203 (let ((len (* *block-size* 4)))
1204 (bytevector-copy! (car prev) 0 buf pos len)
1205 (lp (+ pos len) (cdr prev)))))))
1211 ;;; Create the frame maps. These maps are used by GC to identify dead
1212 ;;; slots in pending call frames, to avoid marking them. We only do
1213 ;;; this when frame makes a non-tail call, as that is the common case.
1214 ;;; Only the topmost frame will see a GC at any other point, but we mark
1215 ;;; top frames conservatively as serializing live slot maps at every
1216 ;;; instruction would take up too much space in the object file.
1219 ;; The .guile.frame-maps section starts with two packed u32 values: one
1220 ;; indicating the offset of the first byte of the .rtl-text section, and
1221 ;; another indicating the relative offset in bytes of the slots data.
1222 (define frame-maps-prefix-len 8)
1224 ;; Each header is 8 bytes: 4 for the offset from .rtl_text, and 4 for
1225 ;; the offset of the slot map from the beginning of the
1226 ;; .guile.frame-maps section. The length of a frame map depends on the
1227 ;; frame size at the call site, and is not encoded into this section as
1228 ;; it is available at run-time.
1229 (define frame-map-header-len 8)
1231 (define (link-frame-maps asm)
1232 (define (map-byte-length proc-slot)
1233 (ceiling-quotient (- proc-slot 2) 8))
1234 (define (make-frame-maps maps count map-len)
1235 (let* ((endianness (asm-endianness asm))
1236 (header-pos frame-maps-prefix-len)
1237 (map-pos (+ header-pos (* count frame-map-header-len)))
1238 (bv (make-bytevector (+ map-pos map-len) 0)))
1239 (bytevector-u32-set! bv 4 map-pos endianness)
1240 (let lp ((maps maps) (header-pos header-pos) (map-pos map-pos))
1243 (make-object asm '.guile.frame-maps bv
1244 (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
1245 '() #:type SHT_PROGBITS #:flags SHF_ALLOC))
1246 (((pos proc-slot . map) . maps)
1247 (bytevector-u32-set! bv header-pos (* pos 4) endianness)
1248 (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
1249 (let write-bytes ((map-pos map-pos)
1251 (byte-length (map-byte-length proc-slot)))
1252 (if (zero? byte-length)
1253 (lp maps (+ header-pos frame-map-header-len) map-pos)
1255 (bytevector-u8-set! bv map-pos (logand map #xff))
1256 (write-bytes (1+ map-pos) (ash map -8)
1257 (1- byte-length))))))))))
1258 (match (asm-dead-slot-maps asm)
1261 (let lp ((in in) (out '()) (count 0) (map-len 0))
1263 (() (make-frame-maps out count map-len))
1264 (((and head (pos proc-slot . map)) . in)
1265 (lp in (cons head out)
1267 (+ (map-byte-length proc-slot) map-len))))))))
1272 ;;; Linking other sections of the ELF file, like the dynamic segment,
1273 ;;; the symbol table, etc.
1276 ;; FIXME: Define these somewhere central, shared with C.
1277 (define *bytecode-major-version* #x0202)
1278 (define *bytecode-minor-version* 4)
1280 (define (link-dynamic-section asm text rw rw-init frame-maps)
1281 "Link the dynamic section for an ELF image with bytecode @var{text},
1282 given the writable data section @var{rw} needing fixup from the
1283 procedure with label @var{rw-init}. @var{rw-init} may be false. If
1284 @var{rw} is true, it will be added to the GC roots at runtime."
1285 (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
1286 (let* ((endianness (asm-endianness asm))
1288 (words (if rw (+ words 4) words))
1289 (words (if rw-init (+ words 2) words))
1290 (words (if frame-maps (+ words 2) words))
1291 (bv (make-bytevector (* word-size words) 0))
1294 (%set-uword! bv (* i word-size) uword endianness)))
1298 (set! relocs (cons (make-linker-reloc 'reloc-type
1299 (* i word-size) 0 label)
1301 (%set-uword! bv (* i word-size) 0 endianness))))
1302 (set-uword! 0 DT_GUILE_VM_VERSION)
1303 (set-uword! 1 (logior (ash *bytecode-major-version* 16)
1304 *bytecode-minor-version*))
1305 (set-uword! 2 DT_GUILE_ENTRY)
1306 (set-label! 3 '.rtl-text)
1309 (set-uword! 4 DT_GUILE_GC_ROOT)
1310 (set-label! 5 '.data)
1311 (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
1312 (set-uword! 7 (bytevector-length (linker-object-bv rw)))
1314 (set-uword! 8 DT_INIT) ; constants
1315 (set-label! 9 rw-init)))
1317 (set-uword! (- words 4) DT_GUILE_FRAME_MAPS)
1318 (set-label! (- words 3) '.guile.frame-maps))
1319 (set-uword! (- words 2) DT_NULL)
1320 (set-uword! (- words 1) 0)
1321 (make-object asm '.dynamic bv relocs '()
1322 #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
1323 (case (asm-word-size asm)
1324 ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
1325 ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
1326 (else (error "bad word size" asm))))
1328 (define (link-shstrtab asm)
1329 "Link the string table for the section headers."
1330 (intern-section-name! asm ".shstrtab")
1331 (make-object asm '.shstrtab
1332 (link-string-table! (asm-shstrtab asm))
1334 #:type SHT_STRTAB #:flags 0))
1336 (define (link-symtab text-section asm)
1337 (let* ((endianness (asm-endianness asm))
1338 (word-size (asm-word-size asm))
1339 (size (elf-symbol-len word-size))
1340 (meta (reverse (asm-meta asm)))
1342 (strtab (make-string-table))
1343 (bv (make-bytevector (* n size) 0)))
1344 (define (intern-string! name)
1345 (string-table-intern! strtab (if name (symbol->string name) "")))
1348 (let ((name (intern-string! (meta-name meta))))
1349 (write-elf-symbol bv (* n size) endianness word-size
1352 ;; Symbol value and size are measured in
1354 #:value (* 4 (meta-low-pc meta))
1355 #:size (* 4 (- (meta-high-pc meta)
1356 (meta-low-pc meta)))
1358 #:visibility STV_HIDDEN
1359 #:shndx (elf-section-index text-section)))))
1361 (let ((strtab (make-object asm '.strtab
1362 (link-string-table! strtab)
1364 #:type SHT_STRTAB #:flags 0)))
1365 (values (make-object asm '.symtab
1368 #:type SHT_SYMTAB #:flags 0 #:entsize size
1369 #:link (elf-section-index
1370 (linker-object-section strtab)))
1373 ;;; The .guile.arities section describes the arities that a function can
1374 ;;; have. It is in two parts: a sorted array of headers describing
1375 ;;; basic arities, and an array of links out to a string table (and in
1376 ;;; the case of keyword arguments, to the data section) for argument
1377 ;;; names. The whole thing is prefixed by a uint32 indicating the
1378 ;;; offset of the end of the headers array.
1380 ;;; The arity headers array is a packed array of structures of the form:
1382 ;;; struct arity_header {
1383 ;;; uint32_t low_pc;
1384 ;;; uint32_t high_pc;
1385 ;;; uint32_t offset;
1391 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1392 ;;; future to use 64-bit offsets if appropriate, but there are other
1393 ;;; aspects of bytecode that constrain us to a total image that fits in
1394 ;;; 32 bits, so for the moment we'll simplify the problem space.
1396 ;;; The following flags values are defined:
1399 ;;; #x2: allow-other-keys?
1400 ;;; #x4: has-keyword-args?
1401 ;;; #x8: is-case-lambda?
1402 ;;; #x10: is-in-case-lambda?
1404 ;;; Functions with a single arity specify their number of required and
1405 ;;; optional arguments in nreq and nopt, and do not have the
1406 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1407 ;;; of pointers into the associated .guile.arities.strtab string table,
1408 ;;; identifying the argument names. This offset is relative to the
1409 ;;; start of the .guile.arities section. Links for required arguments
1410 ;;; are first, in order, as uint32 values. Next follow the optionals,
1411 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1412 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1413 ;;; links, the kw-indices link points into the data section, and is
1414 ;;; relative to the ELF image as a whole.
1416 ;;; Functions with no arities have no arities information present in the
1417 ;;; .guile.arities section.
1419 ;;; Functions with multiple arities are preceded by a header with
1420 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1421 ;;; high-pc which should be the bounds of the whole function. Headers
1422 ;;; for the individual arities follow, with the is-in-case-lambda? flag
1423 ;;; set. In this way the whole headers array is sorted in increasing
1424 ;;; low-pc order, and case-lambda clauses are contained within the
1425 ;;; [low-pc, high-pc] of the case-lambda header.
1427 ;; Length of the prefix to the arities section, in bytes.
1428 (define arities-prefix-len 4)
1430 ;; Length of an arity header, in bytes.
1431 (define arity-header-len (* 6 4))
1433 ;; The offset of "offset" within arity header, in bytes.
1434 (define arity-header-offset-offset (* 2 4))
1436 (define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
1437 has-keyword-args? is-case-lambda?
1439 (logior (if has-rest? (ash 1 0) 0)
1440 (if allow-other-keys? (ash 1 1) 0)
1441 (if has-keyword-args? (ash 1 2) 0)
1442 (if is-case-lambda? (ash 1 3) 0)
1443 (if is-in-case-lambda? (ash 1 4) 0)))
1445 (define (meta-arities-size meta)
1446 (define (lambda-size arity)
1448 (* 4 ;; name pointers
1449 (+ (length (arity-req arity))
1450 (length (arity-opt arity))
1451 (if (arity-rest arity) 1 0)
1452 (if (pair? (arity-kw-indices arity)) 1 0)))))
1453 (define (case-lambda-size arities)
1455 arity-header-len ;; case-lambda header
1456 (map lambda-size arities))) ;; the cases
1457 (match (meta-arities meta)
1459 ((arity) (lambda-size arity))
1460 (arities (case-lambda-size arities))))
1462 (define (write-arity-headers metas bv endianness)
1463 (define (write-arity-header* pos low-pc high-pc flags nreq nopt)
1464 (bytevector-u32-set! bv pos (* low-pc 4) endianness)
1465 (bytevector-u32-set! bv (+ pos 4) (* high-pc 4) endianness)
1466 (bytevector-u32-set! bv (+ pos 8) 0 endianness) ; offset
1467 (bytevector-u32-set! bv (+ pos 12) flags endianness)
1468 (bytevector-u32-set! bv (+ pos 16) nreq endianness)
1469 (bytevector-u32-set! bv (+ pos 20) nopt endianness))
1470 (define (write-arity-header pos arity in-case-lambda?)
1471 (write-arity-header* pos (arity-low-pc arity)
1472 (arity-high-pc arity)
1473 (pack-arity-flags (arity-rest arity)
1474 (arity-allow-other-keys? arity)
1475 (pair? (arity-kw-indices arity))
1478 (length (arity-req arity))
1479 (length (arity-opt arity))))
1480 (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
1483 ;; Fill in the prefix.
1484 (bytevector-u32-set! bv 0 pos endianness)
1485 (values pos (reverse offsets)))
1487 (match (meta-arities meta)
1488 (() (lp metas pos offsets))
1490 (write-arity-header pos arity #f)
1492 (+ pos arity-header-len)
1493 (acons arity (+ pos arity-header-offset-offset) offsets)))
1495 ;; Write a case-lambda header, then individual arities.
1496 ;; The case-lambda header's offset link is 0.
1497 (write-arity-header* pos (meta-low-pc meta) (meta-high-pc meta)
1498 (pack-arity-flags #f #f #f #t #f) 0 0)
1499 (let lp* ((arities arities) (pos (+ pos arity-header-len))
1502 (() (lp metas pos offsets))
1504 (write-arity-header pos arity #t)
1506 (+ pos arity-header-len)
1508 (+ pos arity-header-offset-offset)
1511 (define (write-arity-links asm bv pos arity-offset-pairs strtab)
1512 (define (write-symbol sym pos)
1513 (bytevector-u32-set! bv pos
1514 (string-table-intern! strtab (symbol->string sym))
1515 (asm-endianness asm))
1517 (define (write-kw-indices pos kw-indices)
1518 ;; FIXME: Assert that kw-indices is already interned.
1519 (make-linker-reloc 'abs32/1 pos 0
1520 (intern-constant asm kw-indices)))
1521 (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
1524 (unless (= pos (bytevector-length bv))
1525 (error "expected to fully fill the bytevector"
1526 pos (bytevector-length bv)))
1528 (((arity . offset) . pairs)
1529 (bytevector-u32-set! bv offset pos (asm-endianness asm))
1530 (let ((pos (fold write-symbol
1532 (append (arity-req arity)
1535 ((arity-rest arity) => list)
1537 (match (arity-kw-indices arity)
1538 (() (lp pos pairs relocs))
1542 (cons (write-kw-indices pos kw-indices) relocs)))))))))
1544 (define (link-arities asm)
1545 (let* ((endianness (asm-endianness asm))
1546 (metas (reverse (asm-meta asm)))
1547 (size (fold (lambda (meta size)
1548 (+ size (meta-arities-size meta)))
1551 (strtab (make-string-table))
1552 (bv (make-bytevector size 0)))
1553 (let ((kw-indices-relocs
1556 (write-arity-headers metas bv endianness))
1557 (lambda (pos arity-offset-pairs)
1558 (write-arity-links asm bv pos arity-offset-pairs strtab)))))
1559 (let ((strtab (make-object asm '.guile.arities.strtab
1560 (link-string-table! strtab)
1562 #:type SHT_STRTAB #:flags 0)))
1563 (values (make-object asm '.guile.arities
1565 kw-indices-relocs '()
1566 #:type SHT_PROGBITS #:flags 0
1567 #:link (elf-section-index
1568 (linker-object-section strtab)))
1572 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1573 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1574 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1575 ;;; entry to a program, relative to the start of the text section, in
1576 ;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1577 ;;; string table section.
1580 ;; The size of a docstrs entry, in bytes.
1581 (define docstr-size 8)
1583 (define (link-docstrs asm)
1584 (define (find-docstrings)
1585 (filter-map (lambda (meta)
1586 (define (is-documentation? pair)
1587 (eq? (car pair) 'documentation))
1588 (let* ((props (meta-properties meta))
1589 (tail (find-tail is-documentation? props)))
1591 (not (find-tail is-documentation? (cdr tail)))
1592 (string? (cdar tail))
1593 (cons (* 4 (meta-low-pc meta)) (cdar tail)))))
1594 (reverse (asm-meta asm))))
1595 (let* ((endianness (asm-endianness asm))
1596 (docstrings (find-docstrings))
1597 (strtab (make-string-table))
1598 (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
1599 (fold (lambda (pair pos)
1602 (bytevector-u32-set! bv pos pc endianness)
1603 (bytevector-u32-set! bv (+ pos 4)
1604 (string-table-intern! strtab string)
1606 (+ pos docstr-size))))
1609 (let ((strtab (make-object asm '.guile.docstrs.strtab
1610 (link-string-table! strtab)
1612 #:type SHT_STRTAB #:flags 0)))
1613 (values (make-object asm '.guile.docstrs
1616 #:type SHT_PROGBITS #:flags 0
1617 #:link (elf-section-index
1618 (linker-object-section strtab)))
1622 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1623 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1624 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1625 ;;; entry to a program, relative to the start of the text section, and
1626 ;;; addr is the address of the associated properties alist, relative to
1627 ;;; the start of the ELF image.
1629 ;;; Since procedure properties are stored in the data sections, we need
1630 ;;; to link the procedures property section first. (Note that this
1631 ;;; constraint does not apply to the arities section, which may
1632 ;;; reference the data sections via the kw-indices literal, because
1633 ;;; assembling the text section already makes sure that the kw-indices
1637 ;; The size of a procprops entry, in bytes.
1638 (define procprops-size 8)
1640 (define (link-procprops asm)
1641 (define (assoc-remove-one alist key value-pred)
1644 ((((? (lambda (x) (eq? x key))) . value) . alist)
1645 (if (value-pred value)
1647 (acons key value alist)))
1649 (acons k v (assoc-remove-one alist key value-pred)))))
1650 (define (props-without-name-or-docstring meta)
1652 (assoc-remove-one (meta-properties meta) 'name (lambda (x) #t))
1655 (define (find-procprops)
1656 (filter-map (lambda (meta)
1657 (let ((props (props-without-name-or-docstring meta)))
1659 (cons (* 4 (meta-low-pc meta)) props))))
1660 (reverse (asm-meta asm))))
1661 (let* ((endianness (asm-endianness asm))
1662 (procprops (find-procprops))
1663 (bv (make-bytevector (* (length procprops) procprops-size) 0)))
1664 (let lp ((procprops procprops) (pos 0) (relocs '()))
1667 (make-object asm '.guile.procprops
1670 #:type SHT_PROGBITS #:flags 0))
1671 (((pc . props) . procprops)
1672 (bytevector-u32-set! bv pos pc endianness)
1674 (+ pos procprops-size)
1675 (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
1676 (intern-constant asm props))
1680 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1681 ;;; sections provide line number and local variable liveness
1682 ;;; information. Their format is defined by the DWARF
1686 (define (asm-language asm)
1687 ;; FIXME: Plumb language through to the assembler.
1690 ;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
1691 (define (link-debug asm)
1692 (define (put-s8 port val)
1693 (let ((bv (make-bytevector 1)))
1694 (bytevector-s8-set! bv 0 val)
1695 (put-bytevector port bv)))
1697 (define (put-u16 port val)
1698 (let ((bv (make-bytevector 2)))
1699 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1700 (put-bytevector port bv)))
1702 (define (put-u32 port val)
1703 (let ((bv (make-bytevector 4)))
1704 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1705 (put-bytevector port bv)))
1707 (define (put-u64 port val)
1708 (let ((bv (make-bytevector 8)))
1709 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1710 (put-bytevector port bv)))
1712 (define (put-uleb128 port val)
1714 (let ((next (ash val -7)))
1718 (put-u8 port (logior #x80 (logand val #x7f)))
1721 (define (put-sleb128 port val)
1723 (if (<= 0 (+ val 64) 127)
1724 (put-u8 port (logand val #x7f))
1726 (put-u8 port (logior #x80 (logand val #x7f)))
1727 (lp (ash val -7))))))
1729 (define (port-position port)
1730 (seek port 0 SEEK_CUR))
1732 (define (meta->subprogram-die meta)
1736 => (lambda (name) `((name ,(symbol->string name)))))
1739 (low-pc ,(meta-label meta))
1740 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1742 (define (make-compile-unit-die asm)
1744 (@ (producer ,(string-append "Guile " (version)))
1745 (language ,(asm-language asm))
1747 (high-pc ,(* 4 (asm-pos asm)))
1749 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1751 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1753 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1754 ;; (tag has-kids? attrs forms) -> code
1755 ((abbrevs) vlist-null)
1756 ((strtab) (make-string-table))
1757 ((line-port get-line-bv) (open-bytevector-output-port))
1760 ((files) vlist-null))
1762 (define (write-abbrev code tag has-children? attrs forms)
1763 (put-uleb128 abbrev-port code)
1764 (put-uleb128 abbrev-port (tag-name->code tag))
1765 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1766 (for-each (lambda (attr form)
1767 (put-uleb128 abbrev-port (attribute-name->code attr))
1768 (put-uleb128 abbrev-port (form-name->code form)))
1770 (put-uleb128 abbrev-port 0)
1771 (put-uleb128 abbrev-port 0))
1773 (define (intern-abbrev tag has-children? attrs forms)
1774 (let ((key (list tag has-children? attrs forms)))
1775 (match (vhash-assoc key abbrevs)
1777 (#f (let ((code (1+ (vlist-length abbrevs))))
1778 (set! abbrevs (vhash-cons key code abbrevs))
1779 (write-abbrev code tag has-children? attrs forms)
1782 (define (intern-file file)
1783 (match (vhash-assoc file files)
1785 (#f (let ((code (1+ (vlist-length files))))
1786 (set! files (vhash-cons file code files))
1789 (define (write-sources)
1790 ;; Choose line base and line range values that will allow for an
1791 ;; address advance range of 16 words. The special opcode range is
1792 ;; from 10 to 255, so 246 values.
1796 (let lp ((sources (asm-sources asm)) (out '()))
1798 (((pc . s) . sources)
1799 (let ((file (assq-ref s 'filename))
1800 (line (assq-ref s 'line))
1801 (col (assq-ref s 'column)))
1803 ;; Guile line and column numbers are 0-indexed, but
1804 ;; they are 1-indexed for DWARF.
1806 (if file (intern-file file) 0)
1811 ;; Compilation unit header for .debug_line. We write in
1812 ;; DWARF 2 format because more tools understand it than DWARF
1813 ;; 4, which incompatibly adds another field to this header.
1815 (put-u32 line-port 0) ; Length; will patch later.
1816 (put-u16 line-port 2) ; DWARF 2 format.
1817 (put-u32 line-port 0) ; Prologue length; will patch later.
1818 (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
1819 (put-u8 line-port 1) ; Default is-stmt: true.
1821 (put-s8 line-port base) ; Line base. See the DWARF standard.
1822 (put-u8 line-port range) ; Line range. See the DWARF standard.
1823 (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
1825 ;; A table of the number of uleb128 arguments taken by each
1826 ;; of the standard opcodes.
1827 (put-u8 line-port 0) ; 1: copy
1828 (put-u8 line-port 1) ; 2: advance-pc
1829 (put-u8 line-port 1) ; 3: advance-line
1830 (put-u8 line-port 1) ; 4: set-file
1831 (put-u8 line-port 1) ; 5: set-column
1832 (put-u8 line-port 0) ; 6: negate-stmt
1833 (put-u8 line-port 0) ; 7: set-basic-block
1834 (put-u8 line-port 0) ; 8: const-add-pc
1835 (put-u8 line-port 1) ; 9: fixed-advance-pc
1837 ;; Include directories, as a zero-terminated sequence of
1838 ;; nul-terminated strings. Nothing, for the moment.
1839 (put-u8 line-port 0)
1841 ;; File table. For each file that contributes to this
1842 ;; compilation unit, a nul-terminated file name string, and a
1843 ;; uleb128 for each of directory the file was found in, the
1844 ;; modification time, and the file's size in bytes. We pass
1845 ;; zero for the latter three fields.
1850 (put-bytevector line-port (string->utf8 file))
1851 (put-u8 line-port 0)
1852 (put-uleb128 line-port 0) ; directory
1853 (put-uleb128 line-port 0) ; mtime
1854 (put-uleb128 line-port 0))) ; size
1858 (put-u8 line-port 0) ; 0 byte terminating file list.
1860 ;; Patch prologue length.
1861 (let ((offset (port-position line-port)))
1862 (seek line-port 6 SEEK_SET)
1863 (put-u32 line-port (- offset 10))
1864 (seek line-port offset SEEK_SET))
1866 ;; Now write the statement program.
1868 (define (extended-op opcode payload-len)
1869 (put-u8 line-port 0) ; extended op
1870 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
1871 (put-uleb128 line-port opcode))
1872 (define (set-address sym)
1873 (define (add-reloc! kind)
1875 (cons (make-linker-reloc kind
1876 (port-position line-port)
1880 (match (asm-word-size asm)
1883 (add-reloc! 'abs32/1)
1884 (put-u32 line-port 0))
1887 (add-reloc! 'abs64/1)
1888 (put-u64 line-port 0))))
1889 (define (end-sequence pc)
1890 (let ((pc-inc (- (asm-pos asm) pc)))
1891 (put-u8 line-port 2) ; advance-pc
1892 (put-uleb128 line-port pc-inc))
1894 (define (advance-pc pc-inc line-inc)
1895 (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
1897 ((or (< line-inc base) (>= line-inc (+ base range)))
1898 (advance-line line-inc)
1899 (advance-pc pc-inc 0))
1901 (put-u8 line-port spec))
1903 (put-u8 line-port 8) ; const-advance-pc
1904 (advance-pc (- pc-inc (floor/ (- 255 10) range))
1907 (put-u8 line-port 2) ; advance-pc
1908 (put-uleb128 line-port pc-inc)
1909 (advance-pc 0 line-inc)))))
1910 (define (advance-line inc)
1911 (put-u8 line-port 3)
1912 (put-sleb128 line-port inc))
1913 (define (set-file file)
1914 (put-u8 line-port 4)
1915 (put-uleb128 line-port file))
1916 (define (set-column col)
1917 (put-u8 line-port 5)
1918 (put-uleb128 line-port col))
1920 (set-address '.rtl-text)
1922 (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
1926 ;; There was no source info in the first place. Set
1927 ;; file register to 0 before adding final row.
1930 (((pc* file* line* col*) . in*)
1932 ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
1933 (lp in* pc file line col))
1935 (unless (eqv? col col*)
1937 (unless (eqv? file file*)
1939 (advance-pc (- pc* pc) (- line* line))
1940 (lp in* pc* file* line* col*)))))))))))
1942 (define (compute-code attr val)
1944 ('name (string-table-intern! strtab val))
1947 ('producer (string-table-intern! strtab val))
1948 ('language (language-name->code val))
1951 (define (exact-integer? val)
1952 (and (number? val) (integer? val) (exact? val)))
1954 (define (choose-form attr val code)
1956 ((string? val) 'strp)
1957 ((eq? attr 'stmt-list) 'sec-offset)
1958 ((exact-integer? code)
1960 ((< code 0) 'sleb128)
1961 ((<= code #xff) 'data1)
1962 ((<= code #xffff) 'data2)
1963 ((<= code #xffffffff) 'data4)
1964 ((<= code #xffffffffffffffff) 'data8)
1966 ((symbol? val) 'addr)
1967 (else (error "unhandled case" attr val code))))
1969 (define (add-die-relocation! kind sym)
1971 (cons (make-linker-reloc kind (port-position die-port) 0 sym)
1974 (define (write-value code form)
1976 ('data1 (put-u8 die-port code))
1977 ('data2 (put-u16 die-port code))
1978 ('data4 (put-u32 die-port code))
1979 ('data8 (put-u64 die-port code))
1980 ('uleb128 (put-uleb128 die-port code))
1981 ('sleb128 (put-sleb128 die-port code))
1983 (match (asm-word-size asm)
1985 (add-die-relocation! 'abs32/1 code)
1986 (put-u32 die-port 0))
1988 (add-die-relocation! 'abs64/1 code)
1989 (put-u64 die-port 0))))
1990 ('sec-offset (put-u32 die-port code))
1991 ('strp (put-u32 die-port code))))
1993 (define (write-die die)
1995 ((tag ('@ (attrs vals) ...) children ...)
1996 (let* ((codes (map compute-code attrs vals))
1997 (forms (map choose-form attrs vals codes))
1998 (has-children? (not (null? children)))
1999 (abbrev-code (intern-abbrev tag has-children? attrs forms)))
2000 (put-uleb128 die-port abbrev-code)
2001 (for-each write-value codes forms)
2003 (for-each write-die children)
2004 (put-uleb128 die-port 0))))))
2006 ;; Compilation unit header.
2007 (put-u32 die-port 0) ; Length; will patch later.
2008 (put-u16 die-port 4) ; DWARF 4.
2009 (put-u32 die-port 0) ; Abbrevs offset.
2010 (put-u8 die-port (asm-word-size asm)) ; Address size.
2012 (write-die (make-compile-unit-die asm))
2014 ;; Terminate the abbrevs list.
2015 (put-uleb128 abbrev-port 0)
2019 (values (let ((bv (get-die-bv)))
2020 ;; Patch DWARF32 length.
2021 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2022 (asm-endianness asm))
2023 (make-object asm '.debug_info bv die-relocs '()
2024 #:type SHT_PROGBITS #:flags 0))
2025 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
2026 #:type SHT_PROGBITS #:flags 0)
2027 (make-object asm '.debug_str (link-string-table! strtab) '() '()
2028 #:type SHT_PROGBITS #:flags 0)
2029 (make-object asm '.debug_loc #vu8() '() '()
2030 #:type SHT_PROGBITS #:flags 0)
2031 (let ((bv (get-line-bv)))
2032 ;; Patch DWARF32 length.
2033 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2034 (asm-endianness asm))
2035 (make-object asm '.debug_line bv line-relocs '()
2036 #:type SHT_PROGBITS #:flags 0)))))
2038 (define (link-objects asm)
2039 (let*-values (;; Link procprops before constants, because it probably
2040 ;; interns more constants.
2041 ((procprops) (link-procprops asm))
2042 ((ro rw rw-init) (link-constants asm))
2043 ;; Link text object after constants, so that the
2044 ;; constants initializer gets included.
2045 ((text) (link-text-object asm))
2046 ((frame-maps) (link-frame-maps asm))
2047 ((dt) (link-dynamic-section asm text rw rw-init frame-maps))
2048 ((symtab strtab) (link-symtab (linker-object-section text) asm))
2049 ((arities arities-strtab) (link-arities asm))
2050 ((docstrs docstrs-strtab) (link-docstrs asm))
2051 ((dinfo dabbrev dstrtab dloc dline) (link-debug asm))
2052 ;; This needs to be linked last, because linking other
2053 ;; sections adds entries to the string table.
2054 ((shstrtab) (link-shstrtab asm)))
2056 (list text ro frame-maps rw dt symtab strtab
2057 arities arities-strtab
2058 docstrs docstrs-strtab procprops
2059 dinfo dabbrev dstrtab dloc dline
2066 ;;; High-level public interfaces.
2069 (define* (link-assembly asm #:key (page-aligned? #t))
2070 "Produce an ELF image from the code and data emitted into @var{asm}.
2071 The result is a bytevector, by default linked so that read-only and
2072 writable data are on separate pages. Pass @code{#:page-aligned? #f} to
2073 disable this behavior."
2074 (link-elf (link-objects asm) #:page-aligned? page-aligned?))