1 ;;; Guile RTL assembler
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm dwarf)
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
51 #:use-module (system vm objcode)
52 #:use-module (rnrs bytevectors)
53 #:use-module (ice-9 binary-ports)
54 #:use-module (ice-9 vlist)
55 #:use-module (ice-9 match)
56 #:use-module (srfi srfi-1)
57 #:use-module (srfi srfi-4)
58 #:use-module (srfi srfi-9)
59 #:use-module (srfi srfi-11)
60 #:export (make-assembler
68 ;;; RTL code consists of 32-bit units, often subdivided in some way.
69 ;;; These helpers create one 32-bit unit from multiple components.
71 (define-syntax-rule (pack-u8-u24 x y)
74 (define-syntax-rule (pack-u8-s24 x y)
80 (else (error "out of range" y)))
;; Combine three fields into one word: BIT stays at bit 0, FIELD is
;; shifted left by 1 and REST is shifted left by 8.  No masking or
;; range checking is done on the operands.
(define-syntax pack-u1-u7-u24
  (syntax-rules ()
    ((_ bit field rest)
     (logior bit
             (ash field 1)
             (ash rest 8)))))
;; Combine three fields into one word: LOW stays at bit 0, MID is
;; shifted left by 8 and HIGH is shifted left by 20.  No masking or
;; range checking is done on the operands.
(define-syntax pack-u8-u12-u12
  (syntax-rules ()
    ((_ low mid high)
     (logior low
             (ash mid 8)
             (ash high 20)))))
;; Combine three fields into one word: LOW stays at bit 0, MID is
;; shifted left by 8 and HIGH is shifted left by 16.  No masking or
;; range checking is done on the operands.
(define-syntax pack-u8-u8-u16
  (syntax-rules ()
    ((_ low mid high)
     (logior low
             (ash mid 8)
             (ash high 16)))))
;; Combine four fields into one word: B0 stays at bit 0, and B1, B2
;; and B3 are shifted left by 8, 16 and 24 respectively.  No masking
;; or range checking is done on the operands.
(define-syntax pack-u8-u8-u8-u8
  (syntax-rules ()
    ((_ b0 b1 b2 b3)
     (logior b0
             (ash b1 8)
             (ash b2 16)
             (ash b3 24)))))
95 (define-syntax pack-flags
97 ;; Add clauses as needed.
98 ((pack-flags f1 f2) (logior (if f1 (ash 1 0) 0)
99 (if f2 (ash 2 0) 0)))))
101 ;;; Helpers to read and write 32-bit units in a buffer.
;; Read the unsigned 32-bit word at word index INDEX (byte offset
;; 4 * INDEX) of BV, in native byte order.
(define-syntax u32-ref
  (syntax-rules ()
    ((_ bv index)
     (bytevector-u32-native-ref bv (* index 4)))))
;; Store WORD as an unsigned 32-bit value at word index INDEX (byte
;; offset 4 * INDEX) of BV, in native byte order.
(define-syntax u32-set!
  (syntax-rules ()
    ((_ bv index word)
     (bytevector-u32-native-set! bv (* index 4) word))))
;; Read the signed 32-bit word at word index INDEX (byte offset
;; 4 * INDEX) of BV, in native byte order.
(define-syntax s32-ref
  (syntax-rules ()
    ((_ bv index)
     (bytevector-s32-native-ref bv (* index 4)))))
;; Store WORD as a signed 32-bit value at word index INDEX (byte
;; offset 4 * INDEX) of BV, in native byte order.
(define-syntax s32-set!
  (syntax-rules ()
    ((_ bv index word)
     (bytevector-s32-native-set! bv (* index 4) word))))
118 ;;; A <meta> entry collects metadata for one procedure. Procedures are
119 ;;; written as contiguous ranges of RTL code.
121 (define-syntax-rule (assert-match arg pattern kind)
123 (unless (match x (pattern #t) (_ #f))
124 (error (string-append "expected " kind) x))))
126 (define-record-type <meta>
127 (%make-meta label properties low-pc high-pc arities)
130 (properties meta-properties set-meta-properties!)
132 (high-pc meta-high-pc set-meta-high-pc!)
133 (arities meta-arities set-meta-arities!))
(define (make-meta label properties low-pc)
  "Create a <meta> record for the procedure named by the symbol
@var{label}, with the alist @var{properties} (whose keys must be
symbols) and starting position @var{low-pc}.  Signals an error if
@var{label} or @var{properties} has the wrong shape."
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)    ; filled in later via set-meta-high-pc!
        (arities '()))  ; extended later via set-meta-arities!
    (%make-meta label properties low-pc high-pc arities)))
(define (meta-name meta)
  "Return the value stored under the @code{name} key in the properties
alist of @var{meta}, or @code{#f} if there is no such key."
  (let ((entry (assq 'name (meta-properties meta))))
    (and entry (cdr entry))))
143 ;; Metadata for one <lambda-case>.
144 (define-record-type <arity>
145 (make-arity req opt rest kw-indices allow-other-keys?
151 (kw-indices arity-kw-indices)
152 (allow-other-keys? arity-allow-other-keys?)
153 (low-pc arity-low-pc)
154 (high-pc arity-high-pc set-arity-high-pc!))
;; Identifier macro that expands to the literal 32 wherever
;; *block-size* is referenced.
(define-syntax *block-size*
  (identifier-syntax 32))
158 ;;; An assembler collects all of the words emitted during assembly, and
159 ;;; also maintains ancillary information such as the constant table, a
160 ;;; relocation list, and so on.
162 ;;; RTL code consists of 32-bit units. We emit RTL code using native
163 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
164 ;;; the bytevector as a whole instead of conditionalizing each access.
166 (define-record-type <asm>
167 (make-asm cur idx start prev written
171 shstrtab next-section-number
175 ;; We write RTL code into what is logically a growable vector,
176 ;; implemented as a list of blocks. asm-cur is the current block, and
177 ;; asm-idx is the current index into that block, in 32-bit units.
179 (cur asm-cur set-asm-cur!)
180 (idx asm-idx set-asm-idx!)
182 ;; asm-start is an absolute position, indicating the offset of the
183 ;; beginning of an instruction (in u32 units). It is updated after
184 ;; writing all the words for one primitive instruction. It models the
185 ;; position of the instruction pointer during execution, given that
186 ;; the RTL VM updates the IP only at the end of executing the
187 ;; instruction, and is thus useful for computing offsets between two
188 ;; points in a program.
190 (start asm-start set-asm-start!)
192 ;; The list of previously written blocks.
194 (prev asm-prev set-asm-prev!)
196 ;; The number of u32 words written in asm-prev, which is the same as
197 ;; the offset of the current block.
199 (written asm-written set-asm-written!)
201 ;; An alist of symbol -> position pairs, indicating the labels defined
202 ;; in this compilation unit.
204 (labels asm-labels set-asm-labels!)
206 ;; A list of relocations needed by the program text. We use an
207 ;; internal representation for relocations, and handle textual
208 ;; relative relocations in the assembler. Other kinds of relocations
209 ;; are later reified as linker relocations and resolved by the linker.
211 (relocs asm-relocs set-asm-relocs!)
213 ;; Target information.
215 (word-size asm-word-size)
216 (endianness asm-endianness)
218 ;; The constant table, as a vhash of object -> label. All constants
219 ;; get de-duplicated and written into separate sections -- either the
220 ;; .rodata section, for read-only data, or .data, for constants that
221 ;; need initialization at load-time (like symbols). Constants can
222 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
223 ;; so order in this table is important.
225 (constants asm-constants set-asm-constants!)
227 ;; A list of RTL instructions needed to initialize the constants.
228 ;; Will run in a thunk with 2 local variables.
230 (inits asm-inits set-asm-inits!)
232 ;; The shstrtab, for section names.
234 (shstrtab asm-shstrtab set-asm-shstrtab!)
236 ;; The section number for the next section to be written.
238 (next-section-number asm-next-section-number set-asm-next-section-number!)
240 ;; A list of <meta>, corresponding to procedure metadata.
242 (meta asm-meta set-asm-meta!))
(define-inlinable (fresh-block)
  "Allocate and return a new u32vector holding @code{*block-size*}
elements.  No fill value is supplied."
  (let ((words *block-size*))
    (make-u32vector words)))
247 (define* (make-assembler #:key (word-size (target-word-size))
248 (endianness (target-endianness)))
249 "Create an assembler for a given target @var{word-size} and
250 @var{endianness}, falling back to appropriate values for the configured
252 (make-asm (fresh-block) 0 0 '() 0
256 (make-string-table) 1
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name string table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((section-names (asm-shstrtab asm)))
    (string-table-intern! section-names string)))
263 (define-inlinable (asm-pos asm)
264 "The offset of the next word to be written into the code buffer, in
266 (+ (asm-idx asm) (asm-written asm)))
(define (allocate-new-block asm)
  "Retire the current block of @var{asm} onto the list of previous
blocks and install a fresh, empty block so that the next word is
written at index 0 of the new block."
  (let* ((replacement (fresh-block))
         (retired (asm-cur asm))
         ;; Must be read before the index is reset below: the position
         ;; is derived from the current index and the written count.
         (total-written (asm-pos asm)))
    (set-asm-prev! asm (cons retired (asm-prev asm)))
    (set-asm-written! asm total-written)
    (set-asm-cur! asm replacement)
    (set-asm-idx! asm 0)))
277 (define-inlinable (emit asm u32)
278 "Emit one 32-bit word into the instruction stream. Assumes that there
279 is space for the word, and ensures that there is space for the next
281 (u32-set! (asm-cur asm) (asm-idx asm) u32)
282 (set-asm-idx! asm (1+ (asm-idx asm)))
283 (if (= (asm-idx asm) *block-size*)
284 (allocate-new-block asm)))
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} referencing symbol
@var{label}, @var{word} words after position @var{base}.  @var{type}
may be x8-s24, indicating a 24-bit relative label reference that can be
fixed up by the assembler, or s32, indicating a 32-bit relative
reference that needs to be fixed up by the linker.

The relocation is represented as the four-element list
@code{(@var{type} @var{label} @var{base} @var{word})}."
  (list type label base word))
(define-inlinable (reset-asm-start! asm)
  "Move the asm-start of @var{asm} to the current write position.
Meant to be called once all the words of one instruction are written."
  (let ((here (asm-pos asm)))
    (set-asm-start! asm here)))
298 (define (emit-exported-label asm label)
299 "Define a linker symbol associating @var{label} with the current
301 (set-asm-labels! asm (acons label (asm-start asm) (asm-labels asm))))
303 (define (record-label-reference asm label)
304 "Record an x8-s24 local label reference. This value will get patched
305 up later by the assembler."
306 (let* ((start (asm-start asm))
308 (reloc (make-reloc 'x8-s24 label start (- pos start))))
309 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
311 (define* (record-far-label-reference asm label #:optional (offset 0))
312 "Record an s32 far label reference. This value will get patched up
313 later by the linker."
314 (let* ((start (- (asm-start asm) offset))
316 (reloc (make-reloc 's32 label start (- pos start))))
317 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
323 ;;; Primitive assemblers are defined by expanding `assembler' for each
324 ;;; opcode in `(rtl-instruction-list)'.
(eval-when (expand compile load eval)
  ;; Build an identifier whose name is the names of the syntax objects
  ;; A and B concatenated, in the lexical context of CTX.
  (define (id-append ctx a b)
    (let* ((prefix (syntax->datum a))
           (suffix (syntax->datum b))
           (joined (symbol-append prefix suffix)))
      (datum->syntax ctx joined))))
331 (define-syntax assembler
333 (define-syntax op-case
336 ((_ asm name ((type arg ...) code ...) clause ...)
337 #`(if (eq? name 'type)
338 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
341 (op-case asm name clause ...)))
343 #'(error "unmatched name" name)))))
345 (define (pack-first-word asm opcode type)
346 (with-syntax ((opcode opcode))
352 (emit asm (pack-u8-u24 opcode arg)))
354 (record-label-reference asm label)
357 (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
359 (emit asm (pack-u8-u12-u12 opcode a b)))
361 (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
363 (define (pack-tail-word asm type)
367 (emit asm (pack-u8-u24 a b)))
369 (record-label-reference asm label)
372 (emit asm (pack-u8-u8-u16 a b (object-address imm))))
374 (emit asm (pack-u8-u12-u12 a b c)))
375 ((U8_U8_U8_U8 a b c d)
376 (emit asm (pack-u8-u8-u8-u8 a b c d)))
380 (let ((val (object-address imm)))
381 (unless (zero? (ash val -32))
382 (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
385 (unless (= (asm-word-size asm) 8)
386 (error "make-long-immediate unavailable for this target"))
387 (emit asm (ash (object-address imm) -32))
388 (emit asm (logand (object-address imm) (1- (ash 1 32)))))
391 (record-far-label-reference asm label)
394 (record-far-label-reference asm label)
397 (record-far-label-reference asm label)
400 (record-far-label-reference asm label
401 (* offset (/ (asm-word-size asm) 4)))
404 (emit asm (pack-u8-u24 0 a)))
406 (emit asm (pack-u8-u12-u12 0 a b)))
408 (record-label-reference asm label)
411 (record-label-reference asm label)
412 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
413 ((B1_U7_L24 a b label)
414 (record-label-reference asm label)
415 (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
417 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
419 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
422 ((_ name opcode word0 word* ...)
423 (with-syntax ((((formal0 ...)
425 (pack-first-word #'asm
426 (syntax->datum #'opcode)
427 (syntax->datum #'word0)))
430 (map (lambda (word) (pack-tail-word #'asm word))
431 (syntax->datum #'(word* ...)))))
432 #'(lambda (asm formal0 ... formal* ... ...)
433 (unless (asm? asm) (error "not an asm"))
436 (reset-asm-start! asm)))))))
438 (define assemblers (make-hash-table))
440 (define-syntax define-assembler
443 ((_ name opcode kind arg ...)
444 (with-syntax ((emit (id-append #'name #'emit- #'name)))
447 (let ((emit (assembler name opcode arg ...)))
448 (hashq-set! assemblers 'name emit)
452 (define-syntax visit-opcodes
455 ((visit-opcodes macro arg ...)
456 (with-syntax (((inst ...)
457 (map (lambda (x) (datum->syntax #'macro x))
458 (rtl-instruction-list))))
460 (macro arg ... . inst)
463 (visit-opcodes define-assembler)
465 (define (emit-text asm instructions)
466 "Assemble @var{instructions} using the assembler @var{asm}.
467 @var{instructions} is a sequence of RTL instructions, expressed as a
468 list of lists. This procedure can be called many times before calling
469 @code{link-assembly}."
470 (for-each (lambda (inst)
471 (apply (or (hashq-ref assemblers (car inst))
472 (error 'bad-instruction inst))
480 ;;; The constant table records a topologically sorted set of literal
481 ;;; constants used by a program. For example, a pair uses its car and
482 ;;; cdr, a string uses its stringbuf, etc.
484 ;;; Some things we want to add to the constant table are not actually
485 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
486 ;;; references, or cache cells for non-closure procedures. For these we
487 ;;; define special record types and add instances of those record types
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise.
The test is whether the object address of @var{x} has any bit in common
with the mask 6."
  (logtest (object-address x) 6))
495 (define-record-type <stringbuf>
496 (make-stringbuf string)
498 (string stringbuf-string))
500 (define-record-type <static-procedure>
501 (make-static-procedure code)
503 (code static-procedure-code))
505 (define-record-type <cache-cell>
506 (make-cache-cell scope key)
508 (scope cache-cell-scope)
509 (key cache-cell-key))
511 (define (statically-allocatable? x)
512 "Return @code{#t} if a non-immediate constant can be allocated
513 statically, and @code{#f} if it would need some kind of runtime
515 (or (pair? x) (vector? x) (string? x) (stringbuf? x) (static-procedure? x)))
517 (define (intern-constant asm obj)
518 "Add an object to the constant table, and return a label that can be
519 used to reference it. If the object is already present in the constant
520 table, its existing label is used directly."
522 (intern-constant asm obj))
523 (define (field dst n obj)
524 (let ((src (recur obj)))
526 (list (if (statically-allocatable? obj)
527 `(make-non-immediate 1 ,src)
528 `(static-ref 1 ,src))
529 `(static-set! 1 ,dst ,n))
531 (define (intern obj label)
534 (append (field label 0 (car obj))
535 (field label 1 (cdr obj))))
537 (let lp ((i 0) (inits '()))
538 (if (< i (vector-length obj))
540 (append-reverse (field label (1+ i) (vector-ref obj i))
543 ((stringbuf? obj) '())
544 ((static-procedure? obj)
545 `((make-non-immediate 1 ,label)
546 (link-procedure! 1 ,(static-procedure-code obj))))
547 ((cache-cell? obj) '())
549 `((make-non-immediate 1 ,(recur (symbol->string obj)))
551 (static-set! 1 ,label 0)))
553 `((make-non-immediate 1 ,(recur (make-stringbuf obj)))
554 (static-set! 1 ,label 1)))
556 `((static-ref 1 ,(recur (keyword->symbol obj)))
557 (symbol->keyword 1 1)
558 (static-set! 1 ,label 0)))
560 `((make-non-immediate 1 ,(recur (number->string obj)))
562 (static-set! 1 ,label 0)))
564 (error "don't know how to intern" obj))))
566 ((immediate? obj) #f)
567 ((vhash-assoc obj (asm-constants asm)) => cdr)
569 ;; Note that calling intern may mutate asm-constants and
570 ;; asm-constant-inits.
571 (let* ((label (gensym "constant"))
572 (inits (intern obj label)))
573 (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
574 (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
577 (define (intern-non-immediate asm obj)
578 "Intern a non-immediate into the constant table, and return its
580 (when (immediate? obj)
581 (error "expected a non-immediate" obj))
582 (intern-constant asm obj))
584 (define (intern-cache-cell asm scope key)
585 "Intern a cache cell into the constant table, and return its label.
586 If there is already a cache cell with the given scope and key, it is
588 (intern-constant asm (make-cache-cell scope key)))
;; The module cell for a scope is the cache cell interned with #t as
;; its key.
(define (intern-module-cache-cell asm scope)
  "Intern the cache cell that stands for the module of @var{scope}, and
return its label."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
599 ;;; Macro assemblers bridge the gap between primitive instructions and
600 ;;; some higher-level operations.
603 (define-syntax define-macro-assembler
606 ((_ (name arg ...) body body* ...)
607 (with-syntax ((emit (id-append #'name #'emit- #'name)))
610 (let ((emit (lambda (arg ...) body body* ...)))
611 (hashq-set! assemblers 'name emit)
615 (define-macro-assembler (load-constant asm dst obj)
618 (let ((bits (object-address obj)))
620 ((and (< dst 256) (zero? (ash bits -16)))
621 (emit-make-short-immediate asm dst obj))
622 ((zero? (ash bits -32))
623 (emit-make-long-immediate asm dst obj))
625 (emit-make-long-long-immediate asm dst obj)))))
626 ((statically-allocatable? obj)
627 (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
629 (emit-static-ref asm dst (intern-non-immediate asm obj)))))
;; Intern a <static-procedure> wrapping LABEL in the constant table and
;; emit a make-non-immediate that loads its address into DST.
(define-macro-assembler (load-static-procedure asm dst label)
  (let* ((proc (make-static-procedure label))
         (proc-label (intern-constant asm proc)))
    (emit-make-non-immediate asm dst proc-label)))
;; Start a new procedure: define LABEL at the current instruction start
;; and push a fresh <meta> for it onto the front of asm-meta.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
;; Finish the most recently begun procedure: record its end position
;; and reverse its arity list, which was accumulated newest-first.
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (arities-in-order (reverse (meta-arities current))))
    (set-meta-high-pc! current (asm-start asm))
    (set-meta-arities! current arities-in-order)))
;; An arity with only required arguments: delegate to begin-opt-arity
;; with no optional arguments and no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
;; An arity without keyword arguments: delegate to begin-kw-arity with
;; an empty keyword index list and allow-other-keys? set to #f.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
651 (define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
652 allow-other-keys? nlocals alternate)
653 (assert-match req ((? symbol?) ...) "list of symbols")
654 (assert-match opt ((? symbol?) ...) "list of symbols")
655 (assert-match rest (or #f (? symbol?)) "#f or symbol")
656 (assert-match kw-indices (((? symbol?) . (? integer?)) ...)
657 "alist of symbol -> integer")
658 (assert-match allow-other-keys? (? boolean?) "boolean")
659 (assert-match nlocals (? integer?) "integer")
660 (assert-match alternate (or #f (? symbol?)) "#f or symbol")
661 (let* ((meta (car (asm-meta asm)))
662 (arity (make-arity req opt rest kw-indices allow-other-keys?
664 ;; The procedure itself is in slot 0, in the standard calling
665 ;; convention. For procedure prologues, nreq includes the
666 ;; procedure, so here we add 1.
667 (nreq (1+ (length req)))
669 (rest? (->bool rest)))
670 (set-meta-arities! meta (cons arity (meta-arities meta)))
672 ((or allow-other-keys? (pair? kw-indices))
673 (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
675 ((or rest? (pair? opt))
676 (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
678 (emit-standard-prelude asm nreq nlocals alternate)))))
;; Close the most recently begun arity of the current procedure by
;; recording the current instruction start as its high-pc.
(define-macro-assembler (end-arity asm)
  (let* ((current-meta (car (asm-meta asm)))
         (current-arity (car (meta-arities current-meta))))
    (set-arity-high-pc! current-arity (asm-start asm))))
684 (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
687 (emit-br-if-nargs-ne asm nreq alternate)
688 (emit-alloc-frame asm nlocals))
689 ((and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
690 (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
692 (emit-assert-nargs-ee asm nreq)
693 (emit-alloc-frame asm nlocals))))
695 (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
697 (emit-br-if-nargs-lt asm nreq alternate)
698 (emit-assert-nargs-ge asm nreq))
701 (emit-bind-rest asm (+ nreq nopt)))
703 (emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
705 (emit-assert-nargs-le asm (+ nreq nopt))))
706 (emit-alloc-frame asm nlocals))
708 (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
709 allow-other-keys? nlocals alternate)
711 (emit-br-if-nargs-lt asm nreq alternate)
712 (emit-assert-nargs-ge asm nreq))
713 (let ((ntotal (fold (lambda (kw ntotal)
715 (((? keyword?) . idx)
716 (max (1+ idx) ntotal))))
717 (+ nreq nopt) kw-indices)))
718 ;; FIXME: port 581f410f
719 (emit-bind-kwargs asm nreq
720 (pack-flags allow-other-keys? rest?)
724 (emit-alloc-frame asm nlocals)))
;; Define the label SYM at the current instruction start by adding a
;; (SYM . position) pair to the front of asm-labels.
(define-macro-assembler (label asm sym)
  (let ((binding (cons sym (asm-start asm))))
    (set-asm-labels! asm (cons binding (asm-labels asm)))))
;; Emit a static-set! that stores MODULE into field 0 of the module
;; cache cell interned for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm
                    module
                    (intern-module-cache-cell asm scope)
                    0))
;; Intern the symbol SYM, the module cache cell for SCOPE and the cache
;; cell keyed by (SCOPE, SYM), then emit a toplevel-box instruction
;; that refers to all three labels.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((name (intern-non-immediate asm sym))
        (module-cell (intern-module-cache-cell asm scope))
        (box-cell (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst box-cell module-cell name bound?)))
;; Intern the symbol SYM, the pair (PUBLIC? . MODULE-NAME) and the
;; cache cell keyed by that pair and SYM, then emit a module-box
;; instruction that refers to all three labels.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((name (intern-non-immediate asm sym))
         ;; The same pair serves both as a constant and as the scope of
         ;; the cache cell.
         (module-key (cons public? module-name))
         (module-key-label (intern-constant asm module-key))
         (box-cell (intern-cache-cell asm module-key sym)))
    (emit-module-box asm dst box-cell module-key-label name bound?)))
750 ;;; Helper for linking objects.
753 (define (make-object asm name bv relocs labels . kwargs)
754 "Make a linker object. This helper handles interning the name in the
755 shstrtab, assigning the size, allocating a fresh index, and defining a
756 corresponding linker symbol for the start of the section."
757 (let ((name-idx (intern-section-name! asm (symbol->string name)))
758 (index (asm-next-section-number asm)))
759 (set-asm-next-section-number! asm (1+ index))
760 (make-linker-object (apply make-elf-section
763 #:size (bytevector-length bv)
766 (cons (make-linker-symbol name 0) labels))))
772 ;;; Linking the constant table. This code is somewhat intertwingled
773 ;;; with the intern-constant code above, as that procedure also
774 ;;; residualizes instructions to initialize constants at load time.
(define (write-immediate asm buf pos x)
  "Write the object address of @var{x} into @var{buf} at byte offset
@var{pos}, as one unsigned word of the word size and endianness
configured in @var{asm}.  Signals an error if the word size is neither
4 nor 8."
  (let ((bits (object-address x))
        (byte-order (asm-endianness asm))
        (size (asm-word-size asm)))
    (cond
     ((eqv? size 4) (bytevector-u32-set! buf pos bits byte-order))
     ((eqv? size 8) (bytevector-u64-set! buf pos bits byte-order))
     (else (error "bad word size" asm)))))
785 (define (emit-init-constants asm)
786 "If there is writable data that needs initialization at runtime, emit
787 a procedure to do that and return its label. Otherwise return
789 (let ((inits (asm-inits asm)))
790 (and (not (null? inits))
791 (let ((label (gensym "init-constants")))
793 `((begin-program ,label ())
794 (assert-nargs-ee/locals 1 1)
796 (load-constant 1 ,*unspecified*)
801 (define (link-data asm data name)
802 "Link the static data for a program into the @var{name} section (which
803 should be .data or .rodata), and return the resulting linker object.
804 @var{data} should be a vhash mapping objects to labels."
805 (define (align address alignment)
807 (modulo (- alignment (modulo address alignment)) alignment)))
809 (define tc7-vector 13)
810 (define stringbuf-shared-flag #x100)
811 (define stringbuf-wide-flag #x400)
812 (define tc7-stringbuf 39)
813 (define tc7-narrow-stringbuf
814 (+ tc7-stringbuf stringbuf-shared-flag))
815 (define tc7-wide-stringbuf
816 (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
817 (define tc7-ro-string (+ 21 #x200))
818 (define tc7-rtl-program 69)
820 (let ((word-size (asm-word-size asm))
821 (endianness (asm-endianness asm)))
822 (define (byte-length x)
825 (let ((x (stringbuf-string x)))
827 (case (string-bytes-per-char x)
828 ((1) (1+ (string-length x)))
829 ((4) (* (1+ (string-length x)) 4))
830 (else (error "bad string bytes per char" x))))))
831 ((static-procedure? x)
838 (* (1+ (vector-length x)) word-size))
842 (define (write-constant-reference buf pos x)
843 ;; The asm-inits will fix up any reference to a non-immediate.
844 (write-immediate asm buf pos (if (immediate? x) x #f)))
846 (define (write buf pos obj)
849 (let* ((x (stringbuf-string obj))
850 (len (string-length x))
851 (tag (if (= (string-bytes-per-char x) 1)
853 tc7-wide-stringbuf)))
856 (bytevector-u32-set! buf pos tag endianness)
857 (bytevector-u32-set! buf (+ pos 4) len endianness))
859 (bytevector-u64-set! buf pos tag endianness)
860 (bytevector-u64-set! buf (+ pos 8) len endianness))
862 (error "bad word size" asm)))
863 (let ((pos (+ pos (* word-size 2))))
864 (case (string-bytes-per-char x)
868 (let ((u8 (char->integer (string-ref x i))))
869 (bytevector-u8-set! buf (+ pos i) u8)
871 (bytevector-u8-set! buf (+ pos i) 0))))
875 (let ((u32 (char->integer (string-ref x i))))
876 (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
878 (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
879 (else (error "bad string bytes per char" x))))))
881 ((static-procedure? obj)
884 (bytevector-u32-set! buf pos tc7-rtl-program endianness)
885 (bytevector-u32-set! buf (+ pos 4) 0 endianness))
887 (bytevector-u64-set! buf pos tc7-rtl-program endianness)
888 (bytevector-u64-set! buf (+ pos 8) 0 endianness))
889 (else (error "bad word size"))))
892 (write-immediate asm buf pos #f))
895 (let ((tag (logior tc7-ro-string (ash (string-length obj) 8))))
898 (bytevector-u32-set! buf pos tc7-ro-string endianness)
899 (write-immediate asm buf (+ pos 4) #f) ; stringbuf
900 (bytevector-u32-set! buf (+ pos 8) 0 endianness)
901 (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
903 (bytevector-u64-set! buf pos tc7-ro-string endianness)
904 (write-immediate asm buf (+ pos 8) #f) ; stringbuf
905 (bytevector-u64-set! buf (+ pos 16) 0 endianness)
906 (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
907 (else (error "bad word size")))))
910 (write-constant-reference buf pos (car obj))
911 (write-constant-reference buf (+ pos word-size) (cdr obj)))
914 (let* ((len (vector-length obj))
915 (tag (logior tc7-vector (ash len 8))))
917 ((4) (bytevector-u32-set! buf pos tag endianness))
918 ((8) (bytevector-u64-set! buf pos tag endianness))
919 (else (error "bad word size")))
921 (when (< i (vector-length obj))
922 (let ((pos (+ pos word-size (* i word-size)))
923 (elt (vector-ref obj i)))
924 (write-constant-reference buf pos elt)
928 (write-immediate asm buf pos #f))
931 (write-immediate asm buf pos #f))
934 (write-immediate asm buf pos #f))
937 (error "unrecognized object" obj))))
940 ((vlist-null? data) #f)
942 (let* ((byte-len (vhash-fold (lambda (k v len)
943 (+ (byte-length k) (align len 8)))
945 (buf (make-bytevector byte-len 0)))
946 (let lp ((i 0) (pos 0) (labels '()))
947 (if (< i (vlist-length data))
948 (let* ((pair (vlist-ref data i))
950 (obj-label (cdr pair)))
953 (align (+ (byte-length obj) pos) 8)
954 (cons (make-linker-symbol obj-label pos) labels)))
955 (make-object asm name buf '() labels
957 ('.data (logior SHF_ALLOC SHF_WRITE))
958 ('.rodata SHF_ALLOC))))))))))
960 (define (link-constants asm)
961 "Link sections to hold constants needed by the program text emitted
964 Returns three values: an object for the .rodata section, an object for
965 the .data section, and a label for an initialization procedure. Any of
966 these may be @code{#f}."
967 (define (shareable? x)
971 (and (immediate? (car x)) (immediate? (cdr x))))
974 (or (= i (vector-length x))
975 (and (immediate? (vector-ref x i))
978 (let* ((constants (asm-constants asm))
979 (len (vlist-length constants)))
984 (values (link-data asm ro '.rodata)
985 (link-data asm rw '.data)
986 (emit-init-constants asm))
987 (let ((pair (vlist-ref constants i)))
988 (if (shareable? (car pair))
989 (lp (1+ i) (vhash-consq (car pair) (cdr pair) ro) rw)
990 (lp (1+ i) ro (vhash-consq (car pair) (cdr pair) rw))))))))
995 ;;; Linking program text.
998 (define (process-relocs buf relocs labels)
999 "Patch up internal x8-s24 relocations, and any s32 relocations that
1000 reference symbols in the text section. Return a list of linker
1001 relocations for references to symbols defined outside the text section."
1003 (lambda (reloc tail)
1005 ((type label base word)
1006 (let ((abs (assq-ref labels label))
1007 (dst (+ base word)))
1011 (let ((rel (- abs base)))
1012 (s32-set! buf dst rel)
1014 (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
1018 (error "unbound near relocation" reloc))
1019 (let ((rel (- abs base))
1020 (u32 (u32-ref buf dst)))
1021 (u32-set! buf dst (pack-u8-s24 (logand u32 #xff) rel))
1023 (else (error "bad relocation kind" reloc)))))))
1027 (define (process-labels labels)
1028 "Define linker symbols for the label-offset pairs in @var{labels}.
1029 The offsets are expected to be expressed in words."
1031 (make-linker-symbol (car pair) (* (cdr pair) 4)))
1034 (define (swap-bytes! buf)
1035 "Patch up the text buffer @var{buf}, swapping the endianness of each
1037 (unless (zero? (modulo (bytevector-length buf) 4))
1038 (error "unexpected length"))
1039 (let ((byte-len (bytevector-length buf)))
1041 (unless (= pos byte-len)
1042 (bytevector-u32-set!
1044 (bytevector-u32-ref buf pos (endianness big))
1045 (endianness little))
1048 (define (link-text-object asm)
1049 "Link the .rtl-text section, swapping the endianness of the bytes if
1051 (let ((buf (make-u32vector (asm-pos asm))))
1052 (let lp ((pos 0) (prev (reverse (asm-prev asm))))
1054 (let ((byte-size (* (asm-idx asm) 4)))
1055 (bytevector-copy! (asm-cur asm) 0 buf pos byte-size)
1056 (unless (eq? (asm-endianness asm) (native-endianness))
1058 (make-object asm '.rtl-text
1060 (process-relocs buf (asm-relocs asm)
1062 (process-labels (asm-labels asm))))
1063 (let ((len (* *block-size* 4)))
1064 (bytevector-copy! (car prev) 0 buf pos len)
1065 (lp (+ pos len) (cdr prev)))))))
1071 ;;; Linking other sections of the ELF file, like the dynamic segment,
1072 ;;; the symbol table, etc.
(define (link-dynamic-section asm text rw rw-init)
  "Link the dynamic section for an ELF image with RTL text, given the
writable data section @var{rw} needing fixup from the procedure with
label @var{rw-init}.  @var{rw-init} may be false.  If @var{rw} is true,
it will be added to the GC roots at runtime."
  ;; The section is an array of words laid out as (tag, value) pairs:
  ;; tags sit at even indices, values at odd indices.  It is a macro so
  ;; that the word setter and relocation kind are chosen per word size.
  (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           (bv (make-bytevector (* word-size (if rw (if rw-init 12 10) 6)) 0))
           (set-uword!
            (lambda (i uword)
              (%set-uword! bv (* i word-size) uword endianness)))
           (relocs '())
           (set-label!
            (lambda (i label)
              ;; Record a relocation against LABEL and leave a zero
              ;; placeholder in the buffer.
              (set! relocs (cons (make-linker-reloc 'reloc-type
                                                    (* i word-size) 0 label)
                                 relocs))
              (%set-uword! bv (* i word-size) 0 endianness))))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 #x02020000)
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (cond
       (rw
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (cond
         (rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)
          (set-uword! 10 DT_NULL))
         (else
          (set-uword! 8 DT_NULL))))
       (else
        (set-uword! 4 DT_NULL)))
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
    (else (error "bad word size" asm))))
(define (link-shstrtab asm)
  "Link the string table for the section headers."
  ;; Intern this section's own name before the table is serialized.
  (intern-section-name! asm ".shstrtab")
  (make-object asm '.shstrtab
               (link-string-table! (asm-shstrtab asm))
               '() '()
               #:type SHT_STRTAB #:flags 0))
(define (link-symtab text-section asm)
  "Link the .symtab section and its .strtab string table.  One hidden
function symbol is written for each entry of @code{(asm-meta asm)},
pointing into @var{text-section}.  Return two values: the .symtab
linker object and the .strtab linker object."
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (size (elf-symbol-len word-size))
         (meta (reverse (asm-meta asm)))
         (n (length meta))
         (strtab (make-string-table))
         (bv (make-bytevector (* n size) 0)))
    (define (intern-string! name)
      ;; Entries without a name get the empty string.
      (string-table-intern! strtab (if name (symbol->string name) "")))
    (for-each
     (lambda (meta n)
       (let ((name (intern-string! (meta-name meta))))
         (write-elf-symbol bv (* n size) endianness word-size
                           (make-elf-symbol
                            #:name name
                            ;; Symbol value and size are measured in
                            ;; bytes, not u32s.
                            #:value (* 4 (meta-low-pc meta))
                            #:size (* 4 (- (meta-high-pc meta)
                                           (meta-low-pc meta)))
                            #:type STT_FUNC
                            #:visibility STV_HIDDEN
                            #:shndx (elf-section-index text-section)))))
     meta (iota n))
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! strtab)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1168 ;;; The .guile.arities section describes the arities that a function can
1169 ;;; have. It is in two parts: a sorted array of headers describing
1170 ;;; basic arities, and an array of links out to a string table (and in
1171 ;;; the case of keyword arguments, to the data section) for argument
1172 ;;; names. The whole thing is prefixed by a uint32 indicating the
1173 ;;; offset of the end of the headers array.
;;; The arity headers array is a packed array of structures of the form:
;;;
;;;   struct arity_header {
;;;     uint32_t low_pc;
;;;     uint32_t high_pc;
;;;     uint32_t offset;
;;;     uint32_t flags;
;;;     uint32_t nreq;
;;;     uint32_t nopt;
;;;   }
;;;
1186 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1187 ;;; future to use 64-bit offsets if appropriate, but there are other
1188 ;;; aspects of RTL that constrain us to a total image that fits in 32
1189 ;;; bits, so for the moment we'll simplify the problem space.
;;; The following flags values are defined:
;;;
;;;    #x1: has-rest?
;;;    #x2: allow-other-keys?
;;;    #x4: has-keyword-args?
;;;    #x8: is-case-lambda?
1198 ;;; Functions with a single arity specify their number of required and
1199 ;;; optional arguments in nreq and nopt, and do not have the
1200 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1201 ;;; of pointers into the associated .guile.arities.strtab string table,
1202 ;;; identifying the argument names. This offset is relative to the
1203 ;;; start of the .guile.arities section. Links for required arguments
1204 ;;; are first, in order, as uint32 values. Next follow the optionals,
1205 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1206 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1207 ;;; links, the kw-indices link points into the data section, and is
1208 ;;; relative to the ELF image as a whole.
1210 ;;; Functions with no arities have no arities information present in the
1211 ;;; .guile.arities section.
1213 ;;; Functions with multiple arities are preceded by a header with
1214 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1215 ;;; high-pc which should be the bounds of the whole function. Headers
1216 ;;; for the individual arities follow. In this way the whole headers
1217 ;;; array is sorted in increasing low-pc order, and case-lambda clauses
;;; are contained within the [low-pc, high-pc] of the case-lambda
;;; header.
;; Size in bytes of the uint32 prefix that starts the arities section.
(define arities-prefix-len 4)

;; Size in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte offset of the "offset" field within an arity header; it is the
;; third uint32 field.
(define arity-header-offset-offset (* 4 2))
;; Combine the four arity booleans into the flags word of an arity
;; header, one bit per flag.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)))
(define (meta-arities-size meta)
  "Return the number of bytes that the arities of @var{meta} occupy in
the .guile.arities section, counting both headers and name links."
  (define (lambda-size arity)
    ;; One header plus one 4-byte link per argument name (and one for
    ;; the kw-indices literal, if any).
    (+ arity-header-len
       (* 4 ;; name pointers
          (+ (length (arity-req arity))
             (length (arity-opt arity))
             (if (arity-rest arity) 1 0)
             (if (pair? (arity-kw-indices arity)) 1 0)))))
  (define (case-lambda-size arities)
    (fold +
          arity-header-len ;; case-lambda header
          (map lambda-size arities))) ;; the cases
  (match (meta-arities meta)
    (() 0)
    ((arity) (lambda-size arity))
    (arities (case-lambda-size arities))))
(define (write-arity-headers metas bv endianness)
  "Write the arity headers for @var{metas} into @var{bv}, starting just
after the section prefix, and then fill in the prefix with the end
position of the headers array.  Return two values: that end position,
and an alist mapping each arity to the byte position of its header's
\"offset\" field, in the order the headers were written."
  (define (write-arity-header* pos low-pc high-pc flags nreq nopt)
    (bytevector-u32-set! bv pos low-pc endianness)
    (bytevector-u32-set! bv (+ pos 4) high-pc endianness)
    (bytevector-u32-set! bv (+ pos 8) 0 endianness) ; offset
    (bytevector-u32-set! bv (+ pos 12) flags endianness)
    (bytevector-u32-set! bv (+ pos 16) nreq endianness)
    (bytevector-u32-set! bv (+ pos 20) nopt endianness))
  (define (write-arity-header pos arity)
    (write-arity-header* pos (arity-low-pc arity)
                         (arity-high-pc arity)
                         (pack-arity-flags (arity-rest arity)
                                           (arity-allow-other-keys? arity)
                                           (pair? (arity-kw-indices arity))
                                           #f)
                         (length (arity-req arity))
                         (length (arity-opt arity))))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (match metas
      (()
       ;; Fill in the prefix.
       (bytevector-u32-set! bv 0 pos endianness)
       (values pos (reverse offsets)))
      ((meta . metas)
       (match (meta-arities meta)
         (() (lp metas pos offsets))
         ((arity)
          (write-arity-header pos arity)
          (lp metas
              (+ pos arity-header-len)
              (acons arity (+ pos arity-header-offset-offset) offsets)))
         (arities
          ;; Write a case-lambda header, then individual arities.
          ;; The case-lambda header's offset link is 0.
          (write-arity-header* pos (meta-low-pc meta) (meta-high-pc meta)
                               (pack-arity-flags #f #f #f #t) 0 0)
          (let lp* ((arities arities) (pos (+ pos arity-header-len))
                    (offsets offsets))
            (match arities
              (() (lp metas pos offsets))
              ((arity . arities)
               (write-arity-header pos arity)
               (lp* arities
                    (+ pos arity-header-len)
                    (acons arity
                           (+ pos arity-header-offset-offset)
                           offsets)))))))))))
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  "Write the argument-name links for each arity in
@var{arity-offset-pairs} into @var{bv}, starting at byte position
@var{pos}, interning the names in @var{strtab}.  Each arity's header
\"offset\" field is patched to point at its links.  Return the list of
linker relocations for the kw-indices links.  An error is signalled if
the links do not end exactly at the end of @var{bv}."
  (define (write-symbol sym pos)
    (bytevector-u32-set! bv pos
                         (string-table-intern! strtab (symbol->string sym))
                         (asm-endianness asm))
    ;; Return the position of the next link, so this works with fold.
    (+ pos 4))
  (define (write-kw-indices pos kw-indices)
    ;; FIXME: Assert that kw-indices is already interned.
    (make-linker-reloc 'abs32/1 pos 0
                       (intern-constant asm kw-indices)))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (match pairs
      (()
       (unless (= pos (bytevector-length bv))
         (error "expected to fully fill the bytevector"
                pos (bytevector-length bv)))
       relocs)
      (((arity . offset) . pairs)
       ;; Point the header's "offset" field at this arity's links.
       (bytevector-u32-set! bv offset pos (asm-endianness asm))
       (let ((pos (fold write-symbol
                        pos
                        (append (arity-req arity)
                                (arity-opt arity)
                                (cond
                                 ((arity-rest arity) => list)
                                 (else '()))))))
         (match (arity-kw-indices arity)
           (() (lp pos pairs relocs))
           (kw-indices
            ;; The kw-indices slot is left for the linker to fill in.
            (lp (+ pos 4)
                pairs
                (cons (write-kw-indices pos kw-indices) relocs)))))))))
(define (link-arities asm)
  "Link the .guile.arities section and its string table.  Return two
values: the .guile.arities linker object and the .guile.arities.strtab
linker object."
  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         ;; Total section size: the prefix plus every meta's headers
         ;; and links.
         (size (fold (lambda (meta size)
                       (+ size (meta-arities-size meta)))
                     arities-prefix-len
                     metas))
         (strtab (make-string-table))
         (bv (make-bytevector size 0)))
    (let ((kw-indices-relocs
           (call-with-values
               (lambda ()
                 (write-arity-headers metas bv endianness))
             (lambda (pos arity-offset-pairs)
               (write-arity-links asm bv pos arity-offset-pairs strtab)))))
      (let ((strtab (make-object asm '.guile.arities.strtab
                                 (link-string-table! strtab)
                                 '() '()
                                 #:type SHT_STRTAB #:flags 0)))
        (values (make-object asm '.guile.arities
                             bv
                             kw-indices-relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link (elf-section-index
                                     (linker-object-section strtab)))
                strtab)))))
1363 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1364 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1365 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1366 ;;; entry to a program, relative to the start of the text section, and
;;; str is an index into the associated .guile.docstrs.strtab string
;;; table section.
;; Size in bytes of one .guile.docstrs entry: a uint32 pc followed by a
;; uint32 string-table index.
(define docstr-size (* 2 4))
(define (link-docstrs asm)
  "Link the .guile.docstrs section and its string table.  Return two
values: the .guile.docstrs linker object and the .guile.docstrs.strtab
linker object."
  (define (find-docstrings)
    ;; Collect (low-pc . docstring) pairs for every meta that has
    ;; exactly one `documentation' property whose value is a string.
    (filter-map (lambda (meta)
                  (define (is-documentation? pair)
                    (eq? (car pair) 'documentation))
                  (let* ((props (meta-properties meta))
                         (tail (find-tail is-documentation? props)))
                    (and tail
                         (not (find-tail is-documentation? (cdr tail)))
                         (string? (cdar tail))
                         (cons (meta-low-pc meta) (cdar tail)))))
                (reverse (asm-meta asm))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (find-docstrings))
         (strtab (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    ;; Write one (pc, string index) entry per docstring.
    (fold (lambda (pair pos)
            (match pair
              ((pc . string)
               (bytevector-u32-set! bv pos pc endianness)
               (bytevector-u32-set! bv (+ pos 4)
                                    (string-table-intern! strtab string)
                                    endianness)
               (+ pos docstr-size))))
          0
          docstrings)
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! strtab)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1413 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1414 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1415 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1416 ;;; entry to a program, relative to the start of the text section, and
1417 ;;; addr is the address of the associated properties alist, relative to
1418 ;;; the start of the ELF image.
1420 ;;; Since procedure properties are stored in the data sections, we need
1421 ;;; to link the procedures property section first. (Note that this
1422 ;;; constraint does not apply to the arities section, which may
1423 ;;; reference the data sections via the kw-indices literal, because
;;; assembling the text section already makes sure that the kw-indices
;;; are interned.)
;; Size in bytes of one .guile.procprops entry: a uint32 pc followed by
;; a uint32 address.
(define procprops-size (* 2 4))
(define (link-procprops asm)
  "Link the .guile.procprops section.  An entry is written for every
meta that still has properties once its name and string docstring are
removed.  Return the .guile.procprops linker object."
  (define (assoc-remove-one alist key value-pred)
    ;; Look at the first entry for KEY only: drop it if VALUE-PRED
    ;; accepts its value, otherwise keep it.  Entries after it are left
    ;; untouched.
    (match alist
      (() '())
      ((((? (lambda (x) (eq? x key))) . value) . alist)
       (if (value-pred value)
           alist
           (acons key value alist)))
      (((k . v) . alist)
       (acons k v (assoc-remove-one alist key value-pred)))))
  (define (props-without-name-or-docstring meta)
    (assoc-remove-one
     (assoc-remove-one (meta-properties meta) 'name (lambda (x) #t))
     'documentation
     string?))
  (define (find-procprops)
    (filter-map (lambda (meta)
                  (let ((props (props-without-name-or-docstring meta)))
                    (and (pair? props)
                         (cons (meta-low-pc meta) props))))
                (reverse (asm-meta asm))))
  (let* ((endianness (asm-endianness asm))
         (procprops (find-procprops))
         (bv (make-bytevector (* (length procprops) procprops-size) 0)))
    (let lp ((procprops procprops) (pos 0) (relocs '()))
      (match procprops
        (()
         (make-object asm '.guile.procprops
                      bv
                      relocs '()
                      #:type SHT_PROGBITS #:flags 0))
        (((pc . props) . procprops)
         (bytevector-u32-set! bv pos pc endianness)
         ;; The address word at pos+4 is filled in by the linker from
         ;; the relocation against the interned props constant.
         (lp procprops
             (+ pos procprops-size)
             (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                      (intern-constant asm props))
                   relocs)))))))
1471 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1472 ;;; sections provide line number and local variable liveness
;;; information.  Their format is defined by the DWARF
;;; specifications.
(define (asm-language asm)
  "Return the source language name recorded in the debugging
information for @var{asm}."
  ;; FIXME: Plumb language through to the assembler.
  ;; NOTE(review): the body was missing from this copy of the file; the
  ;; constant below is presumed to be the original value -- confirm.
  'scheme)
;; -> 4 values: .debug_info, .debug_abbrev, .debug_str, and .debug_loc
(define (link-debug asm)
  "Link the DWARF debugging sections for @var{asm}.  A compile-unit DIE
is written with one subprogram child per entry of
@code{(asm-meta asm)}.  Return four linker objects as multiple values:
.debug_info, .debug_abbrev, .debug_str and .debug_loc."
  ;; Fixed-width writers, using the assembler's endianness.
  (define (put-u16 port val)
    (let ((bv (make-bytevector 2)))
      (bytevector-u16-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u32 port val)
    (let ((bv (make-bytevector 4)))
      (bytevector-u32-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u64 port val)
    (let ((bv (make-bytevector 8)))
      (bytevector-u64-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-uleb128 port val)
    ;; Emit 7 bits per byte, low bits first; every byte but the last
    ;; has its high bit set.
    (let lp ((val val))
      (let ((next (ash val -7)))
        (if (zero? next)
            (put-u8 port val)
            (begin
              (put-u8 port (logior #x80 (logand val #x7f)))
              (lp next))))))

  (define (meta->subprogram-die meta)
    ;; The name attribute is only present for named procedures.
    `(subprogram
      (@ ,@(cond
            ((meta-name meta)
             => (lambda (name) `((name ,(symbol->string name)))))
            (else
             '()))
         (low-pc ,(meta-label meta))
         (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

  (define (make-compile-unit-die asm)
    `(compile-unit
      (@ (producer ,(string-append "Guile " (version)))
         (language ,(asm-language asm))
         (low-pc .rtl-text)
         (high-pc ,(* 4 (asm-pos asm))))
      ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

  (let-values (((die-port get-die-bv) (open-bytevector-output-port))
               ((die-relocs) '())
               ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
               ;; (tag has-kids? attrs forms) -> code
               ((abbrevs) vlist-null)
               ((next-abbrev-code) 1)
               ((strtab) (make-string-table)))

    (define (write-abbrev code tag has-children? attrs forms)
      (put-uleb128 abbrev-port code)
      (put-uleb128 abbrev-port (tag-name->code tag))
      (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
      (for-each (lambda (attr form)
                  (put-uleb128 abbrev-port (attribute-name->code attr))
                  (put-uleb128 abbrev-port (form-name->code form)))
                attrs forms)
      ;; A (0, 0) attribute/form pair ends the declaration.
      (put-uleb128 abbrev-port 0)
      (put-uleb128 abbrev-port 0))

    (define (intern-abbrev tag has-children? attrs forms)
      ;; Reuse the code of an identical abbreviation if one was already
      ;; written; otherwise allocate the next code and write it out.
      (let ((key (list tag has-children? attrs forms)))
        (match (vhash-assoc key abbrevs)
          ((_ . code) code)
          (#f (let ((code next-abbrev-code))
                (set! next-abbrev-code (1+ next-abbrev-code))
                (set! abbrevs (vhash-cons key code abbrevs))
                (write-abbrev code tag has-children? attrs forms)
                code)))))

    (define (compute-code attr val)
      ;; Map an attribute value to what gets written: a string-table
      ;; offset, a language code, or the value itself.
      (match attr
        ('name (string-table-intern! strtab val))
        ('low-pc val)
        ('high-pc val)
        ('producer (string-table-intern! strtab val))
        ('language (language-name->code val))))

    (define (exact-integer? val)
      (and (number? val) (integer? val) (exact? val)))

    (define (choose-form attr val code)
      ;; Strings are string-table references, integers take the
      ;; smallest fixed-width form that fits, symbols are addresses.
      (cond
       ((string? val) 'strp)
       ((exact-integer? code)
        (cond
         ((< code 0) 'sleb128)
         ((<= code #xff) 'data1)
         ((<= code #xffff) 'data2)
         ((<= code #xffffffff) 'data4)
         ((<= code #xffffffffffffffff) 'data8)
         (else 'uleb128)))
       ((symbol? val) 'addr)
       (else (error "unhandled case" attr val code))))

    (define (add-die-relocation! kind sym)
      ;; The relocation applies at the current write position of the
      ;; DIE port.
      (set! die-relocs
            (cons (make-linker-reloc kind (seek die-port 0 SEEK_CUR) 0 sym)
                  die-relocs)))

    (define (write-value code form)
      (match form
        ('data1 (put-u8 die-port code))
        ('data2 (put-u16 die-port code))
        ('data4 (put-u32 die-port code))
        ('data8 (put-u64 die-port code))
        ('uleb128 (put-uleb128 die-port code))
        ('sleb128 (error "not yet implemented"))
        ('addr
         ;; Addresses are symbols: record a relocation and write a
         ;; zero placeholder of the target's word size.
         (match (asm-word-size asm)
           (4
            (add-die-relocation! 'abs32/1 code)
            (put-u32 die-port 0))
           (8
            (add-die-relocation! 'abs64/1 code)
            (put-u64 die-port 0))))
        ('strp (put-u32 die-port code))))

    (define (write-die die)
      (match die
        ((tag ('@ (attrs vals) ...) children ...)
         (let* ((codes (map compute-code attrs vals))
                (forms (map choose-form attrs vals codes))
                (has-children? (not (null? children)))
                (abbrev-code (intern-abbrev tag has-children? attrs forms)))
           (put-uleb128 die-port abbrev-code)
           (for-each write-value codes forms)
           (when has-children?
             (for-each write-die children)
             ;; A zero abbreviation code ends the list of children.
             (put-uleb128 die-port 0))))))

    ;; Compilation unit header.
    (put-u32 die-port 0) ; Length; will patch later.
    (put-u16 die-port 4) ; DWARF 4.
    (put-u32 die-port 0) ; Abbrevs offset.
    (put-u8 die-port (asm-word-size asm)) ; Address size.

    (write-die (make-compile-unit-die asm))

    ;; Terminate the abbrevs list.
    (put-uleb128 abbrev-port 0)

    (values (let ((bv (get-die-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_info bv die-relocs '()
                           #:type SHT_PROGBITS #:flags 0))
            (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_str (link-string-table! strtab) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_loc #vu8() '() '()
                         #:type SHT_PROGBITS #:flags 0))))
(define (link-objects asm)
  "Link every section of the image for @var{asm}, in an order that
respects the dependencies noted below, and return the resulting linker
objects as a list.  False entries are dropped from the list."
  (let*-values (;; Link procprops before constants, because it probably
                ;; interns more constants.
                ((procprops) (link-procprops asm))
                ((ro rw rw-init) (link-constants asm))
                ;; Link text object after constants, so that the
                ;; constants initializer gets included.
                ((text) (link-text-object asm))
                ((dt) (link-dynamic-section asm text rw rw-init))
                ((symtab strtab) (link-symtab (linker-object-section text) asm))
                ((arities arities-strtab) (link-arities asm))
                ((docstrs docstrs-strtab) (link-docstrs asm))
                ((dinfo dabbrev dstrtab dloc) (link-debug asm))
                ;; This needs to be linked last, because linking other
                ;; sections adds entries to the string table.
                ((shstrtab) (link-shstrtab asm)))
    ;; NOTE(review): the tail of this expression was missing from this
    ;; copy of the file.  It is restored on the assumption that some
    ;; objects (e.g. RO or RW) may be #f and must be filtered out --
    ;; confirm against link-constants.
    (filter identity
            (list text ro rw dt symtab strtab arities arities-strtab
                  docstrs docstrs-strtab procprops
                  dinfo dabbrev dstrtab dloc
                  shstrtab))))
1665 ;;; High-level public interfaces.
(define* (link-assembly asm #:key (page-aligned? #t))
  "Link the code and data emitted into @var{asm} and return the
resulting ELF image as a bytevector.  By default read-only and writable
data are placed on separate pages; pass @code{#:page-aligned? #f} to
disable this behavior."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))
(define (assemble-program instructions)
  "Take the sequence of instructions @var{instructions}, assemble them
into RTL code, link an image, and load that image from memory.  Returns
the result of loading the image with @code{load-thunk-from-memory}."
  (let ((asm (make-assembler)))
    (emit-text asm instructions)
    ;; The image is loaded straight from memory, so it is linked
    ;; without page alignment.
    (load-thunk-from-memory (link-assembly asm #:page-aligned? #f))))