(define-module (system vm assembler)
#:use-module (system base target)
#:use-module (system vm instruction)
+ #:use-module (system vm dwarf)
#:use-module (system vm elf)
#:use-module (system vm linker)
- #:use-module (system vm objcode)
#:use-module (rnrs bytevectors)
+ #:use-module (ice-9 binary-ports)
#:use-module (ice-9 vlist)
#:use-module (ice-9 match)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-11)
#:export (make-assembler
emit-text
- link-assembly
- assemble-program))
+ link-assembly))
\f
;;; RTL code consists of 32-bit units, often subdivided in some way.
;;; These helpers create one 32-bit unit from multiple components.
-(define-syntax-rule (pack-u8-u24 x y)
+(define-inlinable (pack-u8-u24 x y)
+ (unless (<= 0 x 255)
+ (error "out of range" x))
(logior x (ash y 8)))
-(define-syntax-rule (pack-u8-s24 x y)
+(define-inlinable (pack-u8-s24 x y)
+ (unless (<= 0 x 255)
+ (error "out of range" x))
(logior x (ash (cond
((< 0 (- y) #x800000)
(+ y #x1000000))
(else (error "out of range" y)))
8)))
-(define-syntax-rule (pack-u1-u7-u24 x y z)
+(define-inlinable (pack-u1-u7-u24 x y z)
+ (unless (<= 0 x 1)
+ (error "out of range" x))
+ (unless (<= 0 y 127)
+ (error "out of range" y))
(logior x (ash y 1) (ash z 8)))
-(define-syntax-rule (pack-u8-u12-u12 x y z)
+(define-inlinable (pack-u8-u12-u12 x y z)
+ (unless (<= 0 x 255)
+ (error "out of range" x))
+ (unless (<= 0 y 4095)
+ (error "out of range" y))
(logior x (ash y 8) (ash z 20)))
-(define-syntax-rule (pack-u8-u8-u16 x y z)
+(define-inlinable (pack-u8-u8-u16 x y z)
+ (unless (<= 0 x 255)
+ (error "out of range" x))
+ (unless (<= 0 y 255)
+ (error "out of range" y))
(logior x (ash y 8) (ash z 16)))
-(define-syntax-rule (pack-u8-u8-u8-u8 x y z w)
+(define-inlinable (pack-u8-u8-u8-u8 x y z w)
+ (unless (<= 0 x 255)
+ (error "out of range" x))
+ (unless (<= 0 y 255)
+ (error "out of range" y))
+ (unless (<= 0 z 255)
+ (error "out of range" z))
(logior x (ash y 8) (ash z 16) (ash w 24)))
(define-syntax pack-flags
word-size endianness
constants inits
shstrtab next-section-number
- meta)
+ meta sources)
asm?
;; We write RTL code into what is logically a growable vector,
;; A list of <meta>, corresponding to procedure metadata.
;;
- (meta asm-meta set-asm-meta!))
+ (meta asm-meta set-asm-meta!)
+
+ ;; A list of (pos . source) pairs, indicating source information. POS
+ ;; is relative to the beginning of the text section, and SOURCE is in
+ ;; the same format that source-properties returns.
+ ;;
+ (sources asm-sources set-asm-sources!))
(define-inlinable (fresh-block)
(make-u32vector *block-size*))
@var{endianness}, falling back to appropriate values for the configured
target."
(make-asm (fresh-block) 0 0 '() 0
- '() '()
+ (make-hash-table) '()
word-size endianness
vlist-null '()
(make-string-table) 1
- '()))
+ '() '()))
(define (intern-section-name! asm string)
"Add a string to the section name table (shstrtab)."
"Reset the asm-start after writing the words for one instruction."
(set-asm-start! asm (asm-pos asm)))
-(define (emit-exported-label asm label)
- "Define a linker symbol associating @var{label} with the current
-asm-start."
- (set-asm-labels! asm (acons label (asm-start asm) (asm-labels asm))))
-
(define (record-label-reference asm label)
"Record an x8-s24 local label reference. This value will get patched
up later by the assembler."
((U8_L24 label)
(record-label-reference asm label)
(emit asm opcode))
- ((U8_R24 rest)
- (emit asm (pack-u8-u24 opcode (list rest)))
- (for-each (lambda (x) (emit asm x)) rest))
((U8_U8_I16 a imm)
(emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
((U8_U12_U12 a b)
((U8_L24 a label)
(record-label-reference asm label)
(emit asm a))
- ((U8_R24 rest)
- (emit asm (pack-u8-u24 a (length rest)))
- (for-each (lambda (x) (emit asm x)) rest))
((U8_U8_I16 a b imm)
(emit asm (pack-u8-u8-u16 a b (object-address imm))))
((U8_U12_U12 a b)
(emit asm (pack-u8-u24 0 a)))
((X8_U12_U12 a b)
(emit asm (pack-u8-u12-u12 0 a b)))
- ((X8_R24 rest)
- (emit asm (pack-u8-u24 0 (length rest)))
- (for-each (lambda (x) (emit asm x)) rest))
((X8_L24 label)
(record-label-reference asm label)
(emit asm 0))
(emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
((B1_U7_L24 a b label)
(record-label-reference asm label)
- (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))))
+ (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
+ ((B1_X31 a)
+ (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
+ ((B1_X7_U24 a b)
+ (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
(syntax-case x ()
((_ name opcode word0 word* ...)
(define-syntax define-assembler
(lambda (x)
(syntax-case x ()
- ((_ name opcode arg ...)
+ ((_ name opcode kind arg ...)
(with-syntax ((emit (id-append #'name #'emit- #'name)))
- #'(define emit
- (let ((emit (assembler name opcode arg ...)))
- (hashq-set! assemblers 'name emit)
- emit)))))))
+ #'(begin
+ (define emit
+ (let ((emit (assembler name opcode arg ...)))
+ (hashq-set! assemblers 'name emit)
+ emit))
+ (export emit)))))))
(define-syntax visit-opcodes
(lambda (x)
static-procedure?
(code static-procedure-code))
+(define-record-type <uniform-vector-backing-store>
+ (make-uniform-vector-backing-store bytes element-size)
+ uniform-vector-backing-store?
+ (bytes uniform-vector-backing-store-bytes)
+ (element-size uniform-vector-backing-store-element-size))
+
(define-record-type <cache-cell>
(make-cache-cell scope key)
cache-cell?
(scope cache-cell-scope)
(key cache-cell-key))
+(define (simple-vector? obj)
+ (and (vector? obj)
+ (equal? (array-shape obj) (list (list 0 (1- (vector-length obj)))))))
+
+(define (simple-uniform-vector? obj)
+ (and (array? obj)
+ (symbol? (array-type obj))
+ (equal? (array-shape obj) (list (list 0 (1- (array-length obj)))))))
+
(define (statically-allocatable? x)
"Return @code{#t} if a non-immediate constant can be allocated
statically, and @code{#f} if it would need some kind of runtime
allocation."
- (or (pair? x) (vector? x) (string? x) (stringbuf? x) (static-procedure? x)))
+ (or (pair? x) (string? x) (stringbuf? x) (static-procedure? x) (array? x)))
(define (intern-constant asm obj)
"Add an object to the constant table, and return a label that can be
(define (field dst n obj)
(let ((src (recur obj)))
(if src
- (list (if (statically-allocatable? obj)
- `(make-non-immediate 0 ,src)
- `(static-ref 0 ,src))
- `(static-set! 0 ,dst ,n))
+ (if (statically-allocatable? obj)
+ `((static-patch! ,dst ,n ,src))
+ `((static-ref 1 ,src)
+ (static-set! 1 ,dst ,n)))
'())))
(define (intern obj label)
(cond
((pair? obj)
(append (field label 0 (car obj))
(field label 1 (cdr obj))))
- ((vector? obj)
+ ((simple-vector? obj)
(let lp ((i 0) (inits '()))
(if (< i (vector-length obj))
(lp (1+ i)
(reverse inits))))
((stringbuf? obj) '())
((static-procedure? obj)
- `((make-non-immediate 0 ,label)
- (link-procedure! 0 ,(static-procedure-code obj))))
+ `((static-patch! ,label 1 ,(static-procedure-code obj))))
((cache-cell? obj) '())
((symbol? obj)
- `((make-non-immediate 0 ,(recur (symbol->string obj)))
- (string->symbol 0 0)
- (static-set! 0 ,label 0)))
+ `((make-non-immediate 1 ,(recur (symbol->string obj)))
+ (string->symbol 1 1)
+ (static-set! 1 ,label 0)))
((string? obj)
- `((make-non-immediate 0 ,(recur (make-stringbuf obj)))
- (static-set! 0 ,label 1)))
+ `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
((keyword? obj)
- `((static-ref 0 ,(recur (keyword->symbol obj)))
- (symbol->keyword 0 0)
- (static-set! 0 ,label 0)))
+ `((static-ref 1 ,(recur (keyword->symbol obj)))
+ (symbol->keyword 1 1)
+ (static-set! 1 ,label 0)))
((number? obj)
- `((make-non-immediate 0 ,(recur (number->string obj)))
- (string->number 0 0)
- (static-set! 0 ,label 0)))
+ `((make-non-immediate 1 ,(recur (number->string obj)))
+ (string->number 1 1)
+ (static-set! 1 ,label 0)))
+ ((uniform-vector-backing-store? obj) '())
+ ((simple-uniform-vector? obj)
+ `((static-patch! ,label 2
+ ,(recur (make-uniform-vector-backing-store
+ (uniform-array->bytevector obj)
+ (if (bitvector? obj)
+ ;; Bitvectors are addressed in
+ ;; 32-bit units.
+ 4
+ (uniform-vector-element-size obj)))))))
(else
(error "don't know how to intern" obj))))
(cond
(syntax-case x ()
((_ (name arg ...) body body* ...)
(with-syntax ((emit (id-append #'name #'emit- #'name)))
- #'(define emit
- (let ((emit (lambda (arg ...) body body* ...)))
- (hashq-set! assemblers 'name emit)
- emit)))))))
+ #'(begin
+ (define emit
+ (let ((emit (lambda (arg ...) body body* ...)))
+ (hashq-set! assemblers 'name emit)
+ emit))
+ (export emit)))))))
(define-macro-assembler (load-constant asm dst obj)
(cond
(let ((loc (intern-constant asm (make-static-procedure label))))
(emit-make-non-immediate asm dst loc)))
+(define-syntax-rule (define-tc7-macro-assembler name tc7)
+ (define-macro-assembler (name asm slot invert? label)
+ (emit-br-if-tc7 asm slot invert? tc7 label)))
+
+;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
+;; macro assemblers are commented out. See also
+;; *branching-primcall-arities* in (language cps primitives), the set of
+;; macro-instructions in assembly.scm, and
+;; disassembler.scm:code-annotation.
+;;
+;; FIXME: Define all tc7 values in Scheme in one place, derived from
+;; tags.h.
+(define-tc7-macro-assembler br-if-symbol 5)
+(define-tc7-macro-assembler br-if-variable 7)
+(define-tc7-macro-assembler br-if-vector 13)
+;(define-tc7-macro-assembler br-if-weak-vector 13)
+(define-tc7-macro-assembler br-if-string 21)
+;(define-tc7-macro-assembler br-if-heap-number 23)
+;(define-tc7-macro-assembler br-if-stringbuf 39)
+(define-tc7-macro-assembler br-if-bytevector 77)
+;(define-tc7-macro-assembler br-if-pointer 31)
+;(define-tc7-macro-assembler br-if-hashtable 29)
+;(define-tc7-macro-assembler br-if-fluid 37)
+;(define-tc7-macro-assembler br-if-dynamic-state 45)
+;(define-tc7-macro-assembler br-if-frame 47)
+;(define-tc7-macro-assembler br-if-objcode 53)
+;(define-tc7-macro-assembler br-if-vm 55)
+;(define-tc7-macro-assembler br-if-vm-cont 71)
+;(define-tc7-macro-assembler br-if-rtl-program 69)
+;(define-tc7-macro-assembler br-if-program 79)
+;(define-tc7-macro-assembler br-if-weak-set 85)
+;(define-tc7-macro-assembler br-if-weak-table 87)
+;(define-tc7-macro-assembler br-if-array 93)
+(define-tc7-macro-assembler br-if-bitvector 95)
+;(define-tc7-macro-assembler br-if-port 125)
+;(define-tc7-macro-assembler br-if-smob 127)
+
(define-macro-assembler (begin-program asm label properties)
(emit-label asm label)
(let ((meta (make-meta label properties (asm-start asm))))
(assert-match req ((? symbol?) ...) "list of symbols")
(assert-match opt ((? symbol?) ...) "list of symbols")
(assert-match rest (or #f (? symbol?)) "#f or symbol")
- (assert-match kw-indices (((? symbol?) . (? integer?)) ...)
- "alist of symbol -> integer")
+ (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
+ "alist of keyword -> integer")
(assert-match allow-other-keys? (? boolean?) "boolean")
(assert-match nlocals (? integer?) "integer")
(assert-match alternate (or #f (? symbol?)) "#f or symbol")
(let* ((meta (car (asm-meta asm)))
(arity (make-arity req opt rest kw-indices allow-other-keys?
(asm-start asm) #f))
- (nreq (length req))
+ ;; The procedure itself is in slot 0, in the standard calling
+ ;; convention. For procedure prologues, nreq includes the
+ ;; procedure, so here we add 1.
+ (nreq (1+ (length req)))
(nopt (length opt))
(rest? (->bool rest)))
(set-meta-arities! meta (cons arity (meta-arities meta)))
(cond
(alternate
(emit-br-if-nargs-ne asm nreq alternate)
- (emit-reserve-locals asm nlocals))
+ (emit-alloc-frame asm nlocals))
((and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
(emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
(else
(emit-assert-nargs-ee asm nreq)
- (emit-reserve-locals asm nlocals))))
+ (emit-alloc-frame asm nlocals))))
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
(if alternate
(emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
(else
(emit-assert-nargs-le asm (+ nreq nopt))))
- (emit-reserve-locals asm nlocals))
+ (emit-alloc-frame asm nlocals))
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
allow-other-keys? nlocals alternate)
(if alternate
- (emit-br-if-nargs-lt asm nreq alternate)
+ (begin
+ (emit-br-if-nargs-lt asm nreq alternate)
+ (unless rest?
+ (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
(emit-assert-nargs-ge asm nreq))
(let ((ntotal (fold (lambda (kw ntotal)
(match kw
(pack-flags allow-other-keys? rest?)
(+ nreq nopt)
ntotal
- kw-indices)
- (emit-reserve-locals asm nlocals)))
+ (intern-constant asm kw-indices))
+ (emit-alloc-frame asm nlocals)))
(define-macro-assembler (label asm sym)
- (set-asm-labels! asm (acons sym (asm-start asm) (asm-labels asm))))
+ (hashq-set! (asm-labels asm) sym (asm-start asm)))
-(define-macro-assembler (cache-current-module! asm tmp scope)
- (let ((mod-label (intern-module-cache-cell asm scope)))
- (emit-current-module asm tmp)
- (emit-static-set! asm tmp mod-label 0)))
+(define-macro-assembler (source asm source)
+ (set-asm-sources! asm (acons (asm-start asm) source (asm-sources asm))))
-(define-macro-assembler (cached-toplevel-ref asm dst scope sym)
- (let ((sym-label (intern-non-immediate asm sym))
- (mod-label (intern-module-cache-cell asm scope))
- (cell-label (intern-cache-cell asm scope sym)))
- (emit-toplevel-ref asm dst cell-label mod-label sym-label)))
+(define-macro-assembler (cache-current-module! asm module scope)
+ (let ((mod-label (intern-module-cache-cell asm scope)))
+ (emit-static-set! asm module mod-label 0)))
-(define-macro-assembler (cached-toplevel-set! asm src scope sym)
+(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
(let ((sym-label (intern-non-immediate asm sym))
(mod-label (intern-module-cache-cell asm scope))
(cell-label (intern-cache-cell asm scope sym)))
- (emit-toplevel-set! asm src cell-label mod-label sym-label)))
+ (emit-toplevel-box asm dst cell-label mod-label sym-label bound?)))
-(define-macro-assembler (cached-module-ref asm dst module-name public? sym)
+(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
(let* ((sym-label (intern-non-immediate asm sym))
(key (cons public? module-name))
(mod-name-label (intern-constant asm key))
(cell-label (intern-cache-cell asm key sym)))
- (emit-module-ref asm dst cell-label mod-name-label sym-label)))
-
-(define-macro-assembler (cached-module-set! asm src module-name public? sym)
- (let* ((sym-label (intern-non-immediate asm sym))
- (key (cons public? module-name))
- (mod-name-label (intern-non-immediate asm key))
- (cell-label (intern-cache-cell asm key sym)))
- (emit-module-set! asm src cell-label mod-name-label sym-label)))
+ (emit-module-box asm dst cell-label mod-name-label sym-label bound?)))
\f
(let ((label (gensym "init-constants")))
(emit-text asm
`((begin-program ,label ())
- (assert-nargs-ee/locals 0 1)
+ (assert-nargs-ee/locals 1 1)
,@(reverse inits)
- (load-constant 0 ,*unspecified*)
- (return 0)
+ (load-constant 1 ,*unspecified*)
+ (return 1)
(end-program)))
label))))
(modulo (- alignment (modulo address alignment)) alignment)))
(define tc7-vector 13)
- (define tc7-narrow-stringbuf 39)
- (define tc7-wide-stringbuf (+ 39 #x400))
+ (define stringbuf-shared-flag #x100)
+ (define stringbuf-wide-flag #x400)
+ (define tc7-stringbuf 39)
+ (define tc7-narrow-stringbuf
+ (+ tc7-stringbuf stringbuf-shared-flag))
+ (define tc7-wide-stringbuf
+ (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
(define tc7-ro-string (+ 21 #x200))
(define tc7-rtl-program 69)
+ (define tc7-bytevector 77)
+ (define tc7-bitvector 95)
(let ((word-size (asm-word-size asm))
(endianness (asm-endianness asm)))
(* 4 word-size))
((pair? x)
(* 2 word-size))
- ((vector? x)
+ ((simple-vector? x)
(* (1+ (vector-length x)) word-size))
+ ((simple-uniform-vector? x)
+ (* 4 word-size))
+ ((uniform-vector-backing-store? x)
+ (bytevector-length (uniform-vector-backing-store-bytes x)))
(else
word-size)))
(write-constant-reference buf pos (car obj))
(write-constant-reference buf (+ pos word-size) (cdr obj)))
- ((vector? obj)
+ ((simple-vector? obj)
(let* ((len (vector-length obj))
(tag (logior tc7-vector (ash len 8))))
(case word-size
((number? obj)
(write-immediate asm buf pos #f))
+ ((simple-uniform-vector? obj)
+ (let ((tag (if (bitvector? obj)
+ tc7-bitvector
+ (let ((type-code (uniform-vector-element-type-code obj)))
+ (logior tc7-bytevector (ash type-code 7))))))
+ (case word-size
+ ((4)
+ (bytevector-u32-set! buf pos tag endianness)
+ (bytevector-u32-set! buf (+ pos 4)
+ (if (bitvector? obj)
+ (bitvector-length obj)
+ (bytevector-length obj))
+ endianness) ; length
+ (bytevector-u32-set! buf (+ pos 8) 0 endianness) ; pointer
+ (write-immediate asm buf (+ pos 12) #f)) ; owner
+ ((8)
+ (bytevector-u64-set! buf pos tag endianness)
+ (bytevector-u64-set! buf (+ pos 8)
+ (if (bitvector? obj)
+ (bitvector-length obj)
+ (bytevector-length obj))
+ endianness) ; length
+ (bytevector-u64-set! buf (+ pos 16) 0 endianness) ; pointer
+ (write-immediate asm buf (+ pos 24) #f)) ; owner
+ (else (error "bad word size")))))
+
+ ((uniform-vector-backing-store? obj)
+ (let ((bv (uniform-vector-backing-store-bytes obj)))
+ (bytevector-copy! bv 0 buf pos (bytevector-length bv))
+ (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
+ (eq? endianness (native-endianness)))
+ ;; Need to swap units of element-size bytes
+ (error "FIXME: Implement byte order swap"))))
+
(else
(error "unrecognized object" obj))))
(+ (byte-length k) (align len 8)))
0 data))
(buf (make-bytevector byte-len 0)))
- (let lp ((i 0) (pos 0) (labels '()))
+ (let lp ((i 0) (pos 0) (symbols '()))
(if (< i (vlist-length data))
(let* ((pair (vlist-ref data i))
(obj (car pair))
(write buf pos obj)
(lp (1+ i)
(align (+ (byte-length obj) pos) 8)
- (cons (make-linker-symbol obj-label pos) labels)))
- (make-object asm name buf '() labels))))))))
+ (cons (make-linker-symbol obj-label pos) symbols)))
+ (make-object asm name buf '() symbols
+ #:flags (match name
+ ('.data (logior SHF_ALLOC SHF_WRITE))
+ ('.rodata SHF_ALLOC))))))))))
(define (link-constants asm)
"Link sections to hold constants needed by the program text emitted
((stringbuf? x) #t)
((pair? x)
(and (immediate? (car x)) (immediate? (cdr x))))
- ((vector? x)
+ ((simple-vector? x)
(let lp ((i 0))
(or (= i (vector-length x))
(and (immediate? (vector-ref x i))
(lp (1+ i))))))
+ ((uniform-vector-backing-store? x) #t)
(else #f)))
(let* ((constants (asm-constants asm))
(len (vlist-length constants)))
(lambda (reloc tail)
(match reloc
((type label base word)
- (let ((abs (assq-ref labels label))
+ (let ((abs (hashq-ref labels label))
(dst (+ base word)))
(case type
((s32)
relocs))
(define (process-labels labels)
- "Define linker symbols for the label-offset pairs in @var{labels}.
+ "Define linker symbols for the label-offset map in @var{labels}.
The offsets are expected to be expressed in words."
- (map (lambda (pair)
- (make-linker-symbol (car pair) (* (cdr pair) 4)))
- labels))
+ (hash-map->list (lambda (label loc)
+ (make-linker-symbol label (* loc 4)))
+ labels))
(define (swap-bytes! buf)
"Patch up the text buffer @var{buf}, swapping the endianness of each
(define (write-arity-headers metas bv endianness)
(define (write-arity-header* pos low-pc high-pc flags nreq nopt)
- (bytevector-u32-set! bv pos low-pc endianness)
- (bytevector-u32-set! bv (+ pos 4) high-pc endianness)
+ (bytevector-u32-set! bv pos (* low-pc 4) endianness)
+ (bytevector-u32-set! bv (+ pos 4) (* high-pc 4) endianness)
(bytevector-u32-set! bv (+ pos 8) 0 endianness) ; offset
(bytevector-u32-set! bv (+ pos 12) flags endianness)
(bytevector-u32-set! bv (+ pos 16) nreq endianness)
(linker-object-section strtab)))
strtab)))))
+;;;
+;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
+;;; values. Pc and str are both 32 bits wide. (Either could change to
+;;; 64 bits if appropriate in the future.) Pc is the address of the
+;;; entry to a program, relative to the start of the text section, in
+;;; bytes, and str is an index into the associated .guile.docstrs.strtab
+;;; string table section.
+;;;
+
+;; The size of a docstrs entry, in bytes.
+(define docstr-size 8)
+
+(define (link-docstrs asm)
+ (define (find-docstrings)
+ (filter-map (lambda (meta)
+ (define (is-documentation? pair)
+ (eq? (car pair) 'documentation))
+ (let* ((props (meta-properties meta))
+ (tail (find-tail is-documentation? props)))
+ (and tail
+ (not (find-tail is-documentation? (cdr tail)))
+ (string? (cdar tail))
+ (cons (* 4 (meta-low-pc meta)) (cdar tail)))))
+ (reverse (asm-meta asm))))
+ (let* ((endianness (asm-endianness asm))
+ (docstrings (find-docstrings))
+ (strtab (make-string-table))
+ (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
+ (fold (lambda (pair pos)
+ (match pair
+ ((pc . string)
+ (bytevector-u32-set! bv pos pc endianness)
+ (bytevector-u32-set! bv (+ pos 4)
+ (string-table-intern! strtab string)
+ endianness)
+ (+ pos docstr-size))))
+ 0
+ docstrings)
+ (let ((strtab (make-object asm '.guile.docstrs.strtab
+ (link-string-table! strtab)
+ '() '()
+ #:type SHT_STRTAB #:flags 0)))
+ (values (make-object asm '.guile.docstrs
+ bv
+ '() '()
+ #:type SHT_PROGBITS #:flags 0
+ #:link (elf-section-index
+ (linker-object-section strtab)))
+ strtab))))
+
+;;;
+;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
+;;; values. Pc and addr are both 32 bits wide. (Either could change to
+;;; 64 bits if appropriate in the future.) Pc is the address of the
+;;; entry to a program, relative to the start of the text section, and
+;;; addr is the address of the associated properties alist, relative to
+;;; the start of the ELF image.
+;;;
+;;; Since procedure properties are stored in the data sections, we need
+;;; to link the procedures property section first. (Note that this
+;;; constraint does not apply to the arities section, which may
+;;; reference the data sections via the kw-indices literal, because
+;;; assembling the text section already makes sure that the kw-indices
+;;; are interned.)
+;;;
+
+;; The size of a procprops entry, in bytes.
+(define procprops-size 8)
+
+(define (link-procprops asm)
+ (define (assoc-remove-one alist key value-pred)
+ (match alist
+ (() '())
+ ((((? (lambda (x) (eq? x key))) . value) . alist)
+ (if (value-pred value)
+ alist
+ (acons key value alist)))
+ (((k . v) . alist)
+ (acons k v (assoc-remove-one alist key value-pred)))))
+ (define (props-without-name-or-docstring meta)
+ (assoc-remove-one
+ (assoc-remove-one (meta-properties meta) 'name (lambda (x) #t))
+ 'documentation
+ string?))
+ (define (find-procprops)
+ (filter-map (lambda (meta)
+ (let ((props (props-without-name-or-docstring meta)))
+ (and (pair? props)
+ (cons (meta-low-pc meta) props))))
+ (reverse (asm-meta asm))))
+ (let* ((endianness (asm-endianness asm))
+ (procprops (find-procprops))
+ (bv (make-bytevector (* (length procprops) procprops-size) 0)))
+ (let lp ((procprops procprops) (pos 0) (relocs '()))
+ (match procprops
+ (()
+ (make-object asm '.guile.procprops
+ bv
+ relocs '()
+ #:type SHT_PROGBITS #:flags 0))
+ (((pc . props) . procprops)
+ (bytevector-u32-set! bv pos pc endianness)
+ (lp procprops
+ (+ pos procprops-size)
+ (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
+ (intern-constant asm props))
+ relocs)))))))
+
+;;;
+;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
+;;; sections provide line number and local variable liveness
+;;; information. Their format is defined by the DWARF
+;;; specifications.
+;;;
+
+(define (asm-language asm)
+ ;; FIXME: Plumb language through to the assembler.
+ 'scheme)
+
+;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
+(define (link-debug asm)
+ (define (put-s8 port val)
+ (let ((bv (make-bytevector 1)))
+ (bytevector-s8-set! bv 0 val)
+ (put-bytevector port bv)))
+
+ (define (put-u16 port val)
+ (let ((bv (make-bytevector 2)))
+ (bytevector-u16-set! bv 0 val (asm-endianness asm))
+ (put-bytevector port bv)))
+
+ (define (put-u32 port val)
+ (let ((bv (make-bytevector 4)))
+ (bytevector-u32-set! bv 0 val (asm-endianness asm))
+ (put-bytevector port bv)))
+
+ (define (put-u64 port val)
+ (let ((bv (make-bytevector 8)))
+ (bytevector-u64-set! bv 0 val (asm-endianness asm))
+ (put-bytevector port bv)))
+
+ (define (put-uleb128 port val)
+ (let lp ((val val))
+ (let ((next (ash val -7)))
+ (if (zero? next)
+ (put-u8 port val)
+ (begin
+ (put-u8 port (logior #x80 (logand val #x7f)))
+ (lp next))))))
+
+ (define (put-sleb128 port val)
+ (let lp ((val val))
+ (if (<= 0 (+ val 64) 127)
+ (put-u8 port (logand val #x7f))
+ (begin
+ (put-u8 port (logior #x80 (logand val #x7f)))
+ (lp (ash val -7))))))
+
+ (define (port-position port)
+ (seek port 0 SEEK_CUR))
+
+ (define (meta->subprogram-die meta)
+ `(subprogram
+ (@ ,@(cond
+ ((meta-name meta)
+ => (lambda (name) `((name ,(symbol->string name)))))
+ (else
+ '()))
+ (low-pc ,(meta-label meta))
+ (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
+
+ (define (make-compile-unit-die asm)
+ `(compile-unit
+ (@ (producer ,(string-append "Guile " (version)))
+ (language ,(asm-language asm))
+ (low-pc .rtl-text)
+ (high-pc ,(* 4 (asm-pos asm)))
+ (stmt-list 0))
+ ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
+
+ (let-values (((die-port get-die-bv) (open-bytevector-output-port))
+ ((die-relocs) '())
+ ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
+ ;; (tag has-kids? attrs forms) -> code
+ ((abbrevs) vlist-null)
+ ((strtab) (make-string-table))
+ ((line-port get-line-bv) (open-bytevector-output-port))
+ ((line-relocs) '())
+ ;; file -> code
+ ((files) vlist-null))
+
+ (define (write-abbrev code tag has-children? attrs forms)
+ (put-uleb128 abbrev-port code)
+ (put-uleb128 abbrev-port (tag-name->code tag))
+ (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
+ (for-each (lambda (attr form)
+ (put-uleb128 abbrev-port (attribute-name->code attr))
+ (put-uleb128 abbrev-port (form-name->code form)))
+ attrs forms)
+ (put-uleb128 abbrev-port 0)
+ (put-uleb128 abbrev-port 0))
+
+ (define (intern-abbrev tag has-children? attrs forms)
+ (let ((key (list tag has-children? attrs forms)))
+ (match (vhash-assoc key abbrevs)
+ ((_ . code) code)
+ (#f (let ((code (1+ (vlist-length abbrevs))))
+ (set! abbrevs (vhash-cons key code abbrevs))
+ (write-abbrev code tag has-children? attrs forms)
+ code)))))
+
+ (define (intern-file file)
+ (match (vhash-assoc file files)
+ ((_ . code) code)
+ (#f (let ((code (1+ (vlist-length files))))
+ (set! files (vhash-cons file code files))
+ code))))
+
+ (define (write-sources)
+ ;; Choose line base and line range values that will allow for an
+ ;; address advance range of 16 words. The special opcode range is
+ ;; from 10 to 255, so 246 values.
+ (define base -4)
+ (define range 15)
+
+ (let lp ((sources (asm-sources asm)) (out '()))
+ (match sources
+ (((pc . s) . sources)
+ (let ((file (assq-ref s 'filename))
+ (line (assq-ref s 'line))
+ (col (assq-ref s 'column)))
+ (lp sources
+ ;; Guile line and column numbers are 0-indexed, but
+ ;; they are 1-indexed for DWARF.
+ (cons (list pc
+ (if file (intern-file file) 0)
+ (if line (1+ line))
+ (if col (1+ col)))
+ out))))
+ (()
+ ;; Compilation unit header for .debug_line. We write in
+ ;; DWARF 2 format because more tools understand it than DWARF
+ ;; 4, which incompatibly adds another field to this header.
+
+ (put-u32 line-port 0) ; Length; will patch later.
+ (put-u16 line-port 2) ; DWARF 2 format.
+ (put-u32 line-port 0) ; Prologue length; will patch later.
+ (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
+ (put-u8 line-port 1) ; Default is-stmt: true.
+
+ (put-s8 line-port base) ; Line base. See the DWARF standard.
+ (put-u8 line-port range) ; Line range. See the DWARF standard.
+ (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
+
+ ;; A table of the number of uleb128 arguments taken by each
+ ;; of the standard opcodes.
+ (put-u8 line-port 0) ; 1: copy
+ (put-u8 line-port 1) ; 2: advance-pc
+ (put-u8 line-port 1) ; 3: advance-line
+ (put-u8 line-port 1) ; 4: set-file
+ (put-u8 line-port 1) ; 5: set-column
+ (put-u8 line-port 0) ; 6: negate-stmt
+ (put-u8 line-port 0) ; 7: set-basic-block
+ (put-u8 line-port 0) ; 8: const-add-pc
+ (put-u8 line-port 1) ; 9: fixed-advance-pc
+
+ ;; Include directories, as a zero-terminated sequence of
+ ;; nul-terminated strings. Nothing, for the moment.
+ (put-u8 line-port 0)
+
+ ;; File table. For each file that contributes to this
+ ;; compilation unit, a nul-terminated file name string, and a
+ ;; uleb128 for each of directory the file was found in, the
+ ;; modification time, and the file's size in bytes. We pass
+ ;; zero for the latter three fields.
+ (vlist-fold-right
+ (lambda (pair seed)
+ (match pair
+ ((file . code)
+ (put-bytevector line-port (string->utf8 file))
+ (put-u8 line-port 0)
+ (put-uleb128 line-port 0) ; directory
+ (put-uleb128 line-port 0) ; mtime
+ (put-uleb128 line-port 0))) ; size
+ seed)
+ #f
+ files)
+ (put-u8 line-port 0) ; 0 byte terminating file list.
+
+ ;; Patch prologue length.
+ (let ((offset (port-position line-port)))
+ (seek line-port 6 SEEK_SET)
+ (put-u32 line-port (- offset 10))
+ (seek line-port offset SEEK_SET))
+
+ ;; Now write the statement program.
+ (let ()
+ (define (extended-op opcode payload-len)
+ (put-u8 line-port 0) ; extended op
+ (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
+ (put-uleb128 line-port opcode))
+ (define (set-address sym)
+ (define (add-reloc! kind)
+ (set! line-relocs
+ (cons (make-linker-reloc kind
+ (port-position line-port)
+ 0
+ sym)
+ line-relocs)))
+ (match (asm-word-size asm)
+ (4
+ (extended-op 2 4)
+ (add-reloc! 'abs32/1)
+ (put-u32 line-port 0))
+ (8
+ (extended-op 2 8)
+ (add-reloc! 'abs64/1)
+ (put-u64 line-port 0))))
+ (define (end-sequence pc)
+ (let ((pc-inc (- (asm-pos asm) pc)))
+ (put-u8 line-port 2) ; advance-pc
+ (put-uleb128 line-port pc-inc))
+ (extended-op 1 0))
+ (define (advance-pc pc-inc line-inc)
+ (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
+ (cond
+ ((or (< line-inc base) (>= line-inc (+ base range)))
+ (advance-line line-inc)
+ (advance-pc pc-inc 0))
+ ((<= spec 255)
+ (put-u8 line-port spec))
+ ((< spec 500)
+ (put-u8 line-port 8) ; const-advance-pc
+ (advance-pc (- pc-inc (floor/ (- 255 10) range))
+ line-inc))
+ (else
+ (put-u8 line-port 2) ; advance-pc
+ (put-uleb128 line-port pc-inc)
+ (advance-pc 0 line-inc)))))
+ (define (advance-line inc)
+ (put-u8 line-port 3)
+ (put-sleb128 line-port inc))
+ (define (set-file file)
+ (put-u8 line-port 4)
+ (put-uleb128 line-port file))
+ (define (set-column col)
+ (put-u8 line-port 5)
+ (put-uleb128 line-port col))
+
+ (set-address '.rtl-text)
+
+ (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
+ (match in
+ (()
+ (when (null? out)
+ ;; There was no source info in the first place. Set
+ ;; file register to 0 before adding final row.
+ (set-file 0))
+ (end-sequence pc))
+ (((pc* file* line* col*) . in*)
+ (cond
+ ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
+ (lp in* pc file line col))
+ (else
+ (unless (eqv? col col*)
+ (set-column col*))
+ (unless (eqv? file file*)
+ (set-file file*))
+ (advance-pc (- pc* pc) (- line* line))
+ (lp in* pc* file* line* col*)))))))))))
+
+ (define (compute-code attr val)
+ (match attr
+ ('name (string-table-intern! strtab val))
+ ('low-pc val)
+ ('high-pc val)
+ ('producer (string-table-intern! strtab val))
+ ('language (language-name->code val))
+ ('stmt-list val)))
+
+ (define (exact-integer? val)
+ (and (number? val) (integer? val) (exact? val)))
+
+ (define (choose-form attr val code)
+ (cond
+ ((string? val) 'strp)
+ ((eq? attr 'stmt-list) 'sec-offset)
+ ((exact-integer? code)
+ (cond
+ ((< code 0) 'sleb128)
+ ((<= code #xff) 'data1)
+ ((<= code #xffff) 'data2)
+ ((<= code #xffffffff) 'data4)
+ ((<= code #xffffffffffffffff) 'data8)
+ (else 'uleb128)))
+ ((symbol? val) 'addr)
+ (else (error "unhandled case" attr val code))))
+
+ (define (add-die-relocation! kind sym)
+ (set! die-relocs
+ (cons (make-linker-reloc kind (port-position die-port) 0 sym)
+ die-relocs)))
+
+ (define (write-value code form)
+ (match form
+ ('data1 (put-u8 die-port code))
+ ('data2 (put-u16 die-port code))
+ ('data4 (put-u32 die-port code))
+ ('data8 (put-u64 die-port code))
+ ('uleb128 (put-uleb128 die-port code))
+ ('sleb128 (put-sleb128 die-port code))
+ ('addr
+ (match (asm-word-size asm)
+ (4
+ (add-die-relocation! 'abs32/1 code)
+ (put-u32 die-port 0))
+ (8
+ (add-die-relocation! 'abs64/1 code)
+ (put-u64 die-port 0))))
+ ('sec-offset (put-u32 die-port code))
+ ('strp (put-u32 die-port code))))
+
+ (define (write-die die)
+ (match die
+ ((tag ('@ (attrs vals) ...) children ...)
+ (let* ((codes (map compute-code attrs vals))
+ (forms (map choose-form attrs vals codes))
+ (has-children? (not (null? children)))
+ (abbrev-code (intern-abbrev tag has-children? attrs forms)))
+ (put-uleb128 die-port abbrev-code)
+ (for-each write-value codes forms)
+ (when has-children?
+ (for-each write-die children)
+ (put-uleb128 die-port 0))))))
+
+ ;; Compilation unit header.
+ (put-u32 die-port 0) ; Length; will patch later.
+ (put-u16 die-port 4) ; DWARF 4.
+ (put-u32 die-port 0) ; Abbrevs offset.
+ (put-u8 die-port (asm-word-size asm)) ; Address size.
+
+ (write-die (make-compile-unit-die asm))
+
+ ;; Terminate the abbrevs list.
+ (put-uleb128 abbrev-port 0)
+
+ (write-sources)
+
+ (values (let ((bv (get-die-bv)))
+ ;; Patch DWARF32 length.
+ (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
+ (asm-endianness asm))
+ (make-object asm '.debug_info bv die-relocs '()
+ #:type SHT_PROGBITS #:flags 0))
+ (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
+ #:type SHT_PROGBITS #:flags 0)
+ (make-object asm '.debug_str (link-string-table! strtab) '() '()
+ #:type SHT_PROGBITS #:flags 0)
+ (make-object asm '.debug_loc #vu8() '() '()
+ #:type SHT_PROGBITS #:flags 0)
+ (let ((bv (get-line-bv)))
+ ;; Patch DWARF32 length.
+ (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
+ (asm-endianness asm))
+ (make-object asm '.debug_line bv line-relocs '()
+ #:type SHT_PROGBITS #:flags 0)))))
+
(define (link-objects asm)
- (let*-values (((ro rw rw-init) (link-constants asm))
+ (let*-values (;; Link procprops before constants, because it probably
+ ;; interns more constants.
+ ((procprops) (link-procprops asm))
+ ((ro rw rw-init) (link-constants asm))
;; Link text object after constants, so that the
;; constants initializer gets included.
((text) (link-text-object asm))
((dt) (link-dynamic-section asm text rw rw-init))
((symtab strtab) (link-symtab (linker-object-section text) asm))
((arities arities-strtab) (link-arities asm))
+ ((docstrs docstrs-strtab) (link-docstrs asm))
+ ((dinfo dabbrev dstrtab dloc dline) (link-debug asm))
;; This needs to be linked last, because linking other
;; sections adds entries to the string table.
((shstrtab) (link-shstrtab asm)))
(filter identity
(list text ro rw dt symtab strtab arities arities-strtab
+ docstrs docstrs-strtab procprops
+ dinfo dabbrev dstrtab dloc dline
shstrtab))))
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
(link-elf (link-objects asm) #:page-aligned? page-aligned?))
-
-(define (assemble-program instructions)
- "Take the sequence of instructions @var{instructions}, assemble them
-into RTL code, link an image, and load that image from memory. Returns
-a procedure."
- (let ((asm (make-assembler)))
- (emit-text asm instructions)
- (load-thunk-from-memory (link-assembly asm #:page-aligned? #f))))