Operations on 8-bit and 12-bit operands shuffle args into range
[bpt/guile.git] / module / system / vm / assembler.scm
CommitLineData
691697de 1;;; Guile bytecode assembler
e78991aa 2
02c624fc 3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014 Free Software Foundation, Inc.
e78991aa
AW
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Commentary:
20;;;
21;;; This module implements an assembler that creates an ELF image from
691697de 22;;; bytecode assembly and macro-assembly. The input can be given in
e78991aa
AW
23;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24;;; procedural interface, the emit-OP procedures, but that is not
25;;; currently exported.
26;;;
691697de
AW
27;;; "Primitive instructions" correspond to VM operations. Assemblers
28;;; for primitive instructions are generated programmatically from
29;;; (instruction-list), which itself is derived from the VM sources.
30;;; There are also "macro-instructions" like "label" or "load-constant"
31;;; that expand to 0 or more primitive instructions.
e78991aa
AW
32;;;
33;;; The assembler also handles some higher-level tasks, like creating
34;;; the symbol table, other metadata sections, creating a constant table
35;;; for the whole compilation unit, and writing the dynamic section of
36;;; the ELF file along with the appropriate initialization routines.
37;;;
38;;; Most compilers will want to use the trio of make-assembler,
39;;; emit-text, and link-assembly. That will result in the creation of
40;;; an ELF image as a bytevector, which can then be loaded using
41;;; load-thunk-from-memory, or written to disk as a .go file.
42;;;
43;;; Code:
44
45(define-module (system vm assembler)
46 #:use-module (system base target)
a862d8c1 47 #:use-module (system vm dwarf)
e78991aa
AW
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
691697de 50 #:use-module (language bytecode)
e78991aa 51 #:use-module (rnrs bytevectors)
a862d8c1 52 #:use-module (ice-9 binary-ports)
e78991aa
AW
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
d4b3a36d
AW
60
61 emit-call
62 emit-call-label
63 emit-tail-call
64 emit-tail-call-label
65 (emit-receive* . emit-receive)
66 emit-receive-values
67 emit-return
68 emit-return-values
69 emit-call/cc
70 emit-abort
71 (emit-builtin-ref* . emit-builtin-ref)
72 emit-br-if-nargs-ne
73 emit-br-if-nargs-lt
74 emit-br-if-nargs-gt
75 emit-assert-nargs-ee
76 emit-assert-nargs-ge
77 emit-assert-nargs-le
78 emit-alloc-frame
79 emit-reset-frame
80 emit-assert-nargs-ee/locals
81 emit-br-if-npos-gt
82 emit-bind-kwargs
83 emit-bind-rest
84 emit-br
85 emit-br-if-true
86 emit-br-if-null
87 emit-br-if-nil
88 emit-br-if-pair
89 emit-br-if-struct
90 emit-br-if-char
91 emit-br-if-tc7
92 (emit-br-if-eq* . emit-br-if-eq)
93 (emit-br-if-eqv* . emit-br-if-eqv)
94 (emit-br-if-equal* . emit-br-if-equal)
95 (emit-br-if-=* . emit-br-if-=)
96 (emit-br-if-<* . emit-br-if-<)
97 (emit-br-if-<=* . emit-br-if-<=)
98 (emit-mov* . emit-mov)
99 (emit-box* . emit-box)
100 (emit-box-ref* . emit-box-ref)
101 (emit-box-set!* . emit-box-set!)
102 emit-make-closure
103 (emit-free-ref* . emit-free-ref)
104 (emit-free-set!* . emit-free-set!)
105 emit-current-module
106 emit-resolve
107 (emit-define!* . emit-define!)
108 emit-toplevel-box
109 emit-module-box
110 emit-prompt
111 (emit-wind* . emit-wind)
112 emit-unwind
113 (emit-push-fluid* . emit-push-fluid)
114 emit-pop-fluid
115 (emit-fluid-ref* . emit-fluid-ref)
116 (emit-fluid-set* . emit-fluid-set)
117 (emit-string-length* . emit-string-length)
118 (emit-string-ref* . emit-string-ref)
119 (emit-string->number* . emit-string->number)
120 (emit-string->symbol* . emit-string->symbol)
121 (emit-symbol->keyword* . emit-symbol->keyword)
122 (emit-cons* . emit-cons)
123 (emit-car* . emit-car)
124 (emit-cdr* . emit-cdr)
125 (emit-set-car!* . emit-set-car!)
126 (emit-set-cdr!* . emit-set-cdr!)
127 (emit-add* . emit-add)
128 (emit-add1* . emit-add1)
129 (emit-sub* . emit-sub)
130 (emit-sub1* . emit-sub1)
131 (emit-mul* . emit-mul)
132 (emit-div* . emit-div)
133 (emit-quo* . emit-quo)
134 (emit-rem* . emit-rem)
135 (emit-mod* . emit-mod)
136 (emit-ash* . emit-ash)
137 (emit-logand* . emit-logand)
138 (emit-logior* . emit-logior)
139 (emit-logxor* . emit-logxor)
140 (emit-make-vector/immediate* . emit-make-vector/immediate)
141 (emit-vector-length* . emit-vector-length)
142 (emit-vector-ref* . emit-vector-ref)
143 (emit-vector-ref/immediate* . emit-vector-ref/immediate)
144 (emit-vector-set!* . emit-vector-set!)
145 (emit-vector-set!/immediate* . emit-vector-set!/immediate)
146 (emit-struct-vtable* . emit-struct-vtable)
147 (emit-allocate-struct/immediate* . emit-allocate-struct/immediate)
148 (emit-struct-ref/immediate* . emit-struct-ref/immediate)
149 (emit-struct-set!/immediate* . emit-struct-set!/immediate)
150 (emit-class-of* . emit-class-of)
151 (emit-make-array* . emit-make-array)
152 (emit-bv-u8-ref* . emit-bv-u8-ref)
153 (emit-bv-s8-ref* . emit-bv-s8-ref)
154 (emit-bv-u16-ref* . emit-bv-u16-ref)
155 (emit-bv-s16-ref* . emit-bv-s16-ref)
156 (emit-bv-u32-ref* . emit-bv-u32-ref)
157 (emit-bv-s32-ref* . emit-bv-s32-ref)
158 (emit-bv-u64-ref* . emit-bv-u64-ref)
159 (emit-bv-s64-ref* . emit-bv-s64-ref)
160 (emit-bv-f32-ref* . emit-bv-f32-ref)
161 (emit-bv-f64-ref* . emit-bv-f64-ref)
162 (emit-bv-u8-set!* . emit-bv-u8-set!)
163 (emit-bv-s8-set!* . emit-bv-s8-set!)
164 (emit-bv-u16-set!* . emit-bv-u16-set!)
165 (emit-bv-s16-set!* . emit-bv-s16-set!)
166 (emit-bv-u32-set!* . emit-bv-u32-set!)
167 (emit-bv-s32-set!* . emit-bv-s32-set!)
168 (emit-bv-u64-set!* . emit-bv-u64-set!)
169 (emit-bv-s64-set!* . emit-bv-s64-set!)
170 (emit-bv-f32-set!* . emit-bv-f32-set!)
171 (emit-bv-f64-set!* . emit-bv-f64-set!)
172
e78991aa 173 emit-text
4dfae1bf 174 link-assembly))
e78991aa
AW
175
176
177\f
178
dece0412
AW
;; Like define-inlinable, but NAME is only usable in operator position
;; (first-order uses), so less code should be residualized.  Every
;; operand is bound once with `let' before the body runs, so an argument
;; expression is evaluated a single time.
(eval-when (expand)
  (define-syntax define-inline
    (lambda (stx)
      (syntax-case stx ()
        ((_ (name formal ...) form ...)
         (with-syntax (((tmp ...) (generate-temporaries #'(formal ...))))
           #'(eval-when (expand)
               (define-syntax-rule (name tmp ...)
                 (let ((formal tmp) ...)
                   form ...)))))))))
dece0412 191
691697de 192;;; Bytecode consists of 32-bit units, often subdivided in some way.
e78991aa
AW
193;;; These helpers create one 32-bit unit from multiple components.
194
;; Combine X (bits 0-7) and Y (bits 8 and up) into one word.  Only X is
;; range-checked here.
(define-inline (pack-u8-u24 x y)
  (if (not (<= 0 x 255))
      (error "out of range" x))
  (logior (ash y 8) x))
199
;; Combine X (bits 0-7) with the signed 24-bit value Y (bits 8-31, two's
;; complement) into one word.
;;
;; Y must lie in [-#x800000, #x7fffff].  Negative values are biased by
;; #x1000000 to obtain their 24-bit two's complement encoding.  Values
;; of #x800000 and above are rejected: encoded in 24 bits they would
;; read back as negative numbers.
(define-inline (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ((<= (- #x800000) y -1)
                   (+ y #x1000000))
                  ((<= 0 y #x7fffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
210
;; Combine a 1-bit X (bit 0), a 7-bit Y (bits 1-7) and Z (bits 8 and up)
;; into one word.  X and Y are range-checked; Z is not checked here.
(define-inline (pack-u1-u7-u24 x y z)
  (or (<= 0 x 1)
      (error "out of range" x))
  (or (<= 0 y 127)
      (error "out of range" y))
  (logior x (ash y 1) (ash z 8)))
217
;; Combine an 8-bit X (bits 0-7), a 12-bit Y (bits 8-19) and Z (bits 20
;; and up) into one word.  X and Y are range-checked; Z is not checked
;; here.
(define-inline (pack-u8-u12-u12 x y z)
  (unless (and (<= 0 x) (<= x 255))
    (error "out of range" x))
  (unless (and (<= 0 y) (<= y 4095))
    (error "out of range" y))
  (logior x (ash y 8) (ash z 20)))
224
;; Combine an 8-bit X (bits 0-7), an 8-bit Y (bits 8-15) and Z (bits 16
;; and up) into one word.  X and Y are range-checked; Z is not checked
;; here.
(define-inline (pack-u8-u8-u16 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y)))
  (logior x (ash y 8) (ash z 16)))
231
;; Combine four byte-sized fields into one word: X in bits 0-7, Y in
;; bits 8-15, Z in bits 16-23 and W in bits 24 and up.  X, Y and Z are
;; range-checked, in that order; W is not checked here.
(define-inline (pack-u8-u8-u8-u8 x y z w)
  (let ((check-u8 (lambda (v)
                    (unless (<= 0 v 255)
                      (error "out of range" v)))))
    (check-u8 x)
    (check-u8 y)
    (check-u8 z)
    (logior x (ash y 8) (ash z 16) (ash w 24))))
240
28e12ea0
AW
;; Pack boolean flags into an integer: the first flag contributes the
;; value 1 and the second the value 2 when true.
(eval-when (expand)
  (define-syntax pack-flags
    (syntax-rules ()
      ;; Add clauses as needed.
      ((_ flag0 flag1)
       (logior (if flag0 1 0)
               (if flag1 2 0))))))
07c05279 247
e78991aa
AW
248;;; Helpers to read and write 32-bit units in a buffer.
249
;; Read the N-th native-endian unsigned 32-bit unit of BUF (byte offset
;; 4*N).
(define-inline (u32-ref buf n)
  (bytevector-u32-native-ref buf (* 4 n)))
252
;; Store VAL as the N-th native-endian unsigned 32-bit unit of BUF (byte
;; offset 4*N).
(define-inline (u32-set! buf n val)
  (bytevector-u32-native-set! buf (* 4 n) val))
255
;; Read the N-th native-endian signed 32-bit unit of BUF (byte offset
;; 4*N).
(define-inline (s32-ref buf n)
  (bytevector-s32-native-ref buf (* 4 n)))
258
;; Store VAL as the N-th native-endian signed 32-bit unit of BUF (byte
;; offset 4*N).
(define-inline (s32-set! buf n val)
  (bytevector-s32-native-set! buf (* 4 n) val))
261
262
263\f
264
265;;; A <meta> entry collects metadata for one procedure. Procedures are
691697de 266;;; written as contiguous ranges of bytecode.
e78991aa 267;;;
28e12ea0
AW
;; (assert-match ARG PATTERN KIND) evaluates ARG once and signals an
;; error whose message is "expected " followed by KIND, with the value
;; as irritant, unless the value matches the (ice-9 match) PATTERN.
(eval-when (expand)
  (define-syntax-rule (assert-match arg pattern kind)
    (let ((val arg))
      (if (not (match val (pattern #t) (_ #f)))
          (error (string-append "expected " kind) val)))))
2a4daafd 273
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; The label of the procedure; make-meta accepts an exact integer or
  ;; a symbol.
  (label meta-label)
  ;; An alist with symbolic keys (checked by make-meta); meta-name
  ;; looks up the `name' key.
  (properties meta-properties set-meta-properties!)
  ;; Start of the procedure's bytecode range.
  (low-pc meta-low-pc)
  ;; End of the procedure's bytecode range; make-meta starts it at #f.
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; Arity records; make-meta starts this as the empty list.
  (arities meta-arities set-meta-arities!))
e78991aa 282
;; Create a <meta> record for the procedure starting at LOW-PC.
;;
;; LABEL must be an exact integer or a symbol and PROPERTIES an alist
;; with symbolic keys; an error is signalled otherwise.  The high-pc
;; field starts as #f and the arities list starts empty.
(define (make-meta label properties low-pc)
  ;; The message names both accepted kinds of label, matching the
  ;; pattern that is actually checked.
  (assert-match label (or (? exact-integer?) (? symbol?))
                "exact integer or symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (%make-meta label properties low-pc #f '()))
2a4daafd
AW
287
;; Return the value stored under the `name' key of META's properties
;; alist, or #f when there is no such key.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
290
3185c907
AW
;; Metadata for one <lambda-case>.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc definitions)
  arity?
  (req arity-req)
  (opt arity-opt)
  (rest arity-rest)
  (kw-indices arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  ;; The low-pc is fixed at construction; high-pc and definitions can
  ;; be updated afterwards through their setters.
  (low-pc arity-low-pc)
  (high-pc arity-high-pc set-arity-high-pc!)
  (definitions arity-definitions set-arity-definitions!))
3185c907 304
28e12ea0
AW
;; Number of 32-bit words in one block of the code buffer; expands to
;; the literal 32 wherever it is referenced.
(eval-when (expand)
  (define-syntax *block-size*
    (identifier-syntax 32)))
e78991aa
AW
307
308;;; An assembler collects all of the words emitted during assembly, and
309;;; also maintains ancillary information such as the constant table, a
310;;; relocation list, and so on.
311;;;
691697de 312;;; Bytecode consists of 32-bit units. We emit bytecode using native
e78991aa
AW
313;;; endianness. If we're targeting a foreign endianness, we byte-swap
314;;; the bytevector as a whole instead of conditionalizing each access.
315;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources
            dead-slot-maps)
  asm?

  ;; Bytecode is written into what is logically a growable vector,
  ;; represented as a list of blocks.  `cur' is the block currently
  ;; being filled and `idx' the next free index in it, counted in
  ;; 32-bit units.
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; Absolute offset (in u32 units) of the beginning of the instruction
  ;; being written.  It is updated once all words of a primitive
  ;; instruction have been emitted, mirroring the VM, which advances
  ;; its IP only at the end of an instruction.  This makes it the right
  ;; base for computing offsets between two points in a program.
  (start asm-start set-asm-start!)

  ;; The blocks that have already been filled.
  (prev asm-prev set-asm-prev!)

  ;; How many u32 words the blocks in `prev' hold; equivalently, the
  ;; offset of the current block.
  (written asm-written set-asm-written!)

  ;; The labels defined in this compilation unit, mapping each label to
  ;; its position.  make-assembler initializes this field with a hash
  ;; table.
  (labels asm-labels set-asm-labels!)

  ;; Relocations needed by the program text, in an internal
  ;; representation.  Textual relative relocations are resolved by the
  ;; assembler itself; the other kinds are later reified as linker
  ;; relocations and resolved by the linker.
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table: a vhash from object to label.  Constants are
  ;; de-duplicated and written to separate sections: .rodata for
  ;; read-only data, .data for constants needing load-time
  ;; initialization (like symbols).  A constant may depend on others
  ;; (e.g. a symbol on its stringbuf), so the order of this table
  ;; matters.
  (constants asm-constants set-asm-constants!)

  ;; Instructions that initialize the constants.  They will run in a
  ;; thunk with 2 local variables.
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, holding section names.
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; Number to give to the next section that gets written.
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta> records, one per procedure.
  (meta asm-meta set-asm-meta!)

  ;; Source information as a list of (pos . source) pairs.  POS is
  ;; relative to the start of the text section; SOURCE has the format
  ;; returned by source-properties.
  (sources asm-sources set-asm-sources!)

  ;; Dead slot maps as a list of (pos . dead-slot-map) pairs.  POS is
  ;; relative to the start of the text section; DEAD-SLOT-MAP is an
  ;; integer bitfield of the slots that are dead at a call site.
  (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
e78991aa 407
;; Allocate a new code block: a u32vector of *block-size* elements.
(define-inline (fresh-block)
  (make-u32vector *block-size*))
410
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a fresh assembler for a target with the given @var{word-size}
and @var{endianness}.  Each defaults to the value for the currently
configured target."
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (section-names (make-string-table)))
    ;; Empty code buffer, no labels or relocations, an empty constant
    ;; table, and section numbering starting at 1.
    (make-asm first-block 0 0 '() 0
              labels '()
              word-size endianness
              vlist-null '()
              section-names 1
              '() '() '())))
e78991aa
AW
422
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
426
;; The offset, in 32-bit units, of the next word to be written into the
;; code buffer: the words in already-closed blocks plus the index into
;; the current block.
(define-inline (asm-pos asm)
  (+ (asm-written asm) (asm-idx asm)))
431
(define (allocate-new-block asm)
  "Retire the current block of @var{asm} and make a fresh block the
target of the next write."
  (let* ((block (fresh-block))
         (filled (asm-cur asm))
         ;; Taken before idx is reset, so it counts the retired block.
         (total (asm-pos asm)))
    (set-asm-prev! asm (cons filled (asm-prev asm)))
    (set-asm-written! asm total)
    (set-asm-cur! asm block)
    (set-asm-idx! asm 0)))
440
;; Write the 32-bit word U32 at the current position of the instruction
;; stream.  Room for the word is assumed to exist; afterwards a new
;; block is allocated if the current one became full, so that room
;; exists for the next word as well.
(define-inline (emit asm u32)
  (let ((slot (asm-idx asm)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm (1+ slot))
    (when (= (asm-idx asm) *block-size*)
      (allocate-new-block asm))))
449
;; Build an internal relocation of type TYPE that references the symbol
;; LABEL and applies WORD words after position BASE.  TYPE is either
;; x8-s24, a 24-bit relative label reference that the assembler can fix
;; up itself, or s32, a 32-bit relative reference that is left to the
;; linker.  The relocation is the list (TYPE LABEL BASE WORD).
(define-inline (make-reloc type label base word)
  `(,type ,label ,base ,word))
457
;; Called once all words of an instruction are written: move asm-start
;; to the current write position.
(define-inline (reset-asm-start! asm)
  (let ((pos (asm-pos asm)))
    (set-asm-start! asm pos)))
461
e78991aa
AW
(define (record-label-reference asm label)
  "Note that the word about to be written holds an x8-s24 reference to
the local label @var{label}.  The assembler patches the value in later."
  (let* ((insn-start (asm-start asm))
         (word-index (- (asm-pos asm) insn-start)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label insn-start word-index)
                           (asm-relocs asm)))))
469
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Note that the word about to be written holds an s32 far reference to
@var{label}.  The linker patches the value in later.  The base of the
relocation is the start of the current instruction minus @var{offset}."
  (let* ((base (- (asm-start asm) offset))
         (word-index (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base word-index)
                           (asm-relocs asm)))))
477
478
479\f
480
481;;;
482;;; Primitive assemblers are defined by expanding `assembler' for each
1b780c13 483;;; opcode in `(instruction-list)'.
e78991aa
AW
484;;;
485
;; Expansion-time helpers used to generate the primitive assemblers.
28e12ea0 486(eval-when (expand)
;; Return an identifier, in the lexical context of CTX, whose name is
;; the concatenation of the symbols named by A and B.
e78991aa 487 (define (id-append ctx a b)
28e12ea0
AW
488 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b))))
489
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a lambda taking
;; an <asm> followed by the operands of every word.  The lambda checks
;; its first argument with asm?, emits each word, and finally calls
;; reset-asm-start!.
490 (define-syntax assembler
491 (lambda (x)
;; op-case dispatches, at expansion time, on a word-type symbol.  The
;; selected clause yields a syntax object of the shape
;; ((FORMAL ...) CODE ...) in which the formals are fresh temporaries;
;; an unknown type is an "unmatched name" error.
492 (define-syntax op-case
493 (lambda (x)
494 (syntax-case x ()
495 ((_ asm name ((type arg ...) code ...) clause ...)
496 #`(if (eq? name 'type)
497 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
498 #'((arg ...)
499 code ...))
500 (op-case asm name clause ...)))
501 ((_ asm name)
502 #'(error "unmatched name" name)))))
503
;; Formals and emitting code for the first word of an instruction, which
;; always carries OPCODE in its low 8 bits.
504 (define (pack-first-word asm opcode type)
505 (with-syntax ((opcode opcode))
506 (op-case
507 asm type
508 ((U8_X24)
509 (emit asm opcode))
510 ((U8_U24 arg)
511 (emit asm (pack-u8-u24 opcode arg)))
;; Label operands are emitted as 0 bits here; the recorded relocation
;; is what later supplies the offset.
512 ((U8_L24 label)
513 (record-label-reference asm label)
514 (emit asm opcode))
515 ((U8_U8_I16 a imm)
516 (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
517 ((U8_U12_U12 a b)
518 (emit asm (pack-u8-u12-u12 opcode a b)))
519 ((U8_U8_U8_U8 a b c)
520 (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
521
;; Formals and emitting code for one word after the first.
522 (define (pack-tail-word asm type)
e78991aa
AW
523 (op-case
524 asm type
28e12ea0
AW
525 ((U8_U24 a b)
526 (emit asm (pack-u8-u24 a b)))
527 ((U8_L24 a label)
e78991aa 528 (record-label-reference asm label)
28e12ea0
AW
529 (emit asm a))
530 ((U32 a)
531 (emit asm a))
;; A 32-bit immediate: the object's address must fit in 32 bits.
532 ((I32 imm)
533 (let ((val (object-address imm)))
534 (unless (zero? (ash val -32))
535 (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
536 (emit asm val)))
;; A 64-bit immediate, emitted as two words: high half, then low half.
;; Only allowed when the target word size is 8.
537 ((A32 imm)
538 (unless (= (asm-word-size asm) 8)
539 (error "make-long-immediate unavailable for this target"))
540 (emit asm (ash (object-address imm) -32))
541 (emit asm (logand (object-address imm) (1- (ash 1 32)))))
;; B32 takes no operand and emits nothing here.
542 ((B32))
;; Far label references: a zero placeholder word plus an s32
;; relocation.
543 ((N32 label)
544 (record-far-label-reference asm label)
545 (emit asm 0))
546 ((S32 label)
547 (record-far-label-reference asm label)
548 (emit asm 0))
549 ((L32 label)
550 (record-far-label-reference asm label)
551 (emit asm 0))
;; As above, with the relocation base moved back by OFFSET scaled by
;; word-size/4.
552 ((LO32 label offset)
553 (record-far-label-reference asm label
554 (* offset (/ (asm-word-size asm) 4)))
555 (emit asm 0))
556 ((X8_U24 a)
557 (emit asm (pack-u8-u24 0 a)))
558 ((X8_L24 label)
559 (record-label-reference asm label)
560 (emit asm 0))
;; Boolean operands are encoded as a single bit: 1 for true, 0 for
;; false.
561 ((B1_X7_L24 a label)
562 (record-label-reference asm label)
563 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
564 ((B1_U7_L24 a b label)
565 (record-label-reference asm label)
566 (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
567 ((B1_X31 a)
568 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
569 ((B1_X7_U24 a b)
570 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))
571
;; Splice the formals and code of the first word and of every tail word
;; into a single lambda.
572 (syntax-case x ()
573 ((_ name opcode word0 word* ...)
574 (with-syntax ((((formal0 ...)
575 code0 ...)
576 (pack-first-word #'asm
577 (syntax->datum #'opcode)
578 (syntax->datum #'word0)))
579 ((((formal* ...)
580 code* ...) ...)
581 (map (lambda (word) (pack-tail-word #'asm word))
582 (syntax->datum #'(word* ...)))))
583 #'(lambda (asm formal0 ... formal* ... ...)
584 (unless (asm? asm) (error "not an asm"))
585 code0 ...
586 code* ... ...
587 (reset-asm-start! asm))))))))
e78991aa
AW
588
;; Hash table from instruction name (a symbol) to the procedure that
;; emits it.  Entries are added with hashq-set! by the assembler-defining
;; macros in this file, and emit-text looks instructions up here.
589(define assemblers (make-hash-table))
590
28e12ea0
AW
591(eval-when (expand)
;; (define-assembler NAME OPCODE KIND ARG ...) defines emit-NAME as the
;; primitive assembler built by `assembler' from NAME, OPCODE and the
;; word types ARG ..., and registers it in `assemblers' under NAME.
;; KIND is matched but not used by this macro.
592 (define-syntax define-assembler
593 (lambda (x)
594 (syntax-case x ()
595 ((_ name opcode kind arg ...)
596 (with-syntax ((emit (id-append #'name #'emit- #'name)))
d4b3a36d
AW
597 #'(define emit
598 (let ((emit (assembler name opcode arg ...)))
599 (hashq-set! assemblers 'name emit)
600 emit)))))))
28e12ea0
AW
601
;; (visit-opcodes MACRO ARG ...) expands to a `begin' containing one
;; (MACRO ARG ... . INST) form per element INST of (instruction-list),
;; which is read at expansion time.
602 (define-syntax visit-opcodes
603 (lambda (x)
604 (syntax-case x ()
605 ((visit-opcodes macro arg ...)
606 (with-syntax (((inst ...)
607 (map (lambda (x) (datum->syntax #'macro x))
608 (instruction-list))))
609 #'(begin
610 (macro arg ... . inst)
611 ...)))))))
e78991aa
AW
612
;; Define and register one primitive emit-NAME procedure per entry of
;; (instruction-list).
613(visit-opcodes define-assembler)
614
d4b3a36d
AW
615(eval-when (expand)
616
617 ;; Some operands are encoded using a restricted subset of the full
618 ;; 24-bit local address space, in order to make the bytecode more
619 ;; dense in the usual case that there are few live locals. Here we
620 ;; define wrapper emitters that shuffle out-of-range operands into and
621 ;; out of the reserved range of locals [253,255]. This range is
622 ;; sufficient because these restricted operands are only present in
623 ;; the first word of an instruction. Since 8 bits is the smallest
624 ;; slot-addressing operand size, that means we can fit 3 operands in
625 ;; the 24 bits of payload of the first word (the lower 8 bits being
626 ;; taken by the opcode).
627 ;;
628 ;; The results are wrapper emitters with the same arity,
629 ;; e.g. emit-cons* that wraps emit-cons. We expose these wrappers as
630 ;; the public interface for emitting `cons' instructions. That way we
631 ;; solve the problem fully and in just one place. The only manual
632 ;; care that need be taken is in the exports list at the top of the
633 ;; file -- to be sure that we export the wrapper and not the wrapped
634 ;; emitter.
635
;; Return syntax for a lambda wrapping emit-NAME that shuffles
;; out-of-range first-word operands through the scratch slots 253, 254
;; and 255, or #f when the pair (WORD0, KIND) is not handled by
;; analyze-first-word.  WORD* lists the types of the remaining words.
636 (define (shuffling-assembler name kind word0 word*)
;; Yields ((FORMAL ...) SHUFFLE) for the first word, where SHUFFLE is an
;; expression returning the in-range replacement operands as multiple
;; values.  With kind `!' every out-of-range operand is first copied
;; into its scratch slot with emit-mov*.  With kind `<-' the first
;; operand is only replaced by 253 when out of range (no copy is emitted
;; for it here), while the other operands are copied as in the `!' case.
637 (define (analyze-first-word)
638 (define-syntax op-case
639 (syntax-rules ()
640 ((_ type ((%type %kind arg ...) values) clause ...)
641 (if (and (eq? type '%type) (eq? kind '%kind))
642 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
643 #'((arg ...) values))
644 (op-case type clause ...)))
645 ((_ type)
646 #f)))
647 (op-case
648 word0
649 ((U8_U8_I16 ! a imm)
650 (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
651 imm))
652 ((U8_U8_I16 <- a imm)
653 (values (if (< a (ash 1 8)) a 253)
654 imm))
655 ((U8_U12_U12 ! a b)
656 (values (if (< a (ash 1 12)) a (begin (emit-mov* asm 253 a) 253))
657 (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
658 ((U8_U12_U12 <- a b)
659 (values (if (< a (ash 1 12)) a 253)
660 (if (< b (ash 1 12)) b (begin (emit-mov* asm 254 b) 254))))
661 ((U8_U8_U8_U8 ! a b c)
662 (values (if (< a (ash 1 8)) a (begin (emit-mov* asm 253 a) 253))
663 (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
664 (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))
665 ((U8_U8_U8_U8 <- a b c)
666 (values (if (< a (ash 1 8)) a 253)
667 (if (< b (ash 1 8)) b (begin (emit-mov* asm 254 b) 254))
668 (if (< c (ash 1 8)) c (begin (emit-mov* asm 255 c) 255))))))
669
;; Fresh formals for a tail word of the given TYPE; tail operands are
;; passed through to the wrapped emitter unchanged.
670 (define (tail-formals type)
671 (define-syntax op-case
672 (syntax-rules ()
673 ((op-case type (%type arg ...) clause ...)
674 (if (eq? type '%type)
675 (generate-temporaries #'(arg ...))
676 (op-case type clause ...)))
677 ((op-case type)
678 (error "unmatched type" type))))
679 (op-case type
680 (U8_U24 a b)
681 (U8_L24 a label)
682 (U32 a)
683 (I32 imm)
684 (A32 imm)
685 (B32)
686 (N32 label)
687 (S32 label)
688 (L32 label)
689 (LO32 label offset)
690 (X8_U24 a)
691 (X8_L24 label)
692 (B1_X7_L24 a label)
693 (B1_U7_L24 a b label)
694 (B1_X31 a)
695 (B1_X7_U24 a b)))
696
;; Syntax for the code that runs after the wrapped instruction: when
;; DST is out of range for WORD0, copy scratch slot 253 into DST.
697 (define (shuffle-up dst)
698 (define-syntax op-case
699 (syntax-rules ()
700 ((_ type ((%type ...) exp) clause ...)
701 (if (memq type '(%type ...))
702 #'exp
703 (op-case type clause ...)))
704 ((_ type)
705 (error "unexpected type" type))))
706 (with-syntax ((dst dst))
707 (op-case
708 word0
709 ((U8_U8_I16 U8_U8_U8_U8)
710 (unless (< dst (ash 1 8))
711 (emit-mov* asm dst 253)))
712 ((U8_U12_U12)
713 (unless (< dst (ash 1 12))
714 (emit-mov* asm dst 253))))))
715
;; Assemble the wrapper.  The inner lambda rebinds the first-word
;; formals to the shuffled operands before calling emit-NAME.  The
;; shuffle-up code is only present for kind `<-' and refers to the
;; caller's original destination operand.
716 (and=>
717 (analyze-first-word)
718 (lambda (formals+shuffle)
719 (with-syntax ((emit-name (id-append name #'emit- name))
720 (((formal0 ...) shuffle) formals+shuffle)
721 (((formal* ...) ...) (map tail-formals word*)))
722 (with-syntax (((shuffle-up-dst ...)
723 (if (eq? kind '<-)
724 (syntax-case #'(formal0 ...) ()
725 ((dst . _)
726 (list (shuffle-up #'dst))))
727 '())))
728 #'(lambda (asm formal0 ... formal* ... ...)
729 (call-with-values (lambda () shuffle)
730 (lambda (formal0 ...)
731 (emit-name asm formal0 ... formal* ... ...)))
732 shuffle-up-dst ...))))))
733
;; (define-shuffling-assembler #:except (EXCEPT ...) NAME OPCODE KIND
;; WORD0 WORD* ...) defines emit-NAME* as the shuffling wrapper for NAME
;; and registers it in `assemblers' under NAME, replacing whatever was
;; registered there before.  It expands to an empty `begin' when NAME is
;; listed in EXCEPT or when shuffling-assembler returns #f.
734 (define-syntax define-shuffling-assembler
735 (lambda (stx)
736 (syntax-case stx ()
737 ((_ #:except (except ...) name opcode kind word0 word* ...)
738 (cond
739 ((or-map (lambda (op) (eq? (syntax->datum #'name) op))
740 (map syntax->datum #'(except ...)))
741 #'(begin))
742 ((shuffling-assembler #'name (syntax->datum #'kind)
743 (syntax->datum #'word0)
744 (map syntax->datum #'(word* ...)))
745 => (lambda (proc)
;; Build the identifier emit-NAME* from emit-NAME and `*'.
746 (with-syntax ((emit (id-append #'name
747 (id-append #'name #'emit- #'name)
748 #'*))
749 (proc proc))
750 #'(define emit
751 (let ((emit proc))
752 (hashq-set! assemblers 'name emit)
753 emit)))))
754 (else #'(begin))))))))
755
;; Define the shuffling emit-NAME* wrappers for every instruction except
;; `receive' and `mov'.
756(visit-opcodes define-shuffling-assembler #:except (receive mov))
757
758;; Mov and receive are two special cases that can work without wrappers.
759;; Indeed it is important that they do so.
760
;; Emit a move from SRC to DST, using the short `mov' encoding when both
;; operands are below 2^12 and `long-mov' otherwise.
(define (emit-mov* asm dst src)
  (let* ((limit (ash 1 12))
         (emitter (if (and (< dst limit) (< src limit))
                      emit-mov
                      emit-long-mov)))
    (emitter asm dst src)))
765
;; Emit `receive' directly when DST and PROC are both below 2^12.
;; Otherwise emit the sequence receive-values, mov from slot PROC+1 into
;; DST, and reset-frame to NLOCALS.
(define (emit-receive* asm dst proc nlocals)
  (let ((limit (ash 1 12)))
    (cond
     ((and (< dst limit) (< proc limit))
      (emit-receive asm dst proc nlocals))
     (else
      (emit-receive-values asm proc #t 1)
      (emit-mov* asm dst (1+ proc))
      (emit-reset-frame asm nlocals)))))
773
e78991aa
AW
(define (emit-text asm instructions)
  "Assemble @var{instructions} with the assembler @var{asm}.
@var{instructions} is a list of instructions, each one a list of the
form (@var{name} @var{arg} ...).  This procedure may be called any
number of times before @code{link-assembly}."
  (define (assemble-one inst)
    ;; Look up the emitter registered for the instruction name and
    ;; apply it to ASM and the operands.
    (let ((emitter (or (hashq-ref assemblers (car inst))
                       (error 'bad-instruction inst))))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
785
786\f
787
788;;;
789;;; The constant table records a topologically sorted set of literal
790;;; constants used by a program. For example, a pair uses its car and
791;;; cdr, a string uses its stringbuf, etc.
792;;;
793;;; Some things we want to add to the constant table are not actually
794;;; Scheme objects: for example, stringbufs, cache cells for toplevel
795;;; references, or cache cells for non-closure procedures. For these we
796;;; define special record types and add instances of those record types
797;;; to the table.
798;;;
799
;; Return #t if X is an immediate and #f otherwise.  The test is whether
;; the object address of X has any of the bits in the mask 6 set.
(define-inline (immediate? x)
  (logtest 6 (object-address x)))
803
;; Constant-table entry wrapping a string; intern-constant makes a
;; string constant refer to one of these.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string stringbuf-string))
808
;; Constant-table entry for a procedure.  intern-constant patches the
;; CODE value into word 1 of the entry.
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code static-procedure-code))
813
;; Constant-table entry holding the raw bytes of a uniform vector,
;; together with the element width passed by intern-constant.
(define-record-type <uniform-vector-backing-store>
  (make-uniform-vector-backing-store bytes element-size)
  uniform-vector-backing-store?
  (bytes uniform-vector-backing-store-bytes)
  (element-size uniform-vector-backing-store-element-size))
7bfbc7b1 819
e78991aa
AW
;; Constant-table entry for a cache cell, identified by SCOPE and KEY.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key cache-cell-key))
825
7bfbc7b1
AW
;; True if OBJ is a vector whose array shape is exactly one dimension
;; running from 0 to length-1.
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last (1- (vector-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
829
;; True if OBJ is an array whose element type is named by a symbol and
;; whose shape is exactly one dimension running from 0 to length-1.
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last (1- (array-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
834
e78991aa
AW
(define (statically-allocatable? x)
  "Return @code{#t} if the non-immediate constant @var{x} can be laid
out statically, and @code{#f} if some kind of runtime allocation would
be needed for it."
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
e78991aa
AW
840
841(define (intern-constant asm obj)
842 "Add an object to the constant table, and return a label that can be
843used to reference it. If the object is already present in the constant
844table, its existing label is used directly."
;; Intern a sub-object; returns its label, or #f for an immediate.
845 (define (recur obj)
846 (intern-constant asm obj))
;; Instructions that store OBJ into word N of the constant labelled
;; DST.  A statically allocatable OBJ is patched in directly; any other
;; non-immediate is loaded into local 1 and stored from there.  Nothing
;; is emitted for immediates.
847 (define (field dst n obj)
848 (let ((src (recur obj)))
849 (if src
c7cb2bc2
AW
850 (if (statically-allocatable? obj)
851 `((static-patch! ,dst ,n ,src))
852 `((static-ref 1 ,src)
853 (static-set! 1 ,dst ,n)))
e78991aa
AW
854 '())))
;; Return the list of initialization instructions for OBJ, which is
;; being given LABEL.  Sub-objects are interned as a side effect.
855 (define (intern obj label)
856 (cond
857 ((pair? obj)
858 (append (field label 0 (car obj))
859 (field label 1 (cdr obj))))
7bfbc7b1 860 ((simple-vector? obj)
e78991aa
AW
;; Element I of the vector is stored into word I+1.
861 (let lp ((i 0) (inits '()))
862 (if (< i (vector-length obj))
863 (lp (1+ i)
864 (append-reverse (field label (1+ i) (vector-ref obj i))
865 inits))
866 (reverse inits))))
867 ((stringbuf? obj) '())
868 ((static-procedure? obj)
2ab2a10d 869 `((static-patch! ,label 1 ,(static-procedure-code obj))))
e78991aa
AW
870 ((cache-cell? obj) '())
;; Symbols, keywords and numbers are rebuilt by the init instructions
;; from an interned string (or, for keywords, an interned symbol) and
;; then stored into word 0 of LABEL.
871 ((symbol? obj)
7396d216
AW
872 `((make-non-immediate 1 ,(recur (symbol->string obj)))
873 (string->symbol 1 1)
874 (static-set! 1 ,label 0)))
e78991aa 875 ((string? obj)
2ab2a10d 876 `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
e78991aa 877 ((keyword? obj)
7396d216
AW
878 `((static-ref 1 ,(recur (keyword->symbol obj)))
879 (symbol->keyword 1 1)
880 (static-set! 1 ,label 0)))
e78991aa 881 ((number? obj)
7396d216
AW
882 `((make-non-immediate 1 ,(recur (number->string obj)))
883 (string->number 1 1)
884 (static-set! 1 ,label 0)))
7bfbc7b1
AW
885 ((uniform-vector-backing-store? obj) '())
886 ((simple-uniform-vector? obj)
8051cf23
AW
887 (let ((width (case (array-type obj)
888 ((vu8 u8 s8) 1)
889 ((u16 s16) 2)
890 ;; Bitvectors are addressed in 32-bit units.
891 ;; Although a complex number is 8 or 16 bytes wide,
892 ;; it should be byteswapped in 4 or 8 byte units.
893 ((u32 s32 f32 c32 b) 4)
894 ((u64 s64 f64 c64) 8)
895 (else
896 (error "unhandled array type" obj)))))
897 `((static-patch! ,label 2
898 ,(recur (make-uniform-vector-backing-store
899 (uniform-array->bytevector obj)
900 width))))))
e78991aa
AW
901 (else
902 (error "don't know how to intern" obj))))
;; Immediates get no label (#f); an object already in the table keeps
;; its existing label; anything else gets a fresh label.
903 (cond
904 ((immediate? obj) #f)
905 ((vhash-assoc obj (asm-constants asm)) => cdr)
906 (else
907 ;; Note that calling intern may mutate asm-constants and
908 ;; asm-inits.
909 (let* ((label (gensym "constant"))
910 (inits (intern obj label)))
911 (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
912 (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
913 label))))
914
(define (intern-non-immediate asm obj)
  "Add the non-immediate @var{obj} to the constant table of @var{asm} and
return its label.  Signal an error if @var{obj} is an immediate."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
921
(define (intern-cache-cell asm scope key)
  "Return the label of the cache cell identified by @var{scope} and
@var{key}, adding the cell to the constant table of @var{asm} unless an
equal cell has already been interned."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
927
;; The cell that holds the module for a scope is the cache cell whose
;; key is #t.
(define (intern-module-cache-cell asm scope)
  "Intern the module cache cell for @var{scope}, and return its label."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
932
933
934\f
935
936;;;
937;;; Macro assemblers bridge the gap between primitive instructions and
938;;; some higher-level operations.
939;;;
940
28e12ea0
AW
(eval-when (expand)
  ;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
  ;; a procedure emit-NAME taking ARG ..., and registers that procedure
  ;; under the symbol NAME in the `assemblers' table.
  (define-syntax define-macro-assembler
    (lambda (stx)
      (syntax-case stx ()
        ((_ (name arg ...) body body* ...)
         (let ((emit-id (id-append #'name #'emit- #'name)))
           #`(begin
               (define #,emit-id
                 ;; The inner binding deliberately reuses the emit-NAME
                 ;; identifier for the lambda.
                 (let ((#,emit-id (lambda (arg ...) body body* ...)))
                   (hashq-set! assemblers 'name #,emit-id)
                   #,emit-id))
               (export #,emit-id))))))))
e78991aa
AW
953
;; Load the constant OBJ into slot DST.  Immediates are materialized
;; with the narrowest make-*-immediate instruction that fits their
;; object address; other objects are interned in the constant table and
;; referenced through their label.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      (let ((bits (object-address obj)))
        (cond
         ;; The short form needs an 8-bit destination and 16-bit payload.
         ((and (< dst 256) (zero? (ash bits -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash bits -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (let* ((static? (statically-allocatable? obj))
             (label (intern-non-immediate asm obj)))
        (if static?
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
969
;; Intern a static procedure whose code is at LABEL, and load a
;; reference to it into slot DST.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate
   asm dst
   (intern-constant asm (make-static-procedure label))))
973
be8b62ca
AW
;; (define-tc7-macro-assembler NAME TC7) defines the macro assembler
;; NAME, taking (asm slot invert? label), as br-if-tc7 specialized to
;; the type code TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
977
;; Keep in sync with tags.h.  Part of Guile's ABI.  See also
;; *branching-primcall-arities* in (language cps primitives), the set of
;; macro-instructions in assembly.scm, and
;; disassembler.scm:code-annotation.
;;
;; FIXME: Define all tc7 values in Scheme in one place, derived from
;; tags.h.
;;
;; Macro assemblers that are currently in use:
(define-tc7-macro-assembler br-if-symbol 5)
(define-tc7-macro-assembler br-if-variable 7)
(define-tc7-macro-assembler br-if-vector 13)
(define-tc7-macro-assembler br-if-string 21)
(define-tc7-macro-assembler br-if-bytevector 77)
(define-tc7-macro-assembler br-if-bitvector 95)

;; Currently unused macro assemblers, kept commented out:
;(define-tc7-macro-assembler br-if-weak-vector 13)
;(define-tc7-macro-assembler br-if-heap-number 23)
;(define-tc7-macro-assembler br-if-hashtable 29)
;(define-tc7-macro-assembler br-if-pointer 31)
;(define-tc7-macro-assembler br-if-fluid 37)
;(define-tc7-macro-assembler br-if-stringbuf 39)
;(define-tc7-macro-assembler br-if-dynamic-state 45)
;(define-tc7-macro-assembler br-if-frame 47)
;(define-tc7-macro-assembler br-if-vm 55)
;(define-tc7-macro-assembler br-if-rtl-program 69)
;(define-tc7-macro-assembler br-if-vm-cont 71)
;(define-tc7-macro-assembler br-if-weak-set 85)
;(define-tc7-macro-assembler br-if-weak-table 87)
;(define-tc7-macro-assembler br-if-array 93)
;(define-tc7-macro-assembler br-if-port 125)
;(define-tc7-macro-assembler br-if-smob 127)
1008
;; Start a program: define LABEL at the current position and push a
;; fresh meta record (label, properties, start position) onto the
;; assembler's meta list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
1013
;; Finish the most recently begun program: record its high PC and put
;; its arities, which were accumulated in reverse, back in order.
(define-macro-assembler (end-program asm)
  (let* ((meta (car (asm-meta asm)))
         (arities (meta-arities meta)))
    (set-meta-high-pc! meta (asm-start asm))
    (set-meta-arities! meta (reverse arities))))
1018
;; An arity with only required arguments is an opt-arity with no
;; optional arguments and no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
1021
;; An arity without keyword arguments is a kw-arity with no keyword
;; indices and allow-other-keys? false.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
1024
;; Begin an arity of the current program.  Validates the arguments,
;; records a new arity on the current meta record, and emits the
;; prelude that matches the arity's shape (keyword, optional/rest, or
;; standard).
;;
;;   req, opt:           lists of symbols
;;   rest:               #f or a symbol
;;   kw-indices:         alist of keyword -> integer
;;   allow-other-keys?:  boolean
;;   nlocals:            integer
;;   alternate:          #f, or the label (symbol or exact integer) of
;;                       the clause to try if this arity does not match
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  ;; The description now names every shape the pattern accepts.
  (assert-match alternate (or #f (? exact-integer?) (? symbol?))
                "#f, exact integer, or symbol")
  (let* ((meta (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f '()))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (rest? (->bool rest)))
    (set-meta-arities! meta (cons arity (meta-arities meta)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
1053
;; Finish the most recently begun arity: restore the order of its
;; definitions, which were accumulated in reverse, and record its high PC.
(define-macro-assembler (end-arity asm)
  (let* ((meta (car (asm-meta asm)))
         (arity (car (meta-arities meta))))
    (set-arity-definitions! arity (reverse (arity-definitions arity)))
    (set-arity-high-pc! arity (asm-start asm))))
e78991aa 1058
d4b3a36d
AW
;; As noted above, we reserve locals 253 through 255 for shuffling large
;; operands.  However the calling convention has all arguments passed in
;; a contiguous block.  This helper, called after the clause has been
;; chosen and the keyword/optional/rest arguments have been processed,
;; shuffles up arguments from slot 253 and higher into their final
;; allocations.
;;
;; Slots are moved from the highest one downwards, each going three
;; slots up.
(define (shuffle-up-args asm nargs)
  (let lp ((slot (1- nargs)))
    (when (>= slot 253)
      (emit-mov asm (+ slot 3) slot)
      (lp (1- slot)))))
1071
07c05279
AW
;; Prelude for an arity with exactly NREQ arguments.  With an ALTERNATE
;; clause, branch there on an argument count mismatch; otherwise assert
;; the count, using the combined assert/alloc instruction when both of
;; its operands fit in 12 bits.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (let ((limit (ash 1 12)))
        (if (and (< nreq limit) (< (- nlocals nreq) limit))
            (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
            (begin
              (emit-assert-nargs-ee asm nreq)
              (emit-alloc-frame asm nlocals)))))
  (shuffle-up-args asm nreq))
07c05279
AW
1083
;; Prelude for an arity with optional and/or rest arguments.  Checks the
;; lower bound on the argument count, then either binds the rest
;; argument or checks the upper bound, allocates the frame, and shuffles
;; up arguments that landed in the reserved slots.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (if alternate
      (emit-br-if-nargs-lt asm nreq alternate)
      (emit-assert-nargs-ge asm nreq))
  (let ((npos (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npos))
     (alternate
      (emit-br-if-nargs-gt asm npos alternate))
     (else
      (emit-assert-nargs-le asm npos)))
    (emit-alloc-frame asm nlocals)
    (shuffle-up-args asm (+ nreq nopt (if rest? 1 0)))))
07c05279
AW
1097
;; Prelude for an arity with keyword arguments.  NTOTAL is the number of
;; slots needed for the arguments: at least NREQ + NOPT, and large
;; enough to hold the highest keyword index.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (unless rest?
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let ((ntotal (fold (lambda (kw-entry acc)
                        (match kw-entry
                          (((? keyword?) . idx)
                           (max (1+ idx) acc))))
                      (+ nreq nopt)
                      kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      (+ nreq nopt)
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)
    (shuffle-up-args asm ntotal)))
07c05279 1119
;; Bind the label SYM to the current position in the label table.
(define-macro-assembler (label asm sym)
  (let ((labels (asm-labels asm))
        (here (asm-start asm)))
    (hashq-set! labels sym here)))
e78991aa 1122
e675e9bd
AW
;; Record that SOURCE applies from the current position onwards.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
1125
78351d10
AW
;; Record on the current arity that NAME is defined in SLOT from this
;; point.  The position is stored as four times the distance from the
;; arity's low PC.
(define-macro-assembler (definition asm name slot)
  (let* ((meta (car (asm-meta asm)))
         (arity (car (meta-arities meta)))
         (offset (- (asm-start asm) (arity-low-pc arity)))
         (def (vector name slot (* offset 4))))
    (set-arity-definitions! arity (cons def (arity-definitions arity)))))
1132
;; Store the value in slot MODULE into the module cache cell for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))
e78991aa 1136
;; Emit a toplevel-box instruction for SYM in SCOPE, interning the
;; symbol, the scope's module cache cell and the per-variable cache cell
;; that the instruction references.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((sym-loc (intern-non-immediate asm sym))
        (mod-loc (intern-module-cache-cell asm scope))
        (cell-loc (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cell-loc mod-loc sym-loc bound?)))
e78991aa 1142
;; Emit a module-box instruction for SYM in the module MODULE-NAME.  The
;; pair (PUBLIC? . MODULE-NAME) is interned as a constant and also
;; serves as the scope of the variable's cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((sym-loc (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-loc (intern-constant asm scope))
         (cell-loc (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst cell-loc scope-loc sym-loc bound?)))
e78991aa 1149
02c624fc
AW
;; Remember a non-empty dead slot map for the current position, as a
;; (position proc-slot . map) entry.
(define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
  (when (not (zero? dead-slot-map))
    (let ((entry (cons (asm-start asm)
                       (cons proc-slot dead-slot-map))))
      (set-asm-dead-slot-maps! asm (cons entry (asm-dead-slot-maps asm))))))
e78991aa
AW
1156
1157\f
1158
1159;;;
1160;;; Helper for linking objects.
1161;;;
1162
(define (make-object asm name bv relocs labels . kwargs)
  "Build the linker object for the section @var{name} with contents
@var{bv}.  The section name is interned in the shstrtab, the section
takes the next free section index and the byte length of @var{bv} as its
size, and a linker symbol called @var{name} at offset 0 is prepended to
@var{labels}.  @var{kwargs} are passed on to @code{make-elf-section}."
  (let* ((name-idx (intern-section-name! asm (symbol->string name)))
         (index (asm-next-section-number asm)))
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
1177
1178
1179\f
1180
1181;;;
1182;;; Linking the constant table. This code is somewhat intertwingled
1183;;; with the intern-constant code above, as that procedure also
1184;;; residualizes instructions to initialize constants at load time.
1185;;;
1186
;; Write the object address of the immediate X into BUF at byte offset
;; POS, as one target word in the target's endianness.
(define (write-immediate asm buf pos x)
  (let* ((val (object-address x))
         (endianness (asm-endianness asm))
         (set-word! (case (asm-word-size asm)
                      ((4) bytevector-u32-set!)
                      ((8) bytevector-u64-set!)
                      (else (error "bad word size" asm)))))
    (set-word! buf pos val endianness)))
1194
(define (emit-init-constants asm)
  "Emit a procedure that runs the accumulated constant initializers of
@var{asm} and return its label, or return @code{#f} if there are no
initializers."
  (let ((inits (asm-inits asm)))
    (if (null? inits)
        #f
        (let ((label (gensym "init-constants")))
          ;; The initializers were accumulated most recent first.
          (emit-text asm
                     `((begin-program ,label ())
                       (assert-nargs-ee/locals 1 1)
                       ,@(reverse inits)
                       (load-constant 1 ,*unspecified*)
                       (return 1)
                       (end-program)))
          label))))
1210
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels."
  ;; Round ADDRESS up to the next multiple of ALIGNMENT.
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Number of bytes that X occupies in the section.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters and a terminator.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Write the static representation of OBJ into BUF at POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (case word-size
            ((4)
             (bytevector-u32-set! buf pos tag endianness)
             (bytevector-u32-set! buf (+ pos 4) len endianness))
            ((8)
             (bytevector-u64-set! buf pos tag endianness)
             (bytevector-u64-set! buf (+ pos 8) len endianness))
            (else
             (error "bad word size" asm)))
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     ;; Trailing zero terminator.
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     ;; Trailing zero terminator.
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (case word-size
          ((4)
           (bytevector-u32-set! buf pos tc7-program endianness)
           (bytevector-u32-set! buf (+ pos 4) 0 endianness))
          ((8)
           (bytevector-u64-set! buf pos tc7-program endianness)
           (bytevector-u64-set! buf (+ pos 8) 0 endianness))
          (else (error "bad word size"))))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: the read-only string tag, the stringbuf (written
        ;; as #f here), a zero word, and the length.
        (case word-size
          ((4)
           (bytevector-u32-set! buf pos tc7-ro-string endianness)
           (write-immediate asm buf (+ pos 4) #f) ; stringbuf
           (bytevector-u32-set! buf (+ pos 8) 0 endianness)
           (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
          ((8)
           (bytevector-u64-set! buf pos tc7-ro-string endianness)
           (write-immediate asm buf (+ pos 8) #f) ; stringbuf
           (bytevector-u64-set! buf (+ pos 16) 0 endianness)
           (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
          (else (error "bad word size"))))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (case word-size
            ((4) (bytevector-u32-set! buf pos tag endianness))
            ((8) (bytevector-u64-set! buf pos tag endianness))
            (else (error "bad word size")))
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (array-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7))))))
          (case word-size
            ((4)
             (bytevector-u32-set! buf pos tag endianness)
             (bytevector-u32-set! buf (+ pos 4)
                                  (if (bitvector? obj)
                                      (bitvector-length obj)
                                      (bytevector-length obj))
                                  endianness)                 ; length
             (bytevector-u32-set! buf (+ pos 8) 0 endianness) ; pointer
             (write-immediate asm buf (+ pos 12) #f))         ; owner
            ((8)
             (bytevector-u64-set! buf pos tag endianness)
             (bytevector-u64-set! buf (+ pos 8)
                                  (if (bitvector? obj)
                                      (bitvector-length obj)
                                      (bytevector-length obj))
                                  endianness)                  ; length
             (bytevector-u64-set! buf (+ pos 16) 0 endianness) ; pointer
             (write-immediate asm buf (+ pos 24) #f))          ; owner
            (else (error "bad word size")))))

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Each object starts on an 8-byte boundary.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
e78991aa
AW
1409
(define (link-constants asm)
  "Link sections to hold constants needed by the program text emitted
using @var{asm}.

Returns three values: an object for the .rodata section, an object for
the .data section, and a label for an initialization procedure.  Any of
these may be @code{#f}."
  ;; An object goes to .rodata only if it is a stringbuf, a uniform
  ;; vector backing store, or a pair or simple vector whose fields are
  ;; all immediates.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (and (immediate? (car x)) (immediate? (cdr x))))
     ((simple-vector? x)
      (let lp ((i 0))
        (cond
         ((= i (vector-length x)) #t)
         ((immediate? (vector-ref x i)) (lp (1+ i)))
         (else #f))))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (len (vlist-length constants)))
    (let lp ((i 0)
             (ro vlist-null)
             (rw vlist-null))
      (if (< i len)
          (let* ((entry (vlist-ref constants i))
                 (obj (car entry))
                 (obj-label (cdr entry)))
            (if (shareable? obj)
                (lp (1+ i) (vhash-consq obj obj-label ro) rw)
                (lp (1+ i) ro (vhash-consq obj obj-label rw))))
          (values (link-data asm ro '.rodata)
                  (link-data asm rw '.data)
                  (emit-init-constants asm))))))
1442
1443\f
1444
1445;;;
1446;;; Linking program text.
1447;;;
1448
(define (process-relocs buf relocs labels)
  "Patch up internal x8-s24 relocations, and any s32 relocations that
reference symbols in the text section.  Return a list of linker
relocations for references to symbols defined outside the text section."
  ;; Handle one (type label base word) relocation, returning the
  ;; possibly extended list of linker relocations.
  (define (process-reloc reloc tail)
    (match reloc
      ((type label base word)
       (let ((abs (hashq-ref labels label))
             (dst (+ base word)))
         (cond
          ((eq? type 's32)
           (cond
            (abs
             (s32-set! buf dst (- abs base))
             tail)
            (else
             ;; Not a text label: leave it to the linker.
             (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
                   tail))))
          ((eq? type 'x8-s24)
           (unless abs
             (error "unbound near relocation" reloc))
           ;; Keep the low byte of the word and fill in the offset.
           (let ((rel (- abs base))
                 (u32 (u32-ref buf dst)))
             (u32-set! buf dst (pack-u8-s24 (logand u32 #xff) rel))
             tail))
          (else
           (error "bad relocation kind" reloc)))))))
  (fold process-reloc '() relocs))
1477
(define (process-labels labels)
  "Return a list of linker symbols, one for each entry of the label to
offset hash table @var{labels}.  Offsets are given in words and are
converted to bytes."
  (define (label->linker-symbol label loc)
    (make-linker-symbol label (* loc 4)))
  (hash-map->list label->linker-symbol labels))
e78991aa
AW
1484
(define (swap-bytes! buf)
  "Patch up the text buffer @var{buf}, swapping the endianness of each
32-bit unit."
  (let ((byte-len (bytevector-length buf)))
    (unless (zero? (modulo byte-len 4))
      (error "unexpected length"))
    ;; Reading as big-endian and writing back as little-endian reverses
    ;; the four bytes of each unit in place.
    (do ((pos 0 (+ pos 4)))
        ((= pos byte-len))
      (bytevector-u32-set! buf pos
                           (bytevector-u32-ref buf pos (endianness big))
                           (endianness little)))))
1498
(define (link-text-object asm)
  "Link the .rtl-text section, swapping the endianness of the bytes if
needed."
  (let ((buf (make-u32vector (asm-pos asm))))
    ;; First copy the completed blocks, oldest first, then the words
    ;; used so far in the current block.
    (let lp ((pos 0) (blocks (reverse (asm-prev asm))))
      (cond
       ((not (null? blocks))
        (let ((len (* *block-size* 4)))
          (bytevector-copy! (car blocks) 0 buf pos len)
          (lp (+ pos len) (cdr blocks))))
       (else
        (let ((byte-size (* (asm-idx asm) 4)))
          (bytevector-copy! (asm-cur asm) 0 buf pos byte-size)
          (unless (eq? (asm-endianness asm) (native-endianness))
            (swap-bytes! buf))
          (make-object asm '.rtl-text
                       buf
                       (process-relocs buf (asm-relocs asm)
                                       (asm-labels asm))
                       (process-labels (asm-labels asm)))))))))
1517
1518
1519\f
1520
02c624fc
AW
;;;
;;; Create the frame maps.  These maps are used by GC to identify dead
;;; slots in pending call frames, to avoid marking them.  We only do
;;; this when a frame makes a non-tail call, as that is the common case.
;;; Only the topmost frame will see a GC at any other point, but we mark
;;; top frames conservatively as serializing live slot maps at every
;;; instruction would take up too much space in the object file.
;;;
1529
;; The .guile.frame-maps section starts with two packed u32 values: one
;; indicating the offset of the first byte of the .rtl-text section, and
;; another indicating the relative offset in bytes of the slots data.
(define frame-maps-prefix-len (* 2 4))

;; Each header is 8 bytes: 4 for the offset from .rtl-text, and 4 for
;; the offset of the slot map from the beginning of the
;; .guile.frame-maps section.  The length of a frame map depends on the
;; frame size at the call site, and is not encoded into this section as
;; it is available at run-time.
(define frame-map-header-len (* 2 4))
1541
;; Build the .guile.frame-maps linker object from the dead slot maps
;; recorded on ASM, or return #f if none were recorded.
(define (link-frame-maps asm)
  ;; Bytes needed for a map: one bit per slot below PROC-SLOT, less two.
  (define (map-byte-length proc-slot)
    (ceiling-quotient (- proc-slot 2) 8))
  ;; MAPS is a list of (pos proc-slot . map) entries, COUNT its length
  ;; and MAP-LEN the total byte length of all the maps.
  (define (make-frame-maps maps count map-len)
    (let* ((endianness (asm-endianness asm))
           (headers-start frame-maps-prefix-len)
           (maps-start (+ headers-start (* count frame-map-header-len)))
           (bv (make-bytevector (+ maps-start map-len) 0)))
      ;; Second prefix word: offset of the slot map data.  The first is
      ;; filled in by the relocation against .rtl-text.
      (bytevector-u32-set! bv 4 maps-start endianness)
      (let lp ((maps maps) (header-pos headers-start) (map-pos maps-start))
        (match maps
          (()
           (make-object asm '.guile.frame-maps bv
                        (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
                        '() #:type SHT_PROGBITS #:flags SHF_ALLOC))
          (((pos proc-slot . bits) . rest)
           (bytevector-u32-set! bv header-pos (* pos 4) endianness)
           (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
           ;; Write the map one byte at a time, lowest bits first.
           (let write-bytes ((out map-pos)
                             (bits bits)
                             (remaining (map-byte-length proc-slot)))
             (cond
              ((zero? remaining)
               (lp rest (+ header-pos frame-map-header-len) out))
              (else
               (bytevector-u8-set! bv out (logand bits #xff))
               (write-bytes (1+ out) (ash bits -8) (1- remaining))))))))))
  (match (asm-dead-slot-maps asm)
    (() #f)
    (recorded
     ;; The maps were recorded most recent first.
     (let ((total-len (fold (lambda (entry len)
                              (match entry
                                ((pos proc-slot . map)
                                 (+ (map-byte-length proc-slot) len))))
                            0
                            recorded)))
       (make-frame-maps (reverse recorded) (length recorded) total-len)))))
1579
1580\f
1581
e78991aa
AW
1582;;;
1583;;; Linking other sections of the ELF file, like the dynamic segment,
1584;;; the symbol table, etc.
1585;;;
1586
4c906ad5
AW
;; Version of the bytecode format, written into the dynamic section.
;; FIXME: Define these somewhere central, shared with C.
(define *bytecode-major-version* #x0202)
(define *bytecode-minor-version* 5)
4c906ad5 1590
(define (link-dynamic-section asm text rw rw-init frame-maps)
  "Link the dynamic section for an ELF image with bytecode @var{text},
given the writable data section @var{rw} needing fixup from the
procedure with label @var{rw-init}.  @var{rw-init} may be false.  If
@var{rw} is true, it will be added to the GC roots at runtime.  If
@var{frame-maps} is true, an entry referring to the .guile.frame-maps
section is added as well."
  (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
    ;; Entries are (tag, value) pairs of words: version and entry
    ;; always, then the optional entries, then the DT_NULL terminator.
    (let* ((endianness (asm-endianness asm))
           (words (+ 6
                     (if rw 4 0)
                     (if rw-init 2 0)
                     (if frame-maps 2 0)))
           (bv (make-bytevector (* word-size words) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      ;; A label is written as zero plus a relocation for the linker.
      (define (set-label! i label)
        (set! relocs (cons (make-linker-reloc 'reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      (set-uword! 0 DT_GUILE_VM_VERSION)
      (set-uword! 1 (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (when rw
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (when rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)))
      (when frame-maps
        (set-uword! (- words 4) DT_GUILE_FRAME_MAPS)
        (set-label! (- words 3) '.guile.frame-maps))
      (set-uword! (- words 2) DT_NULL)
      (set-uword! (- words 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
    (else (error "bad word size" asm))))
1638
(define (link-shstrtab asm)
  "Link the string table that holds the section header names."
  ;; The table's own name has to be interned before the table is linked.
  (intern-section-name! asm ".shstrtab")
  (let ((bv (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab bv '() '()
                 #:type SHT_STRTAB #:flags 0)))
1646
;; Link the symbol table: one hidden STT_FUNC symbol per program
;; recorded on ASM, in the order the programs were begun.  Returns two
;; values: the .symtab object and the .strtab object that holds the
;; symbol names.
(define (link-symtab text-section asm)
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (n (length metas))
         (strtab (make-string-table))
         (bv (make-bytevector (* n size) 0)))
    ;; Programs without a name get the empty string.
    (define (intern-string! name)
      (string-table-intern! strtab (if name (symbol->string name) "")))
    (let lp ((metas metas) (i 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name (intern-string! (meta-name meta))))
          (write-elf-symbol bv (* i size) endianness word-size
                            (make-elf-symbol
                             #:name name
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 (meta-low-pc meta))
                             #:size (* 4 (- (meta-high-pc meta)
                                            (meta-low-pc meta)))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section))))
        (lp (cdr metas) (1+ i))))
    (let ((strtab-object (make-object asm '.strtab
                                      (link-string-table! strtab)
                                      '() '()
                                      #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize size
                           #:link (elf-section-index
                                   (linker-object-section strtab-object)))
              strtab-object))))
1683
b2006c19
AW
1684;;; The .guile.arities section describes the arities that a function can
1685;;; have. It is in two parts: a sorted array of headers describing
1686;;; basic arities, and an array of links out to a string table (and in
1687;;; the case of keyword arguments, to the data section) for argument
1688;;; names. The whole thing is prefixed by a uint32 indicating the
1689;;; offset of the end of the headers array.
1690;;;
1691;;; The arity headers array is a packed array of structures of the form:
1692;;;
1693;;; struct arity_header {
1694;;; uint32_t low_pc;
1695;;; uint32_t high_pc;
1696;;; uint32_t offset;
1697;;; uint32_t flags;
1698;;; uint32_t nreq;
1699;;; uint32_t nopt;
c3651bd5 1700;;; uint32_t nlocals;
b2006c19
AW
1701;;; }
1702;;;
1703;;; All of the offsets and addresses are 32 bits. We can expand in the
1704;;; future to use 64-bit offsets if appropriate, but there are other
691697de
AW
1705;;; aspects of bytecode that constrain us to a total image that fits in
1706;;; 32 bits, so for the moment we'll simplify the problem space.
b2006c19
AW
1707;;;
1708;;; The following flags values are defined:
1709;;;
1710;;; #x1: has-rest?
1711;;; #x2: allow-other-keys?
1712;;; #x4: has-keyword-args?
1713;;; #x8: is-case-lambda?
d8595af5 1714;;; #x10: is-in-case-lambda?
b2006c19
AW
1715;;;
1716;;; Functions with a single arity specify their number of required and
1717;;; optional arguments in nreq and nopt, and do not have the
1718;;; is-case-lambda? flag set. Their "offset" member links to an array
1719;;; of pointers into the associated .guile.arities.strtab string table,
1720;;; identifying the argument names. This offset is relative to the
cade4c8f
AW
1721;;; start of the .guile.arities section.
1722;;;
;;; If the arity has keyword arguments -- if has-keyword-args? is set in
;;; the flags -- the first uint32 pointed to by offset encodes a link to
;;; the "keyword indices" literal, in the data section. Then follow the
;;; names for all locals, in order, as uleb128 values. The required
;;; arguments will be the first locals, followed by the optionals,
;;; followed by the rest argument if has-rest? is set. The names
;;; point into the associated string table section. After the names
;;; come two more uleb128 values per local, in the same order: its def
;;; and its slot.
b2006c19
AW
1730;;;
1731;;; Functions with no arities have no arities information present in the
1732;;; .guile.arities section.
1733;;;
1734;;; Functions with multiple arities are preceded by a header with
1735;;; is-case-lambda? set. All other fields are 0, except low-pc and
1736;;; high-pc which should be the bounds of the whole function. Headers
d8595af5
AW
1737;;; for the individual arities follow, with the is-in-case-lambda? flag
1738;;; set. In this way the whole headers array is sorted in increasing
1739;;; low-pc order, and case-lambda clauses are contained within the
1740;;; [low-pc, high-pc] of the case-lambda header.
b2006c19
AW
1741
;; Size, in bytes, of the uint32 that prefixes the arities section.
(define arities-prefix-len 4)

;; Size, in bytes, of one arity header: seven uint32 fields (low_pc,
;; high_pc, offset, flags, nreq, nopt, nlocals).
(define arity-header-len (* 4 7))
1747
1748;; Some helpers.
;; Write VAL to the binary output port PORT as an unsigned LEB128
;; value: seven bits per byte, least-significant group first, with the
;; high bit set on every byte except the last.
;;
;; VAL must be a non-negative exact integer.  An arithmetic right shift
;; of a negative number never reaches zero, so without the guard below
;; a negative VAL would make the loop emit bytes forever.
(define (put-uleb128 port val)
  (unless (and (exact-integer? val) (>= val 0))
    (error "put-uleb128: expected a non-negative exact integer" val))
  (let lp ((val val))
    (let ((next (ash val -7)))
      (if (zero? next)
          ;; Last group: VAL is below #x80 here, so the continuation
          ;; bit is clear.
          (put-u8 port val)
          (begin
            (put-u8 port (logior #x80 (logand val #x7f)))
            (lp next))))))
b2006c19 1757
c3651bd5
AW
;; Write the integer VAL to the binary output port PORT as a signed
;; LEB128 value.  Seven-bit groups are emitted least-significant first;
;; encoding stops once the remaining value fits in a single signed
;; seven-bit group, i.e. lies in [-64, 63].
(define (put-sleb128 port val)
  (let emit ((rest val))
    (let ((low7 (logand rest #x7f)))
      (cond
       ((and (>= rest -64) (<= rest 63))
        ;; Final group: continuation bit clear.
        (put-u8 port low7))
       (else
        (put-u8 port (logior #x80 low7))
        (emit (ash rest -7)))))))
1765
;; Return the current position of PORT without moving it.
(define (port-position port)
  (ftell port))
b2006c19 1768
28e12ea0
AW
;; Combine the five arity booleans into the flags word of an arity
;; header.  Each true argument contributes one bit:
;;   #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;;   #x8 is-case-lambda?, #x10 is-in-case-lambda?
(define-inline (pack-arity-flags has-rest? allow-other-keys?
                                 has-keyword-args? is-case-lambda?
                                 is-in-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)
          (if is-in-case-lambda? #x10 0)))
b2006c19 1777
c3651bd5
AW
;; Fill in the arity headers for METAS in the bytevector HEADERS,
;; starting just past the arities prefix, and write the per-arity
;; locals data to NAMES-PORT, interning local names in STRTAB.  Signals
;; an error if the headers written do not end exactly at the end of
;; HEADERS.  Returns the list of linker relocations created for
;; keyword-indices links.
(define (write-arities asm metas headers names-port strtab)
  ;; The length of HEADERS plus the current position of NAMES-PORT.
  ;; FIXME: Seems silly to add on bytevector-length of headers, given
  ;; the arities-prefix.
  (define (names-offset)
    (+ (bytevector-length headers) (port-position names-port)))

  ;; Store one seven-field header at byte position POS of HEADERS.
  ;; LOW-PC and HIGH-PC are multiplied by 4 before being stored.
  (define (write-header pos low-pc high-pc offset flags nreq nopt nlocals)
    (define (set-field! field-offset val)
      (bytevector-u32-set! headers (+ pos field-offset) val
                           (asm-endianness asm)))
    (unless (<= (+ nreq nopt) nlocals)
      (error "forgot to emit definition instructions?"))
    (set-field! 0 (* low-pc 4))
    (set-field! 4 (* high-pc 4))
    (set-field! 8 offset)
    (set-field! 12 flags)
    (set-field! 16 nreq)
    (set-field! 20 nopt)
    (set-field! 24 nlocals))

  ;; If KW-INDICES is a pair, reserve a zeroed uint32 in NAMES-PORT and
  ;; return RELOCS extended with a relocation that points it at the
  ;; interned KW-INDICES constant.  Otherwise return RELOCS unchanged.
  (define (write-kw-indices kw-indices relocs)
    ;; FIXME: Assert that kw-indices is already interned.
    (cond
     ((pair? kw-indices)
      (let* ((pos (names-offset))
             (label (intern-constant asm kw-indices)))
        (put-bytevector names-port #vu8(0 0 0 0))
        (cons (make-linker-reloc 'abs32/1 pos 0 label) relocs)))
     (else relocs)))

  ;; Write the header for ARITY at POS, followed by its locals data on
  ;; NAMES-PORT.  Returns the updated relocation list.
  (define (write-arity pos arity in-case-lambda? relocs)
    (let ((definitions (arity-definitions arity))
          (kw-indices (arity-kw-indices arity)))
      (write-header pos
                    (arity-low-pc arity)
                    (arity-high-pc arity)
                    (names-offset)
                    (pack-arity-flags (arity-rest arity)
                                      (arity-allow-other-keys? arity)
                                      (pair? kw-indices)
                                      #f
                                      in-case-lambda?)
                    (length (arity-req arity))
                    (length (arity-opt arity))
                    (length definitions))
      (let ((relocs (write-kw-indices kw-indices relocs)))
        ;; Write local names.  A local whose name is not a symbol is
        ;; written as 0.
        (for-each (match-lambda
                    (#(name slot def)
                     (put-uleb128 names-port
                                  (if (symbol? name)
                                      (string-table-intern!
                                       strtab (symbol->string name))
                                      0))))
                  definitions)
        ;; Now write their definitions.
        (for-each (match-lambda
                    (#(name slot def)
                     (put-uleb128 names-port def)
                     (put-uleb128 names-port slot)))
                  definitions)
        relocs)))

  ;; Write every header belonging to META, starting at POS.  Returns
  ;; two values: the position just past the last header written, and
  ;; the updated relocation list.
  (define (write-meta meta pos relocs)
    (let ((arities (meta-arities meta)))
      (cond
       ((null? arities)
        (values pos relocs))
       ((and (pair? arities) (null? (cdr arities)))
        (values (+ pos arity-header-len)
                (write-arity pos (car arities) #f relocs)))
       (else
        ;; Write a case-lambda header, then individual arities.
        ;; The case-lambda header's offset link is 0.
        (write-header pos (meta-low-pc meta) (meta-high-pc meta) 0
                      (pack-arity-flags #f #f #f #t #f) 0 0 0)
        (let each-clause ((clauses arities)
                          (pos (+ pos arity-header-len))
                          (relocs relocs))
          (if (null? clauses)
              (values pos relocs)
              (each-clause (cdr clauses)
                           (+ pos arity-header-len)
                           (write-arity pos (car clauses) #t relocs))))))))

  (let walk ((metas metas) (pos arities-prefix-len) (relocs '()))
    (cond
     ((null? metas)
      (unless (= pos (bytevector-length headers))
        (error "expected to fully fill the bytevector"
               pos (bytevector-length headers)))
      relocs)
     (else
      (call-with-values
          (lambda () (write-meta (car metas) pos relocs))
        (lambda (pos relocs)
          (walk (cdr metas) pos relocs)))))))
b2006c19
AW
1858
;; Build the .guile.arities section and its string table from the
;; metadata recorded in ASM.  Returns two values: the .guile.arities
;; linker object and the .guile.arities.strtab linker object.
(define (link-arities asm)
  ;; Number of arity headers META contributes: none without arities,
  ;; one for a single arity, and otherwise one per clause plus the
  ;; leading case-lambda header.
  (define (header-count meta)
    (match (meta-arities meta)
      (() 0)
      ((_) 1)
      (clauses (1+ (length clauses)))))

  ;; Return a fresh bytevector holding HEAD followed by TAIL.
  (define (concatenate-bytevectors head tail)
    (let* ((head-len (bytevector-length head))
           (tail-len (bytevector-length tail))
           (result (make-bytevector (+ head-len tail-len))))
      (bytevector-copy! head 0 result 0 head-len)
      (bytevector-copy! tail 0 result head-len tail-len)
      result))

  (let* ((metas (reverse (asm-meta asm)))
         (headers-len (+ arities-prefix-len
                         (* arity-header-len
                            (fold (lambda (meta count)
                                    (+ count (header-count meta)))
                                  0
                                  metas))))
         (names (make-string-table))
         (headers (make-bytevector headers-len 0)))
    ;; The section starts with the offset of the end of the headers.
    (bytevector-u32-set! headers 0 headers-len (asm-endianness asm))
    (call-with-values open-bytevector-output-port
      (lambda (names-port get-names-bv)
        (let* ((relocs (write-arities asm metas headers names-port names))
               ;; The string table is linked only after every name has
               ;; been interned by write-arities.
               (strtab-object
                (make-object asm '.guile.arities.strtab
                             (link-string-table! names)
                             '() '()
                             #:type SHT_STRTAB #:flags 0))
               (arities-object
                (make-object asm '.guile.arities
                             (concatenate-bytevectors headers (get-names-bv))
                             relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link (elf-section-index
                                     (linker-object-section strtab-object)))))
          (values arities-object strtab-object))))))
1901
9128b1a1
AW
1902;;;
1903;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1904;;; values. Pc and str are both 32 bits wide. (Either could change to
1905;;; 64 bits if appropriate in the future.) Pc is the address of the
0a1d52ac
AW
1906;;; entry to a program, relative to the start of the text section, in
1907;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1908;;; string table section.
9128b1a1
AW
1909;;;
1910
;; The size of a docstrs entry, in bytes: a uint32 pc followed by a
;; uint32 string-table index.
(define docstr-size (* 2 4))

;; Build the .guile.docstrs section and its string table from the
;; metadata recorded in ASM.  Returns two values: the .guile.docstrs
;; linker object and the .guile.docstrs.strtab linker object.
(define (link-docstrs asm)
  (define (documentation-entry? prop)
    (eq? (car prop) 'documentation))

  ;; Return (PC . DOCSTRING) for META, with PC being four times its
  ;; low-pc, when META's properties contain exactly one `documentation'
  ;; entry and its value is a string.  Otherwise return #f.
  (define (meta->docstring meta)
    (match (find-tail documentation-entry? (meta-properties meta))
      (((_ . (? string? doc)) . rest)
       (and (not (find-tail documentation-entry? rest))
            (cons (* 4 (meta-low-pc meta)) doc)))
      (_ #f)))

  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->docstring (reverse (asm-meta asm))))
         (strings (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    ;; Fill one (pc, str) entry per docstring, in order.
    (let fill ((entries docstrings) (pos 0))
      (match entries
        (() pos)
        (((pc . text) . entries)
         (bytevector-u32-set! bv pos pc endianness)
         (bytevector-u32-set! bv (+ pos 4)
                              (string-table-intern! strings text)
                              endianness)
         (fill entries (+ pos docstr-size)))))
    (let* ((strtab-object
            (make-object asm '.guile.docstrs.strtab
                         (link-string-table! strings)
                         '() '()
                         #:type SHT_STRTAB #:flags 0))
           (docstrs-object
            (make-object asm '.guile.docstrs
                         bv
                         '() '()
                         #:type SHT_PROGBITS #:flags 0
                         #:link (elf-section-index
                                 (linker-object-section strtab-object)))))
      (values docstrs-object strtab-object))))
1951
c4c098e3
AW
1952;;;
;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
;;; values. Pc and addr are both 32 bits wide. (Either could change to
;;; 64 bits if appropriate in the future.) Pc is the address of the
;;; entry to a program, relative to the start of the text section, in
;;; bytes, and addr is the address of the associated properties alist,
;;; relative to the start of the ELF image.
1960;;; Since procedure properties are stored in the data sections, we need
1961;;; to link the procedures property section first. (Note that this
1962;;; constraint does not apply to the arities section, which may
1963;;; reference the data sections via the kw-indices literal, because
1964;;; assembling the text section already makes sure that the kw-indices
1965;;; are interned.)
1966;;;
1967
;; The size of a procprops entry, in bytes: a uint32 pc followed by a
;; uint32 address.
(define procprops-size (* 2 4))

;; Build the .guile.procprops section from the metadata recorded in
;; ASM and return it as a linker object.  Each entry's address field is
;; left zero and covered by a relocation against the interned
;; properties alist.
(define (link-procprops asm)
  ;; Return ALIST rebuilt up to its first entry whose key is `eq?' to
  ;; KEY.  That entry is dropped if VALUE-PRED accepts its value and
  ;; kept otherwise; either way the rest of ALIST is left unexamined.
  (define (assoc-remove-one alist key value-pred)
    (if (null? alist)
        '()
        (match (car alist)
          ((k . v)
           (let ((rest (cdr alist)))
             (cond
              ((not (eq? k key))
               (acons k v (assoc-remove-one rest key value-pred)))
              ((value-pred v) rest)
              (else (acons key v rest))))))))

  ;; META's properties minus its first `name' entry and minus its first
  ;; `documentation' entry when that one is a string.
  (define (props-without-name-or-docstring meta)
    (let ((unnamed (assoc-remove-one (meta-properties meta)
                                     'name
                                     (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))

  ;; Return (PC . PROPS) for META when some properties remain after
  ;; filtering, with PC being four times its low-pc.  Otherwise #f.
  (define (meta->procprops meta)
    (let ((props (props-without-name-or-docstring meta)))
      (and (pair? props)
           (cons (* 4 (meta-low-pc meta)) props))))

  (let* ((endianness (asm-endianness asm))
         (procprops (filter-map meta->procprops (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length procprops) procprops-size) 0))
         (relocs
          (let fill ((entries procprops) (pos 0) (relocs '()))
            (if (null? entries)
                relocs
                (match (car entries)
                  ((pc . props)
                   (bytevector-u32-set! bv pos pc endianness)
                   (fill (cdr entries)
                         (+ pos procprops-size)
                         (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                                  (intern-constant asm props))
                               relocs))))))))
    (make-object asm '.guile.procprops
                 bv
                 relocs '()
                 #:type SHT_PROGBITS #:flags 0)))
2009
a862d8c1
AW
2010;;;
2011;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
2012;;; sections provide line number and local variable liveness
2013;;; information. Their format is defined by the DWARF
2014;;; specifications.
2015;;;
2016
;; Return the source language reported for the compilation unit in ASM.
;; FIXME: Plumb language through to the assembler.  Until then ASM is
;; ignored and the answer is always the symbol `scheme'.
(define (asm-language asm)
  (quote scheme))
2020
;; Build the DWARF sections for ASM.
;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
;;
;; The .debug_info section holds one compile-unit DIE with one
;; subprogram DIE per entry in (asm-meta asm); .debug_abbrev holds the
;; abbreviations those DIEs use; .debug_str holds their strings;
;; .debug_loc is emitted empty; .debug_line holds a DWARF 2 format line
;; number program generated from (asm-sources asm).
(define (link-debug asm)
  ;; Fixed-width writers.  Multi-byte values use the endianness of ASM.
  (define (put-s8 port val)
    (let ((bv (make-bytevector 1)))
      (bytevector-s8-set! bv 0 val)
      (put-bytevector port bv)))

  (define (put-u16 port val)
    (let ((bv (make-bytevector 2)))
      (bytevector-u16-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u32 port val)
    (let ((bv (make-bytevector 4)))
      (bytevector-u32-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u64 port val)
    (let ((bv (make-bytevector 8)))
      (bytevector-u64-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  ;; DIEs are s-expressions of the form (TAG (@ (ATTR VAL) ...) CHILD ...).
  ;; A subprogram's low-pc is its label (relocated at link time); its
  ;; high-pc is its length, four times the difference of its pcs.
  (define (meta->subprogram-die meta)
    `(subprogram
      (@ ,@(cond
            ((meta-name meta)
             => (lambda (name) `((name ,(symbol->string name)))))
            (else
             '()))
         (low-pc ,(meta-label meta))
         (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

  (define (make-compile-unit-die asm)
    `(compile-unit
      (@ (producer ,(string-append "Guile " (version)))
         (language ,(asm-language asm))
         (low-pc .rtl-text)
         (high-pc ,(* 4 (asm-pos asm)))
         (stmt-list 0))
      ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

  (let-values (((die-port get-die-bv) (open-bytevector-output-port))
               ((die-relocs) '())
               ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
               ;; (tag has-kids? attrs forms) -> code
               ((abbrevs) vlist-null)
               ((strtab) (make-string-table))
               ((line-port get-line-bv) (open-bytevector-output-port))
               ((line-relocs) '())
               ;; file -> code
               ((files) vlist-null))

    ;; Emit one abbreviation declaration: its code, tag, children flag,
    ;; the (attribute, form) pairs, and a terminating (0, 0) pair.
    (define (write-abbrev code tag has-children? attrs forms)
      (put-uleb128 abbrev-port code)
      (put-uleb128 abbrev-port (tag-name->code tag))
      (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
      (for-each (lambda (attr form)
                  (put-uleb128 abbrev-port (attribute-name->code attr))
                  (put-uleb128 abbrev-port (form-name->code form)))
                attrs forms)
      (put-uleb128 abbrev-port 0)
      (put-uleb128 abbrev-port 0))

    ;; Return the abbreviation code for this DIE shape, writing a new
    ;; declaration the first time the shape is seen.  Codes start at 1.
    (define (intern-abbrev tag has-children? attrs forms)
      (let ((key (list tag has-children? attrs forms)))
        (match (vhash-assoc key abbrevs)
          ((_ . code) code)
          (#f (let ((code (1+ (vlist-length abbrevs))))
                (set! abbrevs (vhash-cons key code abbrevs))
                (write-abbrev code tag has-children? attrs forms)
                code)))))

    ;; Return the file-table index for FILE, allocating the next index
    ;; on first use.  Indices start at 1.
    (define (intern-file file)
      (match (vhash-assoc file files)
        ((_ . code) code)
        (#f (let ((code (1+ (vlist-length files))))
              (set! files (vhash-cons file code files))
              code))))

    ;; Write the whole .debug_line contents to line-port: the header,
    ;; the file table, and the line number program.
    (define (write-sources)
      ;; Choose line base and line range values that will allow for an
      ;; address advance range of 16 words.  The special opcode range is
      ;; from 10 to 255, so 246 values.
      (define base -4)
      (define range 15)

      ;; First pass: walk (asm-sources asm), consing (pc file line col)
      ;; rows onto OUT and interning file names along the way.  The
      ;; header is written only once every file is known.
      (let lp ((sources (asm-sources asm)) (out '()))
        (match sources
          (((pc . s) . sources)
           (let ((file (assq-ref s 'filename))
                 (line (assq-ref s 'line))
                 (col (assq-ref s 'column)))
             (lp sources
                 ;; Guile line and column numbers are 0-indexed, but
                 ;; they are 1-indexed for DWARF.
                 (cons (list pc
                             (if file (intern-file file) 0)
                             (if line (1+ line))
                             (if col (1+ col)))
                       out))))
          (()
           ;; Compilation unit header for .debug_line.  We write in
           ;; DWARF 2 format because more tools understand it than DWARF
           ;; 4, which incompatibly adds another field to this header.

           (put-u32 line-port 0) ; Length; will patch later.
           (put-u16 line-port 2) ; DWARF 2 format.
           (put-u32 line-port 0) ; Prologue length; will patch later.
           (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
           (put-u8 line-port 1) ; Default is-stmt: true.

           (put-s8 line-port base) ; Line base.  See the DWARF standard.
           (put-u8 line-port range) ; Line range.  See the DWARF standard.
           (put-u8 line-port 10) ; Opcode base: the first "special" opcode.

           ;; A table of the number of uleb128 arguments taken by each
           ;; of the standard opcodes.
           (put-u8 line-port 0) ; 1: copy
           (put-u8 line-port 1) ; 2: advance-pc
           (put-u8 line-port 1) ; 3: advance-line
           (put-u8 line-port 1) ; 4: set-file
           (put-u8 line-port 1) ; 5: set-column
           (put-u8 line-port 0) ; 6: negate-stmt
           (put-u8 line-port 0) ; 7: set-basic-block
           (put-u8 line-port 0) ; 8: const-add-pc
           (put-u8 line-port 1) ; 9: fixed-advance-pc

           ;; Include directories, as a zero-terminated sequence of
           ;; nul-terminated strings.  Nothing, for the moment.
           (put-u8 line-port 0)

           ;; File table.  For each file that contributes to this
           ;; compilation unit, a nul-terminated file name string, and a
           ;; uleb128 for each of directory the file was found in, the
           ;; modification time, and the file's size in bytes.  We pass
           ;; zero for the latter three fields.
           (vlist-fold-right
            (lambda (pair seed)
              (match pair
                ((file . code)
                 (put-bytevector line-port (string->utf8 file))
                 (put-u8 line-port 0)
                 (put-uleb128 line-port 0) ; directory
                 (put-uleb128 line-port 0) ; mtime
                 (put-uleb128 line-port 0))) ; size
              seed)
            #f
            files)
           (put-u8 line-port 0) ; 0 byte terminating file list.

           ;; Patch prologue length.  The field sits at byte 6 and
           ;; counts the bytes that follow it, hence the 10 subtracted.
           (let ((offset (port-position line-port)))
             (seek line-port 6 SEEK_SET)
             (put-u32 line-port (- offset 10))
             (seek line-port offset SEEK_SET))

           ;; Now write the statement program.
           (let ()
             (define (extended-op opcode payload-len)
               (put-u8 line-port 0) ; extended op
               (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
               (put-uleb128 line-port opcode))
             ;; Extended opcode 2 with a word-sized operand, left zero
             ;; and covered by a relocation against SYM.
             (define (set-address sym)
               (define (add-reloc! kind)
                 (set! line-relocs
                       (cons (make-linker-reloc kind
                                                (port-position line-port)
                                                0
                                                sym)
                             line-relocs)))
               (match (asm-word-size asm)
                 (4
                  (extended-op 2 4)
                  (add-reloc! 'abs32/1)
                  (put-u32 line-port 0))
                 (8
                  (extended-op 2 8)
                  (add-reloc! 'abs64/1)
                  (put-u64 line-port 0))))
             ;; Advance from PC to the end of the text (asm-pos asm),
             ;; then emit extended opcode 1 to end the sequence.
             (define (end-sequence pc)
               (let ((pc-inc (- (asm-pos asm) pc)))
                 (put-u8 line-port 2) ; advance-pc
                 (put-uleb128 line-port pc-inc))
               (extended-op 1 0))
             ;; Advance the address by PC-INC and the line by LINE-INC,
             ;; ending with a special opcode.  Line deltas outside
             ;; [base, base + range) go through advance-line first;
             ;; address deltas too large for a special opcode are
             ;; reduced with const-advance-pc or advance-pc.
             (define (advance-pc pc-inc line-inc)
               (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
                 (cond
                  ((or (< line-inc base) (>= line-inc (+ base range)))
                   (advance-line line-inc)
                   (advance-pc pc-inc 0))
                  ((<= spec 255)
                   (put-u8 line-port spec))
                  ((< spec 500)
                   (put-u8 line-port 8) ; const-advance-pc
                   ;; const-advance-pc advances by the address
                   ;; increment of special opcode 255.  floor-quotient
                   ;; yields exactly one value, unlike floor/.
                   (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                               line-inc))
                  (else
                   (put-u8 line-port 2) ; advance-pc
                   (put-uleb128 line-port pc-inc)
                   (advance-pc 0 line-inc)))))
             (define (advance-line inc)
               (put-u8 line-port 3)
               (put-sleb128 line-port inc))
             (define (set-file file)
               (put-u8 line-port 4)
               (put-uleb128 line-port file))
             (define (set-column col)
               (put-u8 line-port 5)
               (put-uleb128 line-port col))

             (set-address '.rtl-text)

             ;; Second pass: replay the rows in OUT, tracking the
             ;; pc/file/line/column registers and emitting opcodes only
             ;; when a row differs from the current state.
             (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
               (match in
                 (()
                  (when (null? out)
                    ;; There was no source info in the first place.  Set
                    ;; file register to 0 before adding final row.
                    (set-file 0))
                  (end-sequence pc))
                 (((pc* file* line* col*) . in*)
                  (cond
                   ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                    (lp in* pc file line col))
                   (else
                    (unless (eqv? col col*)
                      (set-column col*))
                    (unless (eqv? file file*)
                      (set-file file*))
                    (advance-pc (- pc* pc) (- line* line))
                    (lp in* pc* file* line* col*)))))))))))

    ;; Map an attribute value to what is written in the DIE: strings
    ;; become string-table offsets, language names become codes, and
    ;; the remaining attributes pass through unchanged.
    (define (compute-code attr val)
      (match attr
        ('name (string-table-intern! strtab val))
        ('low-pc val)
        ('high-pc val)
        ('producer (string-table-intern! strtab val))
        ('language (language-name->code val))
        ('stmt-list val)))

    ;; Pick the form used to encode CODE for ATTR.  Integers get the
    ;; narrowest fixed-width data form that holds them.
    (define (choose-form attr val code)
      (cond
       ((string? val) 'strp)
       ((eq? attr 'stmt-list) 'sec-offset)
       ((eq? attr 'low-pc) 'addr)
       ((exact-integer? code)
        (cond
         ((< code 0) 'sleb128)
         ((<= code #xff) 'data1)
         ((<= code #xffff) 'data2)
         ((<= code #xffffffff) 'data4)
         ((<= code #xffffffffffffffff) 'data8)
         (else 'uleb128)))
       (else (error "unhandled case" attr val code))))

    ;; Record a relocation against SYM at the current die-port position.
    (define (add-die-relocation! kind sym)
      (set! die-relocs
            (cons (make-linker-reloc kind (port-position die-port) 0 sym)
                  die-relocs)))

    ;; Write CODE to die-port encoded as FORM.  For `addr', CODE is the
    ;; symbol to relocate against and a zero placeholder is written.
    (define (write-value code form)
      (match form
        ('data1 (put-u8 die-port code))
        ('data2 (put-u16 die-port code))
        ('data4 (put-u32 die-port code))
        ('data8 (put-u64 die-port code))
        ('uleb128 (put-uleb128 die-port code))
        ('sleb128 (put-sleb128 die-port code))
        ('addr
         (match (asm-word-size asm)
           (4
            (add-die-relocation! 'abs32/1 code)
            (put-u32 die-port 0))
           (8
            (add-die-relocation! 'abs64/1 code)
            (put-u64 die-port 0))))
        ('sec-offset (put-u32 die-port code))
        ('strp (put-u32 die-port code))))

    ;; Write DIE and, recursively, its children.  A DIE with children
    ;; is followed by a 0 that terminates the sibling list.
    (define (write-die die)
      (match die
        ((tag ('@ (attrs vals) ...) children ...)
         (let* ((codes (map compute-code attrs vals))
                (forms (map choose-form attrs vals codes))
                (has-children? (not (null? children)))
                (abbrev-code (intern-abbrev tag has-children? attrs forms)))
           (put-uleb128 die-port abbrev-code)
           (for-each write-value codes forms)
           (when has-children?
             (for-each write-die children)
             (put-uleb128 die-port 0))))))

    ;; Compilation unit header.
    (put-u32 die-port 0) ; Length; will patch later.
    (put-u16 die-port 4) ; DWARF 4.
    (put-u32 die-port 0) ; Abbrevs offset.
    (put-u8 die-port (asm-word-size asm)) ; Address size.

    (write-die (make-compile-unit-die asm))

    ;; Terminate the abbrevs list.
    (put-uleb128 abbrev-port 0)

    (write-sources)

    (values (let ((bv (get-die-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_info bv die-relocs '()
                           #:type SHT_PROGBITS #:flags 0))
            (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_str (link-string-table! strtab) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_loc #vu8() '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (let ((bv (get-line-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_line bv line-relocs '()
                           #:type SHT_PROGBITS #:flags 0)))))
a862d8c1 2345
;; Link every section of ASM and return the resulting linker objects
;; as a list, with any #f entries removed.  The binding order below is
;; the order in which the sections are linked.
(define (link-objects asm)
  (let*-values (;; Link procprops before constants, because it probably
                ;; interns more constants.
                ((procprops) (link-procprops asm))
                ((rodata rwdata rwdata-init) (link-constants asm))
                ;; Link text object after constants, so that the
                ;; constants initializer gets included.
                ((text) (link-text-object asm))
                ((frame-maps) (link-frame-maps asm))
                ((dynamic) (link-dynamic-section asm text rwdata rwdata-init
                                                 frame-maps))
                ((symtab strtab) (link-symtab (linker-object-section text)
                                              asm))
                ((arities arities-strtab) (link-arities asm))
                ((docstrs docstrs-strtab) (link-docstrs asm))
                ((debug-info debug-abbrev debug-str debug-loc debug-line)
                 (link-debug asm))
                ;; This needs to be linked last, because linking other
                ;; sections adds entries to the string table.
                ((shstrtab) (link-shstrtab asm)))
    (let ((objects (list text rodata frame-maps rwdata dynamic
                         symtab strtab
                         arities arities-strtab
                         docstrs docstrs-strtab procprops
                         debug-info debug-abbrev debug-str
                         debug-loc debug-line
                         shstrtab)))
      (filter identity objects))))
e78991aa
AW
2369
2370
2371\f
2372
2373;;;
2374;;; High-level public interfaces.
2375;;;
2376
;; Public entry point: link all objects of ASM, then hand them to
;; link-elf together with the page-alignment choice.
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))