Merge commit 'fb7dd00169304a5922838e4d2f25253640a35def'
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile bytecode assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; bytecode assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
26 ;;;
27 ;;; "Primitive instructions" correspond to VM operations. Assemblers
28 ;;; for primitive instructions are generated programmatically from
29 ;;; (instruction-list), which itself is derived from the VM sources.
30 ;;; There are also "macro-instructions" like "label" or "load-constant"
31 ;;; that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm dwarf)
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
50 #:use-module (language bytecode)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 binary-ports)
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
60 emit-text
61 link-assembly))
62
63
64 \f
65
66 ;;; Bytecode consists of 32-bit units, often subdivided in some way.
67 ;;; These helpers create one 32-bit unit from multiple components.
68
;; Pack X into bits 0-7 and Y into bits 8-31 of one 32-bit word.
;; Both components are range-checked so that a bad operand is reported
;; here, with the offending value, rather than surfacing later as a
;; bytevector range error when the word is stored.
(define-inlinable (pack-u8-u24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y #xffffff)
    (error "out of range" y))
  (logior x (ash y 8)))
73
;; Pack X into bits 0-7 and the signed 24-bit value Y into bits 8-31 of
;; one 32-bit word.  Negative Y is encoded in two's complement by adding
;; 2^24.  The negative range is -#x800000 .. -1 inclusive: the lower
;; bound is the most negative 24-bit two's-complement value, which the
;; previous strict comparison rejected.  Non-negative values up to
;; #xffffff are passed through unchanged, as before.
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ((<= 1 (- y) #x800000)
                   (+ y #x1000000))
                  ((<= 0 y #xffffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
84
;; Pack X into bit 0, Y into bits 1-7 and Z into bits 8-31 of one 32-bit
;; word.  All three components are range-checked; Z was previously
;; unchecked, so a bad value was only detected when the word was stored.
(define-inlinable (pack-u1-u7-u24 x y z)
  (unless (<= 0 x 1)
    (error "out of range" x))
  (unless (<= 0 y 127)
    (error "out of range" y))
  (unless (<= 0 z #xffffff)
    (error "out of range" z))
  (logior x (ash y 1) (ash z 8)))
91
;; Pack X into bits 0-7, Y into bits 8-19 and Z into bits 20-31 of one
;; 32-bit word.  All three components are range-checked; Z was
;; previously unchecked.
(define-inlinable (pack-u8-u12-u12 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 4095)
    (error "out of range" y))
  (unless (<= 0 z 4095)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 20)))
98
;; Pack X into bits 0-7, Y into bits 8-15 and Z into bits 16-31 of one
;; 32-bit word.  All three components are range-checked; Z was
;; previously unchecked.
(define-inlinable (pack-u8-u8-u16 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z #xffff)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 16)))
105
;; Pack four bytes X, Y, Z, W into one 32-bit word, X in the low byte
;; and W in the high byte.  Every component is range-checked; W was the
;; only one previously left unchecked.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z 255)
    (error "out of range" z))
  (unless (<= 0 w 255)
    (error "out of range" w))
  (logior x (ash y 8) (ash z 16) (ash w 24)))
114
;; Combine boolean flags into an integer bitfield: the first flag
;; contributes bit 0 (value 1), the second bit 1 (value 2).
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ flag0 flag1)
     (logior (if flag0 1 0)
             (if flag1 2 0)))))
120
121 ;;; Helpers to read and write 32-bit units in a buffer.
122
;; Word-indexed accessors: N counts 32-bit units, so the byte offset
;; handed to the bytevector primitive is N * 4.  Native endianness is
;; used throughout.

(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))

(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))

(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))

(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
134
135
136 \f
137
138 ;;; A <meta> entry collects metadata for one procedure. Procedures are
139 ;;; written as contiguous ranges of bytecode.
140 ;;;
;; Evaluate ARG once and signal an error of the form "expected KIND"
;; (with the offending value as irritant) unless the value matches
;; PATTERN.  KIND is only evaluated on failure.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (if (not (match val (pattern #t) (_ #f)))
           (error (string-append "expected " kind) val))))))
145
;; Per-procedure metadata record.  Only PROPERTIES, HIGH-PC and ARITIES
;; have setters; LABEL and LOW-PC are fixed at construction time.
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; The label naming the procedure (make-meta requires a symbol).
  (label meta-label)
  ;; An alist with symbolic keys; meta-name looks up 'name in it.
  (properties meta-properties set-meta-properties!)
  ;; Position at which the procedure starts.
  (low-pc meta-low-pc)
  ;; Position at which the procedure ends; #f until end-program sets it.
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; List of <arity> records, built up by begin-kw-arity.
  (arities meta-arities set-meta-arities!))
154
;; Public constructor for <meta>.  Validates LABEL and PROPERTIES, and
;; starts the record with no high-pc and an empty arity list.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
159
;; Return the value stored under the 'name key of META's property
;; alist, or #f if there is none.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
162
163 ;; Metadata for one <lambda-case>.
;; One arity of a procedure.  Only HIGH-PC is mutable; begin-kw-arity
;; creates the record with high-pc #f and end-arity fills it in.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc)
  arity?
  ;; List of symbols naming required arguments.
  (req arity-req)
  ;; List of symbols naming optional arguments.
  (opt arity-opt)
  ;; #f, or a symbol naming the rest argument.
  (rest arity-rest)
  ;; Alist of keyword -> integer.
  (kw-indices arity-kw-indices)
  ;; Boolean.
  (allow-other-keys? arity-allow-other-keys?)
  ;; Positions delimiting the code for this arity.
  (low-pc arity-low-pc)
  (high-pc arity-high-pc set-arity-high-pc!))
175
;; Capacity of one code block, in 32-bit words.  Defined as identifier
;; syntax, so each reference expands to the literal 32.
(define-syntax *block-size* (identifier-syntax 32))
177
178 ;;; An assembler collects all of the words emitted during assembly, and
179 ;;; also maintains ancillary information such as the constant table, a
180 ;;; relocation list, and so on.
181 ;;;
182 ;;; Bytecode consists of 32-bit units. We emit bytecode using native
183 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
184 ;;; the bytevector as a whole instead of conditionalizing each access.
185 ;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources
            dead-slot-maps)
  asm?

  ;; We write bytecode into what is logically a growable vector,
  ;; implemented as a list of blocks.  asm-cur is the current block, and
  ;; asm-idx is the current index into that block, in 32-bit units.
  ;;
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; asm-start is an absolute position, indicating the offset of the
  ;; beginning of an instruction (in u32 units).  It is updated after
  ;; writing all the words for one primitive instruction.  It models the
  ;; position of the instruction pointer during execution, given that
  ;; the VM updates the IP only at the end of executing the instruction,
  ;; and is thus useful for computing offsets between two points in a
  ;; program.
  ;;
  (start asm-start set-asm-start!)

  ;; The list of previously written blocks, most recent first.
  ;;
  (prev asm-prev set-asm-prev!)

  ;; The number of u32 words written in asm-prev, which is the same as
  ;; the offset of the current block.
  ;;
  (written asm-written set-asm-written!)

  ;; A hash table mapping symbol -> position, recording the labels
  ;; defined in this compilation unit.  Entries are added with
  ;; hashq-set! by the `label' macro-assembler.
  ;;
  (labels asm-labels set-asm-labels!)

  ;; A list of relocations needed by the program text.  We use an
  ;; internal representation for relocations, and handle textual
  ;; relative relocations in the assembler.  Other kinds of relocations
  ;; are later reified as linker relocations and resolved by the linker.
  ;;
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  ;;
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table, as a vhash of object -> label.  All constants
  ;; get de-duplicated and written into separate sections -- either the
  ;; .rodata section, for read-only data, or .data, for constants that
  ;; need initialization at load-time (like symbols).  Constants can
  ;; depend on other constants (e.g. a symbol depending on a stringbuf),
  ;; so order in this table is important.
  ;;
  (constants asm-constants set-asm-constants!)

  ;; A list of instructions needed to initialize the constants, kept in
  ;; reverse order of execution.  Will run in a thunk with 2 local
  ;; variables.
  ;;
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, for section names.
  ;;
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; The section number for the next section to be written.
  ;;
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta>, corresponding to procedure metadata.  The most
  ;; recently begun procedure is first.
  ;;
  (meta asm-meta set-asm-meta!)

  ;; A list of (pos . source) pairs, indicating source information.  POS
  ;; is relative to the beginning of the text section, and SOURCE is in
  ;; the same format that source-properties returns.
  ;;
  (sources asm-sources set-asm-sources!)

  ;; A list of (pos proc-slot . dead-slot-map) entries, indicating dead
  ;; slot maps.  POS is relative to the beginning of the text section.
  ;; DEAD-SLOT-MAP is a bitfield of slots that are dead at call sites,
  ;; as an integer.
  ;;
  (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
277
;; Allocate a new, empty code block: a u32vector holding *block-size*
;; words.
(define-inlinable (fresh-block)
  (make-u32vector *block-size*))
280
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Create an assembler for a given target @var{word-size} and
@var{endianness}, falling back to appropriate values for the configured
target."
  ;; Name the freshly allocated pieces so the positional constructor
  ;; call below is easier to read against the <asm> field list.
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (section-names (make-string-table)))
    (make-asm first-block 0 0 '() 0          ; cur idx start prev written
              labels '()                     ; labels relocs
              word-size endianness
              vlist-null '()                 ; constants inits
              section-names 1                ; shstrtab next-section-number
              '() '() '())))                 ; meta sources dead-slot-maps
292
(define (intern-section-name! asm string)
  "Add a string to the section name table (shstrtab)."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
296
(define-inlinable (asm-pos asm)
  "The offset of the next word to be written into the code buffer, in
32-bit units."
  ;; Words in completed blocks plus the index into the current block.
  (let ((completed (asm-written asm))
        (in-block (asm-idx asm)))
    (+ in-block completed)))
301
(define (allocate-new-block asm)
  "Close off the current block, and arrange for the next word to be
written to a fresh block."
  ;; Compute everything from the old state first; the position must be
  ;; read before the index is reset to zero.
  (let* ((block (fresh-block))
         (finished (cons (asm-cur asm) (asm-prev asm)))
         (offset (asm-pos asm)))
    (set-asm-prev! asm finished)
    (set-asm-written! asm offset)
    (set-asm-cur! asm block)
    (set-asm-idx! asm 0)))
310
(define-inlinable (emit asm u32)
  "Emit one 32-bit word into the instruction stream.  Assumes that there
is space for the word, and ensures that there is space for the next
word."
  (let ((idx (asm-idx asm)))
    (u32-set! (asm-cur asm) idx u32)
    (let ((next (1+ idx)))
      (set-asm-idx! asm next)
      ;; The current block is full: roll over to a fresh one.
      (when (= next *block-size*)
        (allocate-new-block asm)))))
319
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} referencing symbol
@var{label}, @var{word} words after position @var{base}.  @var{type}
may be x8-s24, indicating a 24-bit relative label reference that can be
fixed up by the assembler, or s32, indicating a 32-bit relative
reference that needs to be fixed up by the linker."
  ;; A relocation is represented as a plain four-element list.
  (list type label base word))
327
(define-inlinable (reset-asm-start! asm)
  "Reset the asm-start after writing the words for one instruction."
  (let ((pos (asm-pos asm)))
    (set-asm-start! asm pos)))
331
(define (record-label-reference asm label)
  "Record an x8-s24 local label reference.  This value will get patched
up later by the assembler."
  ;; The relocation is anchored at the start of the current instruction;
  ;; the word offset is how far into the instruction we have written.
  (let* ((base (asm-start asm))
         (word (- (asm-pos asm) base))
         (entry (make-reloc 'x8-s24 label base word)))
    (set-asm-relocs! asm (cons entry (asm-relocs asm)))))
339
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Record an s32 far label reference.  This value will get patched up
later by the linker."
  ;; OFFSET shifts the base back from the start of the current
  ;; instruction; the word offset is measured from that shifted base.
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base))
         (entry (make-reloc 's32 label base word)))
    (set-asm-relocs! asm (cons entry (asm-relocs asm)))))
347
348
349 \f
350
351 ;;;
352 ;;; Primitive assemblers are defined by expanding `assembler' for each
353 ;;; opcode in `(instruction-list)'.
354 ;;;
355
(eval-when (expand compile load eval)
  ;; Build an identifier in the lexical context of CTX whose name is the
  ;; concatenation of the names of syntax objects A and B.
  (define (id-append ctx a b)
    (let ((head (syntax->datum a))
          (tail (syntax->datum b)))
      (datum->syntax ctx (symbol-append head tail)))))
359
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a lambda that
;; takes an <asm> followed by the operands implied by the word types,
;; emits the instruction's words, and finally resets asm-start.
(define-syntax assembler
  (lambda (x)
    ;; (op-case ASM NAME ((TYPE ARG ...) CODE ...) ...) compares NAME
    ;; against each TYPE symbol in turn.  On a match it yields a syntax
    ;; object of the shape ((FORMAL ...) CODE ...), with fresh
    ;; temporaries substituted for the ARGs; with no match it signals
    ;; "unmatched name".
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, which
    ;; always carries OPCODE in its low 8 bits.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; A tail word has no opcode, so all three fields are operands.
       ;; The formals previously listed only (a b) while the code
       ;; referenced `c', leaving it unbound.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; High 32 bits first, then low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ;; B32 takes no operand and emits nothing.
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
466
;; Table mapping instruction name (a symbol) -> emitter procedure.
;; Filled in by define-assembler and define-macro-assembler, and
;; consulted by emit-text.
(define assemblers (make-hash-table))
468
;; (define-assembler NAME OPCODE KIND ARG ...) defines and exports
;; emit-NAME, built by `assembler' from OPCODE and the ARG word types,
;; and registers it in `assemblers' under the symbol NAME.  KIND is
;; matched but not used in the expansion.
(define-syntax define-assembler
  (lambda (x)
    (syntax-case x ()
      ((_ name opcode kind arg ...)
       ;; `emit' stands for the identifier emit-NAME below.
       (with-syntax ((emit (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit
               (let ((emit (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit)
                 emit))
             (export emit)))))))
480
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form for each element INST of (instruction-list), which is evaluated
;; at expansion time.
(define-syntax visit-opcodes
  (lambda (x)
    (syntax-case x ()
      ((visit-opcodes macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (x) (datum->syntax #'macro x))
                           (instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))

;; Define an emit-OP procedure for every entry of (instruction-list).
(visit-opcodes define-assembler)
493
(define (emit-text asm instructions)
  "Assemble @var{instructions} using the assembler @var{asm}.
@var{instructions} is a sequence of instructions, expressed as a list of
lists.  This procedure can be called many times before calling
@code{link-assembly}."
  ;; Look up the emitter registered for the instruction's head symbol
  ;; and apply it to ASM followed by the instruction's operands.
  (define (assemble-one inst)
    (let ((emitter (or (hashq-ref assemblers (car inst))
                       (error 'bad-instruction inst))))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
505
506 \f
507
508 ;;;
509 ;;; The constant table records a topologically sorted set of literal
510 ;;; constants used by a program. For example, a pair uses its car and
511 ;;; cdr, a string uses its stringbuf, etc.
512 ;;;
513 ;;; Some things we want to add to the constant table are not actually
514 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
515 ;;; references, or cache cells for non-closure procedures. For these we
516 ;;; define special record types and add instances of those record types
517 ;;; to the table.
518 ;;;
519
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
  ;; An object counts as immediate when bit 1 or bit 2 of its address
  ;; word is set.
  (let ((tag-bits (logand (object-address x) 6)))
    (not (= tag-bits 0))))
523
;; Wrapper records for constant-table entries.  All fields are
;; read-only.

;; Stands for the stringbuf of a string; intern-constant creates one
;; from each interned string.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string stringbuf-string))

;; A statically allocated procedure.  CODE is the label passed to
;; load-static-procedure.
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code static-procedure-code))

;; The raw bytes behind a uniform vector, together with the element
;; width in bytes chosen by intern-constant.
(define-record-type <uniform-vector-backing-store>
  (make-uniform-vector-backing-store bytes element-size)
  uniform-vector-backing-store?
  (bytes uniform-vector-backing-store-bytes)
  (element-size uniform-vector-backing-store-element-size))

;; A cache cell identified by a SCOPE and a KEY.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key cache-cell-key))
545
;; True if OBJ is a vector whose array shape is exactly one dimension
;; running from 0 to length-1.
(define (simple-vector? obj)
  (if (vector? obj)
      (let ((last-index (1- (vector-length obj))))
        (equal? (array-shape obj) `((0 ,last-index))))
      #f))
549
;; True if OBJ is an array whose element type is named by a symbol and
;; whose shape is exactly one dimension running from 0 to length-1.
(define (simple-uniform-vector? obj)
  (cond
   ((not (array? obj)) #f)
   ((not (symbol? (array-type obj))) #f)
   (else
    (let ((last-index (1- (array-length obj))))
      (equal? (array-shape obj) `((0 ,last-index)))))))
554
(define (statically-allocatable? x)
  "Return @code{#t} if a non-immediate constant can be allocated
statically, and @code{#f} if it would need some kind of runtime
allocation."
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
560
(define (intern-constant asm obj)
  "Add an object to the constant table, and return a label that can be
used to reference it.  If the object is already present in the constant
table, its existing label is used directly."
  (define (recur obj)
    (intern-constant asm obj))
  ;; Return the init instructions that store OBJ into word N of the
  ;; constant labelled DST.  Immediates intern to #f and need no
  ;; instructions.  Statically allocatable objects are patched in by
  ;; label; anything else is loaded into local 1 and then stored.
  (define (field dst n obj)
    (let ((src (recur obj)))
      (if src
          (if (statically-allocatable? obj)
              `((static-patch! ,dst ,n ,src))
              `((static-ref 1 ,src)
                (static-set! 1 ,dst ,n)))
          '())))
  ;; Return the list of init instructions for OBJ, which is being
  ;; assigned LABEL.  Sub-objects are interned recursively as a side
  ;; effect.
  (define (intern obj label)
    (cond
     ((pair? obj)
      (append (field label 0 (car obj))
              (field label 1 (cdr obj))))
     ((simple-vector? obj)
      ;; Element I is stored at word I+1 of the vector.
      (let lp ((i 0) (inits '()))
        (if (< i (vector-length obj))
            (lp (1+ i)
                (append-reverse (field label (1+ i) (vector-ref obj i))
                                inits))
            (reverse inits))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((static-patch! ,label 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      ;; Rebuilt at load time from the symbol's name.
      `((make-non-immediate 1 ,(recur (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
     ((keyword? obj)
      `((static-ref 1 ,(recur (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      ;; Non-immediate numbers are rebuilt at load time from their
      ;; printed representation.
      `((make-non-immediate 1 ,(recur (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? obj) '())
     ((simple-uniform-vector? obj)
      (let ((width (case (array-type obj)
                     ((vu8 u8 s8) 1)
                     ((u16 s16) 2)
                     ;; Bitvectors are addressed in 32-bit units.
                     ;; Although a complex number is 8 or 16 bytes wide,
                     ;; it should be byteswapped in 4 or 8 byte units.
                     ((u32 s32 f32 c32 b) 4)
                     ((u64 s64 f64 c64) 8)
                     (else
                      (error "unhandled array type" obj)))))
        `((static-patch! ,label 2
                         ,(recur (make-uniform-vector-backing-store
                                  (uniform-array->bytevector obj)
                                  width))))))
     (else
      (error "don't know how to intern" obj))))
  (cond
   ;; Immediates never enter the table; callers see #f instead of a label.
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Note that calling intern may mutate asm-constants and asm-inits,
    ;; so both are re-read after it returns.
    (let* ((label (gensym "constant"))
           (inits (intern obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
      label))))
634
(define (intern-non-immediate asm obj)
  "Intern a non-immediate into the constant table, and return its
label."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
641
(define (intern-cache-cell asm scope key)
  "Intern a cache cell into the constant table, and return its label.
If there is already a cache cell with the given scope and key, it is
returned instead."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
647
648 ;; Return the label of the cell that holds the module for a scope.
(define (intern-module-cache-cell asm scope)
  "Intern a cache cell for a module, and return its label."
  ;; The module cell of a scope is the cache cell whose key is #t.
  (intern-constant asm (make-cache-cell scope #t)))
652
653
654 \f
655
656 ;;;
657 ;;; Macro assemblers bridge the gap between primitive instructions and
658 ;;; some higher-level operations.
659 ;;;
660
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; emit-NAME as a procedure of ARG ... with the given body, and
;; registers it in `assemblers' under the symbol NAME, alongside the
;; primitive emitters.
(define-syntax define-macro-assembler
  (lambda (x)
    (syntax-case x ()
      ((_ (name arg ...) body body* ...)
       ;; `emit' stands for the identifier emit-NAME below.
       (with-syntax ((emit (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit
               (let ((emit (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit)
                 emit))
             (export emit)))))))
672
;; Load the constant OBJ into slot DST, choosing the instruction by the
;; kind of constant: the narrowest immediate form that fits, a direct
;; reference for statically allocatable objects, or a static-ref for
;; objects initialized at load time.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      (let ((bits (object-address obj)))
        (cond
         ((and (< dst 256) (zero? (ash bits -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash bits -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (let ((label (intern-non-immediate asm obj)))
        (if (statically-allocatable? obj)
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
688
;; Load into DST a statically allocated procedure whose code is at
;; LABEL.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate
   asm dst
   (intern-constant asm (make-static-procedure label))))
692
;; (define-tc7-macro-assembler NAME TC7) defines the macro-assembler
;; NAME as br-if-tc7 specialized to the type code TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
696
697 ;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
698 ;; macro assemblers are commented out. See also
699 ;; *branching-primcall-arities* in (language cps primitives), the set of
700 ;; macro-instructions in assembly.scm, and
701 ;; disassembler.scm:code-annotation.
702 ;;
703 ;; FIXME: Define all tc7 values in Scheme in one place, derived from
704 ;; tags.h.
;; Active definitions are uncommented; the commented-out lines record
;; type codes for macro assemblers that are currently unused.
;; NOTE(review): the commented-out br-if-weak-vector entry carries the
;; same code (13) as br-if-vector -- confirm against tags.h before
;; enabling it.
(define-tc7-macro-assembler br-if-symbol 5)
(define-tc7-macro-assembler br-if-variable 7)
(define-tc7-macro-assembler br-if-vector 13)
;(define-tc7-macro-assembler br-if-weak-vector 13)
(define-tc7-macro-assembler br-if-string 21)
;(define-tc7-macro-assembler br-if-heap-number 23)
;(define-tc7-macro-assembler br-if-stringbuf 39)
(define-tc7-macro-assembler br-if-bytevector 77)
;(define-tc7-macro-assembler br-if-pointer 31)
;(define-tc7-macro-assembler br-if-hashtable 29)
;(define-tc7-macro-assembler br-if-fluid 37)
;(define-tc7-macro-assembler br-if-dynamic-state 45)
;(define-tc7-macro-assembler br-if-frame 47)
;(define-tc7-macro-assembler br-if-vm 55)
;(define-tc7-macro-assembler br-if-vm-cont 71)
;(define-tc7-macro-assembler br-if-rtl-program 69)
;(define-tc7-macro-assembler br-if-weak-set 85)
;(define-tc7-macro-assembler br-if-weak-table 87)
;(define-tc7-macro-assembler br-if-array 93)
(define-tc7-macro-assembler br-if-bitvector 95)
;(define-tc7-macro-assembler br-if-port 125)
;(define-tc7-macro-assembler br-if-smob 127)
727
;; Start a procedure: define LABEL at the current position and push a
;; fresh <meta> for it onto the assembler's metadata list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
732
;; Finish the most recently begun procedure: record where it ends, and
;; put its arities (accumulated newest-first) back into source order.
(define-macro-assembler (end-program asm)
  (let ((current (car (asm-meta asm))))
    (set-meta-high-pc! current (asm-start asm))
    (let ((in-order (reverse (meta-arities current))))
      (set-meta-arities! current in-order))))
737
;; An arity with required arguments only: no optionals, no rest.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
740
;; An arity without keyword arguments: empty keyword table, and other
;; keys not allowed.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
743
;; Begin an arity of the current procedure: validate the description,
;; record an <arity> on the current <meta>, and emit the cheapest
;; prelude that can handle the argument list.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  (let* ((current (car (asm-meta asm)))
         (info (make-arity req opt rest kw-indices allow-other-keys?
                           (asm-start asm) #f))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (has-rest? (->bool rest)))
    (set-meta-arities! current (cons info (meta-arities current)))
    (if (or allow-other-keys? (pair? kw-indices))
        (emit-kw-prelude asm nreq nopt has-rest? kw-indices allow-other-keys?
                         nlocals alternate)
        (if (or has-rest? (pair? opt))
            (emit-opt-prelude asm nreq nopt has-rest? nlocals alternate)
            (emit-standard-prelude asm nreq nlocals alternate)))))
772
;; Close the most recently begun arity of the current procedure by
;; recording where it ends.
(define-macro-assembler (end-arity asm)
  (let* ((current (car (asm-meta asm)))
         (latest (car (meta-arities current))))
    (set-arity-high-pc! latest (asm-start asm))))
776
;; Prelude for a required-arguments-only arity.  With an ALTERNATE,
;; branch there on an argument count mismatch.  Otherwise assert the
;; count, using the combined assert+allocate instruction when both of
;; its operands fit in 12 bits.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (if (and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
          (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
          (begin
            (emit-assert-nargs-ee asm nreq)
            (emit-alloc-frame asm nlocals)))))
787
;; Prelude for an arity with optional and/or rest arguments.  First
;; check the lower bound on the argument count, then either bind the
;; rest list or check the upper bound, then allocate the frame.  With an
;; ALTERNATE, failed checks branch instead of asserting.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (if rest?
      (emit-bind-rest asm (+ nreq nopt))
      (if alternate
          (emit-br-if-nargs-gt asm (+ nreq nopt) alternate)
          (emit-assert-nargs-le asm (+ nreq nopt))))
  (emit-alloc-frame asm nlocals))
800
;; Prelude for an arity that takes keyword arguments.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  ;; Fold step: raise TOTAL so that it covers the slot index of the
  ;; keyword entry KW.
  (define (cover-keyword kw total)
    (match kw
      (((? keyword?) . idx)
       (max (1+ idx) total))))
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (unless rest?
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let ((ntotal (fold cover-keyword (+ nreq nopt) kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      (+ nreq nopt)
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
821
;; Define the label SYM at the start of the next instruction.
(define-macro-assembler (label asm sym)
  (let ((table (asm-labels asm))
        (pos (asm-start asm)))
    (hashq-set! table sym pos)))
824
;; Associate SOURCE with the start of the next instruction by pushing a
;; (pos . source) pair onto the assembler's source list.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
827
;; Store the value in slot MODULE into word 0 of the module cache cell
;; for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))
831
;; Emit a toplevel-box instruction for SYM, interning the symbol, the
;; module cache cell for SCOPE, and the per-symbol cache cell it needs.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((symbol-ref (intern-non-immediate asm sym))
        (module-ref (intern-module-cache-cell asm scope))
        (cell-ref (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cell-ref module-ref symbol-ref bound?)))
837
;; Emit a module-box instruction for SYM in MODULE-NAME.  The pair
;; (PUBLIC? . MODULE-NAME) is interned as a constant and also serves as
;; the scope of the cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((symbol-ref (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-ref (intern-constant asm scope))
         (cell-ref (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst cell-ref scope-ref symbol-ref bound?)))
844
;; Record a non-zero dead slot map for the call at the current position
;; as a (pos proc-slot . dead-slot-map) entry.  A zero map is not
;; recorded.
(define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
  (if (not (zero? dead-slot-map))
      (let ((entry (cons (asm-start asm)
                         (cons proc-slot dead-slot-map))))
        (set-asm-dead-slot-maps! asm
                                 (cons entry (asm-dead-slot-maps asm))))))
851
852 \f
853
854 ;;;
855 ;;; Helper for linking objects.
856 ;;;
857
(define (make-object asm name bv relocs labels . kwargs)
  "Make a linker object.  This helper handles interning the name in the
shstrtab, assigning the size, allocating a fresh index, and defining a
corresponding linker symbol for the start of the section."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    ;; Claim this section number before building the section.
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          ;; A symbol named after the section marks its offset 0.
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
872
873
874 \f
875
876 ;;;
877 ;;; Linking the constant table. This code is somewhat intertwingled
878 ;;; with the intern-constant code above, as that procedure also
879 ;;; residualizes instructions to initialize constants at load time.
880 ;;;
881
;; Write the address word of X into BUF at byte offset POS, as one
;; target word (4 or 8 bytes) in the target's endianness.
(define (write-immediate asm buf pos x)
  (let ((bits (object-address x))
        (order (asm-endianness asm))
        (word-size (asm-word-size asm)))
    (cond
     ((eqv? word-size 4) (bytevector-u32-set! buf pos bits order))
     ((eqv? word-size 8) (bytevector-u64-set! buf pos bits order))
     (else (error "bad word size" asm)))))
889
(define (emit-init-constants asm)
  "If there is writable data that needs initialization at runtime, emit
a procedure to do that and return its label.  Otherwise return
@code{#f}."
  (let ((inits (asm-inits asm)))
    (if (null? inits)
        #f
        (let ((label (gensym "init-constants")))
          ;; asm-inits is accumulated newest-first, so reverse it to
          ;; run the initializers in the order they were recorded.
          (emit-text asm
                     (append `((begin-program ,label ())
                               (assert-nargs-ee/locals 1 1))
                             (reverse inits)
                             `((load-constant 1 ,*unspecified*)
                               (return 1)
                               (end-program))))
          label))))
905
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels.

Each object is laid out at an 8-byte-aligned offset, and a linker symbol
is defined for its label at that offset.  References to non-immediate
objects are written as @code{#f}; the asm-inits are expected to fix them
up."
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    (define (write-uword buf pos val)
      ;; Write VAL at byte offset POS as one unsigned word of the
      ;; target's word size.  This is the single place that dispatches
      ;; on the word size for raw (non-SCM) words.
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    (define (byte-length x)
      ;; Number of bytes that `write' will emit for X.
      (cond
       ((stringbuf? x)
        (let ((x (stringbuf-string x)))
          ;; Two header words, then the characters plus a trailing NUL.
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    (define (write buf pos obj)
      ;; Serialize OBJ into BUF starting at byte offset POS.
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (width (string-bytes-per-char x))
               (tag (if (= width 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf))
               (chars-pos (+ pos (* word-size 2))))
          (write-uword buf pos tag)
          (write-uword buf (+ pos word-size) len)
          ;; Characters follow the two header words, terminated by a
          ;; zero character of the same width.
          (case width
            ((1)
             (let lp ((i 0))
               (if (< i len)
                   (let ((u8 (char->integer (string-ref x i))))
                     (bytevector-u8-set! buf (+ chars-pos i) u8)
                     (lp (1+ i)))
                   (bytevector-u8-set! buf (+ chars-pos i) 0))))
            ((4)
             (let lp ((i 0))
               (if (< i len)
                   (let ((u32 (char->integer (string-ref x i))))
                     (bytevector-u32-set! buf (+ chars-pos (* i 4)) u32
                                          endianness)
                     (lp (1+ i)))
                   (bytevector-u32-set! buf (+ chars-pos (* i 4)) 0
                                        endianness))))
            (else (error "bad string bytes per char" x)))))

       ((static-procedure? obj)
        (write-uword buf pos tc7-program)
        (write-uword buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: tag, stringbuf reference, a zero word, and the
        ;; string length.
        (write-uword buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-uword buf (+ pos (* 2 word-size)) 0)
        (write-uword buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-uword buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (array-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7)))))
              (len (if (bitvector? obj)
                       (bitvector-length obj)
                       (bytevector-length obj))))
          (write-uword buf pos tag)
          (write-uword buf (+ pos word-size) len)               ; length
          (write-uword buf (+ pos (* 2 word-size)) 0)           ; pointer
          (write-immediate asm buf (+ pos (* 3 word-size)) #f))) ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Total size: every object but the last is padded out to an
      ;; 8-byte boundary, matching the position computation below.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
1104
(define (link-constants asm)
  "Link sections to hold constants needed by the program text emitted
using @var{asm}.

Returns three values: an object for the .rodata section, an object for
the .data section, and a label for an initialization procedure.  Any of
these may be @code{#f}."
  (define (all-immediate? vec)
    ;; True when every element of VEC is an immediate.
    (let check ((i (1- (vector-length vec))))
      (cond
       ((negative? i) #t)
       ((immediate? (vector-ref vec i)) (check (1- i)))
       (else #f))))
  (define (shareable? x)
    ;; Constants that need no load-time fixup can go in .rodata.
    (cond
     ((stringbuf? x) #t)
     ((pair? x) (if (immediate? (car x)) (immediate? (cdr x)) #f))
     ((simple-vector? x) (all-immediate? x))
     (else (uniform-vector-backing-store? x))))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let partition ((i 0) (ro vlist-null) (rw vlist-null))
      (cond
       ((< i count)
        (match (vlist-ref constants i)
          ((obj . label)
           (if (shareable? obj)
               (partition (1+ i) (vhash-consq obj label ro) rw)
               (partition (1+ i) ro (vhash-consq obj label rw))))))
       (else
        (values (link-data asm ro '.rodata)
                (link-data asm rw '.data)
                (emit-init-constants asm)))))))
1137
1138 \f
1139
1140 ;;;
1141 ;;; Linking program text.
1142 ;;;
1143
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label table
@var{labels}, patching @var{buf} in place.  Every x8-s24 relocation must
refer to a label in @var{labels}; s32 relocations that do are patched as
well.  The s32 relocations whose label is not in @var{labels} are
returned as a list of linker relocations."
  (let lp ((pending relocs) (external '()))
    (if (null? pending)
        external
        (let ((reloc (car pending)))
          (lp (cdr pending)
              (match reloc
                (('s32 label base word)
                 (let ((target (hashq-ref labels label))
                       (dst (+ base word)))
                   (cond
                    (target
                     (s32-set! buf dst (- target base))
                     external)
                    (else
                     ;; Not a text label: leave it for the linker.
                     (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
                           external)))))
                (('x8-s24 label base word)
                 (let ((target (hashq-ref labels label))
                       (dst (+ base word)))
                   (if (not target)
                       (error "unbound near relocation" reloc))
                   ;; Keep the low byte of the word and replace the
                   ;; remaining 24 bits with the relative offset.
                   (let ((low-byte (logand (u32-ref buf dst) #xff)))
                     (u32-set! buf dst (pack-u8-s24 low-byte (- target base)))
                     external)))
                ((_ _ _ _)
                 (error "bad relocation kind" reloc))))))))
1172
(define (process-labels labels)
  "Return a list of linker symbols, one per entry of the label-offset
hash table @var{labels}.  Offsets in the table are expected to be in
words; each symbol's offset is that value times four."
  (hash-fold (lambda (label word-offset symbols)
               (cons (make-linker-symbol label (* 4 word-offset))
                     symbols))
             '()
             labels))
1179
(define (swap-bytes! buf)
  "Reverse the byte order of every 32-bit unit of the bytevector
@var{buf}, in place.  Signals an error if the length of @var{buf} is not
a multiple of four."
  (let ((total (bytevector-length buf)))
    (if (not (zero? (remainder total 4)))
        (error "unexpected length"))
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      ;; Reading big-endian and writing little-endian swaps the bytes.
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1193
(define (link-text-object asm)
  "Link the .rtl-text section: concatenate the completed blocks of
@var{asm} and the used part of its current block into one buffer, swap
the byte order of the buffer if the target endianness is not the native
one, resolve relocations, and return the resulting linker object."
  (let* ((buf (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; Completed blocks are kept most-recent-first; copy them in
         ;; the order they were filled.
         (tail-pos (fold (lambda (block pos)
                           (bytevector-copy! block 0 buf pos block-bytes)
                           (+ pos block-bytes))
                         0
                         (reverse (asm-prev asm)))))
    ;; Only the first (asm-idx asm) words of the current block are used.
    (bytevector-copy! (asm-cur asm) 0 buf tail-pos (* (asm-idx asm) 4))
    (if (not (eq? (asm-endianness asm) (native-endianness)))
        (swap-bytes! buf))
    (let* ((relocs (process-relocs buf (asm-relocs asm) (asm-labels asm)))
           (symbols (process-labels (asm-labels asm))))
      (make-object asm '.rtl-text buf relocs symbols))))
1212
1213
1214 \f
1215
1216 ;;;
1217 ;;; Create the frame maps. These maps are used by GC to identify dead
1218 ;;; slots in pending call frames, to avoid marking them. We only do
;;; this when a frame makes a non-tail call, as that is the common case.
1220 ;;; Only the topmost frame will see a GC at any other point, but we mark
1221 ;;; top frames conservatively as serializing live slot maps at every
1222 ;;; instruction would take up too much space in the object file.
1223 ;;;
1224
;; The .guile.frame-maps section begins with a prefix of two packed u32
;; values: the offset of the first byte of the .rtl-text section, then
;; the relative offset in bytes of the slots data.  This is the length
;; of that prefix, in bytes.
(define frame-maps-prefix-len (* 2 4))
1229
;; Each header is 8 bytes: 4 for the offset from .rtl-text, and 4 for
;; the offset of the slot map from the beginning of the
;; .guile.frame-maps section.  The length of a frame map depends on the
;; frame size at the call site; it is not encoded into this section
;; because it is available at run-time.
(define frame-map-header-len (+ 4 4))
1236
(define (link-frame-maps asm)
  "Link the .guile.frame-maps section from the dead-slot maps recorded in
@var{asm}, and return the resulting linker object.  Returns @code{#f}
when no dead-slot maps were recorded."
  (define (map-byte-length proc-slot)
    ;; Bytes needed for the slot map of a call at PROC-SLOT: one bit
    ;; per slot for (- proc-slot 2) slots, rounded up to whole bytes.
    (ceiling-quotient (- proc-slot 2) 8))

  (define (write-map-bytes! bv start bits nbytes)
    ;; Store the low NBYTES bytes of BITS at START, least significant
    ;; byte first.  Returns the offset just past the last byte written.
    (do ((i 0 (1+ i))
         (bits bits (ash bits -8)))
        ((= i nbytes) (+ start nbytes))
      (bytevector-u8-set! bv (+ start i) (logand bits #xff))))

  (define (make-frame-maps maps count map-len)
    ;; MAPS is a list of (POS PROC-SLOT . BITS) entries, COUNT its
    ;; length, and MAP-LEN the total size of all slot maps in bytes.
    (let* ((endianness (asm-endianness asm))
           (maps-start (+ frame-maps-prefix-len
                          (* count frame-map-header-len)))
           (bv (make-bytevector (+ maps-start map-len) 0)))
      ;; Second prefix word: where the slot data begins.  The first
      ;; prefix word is filled in through the relocation below.
      (bytevector-u32-set! bv 4 maps-start endianness)
      (let lp ((maps maps)
               (header-pos frame-maps-prefix-len)
               (map-pos maps-start))
        (match maps
          (((pos proc-slot . bits) . maps)
           ;; POS is in words; the header stores a byte offset.
           (bytevector-u32-set! bv header-pos (* pos 4) endianness)
           (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
           (lp maps
               (+ header-pos frame-map-header-len)
               (write-map-bytes! bv map-pos bits
                                 (map-byte-length proc-slot))))
          (()
           (make-object asm '.guile.frame-maps bv
                        (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
                        '() #:type SHT_PROGBITS #:flags SHF_ALLOC))))))

  (let ((recorded (asm-dead-slot-maps asm)))
    (if (null? recorded)
        #f
        ;; Entries were pushed most-recent-first; emit them in the
        ;; order in which they were recorded.
        (make-frame-maps
         (reverse recorded)
         (length recorded)
         (fold (lambda (entry total)
                 (match entry
                   ((_ proc-slot . _)
                    (+ (map-byte-length proc-slot) total))))
               0
               recorded)))))
1274
1275 \f
1276
1277 ;;;
1278 ;;; Linking other sections of the ELF file, like the dynamic segment,
1279 ;;; the symbol table, etc.
1280 ;;;
1281
;; Bytecode version numbers.  link-dynamic-section packs them into the
;; DT_GUILE_VM_VERSION entry as (major << 16) | minor.
;; FIXME: Define these somewhere central, shared with C.
(define *bytecode-major-version* #x0202)
(define *bytecode-minor-version* 4)
1285
(define (link-dynamic-section asm text rw rw-init frame-maps)
  "Link the dynamic section for an ELF image with bytecode @var{text},
given the writable data section @var{rw} needing fixup from the
procedure with label @var{rw-init}.  @var{rw-init} may be false.  If
@var{rw} is true, it will be added to the GC roots at runtime.  If
@var{frame-maps} is true, an entry referring to the .guile.frame-maps
section is added as well.  The section is a sequence of (tag, value)
word pairs terminated by a DT_NULL entry."
  (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Version, entry and terminator take three pairs; the
           ;; optional entries add to that.
           (words (+ 6
                     (if rw 4 0)
                     (if rw-init 2 0)
                     (if frame-maps 2 0)))
           (bv (make-bytevector (* word-size words) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      (define (set-label! i label)
        ;; The value of word I is the address of LABEL, to be filled
        ;; in by the linker; write a zero placeholder for now.
        (set! relocs (cons (make-linker-reloc 'reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      (set-uword! 0 DT_GUILE_VM_VERSION)
      (set-uword! 1 (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (if rw
          (begin
            ;; Add roots to GC.
            (set-uword! 4 DT_GUILE_GC_ROOT)
            (set-label! 5 '.data)
            (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
            (set-uword! 7 (bytevector-length (linker-object-bv rw)))
            (if rw-init
                (begin
                  (set-uword! 8 DT_INIT) ; constants
                  (set-label! 9 rw-init)))))
      ;; The frame-maps entry, when present, sits just before the
      ;; terminating DT_NULL pair.
      (if frame-maps
          (begin
            (set-uword! (- words 4) DT_GUILE_FRAME_MAPS)
            (set-label! (- words 3) '.guile.frame-maps)))
      (set-uword! (- words 2) DT_NULL)
      (set-uword! (- words 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (let ((word-size (asm-word-size asm)))
    (cond
     ((eqv? word-size 4)
      (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
     ((eqv? word-size 8)
      (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
     (else (error "bad word size" asm)))))
1333
(define (link-shstrtab asm)
  "Link the string table for the section headers."
  ;; Intern the table's own section name up front.  make-object would
  ;; intern it too, but only after link-string-table! below has already
  ;; run.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table-bytes '() '()
                 #:type SHT_STRTAB #:flags 0)))
1341
(define (link-symtab text-section asm)
  "Link the ELF symbol table for the procedures recorded in the metadata
of @var{asm}, each described as a hidden function symbol belonging to
@var{text-section}.  Returns two values: the .symtab linker object and
the .strtab linker object holding the symbol names."
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (bv (make-bytevector (* (length metas) entry-size) 0)))
    (let lp ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name (meta-name meta))
               ;; Procedures without a name get the empty string.
               (name-offset (string-table-intern!
                             names
                             (if name (symbol->string name) ""))))
          (write-elf-symbol bv offset endianness word-size
                            (make-elf-symbol
                             #:name name-offset
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 (meta-low-pc meta))
                             #:size (* 4 (- (meta-high-pc meta)
                                            (meta-low-pc meta)))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (lp (cdr metas) (+ offset entry-size)))))
    (let* ((strtab-object (make-object asm '.strtab
                                       (link-string-table! names)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (strtab-index (elf-section-index
                          (linker-object-section strtab-object))))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                           #:link strtab-index)
              strtab-object))))
1378
1379 ;;; The .guile.arities section describes the arities that a function can
1380 ;;; have. It is in two parts: a sorted array of headers describing
1381 ;;; basic arities, and an array of links out to a string table (and in
1382 ;;; the case of keyword arguments, to the data section) for argument
1383 ;;; names. The whole thing is prefixed by a uint32 indicating the
1384 ;;; offset of the end of the headers array.
1385 ;;;
1386 ;;; The arity headers array is a packed array of structures of the form:
1387 ;;;
1388 ;;; struct arity_header {
1389 ;;; uint32_t low_pc;
1390 ;;; uint32_t high_pc;
1391 ;;; uint32_t offset;
1392 ;;; uint32_t flags;
1393 ;;; uint32_t nreq;
1394 ;;; uint32_t nopt;
1395 ;;; }
1396 ;;;
1397 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1398 ;;; future to use 64-bit offsets if appropriate, but there are other
1399 ;;; aspects of bytecode that constrain us to a total image that fits in
1400 ;;; 32 bits, so for the moment we'll simplify the problem space.
1401 ;;;
1402 ;;; The following flags values are defined:
1403 ;;;
1404 ;;; #x1: has-rest?
1405 ;;; #x2: allow-other-keys?
1406 ;;; #x4: has-keyword-args?
1407 ;;; #x8: is-case-lambda?
1408 ;;; #x10: is-in-case-lambda?
1409 ;;;
1410 ;;; Functions with a single arity specify their number of required and
1411 ;;; optional arguments in nreq and nopt, and do not have the
1412 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1413 ;;; of pointers into the associated .guile.arities.strtab string table,
1414 ;;; identifying the argument names. This offset is relative to the
1415 ;;; start of the .guile.arities section. Links for required arguments
1416 ;;; are first, in order, as uint32 values. Next follow the optionals,
1417 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1418 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1419 ;;; links, the kw-indices link points into the data section, and is
1420 ;;; relative to the ELF image as a whole.
1421 ;;;
1422 ;;; Functions with no arities have no arities information present in the
1423 ;;; .guile.arities section.
1424 ;;;
1425 ;;; Functions with multiple arities are preceded by a header with
1426 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1427 ;;; high-pc which should be the bounds of the whole function. Headers
1428 ;;; for the individual arities follow, with the is-in-case-lambda? flag
1429 ;;; set. In this way the whole headers array is sorted in increasing
1430 ;;; low-pc order, and case-lambda clauses are contained within the
1431 ;;; [low-pc, high-pc] of the case-lambda header.
1432
;; Length in bytes of the prefix of the arities section: a single
;; uint32.
(define arities-prefix-len 4)

;; Length in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte offset of the "offset" field within an arity header; it is the
;; third uint32 field.
(define arity-header-offset-offset (* 4 2))
1441
;; Combine five booleans into the arity header's flags word, using the
;; bit values listed in the description of the .guile.arities section.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?
                                      is-in-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)
          (if is-in-case-lambda? #x10 0)))
1450
(define (meta-arities-size meta)
  "Return the number of bytes that the arities of @var{meta} occupy in
the .guile.arities section, counting both headers and links.  A
procedure with no arities takes no space; one with several arities
additionally needs a case-lambda header."
  (define (link-count arity)
    ;; One 4-byte link per required and optional argument, plus one
    ;; for the rest argument and one for the keyword indices if any.
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (lambda-size arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (let ((arities (meta-arities meta)))
    (match arities
      (() 0)
      ((only) (lambda-size only))
      (_ (apply + arity-header-len (map lambda-size arities))))))
1467
(define (write-arity-headers metas bv endianness)
  "Write the arity headers for @var{metas} into @var{bv}, starting right
after the section prefix, and then store the offset of the end of the
headers in the prefix.  Returns two values: that end offset, and an alist
mapping each arity to the byte position of its header's offset field, in
the order in which the headers were written."
  (define (put-u32! pos val)
    (bytevector-u32-set! bv pos val endianness))
  (define (write-arity-header* pos low-pc high-pc flags nreq nopt)
    ;; The pcs are given in words and stored in bytes.
    (put-u32! pos (* low-pc 4))
    (put-u32! (+ pos 4) (* high-pc 4))
    (put-u32! (+ pos 8) 0)              ; offset
    (put-u32! (+ pos 12) flags)
    (put-u32! (+ pos 16) nreq)
    (put-u32! (+ pos 20) nopt))
  (define (write-arity-header pos arity in-case-lambda?)
    (let ((flags (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f
                                   in-case-lambda?)))
      (write-arity-header* pos
                           (arity-low-pc arity)
                           (arity-high-pc arity)
                           flags
                           (length (arity-req arity))
                           (length (arity-opt arity)))))
  (define (offset-field-of pos)
    (+ pos arity-header-offset-offset))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (match metas
      ((meta . metas)
       (match (meta-arities meta)
         (() (lp metas pos offsets))
         ((arity)
          (write-arity-header pos arity #f)
          (lp metas
              (+ pos arity-header-len)
              (acons arity (offset-field-of pos) offsets)))
         (arities
          ;; Write a case-lambda header, then individual arities.
          ;; The case-lambda header's offset link is 0.
          (write-arity-header* pos (meta-low-pc meta) (meta-high-pc meta)
                               (pack-arity-flags #f #f #f #t #f) 0 0)
          (let ((state
                 (fold (lambda (arity state)
                         (match state
                           ((pos . offsets)
                            (write-arity-header pos arity #t)
                            (cons (+ pos arity-header-len)
                                  (acons arity (offset-field-of pos)
                                         offsets)))))
                       (cons (+ pos arity-header-len) offsets)
                       arities)))
            (lp metas (car state) (cdr state))))))
      (()
       ;; Fill in the prefix.
       (put-u32! 0 pos)
       (values pos (reverse offsets))))))
1516
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  "Write the links for each arity in @var{arity-offset-pairs} into
@var{bv}, starting at byte offset @var{pos}.  For every (arity . offset)
pair, the position of the arity's links is stored at @var{offset}, and
the names of its required, optional and rest arguments are interned in
@var{strtab} and written as uint32 links.  Returns a list of linker
relocations for the keyword-indices links.  Signals an error if the
links do not end exactly at the end of @var{bv}."
  (define endianness (asm-endianness asm))
  (define (put-u32! at val)
    (bytevector-u32-set! bv at val endianness))
  (define (arity-names arity)
    ;; Required, then optional, then the rest argument if there is one.
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  (define (write-names names pos)
    ;; Returns the position after the last link written.
    (let lp ((names names) (pos pos))
      (cond
       ((null? names) pos)
       (else
        (put-u32! pos
                  (string-table-intern! strtab
                                        (symbol->string (car names))))
        (lp (cdr names) (+ pos 4))))))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (match pairs
      (((arity . offset) . pairs)
       (put-u32! offset pos)
       (let ((pos (write-names (arity-names arity) pos))
             (kw-indices (arity-kw-indices arity)))
         (if (null? kw-indices)
             (lp pos pairs relocs)
             ;; The keyword-indices link is left for the linker to
             ;; fill in; just reserve its four bytes here.
             ;; FIXME: Assert that kw-indices is already interned.
             (lp (+ pos 4)
                 pairs
                 (cons (make-linker-reloc 'abs32/1 pos 0
                                          (intern-constant asm kw-indices))
                       relocs)))))
      (()
       (if (= pos (bytevector-length bv))
           relocs
           (error "expected to fully fill the bytevector"
                  pos (bytevector-length bv)))))))
1549
(define (link-arities asm)
  "Link the .guile.arities section describing the arities of the
procedures recorded in the metadata of @var{asm}.  Returns two values:
the .guile.arities linker object and the .guile.arities.strtab linker
object that holds the argument names."
  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         (size (apply + arities-prefix-len (map meta-arities-size metas)))
         (names (make-string-table))
         (bv (make-bytevector size 0)))
    ;; Headers first; the links follow from where the headers end.
    (let-values (((links-pos arity-offset-pairs)
                  (write-arity-headers metas bv endianness)))
      (let* ((kw-indices-relocs
              (write-arity-links asm bv links-pos arity-offset-pairs names))
             (strtab-object (make-object asm '.guile.arities.strtab
                                         (link-string-table! names)
                                         '() '()
                                         #:type SHT_STRTAB #:flags 0))
             (strtab-index (elf-section-index
                            (linker-object-section strtab-object))))
        (values (make-object asm '.guile.arities
                             bv
                             kw-indices-relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link strtab-index)
                strtab-object)))))
1576
1577 ;;;
1578 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1579 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1580 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1581 ;;; entry to a program, relative to the start of the text section, in
1582 ;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1583 ;;; string table section.
1584 ;;;
1585
;; The size of a docstrs entry, in bytes: a 32-bit pc followed by a
;; 32-bit string table index.
(define docstr-size (* 2 4))
1588
(define (link-docstrs asm)
  "Link the .guile.docstrs section for the procedures recorded in the
metadata of @var{asm}.  Returns two values: the .guile.docstrs linker
object and the .guile.docstrs.strtab linker object holding the strings."
  (define (documentation-entry? pair)
    (eq? 'documentation (car pair)))
  (define (meta->docstring-entry meta)
    ;; Returns (PC . STRING), or #f unless META has exactly one
    ;; `documentation' property whose value is a string.
    (match (find-tail documentation-entry? (meta-properties meta))
      (#f #f)
      (((_ . doc) . rest)
       (and (not (find-tail documentation-entry? rest))
            (string? doc)
            (cons (* 4 (meta-low-pc meta)) doc)))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->docstring-entry
                                 (reverse (asm-meta asm))))
         (strings (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    (let lp ((entries docstrings) (pos 0))
      (match entries
        (() #t)
        (((pc . doc) . entries)
         (bytevector-u32-set! bv pos pc endianness)
         (bytevector-u32-set! bv (+ pos 4)
                              (string-table-intern! strings doc)
                              endianness)
         (lp entries (+ pos docstr-size)))))
    (let* ((strtab-object (make-object asm '.guile.docstrs.strtab
                                       (link-string-table! strings)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (strtab-index (elf-section-index
                          (linker-object-section strtab-object))))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link strtab-index)
              strtab-object))))
1626
1627 ;;;
1628 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1629 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1630 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1631 ;;; entry to a program, relative to the start of the text section, and
1632 ;;; addr is the address of the associated properties alist, relative to
1633 ;;; the start of the ELF image.
1634 ;;;
1635 ;;; Since procedure properties are stored in the data sections, we need
1636 ;;; to link the procedures property section first. (Note that this
1637 ;;; constraint does not apply to the arities section, which may
1638 ;;; reference the data sections via the kw-indices literal, because
1639 ;;; assembling the text section already makes sure that the kw-indices
1640 ;;; are interned.)
1641 ;;;
1642
;; The size of a procprops entry, in bytes: a 32-bit pc followed by a
;; 32-bit address.
(define procprops-size (* 2 4))
1645
(define (link-procprops asm)
  "Link the .guile.procprops section for the procedures recorded in the
metadata of @var{asm} and return the resulting linker object.  A
procedure gets an entry only if it has properties left after its name
and its string-valued documentation have been removed; those remaining
properties are interned as a constant."
  (define (assoc-remove-one alist key value-pred)
    ;; Look at the first entry of ALIST whose key is KEY.  Drop that
    ;; entry if VALUE-PRED accepts its value; otherwise keep it.  Later
    ;; entries are not examined either way.
    (let strip ((alist alist))
      (match alist
        (() '())
        (((k . v) . rest)
         (cond
          ((not (eq? k key)) (acons k v (strip rest)))
          ((value-pred v) rest)
          (else (acons k v rest)))))))
  (define (props-without-name-or-docstring meta)
    (let ((unnamed (assoc-remove-one (meta-properties meta)
                                     'name
                                     (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))
  (define (meta->procprops-entry meta)
    ;; Returns (PC . PROPS), or #f when no properties remain.
    (let ((props (props-without-name-or-docstring meta)))
      (if (pair? props)
          (cons (* 4 (meta-low-pc meta)) props)
          #f)))
  (let* ((endianness (asm-endianness asm))
         (procprops (filter-map meta->procprops-entry
                                (reverse (asm-meta asm))))
         (count (length procprops))
         (bv (make-bytevector (* count procprops-size) 0))
         (relocs
          (fold (lambda (entry index relocs)
                  (match entry
                    ((pc . props)
                     (let ((pos (* index procprops-size)))
                       (bytevector-u32-set! bv pos pc endianness)
                       ;; The address word is left for the linker.
                       (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                                (intern-constant asm props))
                             relocs)))))
                '()
                procprops
                (iota count))))
    (make-object asm '.guile.procprops
                 bv
                 relocs '()
                 #:type SHT_PROGBITS #:flags 0)))
1684
1685 ;;;
1686 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1687 ;;; sections provide line number and local variable liveness
1688 ;;; information. Their format is defined by the DWARF
1689 ;;; specifications.
1690 ;;;
1691
(define (asm-language asm)
  "Return the source language recorded for @var{asm}.  For now this is
always the symbol @code{scheme}, whatever @var{asm} is."
  ;; FIXME: Plumb language through to the assembler.
  (quote scheme))
1695
;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
1697 (define (link-debug asm)
1698 (define (put-s8 port val)
1699 (let ((bv (make-bytevector 1)))
1700 (bytevector-s8-set! bv 0 val)
1701 (put-bytevector port bv)))
1702
1703 (define (put-u16 port val)
1704 (let ((bv (make-bytevector 2)))
1705 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1706 (put-bytevector port bv)))
1707
1708 (define (put-u32 port val)
1709 (let ((bv (make-bytevector 4)))
1710 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1711 (put-bytevector port bv)))
1712
1713 (define (put-u64 port val)
1714 (let ((bv (make-bytevector 8)))
1715 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1716 (put-bytevector port bv)))
1717
1718 (define (put-uleb128 port val)
1719 (let lp ((val val))
1720 (let ((next (ash val -7)))
1721 (if (zero? next)
1722 (put-u8 port val)
1723 (begin
1724 (put-u8 port (logior #x80 (logand val #x7f)))
1725 (lp next))))))
1726
1727 (define (put-sleb128 port val)
1728 (let lp ((val val))
1729 (if (<= 0 (+ val 64) 127)
1730 (put-u8 port (logand val #x7f))
1731 (begin
1732 (put-u8 port (logior #x80 (logand val #x7f)))
1733 (lp (ash val -7))))))
1734
1735 (define (port-position port)
1736 (seek port 0 SEEK_CUR))
1737
1738 (define (meta->subprogram-die meta)
1739 `(subprogram
1740 (@ ,@(cond
1741 ((meta-name meta)
1742 => (lambda (name) `((name ,(symbol->string name)))))
1743 (else
1744 '()))
1745 (low-pc ,(meta-label meta))
1746 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1747
1748 (define (make-compile-unit-die asm)
1749 `(compile-unit
1750 (@ (producer ,(string-append "Guile " (version)))
1751 (language ,(asm-language asm))
1752 (low-pc .rtl-text)
1753 (high-pc ,(* 4 (asm-pos asm)))
1754 (stmt-list 0))
1755 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1756
1757 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1758 ((die-relocs) '())
1759 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1760 ;; (tag has-kids? attrs forms) -> code
1761 ((abbrevs) vlist-null)
1762 ((strtab) (make-string-table))
1763 ((line-port get-line-bv) (open-bytevector-output-port))
1764 ((line-relocs) '())
1765 ;; file -> code
1766 ((files) vlist-null))
1767
1768 (define (write-abbrev code tag has-children? attrs forms)
1769 (put-uleb128 abbrev-port code)
1770 (put-uleb128 abbrev-port (tag-name->code tag))
1771 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1772 (for-each (lambda (attr form)
1773 (put-uleb128 abbrev-port (attribute-name->code attr))
1774 (put-uleb128 abbrev-port (form-name->code form)))
1775 attrs forms)
1776 (put-uleb128 abbrev-port 0)
1777 (put-uleb128 abbrev-port 0))
1778
1779 (define (intern-abbrev tag has-children? attrs forms)
1780 (let ((key (list tag has-children? attrs forms)))
1781 (match (vhash-assoc key abbrevs)
1782 ((_ . code) code)
1783 (#f (let ((code (1+ (vlist-length abbrevs))))
1784 (set! abbrevs (vhash-cons key code abbrevs))
1785 (write-abbrev code tag has-children? attrs forms)
1786 code)))))
1787
1788 (define (intern-file file)
1789 (match (vhash-assoc file files)
1790 ((_ . code) code)
1791 (#f (let ((code (1+ (vlist-length files))))
1792 (set! files (vhash-cons file code files))
1793 code))))
1794
(define (write-sources)
  ;; Write the line-number information for ASM to LINE-PORT: a DWARF 2
  ;; line program header followed by a statement program built from
  ;; (asm-sources asm).  File names are numbered with intern-file, and
  ;; the relocation for the starting text address is pushed onto
  ;; LINE-RELOCS.  The initial length field is written as 0; it is
  ;; patched later, once the whole section has been produced.

  ;; Choose line base and line range values that will allow for an
  ;; address advance range of 16 words.  The special opcode range is
  ;; from 10 to 255, so 246 values.
  (define base -4)
  (define range 15)
  ;; Number of the first "special" opcode.  It is written into the
  ;; header and used when computing special opcodes, so keep a single
  ;; definition.
  (define opcode-base 10)

  ;; First pass: turn each (pc . source-properties) entry into a
  ;; (pc file-code line column) row.  Consing onto OUT reverses the
  ;; order of (asm-sources asm).
  (let lp ((sources (asm-sources asm)) (out '()))
    (match sources
      (((pc . s) . sources)
       (let ((file (assq-ref s 'filename))
             (line (assq-ref s 'line))
             (col (assq-ref s 'column)))
         (lp sources
             ;; Guile line and column numbers are 0-indexed, but
             ;; they are 1-indexed for DWARF.
             ;; NOTE(review): an entry without a 'line or 'column
             ;; property yields an unspecified value here, which the
             ;; arithmetic in the statement program cannot handle;
             ;; presumably every entry carries both -- confirm.
             (cons (list pc
                         (if file (intern-file file) 0)
                         (if line (1+ line))
                         (if col (1+ col)))
                   out))))
      (()
       ;; Compilation unit header for .debug_line.  We write in
       ;; DWARF 2 format because more tools understand it than DWARF
       ;; 4, which incompatibly adds another field to this header.

       (put-u32 line-port 0) ; Length; will patch later.
       (put-u16 line-port 2) ; DWARF 2 format.
       (put-u32 line-port 0) ; Prologue length; will patch later.
       (put-u8 line-port 4)  ; Minimum instruction length: 4 bytes.
       (put-u8 line-port 1)  ; Default is-stmt: true.

       (put-s8 line-port base)        ; Line base.  See the DWARF standard.
       (put-u8 line-port range)       ; Line range.  See the DWARF standard.
       (put-u8 line-port opcode-base) ; Opcode base: the first "special" opcode.

       ;; A table of the number of uleb128 arguments taken by each
       ;; of the standard opcodes.
       (put-u8 line-port 0) ; 1: copy
       (put-u8 line-port 1) ; 2: advance-pc
       (put-u8 line-port 1) ; 3: advance-line
       (put-u8 line-port 1) ; 4: set-file
       (put-u8 line-port 1) ; 5: set-column
       (put-u8 line-port 0) ; 6: negate-stmt
       (put-u8 line-port 0) ; 7: set-basic-block
       (put-u8 line-port 0) ; 8: const-add-pc
       (put-u8 line-port 1) ; 9: fixed-advance-pc

       ;; Include directories, as a zero-terminated sequence of
       ;; nul-terminated strings.  Nothing, for the moment.
       (put-u8 line-port 0)

       ;; File table.  For each file that contributes to this
       ;; compilation unit, a nul-terminated file name string, and a
       ;; uleb128 for each of directory the file was found in, the
       ;; modification time, and the file's size in bytes.  We pass
       ;; zero for the latter three fields.
       ;;
       ;; FILES holds the most recently interned file first, so a
       ;; right fold visits the entries in increasing code order.
       (vlist-fold-right
        (lambda (pair seed)
          (match pair
            ((file . code)
             (put-bytevector line-port (string->utf8 file))
             (put-u8 line-port 0)
             (put-uleb128 line-port 0)    ; directory
             (put-uleb128 line-port 0)    ; mtime
             (put-uleb128 line-port 0)))  ; size
          seed)
        #f
        files)
       (put-u8 line-port 0) ; 0 byte terminating file list.

       ;; Patch prologue length.  The field sits at byte offset 6
       ;; (after the 4-byte length and the 2-byte version) and counts
       ;; the bytes that follow it, hence the subtraction of 10.
       (let ((offset (port-position line-port)))
         (seek line-port 6 SEEK_SET)
         (put-u32 line-port (- offset 10))
         (seek line-port offset SEEK_SET))

       ;; Now write the statement program.
       (let ()
         (define (extended-op opcode payload-len)
           (put-u8 line-port 0)                     ; extended op
           (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
           (put-uleb128 line-port opcode))
         (define (set-address sym)
           ;; Emit a set-address extended op whose operand is a zero
           ;; placeholder, and record a relocation against SYM at the
           ;; placeholder's position.
           (define (add-reloc! kind)
             (set! line-relocs
                   (cons (make-linker-reloc kind
                                            (port-position line-port)
                                            0
                                            sym)
                         line-relocs)))
           (match (asm-word-size asm)
             (4
              (extended-op 2 4)
              (add-reloc! 'abs32/1)
              (put-u32 line-port 0))
             (8
              (extended-op 2 8)
              (add-reloc! 'abs64/1)
              (put-u64 line-port 0))))
         (define (end-sequence pc)
           ;; Advance from PC to the current assembler position, then
           ;; emit the end-sequence extended op.
           (let ((pc-inc (- (asm-pos asm) pc)))
             (put-u8 line-port 2)   ; advance-pc
             (put-uleb128 line-port pc-inc))
           (extended-op 1 0))
         (define (advance-pc pc-inc line-inc)
           ;; Emit opcodes that advance the address by PC-INC and the
           ;; line by LINE-INC, ending with a special opcode.
           (let ((spec (+ (- line-inc base) (* pc-inc range) opcode-base)))
             (cond
              ((or (< line-inc base) (>= line-inc (+ base range)))
               ;; Line delta is outside the special opcode window:
               ;; emit it separately, then retry with a delta of 0.
               (advance-line line-inc)
               (advance-pc pc-inc 0))
              ((<= spec 255)
               (put-u8 line-port spec))
              ((< spec 500)
               ;; const-add-pc advances the address by the amount
               ;; special opcode 255 would; take that off and retry.
               ;; floor-quotient is used instead of floor/ because
               ;; the latter returns two values.
               (put-u8 line-port 8) ; const-add-pc
               (advance-pc (- pc-inc
                              (floor-quotient (- 255 opcode-base) range))
                           line-inc))
              (else
               (put-u8 line-port 2) ; advance-pc
               (put-uleb128 line-port pc-inc)
               (advance-pc 0 line-inc)))))
         (define (advance-line inc)
           (put-u8 line-port 3)
           (put-sleb128 line-port inc))
         (define (set-file file)
           (put-u8 line-port 4)
           (put-uleb128 line-port file))
         (define (set-column col)
           (put-u8 line-port 5)
           (put-uleb128 line-port col))

         (set-address '.rtl-text)

         ;; Walk the rows while tracking the last emitted pc, file,
         ;; line and column.  The starting values match the initial
         ;; registers of the DWARF line number state machine.
         (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
           (match in
             (()
              (when (null? out)
                ;; There was no source info in the first place.  Set
                ;; file register to 0 before adding final row.
                (set-file 0))
              (end-sequence pc))
             (((pc* file* line* col*) . in*)
              (cond
               ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                ;; Same location as the previous row: emit nothing
                ;; and keep the earlier pc.
                (lp in* pc file line col))
               (else
                (unless (eqv? col col*)
                  (set-column col*))
                (unless (eqv? file file*)
                  (set-file file*))
                (advance-pc (- pc* pc) (- line* line))
                (lp in* pc* file* line* col*)))))))))))
1947
(define (compute-code attr val)
  ;; Map the attribute value VAL to the value that will be encoded for
  ;; attribute ATTR.  An ATTR not listed below raises a match error.
  (match attr
    ;; String-valued attributes become the result of interning the
    ;; string in STRTAB.
    ((or 'name 'producer) (string-table-intern! strtab val))
    ;; These are passed through unchanged.
    ((or 'low-pc 'high-pc 'stmt-list) val)
    ;; Language names are translated to their numeric code.
    ('language (language-name->code val))))
1956
(define (exact-integer? val)
  ;; True only when VAL is a number that is both an integer and exact.
  ;; The number? test comes first so that the numeric predicates are
  ;; never applied to a non-number.
  (if (number? val)
      (and (integer? val) (exact? val))
      #f))
1959
(define (choose-form attr val code)
  ;; Select the form used to encode attribute ATTR, given its source
  ;; value VAL and the CODE derived from it.  Signals an "unhandled
  ;; case" error when no rule applies.
  (define (integer-form n)
    ;; Negative integers use sleb128.  Otherwise pick the first
    ;; fixed-width form whose maximum covers N, falling back to
    ;; uleb128 for anything wider than 64 bits.
    (if (negative? n)
        'sleb128
        (let scan ((limits '((#xff . data1)
                             (#xffff . data2)
                             (#xffffffff . data4)
                             (#xffffffffffffffff . data8))))
          (cond
           ((null? limits) 'uleb128)
           ((<= n (caar limits)) (cdar limits))
           (else (scan (cdr limits)))))))
  (cond
   ((string? val) 'strp)
   ;; stmt-list is checked before the integer rule, so it is always a
   ;; sec-offset regardless of its numeric value.
   ((eq? attr 'stmt-list) 'sec-offset)
   ((exact-integer? code) (integer-form code))
   ((symbol? val) 'addr)
   (else (error "unhandled case" attr val code))))
1974
(define (add-die-relocation! kind sym)
  ;; Push a relocation of type KIND against SYM onto DIE-RELOCS,
  ;; located at the current write position of DIE-PORT.  Call this
  ;; before writing the placeholder bytes the relocation refers to.
  (let* ((here (port-position die-port))
         (reloc (make-linker-reloc kind here 0 sym)))
    (set! die-relocs (cons reloc die-relocs))))
1979
(define (write-value code form)
  ;; Write CODE to DIE-PORT in the encoding named by FORM.  A FORM
  ;; that is not listed below raises a match error.
  (match form
    ('data1 (put-u8 die-port code))
    ('data2 (put-u16 die-port code))
    ;; These three forms are all written as a 32-bit value.
    ((or 'data4 'sec-offset 'strp) (put-u32 die-port code))
    ('data8 (put-u64 die-port code))
    ('uleb128 (put-uleb128 die-port code))
    ('sleb128 (put-sleb128 die-port code))
    ('addr
     ;; For addresses CODE is the symbol to relocate against: record
     ;; the relocation at the current position, then write a
     ;; word-sized zero placeholder.
     (let ((word-size (asm-word-size asm)))
       (match word-size
         (4
          (add-die-relocation! 'abs32/1 code)
          (put-u32 die-port 0))
         (8
          (add-die-relocation! 'abs64/1 code)
          (put-u64 die-port 0)))))))
1998
(define (write-die die)
  ;; Serialize DIE, an s-expression of the shape
  ;; (TAG (@ (ATTR VAL) ...) CHILD ...), to DIE-PORT: its abbreviation
  ;; code, then each attribute value, then any children recursively.
  (match die
    ((tag ('@ (names values) ...) kids ...)
     (let* ((codes (map compute-code names values))
            (forms (map choose-form names values codes))
            (parent? (pair? kids))
            (abbrev (intern-abbrev tag parent? names forms)))
       (put-uleb128 die-port abbrev)
       (for-each write-value codes forms)
       ;; A DIE that has children ends its child list with a zero
       ;; abbreviation code.
       (unless (null? kids)
         (for-each write-die kids)
         (put-uleb128 die-port 0))))))
2011
2012 ;; Compilation unit header.
2013 (put-u32 die-port 0) ; Length; will patch later.
2014 (put-u16 die-port 4) ; DWARF 4.
2015 (put-u32 die-port 0) ; Abbrevs offset.
2016 (put-u8 die-port (asm-word-size asm)) ; Address size.
2017
2018 (write-die (make-compile-unit-die asm))
2019
2020 ;; Terminate the abbrevs list.
2021 (put-uleb128 abbrev-port 0)
2022
2023 (write-sources)
2024
2025 (values (let ((bv (get-die-bv)))
2026 ;; Patch DWARF32 length.
2027 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2028 (asm-endianness asm))
2029 (make-object asm '.debug_info bv die-relocs '()
2030 #:type SHT_PROGBITS #:flags 0))
2031 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
2032 #:type SHT_PROGBITS #:flags 0)
2033 (make-object asm '.debug_str (link-string-table! strtab) '() '()
2034 #:type SHT_PROGBITS #:flags 0)
2035 (make-object asm '.debug_loc #vu8() '() '()
2036 #:type SHT_PROGBITS #:flags 0)
2037 (let ((bv (get-line-bv)))
2038 ;; Patch DWARF32 length.
2039 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
2040 (asm-endianness asm))
2041 (make-object asm '.debug_line bv line-relocs '()
2042 #:type SHT_PROGBITS #:flags 0)))))
2043
(define (link-objects asm)
  ;; Link each section of ASM in a fixed order and return the
  ;; resulting objects as a list, with any #f entries removed.
  (let*-values
      (;; Procprops are linked before constants, because doing so
       ;; probably interns more constants.
       ((props) (link-procprops asm))
       ((ro-data rw-data rw-init) (link-constants asm))
       ;; The text object is linked after constants, so that the
       ;; constants initializer gets included.
       ((code) (link-text-object asm))
       ((fmaps) (link-frame-maps asm))
       ((dynamic) (link-dynamic-section asm code rw-data rw-init fmaps))
       ((syms sym-names) (link-symtab (linker-object-section code) asm))
       ((arity-tab arity-names) (link-arities asm))
       ((doc-tab doc-names) (link-docstrs asm))
       ((dbg-info dbg-abbrev dbg-str dbg-loc dbg-line) (link-debug asm))
       ;; This has to come last, because linking the other sections
       ;; adds entries to the string table.
       ((section-names) (link-shstrtab asm)))
    ;; RW-INIT is only passed to link-dynamic-section; it is not part
    ;; of the returned list.
    (delq #f
          (list code ro-data fmaps rw-data dynamic syms sym-names
                arity-tab arity-names
                doc-tab doc-names props
                dbg-info dbg-abbrev dbg-str dbg-loc dbg-line
                section-names))))
2067
2068
2069 \f
2070
2071 ;;;
2072 ;;; High-level public interfaces.
2073 ;;;
2074
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
  ;; Link the individual section objects first, then hand them to the
  ;; ELF linker together with the caller's alignment preference.
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))