Move assemble-program to test cases.
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile RTL assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). There is also a
24 ;;; procedural interface, the emit-OP procedures; each one is
25 ;;; exported as it is defined.
26 ;;;
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm dwarf)
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 binary-ports)
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
60 emit-text
61 link-assembly))
62
63
64 \f
65
66 ;;; RTL code consists of 32-bit units, often subdivided in some way.
67 ;;; These helpers create one 32-bit unit from multiple components.
68
;; Build one 32-bit unit with X in bits 0-7 and Y shifted into bits 8
;; and up.  Only X is range-checked ([0, 255]); Y is used as given.
(define-inlinable (pack-u8-u24 x y)
  (if (<= 0 x 255)
      (logior (ash y 8) x)
      (error "out of range" x)))
73
;; Build one 32-bit unit with X in bits 0-7 and Y, encoded as a 24-bit
;; two's-complement field, in bits 8-31.  Accepted Y values are
;; -#x7fffff..-1 (biased by #x1000000) and 0..#xffffff (used as-is);
;; anything else raises "out of range".
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (let ((field (cond
                ((<= 0 y #xffffff) y)
                ((< 0 (- y) #x800000) (+ #x1000000 y))
                (else (error "out of range" y)))))
    (logior (ash field 8) x)))
84
;; Build one 32-bit unit: X is a single bit (bit 0), Y occupies bits
;; 1-7, and Z is shifted into bits 8 and up.  X and Y are range-checked
;; in that order; Z is used as given.
(define-inlinable (pack-u1-u7-u24 x y z)
  (cond
   ((not (<= 0 x 1)) (error "out of range" x))
   ((not (<= 0 y 127)) (error "out of range" y))
   (else (logior x (ash y 1) (ash z 8)))))
91
;; Build one 32-bit unit: X in bits 0-7, Y in bits 8-19, and Z shifted
;; into bits 20 and up.  X and Y are range-checked in that order; Z is
;; used as given.
(define-inlinable (pack-u8-u12-u12 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 4095)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 20)))))
98
;; Build one 32-bit unit: X in bits 0-7, Y in bits 8-15, and Z shifted
;; into bits 16 and up.  X and Y are range-checked in that order; Z is
;; used as given.
(define-inlinable (pack-u8-u8-u16 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 16)))))
105
;; Build one 32-bit unit from four byte-sized fields, X lowest and W
;; highest.  X, Y and Z are range-checked in that order; W is used as
;; given.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   ((not (<= 0 z 255)) (error "out of range" z))
   (else (logior x (ash y 8) (ash z 16) (ash w 24)))))
114
;; Combine boolean flags into a small integer bit set: the first flag
;; contributes bit 0 (value 1), the second bit 1 (value 2).
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ f1 f2)
     (logior (if f1 1 0)
             (if f2 (ash 1 1) 0)))))
120
121 ;;; Helpers to read and write 32-bit units in a buffer.
122
;; Accessors that address a bytevector in 32-bit units: index N refers
;; to the four bytes starting at byte offset 4*N, in native endianness.

(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* 4 n)))))

(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* 4 n) val))))

(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* 4 n)))))

(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* 4 n) val))))
134
135
136 \f
137
138 ;;; A <meta> entry collects metadata for one procedure. Procedures are
139 ;;; written as contiguous ranges of RTL code.
140 ;;;
;; Evaluate ARG once and signal an error of the form "expected KIND"
;; (with the offending value as irritant) unless the value matches the
;; (ice-9 match) PATTERN.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((value arg))
       (when (match value (pattern #f) (_ #t))
         (error (string-append "expected " kind) value))))))
145
;; Record holding the metadata of one procedure.  Instances are created
;; through make-meta, which leaves high-pc as #f and arities empty;
;; end-program later fills in high-pc and puts the arities in order.
146 (define-record-type <meta>
147 (%make-meta label properties low-pc high-pc arities)
148 meta?
;; The procedure's label; make-meta requires a symbol.
149 (label meta-label)
;; An alist with symbolic keys; meta-name looks up the 'name key.
150 (properties meta-properties set-meta-properties!)
;; Position at which the procedure starts (asm-start at begin-program).
151 (low-pc meta-low-pc)
;; Position at which the procedure ends; #f until end-program.
152 (high-pc meta-high-pc set-meta-high-pc!)
;; List of <arity> records; consed in reverse while assembling.
153 (arities meta-arities set-meta-arities!))
154
;; Validating constructor for <meta>: LABEL must be a symbol and
;; PROPERTIES an alist with symbolic keys.  The new record has no
;; high-pc yet and an empty arity list.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
159
;; Return the value stored under the 'name key of META's property
;; alist, or #f if there is no such key.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
162
163 ;; Metadata for one <lambda-case>.
;; req/opt/rest/kw-indices/allow-other-keys? are stored as passed to
;; begin-kw-arity (which validates them); low-pc is asm-start when the
;; arity begins, and high-pc is #f until end-arity records it.
164 (define-record-type <arity>
165 (make-arity req opt rest kw-indices allow-other-keys?
166 low-pc high-pc)
167 arity?
168 (req arity-req)
169 (opt arity-opt)
170 (rest arity-rest)
171 (kw-indices arity-kw-indices)
172 (allow-other-keys? arity-allow-other-keys?)
173 (low-pc arity-low-pc)
174 (high-pc arity-high-pc set-arity-high-pc!))
175
;; Number of 32-bit words in one code block: fresh-block allocates a
;; u32vector of this length, and emit starts a new block once the
;; current index reaches it.  Defined as identifier syntax, so each
;; reference expands to the literal 32.
176 (define-syntax *block-size* (identifier-syntax 32))
177
178 ;;; An assembler collects all of the words emitted during assembly, and
179 ;;; also maintains ancillary information such as the constant table, a
180 ;;; relocation list, and so on.
181 ;;;
182 ;;; RTL code consists of 32-bit units. We emit RTL code using native
183 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
184 ;;; the bytevector as a whole instead of conditionalizing each access.
185 ;;;
;; The assembler state record.  Instances are created by
;; make-assembler; the fields are documented individually below.
186 (define-record-type <asm>
187 (make-asm cur idx start prev written
188 labels relocs
189 word-size endianness
190 constants inits
191 shstrtab next-section-number
192 meta sources)
193 asm?
194
195 ;; We write RTL code into what is logically a growable vector,
196 ;; implemented as a list of blocks. asm-cur is the current block, and
197 ;; asm-idx is the current index into that block, in 32-bit units.
198 ;;
199 (cur asm-cur set-asm-cur!)
200 (idx asm-idx set-asm-idx!)
201
202 ;; asm-start is an absolute position, indicating the offset of the
203 ;; beginning of an instruction (in u32 units). It is updated after
204 ;; writing all the words for one primitive instruction. It models the
205 ;; position of the instruction pointer during execution, given that
206 ;; the RTL VM updates the IP only at the end of executing the
207 ;; instruction, and is thus useful for computing offsets between two
208 ;; points in a program.
209 ;;
210 (start asm-start set-asm-start!)
211
212 ;; The list of previously written blocks.
213 ;;
214 (prev asm-prev set-asm-prev!)
215
216 ;; The number of u32 words written in asm-prev, which is the same as
217 ;; the offset of the current block.
218 ;;
219 (written asm-written set-asm-written!)
220
221 ;; A hash table mapping symbol -> position, recording the labels
222 ;; defined in this compilation unit.
223 ;;
224 (labels asm-labels set-asm-labels!)
225
226 ;; A list of relocations needed by the program text. We use an
227 ;; internal representation for relocations, and handle textual
228 ;; relative relocations in the assembler. Other kinds of relocations
229 ;; are later reified as linker relocations and resolved by the linker.
230 ;;
231 (relocs asm-relocs set-asm-relocs!)
232
233 ;; Target information.
234 ;;
235 (word-size asm-word-size)
236 (endianness asm-endianness)
237
238 ;; The constant table, as a vhash of object -> label. All constants
239 ;; get de-duplicated and written into separate sections -- either the
240 ;; .rodata section, for read-only data, or .data, for constants that
241 ;; need initialization at load-time (like symbols). Constants can
242 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
243 ;; so order in this table is important.
244 ;;
245 (constants asm-constants set-asm-constants!)
246
247 ;; A list of RTL instructions needed to initialize the constants.
248 ;; Will run in a thunk with 2 local variables.
249 ;;
250 (inits asm-inits set-asm-inits!)
251
252 ;; The shstrtab, for section names.
253 ;;
254 (shstrtab asm-shstrtab set-asm-shstrtab!)
255
256 ;; The section number for the next section to be written.
257 ;;
258 (next-section-number asm-next-section-number set-asm-next-section-number!)
259
260 ;; A list of <meta>, corresponding to procedure metadata.
261 ;;
262 (meta asm-meta set-asm-meta!)
263
264 ;; A list of (pos . source) pairs, indicating source information. POS
265 ;; is relative to the beginning of the text section, and SOURCE is in
266 ;; the same format that source-properties returns.
267 ;;
268 (sources asm-sources set-asm-sources!))
269
;; Allocate a new, empty code block: a u32vector holding *block-size*
;; 32-bit words.
(define-inlinable (fresh-block)
  (let ((nwords *block-size*))
    (make-u32vector nwords)))
272
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Create an assembler for a given target @var{word-size} and
@var{endianness}, falling back to appropriate values for the configured
target."
  (let ((first-block (fresh-block))
        (label-table (make-hash-table))
        (section-names (make-string-table)))
    (make-asm first-block 0 0 '() 0     ; cur idx start prev written
              label-table '()           ; labels relocs
              word-size endianness
              vlist-null '()            ; constants inits
              section-names 1           ; shstrtab next-section-number
              '() '())))                ; meta sources
284
(define (intern-section-name! asm string)
  "Add a string to the section name table (shstrtab)."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
288
(define-inlinable (asm-pos asm)
  "The offset of the next word to be written into the code buffer, in
32-bit units."
  ;; Words in closed blocks plus the index into the current block.
  (let ((closed (asm-written asm))
        (open (asm-idx asm)))
    (+ open closed)))
293
(define (allocate-new-block asm)
  "Close off the current block, and arrange for the next word to be
written to a fresh block."
  ;; Capture everything derived from the old state first: asm-pos reads
  ;; asm-idx, so it must be sampled before the index is reset.
  (let ((closed-blocks (cons (asm-cur asm) (asm-prev asm)))
        (offset (asm-pos asm))
        (block (fresh-block)))
    (set-asm-prev! asm closed-blocks)
    (set-asm-written! asm offset)
    (set-asm-cur! asm block)
    (set-asm-idx! asm 0)))
302
(define-inlinable (emit asm u32)
  "Emit one 32-bit word into the instruction stream.  Assumes that there
is space for the word, and ensures that there is space for the next
word."
  (let* ((slot (asm-idx asm))
         (next (1+ slot)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm next)
    ;; The current block is full: switch to a fresh one right away so
    ;; the next emit always has room.
    (if (= next *block-size*)
        (allocate-new-block asm))))
311
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} referencing symbol
@var{label}, @var{word} words after position @var{base}.  @var{type}
may be x8-s24, indicating a 24-bit relative label reference that can be
fixed up by the assembler, or s32, indicating a 32-bit relative
reference that needs to be fixed up by the linker."
  ;; A relocation is represented as the plain list (TYPE LABEL BASE WORD).
  (list type label base word))
319
(define-inlinable (reset-asm-start! asm)
  "Reset the asm-start after writing the words for one instruction."
  (let ((next-instruction (asm-pos asm)))
    (set-asm-start! asm next-instruction)))
323
(define (record-label-reference asm label)
  "Record an x8-s24 local label reference.  This value will get patched
up later by the assembler."
  ;; BASE is the start of the instruction being written; WORD is how
  ;; many words into that instruction the reference sits.
  (let* ((base (asm-start asm))
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label base word)
                           (asm-relocs asm)))))
331
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Record an s32 far label reference.  This value will get patched up
later by the linker."
  ;; BASE is the start of the current instruction moved back by OFFSET
  ;; words; WORD is the distance from BASE to the word being written.
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base word)
                           (asm-relocs asm)))))
339
340
341 \f
342
343 ;;;
344 ;;; Primitive assemblers are defined by expanding `assembler' for each
345 ;;; opcode in `(rtl-instruction-list)'.
346 ;;;
347
(eval-when (expand compile load eval)
  ;; Return an identifier whose name is the names of identifiers A and B
  ;; concatenated, with the lexical context of CTX.  Available at
  ;; expansion time because the macros below call it.
  (define (id-append ctx a b)
    (let ((prefix (syntax->datum a))
          (suffix (syntax->datum b)))
      (datum->syntax ctx (symbol-append prefix suffix)))))
351
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure
;; (lambda (asm ARG ...) ...) that emits one primitive instruction.
;; WORD0 and each WORD* are word-type symbols (U8_X24, U8_U24, ...);
;; each word type contributes its own formals to the procedure and the
;; code that packs and emits that 32-bit word.  After all words are
;; written the procedure resets asm-start.
(define-syntax assembler
  (lambda (x)
    ;; (op-case ASM NAME ((TYPE ARG ...) CODE ...) ...) selects, at
    ;; expansion time, the clause whose TYPE is eq? to the symbol NAME
    ;; and returns the syntax ((TEMP ...) CODE ...), where the ARGs
    ;; have been replaced by fresh temporaries so that the formals of
    ;; different words cannot collide.  An unknown NAME is an error.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, whose
    ;; low 8 bits always hold OPCODE.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; A tail word has no opcode, so all three fields are operands.
       ;; (The formals used to be (a b), leaving `c' unbound.)
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; High 32 bits first, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ((B32))
       ;; Far label references: record a relocation and emit a zero
       ;; placeholder word for it.
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         ;; The formals of all words are concatenated, in word order,
         ;; after the leading ASM argument.
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
458
;; Table mapping instruction name (a symbol) to its emitter procedure.
;; Filled in by define-assembler and define-macro-assembler, consulted
;; by emit-text.
(define assemblers (make-hash-table))

;; (define-assembler NAME OPCODE KIND WORD ...) defines and exports
;; emit-NAME, the primitive assembler built by `assembler' from OPCODE
;; and the WORDs, and registers it in `assemblers' under NAME.  KIND is
;; accepted but not used here.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name (assembler name opcode arg ...))
             (hashq-set! assemblers 'name emit-name)
             (export emit-name)))))))
472
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form per entry INST of (rtl-instruction-list), which is read at
;; expansion time.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (spec) (datum->syntax #'macro spec))
                           (rtl-instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))

;; Define an emit-OP procedure for every primitive instruction.
(visit-opcodes define-assembler)
485
(define (emit-text asm instructions)
  "Assemble @var{instructions} using the assembler @var{asm}.
@var{instructions} is a sequence of RTL instructions, expressed as a
list of lists.  This procedure can be called many times before calling
@code{link-assembly}."
  (for-each
   (lambda (inst)
     ;; The head of each instruction names its emitter; the remaining
     ;; elements are passed to it after ASM.
     (let ((emitter (hashq-ref assemblers (car inst))))
       (unless emitter
         (error 'bad-instruction inst))
       (apply emitter asm (cdr inst))))
   instructions))
497
498 \f
499
500 ;;;
501 ;;; The constant table records a topologically sorted set of literal
502 ;;; constants used by a program. For example, a pair uses its car and
503 ;;; cdr, a string uses its stringbuf, etc.
504 ;;;
505 ;;; Some things we want to add to the constant table are not actually
506 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
507 ;;; references, or cache cells for non-closure procedures. For these we
508 ;;; define special record types and add instances of those record types
509 ;;; to the table.
510 ;;;
511
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
  ;; An object counts as immediate when bit 1 or bit 2 (mask 6) of its
  ;; address word is set.
  (let ((tag-bits (logand 6 (object-address x))))
    (not (= tag-bits 0))))
515
;; Wrapper records for constant-table entries that are not themselves
;; Scheme objects.

;; The character storage of a string constant; wraps the string itself.
516 (define-record-type <stringbuf>
517 (make-stringbuf string)
518 stringbuf?
519 (string stringbuf-string))
520
;; A statically allocated procedure; CODE is the label passed to
;; load-static-procedure.
521 (define-record-type <static-procedure>
522 (make-static-procedure code)
523 static-procedure?
524 (code static-procedure-code))
525
;; The raw contents of a uniform vector: BYTES is a bytevector and
;; ELEMENT-SIZE the element size recorded by intern-constant (4 for
;; bitvectors).
526 (define-record-type <uniform-vector-backing-store>
527 (make-uniform-vector-backing-store bytes element-size)
528 uniform-vector-backing-store?
529 (bytes uniform-vector-backing-store-bytes)
530 (element-size uniform-vector-backing-store-element-size))
531
;; A cache cell identified by a SCOPE and a KEY; see intern-cache-cell.
532 (define-record-type <cache-cell>
533 (make-cache-cell scope key)
534 cache-cell?
535 (scope cache-cell-scope)
536 (key cache-cell-key))
537
;; True if OBJ is a vector whose array shape is exactly one dimension
;; running from 0 to length-1.
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last-index (1- (vector-length obj))))
         (equal? (array-shape obj) `((0 ,last-index))))))
541
;; True if OBJ is an array whose element type is named by a symbol and
;; whose shape is exactly one dimension running from 0 to length-1.
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last-index (1- (array-length obj))))
         (equal? (array-shape obj) `((0 ,last-index))))))
546
(define (statically-allocatable? x)
  "Return @code{#t} if a non-immediate constant can be allocated
statically, and @code{#f} if it would need some kind of runtime
allocation."
  ;; Pairs, strings, stringbufs, static procedures and arrays qualify.
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
552
(define (intern-constant asm obj)
  "Add an object to the constant table, and return a label that can be
used to reference it.  If the object is already present in the constant
table, its existing label is used directly."
  ;; Intern a sub-object, returning its label (or #f for immediates).
  (define (intern-child child)
    (intern-constant asm child))
  ;; Init instructions that store CHILD into word N of the object
  ;; labelled DST.  Immediates need nothing; statically allocatable
  ;; children are patched in directly; anything else is loaded through
  ;; local 1 and then stored.
  (define (field-inits dst n child)
    (let ((src (intern-child child)))
      (cond
       ((not src) '())
       ((statically-allocatable? child)
        `((static-patch! ,dst ,n ,src)))
       (else
        `((static-ref 1 ,src)
          (static-set! 1 ,dst ,n))))))
  ;; Init instructions needed for CONST, which will live at LABEL.
  ;; Interning the parts of CONST happens here as a side effect.
  (define (constant-inits const label)
    (cond
     ((pair? const)
      (append (field-inits label 0 (car const))
              (field-inits label 1 (cdr const))))
     ((simple-vector? const)
      ;; Element I is stored at word I+1 of the vector object.
      (let loop ((index 0) (acc '()))
        (if (< index (vector-length const))
            (loop (1+ index)
                  (append-reverse (field-inits label (1+ index)
                                               (vector-ref const index))
                                  acc))
            (reverse acc))))
     ((stringbuf? const) '())
     ((static-procedure? const)
      `((static-patch! ,label 1 ,(static-procedure-code const))))
     ((cache-cell? const) '())
     ((symbol? const)
      `((make-non-immediate 1 ,(intern-child (symbol->string const)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? const)
      `((static-patch! ,label 1 ,(intern-child (make-stringbuf const)))))
     ((keyword? const)
      `((static-ref 1 ,(intern-child (keyword->symbol const)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? const)
      `((make-non-immediate 1 ,(intern-child (number->string const)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? const) '())
     ((simple-uniform-vector? const)
      `((static-patch! ,label 2
                       ,(intern-child
                         (make-uniform-vector-backing-store
                          (uniform-array->bytevector const)
                          (if (bitvector? const)
                              ;; Bitvectors are addressed in
                              ;; 32-bit units.
                              4
                              (uniform-vector-element-size const)))))))
     (else
      (error "don't know how to intern" const))))
  (cond
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Note that computing the inits may itself mutate asm-constants
    ;; and asm-inits, so the table is re-read afterwards.
    (let* ((label (gensym "constant"))
           (inits (constant-inits obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
      label))))
620
(define (intern-non-immediate asm obj)
  "Intern a non-immediate into the constant table, and return its
label."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
627
(define (intern-cache-cell asm scope key)
  "Intern a cache cell into the constant table, and return its label.
If there is already a cache cell with the given scope and key, it is
returned instead."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))

;; Return the label of the cell that holds the module for a scope.
;; The module cell is the cache cell whose key is #t.
(define (intern-module-cache-cell asm scope)
  "Intern a cache cell for a module, and return its label."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
638
639
640 \f
641
642 ;;;
643 ;;; Macro assemblers bridge the gap between primitive instructions and
644 ;;; some higher-level operations.
645 ;;;
646
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; the procedure emit-NAME with the given formals and body, and
;; registers it in `assemblers' under NAME so that emit-text can
;; dispatch to it.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (lambda (arg ...) body body* ...))
             (hashq-set! assemblers 'name emit-name)
             (export emit-name)))))))
658
;; Load the constant OBJ into DST, choosing the instruction from the
;; kind of constant: immediates use the shortest make-*-immediate form
;; whose field can hold the object's bits (the short form also requires
;; DST < 256); other constants are interned and loaded either by
;; address (statically allocatable) or through static-ref.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      (let ((addr (object-address obj)))
        (cond
         ((and (< dst 256) (zero? (ash addr -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash addr -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (let ((label (intern-non-immediate asm obj)))
        (if (statically-allocatable? obj)
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
674
;; Load into DST the address of a static procedure object whose code is
;; at LABEL, interning that object in the constant table.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate
   asm dst
   (intern-constant asm (make-static-procedure label))))
678
;; (define-tc7-macro-assembler NAME TC7) defines the macro assembler
;; NAME, taking (asm slot invert? label), as br-if-tc7 with the type
;; code TC7 filled in.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
682
683 ;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
684 ;; macro assemblers are commented out. See also
685 ;; *branching-primcall-arities* in (language cps primitives), the set of
686 ;; macro-instructions in assembly.scm, and
687 ;; disassembler.scm:code-annotation.
688 ;;
689 ;; FIXME: Define all tc7 values in Scheme in one place, derived from
690 ;; tags.h.
;;
;; NOTE(review): the commented-out br-if-weak-vector entry below repeats
;; the tag 13 used by br-if-vector; confirm the value against tags.h
;; before enabling it.
691 (define-tc7-macro-assembler br-if-symbol 5)
692 (define-tc7-macro-assembler br-if-variable 7)
693 (define-tc7-macro-assembler br-if-vector 13)
694 ;(define-tc7-macro-assembler br-if-weak-vector 13)
695 (define-tc7-macro-assembler br-if-string 21)
696 ;(define-tc7-macro-assembler br-if-heap-number 23)
697 ;(define-tc7-macro-assembler br-if-stringbuf 39)
698 (define-tc7-macro-assembler br-if-bytevector 77)
699 ;(define-tc7-macro-assembler br-if-pointer 31)
700 ;(define-tc7-macro-assembler br-if-hashtable 29)
701 ;(define-tc7-macro-assembler br-if-fluid 37)
702 ;(define-tc7-macro-assembler br-if-dynamic-state 45)
703 ;(define-tc7-macro-assembler br-if-frame 47)
704 ;(define-tc7-macro-assembler br-if-objcode 53)
705 ;(define-tc7-macro-assembler br-if-vm 55)
706 ;(define-tc7-macro-assembler br-if-vm-cont 71)
707 ;(define-tc7-macro-assembler br-if-rtl-program 69)
708 ;(define-tc7-macro-assembler br-if-program 79)
709 ;(define-tc7-macro-assembler br-if-weak-set 85)
710 ;(define-tc7-macro-assembler br-if-weak-table 87)
711 ;(define-tc7-macro-assembler br-if-array 93)
712 (define-tc7-macro-assembler br-if-bitvector 95)
713 ;(define-tc7-macro-assembler br-if-port 125)
714 ;(define-tc7-macro-assembler br-if-smob 127)
715
;; Start a procedure: define LABEL at the current position and push a
;; fresh <meta> for it (low-pc = asm-start) onto asm-meta.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
720
;; Finish the procedure most recently begun: record its high-pc and put
;; its arities, which were accumulated in reverse, back in order.
(define-macro-assembler (end-program asm)
  (let* ((meta (car (asm-meta asm)))
         (end-pos (asm-start asm)))
    (set-meta-high-pc! meta end-pos)
    (set-meta-arities! meta (reverse (meta-arities meta)))))
725
;; An arity with required arguments only: delegate to begin-opt-arity
;; with no optionals and no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))

;; An arity without keyword arguments: delegate to begin-kw-arity with
;; no keywords and allow-other-keys? false.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
731
;; Begin an arity (one lambda case) of the current procedure: validate
;; the arguments, record an <arity> on the current <meta>, and emit the
;; prelude matching the argument shape (keyword, optional/rest, or
;; standard).
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  ;; Record the arity; its high-pc stays #f until end-arity.
  (let ((meta (car (asm-meta asm)))
        (arity (make-arity req opt rest kw-indices allow-other-keys?
                           (asm-start asm) #f)))
    (set-meta-arities! meta (cons arity (meta-arities meta))))
  ;; The procedure itself is in slot 0, in the standard calling
  ;; convention.  For procedure prologues, nreq includes the
  ;; procedure, so here we add 1.
  (let ((nreq (1+ (length req)))
        (nopt (length opt))
        (rest? (->bool rest)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
760
;; Close the arity most recently begun in the current procedure by
;; recording the current position as its high-pc.
(define-macro-assembler (end-arity asm)
  (let* ((meta (car (asm-meta asm)))
         (arity (car (meta-arities meta))))
    (set-arity-high-pc! arity (asm-start asm))))
764
;; Prelude for a required-arguments-only arity.  With an ALTERNATE
;; clause, branch to it when the argument count differs; otherwise
;; assert the count.  When there is no alternate and both NREQ and the
;; number of extra locals are below 2^12, the combined
;; assert-nargs-ee/locals instruction is used instead of a separate
;; alloc-frame.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (define (fits-u12? n)
    (< n (ash 1 12)))
  (cond
   ((and (not alternate)
         (fits-u12? nreq)
         (fits-u12? (- nlocals nreq)))
    (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
   (else
    (if alternate
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-assert-nargs-ee asm nreq))
    (emit-alloc-frame asm nlocals))))
775
;; Prelude for an arity with optional and/or rest arguments.  The
;; lower bound on the argument count is checked first (branching to
;; ALTERNATE if there is one, asserting otherwise); then either the
;; rest list is bound or the upper bound is checked the same way;
;; finally the frame is allocated.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (cond
   (alternate (emit-br-if-nargs-lt asm nreq alternate))
   (else (emit-assert-nargs-ge asm nreq)))
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
788
;; Prelude for an arity with keyword arguments.  After the argument
;; count checks, bind-kwargs is emitted with the packed
;; allow-other-keys?/rest? flags, the number of positional slots, the
;; total slot count (large enough for every keyword index), and the
;; interned keyword alist; then the frame is allocated.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (if (not rest?)
        (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let* ((npositional (+ nreq nopt))
         ;; Grow the running total so that it covers slot IDX.
         (cover-slot (lambda (kw ntotal)
                       (match kw
                         (((? keyword?) . idx)
                          (max (1+ idx) ntotal)))))
         (ntotal (fold cover-slot npositional kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
809
;; Define the label SYM at the start of the next instruction.
(define-macro-assembler (label asm sym)
  (let ((table (asm-labels asm))
        (pos (asm-start asm)))
    (hashq-set! table sym pos)))

;; Associate the source information SOURCE with the start of the next
;; instruction, as a (pos . source) pair pushed onto asm-sources.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
815
;; Store MODULE into the module cache cell for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))

;; Emit toplevel-box for SYM in SCOPE, interning the symbol, the
;; scope's module cell and the per-symbol cache cell it refers to.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((symbol-label (intern-non-immediate asm sym))
        (module-label (intern-module-cache-cell asm scope))
        (cache-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cache-label module-label symbol-label bound?)))

;; Emit module-box for SYM in the module named MODULE-NAME.  The pair
;; (PUBLIC? . MODULE-NAME) is interned as a constant and also serves as
;; the scope of the cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((symbol-label (intern-non-immediate asm sym))
         (scope-key (cons public? module-name))
         (name-label (intern-constant asm scope-key))
         (cache-label (intern-cache-cell asm scope-key sym)))
    (emit-module-box asm dst cache-label name-label symbol-label bound?)))
832
833
834 \f
835
836 ;;;
837 ;;; Helper for linking objects.
838 ;;;
839
(define (make-object asm name bv relocs labels . kwargs)
  "Make a linker object.  This helper handles interning the name in the
shstrtab, assigning the size, allocating a fresh index, and defining a
corresponding linker symbol for the start of the section."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    ;; Claim INDEX for this section.
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          ;; The section's own symbol, at offset 0, goes first.
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
854
855
856 \f
857
858 ;;;
859 ;;; Linking the constant table. This code is somewhat intertwingled
860 ;;; with the intern-constant code above, as that procedure also
861 ;;; residualizes instructions to initialize constants at load time.
862 ;;;
863
;; Write the address word of the immediate X into BUF at byte offset
;; POS, as one target word (4 or 8 bytes) in the target's endianness.
(define (write-immediate asm buf pos x)
  (let ((set-word! (case (asm-word-size asm)
                     ((4) bytevector-u32-set!)
                     ((8) bytevector-u64-set!)
                     (else (error "bad word size" asm)))))
    (set-word! buf pos (object-address x) (asm-endianness asm))))
871
(define (emit-init-constants asm)
  "If there is writable data that needs initialization at runtime, emit
a procedure to do that and return its label.  Otherwise return
@code{#f}."
  (let ((inits (asm-inits asm)))
    (if (null? inits)
        #f
        (let* ((label (gensym "init-constants"))
               (prologue `((begin-program ,label ())
                           (assert-nargs-ee/locals 1 1)))
               ;; The init procedure returns the unspecified value.
               (epilogue `((load-constant 1 ,*unspecified*)
                           (return 1)
                           (end-program))))
          ;; asm-inits is accumulated in reverse order.
          (emit-text asm (append prologue (reverse inits) epilogue))
          label))))
887
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels.

Each object is laid out at an 8-byte-aligned offset, and a linker symbol
for its label is defined at that offset."
  (define (align address alignment)
    ;; Round ADDRESS up to the next multiple of ALIGNMENT.
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Tag values written into the first word of the objects below.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    (define (write-uword! buf pos val)
      ;; Store VAL at byte offset POS as one unsigned word of the
      ;; target's width.  All word-sized stores go through here so that
      ;; an unsupported word size is reported in one uniform way.
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    (define (byte-length x)
      ;; Number of bytes that `write-object!' emits for X.
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters and a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    (define (write-object! buf pos obj)
      ;; Write the static representation of OBJ into BUF at POS.
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (char-size (string-bytes-per-char x))
               (chars-pos (+ pos (* word-size 2))))
          (write-uword! buf pos (if (= char-size 1)
                                    tc7-narrow-stringbuf
                                    tc7-wide-stringbuf))
          (write-uword! buf (+ pos word-size) len)
          (case char-size
            ((1)
             (let lp ((i 0))
               (if (< i len)
                   (let ((u8 (char->integer (string-ref x i))))
                     (bytevector-u8-set! buf (+ chars-pos i) u8)
                     (lp (1+ i)))
                   ;; Terminating zero byte.
                   (bytevector-u8-set! buf (+ chars-pos i) 0))))
            ((4)
             (let lp ((i 0))
               (if (< i len)
                   (let ((u32 (char->integer (string-ref x i))))
                     (bytevector-u32-set! buf (+ chars-pos (* i 4))
                                          u32 endianness)
                     (lp (1+ i)))
                   ;; Terminating zero character.
                   (bytevector-u32-set! buf (+ chars-pos (* i 4))
                                        0 endianness))))
            (else (error "bad string bytes per char" x)))))

       ((static-procedure? obj)
        (write-uword! buf pos tc7-rtl-program)
        (write-uword! buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; The tag word is just tc7-ro-string; the length is stored
        ;; separately in the fourth word.
        (write-uword! buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-uword! buf (+ pos (* 2 word-size)) 0)
        (write-uword! buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let ((len (vector-length obj)))
          ;; The length is packed into the tag word above the low 8 bits.
          (write-uword! buf pos (logior tc7-vector (ash len 8)))
          (let lp ((i 0))
            (when (< i len)
              (write-constant-reference buf
                                        (+ pos word-size (* i word-size))
                                        (vector-ref obj i))
              (lp (1+ i))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (uniform-vector-element-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7)))))
              (len (if (bitvector? obj)
                       (bitvector-length obj)
                       (bytevector-length obj))))
          (write-uword! buf pos tag)
          (write-uword! buf (+ pos word-size) len)                 ; length
          (write-uword! buf (+ pos (* 2 word-size)) 0)             ; pointer
          (write-immediate asm buf (+ pos (* 3 word-size)) #f)))   ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write-object! buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
1086
(define (link-constants asm)
  "Split the constants interned in @var{asm} into those that can live in
a read-only section and those that need a writable one, and link each
group.

Returns three values: an object for the .rodata section, an object for
the .data section, and a label for an initialization procedure.  Any of
these may be @code{#f}."
  (define (all-immediate? vec)
    ;; True if every element of VEC is an immediate.
    (let ((n (vector-length vec)))
      (let scan ((k 0))
        (cond
         ((= k n) #t)
         ((immediate? (vector-ref vec k)) (scan (1+ k)))
         (else #f)))))
  (define (shareable? x)
    ;; An object goes to .rodata only if it holds no references to
    ;; non-immediate objects.
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (if (immediate? (car x))
          (immediate? (cdr x))
          #f))
     ((simple-vector? x) (all-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let partition ((idx 0)
                    (read-only vlist-null)
                    (writable vlist-null))
      (cond
       ((< idx count)
        (let* ((entry (vlist-ref constants idx))
               (obj (car entry))
               (label (cdr entry))
               (next (1+ idx)))
          (if (shareable? obj)
              (partition next (vhash-consq obj label read-only) writable)
              (partition next read-only (vhash-consq obj label writable)))))
       (else
        (values (link-data asm read-only '.rodata)
                (link-data asm writable '.data)
                (emit-init-constants asm)))))))
1119
1120 \f
1121
1122 ;;;
1123 ;;; Linking program text.
1124 ;;;
1125
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label table
@var{labels}, patching @var{buf} in place.  Every x8-s24 relocation must
name a label present in @var{labels}; an s32 relocation whose label is
present is patched as well.  Return a list of linker relocations for the
s32 references whose labels are not in @var{labels}."
  (define (apply-reloc reloc external)
    (match reloc
      ((kind label base word)
       (let* ((target (hashq-ref labels label))
              (slot (+ base word)))
         (cond
          ((eq? kind 's32)
           (cond
            (target
             ;; Known label: store the distance from BASE directly.
             (s32-set! buf slot (- target base))
             external)
            (else
             ;; Unknown here: hand it to the linker, at a byte offset.
             (cons (make-linker-reloc 'rel32/4 (* slot 4) word label)
                   external))))
          ((eq? kind 'x8-s24)
           (unless target
             (error "unbound near relocation" reloc))
           ;; Keep the low 8 bits already in the slot and pack the
           ;; distance alongside them.
           (let ((low-byte (logand (u32-ref buf slot) #xff)))
             (u32-set! buf slot (pack-u8-s24 low-byte (- target base)))
             external))
          (else
           (error "bad relocation kind" reloc)))))))
  (fold apply-reloc '() relocs))
1154
(define (process-labels labels)
  "Return a list with one linker symbol per entry of the hash table
@var{labels}, which maps labels to offsets.  The offsets are expected to
be expressed in words; each symbol is defined at four times its offset."
  (define bytes-per-word 4)
  (define (label->linker-symbol label word-offset)
    (make-linker-symbol label (* word-offset bytes-per-word)))
  (hash-map->list label->linker-symbol labels))
1161
(define (swap-bytes! buf)
  "Reverse, in place, the order of the bytes within each consecutive
32-bit unit of the text buffer @var{buf}.  Signal an error if the length
of @var{buf} is not a multiple of 4."
  (let ((total (bytevector-length buf)))
    (if (not (zero? (modulo total 4)))
        (error "unexpected length"))
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      ;; Reading as big-endian and writing back as little-endian
      ;; reverses the four bytes.
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1175
(define (link-text-object asm)
  "Assemble the instruction blocks held by @var{asm} into one buffer and
link it as the .rtl-text section, swapping the endianness of the bytes
if the target byte order differs from the native one."
  (let* ((text (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; Copy every stored block back to back, walking the stored
         ;; list in reverse, and find where the current block begins.
         (cur-pos (fold (lambda (block offset)
                          (bytevector-copy! block 0 text offset block-bytes)
                          (+ offset block-bytes))
                        0
                        (reverse (asm-prev asm)))))
    ;; Only the first (asm-idx asm) words of the current block are copied.
    (bytevector-copy! (asm-cur asm) 0 text cur-pos (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! text))
    (make-object asm '.rtl-text
                 text
                 (process-relocs text (asm-relocs asm)
                                 (asm-labels asm))
                 (process-labels (asm-labels asm)))))
1194
1195
1196 \f
1197
1198 ;;;
1199 ;;; Linking other sections of the ELF file, like the dynamic segment,
1200 ;;; the symbol table, etc.
1201 ;;;
1202
(define (link-dynamic-section asm text rw rw-init)
  "Link the .dynamic section for an ELF image with RTL text.  The
section is an array of (tag, value) word pairs: the RTL version, the
entry point (a reference to .rtl-text), and a terminating DT_NULL pair.
If the writable data object @var{rw} is true, a GC root pair and its
size are added for the .data section.  If, in addition, @var{rw-init} is
true, it is the label of the procedure that fixes up the writable data
and is recorded as a DT_INIT entry."
  (define (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Each entry occupies two words.
           (entry-count (cond ((not rw) 3)
                              (rw-init 6)
                              (else 5)))
           (bv (make-bytevector (* word-size 2 entry-count) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      (define (set-label! i label)
        ;; The word is left as 0 and a relocation against LABEL is
        ;; recorded for it.
        (set! relocs (cons (make-linker-reloc reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      (define (terminate! i)
        (set-uword! i DT_NULL)
        (set-uword! (1+ i) 0))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 #x02020000)
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (cond
       ((not rw)
        (terminate! 4))
       (else
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (cond
         (rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)
          (terminate! 10))
         (else
          (terminate! 8)))))
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! 'abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! 'abs64/1))
    (else (error "bad word size" asm))))
1250
(define (link-shstrtab asm)
  "Link the string table that holds the section header names, as the
.shstrtab section."
  ;; Intern the section's own name up front: the table contents are
  ;; linked just below, before make-object is entered.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab
                 table-bytes
                 '() '()
                 #:type SHT_STRTAB #:flags 0)))
1258
(define (link-symtab text-section asm)
  "Link the ELF symbol table for the procedures recorded in the metadata
of @var{asm}.  Each procedure becomes a hidden function symbol in
@var{text-section}.  Return two values: the .symtab linker object, and
the .strtab linker object that holds the symbol names."
  (let* ((order (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-len (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (table (make-bytevector (* (length metas) entry-len) 0)))
    (define (intern-name! name)
      ;; Procedures without a name get the empty string.
      (string-table-intern! names (if name (symbol->string name) "")))
    (let fill ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name-idx (intern-name! (meta-name meta))))
          (write-elf-symbol table offset order word-size
                            (make-elf-symbol
                             #:name name-idx
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 (meta-low-pc meta))
                             #:size (* 4 (- (meta-high-pc meta)
                                            (meta-low-pc meta)))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (fill (cdr metas) (+ offset entry-len)))))
    ;; The string table is linked first so that the symbol table can
    ;; name its section index.
    (let* ((strtab (make-object asm '.strtab
                                (link-string-table! names)
                                '() '()
                                #:type SHT_STRTAB #:flags 0))
           (symtab (make-object asm '.symtab
                                table
                                '() '()
                                #:type SHT_SYMTAB #:flags 0 #:entsize entry-len
                                #:link (elf-section-index
                                        (linker-object-section strtab)))))
      (values symtab strtab))))
1295
1296 ;;; The .guile.arities section describes the arities that a function can
1297 ;;; have. It is in two parts: a sorted array of headers describing
1298 ;;; basic arities, and an array of links out to a string table (and in
1299 ;;; the case of keyword arguments, to the data section) for argument
1300 ;;; names. The whole thing is prefixed by a uint32 indicating the
1301 ;;; offset of the end of the headers array.
1302 ;;;
1303 ;;; The arity headers array is a packed array of structures of the form:
1304 ;;;
1305 ;;; struct arity_header {
1306 ;;; uint32_t low_pc;
1307 ;;; uint32_t high_pc;
1308 ;;; uint32_t offset;
1309 ;;; uint32_t flags;
1310 ;;; uint32_t nreq;
1311 ;;; uint32_t nopt;
1312 ;;; }
1313 ;;;
1314 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1315 ;;; future to use 64-bit offsets if appropriate, but there are other
1316 ;;; aspects of RTL that constrain us to a total image that fits in 32
1317 ;;; bits, so for the moment we'll simplify the problem space.
1318 ;;;
1319 ;;; The following flags values are defined:
1320 ;;;
1321 ;;; #x1: has-rest?
1322 ;;; #x2: allow-other-keys?
1323 ;;; #x4: has-keyword-args?
1324 ;;; #x8: is-case-lambda?
1325 ;;;
1326 ;;; Functions with a single arity specify their number of required and
1327 ;;; optional arguments in nreq and nopt, and do not have the
1328 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1329 ;;; of pointers into the associated .guile.arities.strtab string table,
1330 ;;; identifying the argument names. This offset is relative to the
1331 ;;; start of the .guile.arities section. Links for required arguments
1332 ;;; are first, in order, as uint32 values. Next follow the optionals,
1333 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1334 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1335 ;;; links, the kw-indices link points into the data section, and is
1336 ;;; relative to the ELF image as a whole.
1337 ;;;
1338 ;;; Functions with no arities have no arities information present in the
1339 ;;; .guile.arities section.
1340 ;;;
1341 ;;; Functions with multiple arities are preceded by a header with
1342 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1343 ;;; high-pc which should be the bounds of the whole function. Headers
1344 ;;; for the individual arities follow. In this way the whole headers
1345 ;;; array is sorted in increasing low-pc order, and case-lambda clauses
1346 ;;; are contained within the [low-pc, high-pc] of the case-lambda
1347 ;;; header.
1348
;; Size, in bytes, of the uint32 prefix that opens the arities section.
(define arities-prefix-len 4)

;; Size, in bytes, of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte position of the "offset" field within an arity header; two
;; uint32 fields come before it.
(define arity-header-offset-offset (* 4 2))
1357
;; Combine four booleans into the flags word of an arity header:
;; #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;; #x8 is-case-lambda?.  Any true value sets the corresponding bit.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)))
1364
(define (meta-arities-size meta)
  "Return the number of bytes that the arities of @var{meta} occupy in
the arities section: 0 if there are none, one header plus its links for
a single arity, and otherwise an additional case-lambda header followed
by a header plus links for each arity."
  (define (link-count arity)
    ;; One uint32 link per required and optional argument, one for the
    ;; rest argument if any, and one for the keyword indices if any.
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (clause-size arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (let ((arities (meta-arities meta)))
    (cond
     ((null? arities) 0)
     ((null? (cdr arities)) (clause-size (car arities)))
     (else
      ;; Start the sum from the size of the case-lambda header.
      (fold (lambda (arity total) (+ total (clause-size arity)))
            arity-header-len
            arities)))))
1381
(define (write-arity-headers metas bv endianness)
  "Write an arity header into @var{bv} for every arity of every element
of @var{metas}, starting just past the section prefix, and then store
the end position of the headers array in the prefix.  A meta with more
than one arity first gets a header flagged as a case-lambda that spans
the pc range of the whole meta.

Return two values: the byte position following the last header, and an
alist, in the order written, that maps each arity to the byte position
of the \"offset\" field of its header."
  (define (put-u32! pos val)
    (bytevector-u32-set! bv pos val endianness))
  (define (put-header! pos low-pc high-pc flags nreq nopt)
    ;; The pcs are scaled by 4 before being stored.
    (put-u32! pos (* low-pc 4))
    (put-u32! (+ pos 4) (* high-pc 4))
    (put-u32! (+ pos 8) 0)              ; offset
    (put-u32! (+ pos 12) flags)
    (put-u32! (+ pos 16) nreq)
    (put-u32! (+ pos 20) nopt))
  (define (put-arity-header! pos arity)
    (put-header! pos
                 (arity-low-pc arity)
                 (arity-high-pc arity)
                 (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f)
                 (length (arity-req arity))
                 (length (arity-opt arity))))
  (define (put-arities arities pos offsets)
    ;; Write a header for each of ARITIES from POS on.  Return the next
    ;; free position, and OFFSETS extended with the new entries, most
    ;; recent first.
    (if (null? arities)
        (values pos offsets)
        (let ((arity (car arities)))
          (put-arity-header! pos arity)
          (put-arities (cdr arities)
                       (+ pos arity-header-len)
                       (acons arity
                              (+ pos arity-header-offset-offset)
                              offsets)))))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (cond
     ((null? metas)
      ;; Fill in the prefix.
      (put-u32! 0 pos)
      (values pos (reverse offsets)))
     (else
      (let* ((meta (car metas))
             (arities (meta-arities meta))
             (multiple? (and (pair? arities) (pair? (cdr arities)))))
        (when multiple?
          ;; Write a case-lambda header ahead of the individual
          ;; arities.  Its offset link is 0.
          (put-header! pos (meta-low-pc meta) (meta-high-pc meta)
                       (pack-arity-flags #f #f #f #t) 0 0))
        (call-with-values
            (lambda ()
              (put-arities arities
                           (if multiple? (+ pos arity-header-len) pos)
                           offsets))
          (lambda (pos offsets)
            (lp (cdr metas) pos offsets))))))))
1429
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  "Write the argument-name links for each arity in
@var{arity-offset-pairs} into @var{bv}, starting at byte position
@var{pos}.  For each (arity . offset) pair, the position at which that
arity's links begin is first stored at @var{offset}.  The links are
indices of the argument names in @var{strtab}: required arguments, then
optionals, then the rest argument if there is one.  An arity with
keyword indices gets one more 32-bit slot, filled in later through a
relocation.

Return the list of those relocations.  Signal an error if the links do
not end exactly at the end of @var{bv}."
  (define order (asm-endianness asm))
  (define (put-u32! at val)
    (bytevector-u32-set! bv at val order))
  (define (argument-names arity)
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  (define (write-names! names pos)
    ;; Write one link per name and return the position after the last.
    (let next ((names names) (pos pos))
      (if (null? names)
          pos
          (begin
            (put-u32! pos
                      (string-table-intern! strtab
                                            (symbol->string (car names))))
            (next (cdr names) (+ pos 4))))))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (cond
     ((null? pairs)
      (let ((end (bytevector-length bv)))
        (unless (= pos end)
          (error "expected to fully fill the bytevector" pos end)))
      relocs)
     (else
      (let ((arity (caar pairs))
            (header-slot (cdar pairs)))
        (put-u32! header-slot pos)
        (let* ((pos (write-names! (argument-names arity) pos))
               (kw-indices (arity-kw-indices arity)))
          (if (null? kw-indices)
              (lp pos (cdr pairs) relocs)
              ;; FIXME: Assert that kw-indices is already interned.
              (lp (+ pos 4)
                  (cdr pairs)
                  (cons (make-linker-reloc 'abs32/1 pos 0
                                           (intern-constant asm kw-indices))
                        relocs)))))))))
1462
(define (link-arities asm)
  "Link the .guile.arities section for the procedures recorded in the
metadata of @var{asm}, along with the string table of argument names
that it refers to.  Return two values: the .guile.arities linker object
and the .guile.arities.strtab linker object."
  (let* ((metas (reverse (asm-meta asm)))
         (total (fold + arities-prefix-len (map meta-arities-size metas)))
         (names (make-string-table))
         (bv (make-bytevector total 0)))
    (let-values (((links-pos arity-offset-pairs)
                  (write-arity-headers metas bv (asm-endianness asm))))
      ;; The links must be written before the string table is linked,
      ;; as writing them interns the argument names.
      (let* ((kw-indices-relocs
              (write-arity-links asm bv links-pos arity-offset-pairs names))
             (strtab (make-object asm '.guile.arities.strtab
                                  (link-string-table! names)
                                  '() '()
                                  #:type SHT_STRTAB #:flags 0))
             (arities (make-object asm '.guile.arities
                                   bv
                                   kw-indices-relocs '()
                                   #:type SHT_PROGBITS #:flags 0
                                   #:link (elf-section-index
                                           (linker-object-section strtab)))))
        (values arities strtab)))))
1489
1490 ;;;
1491 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1492 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1493 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1494 ;;; entry to a program, relative to the start of the text section, in
1495 ;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1496 ;;; string table section.
1497 ;;;
1498
;; Size, in bytes, of one docstrs entry: a 32-bit pc followed by a
;; 32-bit string table index.
(define docstr-size (* 2 4))
1501
(define (link-docstrs asm)
  "Link the .guile.docstrs section and its string table.  A procedure
recorded in @var{asm} contributes an entry only if its properties hold
exactly one @code{documentation} entry and that entry's value is a
string.  Return two values: the .guile.docstrs linker object and the
.guile.docstrs.strtab linker object."
  (define (documentation-entry? pair)
    (eq? (car pair) 'documentation))
  (define (sole-docstring props)
    ;; Return the documentation string in PROPS, or #f if there is no
    ;; documentation entry, more than one, or a non-string one.
    (let ((hit (find-tail documentation-entry? props)))
      (cond
       ((not hit) #f)
       ((find-tail documentation-entry? (cdr hit)) #f)
       ((string? (cdar hit)) (cdar hit))
       (else #f))))
  (define (meta->entry meta)
    ;; The pc is recorded in bytes.
    (let ((doc (sole-docstring (meta-properties meta))))
      (and doc
           (cons (* 4 (meta-low-pc meta)) doc))))
  (let* ((order (asm-endianness asm))
         (entries (filter-map meta->entry (reverse (asm-meta asm))))
         (names (make-string-table))
         (bv (make-bytevector (* (length entries) docstr-size) 0)))
    (let fill ((entries entries) (pos 0))
      (unless (null? entries)
        (let ((pc (caar entries))
              (text (cdar entries)))
          (bytevector-u32-set! bv pos pc order)
          (bytevector-u32-set! bv (+ pos 4)
                               (string-table-intern! names text)
                               order)
          (fill (cdr entries) (+ pos docstr-size)))))
    (let* ((strtab (make-object asm '.guile.docstrs.strtab
                                (link-string-table! names)
                                '() '()
                                #:type SHT_STRTAB #:flags 0))
           (docstrs (make-object asm '.guile.docstrs
                                 bv
                                 '() '()
                                 #:type SHT_PROGBITS #:flags 0
                                 #:link (elf-section-index
                                         (linker-object-section strtab)))))
      (values docstrs strtab))))
1539
1540 ;;;
1541 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1542 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1543 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1544 ;;; entry to a program, relative to the start of the text section, and
1545 ;;; addr is the address of the associated properties alist, relative to
1546 ;;; the start of the ELF image.
1547 ;;;
1548 ;;; Since procedure properties are stored in the data sections, we need
1549 ;;; to link the procedures property section first. (Note that this
1550 ;;; constraint does not apply to the arities section, which may
1551 ;;; reference the data sections via the kw-indices literal, because
1552 ;;; assembling the text section already makes sure that the kw-indices
1553 ;;; are interned.)
1554 ;;;
1555
;; Size, in bytes, of one procprops entry: a 32-bit pc followed by a
;; 32-bit address.
(define procprops-size (* 2 4))
1558
(define (link-procprops asm)
  "Link the .guile.procprops section for the procedures recorded in
@var{asm}, and return its linker object.

A procedure gets an entry only if it has properties left after removing
its @code{name} entry and its @code{documentation} entry (the latter
only when its value is a string).  Each entry is the procedure's entry
pc, in bytes from the start of the text section, followed by a 32-bit
slot that a relocation fills with the address of the interned
properties alist."
  (define (assoc-remove-one alist key value-pred)
    ;; Drop the first entry of ALIST whose key is KEY, provided that its
    ;; value satisfies VALUE-PRED.  Later KEY entries are never looked
    ;; at: the search stops at the first one either way.
    (match alist
      (() '())
      ((((? (lambda (x) (eq? x key))) . value) . alist)
       (if (value-pred value)
           alist
           (acons key value alist)))
      (((k . v) . alist)
       (acons k v (assoc-remove-one alist key value-pred)))))
  (define (props-without-name-or-docstring meta)
    (assoc-remove-one
     (assoc-remove-one (meta-properties meta) 'name (lambda (x) #t))
     'documentation
     string?))
  (define (find-procprops)
    (filter-map (lambda (meta)
                  (let ((props (props-without-name-or-docstring meta)))
                    (and (pair? props)
                         ;; meta-low-pc counts 32-bit words; the section
                         ;; stores a byte address, like the symbol
                         ;; table, docstrs and arities sections do.
                         (cons (* 4 (meta-low-pc meta)) props))))
                (reverse (asm-meta asm))))
  (let* ((endianness (asm-endianness asm))
         (procprops (find-procprops))
         (bv (make-bytevector (* (length procprops) procprops-size) 0)))
    (let lp ((procprops procprops) (pos 0) (relocs '()))
      (match procprops
        (()
         (make-object asm '.guile.procprops
                      bv
                      relocs '()
                      #:type SHT_PROGBITS #:flags 0))
        (((pc . props) . procprops)
         (bytevector-u32-set! bv pos pc endianness)
         ;; The second word of the entry stays 0 here; the relocation
         ;; points it at the interned properties.
         (lp procprops
             (+ pos procprops-size)
             (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                      (intern-constant asm props))
                   relocs)))))))
1597
1598 ;;;
1599 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1600 ;;; sections provide line number and local variable liveness
1601 ;;; information. Their format is defined by the DWARF
1602 ;;; specifications.
1603 ;;;
1604
(define (asm-language asm)
  "Return the source language to record for the code assembled by
@var{asm}.  At present this is always the symbol @code{scheme},
whatever @var{asm} is."
  ;; FIXME: Plumb language through to the assembler.
  (quote scheme))
1608
1609 ;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
1610 (define (link-debug asm)
1611 (define (put-s8 port val)
1612 (let ((bv (make-bytevector 1)))
1613 (bytevector-s8-set! bv 0 val)
1614 (put-bytevector port bv)))
1615
1616 (define (put-u16 port val)
1617 (let ((bv (make-bytevector 2)))
1618 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1619 (put-bytevector port bv)))
1620
1621 (define (put-u32 port val)
1622 (let ((bv (make-bytevector 4)))
1623 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1624 (put-bytevector port bv)))
1625
1626 (define (put-u64 port val)
1627 (let ((bv (make-bytevector 8)))
1628 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1629 (put-bytevector port bv)))
1630
1631 (define (put-uleb128 port val)
1632 (let lp ((val val))
1633 (let ((next (ash val -7)))
1634 (if (zero? next)
1635 (put-u8 port val)
1636 (begin
1637 (put-u8 port (logior #x80 (logand val #x7f)))
1638 (lp next))))))
1639
1640 (define (put-sleb128 port val)
1641 (let lp ((val val))
1642 (if (<= 0 (+ val 64) 127)
1643 (put-u8 port (logand val #x7f))
1644 (begin
1645 (put-u8 port (logior #x80 (logand val #x7f)))
1646 (lp (ash val -7))))))
1647
1648 (define (port-position port)
1649 (seek port 0 SEEK_CUR))
1650
1651 (define (meta->subprogram-die meta)
1652 `(subprogram
1653 (@ ,@(cond
1654 ((meta-name meta)
1655 => (lambda (name) `((name ,(symbol->string name)))))
1656 (else
1657 '()))
1658 (low-pc ,(meta-label meta))
1659 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1660
1661 (define (make-compile-unit-die asm)
1662 `(compile-unit
1663 (@ (producer ,(string-append "Guile " (version)))
1664 (language ,(asm-language asm))
1665 (low-pc .rtl-text)
1666 (high-pc ,(* 4 (asm-pos asm)))
1667 (stmt-list 0))
1668 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1669
1670 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1671 ((die-relocs) '())
1672 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1673 ;; (tag has-kids? attrs forms) -> code
1674 ((abbrevs) vlist-null)
1675 ((strtab) (make-string-table))
1676 ((line-port get-line-bv) (open-bytevector-output-port))
1677 ((line-relocs) '())
1678 ;; file -> code
1679 ((files) vlist-null))
1680
1681 (define (write-abbrev code tag has-children? attrs forms)
1682 (put-uleb128 abbrev-port code)
1683 (put-uleb128 abbrev-port (tag-name->code tag))
1684 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1685 (for-each (lambda (attr form)
1686 (put-uleb128 abbrev-port (attribute-name->code attr))
1687 (put-uleb128 abbrev-port (form-name->code form)))
1688 attrs forms)
1689 (put-uleb128 abbrev-port 0)
1690 (put-uleb128 abbrev-port 0))
1691
1692 (define (intern-abbrev tag has-children? attrs forms)
1693 (let ((key (list tag has-children? attrs forms)))
1694 (match (vhash-assoc key abbrevs)
1695 ((_ . code) code)
1696 (#f (let ((code (1+ (vlist-length abbrevs))))
1697 (set! abbrevs (vhash-cons key code abbrevs))
1698 (write-abbrev code tag has-children? attrs forms)
1699 code)))))
1700
1701 (define (intern-file file)
1702 (match (vhash-assoc file files)
1703 ((_ . code) code)
1704 (#f (let ((code (1+ (vlist-length files))))
1705 (set! files (vhash-cons file code files))
1706 code))))
1707
1708 (define (write-sources)
1709 ;; Choose line base and line range values that will allow for an
1710 ;; address advance range of 16 words. The special opcode range is
1711 ;; from 10 to 255, so 246 values.
1712 (define base -4)
1713 (define range 15)
1714
1715 (let lp ((sources (asm-sources asm)) (out '()))
1716 (match sources
1717 (((pc . s) . sources)
1718 (let ((file (assq-ref s 'filename))
1719 (line (assq-ref s 'line))
1720 (col (assq-ref s 'column)))
1721 (lp sources
1722 ;; Guile line and column numbers are 0-indexed, but
1723 ;; they are 1-indexed for DWARF.
1724 (cons (list pc
1725 (if file (intern-file file) 0)
1726 (if line (1+ line))
1727 (if col (1+ col)))
1728 out))))
1729 (()
1730 ;; Compilation unit header for .debug_line. We write in
1731 ;; DWARF 2 format because more tools understand it than DWARF
1732 ;; 4, which incompatibly adds another field to this header.
1733
1734 (put-u32 line-port 0) ; Length; will patch later.
1735 (put-u16 line-port 2) ; DWARF 2 format.
1736 (put-u32 line-port 0) ; Prologue length; will patch later.
1737 (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
1738 (put-u8 line-port 1) ; Default is-stmt: true.
1739
1740 (put-s8 line-port base) ; Line base. See the DWARF standard.
1741 (put-u8 line-port range) ; Line range. See the DWARF standard.
1742 (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
1743
1744 ;; A table of the number of uleb128 arguments taken by each
1745 ;; of the standard opcodes.
1746 (put-u8 line-port 0) ; 1: copy
1747 (put-u8 line-port 1) ; 2: advance-pc
1748 (put-u8 line-port 1) ; 3: advance-line
1749 (put-u8 line-port 1) ; 4: set-file
1750 (put-u8 line-port 1) ; 5: set-column
1751 (put-u8 line-port 0) ; 6: negate-stmt
1752 (put-u8 line-port 0) ; 7: set-basic-block
1753 (put-u8 line-port 0) ; 8: const-add-pc
1754 (put-u8 line-port 1) ; 9: fixed-advance-pc
1755
1756 ;; Include directories, as a zero-terminated sequence of
1757 ;; nul-terminated strings. Nothing, for the moment.
1758 (put-u8 line-port 0)
1759
1760 ;; File table. For each file that contributes to this
1761 ;; compilation unit, a nul-terminated file name string, and a
1762 ;; uleb128 for each of directory the file was found in, the
1763 ;; modification time, and the file's size in bytes. We pass
1764 ;; zero for the latter three fields.
1765 (vlist-fold-right
1766 (lambda (pair seed)
1767 (match pair
1768 ((file . code)
1769 (put-bytevector line-port (string->utf8 file))
1770 (put-u8 line-port 0)
1771 (put-uleb128 line-port 0) ; directory
1772 (put-uleb128 line-port 0) ; mtime
1773 (put-uleb128 line-port 0))) ; size
1774 seed)
1775 #f
1776 files)
1777 (put-u8 line-port 0) ; 0 byte terminating file list.
1778
1779 ;; Patch prologue length.
1780 (let ((offset (port-position line-port)))
1781 (seek line-port 6 SEEK_SET)
1782 (put-u32 line-port (- offset 10))
1783 (seek line-port offset SEEK_SET))
1784
1785 ;; Now write the statement program.
1786 (let ()
1787 (define (extended-op opcode payload-len)
1788 (put-u8 line-port 0) ; extended op
1789 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
1790 (put-uleb128 line-port opcode))
1791 (define (set-address sym)
1792 (define (add-reloc! kind)
1793 (set! line-relocs
1794 (cons (make-linker-reloc kind
1795 (port-position line-port)
1796 0
1797 sym)
1798 line-relocs)))
1799 (match (asm-word-size asm)
1800 (4
1801 (extended-op 2 4)
1802 (add-reloc! 'abs32/1)
1803 (put-u32 line-port 0))
1804 (8
1805 (extended-op 2 8)
1806 (add-reloc! 'abs64/1)
1807 (put-u64 line-port 0))))
1808 (define (end-sequence pc)
1809 (let ((pc-inc (- (asm-pos asm) pc)))
1810 (put-u8 line-port 2) ; advance-pc
1811 (put-uleb128 line-port pc-inc))
1812 (extended-op 1 0))
;; Emit the opcodes needed to advance the address by PC-INC and the line
;; by LINE-INC, preferring the most compact encoding:
;;
;;   * If LINE-INC lies outside [base, base + range), emit an explicit
;;     advance-line first and retry with a line increment of zero.
;;   * If the combined value SPEC fits in one byte (<= 255), emit it as a
;;     single opcode byte.
;;   * If SPEC is below 500, emit const-advance-pc (opcode 8) and retry
;;     with PC-INC reduced by the amount that op accounts for.
;;   * Otherwise emit an explicit advance-pc (opcode 2) with PC-INC as a
;;     uleb128 and retry with a pc increment of zero.
;;
;; `base' and `range' are bound outside this definition (presumably the
;; line-base and line-range written to the line program header -- TODO
;; confirm); the constant 10 is presumably the header's opcode base.
(define (advance-pc pc-inc line-inc)
  (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
    (cond
     ((or (< line-inc base) (>= line-inc (+ base range)))
      (advance-line line-inc)
      (advance-pc pc-inc 0))
     ((<= spec 255)
      (put-u8 line-port spec))
     ((< spec 500)
      (put-u8 line-port 8)              ; const-advance-pc
      ;; floor-quotient yields only the quotient.  floor/ returns both
      ;; quotient and remainder, which only worked here through implicit
      ;; truncation of multiple values in an argument position.
      (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                  line-inc))
     (else
      (put-u8 line-port 2)              ; advance-pc
      (put-uleb128 line-port pc-inc)
      (advance-pc 0 line-inc)))))
;; Emit opcode 3 followed by INC encoded as a signed LEB128 operand.
(define (advance-line inc)
  (let ((advance-line-opcode 3))
    (put-u8 line-port advance-line-opcode)
    (put-sleb128 line-port inc)))
;; Emit opcode 4 followed by FILE encoded as an unsigned LEB128 operand.
(define (set-file file)
  (let ((set-file-opcode 4))
    (put-u8 line-port set-file-opcode)
    (put-uleb128 line-port file)))
;; Emit opcode 5 followed by COL encoded as an unsigned LEB128 operand.
(define (set-column col)
  (let ((set-column-opcode 5))
    (put-u8 line-port set-column-opcode)
    (put-uleb128 line-port col)))
1838
1839 (set-address '.rtl-text)
1840
1841 (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
1842 (match in
1843 (()
1844 (when (null? out)
1845 ;; There was no source info in the first place. Set
1846 ;; file register to 0 before adding final row.
1847 (set-file 0))
1848 (end-sequence pc))
1849 (((pc* file* line* col*) . in*)
1850 (cond
1851 ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
1852 (lp in* pc file line col))
1853 (else
1854 (unless (eqv? col col*)
1855 (set-column col*))
1856 (unless (eqv? file file*)
1857 (set-file file*))
1858 (advance-pc (- pc* pc) (- line* line))
1859 (lp in* pc* file* line* col*)))))))))))
1860
;; Translate the value VAL of DIE attribute ATTR into the datum that is
;; actually written out:
;;   name, producer           -> result of interning VAL in strtab
;;   language                 -> (language-name->code VAL)
;;   low-pc, high-pc,
;;   stmt-list                -> VAL unchanged
;; Any other attribute fails the match.
(define (compute-code attr val)
  (match attr
    ((or 'name 'producer)
     (string-table-intern! strtab val))
    ('language
     (language-name->code val))
    ((or 'low-pc 'high-pc 'stmt-list)
     val)))
1869
;; Return #t if VAL is a number that is both exact and an integer, and
;; #f for everything else, including non-numeric objects.
(define (exact-integer? val)
  (cond
   ((not (number? val)) #f)
   ((not (exact? val)) #f)
   (else (integer? val))))
1872
;; Choose the form (a symbol) with which an attribute is encoded, given
;; the attribute name ATTR, its source value VAL and the computed datum
;; CODE.  Checked in this order:
;;   * a string VAL                -> strp
;;   * the stmt-list attribute     -> sec-offset
;;   * an exact integer CODE       -> sleb128 when negative, otherwise
;;                                    the smallest of data1/2/4/8 that
;;                                    holds it, or uleb128 beyond 64 bits
;;   * a symbol VAL                -> addr
;; Anything else signals an error.
(define (choose-form attr val code)
  (define (integer-form n)
    (cond
     ((negative? n) 'sleb128)
     ((<= n #xff) 'data1)
     ((<= n #xffff) 'data2)
     ((<= n #xffffffff) 'data4)
     ((<= n #xffffffffffffffff) 'data8)
     (else 'uleb128)))
  (cond
   ((string? val) 'strp)
   ((eq? attr 'stmt-list) 'sec-offset)
   ((exact-integer? code) (integer-form code))
   ((symbol? val) 'addr)
   (else (error "unhandled case" attr val code))))
1887
;; Push a relocation of kind KIND against symbol SYM onto die-relocs,
;; located at the current write position of die-port.
(define (add-die-relocation! kind sym)
  (let* ((here (port-position die-port))
         (reloc (make-linker-reloc kind here 0 sym)))
    (set! die-relocs (cons reloc die-relocs))))
1892
;; Write the attribute datum CODE to die-port, encoded according to FORM
;; (one of the symbols produced by choose-form).  For the addr form, CODE
;; is a symbol: a zero word of the assembler's word size is written and a
;; matching relocation against that symbol is recorded.  An unknown form
;; or an unsupported word size fails the match.
(define (write-value code form)
  (define (write-address sym)
    (let ((word-size (asm-word-size asm)))
      (match word-size
        (4
         (add-die-relocation! 'abs32/1 sym)
         (put-u32 die-port 0))
        (8
         (add-die-relocation! 'abs64/1 sym)
         (put-u64 die-port 0)))))
  (match form
    ('data1 (put-u8 die-port code))
    ('data2 (put-u16 die-port code))
    ;; Section offsets and string-table offsets are written as 32-bit
    ;; values, just like data4.
    ((or 'data4 'sec-offset 'strp) (put-u32 die-port code))
    ('data8 (put-u64 die-port code))
    ('uleb128 (put-uleb128 die-port code))
    ('sleb128 (put-sleb128 die-port code))
    ('addr (write-address code))))
1911
;; Serialize DIE, an s-expression of the shape
;;   (TAG (@ (ATTR VAL) ...) CHILD ...)
;; to die-port.  The abbreviation code obtained from intern-abbrev is
;; written first, then each attribute value in its chosen form.  When
;; there are children they are written recursively, followed by a zero
;; uleb128 that closes the sibling list.
(define (write-die die)
  (match die
    ((tag ('@ (attrs vals) ...) kids ...)
     (let* ((codes (map compute-code attrs vals))
            (forms (map choose-form attrs vals codes))
            (has-kids? (pair? kids))
            (abbrev (intern-abbrev tag has-kids? attrs forms)))
       (put-uleb128 die-port abbrev)
       (for-each write-value codes forms)
       (unless (null? kids)
         (for-each write-die kids)
         (put-uleb128 die-port 0))))))
1924
1925 ;; Compilation unit header.
1926 (put-u32 die-port 0) ; Length; will patch later.
1927 (put-u16 die-port 4) ; DWARF 4.
1928 (put-u32 die-port 0) ; Abbrevs offset.
1929 (put-u8 die-port (asm-word-size asm)) ; Address size.
1930
1931 (write-die (make-compile-unit-die asm))
1932
1933 ;; Terminate the abbrevs list.
1934 (put-uleb128 abbrev-port 0)
1935
1936 (write-sources)
1937
1938 (values (let ((bv (get-die-bv)))
1939 ;; Patch DWARF32 length.
1940 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1941 (asm-endianness asm))
1942 (make-object asm '.debug_info bv die-relocs '()
1943 #:type SHT_PROGBITS #:flags 0))
1944 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
1945 #:type SHT_PROGBITS #:flags 0)
1946 (make-object asm '.debug_str (link-string-table! strtab) '() '()
1947 #:type SHT_PROGBITS #:flags 0)
1948 (make-object asm '.debug_loc #vu8() '() '()
1949 #:type SHT_PROGBITS #:flags 0)
1950 (let ((bv (get-line-bv)))
1951 ;; Patch DWARF32 length.
1952 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1953 (asm-endianness asm))
1954 (make-object asm '.debug_line bv line-relocs '()
1955 #:type SHT_PROGBITS #:flags 0)))))
1956
;; Run every section linker for ASM and return the resulting objects as a
;; list, with any #f entries dropped.  The order of the link steps below
;; is significant and must be kept.
(define (link-objects asm)
  (let*-values
      (;; Procedure properties go first: linking them probably interns
       ;; more constants, so the constants must be linked afterwards.
       ((procprops-obj) (link-procprops asm))
       ((ro-obj rw-obj rw-init) (link-constants asm))
       ;; The text object comes after the constants so that the
       ;; constants initializer is included in it.
       ((text-obj) (link-text-object asm))
       ((dynamic-obj) (link-dynamic-section asm text-obj rw-obj rw-init))
       ((symtab-obj strtab-obj)
        (link-symtab (linker-object-section text-obj) asm))
       ((arities-obj arities-strtab-obj) (link-arities asm))
       ((docstrs-obj docstrs-strtab-obj) (link-docstrs asm))
       ((debug-info debug-abbrev debug-str debug-loc debug-line)
        (link-debug asm))
       ;; The section-header string table has to be last, since linking
       ;; the other sections adds entries to the string table.
       ((shstrtab-obj) (link-shstrtab asm)))
    (filter identity
            (append
             (list text-obj ro-obj rw-obj dynamic-obj)
             (list symtab-obj strtab-obj)
             (list arities-obj arities-strtab-obj)
             (list docstrs-obj docstrs-strtab-obj procprops-obj)
             (list debug-info debug-abbrev debug-str debug-loc debug-line)
             (list shstrtab-obj)))))
1978
1979
1980 \f
1981
1982 ;;;
1983 ;;; High-level public interfaces.
1984 ;;;
1985
(define* (link-assembly asm #:key (page-aligned? #t))
  "Link the code and data that have been emitted into @var{asm} and
return the resulting ELF image as a bytevector.  By default the image is
laid out so that read-only and writable data are on separate pages; pass
@code{#:page-aligned? #f} to turn that off."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))