Per-instruction tracing doesn't try to disassemble stack VM code
[bpt/guile.git] / module / system / vm / assembler.scm
CommitLineData
e78991aa
AW
1;;; Guile RTL assembler
2
3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Commentary:
20;;;
21;;; This module implements an assembler that creates an ELF image from
22;;; RTL assembly and macro-assembly. The input can be given in
23;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24;;; procedural interface, the emit-OP procedures, but that is not
25;;; currently exported.
26;;;
27;;; "Primitive instructions" correspond to RTL VM operations.
28;;; Assemblers for primitive instructions are generated programmatically
29;;; from (rtl-instruction-list), which itself is derived from the VM
30;;; sources. There are also "macro-instructions" like "label" or
31;;; "load-constant" that expand to 0 or more primitive instructions.
32;;;
33;;; The assembler also handles some higher-level tasks, like creating
34;;; the symbol table, other metadata sections, creating a constant table
35;;; for the whole compilation unit, and writing the dynamic section of
36;;; the ELF file along with the appropriate initialization routines.
37;;;
38;;; Most compilers will want to use the trio of make-assembler,
39;;; emit-text, and link-assembly. That will result in the creation of
40;;; an ELF image as a bytevector, which can then be loaded using
41;;; load-thunk-from-memory, or written to disk as a .go file.
42;;;
43;;; Code:
44
45(define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
a862d8c1 48 #:use-module (system vm dwarf)
e78991aa
AW
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
51 #:use-module (system vm objcode)
52 #:use-module (rnrs bytevectors)
a862d8c1 53 #:use-module (ice-9 binary-ports)
e78991aa
AW
54 #:use-module (ice-9 vlist)
55 #:use-module (ice-9 match)
56 #:use-module (srfi srfi-1)
57 #:use-module (srfi srfi-4)
58 #:use-module (srfi srfi-9)
59 #:use-module (srfi srfi-11)
60 #:export (make-assembler
61 emit-text
62 link-assembly
63 assemble-program))
64
65
66\f
67
68;;; RTL code consists of 32-bit units, often subdivided in some way.
69;;; These helpers create one 32-bit unit from multiple components.
70
cb8054c7
AW
;; Pack X into bits 0-7 and Y into bits 8 and up of one word.  Only X
;; is range-checked here.
(define-inlinable (pack-u8-u24 x y)
  (if (<= 0 x 255)
      (logior x (ash y 8))
      (error "out of range" x)))
75
cb8054c7
AW
;; Pack an unsigned 8-bit field X (bits 0-7) and a signed 24-bit field Y
;; (bits 8-31, two's complement) into one 32-bit word.
;;
;; Signals "out of range" if X is outside [0, 255] or Y is outside
;; [-#x800000, #x7fffff].
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ;; Negative values are stored modulo 2^24.
                  ((<= (- #x800000) y -1)
                   (+ y #x1000000))
                  ;; Anything above #x7fffff would set the sign bit and
                  ;; read back as a negative offset, so reject it.
                  ((<= 0 y #x7fffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
86
cb8054c7
AW
;; Pack a 1-bit field X (bit 0), a 7-bit field Y (bits 1-7) and a field
;; Z (bits 8 and up) into one word.  X and Y are range-checked.
(define-inlinable (pack-u1-u7-u24 x y z)
  (cond
   ((not (<= 0 x 1)) (error "out of range" x))
   ((not (<= 0 y 127)) (error "out of range" y))
   (else (logior x (ash y 1) (ash z 8)))))
93
cb8054c7
AW
;; Pack an 8-bit field X (bits 0-7), a 12-bit field Y (bits 8-19) and a
;; field Z (bits 20 and up) into one word.  X and Y are range-checked.
(define-inlinable (pack-u8-u12-u12 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 4095)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 20)))))
100
cb8054c7
AW
;; Pack two 8-bit fields X (bits 0-7) and Y (bits 8-15) and a field Z
;; (bits 16 and up) into one word.  X and Y are range-checked.
(define-inlinable (pack-u8-u8-u16 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 16)))))
107
cb8054c7
AW
;; Pack four byte-sized fields into one word, X in the lowest byte and
;; W in the highest.  X, Y and Z are range-checked.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   ((not (<= 0 z 255)) (error "out of range" z))
   (else (logior x (ash y 8) (ash z 16) (ash w 24)))))
116
07c05279
AW
;; Combine boolean flags into an integer bit set: F1 contributes bit 0,
;; F2 contributes bit 1.
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ f1 f2)
     (logior (if f1 #b01 0)
             (if f2 #b10 0)))))
122
e78991aa
AW
123;;; Helpers to read and write 32-bit units in a buffer.
124
;; Word-indexed accessors: N counts 32-bit units, so the byte offset
;; into BUF is 4*N.  All accesses use native endianness.

(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))

(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))

(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))

(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
136
137
138\f
139
140;;; A <meta> entry collects metadata for one procedure. Procedures are
141;;; written as contiguous ranges of RTL code.
142;;;
2a4daafd
AW
;; Evaluate ARG once and signal an error, whose message is "expected "
;; followed by KIND, unless the value matches the (ice-9 match) PATTERN.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (if (not (match val (pattern #t) (_ #f)))
           (error (string-append "expected " kind) val))))))
147
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; Label marking the start of the procedure.
  (label meta-label)
  ;; Property alist; `meta-name' looks up the `name' key here.
  (properties meta-properties set-meta-properties!)
  ;; Position at which the procedure's code begins.
  (low-pc meta-low-pc)
  ;; Position at which the procedure's code ends; set by `end-program'.
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; The procedure's <arity> records.
  (arities meta-arities set-meta-arities!))
e78991aa 156
2a4daafd
AW
;; Validating constructor for <meta>.  LABEL must be a symbol and
;; PROPERTIES an alist with symbolic keys.  The record starts out with
;; no high-pc and an empty arity list.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
2a4daafd
AW
161
162(define (meta-name meta)
163 (assq-ref (meta-properties meta) 'name))
164
3185c907
AW
165;; Metadata for one <lambda-case>.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc)
  arity?
  ;; Required and optional argument names, and the rest argument name
  ;; or #f.
  (req arity-req)
  (opt arity-opt)
  (rest arity-rest)
  ;; Alist of keyword -> integer index.
  (kw-indices arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  ;; Positions at which the code for this arity begins and ends.  The
  ;; end is only known once `end-arity' runs.
  (low-pc arity-low-pc)
  (high-pc arity-high-pc set-arity-high-pc!))
177
e78991aa
AW
178(define-syntax *block-size* (identifier-syntax 32))
179
180;;; An assembler collects all of the words emitted during assembly, and
181;;; also maintains ancillary information such as the constant table, a
182;;; relocation list, and so on.
183;;;
184;;; RTL code consists of 32-bit units. We emit RTL code using native
185;;; endianness. If we're targeting a foreign endianness, we byte-swap
186;;; the bytevector as a whole instead of conditionalizing each access.
187;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources)
  asm?

  ;; RTL code is written into a logically growable vector, implemented
  ;; as a list of fixed-size blocks.  `cur' is the block currently being
  ;; filled and `idx' the index of the next free slot in it, counted in
  ;; 32-bit units.
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; Absolute position (in u32 units) of the beginning of the
  ;; instruction being assembled.  It is updated only after all the
  ;; words of one primitive instruction have been written, mirroring
  ;; how the RTL VM advances its IP at the end of an instruction; this
  ;; makes it the right base for computing offsets between two points
  ;; in a program.
  (start asm-start set-asm-start!)

  ;; Blocks that have already been filled.
  (prev asm-prev set-asm-prev!)

  ;; Number of u32 words held in `prev', i.e. the offset of `cur'.
  (written asm-written set-asm-written!)

  ;; The labels defined in this compilation unit, mapping each symbol to
  ;; its position.
  (labels asm-labels set-asm-labels!)

  ;; Relocations needed by the program text, in an internal
  ;; representation.  Textual relative relocations are handled by the
  ;; assembler itself; other kinds are later reified as linker
  ;; relocations and resolved by the linker.
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table, a vhash of object -> label.  Constants are
  ;; de-duplicated and written into separate sections: .rodata for
  ;; read-only data, or .data for constants that need initialization at
  ;; load time (like symbols).  Constants can depend on other constants
  ;; (e.g. a symbol on its stringbuf), so order in this table matters.
  (constants asm-constants set-asm-constants!)

  ;; RTL instructions needed to initialize the constants.  They will
  ;; run in a thunk with 2 local variables.
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, holding section names.
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; Section number to assign to the next section written.
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta> records holding procedure metadata.
  (meta asm-meta set-asm-meta!)

  ;; A list of (pos . source) pairs.  POS is relative to the beginning
  ;; of the text section and SOURCE has the format returned by
  ;; source-properties.
  (sources asm-sources set-asm-sources!))
e78991aa
AW
271
272(define-inlinable (fresh-block)
273 (make-u32vector *block-size*))
274
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Create an assembler for a given target @var{word-size} and
@var{endianness}, falling back to appropriate values for the configured
target."
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (shstrtab (make-string-table)))
    (make-asm first-block 0 0 '() 0     ; cur idx start prev written
              labels '()                ; labels relocs
              word-size endianness
              vlist-null '()            ; constants inits
              shstrtab 1                ; shstrtab next-section-number
              '() '())))                ; meta sources
e78991aa
AW
286
287(define (intern-section-name! asm string)
288 "Add a string to the section name table (shstrtab)."
289 (string-table-intern! (asm-shstrtab asm) string))
290
(define-inlinable (asm-pos asm)
  "The offset of the next word to be written into the code buffer, in
32-bit units."
  ;; Words in the already-closed blocks plus the fill level of the
  ;; current block.
  (let ((closed (asm-written asm))
        (in-current (asm-idx asm)))
    (+ in-current closed)))
295
(define (allocate-new-block asm)
  "Close off the current block, and arrange for the next word to be
written to a fresh block."
  ;; Retire the current block first; asm-written must be updated while
  ;; asm-idx still describes the retired block.
  (set-asm-prev! asm (cons (asm-cur asm) (asm-prev asm)))
  (set-asm-written! asm (asm-pos asm))
  (set-asm-cur! asm (fresh-block))
  (set-asm-idx! asm 0))
304
(define-inlinable (emit asm u32)
  "Emit one 32-bit word into the instruction stream. Assumes that there
is space for the word, and ensures that there is space for the next
word."
  (let* ((slot (asm-idx asm))
         (next (1+ slot)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm next)
    ;; The block is full: switch to a fresh one so the next emit has
    ;; room.
    (when (= next *block-size*)
      (allocate-new-block asm))))
313
;; An internal relocation is represented as the list
;; (TYPE LABEL BASE WORD).
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} referencing symbol
@var{label}, @var{word} words after position @var{base}. @var{type}
may be x8-s24, indicating a 24-bit relative label reference that can be
fixed up by the assembler, or s32, indicating a 32-bit relative
reference that needs to be fixed up by the linker."
  (list type label base word))
321
322(define-inlinable (reset-asm-start! asm)
323 "Reset the asm-start after writing the words for one instruction."
324 (set-asm-start! asm (asm-pos asm)))
325
e78991aa
AW
(define (record-label-reference asm label)
  "Record an x8-s24 local label reference. This value will get patched
up later by the assembler."
  ;; BASE is the start of the current instruction; WORD is how many
  ;; words of that instruction have been emitted so far.
  (let* ((base (asm-start asm))
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label base word)
                           (asm-relocs asm)))))
333
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Record an s32 far label reference. This value will get patched up
later by the linker."
  ;; The relocation base is the start of the current instruction moved
  ;; back by OFFSET words; WORD is measured from that adjusted base.
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base word)
                           (asm-relocs asm)))))
341
342
343\f
344
345;;;
346;;; Primitive assemblers are defined by expanding `assembler' for each
347;;; opcode in `(rtl-instruction-list)'.
348;;;
349
(eval-when (expand compile load eval)
  ;; Build an identifier whose name is the concatenation of the names of
  ;; identifiers A and B, in the lexical context of CTX.
  (define (id-append ctx a b)
    (let ((name (symbol-append (syntax->datum a) (syntax->datum b))))
      (datum->syntax ctx name))))
353
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure
;;
;;   (lambda (asm ARG ...) ...)
;;
;; that emits the words of one primitive instruction into ASM and then
;; resets asm-start.  WORD0 and each WORD* name the layout of one
;; instruction word (U8_U24, X8_L24, ...); the ARGs are the operands
;; those layouts require, in order.
(define-syntax assembler
  (lambda (x)
    ;; (op-case ASM NAME ((TYPE ARG ...) CODE ...) ...) runs at expansion
    ;; time.  It picks the clause whose TYPE is eq? to NAME and returns a
    ;; syntax object ((FORMAL ...) CODE ...) in which each ARG has been
    ;; replaced by a fresh temporary.  An unknown NAME is an error.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, whose low
    ;; 8 bits always hold OPCODE.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          ;; The label field is left zero here and recorded as a
          ;; relocation.
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any subsequent word of an instruction.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; A tail word has no opcode, so all three fields are operands.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; Two words: high 32 bits first, then low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ;; B32 takes no operand and emits nothing; it is the second half
       ;; of the word pair already written by A32.
       ((B32))
       ;; Far label references: emit a zero placeholder and record an
       ;; s32 relocation for the linker.
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        ;; OFFSET is given in target words; convert to 32-bit units.
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
460
461(define assemblers (make-hash-table))
462
;; (define-assembler NAME OPCODE KIND WORD ...) defines and exports
;; emit-NAME, the primitive assembler for instruction NAME, and
;; registers it in `assemblers' under the symbol NAME.  KIND is ignored.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               ;; The inner binding reuses the emit-NAME identifier, as
               ;; in the definition being produced.
               (let ((emit-name (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
e78991aa
AW
474
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form per entry INST of (rtl-instruction-list), so each entry's
;; elements become trailing arguments to MACRO.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (spec) (datum->syntax #'macro spec))
                           (rtl-instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))
485
486(visit-opcodes define-assembler)
487
(define (emit-text asm instructions)
  "Assemble @var{instructions} using the assembler @var{asm}.
@var{instructions} is a sequence of RTL instructions, expressed as a
list of lists. This procedure can be called many times before calling
@code{link-assembly}."
  ;; Each instruction is (OP ARG ...); OP selects the emitter from the
  ;; `assemblers' table and the ARGs are passed through after ASM.
  (define (assemble-one inst)
    (let ((emitter (or (hashq-ref assemblers (car inst))
                       (error 'bad-instruction inst))))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
499
500\f
501
502;;;
503;;; The constant table records a topologically sorted set of literal
504;;; constants used by a program. For example, a pair uses its car and
505;;; cdr, a string uses its stringbuf, etc.
506;;;
507;;; Some things we want to add to the constant table are not actually
508;;; Scheme objects: for example, stringbufs, cache cells for toplevel
509;;; references, or cache cells for non-closure procedures. For these we
510;;; define special record types and add instances of those record types
511;;; to the table.
512;;;
513
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
  ;; X counts as immediate when bit 1 or bit 2 of its object address is
  ;; set.
  (logtest (object-address x) 6))
517
518(define-record-type <stringbuf>
519 (make-stringbuf string)
520 stringbuf?
521 (string stringbuf-string))
522
523(define-record-type <static-procedure>
524 (make-static-procedure code)
525 static-procedure?
526 (code static-procedure-code))
527
7bfbc7b1 528(define-record-type <uniform-vector-backing-store>
d65514a2 529 (make-uniform-vector-backing-store bytes element-size)
7bfbc7b1 530 uniform-vector-backing-store?
d65514a2
AW
531 (bytes uniform-vector-backing-store-bytes)
532 (element-size uniform-vector-backing-store-element-size))
7bfbc7b1 533
e78991aa
AW
534(define-record-type <cache-cell>
535 (make-cache-cell scope key)
536 cache-cell?
537 (scope cache-cell-scope)
538 (key cache-cell-key))
539
7bfbc7b1
AW
;; True for a vector whose array shape is a single dimension running
;; from 0 to length - 1.
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last (1- (vector-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
543
;; True for an array whose element type is named by a symbol and whose
;; shape is a single dimension running from 0 to length - 1.
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last (1- (array-length obj))))
         (equal? (array-shape obj) `((0 ,last))))))
548
e78991aa
AW
(define (statically-allocatable? x)
  "Return @code{#t} if a non-immediate constant can be allocated
statically, and @code{#f} if it would need some kind of runtime
allocation."
  ;; Built-in types first, then the assembler's own placeholder records.
  (or (pair? x)
      (string? x)
      (array? x)
      (stringbuf? x)
      (static-procedure? x)))
e78991aa
AW
554
(define (intern-constant asm obj)
  "Add an object to the constant table, and return a label that can be
used to reference it. If the object is already present in the constant
table, its existing label is used directly."
  ;; Intern a sub-object of OBJ; returns its label, or #f for an
  ;; immediate.
  (define (intern-child child)
    (intern-constant asm child))
  ;; Instructions that store VAL into word N of the object at DST.
  ;; Immediates need none.  Statically allocatable values are patched in
  ;; directly; everything else is loaded into local 1 and then stored.
  (define (field-inits dst n val)
    (let ((src (intern-child val)))
      (cond
       ((not src) '())
       ((statically-allocatable? val)
        `((static-patch! ,dst ,n ,src)))
       (else
        `((static-ref 1 ,src)
          (static-set! 1 ,dst ,n))))))
  ;; Intern everything OBJ depends on and return the list of
  ;; instructions needed to initialize the object at LABEL.
  (define (init-code obj label)
    (cond
     ((pair? obj)
      (append (field-inits label 0 (car obj))
              (field-inits label 1 (cdr obj))))
     ((simple-vector? obj)
      ;; Element I lives in word I+1 of the vector object.
      (let loop ((i 0) (acc '()))
        (if (< i (vector-length obj))
            (loop (1+ i)
                  (append-reverse (field-inits label (1+ i) (vector-ref obj i))
                                  acc))
            (reverse acc))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((static-patch! ,label 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      `((make-non-immediate 1 ,(intern-child (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((static-patch! ,label 1 ,(intern-child (make-stringbuf obj)))))
     ((keyword? obj)
      `((static-ref 1 ,(intern-child (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      `((make-non-immediate 1 ,(intern-child (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? obj) '())
     ((simple-uniform-vector? obj)
      `((static-patch! ,label 2
                       ,(intern-child (make-uniform-vector-backing-store
                                       (uniform-array->bytevector obj)
                                       (if (bitvector? obj)
                                           ;; Bitvectors are addressed in
                                           ;; 32-bit units.
                                           4
                                           (uniform-vector-element-size obj)))))))
     (else
      (error "don't know how to intern" obj))))
  (cond
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Computing the init code may itself mutate asm-constants and
    ;; asm-inits (sub-objects are interned first), so both fields are
    ;; re-read afterwards before OBJ's own entry is added.
    (let* ((label (gensym "constant"))
           (inits (init-code obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
      label))))
622
(define (intern-non-immediate asm obj)
  "Intern a non-immediate into the constant table, and return its
label."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
629
(define (intern-cache-cell asm scope key)
  "Intern a cache cell into the constant table, and return its label.
If there is already a cache cell with the given scope and key, it is
returned instead."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
635
636;; Return the label of the cell that holds the module for a scope.
637(define (intern-module-cache-cell asm scope)
638 "Intern a cache cell for a module, and return its label."
639 (intern-cache-cell asm scope #t))
640
641
642\f
643
644;;;
645;;; Macro assemblers bridge the gap between primitive instructions and
646;;; some higher-level operations.
647;;;
648
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; a procedure emit-NAME with the given arguments and body, and
;; registers it in `assemblers' under the symbol NAME, alongside the
;; primitive assemblers.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               ;; The inner binding reuses the emit-NAME identifier, as
               ;; in the definition being produced.
               (let ((emit-name (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
e78991aa
AW
660
;; Load the constant OBJ into local DST, choosing the cheapest
;; instruction: an immediate form sized to the object's bits, a direct
;; reference to statically allocated data, or a load from a static cell
;; for constants that are built at load time.
(define-macro-assembler (load-constant asm dst obj)
  (define (emit-immediate)
    (let ((bits (object-address obj)))
      (define (fits-in? width)
        (zero? (ash bits (- width))))
      (cond
       ((and (< dst 256) (fits-in? 16))
        (emit-make-short-immediate asm dst obj))
       ((fits-in? 32)
        (emit-make-long-immediate asm dst obj))
       (else
        (emit-make-long-long-immediate asm dst obj)))))
  (cond
   ((immediate? obj)
    (emit-immediate))
   ((statically-allocatable? obj)
    (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
   (else
    (emit-static-ref asm dst (intern-non-immediate asm obj)))))
676
;; Load into DST a reference to a statically allocated procedure object
;; whose code is at LABEL.
(define-macro-assembler (load-static-procedure asm dst label)
  (let* ((proc (make-static-procedure label))
         (loc (intern-constant asm proc)))
    (emit-make-non-immediate asm dst loc)))
680
be8b62ca
AW
;; (define-tc7-macro-assembler NAME TC7) defines the macro-assembler
;; NAME as br-if-tc7 with the type code fixed to TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
684
685;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
becce37b
AW
686;; macro assemblers are commented out. See also
687;; *branching-primcall-arities* in (language cps primitives), the set of
688;; macro-instructions in assembly.scm, and
689;; disassembler.scm:code-annotation.
690;;
691;; FIXME: Define all tc7 values in Scheme in one place, derived from
692;; tags.h.
be8b62ca
AW
693(define-tc7-macro-assembler br-if-symbol 5)
694(define-tc7-macro-assembler br-if-variable 7)
695(define-tc7-macro-assembler br-if-vector 13)
696;(define-tc7-macro-assembler br-if-weak-vector 13)
697(define-tc7-macro-assembler br-if-string 21)
698;(define-tc7-macro-assembler br-if-heap-number 23)
699;(define-tc7-macro-assembler br-if-stringbuf 39)
becce37b 700(define-tc7-macro-assembler br-if-bytevector 77)
be8b62ca
AW
701;(define-tc7-macro-assembler br-if-pointer 31)
702;(define-tc7-macro-assembler br-if-hashtable 29)
703;(define-tc7-macro-assembler br-if-fluid 37)
704;(define-tc7-macro-assembler br-if-dynamic-state 45)
705;(define-tc7-macro-assembler br-if-frame 47)
706;(define-tc7-macro-assembler br-if-objcode 53)
707;(define-tc7-macro-assembler br-if-vm 55)
708;(define-tc7-macro-assembler br-if-vm-cont 71)
709;(define-tc7-macro-assembler br-if-rtl-program 69)
710;(define-tc7-macro-assembler br-if-program 79)
711;(define-tc7-macro-assembler br-if-weak-set 85)
712;(define-tc7-macro-assembler br-if-weak-table 87)
713;(define-tc7-macro-assembler br-if-array 93)
d65514a2 714(define-tc7-macro-assembler br-if-bitvector 95)
be8b62ca
AW
715;(define-tc7-macro-assembler br-if-port 125)
716;(define-tc7-macro-assembler br-if-smob 127)
717
;; Start a procedure: define LABEL at the current position and push a
;; fresh <meta> record for it onto asm-meta.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
722
;; Finish the most recently begun procedure: record where its code ends
;; and put its arities, which were accumulated newest-first, into
;; emission order.
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (end-pos (asm-start asm)))
    (set-meta-high-pc! current end-pos)
    (set-meta-arities! current (reverse (meta-arities current)))))
727
728(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
729 (emit-begin-opt-arity asm req '() #f nlocals alternate))
730
731(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
732 (emit-begin-kw-arity asm req opt rest '() #f nlocals alternate))
733
;; Begin one arity (lambda case) of the current procedure: validate the
;; arguments, record an <arity> on the current <meta>, and emit the
;; simplest prelude that can handle the argument signature.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  (let ((current (car (asm-meta asm)))
        ;; The arity's high-pc is filled in later by end-arity.
        (arity (make-arity req opt rest kw-indices allow-other-keys?
                           (asm-start asm) #f))
        ;; The procedure itself is in slot 0, in the standard calling
        ;; convention.  For procedure prologues, nreq includes the
        ;; procedure, so here we add 1.
        (nreq (1+ (length req)))
        (nopt (length opt))
        (rest? (->bool rest)))
    (set-meta-arities! current (cons arity (meta-arities current)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
762
763(define-macro-assembler (end-arity asm)
764 (let ((arity (car (meta-arities (car (asm-meta asm))))))
765 (set-arity-high-pc! arity (asm-start asm))))
e78991aa 766
07c05279
AW
;; Prelude for a procedure taking exactly NREQ arguments.  With an
;; ALTERNATE label, an argument-count mismatch branches there instead of
;; signalling an error.  Without one, the count check and frame
;; allocation are fused into a single instruction when both operands
;; fit in 12 bits.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (let ((limit (ash 1 12))
            (extra-locals (- nlocals nreq)))
        (if (and (< nreq limit) (< extra-locals limit))
            (emit-assert-nargs-ee/locals asm nreq extra-locals)
            (begin
              (emit-assert-nargs-ee asm nreq)
              (emit-alloc-frame asm nlocals))))))
07c05279
AW
777
;; Prelude for a procedure with optional and/or rest arguments.  With an
;; ALTERNATE label, argument-count mismatches branch there instead of
;; signalling an error.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (let ((npositional (+ nreq nopt)))
    ;; Lower bound on the argument count.
    (if alternate
        (emit-br-if-nargs-lt asm nreq alternate)
        (emit-assert-nargs-ge asm nreq))
    ;; Upper bound, unless extra arguments go to a rest list.
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional)))
    (emit-alloc-frame asm nlocals)))
07c05279
AW
790
;; Prelude for a procedure with keyword arguments.  KW-INDICES is an
;; alist of keyword -> index.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (unless rest?
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  ;; NTOTAL is one more than the highest keyword index, and never less
  ;; than the number of positional arguments.
  (let ((ntotal (fold (lambda (entry highest)
                        (match entry
                          (((? keyword?) . idx)
                           (max (1+ idx) highest))))
                      (+ nreq nopt)
                      kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      (+ nreq nopt)
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
07c05279 811
e78991aa 812(define-macro-assembler (label asm sym)
3659ef54 813 (hashq-set! (asm-labels asm) sym (asm-start asm)))
e78991aa 814
e675e9bd
AW
815(define-macro-assembler (source asm source)
816 (set-asm-sources! asm (acons (asm-start asm) source (asm-sources asm))))
817
af95414f 818(define-macro-assembler (cache-current-module! asm module scope)
e78991aa 819 (let ((mod-label (intern-module-cache-cell asm scope)))
af95414f 820 (emit-static-set! asm module mod-label 0)))
e78991aa 821
af95414f 822(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
e78991aa
AW
823 (let ((sym-label (intern-non-immediate asm sym))
824 (mod-label (intern-module-cache-cell asm scope))
825 (cell-label (intern-cache-cell asm scope sym)))
af95414f 826 (emit-toplevel-box asm dst cell-label mod-label sym-label bound?)))
e78991aa 827
af95414f 828(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
e78991aa
AW
829 (let* ((sym-label (intern-non-immediate asm sym))
830 (key (cons public? module-name))
831 (mod-name-label (intern-constant asm key))
832 (cell-label (intern-cache-cell asm key sym)))
af95414f 833 (emit-module-box asm dst cell-label mod-name-label sym-label bound?)))
e78991aa
AW
834
835
836\f
837
838;;;
839;;; Helper for linking objects.
840;;;
841
(define (make-object asm name bv relocs labels . kwargs)
  "Make a linker object. This helper handles interning the name in the
shstrtab, assigning the size, allocating a fresh index, and defining a
corresponding linker symbol for the start of the section."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    ;; Claim INDEX for this section.
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          ;; A symbol named after the section, at offset 0, precedes
          ;; the caller's labels.
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
856
857
858\f
859
860;;;
861;;; Linking the constant table. This code is somewhat intertwingled
862;;; with the intern-constant code above, as that procedure also
863;;; residualizes instructions to initialize constants at load time.
864;;;
865
;; Write the object-address bits of X into BUF at byte offset POS, as
;; one target word in the target's endianness.
(define (write-immediate asm buf pos x)
  (let ((write-word! (case (asm-word-size asm)
                       ((4) bytevector-u32-set!)
                       ((8) bytevector-u64-set!)
                       (else (error "bad word size" asm)))))
    (write-word! buf pos (object-address x) (asm-endianness asm))))
873
(define (emit-init-constants asm)
  "Emit a procedure that runs the initializations recorded in
@var{asm} and return its label.  If no initializations were recorded,
emit nothing and return @code{#f}."
  (let ((pending (asm-inits asm)))
    (if (null? pending)
        #f
        (let ((entry (gensym "init-constants")))
          ;; The inits were accumulated most-recent-first, so reverse
          ;; them to run in the order they were recorded.
          (emit-text asm
                     (append `((begin-program ,entry ())
                               (assert-nargs-ee/locals 1 1))
                             (reverse pending)
                             `((load-constant 1 ,*unspecified*)
                               (return 1)
                               (end-program))))
          entry))))
889
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels."
  (define (align address alignment)
    ;; Round ADDRESS up to the next multiple of ALIGNMENT.
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Tag values written into the first word of the serialized objects.
  ;; NOTE(review): presumably these mirror the runtime's type codes;
  ;; confirm against the VM sources when changing them.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Write VAL at byte offset POS as one unsigned target word.  All
    ;; word-sized stores below go through here so that the word-size
    ;; dispatch and its error report live in a single place.
    (define (write-uword! buf pos val)
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    ;; Byte offset of the Nth word of an object that starts at POS.
    (define (nth-word pos n)
      (+ pos (* n word-size)))

    ;; Number of bytes that `write' below produces for X.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Serialize OBJ into BUF starting at byte offset POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (write-uword! buf pos tag)
          (write-uword! buf (nth-word pos 1) len)
          (let ((pos (nth-word pos 2)))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     ;; Terminating zero byte.
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     ;; Terminating zero character.
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (write-uword! buf pos tc7-rtl-program)
        (write-uword! buf (nth-word pos 1) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; The length is stored in its own word, so the tag word is the
        ;; bare tc7-ro-string.
        (write-uword! buf pos tc7-ro-string)
        (write-immediate asm buf (nth-word pos 1) #f) ; stringbuf
        (write-uword! buf (nth-word pos 2) 0)
        (write-uword! buf (nth-word pos 3) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (nth-word pos 1) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-uword! buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (write-constant-reference buf (nth-word pos (1+ i))
                                        (vector-ref obj i))
              (lp (1+ i))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (uniform-vector-element-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7)))))
              (len (if (bitvector? obj)
                       (bitvector-length obj)
                       (bytevector-length obj))))
          (write-uword! buf pos tag)
          (write-uword! buf (nth-word pos 1) len)      ; length
          (write-uword! buf (nth-word pos 2) 0)        ; pointer
          (write-immediate asm buf (nth-word pos 3) #f))) ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Every object starts on an 8-byte boundary.  The size is folded
      ;; in the same order in which the objects are laid out below.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
e78991aa
AW
1088
(define (link-constants asm)
  "Link the sections that hold the constants interned in @var{asm}.

Returns three values: a linker object for .rodata, a linker object for
.data, and the label of an initialization procedure.  Any of the three
may be @code{#f}."
  (define (every-element-immediate? vec)
    (let scan ((i 0))
      (or (= i (vector-length vec))
          (and (immediate? (vector-ref vec i))
               (scan (1+ i))))))
  ;; Constants for which this returns true go to .rodata; everything
  ;; else goes to .data.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (and (immediate? (car x)) (immediate? (cdr x))))
     ((simple-vector? x) (every-element-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (define constants (asm-constants asm))
  (define count (vlist-length constants))
  (let partition ((i 0) (ro vlist-null) (rw vlist-null))
    (cond
     ((= i count)
      (values (link-data asm ro '.rodata)
              (link-data asm rw '.data)
              (emit-init-constants asm)))
     (else
      (let* ((entry (vlist-ref constants i))
             (obj (car entry))
             (label (cdr entry)))
        (if (shareable? obj)
            (partition (1+ i) (vhash-consq obj label ro) rw)
            (partition (1+ i) ro (vhash-consq obj label rw))))))))
1121
1122\f
1123
1124;;;
1125;;; Linking program text.
1126;;;
1127
(define (process-relocs buf relocs labels)
  "Resolve in @var{buf} every x8-s24 relocation, and every s32
relocation whose label is found in @var{labels}.  Return a list of
linker relocations for the s32 references whose label is not in
@var{labels}.  An x8-s24 relocation to an unknown label is an error."
  ;; Apply one relocation; return PENDING, extended with a linker
  ;; relocation if this one could not be resolved here.
  (define (resolve reloc pending)
    (match reloc
      ((type label base word)
       (let ((target (hashq-ref labels label))
             (slot (+ base word)))
         (cond
          ((eq? type 's32)
           (cond
            (target
             (s32-set! buf slot (- target base))
             pending)
            (else
             ;; SLOT counts u32 units; the linker reloc offset is in bytes.
             (cons (make-linker-reloc 'rel32/4 (* slot 4) word label)
                   pending))))
          ((eq? type 'x8-s24)
           (unless target
             (error "unbound near relocation" reloc))
           ;; Keep the low 8 bits of the existing word and pack the
           ;; relative offset next to them.
           (let ((low-byte (logand (u32-ref buf slot) #xff)))
             (u32-set! buf slot (pack-u8-s24 low-byte (- target base)))
             pending))
          (else (error "bad relocation kind" reloc)))))))
  (let lp ((relocs relocs) (pending '()))
    (if (null? relocs)
        pending
        (lp (cdr relocs) (resolve (car relocs) pending)))))
1156
(define (process-labels labels)
  "Return a list with one linker symbol per entry of the label-offset
hash table @var{labels}.  The table's offsets are expected to be in
words; the symbols carry them multiplied by 4."
  (define (label->linker-symbol label word-offset)
    (make-linker-symbol label (* 4 word-offset)))
  (hash-map->list label->linker-symbol labels))
e78991aa
AW
1163
(define (swap-bytes! buf)
  "Reverse, in place, the byte order of every 32-bit unit of the
bytevector @var{buf}.  Signal an error if the length of @var{buf} is not
a multiple of 4."
  (let ((size (bytevector-length buf)))
    (unless (zero? (modulo size 4))
      (error "unexpected length"))
    ;; Read each unit as big-endian and store it back as little-endian,
    ;; which reverses its four bytes.
    (do ((offset 0 (+ offset 4)))
        ((= offset size))
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1177
(define (link-text-object asm)
  "Assemble the .rtl-text linker object: concatenate the finished
blocks and the current block of @var{asm} into one buffer, swap the
bytes of each 32-bit unit if the target endianness is not the native
one, then process relocations and labels."
  (let* ((text (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; asm-prev is reversed before copying; each entry is copied
         ;; as a whole block.
         (tail-offset
          (fold (lambda (block offset)
                  (bytevector-copy! block 0 text offset block-bytes)
                  (+ offset block-bytes))
                0
                (reverse (asm-prev asm)))))
    ;; Only the first asm-idx words of the current block are copied.
    (bytevector-copy! (asm-cur asm) 0 text tail-offset (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! text))
    (make-object asm '.rtl-text
                 text
                 (process-relocs text (asm-relocs asm)
                                 (asm-labels asm))
                 (process-labels (asm-labels asm)))))
1196
1197
1198\f
1199
1200;;;
1201;;; Linking other sections of the ELF file, like the dynamic segment,
1202;;; the symbol table, etc.
1203;;;
1204
(define (link-dynamic-section asm text rw rw-init)
  "Link the .dynamic section of an ELF image with RTL text.  The section
is a sequence of (tag, value) word pairs: the RTL version, the entry
point, and, when the writable data object @var{rw} is true, its address
and size as a GC root.  When @var{rw-init} is also true, the label of
that initialization procedure is recorded under DT_INIT.  The sequence
ends with a DT_NULL pair."
  (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Two words per entry, including the terminating DT_NULL.
           (word-count (cond ((not rw) 6)
                             (rw-init 12)
                             (else 10)))
           (bv (make-bytevector (* word-size word-count) 0))
           (relocs '()))
      ;; Store UWORD as the I-th word of the section.
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      ;; Make the I-th word refer to LABEL: record a relocation for it
      ;; and write a zero placeholder.
      (define (set-label! i label)
        (set! relocs (cons (make-linker-reloc 'reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      ;; Write the terminating DT_NULL pair at words I and I+1.
      (define (terminate! i)
        (set-uword! i DT_NULL)
        (set-uword! (1+ i) 0))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 #x02020000)
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (cond
       ((not rw)
        (terminate! 4))
       (else
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (cond
         (rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)
          (terminate! 10))
         (else
          (terminate! 8)))))
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
    (else (error "bad word size" asm))))
1252
(define (link-shstrtab asm)
  "Link the string table that holds the section header names."
  ;; Intern the section's own name up front: make-object interns it as
  ;; well, but only after the table contents below have been produced.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab
                 table-bytes
                 '() '()
                 #:type SHT_STRTAB #:flags 0)))
1260
;; Link the ELF symbol table: one hidden STT_FUNC symbol per meta entry
;; of ASM, attached to TEXT-SECTION.  Returns two values: the .symtab
;; linker object and the .strtab linker object holding the names.
(define (link-symtab text-section asm)
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (symtab-bytes (make-bytevector (* (length metas) entry-size) 0)))
    ;; Index of META's name in the string table; a meta without a name
    ;; gets the empty string.
    (define (name-index meta)
      (let ((name (meta-name meta)))
        (string-table-intern! names (if name (symbol->string name) ""))))
    (let lp ((metas metas) (offset 0))
      (unless (null? metas)
        (let ((meta (car metas)))
          (write-elf-symbol symtab-bytes offset endianness word-size
                            (make-elf-symbol
                             #:name (name-index meta)
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 (meta-low-pc meta))
                             #:size (* 4 (- (meta-high-pc meta)
                                            (meta-low-pc meta)))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (lp (cdr metas) (+ offset entry-size)))))
    ;; The string table is linked first so that .symtab can link to its
    ;; section index.
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           symtab-bytes
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1297
b2006c19
AW
1298;;; The .guile.arities section describes the arities that a function can
1299;;; have. It is in two parts: a sorted array of headers describing
1300;;; basic arities, and an array of links out to a string table (and in
1301;;; the case of keyword arguments, to the data section) for argument
1302;;; names. The whole thing is prefixed by a uint32 indicating the
1303;;; offset of the end of the headers array.
1304;;;
1305;;; The arity headers array is a packed array of structures of the form:
1306;;;
1307;;; struct arity_header {
1308;;; uint32_t low_pc;
1309;;; uint32_t high_pc;
1310;;; uint32_t offset;
1311;;; uint32_t flags;
1312;;; uint32_t nreq;
1313;;; uint32_t nopt;
1314;;; }
1315;;;
1316;;; All of the offsets and addresses are 32 bits. We can expand in the
1317;;; future to use 64-bit offsets if appropriate, but there are other
1318;;; aspects of RTL that constrain us to a total image that fits in 32
1319;;; bits, so for the moment we'll simplify the problem space.
1320;;;
1321;;; The following flags values are defined:
1322;;;
1323;;; #x1: has-rest?
1324;;; #x2: allow-other-keys?
1325;;; #x4: has-keyword-args?
1326;;; #x8: is-case-lambda?
1327;;;
1328;;; Functions with a single arity specify their number of required and
1329;;; optional arguments in nreq and nopt, and do not have the
1330;;; is-case-lambda? flag set. Their "offset" member links to an array
1331;;; of pointers into the associated .guile.arities.strtab string table,
1332;;; identifying the argument names. This offset is relative to the
1333;;; start of the .guile.arities section. Links for required arguments
1334;;; are first, in order, as uint32 values. Next follow the optionals,
1335;;; then the rest link if has-rest? is set, then a link to the "keyword
1336;;; indices" literal if has-keyword-args? is set. Unlike the other
1337;;; links, the kw-indices link points into the data section, and is
1338;;; relative to the ELF image as a whole.
1339;;;
1340;;; Functions with no arities have no arities information present in the
1341;;; .guile.arities section.
1342;;;
1343;;; Functions with multiple arities are preceded by a header with
1344;;; is-case-lambda? set. All other fields are 0, except low-pc and
1345;;; high-pc which should be the bounds of the whole function. Headers
1346;;; for the individual arities follow. In this way the whole headers
1347;;; array is sorted in increasing low-pc order, and case-lambda clauses
1348;;; are contained within the [low-pc, high-pc] of the case-lambda
1349;;; header.
1350
;; Size in bytes of the uint32 prefix that starts the arities section.
(define arities-prefix-len 4)

;; Size in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 6 4))

;; Byte offset of the "offset" field (the third uint32) inside an arity
;; header.
(define arity-header-offset-offset (* 2 4))

;; Combine four booleans into the arity header's flags word:
;; #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;; #x8 is-case-lambda?.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)))
1366
;; Number of bytes that META's arities occupy in the arities section.
;; A single arity takes its header plus its links; several arities
;; additionally take one header for the case-lambda entry; no arities
;; take no space.
(define (meta-arities-size meta)
  ;; Number of uint32 links that follow ARITY's header: one per
  ;; required and optional argument, one for a rest argument, and one
  ;; if there are keyword indices.
  (define (link-count arity)
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (arity-size arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (match (meta-arities meta)
    (() 0)
    ((only) (arity-size only))
    (several
     ;; The leading arity-header-len is the case-lambda header.
     (apply + arity-header-len (map arity-size several)))))
1383
;; Write the arity headers for METAS into BV, starting right after the
;; section prefix, and store the end position of the headers array in
;; the prefix.  The "offset" field of every header is left as 0.
;; Returns two values: the end position of the headers array, and an
;; alist, in writing order, that maps each arity to the position of its
;; header's "offset" field.
(define (write-arity-headers metas bv endianness)
  ;; Write the six uint32 fields of one header at POS.  The PCs are
  ;; multiplied by 4 on the way out.
  (define (put-header! pos low-pc high-pc flags nreq nopt)
    (for-each (lambda (field value)
                (bytevector-u32-set! bv (+ pos (* 4 field)) value endianness))
              '(0 1 2 3 4 5)
              (list (* low-pc 4) (* high-pc 4) 0 flags nreq nopt)))
  (define (put-arity! pos arity)
    (put-header! pos
                 (arity-low-pc arity)
                 (arity-high-pc arity)
                 (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f)
                 (length (arity-req arity))
                 (length (arity-opt arity))))
  ;; Write a header for each of ARITIES starting at POS.  Returns the
  ;; next free position and OFFSETS extended with the new entries.
  (define (put-arities! arities pos offsets)
    (let lp ((arities arities) (pos pos) (offsets offsets))
      (match arities
        (() (values pos offsets))
        ((arity . arities)
         (put-arity! pos arity)
         (lp arities
             (+ pos arity-header-len)
             (acons arity (+ pos arity-header-offset-offset) offsets))))))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (match metas
      (()
       ;; Fill in the prefix.
       (bytevector-u32-set! bv 0 pos endianness)
       (values pos (reverse offsets)))
      ((meta . metas)
       (match (meta-arities meta)
         (() (lp metas pos offsets))
         ((arity)
          (call-with-values
              (lambda () (put-arities! (list arity) pos offsets))
            (lambda (pos offsets) (lp metas pos offsets))))
         (arities
          ;; Write a case-lambda header, then individual arities.
          ;; The case-lambda header's offset link is 0.
          (put-header! pos (meta-low-pc meta) (meta-high-pc meta)
                       (pack-arity-flags #f #f #f #t) 0 0)
          (call-with-values
              (lambda ()
                (put-arities! arities (+ pos arity-header-len) offsets))
            (lambda (pos offsets) (lp metas pos offsets)))))))))
1431
;; Write the links area of the arities section into BV, starting at
;; POS.  For each (ARITY . OFFSET) pair, the current position is stored
;; at OFFSET, then one uint32 string-table index is written for each
;; required, optional and rest argument name.  If the arity has keyword
;; indices, four more bytes are reserved and a relocation against the
;; interned kw-indices constant is recorded for them.  Returns the list
;; of those relocations; it is an error if BV is not exactly filled.
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  (define endianness (asm-endianness asm))
  ;; Write the string-table index of SYM's name at POS; return the
  ;; position after it.
  (define (put-name-link! pos sym)
    (bytevector-u32-set! bv pos
                         (string-table-intern! strtab (symbol->string sym))
                         endianness)
    (+ pos 4))
  (define (argument-names arity)
    (append (arity-req arity)
            (arity-opt arity)
            (let ((rest (arity-rest arity)))
              (if rest (list rest) '()))))
  (define (kw-indices-reloc pos kw-indices)
    ;; FIXME: Assert that kw-indices is already interned.
    (make-linker-reloc 'abs32/1 pos 0
                       (intern-constant asm kw-indices)))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (match pairs
      (()
       (unless (= pos (bytevector-length bv))
         (error "expected to fully fill the bytevector"
                pos (bytevector-length bv)))
       relocs)
      (((arity . offset) . pairs)
       ;; Point the header's "offset" field at this arity's links.
       (bytevector-u32-set! bv offset pos endianness)
       (let names-lp ((names (argument-names arity)) (pos pos))
         (cond
          ((pair? names)
           (names-lp (cdr names) (put-name-link! pos (car names))))
          ((null? (arity-kw-indices arity))
           (lp pos pairs relocs))
          (else
           (lp (+ pos 4)
               pairs
               (cons (kw-indices-reloc pos (arity-kw-indices arity))
                     relocs)))))))))
1464
;; Link the .guile.arities section and its string table.  Returns two
;; values: the .guile.arities linker object, which carries the
;; kw-indices relocations and links to the string table's section, and
;; the .guile.arities.strtab linker object.
(define (link-arities asm)
  (let* ((metas (reverse (asm-meta asm)))
         (total-size (apply + arities-prefix-len
                            (map meta-arities-size metas)))
         (names (make-string-table))
         (bv (make-bytevector total-size 0))
         ;; Headers first; the links are written from where the headers
         ;; end.
         (kw-indices-relocs
          (call-with-values
              (lambda ()
                (write-arity-headers metas bv (asm-endianness asm)))
            (lambda (links-start arity-offset-pairs)
              (write-arity-links asm bv links-start
                                 arity-offset-pairs names))))
         (strtab (make-object asm '.guile.arities.strtab
                              (link-string-table! names)
                              '() '()
                              #:type SHT_STRTAB #:flags 0))
         (arities (make-object asm '.guile.arities
                               bv
                               kw-indices-relocs '()
                               #:type SHT_PROGBITS #:flags 0
                               #:link (elf-section-index
                                       (linker-object-section strtab)))))
    (values arities strtab)))
1491
9128b1a1
AW
1492;;;
1493;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1494;;; values. Pc and str are both 32 bits wide. (Either could change to
1495;;; 64 bits if appropriate in the future.) Pc is the address of the
0a1d52ac
AW
1496;;; entry to a program, relative to the start of the text section, in
1497;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1498;;; string table section.
9128b1a1
AW
1499;;;
1500
;; Size in bytes of one docstrs entry: a uint32 pc and a uint32 string
;; table index.
(define docstr-size 8)

;; Link the .guile.docstrs section and its string table.  Returns two
;; values: the .guile.docstrs linker object and the
;; .guile.docstrs.strtab linker object.
(define (link-docstrs asm)
  (define (documentation-entry? pair)
    (eq? (car pair) 'documentation))
  ;; Return (BYTE-PC . DOCSTRING) for META, or #f unless META has
  ;; exactly one `documentation' property and its value is a string.
  (define (meta->docstring-entry meta)
    (let ((tail (find-tail documentation-entry? (meta-properties meta))))
      (and tail
           (not (find-tail documentation-entry? (cdr tail)))
           (string? (cdar tail))
           (cons (* 4 (meta-low-pc meta)) (cdar tail)))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->docstring-entry
                                 (reverse (asm-meta asm))))
         (names (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    (let lp ((entries docstrings) (pos 0))
      (match entries
        (() #t)
        (((pc . text) . entries)
         (bytevector-u32-set! bv pos pc endianness)
         (bytevector-u32-set! bv (+ pos 4)
                              (string-table-intern! names text)
                              endianness)
         (lp entries (+ pos docstr-size)))))
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1541
c4c098e3
AW
1542;;;
1543;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1544;;; values. Pc and addr are both 32 bits wide. (Either could change to
1545;;; 64 bits if appropriate in the future.) Pc is the address of the
1546;;; entry to a program, relative to the start of the text section, and
1547;;; addr is the address of the associated properties alist, relative to
1548;;; the start of the ELF image.
1549;;;
1550;;; Since procedure properties are stored in the data sections, we need
1551;;; to link the procedures property section first. (Note that this
1552;;; constraint does not apply to the arities section, which may
1553;;; reference the data sections via the kw-indices literal, because
1554;;; assembling the text section already makes sure that the kw-indices
1555;;; are interned.)
1556;;;
1557
;; Size in bytes of one procprops entry: a uint32 pc and a uint32
;; address.
(define procprops-size 8)

;; Link the .guile.procprops section: one entry per meta that still has
;; properties once its name and string docstring are left out.  The
;; address half of each entry is filled in through a relocation against
;; the interned properties alist.
(define (link-procprops asm)
  ;; Return ALIST without its first entry for KEY, provided that
  ;; entry's value satisfies VALUE-PRED; later entries for KEY are not
  ;; examined.
  (define (assoc-remove-one alist key value-pred)
    (match alist
      (() '())
      (((k . v) . rest)
       (cond
        ((not (eq? k key))
         (acons k v (assoc-remove-one rest key value-pred)))
        ((value-pred v) rest)
        (else (acons key v rest))))))
  (define (residual-properties meta)
    (let ((unnamed (assoc-remove-one (meta-properties meta)
                                     'name
                                     (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))
  ;; NOTE(review): the pc is stored as given by meta-low-pc, whereas
  ;; link-docstrs multiplies it by 4.  Confirm which unit the reader of
  ;; this section expects.
  (define (meta->entry meta)
    (let ((props (residual-properties meta)))
      (and (pair? props)
           (cons (meta-low-pc meta) props))))
  (let* ((endianness (asm-endianness asm))
         (entries (filter-map meta->entry (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length entries) procprops-size) 0)))
    (let lp ((entries entries) (pos 0) (relocs '()))
      (if (null? entries)
          (make-object asm '.guile.procprops
                       bv
                       relocs '()
                       #:type SHT_PROGBITS #:flags 0)
          (match (car entries)
            ((pc . props)
             (bytevector-u32-set! bv pos pc endianness)
             (lp (cdr entries)
                 (+ pos procprops-size)
                 (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                          (intern-constant asm props))
                       relocs))))))))
1599
a862d8c1
AW
1600;;;
1601;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1602;;; sections provide line number and local variable liveness
1603;;; information. Their format is defined by the DWARF
1604;;; specifications.
1605;;;
1606
;; Source language recorded in the debug info for ASM.  The argument is
;; ignored for now and the answer is always `scheme'.
;; FIXME: Plumb language through to the assembler.
(define (asm-language asm)
  'scheme)
1610
0a7340ac 1611;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
a862d8c1 1612(define (link-debug asm)
0a7340ac
AW
1613 (define (put-s8 port val)
1614 (let ((bv (make-bytevector 1)))
1615 (bytevector-s8-set! bv 0 val)
1616 (put-bytevector port bv)))
1617
a862d8c1
AW
1618 (define (put-u16 port val)
1619 (let ((bv (make-bytevector 2)))
1620 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1621 (put-bytevector port bv)))
1622
1623 (define (put-u32 port val)
1624 (let ((bv (make-bytevector 4)))
1625 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1626 (put-bytevector port bv)))
1627
1628 (define (put-u64 port val)
1629 (let ((bv (make-bytevector 8)))
1630 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1631 (put-bytevector port bv)))
1632
1633 (define (put-uleb128 port val)
1634 (let lp ((val val))
1635 (let ((next (ash val -7)))
1636 (if (zero? next)
1637 (put-u8 port val)
1638 (begin
1639 (put-u8 port (logior #x80 (logand val #x7f)))
1640 (lp next))))))
1641
d56ab5a9
AW
1642 (define (put-sleb128 port val)
1643 (let lp ((val val))
31602aa0 1644 (if (<= 0 (+ val 64) 127)
d56ab5a9
AW
1645 (put-u8 port (logand val #x7f))
1646 (begin
1647 (put-u8 port (logior #x80 (logand val #x7f)))
1648 (lp (ash val -7))))))
1649
0a7340ac
AW
1650 (define (port-position port)
1651 (seek port 0 SEEK_CUR))
1652
a862d8c1
AW
1653 (define (meta->subprogram-die meta)
1654 `(subprogram
1655 (@ ,@(cond
1656 ((meta-name meta)
1657 => (lambda (name) `((name ,(symbol->string name)))))
1658 (else
1659 '()))
1660 (low-pc ,(meta-label meta))
1661 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1662
1663 (define (make-compile-unit-die asm)
1664 `(compile-unit
1665 (@ (producer ,(string-append "Guile " (version)))
1666 (language ,(asm-language asm))
1667 (low-pc .rtl-text)
0a7340ac
AW
1668 (high-pc ,(* 4 (asm-pos asm)))
1669 (stmt-list 0))
a862d8c1
AW
1670 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1671
1672 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1673 ((die-relocs) '())
1674 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1675 ;; (tag has-kids? attrs forms) -> code
1676 ((abbrevs) vlist-null)
0a7340ac
AW
1677 ((strtab) (make-string-table))
1678 ((line-port get-line-bv) (open-bytevector-output-port))
1679 ((line-relocs) '())
1680 ;; file -> code
1681 ((files) vlist-null))
a862d8c1
AW
1682
1683 (define (write-abbrev code tag has-children? attrs forms)
1684 (put-uleb128 abbrev-port code)
1685 (put-uleb128 abbrev-port (tag-name->code tag))
1686 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1687 (for-each (lambda (attr form)
1688 (put-uleb128 abbrev-port (attribute-name->code attr))
1689 (put-uleb128 abbrev-port (form-name->code form)))
1690 attrs forms)
1691 (put-uleb128 abbrev-port 0)
1692 (put-uleb128 abbrev-port 0))
1693
1694 (define (intern-abbrev tag has-children? attrs forms)
1695 (let ((key (list tag has-children? attrs forms)))
1696 (match (vhash-assoc key abbrevs)
1697 ((_ . code) code)
0a7340ac 1698 (#f (let ((code (1+ (vlist-length abbrevs))))
a862d8c1
AW
1699 (set! abbrevs (vhash-cons key code abbrevs))
1700 (write-abbrev code tag has-children? attrs forms)
1701 code)))))
1702
0a7340ac
AW
1703 (define (intern-file file)
1704 (match (vhash-assoc file files)
1705 ((_ . code) code)
1706 (#f (let ((code (1+ (vlist-length files))))
1707 (set! files (vhash-cons file code files))
1708 code))))
1709
1710 (define (write-sources)
d56ab5a9
AW
1711 ;; Choose line base and line range values that will allow for an
1712 ;; address advance range of 16 words. The special opcode range is
1713 ;; from 10 to 255, so 246 values.
1714 (define base -4)
1715 (define range 15)
1716
0a7340ac
AW
1717 (let lp ((sources (asm-sources asm)) (out '()))
1718 (match sources
d56ab5a9 1719 (((pc . s) . sources)
0a7340ac
AW
1720 (let ((file (assq-ref s 'filename))
1721 (line (assq-ref s 'line))
1722 (col (assq-ref s 'column)))
d56ab5a9
AW
1723 (lp sources
1724 ;; Guile line and column numbers are 0-indexed, but
1725 ;; they are 1-indexed for DWARF.
1726 (cons (list pc
1727 (if file (intern-file file) 0)
1728 (if line (1+ line))
1729 (if col (1+ col)))
1730 out))))
0a7340ac
AW
1731 (()
1732 ;; Compilation unit header for .debug_line. We write in
1733 ;; DWARF 2 format because more tools understand it than DWARF
1734 ;; 4, which incompatibly adds another field to this header.
1735
1736 (put-u32 line-port 0) ; Length; will patch later.
1737 (put-u16 line-port 2) ; DWARF 2 format.
1738 (put-u32 line-port 0) ; Prologue length; will patch later.
1739 (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
1740 (put-u8 line-port 1) ; Default is-stmt: true.
1741
d56ab5a9
AW
1742 (put-s8 line-port base) ; Line base. See the DWARF standard.
1743 (put-u8 line-port range) ; Line range. See the DWARF standard.
0a7340ac
AW
1744 (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
1745
1746 ;; A table of the number of uleb128 arguments taken by each
1747 ;; of the standard opcodes.
1748 (put-u8 line-port 0) ; 1: copy
1749 (put-u8 line-port 1) ; 2: advance-pc
1750 (put-u8 line-port 1) ; 3: advance-line
1751 (put-u8 line-port 1) ; 4: set-file
1752 (put-u8 line-port 1) ; 5: set-column
1753 (put-u8 line-port 0) ; 6: negate-stmt
1754 (put-u8 line-port 0) ; 7: set-basic-block
1755 (put-u8 line-port 0) ; 8: const-add-pc
1756 (put-u8 line-port 1) ; 9: fixed-advance-pc
1757
1758 ;; Include directories, as a zero-terminated sequence of
1759 ;; nul-terminated strings. Nothing, for the moment.
1760 (put-u8 line-port 0)
1761
1762 ;; File table. For each file that contributes to this
1763 ;; compilation unit, a nul-terminated file name string, and a
1764 ;; uleb128 for each of directory the file was found in, the
1765 ;; modification time, and the file's size in bytes. We pass
1766 ;; zero for the latter three fields.
32ca15d7
AW
1767 (vlist-fold-right
1768 (lambda (pair seed)
1769 (match pair
1770 ((file . code)
1771 (put-bytevector line-port (string->utf8 file))
1772 (put-u8 line-port 0)
1773 (put-uleb128 line-port 0) ; directory
1774 (put-uleb128 line-port 0) ; mtime
1775 (put-uleb128 line-port 0))) ; size
1776 seed)
1777 #f
1778 files)
0a7340ac
AW
1779 (put-u8 line-port 0) ; 0 byte terminating file list.
1780
1781 ;; Patch prologue length.
1782 (let ((offset (port-position line-port)))
1783 (seek line-port 6 SEEK_SET)
1784 (put-u32 line-port (- offset 10))
1785 (seek line-port offset SEEK_SET))
1786
d56ab5a9
AW
1787 ;; Now write the statement program.
1788 (let ()
1789 (define (extended-op opcode payload-len)
6b71a767 1790 (put-u8 line-port 0) ; extended op
d56ab5a9
AW
1791 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
1792 (put-uleb128 line-port opcode))
1793 (define (set-address sym)
1794 (define (add-reloc! kind)
1795 (set! line-relocs
1796 (cons (make-linker-reloc kind
1797 (port-position line-port)
1798 0
1799 sym)
1800 line-relocs)))
1801 (match (asm-word-size asm)
1802 (4
1803 (extended-op 2 4)
1804 (add-reloc! 'abs32/1)
1805 (put-u32 line-port 0))
1806 (8
1807 (extended-op 2 8)
1808 (add-reloc! 'abs64/1)
1809 (put-u64 line-port 0))))
1810 (define (end-sequence pc)
1811 (let ((pc-inc (- (asm-pos asm) pc)))
6b71a767 1812 (put-u8 line-port 2) ; advance-pc
d56ab5a9
AW
1813 (put-uleb128 line-port pc-inc))
1814 (extended-op 1 0))
1815 (define (advance-pc pc-inc line-inc)
1816 (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
1817 (cond
1818 ((or (< line-inc base) (>= line-inc (+ base range)))
1819 (advance-line line-inc)
1820 (advance-pc pc-inc 0))
1821 ((<= spec 255)
1822 (put-u8 line-port spec))
1823 ((< spec 500)
1824 (put-u8 line-port 8) ; const-advance-pc
1825 (advance-pc (- pc-inc (floor/ (- 255 10) range))
1826 line-inc))
1827 (else
1828 (put-u8 line-port 2) ; advance-pc
1829 (put-uleb128 line-port pc-inc)
1830 (advance-pc 0 line-inc)))))
1831 (define (advance-line inc)
1832 (put-u8 line-port 3)
1833 (put-sleb128 line-port inc))
1834 (define (set-file file)
1835 (put-u8 line-port 4)
1836 (put-uleb128 line-port file))
1837 (define (set-column col)
1838 (put-u8 line-port 5)
1839 (put-uleb128 line-port col))
1840
1841 (set-address '.rtl-text)
1842
1843 (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
1844 (match in
6b71a767
AW
1845 (()
1846 (when (null? out)
1847 ;; There was no source info in the first place. Set
1848 ;; file register to 0 before adding final row.
1849 (set-file 0))
1850 (end-sequence pc))
d56ab5a9
AW
1851 (((pc* file* line* col*) . in*)
1852 (cond
1853 ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
1854 (lp in* pc file line col))
1855 (else
1856 (unless (eqv? col col*)
1857 (set-column col*))
1858 (unless (eqv? file file*)
1859 (set-file file*))
1860 (advance-pc (- pc* pc) (- line* line))
1861 (lp in* pc* file* line* col*)))))))))))
0a7340ac 1862
a862d8c1
AW
;; Turn the value VAL of DIE attribute ATTR into the code that will be
;; written for it.  `name' and `producer' values are handed to
;; string-table-intern! on STRTAB, `language' values go through
;; language-name->code, and `low-pc', `high-pc' and `stmt-list' values
;; are returned untouched.  Any other ATTR is a match failure.
(define (compute-code attr val)
  (match attr
    ((or 'name 'producer)
     (string-table-intern! strtab val))
    ((or 'low-pc 'high-pc 'stmt-list)
     val)
    ('language
     (language-name->code val))))
a862d8c1
AW
1871
;; True iff VAL is an exact integer.  `integer?' answers #f for
;; non-numbers, so `exact?' is only ever applied to a number.
(define (exact-integer? val)
  (and (integer? val)
       (exact? val)))
1874
;; Pick the form symbol used to encode attribute ATTR, given its source
;; value VAL and its computed CODE.  The tests are tried in this order:
;; string values, the `stmt-list' attribute, exact integer codes (sized
;; by magnitude), then symbol values.  Anything else signals an error.
(define (choose-form attr val code)
  ;; Narrowest form that can hold the exact integer N.
  (define (integer-form n)
    (cond
     ((negative? n) 'sleb128)
     ((<= n #xff) 'data1)
     ((<= n #xffff) 'data2)
     ((<= n #xffffffff) 'data4)
     ((<= n #xffffffffffffffff) 'data8)
     (else 'uleb128)))
  (cond
   ((string? val) 'strp)
   ((eq? attr 'stmt-list) 'sec-offset)
   ((exact-integer? code) (integer-form code))
   ((symbol? val) 'addr)
   (else (error "unhandled case" attr val code))))
1889
;; Push a relocation of type KIND against symbol SYM onto DIE-RELOCS,
;; located at the current write position of DIE-PORT.
(define (add-die-relocation! kind sym)
  (let* ((offset (port-position die-port))
         (reloc (make-linker-reloc kind offset 0 sym)))
    (set! die-relocs (cons reloc die-relocs))))
1894
;; Write CODE to DIE-PORT using the encoding named by FORM.  For the
;; `addr' form, CODE is used as the relocation symbol: a relocation is
;; recorded at the current position and a zero placeholder whose width
;; follows (asm-word-size asm) is written in its place.
(define (write-value code form)
  (match form
    ('data1 (put-u8 die-port code))
    ('data2 (put-u16 die-port code))
    ;; These three forms are all written as a 32-bit value.
    ((or 'data4 'sec-offset 'strp)
     (put-u32 die-port code))
    ('data8 (put-u64 die-port code))
    ('uleb128 (put-uleb128 die-port code))
    ('sleb128 (put-sleb128 die-port code))
    ('addr
     (match (asm-word-size asm)
       (4
        (add-die-relocation! 'abs32/1 code)
        (put-u32 die-port 0))
       (8
        (add-die-relocation! 'abs64/1 code)
        (put-u64 die-port 0))))))
a862d8c1
AW
1913
;; Serialize DIE, a tree of the shape (TAG (@ (ATTR VAL) ...) CHILD ...),
;; to DIE-PORT: first the abbreviation code obtained from intern-abbrev,
;; then each attribute value in its chosen form.  When there are
;; children they are written recursively, followed by a 0 terminator.
(define (write-die die)
  (match die
    ((tag ('@ (attrs vals) ...) children ...)
     (let* ((codes (map compute-code attrs vals))
            (forms (map choose-form attrs vals codes))
            (parent? (pair? children))
            (abbrev (intern-abbrev tag parent? attrs forms)))
       (put-uleb128 die-port abbrev)
       (for-each write-value codes forms)
       (when parent?
         (for-each write-die children)
         (put-uleb128 die-port 0))))))
1926
1927 ;; Compilation unit header.
1928 (put-u32 die-port 0) ; Length; will patch later.
1929 (put-u16 die-port 4) ; DWARF 4.
1930 (put-u32 die-port 0) ; Abbrevs offset.
1931 (put-u8 die-port (asm-word-size asm)) ; Address size.
1932
1933 (write-die (make-compile-unit-die asm))
1934
1935 ;; Terminate the abbrevs list.
1936 (put-uleb128 abbrev-port 0)
1937
0a7340ac
AW
1938 (write-sources)
1939
a862d8c1
AW
1940 (values (let ((bv (get-die-bv)))
1941 ;; Patch DWARF32 length.
1942 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1943 (asm-endianness asm))
1944 (make-object asm '.debug_info bv die-relocs '()
1945 #:type SHT_PROGBITS #:flags 0))
1946 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
1947 #:type SHT_PROGBITS #:flags 0)
1948 (make-object asm '.debug_str (link-string-table! strtab) '() '()
1949 #:type SHT_PROGBITS #:flags 0)
1950 (make-object asm '.debug_loc #vu8() '() '()
0a7340ac
AW
1951 #:type SHT_PROGBITS #:flags 0)
1952 (let ((bv (get-line-bv)))
1953 ;; Patch DWARF32 length.
1954 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1955 (asm-endianness asm))
1956 (make-object asm '.debug_line bv line-relocs '()
1957 #:type SHT_PROGBITS #:flags 0)))))
a862d8c1 1958
;; Run every section linker over ASM and return the list of resulting
;; objects, with any #f entries dropped.  The bindings below are
;; sequential, and their order is significant.
(define (link-objects asm)
  (let*-values
      (;; Procedure properties go first: linking them probably interns
       ;; more constants, so the constants must be linked afterwards.
       ((props) (link-procprops asm))
       ((read-only writable writable-init) (link-constants asm))
       ;; The text object follows the constants so that the constants
       ;; initializer gets included in it.
       ((code) (link-text-object asm))
       ((dynamic) (link-dynamic-section asm code writable writable-init))
       ((symbols symbol-names)
        (link-symtab (linker-object-section code) asm))
       ((arity-info arity-names) (link-arities asm))
       ((doc-info doc-names) (link-docstrs asm))
       ((debug-info debug-abbrev debug-str debug-loc debug-line)
        (link-debug asm))
       ;; The section-name string table has to come last, because
       ;; linking the other sections adds entries to it.
       ((section-names) (link-shstrtab asm)))
    (filter (lambda (object) object)
            (list code read-only writable dynamic
                  symbols symbol-names
                  arity-info arity-names
                  doc-info doc-names
                  props
                  debug-info debug-abbrev debug-str debug-loc debug-line
                  section-names))))
e78991aa
AW
1980
1981
1982\f
1983
1984;;;
1985;;; High-level public interfaces.
1986;;;
1987
;; Public entry point: gather the linked objects for ASM, then hand
;; them to link-elf together with the caller's page-alignment choice.
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages.  Pass @code{#:page-aligned? #f} to
disable this behavior."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))
1994
;; Convenience wrapper: emit INSTRUCTIONS into a fresh assembler, link
;; the result without page alignment, and load the image from memory.
(define (assemble-program instructions)
  "Take the sequence of instructions @var{instructions}, assemble them
into RTL code, link an image, and load that image from memory.  Returns
a procedure."
  (define asm (make-assembler))
  ;; The text must be emitted before the assembler is linked.
  (emit-text asm instructions)
  (let ((image (link-assembly asm #:page-aligned? #f)))
    (load-thunk-from-memory image)))