All instruction pointers are now scm_t_uint32*
[bpt/guile.git] / module / system / vm / assembler.scm
CommitLineData
e78991aa
AW
1;;; Guile RTL assembler
2
3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Commentary:
20;;;
21;;; This module implements an assembler that creates an ELF image from
22;;; RTL assembly and macro-assembly. The input can be given in
23;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24;;; procedural interface, the emit-OP procedures, but that is not
25;;; currently exported.
26;;;
27;;; "Primitive instructions" correspond to RTL VM operations.
28;;; Assemblers for primitive instructions are generated programmatically
29;;; from (rtl-instruction-list), which itself is derived from the VM
30;;; sources. There are also "macro-instructions" like "label" or
31;;; "load-constant" that expand to 0 or more primitive instructions.
32;;;
33;;; The assembler also handles some higher-level tasks, like creating
34;;; the symbol table, other metadata sections, creating a constant table
35;;; for the whole compilation unit, and writing the dynamic section of
36;;; the ELF file along with the appropriate initialization routines.
37;;;
38;;; Most compilers will want to use the trio of make-assembler,
39;;; emit-text, and link-assembly. That will result in the creation of
40;;; an ELF image as a bytevector, which can then be loaded using
41;;; load-thunk-from-memory, or written to disk as a .go file.
42;;;
43;;; Code:
44
45(define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
a862d8c1 48 #:use-module (system vm dwarf)
e78991aa
AW
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
e78991aa 51 #:use-module (rnrs bytevectors)
a862d8c1 52 #:use-module (ice-9 binary-ports)
e78991aa
AW
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
60 emit-text
4dfae1bf 61 link-assembly))
e78991aa
AW
62
63
64\f
65
66;;; RTL code consists of 32-bit units, often subdivided in some way.
67;;; These helpers create one 32-bit unit from multiple components.
68
cb8054c7
AW
;; Pack X into bits 0-7 and Y into bits 8-31 of one 32-bit unit.
;; Both fields are unsigned; an error is signalled if either one does
;; not fit its field.
(define-inlinable (pack-u8-u24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  ;; Y used to go unchecked, so a negative or oversized value produced a
  ;; word that no longer fit in 32 bits instead of being reported here.
  (unless (<= 0 y #xffffff)
    (error "out of range" y))
  (logior x (ash y 8)))
73
cb8054c7
AW
;; Pack the unsigned byte X into bits 0-7 and the signed 24-bit value Y,
;; in two's complement, into bits 8-31 of one 32-bit unit.
;;
;; Y must lie in [-#x800000, #x7fffff].  The previous bounds rejected
;; -#x800000, which is representable, and accepted #x800000..#xffffff,
;; which a signed 24-bit reader would decode as negative values.
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ((<= (- #x800000) y -1)
                   ;; Two's complement encoding of a negative value.
                   (+ y #x1000000))
                  ((<= 0 y #x7fffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
84
cb8054c7
AW
;; Pack X into bit 0, Y into bits 1-7 and Z into bits 8-31 of one
;; 32-bit unit.  All three fields are unsigned and range-checked.
(define-inlinable (pack-u1-u7-u24 x y z)
  (unless (<= 0 x 1)
    (error "out of range" x))
  (unless (<= 0 y 127)
    (error "out of range" y))
  ;; Z was previously the only field left unchecked.
  (unless (<= 0 z #xffffff)
    (error "out of range" z))
  (logior x (ash y 1) (ash z 8)))
91
cb8054c7
AW
;; Pack X into bits 0-7, Y into bits 8-19 and Z into bits 20-31 of one
;; 32-bit unit.  All three fields are unsigned and range-checked.
(define-inlinable (pack-u8-u12-u12 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 4095)
    (error "out of range" y))
  ;; Z was previously the only field left unchecked.
  (unless (<= 0 z 4095)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 20)))
98
cb8054c7
AW
;; Pack X into bits 0-7, Y into bits 8-15 and Z into bits 16-31 of one
;; 32-bit unit.  All three fields are unsigned and range-checked.
(define-inlinable (pack-u8-u8-u16 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  ;; Z was previously the only field left unchecked.
  (unless (<= 0 z #xffff)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 16)))
105
cb8054c7
AW
;; Pack four unsigned bytes into one 32-bit unit: X in bits 0-7, Y in
;; bits 8-15, Z in bits 16-23 and W in bits 24-31.  Every field is
;; range-checked.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z 255)
    (error "out of range" z))
  ;; W was previously the only field left unchecked.
  (unless (<= 0 w 255)
    (error "out of range" w))
  (logior x (ash y 8) (ash z 16) (ash w 24)))
114
07c05279
AW
;; Combine boolean flags into an integer bit set: the first flag
;; contributes bit 0 and the second contributes bit 1.
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ flag0 flag1)
     (logior (if flag0 #b01 0)
             (if flag1 #b10 0)))))
120
e78991aa
AW
121;;; Helpers to read and write 32-bit units in a buffer.
122
;; Accessors for the Nth 32-bit unit of a bytevector, in native
;; endianness.  N counts 32-bit units, not bytes.

(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))

(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))

(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))

(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
134
135
136\f
137
138;;; A <meta> entry collects metadata for one procedure. Procedures are
139;;; written as contiguous ranges of RTL code.
140;;;
2a4daafd
AW
;; Signal an error whose message is "expected " followed by KIND unless
;; the value of ARG matches PATTERN (a pattern for `match').  ARG is
;; evaluated exactly once.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (if (not (match val
                  (pattern #t)
                  (_ #f)))
           (error (string-append "expected " kind) val))))))
145
;; Metadata collected for one procedure.
;;   label      -- the procedure's label; make-meta asserts it is a symbol.
;;   properties -- an alist with symbolic keys (checked by make-meta).
;;   low-pc     -- the position passed to make-meta; begin-program
;;                 supplies the asm-start at which the procedure begins.
;;   high-pc    -- #f when created by make-meta; set by end-program.
;;   arities    -- a list of <arity>.  begin-kw-arity conses each new
;;                 arity onto the front, and end-program reverses the list.
e78991aa 146(define-record-type <meta>
3185c907 147 (%make-meta label properties low-pc high-pc arities)
e78991aa 148 meta?
2a4daafd
AW
149 (label meta-label)
150 (properties meta-properties set-meta-properties!)
e78991aa 151 (low-pc meta-low-pc)
3185c907
AW
152 (high-pc meta-high-pc set-meta-high-pc!)
153 (arities meta-arities set-meta-arities!))
e78991aa 154
2a4daafd
AW
(define (make-meta label properties low-pc)
  "Create the <meta> record for a procedure labelled @var{label} that
starts at @var{low-pc}.  @var{label} must be a symbol and
@var{properties} an alist with symbolic keys.  The high PC starts out as
@code{#f} and the arity list starts out empty."
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
2a4daafd
AW
159
(define (meta-name meta)
  "Return the value stored under the @code{name} key of @var{meta}'s
properties, or @code{#f} if there is no such key."
  (let ((properties (meta-properties meta)))
    (assq-ref properties 'name)))
162
3185c907
AW
163;; Metadata for one <lambda-case>.
;; The fields mirror the arguments that begin-kw-arity checks and passes
;; to make-arity: REQ and OPT are lists of symbols, REST is #f or a
;; symbol, KW-INDICES is an alist of keyword -> integer, and
;; ALLOW-OTHER-KEYS? is a boolean.  LOW-PC is the asm-start at which the
;; arity begins.  HIGH-PC is created as #f and filled in by end-arity.
164(define-record-type <arity>
165 (make-arity req opt rest kw-indices allow-other-keys?
166 low-pc high-pc)
167 arity?
168 (req arity-req)
169 (opt arity-opt)
170 (rest arity-rest)
171 (kw-indices arity-kw-indices)
172 (allow-other-keys? arity-allow-other-keys?)
173 (low-pc arity-low-pc)
174 (high-pc arity-high-pc set-arity-high-pc!))
175
e78991aa
AW
;; Number of 32-bit words in one code block.  fresh-block allocates
;; blocks of this size, and emit switches to a new block once the
;; current one has this many words.
176(define-syntax *block-size* (identifier-syntax 32))
177
178;;; An assembler collects all of the words emitted during assembly, and
179;;; also maintains ancillary information such as the constant table, a
180;;; relocation list, and so on.
181;;;
182;;; RTL code consists of 32-bit units. We emit RTL code using native
183;;; endianness. If we're targeting a foreign endianness, we byte-swap
184;;; the bytevector as a whole instead of conditionalizing each access.
185;;;
;; The constructor takes its arguments in the order listed below;
;; make-assembler is the caller that supplies the initial values.
186(define-record-type <asm>
187 (make-asm cur idx start prev written
188 labels relocs
189 word-size endianness
190 constants inits
191 shstrtab next-section-number
e675e9bd 192 meta sources)
e78991aa
AW
193 asm?
194
195 ;; We write RTL code into what is logically a growable vector,
196 ;; implemented as a list of blocks. asm-cur is the current block, and
197 ;; asm-idx is the current index into that block, in 32-bit units.
198 ;;
199 (cur asm-cur set-asm-cur!)
200 (idx asm-idx set-asm-idx!)
201
202 ;; asm-start is an absolute position, indicating the offset of the
203 ;; beginning of an instruction (in u32 units). It is updated after
204 ;; writing all the words for one primitive instruction. It models the
205 ;; position of the instruction pointer during execution, given that
206 ;; the RTL VM updates the IP only at the end of executing the
207 ;; instruction, and is thus useful for computing offsets between two
208 ;; points in a program.
209 ;;
210 (start asm-start set-asm-start!)
211
212 ;; The list of previously written blocks.
213 ;;
214 (prev asm-prev set-asm-prev!)
215
216 ;; The number of u32 words written in asm-prev, which is the same as
217 ;; the offset of the current block.
218 ;;
219 (written asm-written set-asm-written!)
220
221 ;; A hash table mapping symbol -> position, recording the labels defined
222 ;; in this compilation unit.
223 ;;
224 (labels asm-labels set-asm-labels!)
225
226 ;; A list of relocations needed by the program text. We use an
227 ;; internal representation for relocations, and handle textual
228 ;; relative relocations in the assembler. Other kinds of relocations
229 ;; are later reified as linker relocations and resolved by the linker.
230 ;;
231 (relocs asm-relocs set-asm-relocs!)
232
233 ;; Target information.
234 ;;
235 (word-size asm-word-size)
236 (endianness asm-endianness)
237
238 ;; The constant table, as a vhash of object -> label. All constants
239 ;; get de-duplicated and written into separate sections -- either the
240 ;; .rodata section, for read-only data, or .data, for constants that
241 ;; need initialization at load-time (like symbols). Constants can
242 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
243 ;; so order in this table is important.
244 ;;
245 (constants asm-constants set-asm-constants!)
246
247 ;; A list of RTL instructions needed to initialize the constants.
248 ;; Will run in a thunk with 2 local variables.
249 ;;
250 (inits asm-inits set-asm-inits!)
251
252 ;; The shstrtab, for section names.
253 ;;
254 (shstrtab asm-shstrtab set-asm-shstrtab!)
255
256 ;; The section number for the next section to be written.
257 ;;
258 (next-section-number asm-next-section-number set-asm-next-section-number!)
259
260 ;; A list of <meta>, corresponding to procedure metadata.
261 ;;
e675e9bd
AW
262 (meta asm-meta set-asm-meta!)
263
264 ;; A list of (pos . source) pairs, indicating source information. POS
265 ;; is relative to the beginning of the text section, and SOURCE is in
266 ;; the same format that source-properties returns.
267 ;;
268 (sources asm-sources set-asm-sources!))
e78991aa
AW
269
;; Allocate a new u32vector of *block-size* elements to receive emitted
;; words.
270(define-inlinable (fresh-block)
271 (make-u32vector *block-size*))
272
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a fresh assembler for a target with the given @var{word-size}
and @var{endianness}.  Both default to the values of the configured
target."
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (shstrtab (make-string-table)))
    (make-asm first-block               ; cur
              0                         ; idx
              0                         ; start
              '()                       ; prev
              0                         ; written
              labels
              '()                       ; relocs
              word-size
              endianness
              vlist-null                ; constants
              '()                       ; inits
              shstrtab
              1                         ; next-section-number
              '()                       ; meta
              '())))                    ; sources
e78991aa
AW
284
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
288
(define-inlinable (asm-pos asm)
  "Return the absolute offset, in 32-bit units, at which the next word
will be written: the words in closed blocks plus the index into the
current block."
  (let ((closed (asm-written asm))
        (in-block (asm-idx asm)))
    (+ in-block closed)))
293
(define (allocate-new-block asm)
  "Retire the current block onto the list of previous blocks and make a
fresh, empty block current, so that the next word lands at index 0 of
the new block."
  (let* ((next-block (fresh-block))
         (finished (asm-cur asm))
         ;; Must be read before the index is reset below.
         (offset (asm-pos asm)))
    (set-asm-prev! asm (cons finished (asm-prev asm)))
    (set-asm-written! asm offset)
    (set-asm-cur! asm next-block)
    (set-asm-idx! asm 0)))
302
(define-inlinable (emit asm u32)
  "Append the 32-bit word @var{u32} to the instruction stream.  The
current block is assumed to have room for it; afterwards a new block is
allocated if this word filled the current one, so the next call has room
too."
  (let* ((idx (asm-idx asm))
         (next (1+ idx)))
    (u32-set! (asm-cur asm) idx u32)
    (set-asm-idx! asm next)
    (when (= next *block-size*)
      (allocate-new-block asm))))
311
(define-inlinable (make-reloc type label base word)
  "Build an internal relocation record: a list of @var{type},
@var{label}, @var{base} and @var{word}.  It refers to symbol
@var{label} from the word that lies @var{word} words after position
@var{base}.  @var{type} may be x8-s24, a 24-bit relative label reference
that the assembler fixes up, or s32, a 32-bit relative reference that
the linker fixes up."
  `(,type ,label ,base ,word))
319
(define-inlinable (reset-asm-start! asm)
  "Move asm-start up to the current write position.  Called once all
the words of an instruction have been written."
  (let ((pos (asm-pos asm)))
    (set-asm-start! asm pos)))
323
e78991aa
AW
(define (record-label-reference asm label)
  "Push an x8-s24 relocation for @var{label} onto the relocation list
of @var{asm}.  It targets the word about to be written and is based at
the start of the current instruction.  The assembler patches the value
in later."
  (let ((base (asm-start asm)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label base
                                       (- (asm-pos asm) base))
                           (asm-relocs asm)))))
331
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Push an s32 relocation for @var{label} onto the relocation list of
@var{asm}.  It targets the word about to be written.  Its base is the
start of the current instruction minus @var{offset}.  The linker patches
the value in later."
  (let ((base (- (asm-start asm) offset)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base
                                       (- (asm-pos asm) base))
                           (asm-relocs asm)))))
339
340
341\f
342
343;;;
344;;; Primitive assemblers are defined by expanding `assembler' for each
345;;; opcode in `(rtl-instruction-list)'.
346;;;
347
(eval-when (expand compile load eval)
  ;; Return an identifier in the lexical context of CTX whose name is
  ;; the names of identifiers A and B joined together.  Defined at
  ;; expansion time because the macros in this file call it.
  (define (id-append ctx a b)
    (let* ((prefix (syntax->datum a))
           (suffix (syntax->datum b))
           (joined (symbol-append prefix suffix)))
      (datum->syntax ctx joined))))
351
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure that
;; takes an <asm> followed by the operands of the instruction, emits
;; the instruction's words, and finally advances asm-start.  WORD0 and
;; WORD* are symbols naming the layout of each 32-bit word.
(define-syntax assembler
  (lambda (x)
    ;; Dispatch on a word-layout symbol at expansion time.  The clause
    ;; that matches yields a syntax object of the form
    ;; ((FORMAL ...) CODE ...), with fresh temporaries standing in for
    ;; the formals named in the clause.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, which
    ;; carries the opcode in its low byte.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; A tail word has no opcode, so all three fields are operands.
       ;; This clause used to declare only (a b) while its code used c,
       ;; leaving c unbound in the generated emitter.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; High 32 bits first, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
458
;; Maps an instruction name (a symbol) to its emit procedure.  Entries
;; are added by define-assembler and define-macro-assembler, and looked
;; up by emit-text.
459(define assemblers (make-hash-table))
460
;; (define-assembler NAME OPCODE KIND ARG ...) defines and exports a
;; procedure called emit-NAME, built by `assembler' from OPCODE and the
;; word layouts ARG ..., and registers it in `assemblers' under NAME.
;; KIND is accepted but not used here.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-id (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-id
               ;; The local binding reuses the emit-NAME identifier.
               (let ((emit-id (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit-id)
                 emit-id))
             (export emit-id)))))))
e78991aa
AW
472
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form for every element INST of (rtl-instruction-list), all wrapped
;; in a single `begin'.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (desc) (datum->syntax #'macro desc))
                           (rtl-instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))
483
;; Expand define-assembler once per entry of (rtl-instruction-list),
;; which defines, registers and exports an emit procedure for each one.
484(visit-opcodes define-assembler)
485
(define (emit-text asm instructions)
  "Assemble @var{instructions} into the assembler @var{asm}.
@var{instructions} is a list of RTL instructions, each a list whose car
names the instruction and whose cdr holds its operands.  May be called
any number of times before @code{link-assembly}."
  (define (assemble-one inst)
    (let ((emitter (hashq-ref assemblers (car inst))))
      (unless emitter
        (error 'bad-instruction inst))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
497
498\f
499
500;;;
501;;; The constant table records a topologically sorted set of literal
502;;; constants used by a program. For example, a pair uses its car and
503;;; cdr, a string uses its stringbuf, etc.
504;;;
505;;; Some things we want to add to the constant table are not actually
506;;; Scheme objects: for example, stringbufs, cache cells for toplevel
507;;; references, or cache cells for non-closure procedures. For these we
508;;; define special record types and add instances of those record types
509;;; to the table.
510;;;
511
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is an immediate value, that is, if bit 1
or bit 2 of its object address is set, and @code{#f} otherwise."
  (logtest 6 (object-address x)))
515
;; Wrapper standing for the stringbuf of a string in the constant
;; table.  intern-constant interns one of these, wrapping the string
;; itself, whenever it interns a string.
516(define-record-type <stringbuf>
517 (make-stringbuf string)
518 stringbuf?
519 (string stringbuf-string))
520
;; Constant-table entry for a statically allocated procedure.  CODE is
;; the label that load-static-procedure passes in; intern-constant emits
;; a static-patch! of word 1 of the entry to refer to it.
521(define-record-type <static-procedure>
522 (make-static-procedure code)
523 static-procedure?
524 (code static-procedure-code))
525
;; Constant-table entry for the storage behind a uniform vector.
;; intern-constant fills BYTES with the result of
;; uniform-array->bytevector.  ELEMENT-SIZE is 4 for bitvectors and the
;; vector's uniform-vector-element-size otherwise.
7bfbc7b1 526(define-record-type <uniform-vector-backing-store>
d65514a2 527 (make-uniform-vector-backing-store bytes element-size)
7bfbc7b1 528 uniform-vector-backing-store?
d65514a2
AW
529 (bytes uniform-vector-backing-store-bytes)
530 (element-size uniform-vector-backing-store-element-size))
7bfbc7b1 531
e78991aa
AW
;; Constant-table entry for a cache cell, identified by SCOPE and KEY.
;; intern-module-cache-cell uses the key #t for the cell that holds a
;; scope's module.
532(define-record-type <cache-cell>
533 (make-cache-cell scope key)
534 cache-cell?
535 (scope cache-cell-scope)
536 (key cache-cell-key))
537
7bfbc7b1
AW
(define (simple-vector? obj)
  "Return @code{#t} if @var{obj} is a vector whose array shape is a
single dimension running from 0 to its length minus one, and @code{#f}
otherwise."
  (and (vector? obj)
       (let ((last-index (1- (vector-length obj))))
         (equal? (array-shape obj) `((0 ,last-index))))))
541
(define (simple-uniform-vector? obj)
  "Return @code{#t} if @var{obj} is an array whose element type is
named by a symbol and whose shape is a single dimension running from 0
to its length minus one, and @code{#f} otherwise."
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last-index (1- (array-length obj))))
         (equal? (array-shape obj) `((0 ,last-index))))))
546
e78991aa
AW
(define (statically-allocatable? x)
  "Return @code{#t} if the non-immediate constant @var{x} can be
allocated statically, and @code{#f} if it needs some kind of allocation
at run time.  Pairs, strings, stringbufs, static procedures and arrays
are statically allocatable."
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
e78991aa
AW
552
(define (intern-constant asm obj)
  "Return the label under which @var{obj} lives in the constant table
of @var{asm}, first adding @var{obj} and the constants it depends on if
it is not there yet.  Returns @code{#f} for immediates, which are never
added to the table."
  (define (intern-dependency dep)
    (intern-constant asm dep))
  ;; Instructions that store VAL into word N of the object at DST, or
  ;; '() when VAL has no label.
  (define (field dst n val)
    (let ((src (intern-dependency val)))
      (cond
       ((not src) '())
       ((statically-allocatable? val)
        `((static-patch! ,dst ,n ,src)))
       (else
        `((static-ref 1 ,src)
          (static-set! 1 ,dst ,n))))))
  ;; Load-time initialization instructions for OBJ, which will live at
  ;; LABEL.  Interns whatever OBJ depends on as a side effect.
  (define (initializers obj label)
    (cond
     ((pair? obj)
      (append (field label 0 (car obj))
              (field label 1 (cdr obj))))
     ((simple-vector? obj)
      ;; Element I is stored in word I+1 of the vector object.
      (let lp ((i 0) (acc '()))
        (if (< i (vector-length obj))
            (lp (1+ i)
                (append-reverse (field label (1+ i) (vector-ref obj i))
                                acc))
            (reverse acc))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((static-patch! ,label 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      `((make-non-immediate 1 ,(intern-dependency (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((static-patch! ,label 1 ,(intern-dependency (make-stringbuf obj)))))
     ((keyword? obj)
      `((static-ref 1 ,(intern-dependency (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      `((make-non-immediate 1 ,(intern-dependency (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? obj) '())
     ((simple-uniform-vector? obj)
      `((static-patch! ,label 2
                       ,(intern-dependency
                         (make-uniform-vector-backing-store
                          (uniform-array->bytevector obj)
                          (if (bitvector? obj)
                              ;; Bitvectors are addressed in
                              ;; 32-bit units.
                              4
                              (uniform-vector-element-size obj)))))))
     (else
      (error "don't know how to intern" obj))))
  (cond
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Computing the initializers may itself add entries to
    ;; asm-constants and asm-inits, so it has to happen before OBJ's
    ;; own entry is recorded: dependencies come first in both.
    (let* ((label (gensym "constant"))
           (inits (initializers obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
      label))))
620
(define (intern-non-immediate asm obj)
  "Add the non-immediate @var{obj} to the constant table and return its
label.  Signals an error if @var{obj} is an immediate."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
627
(define (intern-cache-cell asm scope key)
  "Return the label of the cache cell for @var{scope} and @var{key} in
the constant table, interning a new cell first if none is present."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
633
634;; Return the label of the cell that holds the module for a scope.
(define (intern-module-cache-cell asm scope)
  "Return the label of the cache cell that holds the module for
@var{scope}.  That cell is the one stored under the key @code{#t}."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
638
639
640\f
641
642;;;
643;;; Macro assemblers bridge the gap between primitive instructions and
644;;; some higher-level operations.
645;;;
646
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; a procedure called emit-NAME with the given arguments and body, and
;; registers it in `assemblers' under NAME so that emit-text can
;; dispatch to it.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-id (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-id
               ;; The local binding reuses the emit-NAME identifier.
               (let ((emit-id (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit-id)
                 emit-id))
             (export emit-id)))))))
e78991aa
AW
658
;; Load the constant OBJ into DST.  An immediate gets the shortest
;; make-*-immediate form that its bits (and DST) allow.  Any other
;; object is interned in the constant table, then referenced with
;; make-non-immediate if it is statically allocatable and with
;; static-ref if not.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      (let ((bits (object-address obj)))
        (cond
         ((and (< dst 256) (zero? (ash bits -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash bits -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (let ((static? (statically-allocatable? obj))
            (label (intern-non-immediate asm obj)))
        (if static?
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
674
;; Load into DST a statically allocated procedure whose code is at
;; LABEL.  The procedure object is interned in the constant table.
(define-macro-assembler (load-static-procedure asm dst label)
  (let* ((proc (make-static-procedure label))
         (loc (intern-constant asm proc)))
    (emit-make-non-immediate asm dst loc)))
678
be8b62ca
AW
;; (define-tc7-macro-assembler NAME TC7) defines a macro assembler
;; NAME that takes SLOT, INVERT? and LABEL and emits br-if-tc7 with
;; the fixed type code TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
682
683;; Keep in sync with tags.h. Part of Guile's ABI. Currently unused
becce37b
AW
684;; macro assemblers are commented out. See also
685;; *branching-primcall-arities* in (language cps primitives), the set of
686;; macro-instructions in assembly.scm, and
687;; disassembler.scm:code-annotation.
688;;
689;; FIXME: Define all tc7 values in Scheme in one place, derived from
690;; tags.h.
be8b62ca
AW
;; One macro assembler per tc7 type code.  Only the entries that are
;; not commented out are defined.
;; NOTE(review): the disabled br-if-weak-vector entry uses 13, the same
;; value as br-if-vector -- presumably stale; confirm against tags.h
;; before enabling it.
691(define-tc7-macro-assembler br-if-symbol 5)
692(define-tc7-macro-assembler br-if-variable 7)
693(define-tc7-macro-assembler br-if-vector 13)
694;(define-tc7-macro-assembler br-if-weak-vector 13)
695(define-tc7-macro-assembler br-if-string 21)
696;(define-tc7-macro-assembler br-if-heap-number 23)
697;(define-tc7-macro-assembler br-if-stringbuf 39)
becce37b 698(define-tc7-macro-assembler br-if-bytevector 77)
be8b62ca
AW
699;(define-tc7-macro-assembler br-if-pointer 31)
700;(define-tc7-macro-assembler br-if-hashtable 29)
701;(define-tc7-macro-assembler br-if-fluid 37)
702;(define-tc7-macro-assembler br-if-dynamic-state 45)
703;(define-tc7-macro-assembler br-if-frame 47)
be8b62ca
AW
704;(define-tc7-macro-assembler br-if-vm 55)
705;(define-tc7-macro-assembler br-if-vm-cont 71)
706;(define-tc7-macro-assembler br-if-rtl-program 69)
be8b62ca
AW
707;(define-tc7-macro-assembler br-if-weak-set 85)
708;(define-tc7-macro-assembler br-if-weak-table 87)
709;(define-tc7-macro-assembler br-if-array 93)
d65514a2 710(define-tc7-macro-assembler br-if-bitvector 95)
be8b62ca
AW
711;(define-tc7-macro-assembler br-if-port 125)
712;(define-tc7-macro-assembler br-if-smob 127)
713
;; Start a procedure: define LABEL at the current position and push a
;; new <meta> for it, carrying PROPERTIES, onto the assembler's
;; metadata list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
718
;; Finish the procedure most recently begun: record the current
;; position as its high PC and reverse its arity list, which was
;; accumulated newest first.
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (end-pc (asm-start asm)))
    (set-meta-high-pc! current end-pc)
    (set-meta-arities! current (reverse (meta-arities current)))))
723
;; Begin an arity that takes only the required arguments REQ: forwards
;; to begin-opt-arity with no optional arguments and no rest argument.
724(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
725 (emit-begin-opt-arity asm req '() #f nlocals alternate))
726
;; Begin an arity without keyword arguments: forwards to begin-kw-arity
;; with an empty keyword list and allow-other-keys? set to #f.
727(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
728 (emit-begin-kw-arity asm req opt rest '() #f nlocals alternate))
729
;; Begin one arity of the current procedure.  After validating the
;; arguments, record an <arity> on the current <meta> and emit the
;; cheapest prelude that covers it: kw-prelude if keywords are
;; involved, opt-prelude if there are optional or rest arguments, and
;; standard-prelude otherwise.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  (let* ((current (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (has-rest? (->bool rest)))
    (set-meta-arities! current (cons arity (meta-arities current)))
    (if (or allow-other-keys? (pair? kw-indices))
        (emit-kw-prelude asm nreq nopt has-rest? kw-indices allow-other-keys?
                         nlocals alternate)
        (if (or has-rest? (pair? opt))
            (emit-opt-prelude asm nreq nopt has-rest? nlocals alternate)
            (emit-standard-prelude asm nreq nlocals alternate)))))
758
;; Close the arity most recently begun in the current procedure by
;; recording the current position as its high PC.
(define-macro-assembler (end-arity asm)
  (let* ((current (car (asm-meta asm)))
         (arity (car (meta-arities current))))
    (set-arity-high-pc! arity (asm-start asm))))
e78991aa 762
07c05279
AW
;; Prelude for a procedure that takes exactly NREQ arguments.  With an
;; ALTERNATE, branch there on an argument count mismatch.  Otherwise
;; assert the count, using the combined assert-nargs-ee/locals
;; instruction when both NREQ and the number of extra locals are below
;; 2^12.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (let ((limit (ash 1 12)))
        (if (and (< nreq limit) (< (- nlocals nreq) limit))
            (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
            (begin
              (emit-assert-nargs-ee asm nreq)
              (emit-alloc-frame asm nlocals))))))
07c05279
AW
773
;; Prelude for a procedure with optional and/or rest arguments.  Too
;; few arguments branches to ALTERNATE if there is one and is asserted
;; against otherwise.  Extra arguments are bound as a rest list when
;; REST? is true, and otherwise treated the same way as too few.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (cond
   (alternate (emit-br-if-nargs-lt asm nreq alternate))
   (else (emit-assert-nargs-ge asm nreq)))
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest? (emit-bind-rest asm npositional))
     (alternate (emit-br-if-nargs-gt asm npositional alternate))
     (else (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
07c05279
AW
786
;; Prelude for a procedure with keyword arguments.  After the argument
;; count checks, bind the keyword arguments and allocate the frame.
;; NTOTAL is the larger of NREQ + NOPT and one past the highest index
;; in KW-INDICES.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (unless rest?
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let* ((npositional (+ nreq nopt))
         (ntotal (fold (lambda (kw highest)
                         (match kw
                           (((? keyword?) . idx)
                            (max (1+ idx) highest))))
                       npositional
                       kw-indices))
         (flags (pack-flags allow-other-keys? rest?))
         (kw-label (intern-constant asm kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq flags npositional ntotal kw-label)
    (emit-alloc-frame asm nlocals)))
07c05279 807
;; Define the label SYM at the start of the current instruction.
(define-macro-assembler (label asm sym)
  (let ((table (asm-labels asm))
        (position (asm-start asm)))
    (hashq-set! table sym position)))
e78991aa 810
e675e9bd
AW
;; Record that the code from the current position onwards comes from
;; SOURCE, by pushing a (position . SOURCE) pair onto asm-sources.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
813
;; Emit a static-set! that stores MODULE into the module cache cell
;; for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (let ((cell (intern-module-cache-cell asm scope)))
    (emit-static-set! asm module cell 0)))
e78991aa 817
;; Emit toplevel-box for SYM, using cache cells keyed on SCOPE: one
;; for the module and one for the variable itself.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  ;; The three bindings stay in one `let', in their original order, so
  ;; that the order in which constants get interned is unchanged.
  (let ((sym-loc (intern-non-immediate asm sym))
        (mod-loc (intern-module-cache-cell asm scope))
        (cell-loc (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cell-loc mod-loc sym-loc bound?)))
e78991aa 823
;; Emit module-box for SYM in the module named MODULE-NAME.  The pair
;; (PUBLIC? . MODULE-NAME) is interned as a constant and also serves
;; as the scope of the variable's cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((sym-loc (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-loc (intern-constant asm scope))
         (cell-loc (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst cell-loc scope-loc sym-loc bound?)))
e78991aa
AW
830
831
832\f
833
834;;;
835;;; Helper for linking objects.
836;;;
837
(define (make-object asm name bv relocs labels . kwargs)
  "Return a linker object for the section @var{name} with contents
@var{bv}.  The name is interned in the shstrtab, the section is given
the next free section number, its size is the length of @var{bv}, and a
linker symbol for @var{name} at offset 0 is added in front of
@var{labels}.  @var{kwargs} are passed on to @code{make-elf-section}."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
852
853
854\f
855
856;;;
857;;; Linking the constant table. This code is somewhat intertwingled
858;;; with the intern-constant code above, as that procedure also
859;;; residualizes instructions to initialize constants at load time.
860;;;
861
(define (write-immediate asm buf pos x)
  "Write the object address of @var{x} into @var{buf} at byte offset
@var{pos}, as one word of the target's size and endianness.  Signals an
error if the target word size is neither 4 nor 8."
  (let* ((bits (object-address x))
         (order (asm-endianness asm))
         (width (asm-word-size asm)))
    (cond
     ((eqv? width 4) (bytevector-u32-set! buf pos bits order))
     ((eqv? width 8) (bytevector-u64-set! buf pos bits order))
     (else (error "bad word size" asm)))))
869
(define (emit-init-constants asm)
  "If @var{asm} has accumulated initialization instructions for writable
data, emit a procedure that runs them and return that procedure's label.
Return @code{#f} when there is nothing to initialize."
  (let ((pending (asm-inits asm)))
    (if (null? pending)
        #f
        (let ((entry (gensym "init-constants")))
          (emit-text asm
                     (append
                      (list (list 'begin-program entry '())
                            '(assert-nargs-ee/locals 1 1))
                      ;; The inits are stored newest-first; replay them
                      ;; in the order they were recorded.
                      (reverse pending)
                      (list (list 'load-constant 1 *unspecified*)
                            '(return 1)
                            '(end-program))))
          entry))))
885
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels."
  (define (align address alignment)
    ;; Round ADDRESS up to the next multiple of ALIGNMENT.
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Type tags written into the first word of statically allocated
  ;; objects.
  ;; NOTE(review): presumably these must match the tc7 codes of the C
  ;; runtime -- confirm against the VM sources when changing them.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    (define (write-uword buf pos val)
      ;; Store VAL at byte offset POS as one unsigned target word.  This
      ;; is the single place where the word size is dispatched on.
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    (define (byte-length x)
      ;; Number of bytes that `write' below emits for X.
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    (define (write buf pos obj)
      ;; Serialize OBJ into BUF at byte offset POS.  The order of the
      ;; clauses matters: the more specific predicates come first.
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (write-uword buf pos tag)
          (write-uword buf (+ pos word-size) len)
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     ;; Terminating zero byte.
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     ;; Terminating zero character.
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (write-uword buf pos tc7-rtl-program)
        (write-uword buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: tag, stringbuf reference, a zero word, length.
        (write-uword buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-uword buf (+ pos (* 2 word-size)) 0)
        (write-uword buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-uword buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (uniform-vector-element-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7))))))
          (write-uword buf pos tag)
          (write-uword buf (+ pos word-size)
                       (if (bitvector? obj)
                           (bitvector-length obj)
                           (bytevector-length obj))) ; length
          (write-uword buf (+ pos (* 2 word-size)) 0) ; pointer
          (write-immediate asm buf (+ pos (* 3 word-size)) #f))) ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Every object starts on an 8-byte boundary; the size computed
      ;; here uses the same alignment as the write loop below.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
e78991aa
AW
1084
(define (link-constants asm)
  "Partition the constants interned in @var{asm} into read-only and
writable sets and link each set into its own section.

Returns three values: an object for the .rodata section, an object for
the .data section, and a label for an initialization procedure.  Any of
these may be @code{#f}."
  (define (all-immediate? vec)
    ;; True when every element of VEC is an immediate.
    (let scan ((i 0))
      (cond
       ((= i (vector-length vec)) #t)
       ((immediate? (vector-ref vec i)) (scan (1+ i)))
       (else #f))))
  (define (shareable? x)
    ;; Objects whose serialized form needs no load-time fixup can go in
    ;; the read-only section.
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (and (immediate? (car x)) (immediate? (cdr x))))
     ((simple-vector? x) (all-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let partition ((i 0) (ro vlist-null) (rw vlist-null))
      (if (= i count)
          ;; Link .rodata first, then .data, then the init procedure.
          (let* ((rodata (link-data asm ro '.rodata))
                 (data (link-data asm rw '.data))
                 (init (emit-init-constants asm)))
            (values rodata data init))
          (match (vlist-ref constants i)
            ((obj . label)
             (if (shareable? obj)
                 (partition (1+ i) (vhash-consq obj label ro) rw)
                 (partition (1+ i) ro (vhash-consq obj label rw)))))))))
1117
1118\f
1119
1120;;;
1121;;; Linking program text.
1122;;;
1123
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label table
@var{labels}, patching @var{buf} in place.  x8-s24 relocations must
refer to a known label.  s32 relocations whose label is known are
patched as well; the others are returned as a list of linker
relocations, for symbols defined outside the text section."
  (let resolve ((pending relocs) (unresolved '()))
    (if (null? pending)
        unresolved
        (let ((reloc (car pending)))
          (resolve
           (cdr pending)
           (match reloc
             ((kind label base word)
              (let ((target (hashq-ref labels label))
                    (slot (+ base word)))
                (cond
                 ((eq? kind 's32)
                  (cond
                   (target
                    (s32-set! buf slot (- target base))
                    unresolved)
                   (else
                    ;; Positions are kept in 32-bit units here; the
                    ;; linker reloc wants a byte offset.
                    (cons (make-linker-reloc 'rel32/4 (* slot 4) word label)
                          unresolved))))
                 ((eq? kind 'x8-s24)
                  (if (not target)
                      (error "unbound near relocation" reloc))
                  ;; Keep the low 8 bits of the word and replace the
                  ;; upper 24 with the relative offset.
                  (let ((low-byte (logand (u32-ref buf slot) #xff)))
                    (u32-set! buf slot (pack-u8-s24 low-byte (- target base)))
                    unresolved))
                 (else (error "bad relocation kind" reloc)))))))))))
1152
(define (process-labels labels)
  "Return a list with one linker symbol per entry of the hash table
@var{labels}, which maps labels to offsets.  The offsets are expected to
be expressed in words; the symbols carry byte offsets."
  (define (label->symbol label word-offset)
    (make-linker-symbol label (* word-offset 4)))
  (hash-map->list label->symbol labels))
e78991aa
AW
1159
(define (swap-bytes! buf)
  "Reverse, in place, the byte order of every 32-bit unit of the
bytevector @var{buf}.  Signal an error if the length of @var{buf} is
not a multiple of 4."
  (let ((total (bytevector-length buf)))
    (if (not (zero? (modulo total 4)))
        (error "unexpected length"))
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      ;; Reading as big-endian and storing as little-endian reverses the
      ;; four bytes whatever the host byte order is.
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1173
(define (link-text-object asm)
  "Concatenate the instruction blocks of @var{asm} into one buffer and
return it as the .rtl-text linker object, swapping the endianness of the
bytes if the target byte order is not the native one."
  (let* ((text (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; Copy the blocks from `asm-prev', in reversed list order, and
         ;; remember where the current block has to go.
         (tail-offset
          (fold (lambda (block offset)
                  (bytevector-copy! block 0 text offset block-bytes)
                  (+ offset block-bytes))
                0
                (reverse (asm-prev asm)))))
    ;; Only the first `asm-idx' words of the current block are copied.
    (bytevector-copy! (asm-cur asm) 0 text tail-offset (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! text))
    (let* ((labels (asm-labels asm))
           (relocs (process-relocs text (asm-relocs asm) labels))
           (symbols (process-labels labels)))
      (make-object asm '.rtl-text text relocs symbols))))
1192
1193
1194\f
1195
1196;;;
1197;;; Linking other sections of the ELF file, like the dynamic segment,
1198;;; the symbol table, etc.
1199;;;
1200
4c906ad5
AW
;; Bytecode version numbers.  `link-dynamic-section' packs them into one
;; word of the dynamic section, as (major << 16) | minor.
;; FIXME: Define these somewhere central, shared with C.
(define *bytecode-major-version* #x0202)
(define *bytecode-minor-version* 3)
4c906ad5 1204
e78991aa
AW
(define (link-dynamic-section asm text rw rw-init)
  "Link the dynamic section for an ELF image with RTL text, given the
writable data section @var{rw} needing fixup from the procedure with
label @var{rw-init}.  @var{rw-init} may be false.  If @var{rw} is true,
it will be added to the GC roots at runtime."
  (define (emit-dynamic-section word-size %set-uword! reloc-type)
    ;; The section is a sequence of (tag, value) word pairs ending with
    ;; a DT_NULL pair: 3 pairs without writable data, 5 with it, and 6
    ;; when there is an init procedure as well.
    (let* ((endianness (asm-endianness asm))
           (word-count (cond ((not rw) 6)
                             (rw-init 12)
                             (else 10)))
           (bv (make-bytevector (* word-size word-count) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      (define (set-label! i label)
        ;; The value is not known yet: record a relocation and store a
        ;; zero placeholder.
        (set! relocs (cons (make-linker-reloc reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (cond
       ((not rw)
        (set-uword! 4 DT_NULL)
        (set-uword! 5 0))
       (else
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (cond
         (rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)
          (set-uword! 10 DT_NULL)
          (set-uword! 11 0))
         (else
          (set-uword! 8 DT_NULL)
          (set-uword! 9 0)))))
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (let ((word-size (asm-word-size asm)))
    (cond
     ((eqv? word-size 4)
      (emit-dynamic-section 4 bytevector-u32-set! 'abs32/1))
     ((eqv? word-size 8)
      (emit-dynamic-section 8 bytevector-u64-set! 'abs64/1))
     (else (error "bad word size" asm)))))
1253
(define (link-shstrtab asm)
  "Link the section-header string table of @var{asm} and return it as
the .shstrtab linker object."
  ;; Intern our own name first: `make-object' interns it too, but only
  ;; after the table contents below have already been linked.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table-bytes '() '()
                 #:type SHT_STRTAB #:flags 0)))
1261
(define (link-symtab text-section asm)
  "Link the ELF symbol table for the procedures recorded in the metadata
of @var{asm}.  Every procedure becomes a hidden STT_FUNC symbol in
@var{text-section}.  Returns two values: the .symtab object and the
.strtab object holding the symbol names."
  (let* ((order (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (table (make-bytevector (* (length metas) entry-size) 0)))
    (let fill ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name (meta-name meta))
               ;; Procedures without a name get the empty string.
               (name-idx (string-table-intern!
                          names
                          (if name (symbol->string name) "")))
               (low (meta-low-pc meta))
               (high (meta-high-pc meta)))
          (write-elf-symbol table offset order word-size
                            (make-elf-symbol
                             #:name name-idx
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 low)
                             #:size (* 4 (- high low))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (fill (cdr metas) (+ offset entry-size)))))
    ;; The string table is created first so that the symbol table can
    ;; link to its section index.
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           table
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1298
b2006c19
AW
1299;;; The .guile.arities section describes the arities that a function can
1300;;; have. It is in two parts: a sorted array of headers describing
1301;;; basic arities, and an array of links out to a string table (and in
1302;;; the case of keyword arguments, to the data section) for argument
1303;;; names. The whole thing is prefixed by a uint32 indicating the
1304;;; offset of the end of the headers array.
1305;;;
1306;;; The arity headers array is a packed array of structures of the form:
1307;;;
1308;;; struct arity_header {
1309;;; uint32_t low_pc;
1310;;; uint32_t high_pc;
1311;;; uint32_t offset;
1312;;; uint32_t flags;
1313;;; uint32_t nreq;
1314;;; uint32_t nopt;
1315;;; }
1316;;;
1317;;; All of the offsets and addresses are 32 bits. We can expand in the
1318;;; future to use 64-bit offsets if appropriate, but there are other
1319;;; aspects of RTL that constrain us to a total image that fits in 32
1320;;; bits, so for the moment we'll simplify the problem space.
1321;;;
1322;;; The following flags values are defined:
1323;;;
1324;;; #x1: has-rest?
1325;;; #x2: allow-other-keys?
1326;;; #x4: has-keyword-args?
1327;;; #x8: is-case-lambda?
d8595af5 1328;;; #x10: is-in-case-lambda?
b2006c19
AW
1329;;;
1330;;; Functions with a single arity specify their number of required and
1331;;; optional arguments in nreq and nopt, and do not have the
1332;;; is-case-lambda? flag set. Their "offset" member links to an array
1333;;; of pointers into the associated .guile.arities.strtab string table,
1334;;; identifying the argument names. This offset is relative to the
1335;;; start of the .guile.arities section. Links for required arguments
1336;;; are first, in order, as uint32 values. Next follow the optionals,
1337;;; then the rest link if has-rest? is set, then a link to the "keyword
1338;;; indices" literal if has-keyword-args? is set. Unlike the other
1339;;; links, the kw-indices link points into the data section, and is
1340;;; relative to the ELF image as a whole.
1341;;;
1342;;; Functions with no arities have no arities information present in the
1343;;; .guile.arities section.
1344;;;
1345;;; Functions with multiple arities are preceded by a header with
1346;;; is-case-lambda? set. All other fields are 0, except low-pc and
1347;;; high-pc which should be the bounds of the whole function. Headers
d8595af5
AW
1348;;; for the individual arities follow, with the is-in-case-lambda? flag
1349;;; set. In this way the whole headers array is sorted in increasing
1350;;; low-pc order, and case-lambda clauses are contained within the
1351;;; [low-pc, high-pc] of the case-lambda header.
b2006c19
AW
1352
;; Length of the prefix to the arities section, in bytes: one uint32
;; holding the offset of the end of the headers array.
(define arities-prefix-len 4)

;; Length of an arity header, in bytes: six uint32 fields.
(define arity-header-len (* 6 4))

;; The offset of the "offset" field within an arity header, in bytes: it
;; is the third uint32 field.
(define arity-header-offset-offset (* 2 4))
1361
;; Pack five booleans into the "flags" word of an arity header:
;;   #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;;   #x8 is-case-lambda?, #x10 is-in-case-lambda?
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?
                                      is-in-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)
          (if is-in-case-lambda? #x10 0)))
b2006c19
AW
1370
(define (meta-arities-size meta)
  ;; Number of bytes that the arities of META occupy in the
  ;; .guile.arities section: nothing for a procedure without arities,
  ;; one header plus links for a single arity, and an additional
  ;; case-lambda header when there are several arities.
  (define (link-count arity)
    ;; One 4-byte link per required and optional argument, one for the
    ;; rest argument if any, one for the keyword indices if any.
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (clause-bytes arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (let ((clauses (meta-arities meta)))
    (cond
     ((null? clauses) 0)
     ((null? (cdr clauses)) (clause-bytes (car clauses)))
     (else
      (fold (lambda (clause total) (+ total (clause-bytes clause)))
            arity-header-len            ; the case-lambda header
            clauses)))))
1387
(define (write-arity-headers metas bv endianness)
  ;; Write the headers array of the arities section into BV, right after
  ;; the prefix, and store the end offset of the array in the prefix.
  ;; Returns two values: that end offset, and an alist that pairs each
  ;; arity with the byte position of its header's "offset" field, in the
  ;; order in which the headers were written.
  (define (put-u32! pos val)
    (bytevector-u32-set! bv pos val endianness))
  (define (put-header! pos low-pc high-pc flags nreq nopt)
    ;; The pc values are multiplied by 4 before they are stored.
    (put-u32! pos (* low-pc 4))
    (put-u32! (+ pos 4) (* high-pc 4))
    (put-u32! (+ pos 8) 0)              ; offset, patched later
    (put-u32! (+ pos 12) flags)
    (put-u32! (+ pos 16) nreq)
    (put-u32! (+ pos 20) nopt))
  (define (put-arity! pos arity in-case-lambda?)
    (put-header! pos
                 (arity-low-pc arity)
                 (arity-high-pc arity)
                 (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f
                                   in-case-lambda?)
                 (length (arity-req arity))
                 (length (arity-opt arity))))
  (define (write-clauses clauses pos offsets in-case-lambda?)
    ;; Write one header per clause; return the next free position and
    ;; the extended (still reversed) offsets alist.
    (let next ((clauses clauses) (pos pos) (offsets offsets))
      (if (null? clauses)
          (values pos offsets)
          (let ((arity (car clauses)))
            (put-arity! pos arity in-case-lambda?)
            (next (cdr clauses)
                  (+ pos arity-header-len)
                  (acons arity
                         (+ pos arity-header-offset-offset)
                         offsets))))))
  (let scan ((metas metas) (pos arities-prefix-len) (offsets '()))
    (if (null? metas)
        (begin
          ;; Fill in the prefix.
          (put-u32! 0 pos)
          (values pos (reverse offsets)))
        (let* ((meta (car metas))
               (clauses (meta-arities meta))
               (multi? (and (pair? clauses) (pair? (cdr clauses)))))
          (when multi?
            ;; Write a case-lambda header, then individual arities.
            ;; The case-lambda header's offset link is 0.
            (put-header! pos (meta-low-pc meta) (meta-high-pc meta)
                         (pack-arity-flags #f #f #f #t #f) 0 0))
          (call-with-values
              (lambda ()
                (write-clauses clauses
                               (if multi? (+ pos arity-header-len) pos)
                               offsets
                               multi?))
            (lambda (pos offsets)
              (scan (cdr metas) pos offsets)))))))
1436
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  ;; Write the argument-name links of every arity in ARITY-OFFSET-PAIRS
  ;; into BV, starting at byte position POS, and patch the "offset"
  ;; field of each arity header to point at that arity's links.  Names
  ;; are interned in STRTAB.  Returns the list of linker relocations for
  ;; the keyword-indices links.  It is an error if the links do not end
  ;; exactly at the end of BV.
  (define order (asm-endianness asm))
  (define (put-name-link! sym at)
    (bytevector-u32-set! bv at
                         (string-table-intern! strtab (symbol->string sym))
                         order)
    (+ at 4))
  (define (argument-names arity)
    ;; Required names, then optionals, then the rest name if any.
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  (let next ((at pos) (pending arity-offset-pairs) (relocs '()))
    (cond
     ((null? pending)
      (if (not (= at (bytevector-length bv)))
          (error "expected to fully fill the bytevector"
                 at (bytevector-length bv)))
      relocs)
     (else
      (let* ((arity (caar pending))
             (header-slot (cdar pending))
             (kw-indices (arity-kw-indices arity)))
        (bytevector-u32-set! bv header-slot at order)
        (let ((after-names (fold put-name-link! at (argument-names arity))))
          (if (null? kw-indices)
              (next after-names (cdr pending) relocs)
              ;; The keyword-indices link takes one more 4-byte slot,
              ;; which is filled in through a relocation.
              ;; FIXME: Assert that kw-indices is already interned.
              (next (+ after-names 4)
                    (cdr pending)
                    (cons (make-linker-reloc 'abs32/1 after-names 0
                                             (intern-constant asm kw-indices))
                          relocs)))))))))
1469
(define (link-arities asm)
  "Link the arity information of the procedures recorded in @var{asm}.
Returns two values: the .guile.arities object and the
.guile.arities.strtab object holding the argument names."
  (let* ((order (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         (total-bytes (fold (lambda (meta acc)
                              (+ acc (meta-arities-size meta)))
                            arities-prefix-len
                            metas))
         (names (make-string-table))
         (table (make-bytevector total-bytes 0)))
    (let-values (((links-start arity-offset-pairs)
                  (write-arity-headers metas table order)))
      ;; The links are written, and their names interned, before the
      ;; string table is linked.
      (let* ((kw-indices-relocs
              (write-arity-links asm table links-start
                                 arity-offset-pairs names))
             (strtab (make-object asm '.guile.arities.strtab
                                  (link-string-table! names)
                                  '() '()
                                  #:type SHT_STRTAB #:flags 0)))
        (values (make-object asm '.guile.arities
                             table
                             kw-indices-relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link (elf-section-index
                                     (linker-object-section strtab)))
                strtab)))))
1496
9128b1a1
AW
1497;;;
1498;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1499;;; values. Pc and str are both 32 bits wide. (Either could change to
1500;;; 64 bits if appropriate in the future.) Pc is the address of the
0a1d52ac
AW
1501;;; entry to a program, relative to the start of the text section, in
1502;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1503;;; string table section.
9128b1a1
AW
1504;;;
1505
;; The size of a docstrs entry, in bytes: a 32-bit pc followed by a
;; 32-bit string-table index.
(define docstr-size 8)
1508
(define (link-docstrs asm)
  "Link the docstrings of the procedures recorded in @var{asm}.  Returns
two values: the .guile.docstrs object and the .guile.docstrs.strtab
object holding the docstring text."
  (define (documentation-entry? pair)
    (eq? (car pair) 'documentation))
  (define (meta->entry meta)
    ;; Return (BYTE-PC . DOCSTRING) when META has exactly one
    ;; `documentation' property and its value is a string; otherwise
    ;; return #f.
    (let ((first (find-tail documentation-entry? (meta-properties meta))))
      (cond
       ((not first) #f)
       ((find-tail documentation-entry? (cdr first)) #f)
       ((string? (cdar first))
        (cons (* 4 (meta-low-pc meta)) (cdar first)))
       (else #f))))
  (let* ((order (asm-endianness asm))
         (entries (filter-map meta->entry (reverse (asm-meta asm))))
         (names (make-string-table))
         (table (make-bytevector (* (length entries) docstr-size) 0)))
    (let fill ((entries entries) (offset 0))
      (unless (null? entries)
        (match (car entries)
          ((pc . text)
           (bytevector-u32-set! table offset pc order)
           (bytevector-u32-set! table (+ offset 4)
                                (string-table-intern! names text)
                                order)))
        (fill (cdr entries) (+ offset docstr-size))))
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           table
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1546
c4c098e3
AW
1547;;;
;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
;;; values.  Pc and addr are both 32 bits wide.  (Either could change to
;;; 64 bits if appropriate in the future.)  Pc is the address of the
;;; entry to a program, relative to the start of the text section, in
;;; bytes, and addr is the address of the associated properties alist,
;;; relative to the start of the ELF image.
1554;;;
1555;;; Since procedure properties are stored in the data sections, we need
1556;;; to link the procedures property section first. (Note that this
1557;;; constraint does not apply to the arities section, which may
1558;;; reference the data sections via the kw-indices literal, because
1559;;; assembling the text section already makes sure that the kw-indices
1560;;; are interned.)
1561;;;
1562
;; The size of a procprops entry, in bytes: a 32-bit pc followed by a
;; 32-bit address that is filled in through a relocation.
(define procprops-size 8)
1565
(define (link-procprops asm)
  "Link the .guile.procprops section for @var{asm} and return it as a
linker object.  There is one entry for each procedure that still has
properties after its name and its string docstring are removed."
  (define (assoc-remove-one alist key value-pred)
    ;; Look at the first entry of ALIST whose key is KEY: drop it if
    ;; VALUE-PRED accepts its value, keep it otherwise.  Entries after
    ;; it are never examined.
    (if (null? alist)
        '()
        (let ((k (caar alist))
              (v (cdar alist))
              (rest (cdr alist)))
          (cond
           ((not (eq? k key))
            (acons k v (assoc-remove-one rest key value-pred)))
           ((value-pred v) rest)
           (else (acons key v rest))))))
  (define (extra-properties meta)
    (let ((nameless (assoc-remove-one (meta-properties meta)
                                      'name
                                      (lambda (x) #t))))
      (assoc-remove-one nameless 'documentation string?)))
  (define (meta->entry meta)
    ;; (BYTE-PC . PROPS), or #f if no properties are left.
    (let ((props (extra-properties meta)))
      (and (pair? props)
           (cons (* 4 (meta-low-pc meta)) props))))
  (let* ((order (asm-endianness asm))
         (entries (filter-map meta->entry (reverse (asm-meta asm))))
         (table (make-bytevector (* (length entries) procprops-size) 0)))
    (let fill ((entries entries) (offset 0) (relocs '()))
      (if (null? entries)
          (make-object asm '.guile.procprops
                       table
                       relocs '()
                       #:type SHT_PROGBITS #:flags 0)
          (let ((pc (caar entries))
                (props (cdar entries)))
            (bytevector-u32-set! table offset pc order)
            ;; The second word of the entry is the address of the
            ;; interned property alist, resolved by the linker.
            (fill (cdr entries)
                  (+ offset procprops-size)
                  (cons (make-linker-reloc 'abs32/1 (+ offset 4) 0
                                           (intern-constant asm props))
                        relocs)))))))
1604
a862d8c1
AW
1605;;;
1606;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1607;;; sections provide line number and local variable liveness
1608;;; information. Their format is defined by the DWARF
1609;;; specifications.
1610;;;
1611
;; Source language reported in the DWARF compile unit.  ASM is ignored
;; for now and the result is always the symbol `scheme'.
(define (asm-language asm)
  ;; FIXME: Plumb language through to the assembler.
  'scheme)
1615
0a7340ac 1616;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_lines
a862d8c1 1617(define (link-debug asm)
0a7340ac
AW
1618 (define (put-s8 port val)
1619 (let ((bv (make-bytevector 1)))
1620 (bytevector-s8-set! bv 0 val)
1621 (put-bytevector port bv)))
1622
a862d8c1
AW
1623 (define (put-u16 port val)
1624 (let ((bv (make-bytevector 2)))
1625 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1626 (put-bytevector port bv)))
1627
1628 (define (put-u32 port val)
1629 (let ((bv (make-bytevector 4)))
1630 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1631 (put-bytevector port bv)))
1632
1633 (define (put-u64 port val)
1634 (let ((bv (make-bytevector 8)))
1635 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1636 (put-bytevector port bv)))
1637
1638 (define (put-uleb128 port val)
1639 (let lp ((val val))
1640 (let ((next (ash val -7)))
1641 (if (zero? next)
1642 (put-u8 port val)
1643 (begin
1644 (put-u8 port (logior #x80 (logand val #x7f)))
1645 (lp next))))))
1646
d56ab5a9
AW
1647 (define (put-sleb128 port val)
1648 (let lp ((val val))
31602aa0 1649 (if (<= 0 (+ val 64) 127)
d56ab5a9
AW
1650 (put-u8 port (logand val #x7f))
1651 (begin
1652 (put-u8 port (logior #x80 (logand val #x7f)))
1653 (lp (ash val -7))))))
1654
0a7340ac
AW
1655 (define (port-position port)
1656 (seek port 0 SEEK_CUR))
1657
a862d8c1
AW
1658 (define (meta->subprogram-die meta)
1659 `(subprogram
1660 (@ ,@(cond
1661 ((meta-name meta)
1662 => (lambda (name) `((name ,(symbol->string name)))))
1663 (else
1664 '()))
1665 (low-pc ,(meta-label meta))
1666 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1667
1668 (define (make-compile-unit-die asm)
1669 `(compile-unit
1670 (@ (producer ,(string-append "Guile " (version)))
1671 (language ,(asm-language asm))
1672 (low-pc .rtl-text)
0a7340ac
AW
1673 (high-pc ,(* 4 (asm-pos asm)))
1674 (stmt-list 0))
a862d8c1
AW
1675 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1676
1677 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1678 ((die-relocs) '())
1679 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1680 ;; (tag has-kids? attrs forms) -> code
1681 ((abbrevs) vlist-null)
0a7340ac
AW
1682 ((strtab) (make-string-table))
1683 ((line-port get-line-bv) (open-bytevector-output-port))
1684 ((line-relocs) '())
1685 ;; file -> code
1686 ((files) vlist-null))
a862d8c1
AW
1687
1688 (define (write-abbrev code tag has-children? attrs forms)
1689 (put-uleb128 abbrev-port code)
1690 (put-uleb128 abbrev-port (tag-name->code tag))
1691 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1692 (for-each (lambda (attr form)
1693 (put-uleb128 abbrev-port (attribute-name->code attr))
1694 (put-uleb128 abbrev-port (form-name->code form)))
1695 attrs forms)
1696 (put-uleb128 abbrev-port 0)
1697 (put-uleb128 abbrev-port 0))
1698
1699 (define (intern-abbrev tag has-children? attrs forms)
1700 (let ((key (list tag has-children? attrs forms)))
1701 (match (vhash-assoc key abbrevs)
1702 ((_ . code) code)
0a7340ac 1703 (#f (let ((code (1+ (vlist-length abbrevs))))
a862d8c1
AW
1704 (set! abbrevs (vhash-cons key code abbrevs))
1705 (write-abbrev code tag has-children? attrs forms)
1706 code)))))
1707
0a7340ac
AW
1708 (define (intern-file file)
1709 (match (vhash-assoc file files)
1710 ((_ . code) code)
1711 (#f (let ((code (1+ (vlist-length files))))
1712 (set! files (vhash-cons file code files))
1713 code))))
1714
(define (write-sources)
  ;; Write the line-number information for the source locations
  ;; recorded in (asm-sources asm) to line-port: first a DWARF 2 line
  ;; program header, then the statement program.  Relocations for the
  ;; address operands are pushed onto line-relocs.

  ;; Choose line base and line range values that will allow for an
  ;; address advance range of 16 words.  The special opcode range is
  ;; from 10 to 255, so 246 values.
  (define base -4)
  (define range 15)

  ;; First pass: turn each (PC . PROPERTIES) entry into a
  ;; (PC FILE LINE COLUMN) row, accumulating the rows in reverse of the
  ;; order in which (asm-sources asm) lists them.
  (let lp ((sources (asm-sources asm)) (out '()))
    (match sources
      (((pc . s) . sources)
       (let ((file (assq-ref s 'filename))
             (line (assq-ref s 'line))
             (col (assq-ref s 'column)))
         (lp sources
             ;; Guile line and column numbers are 0-indexed, but they
             ;; are 1-indexed for DWARF.  An entry without a line or a
             ;; column cannot be expressed as a row of the line table;
             ;; drop it instead of letting an unspecified value reach
             ;; the arithmetic of the statement program below.
             (if (and line col)
                 (cons (list pc
                             (if file (intern-file file) 0)
                             (1+ line)
                             (1+ col))
                       out)
                 out))))
      (()
       ;; Compilation unit header for .debug_line.  We write in
       ;; DWARF 2 format because more tools understand it than DWARF
       ;; 4, which incompatibly adds another field to this header.

       (put-u32 line-port 0)             ; Length; will patch later.
       (put-u16 line-port 2)             ; DWARF 2 format.
       (put-u32 line-port 0)             ; Prologue length; will patch later.
       (put-u8 line-port 4)              ; Minimum instruction length: 4 bytes.
       (put-u8 line-port 1)              ; Default is-stmt: true.

       (put-s8 line-port base)           ; Line base.  See the DWARF standard.
       (put-u8 line-port range)          ; Line range.  See the DWARF standard.
       (put-u8 line-port 10)             ; Opcode base: the first "special" opcode.

       ;; A table of the number of uleb128 arguments taken by each
       ;; of the standard opcodes.
       (put-u8 line-port 0)              ; 1: copy
       (put-u8 line-port 1)              ; 2: advance-pc
       (put-u8 line-port 1)              ; 3: advance-line
       (put-u8 line-port 1)              ; 4: set-file
       (put-u8 line-port 1)              ; 5: set-column
       (put-u8 line-port 0)              ; 6: negate-stmt
       (put-u8 line-port 0)              ; 7: set-basic-block
       (put-u8 line-port 0)              ; 8: const-add-pc
       (put-u8 line-port 1)              ; 9: fixed-advance-pc

       ;; Include directories, as a zero-terminated sequence of
       ;; nul-terminated strings.  Nothing, for the moment.
       (put-u8 line-port 0)

       ;; File table.  For each file that contributes to this
       ;; compilation unit, a nul-terminated file name string, and a
       ;; uleb128 for each of the directory the file was found in, the
       ;; modification time, and the file's size in bytes.  We pass
       ;; zero for the latter three fields.
       (vlist-fold-right
        (lambda (pair seed)
          (match pair
            ((file . code)
             (put-bytevector line-port (string->utf8 file))
             (put-u8 line-port 0)
             (put-uleb128 line-port 0)   ; directory
             (put-uleb128 line-port 0)   ; mtime
             (put-uleb128 line-port 0))) ; size
          seed)
        #f
        files)
       (put-u8 line-port 0)              ; 0 byte terminating file list.

       ;; Patch prologue length.  The field lives at byte offset 6 (after
       ;; the u32 length and the u16 version) and counts the bytes that
       ;; follow it, hence the subtraction of 10.
       (let ((offset (port-position line-port)))
         (seek line-port 6 SEEK_SET)
         (put-u32 line-port (- offset 10))
         (seek line-port offset SEEK_SET))

       ;; Now write the statement program.
       (let ()
         (define (extended-op opcode payload-len)
           (put-u8 line-port 0)                     ; extended op
           (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
           (put-uleb128 line-port opcode))
         (define (set-address sym)
           ;; Emit a set-address extended op whose operand is a zero
           ;; placeholder, and record a relocation against SYM at the
           ;; position of that placeholder.
           (define (add-reloc! kind)
             (set! line-relocs
                   (cons (make-linker-reloc kind
                                            (port-position line-port)
                                            0
                                            sym)
                         line-relocs)))
           (match (asm-word-size asm)
             (4
              (extended-op 2 4)
              (add-reloc! 'abs32/1)
              (put-u32 line-port 0))
             (8
              (extended-op 2 8)
              (add-reloc! 'abs64/1)
              (put-u64 line-port 0))))
         (define (end-sequence pc)
           ;; Advance the address from PC to the assembler's current
           ;; position, then emit the end-sequence extended op.
           (let ((pc-inc (- (asm-pos asm) pc)))
             (put-u8 line-port 2)                   ; advance-pc
             (put-uleb128 line-port pc-inc))
           (extended-op 1 0))
         (define (advance-pc pc-inc line-inc)
           ;; Add a row after advancing the address by PC-INC and the
           ;; line by LINE-INC, using a single special opcode when the
           ;; increments fit and standard opcodes otherwise.
           (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
             (cond
              ((or (< line-inc base) (>= line-inc (+ base range)))
               ;; Line increment not expressible by a special opcode.
               (advance-line line-inc)
               (advance-pc pc-inc 0))
              ((<= spec 255)
               (put-u8 line-port spec))
              ((< spec 500)
               (put-u8 line-port 8)                 ; const-advance-pc
               ;; floor/ yields a quotient and a remainder; only the
               ;; quotient (the first value) is used here.
               (advance-pc (- pc-inc (floor/ (- 255 10) range))
                           line-inc))
              (else
               (put-u8 line-port 2)                 ; advance-pc
               (put-uleb128 line-port pc-inc)
               (advance-pc 0 line-inc)))))
         (define (advance-line inc)
           (put-u8 line-port 3)
           (put-sleb128 line-port inc))
         (define (set-file file)
           (put-u8 line-port 4)
           (put-uleb128 line-port file))
         (define (set-column col)
           (put-u8 line-port 5)
           (put-uleb128 line-port col))

         (set-address '.rtl-text)

         ;; Second pass: walk the rows, tracking the current pc, file,
         ;; line and column, and emit opcodes only when something
         ;; other than the pc changes.
         (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
           (match in
             (()
              (when (null? out)
                ;; There was no source info in the first place.  Set
                ;; file register to 0 before adding final row.
                (set-file 0))
              (end-sequence pc))
             (((pc* file* line* col*) . in*)
              (cond
               ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                (lp in* pc file line col))
               (else
                (unless (eqv? col col*)
                  (set-column col*))
                (unless (eqv? file file*)
                  (set-file file*))
                (advance-pc (- pc* pc) (- line* line))
                (lp in* pc* file* line* col*)))))))))))
0a7340ac 1867
a862d8c1
AW
(define (compute-code attr val)
  ;; Translate the value VAL of attribute ATTR into the datum that is
  ;; later handed to write-value.  Names and producers are interned in
  ;; strtab, the language goes through language-name->code, and the
  ;; remaining known attributes are passed through unchanged.  Any
  ;; other attribute makes the match fail.
  (match attr
    ((or 'name 'producer) (string-table-intern! strtab val))
    ((or 'low-pc 'high-pc 'stmt-list) val)
    ('language (language-name->code val))))
a862d8c1
AW
1876
(define (exact-integer? val)
  ;; #t when VAL is a number that is both an integer and exact; #f for
  ;; every other value, including non-numbers.
  (if (number? val)
      (and (integer? val) (exact? val))
      #f))
1879
(define (choose-form attr val code)
  ;; Pick the symbol naming the form in which CODE, the computed datum
  ;; for attribute ATTR with original value VAL, will be written.
  (define (integer-form n)
    ;; Smallest fixed-size data form that holds the non-negative
    ;; integer N; sleb128 for negatives, uleb128 beyond 64 bits.
    (cond
     ((negative? n) 'sleb128)
     ((<= n #xff) 'data1)
     ((<= n #xffff) 'data2)
     ((<= n #xffffffff) 'data4)
     ((<= n #xffffffffffffffff) 'data8)
     (else 'uleb128)))
  (cond
   ((string? val) 'strp)
   ((eq? attr 'stmt-list) 'sec-offset)
   ((exact-integer? code) (integer-form code))
   ((symbol? val) 'addr)
   (else (error "unhandled case" attr val code))))
1894
(define (add-die-relocation! kind sym)
  ;; Push onto die-relocs a linker relocation of type KIND that refers
  ;; to SYM and is located at the current write position of die-port.
  ;; NOTE(review): the literal 0 is presumably the addend -- confirm
  ;; against make-linker-reloc.
  (let* ((offset (port-position die-port))
         (reloc (make-linker-reloc kind offset 0 sym)))
    (set! die-relocs (cons reloc die-relocs))))
1899
(define (write-value code form)
  ;; Write CODE to die-port using the encoding named by FORM.  An
  ;; address is written as a zero placeholder of the assembler's word
  ;; size, with a relocation against the symbol CODE recorded at that
  ;; position.  An unknown form or word size makes the match fail.
  (match form
    ('data1 (put-u8 die-port code))
    ('data2 (put-u16 die-port code))
    ;; Section offsets and string-table offsets share the 32-bit
    ;; encoding with data4.
    ((or 'data4 'sec-offset 'strp) (put-u32 die-port code))
    ('data8 (put-u64 die-port code))
    ('uleb128 (put-uleb128 die-port code))
    ('sleb128 (put-sleb128 die-port code))
    ('addr
     (let ((word-size (asm-word-size asm)))
       (match word-size
         (4
          (add-die-relocation! 'abs32/1 code)
          (put-u32 die-port 0))
         (8
          (add-die-relocation! 'abs64/1 code)
          (put-u64 die-port 0)))))))
a862d8c1
AW
1918
(define (write-die die)
  ;; Serialize DIE, an s-expression of the shape
  ;;   (TAG (@ (ATTR VAL) ...) CHILD ...)
  ;; to die-port: its abbreviation code, then each attribute value,
  ;; then, if there are any, the children followed by a zero entry.
  (match die
    ((tag ('@ (attrs vals) ...) kids ...)
     (let* ((codes (map compute-code attrs vals))
            (forms (map choose-form attrs vals codes))
            (has-children? (pair? kids)))
       (put-uleb128 die-port
                    (intern-abbrev tag has-children? attrs forms))
       (for-each write-value codes forms)
       (when has-children?
         (for-each write-die kids)
         ;; A zero abbreviation code ends the list of children.
         (put-uleb128 die-port 0))))))
1931
1932 ;; Compilation unit header.
1933 (put-u32 die-port 0) ; Length; will patch later.
1934 (put-u16 die-port 4) ; DWARF 4.
1935 (put-u32 die-port 0) ; Abbrevs offset.
1936 (put-u8 die-port (asm-word-size asm)) ; Address size.
1937
1938 (write-die (make-compile-unit-die asm))
1939
1940 ;; Terminate the abbrevs list.
1941 (put-uleb128 abbrev-port 0)
1942
0a7340ac
AW
1943 (write-sources)
1944
a862d8c1
AW
1945 (values (let ((bv (get-die-bv)))
1946 ;; Patch DWARF32 length.
1947 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1948 (asm-endianness asm))
1949 (make-object asm '.debug_info bv die-relocs '()
1950 #:type SHT_PROGBITS #:flags 0))
1951 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
1952 #:type SHT_PROGBITS #:flags 0)
1953 (make-object asm '.debug_str (link-string-table! strtab) '() '()
1954 #:type SHT_PROGBITS #:flags 0)
1955 (make-object asm '.debug_loc #vu8() '() '()
0a7340ac
AW
1956 #:type SHT_PROGBITS #:flags 0)
1957 (let ((bv (get-line-bv)))
1958 ;; Patch DWARF32 length.
1959 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1960 (asm-endianness asm))
1961 (make-object asm '.debug_line bv line-relocs '()
1962 #:type SHT_PROGBITS #:flags 0)))))
a862d8c1 1963
(define (link-objects asm)
  ;; Run every section linker over ASM, in an order that matters, and
  ;; return the list of resulting linker objects with any #f entries
  ;; removed.
  (let*-values (;; Link procprops before constants, because it probably
                ;; interns more constants.
                ((procprops) (link-procprops asm))
                ((ro rw rw-init) (link-constants asm))
                ;; Link text object after constants, so that the
                ;; constants initializer gets included.
                ((text) (link-text-object asm))
                ((dynamic) (link-dynamic-section asm text rw rw-init))
                ((symtab strtab)
                 (link-symtab (linker-object-section text) asm))
                ((arities arities-strtab) (link-arities asm))
                ((docstrs docstrs-strtab) (link-docstrs asm))
                ((debug-info debug-abbrev debug-str debug-loc debug-line)
                 (link-debug asm))
                ;; This needs to be linked last, because linking other
                ;; sections adds entries to the string table.
                ((shstrtab) (link-shstrtab asm)))
    (let ((objects (list text ro rw dynamic symtab strtab
                         arities arities-strtab
                         docstrs docstrs-strtab procprops
                         debug-info debug-abbrev debug-str
                         debug-loc debug-line
                         shstrtab)))
      (filter identity objects))))
e78991aa
AW
1985
1986
1987\f
1988
1989;;;
1990;;; High-level public interfaces.
1991;;;
1992
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
  ;; Link the individual objects first, then hand them to the ELF
  ;; linker together with the caller's alignment preference.
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))