VM opcodes only have <24-bit slot operands in the first word
[bpt/guile.git] / module / system / vm / assembler.scm
CommitLineData
691697de 1;;; Guile bytecode assembler
e78991aa 2
02c624fc 3;;; Copyright (C) 2001, 2009, 2010, 2012, 2013, 2014 Free Software Foundation, Inc.
e78991aa
AW
4;;;
5;;; This library is free software; you can redistribute it and/or
6;;; modify it under the terms of the GNU Lesser General Public
7;;; License as published by the Free Software Foundation; either
8;;; version 3 of the License, or (at your option) any later version.
9;;;
10;;; This library is distributed in the hope that it will be useful,
11;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;;; Lesser General Public License for more details.
14;;;
15;;; You should have received a copy of the GNU Lesser General Public
16;;; License along with this library; if not, write to the Free Software
17;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19;;; Commentary:
20;;;
21;;; This module implements an assembler that creates an ELF image from
691697de 22;;; bytecode assembly and macro-assembly. The input can be given in
e78991aa
AW
23;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24;;; procedural interface, the emit-OP procedures, but that is not
25;;; currently exported.
26;;;
691697de
AW
27;;; "Primitive instructions" correspond to VM operations. Assemblers
28;;; for primitive instructions are generated programmatically from
29;;; (instruction-list), which itself is derived from the VM sources.
30;;; There are also "macro-instructions" like "label" or "load-constant"
31;;; that expand to 0 or more primitive instructions.
e78991aa
AW
32;;;
33;;; The assembler also handles some higher-level tasks, like creating
34;;; the symbol table, other metadata sections, creating a constant table
35;;; for the whole compilation unit, and writing the dynamic section of
36;;; the ELF file along with the appropriate initialization routines.
37;;;
38;;; Most compilers will want to use the trio of make-assembler,
39;;; emit-text, and link-assembly. That will result in the creation of
40;;; an ELF image as a bytevector, which can then be loaded using
41;;; load-thunk-from-memory, or written to disk as a .go file.
42;;;
43;;; Code:
44
45(define-module (system vm assembler)
46 #:use-module (system base target)
a862d8c1 47 #:use-module (system vm dwarf)
e78991aa
AW
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
691697de 50 #:use-module (language bytecode)
e78991aa 51 #:use-module (rnrs bytevectors)
a862d8c1 52 #:use-module (ice-9 binary-ports)
e78991aa
AW
53 #:use-module (ice-9 vlist)
54 #:use-module (ice-9 match)
55 #:use-module (srfi srfi-1)
56 #:use-module (srfi srfi-4)
57 #:use-module (srfi srfi-9)
58 #:use-module (srfi srfi-11)
59 #:export (make-assembler
60 emit-text
4dfae1bf 61 link-assembly))
e78991aa
AW
62
63
64\f
65
691697de 66;;; Bytecode consists of 32-bit units, often subdivided in some way.
e78991aa
AW
67;;; These helpers create one 32-bit unit from multiple components.
68
cb8054c7
AW
;; Pack X into bits 0-7 and Y into bits 8-31 of one 32-bit word.
;; Both fields are unsigned.  Each component is range-checked here so
;; that an oversized operand (for example a slot number that needs more
;; than 24 bits) is reported with the offending value, instead of
;; surfacing later as an unrelated failure when the word is stored.
(define-inlinable (pack-u8-u24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y #xffffff)
    (error "out of range" y))
  (logior x (ash y 8)))
73
cb8054c7
AW
;; Pack the unsigned 8-bit value X into bits 0-7 and the value Y into
;; bits 8-31 of one 32-bit word.  A negative Y is stored in 24-bit two's
;; complement form.  The accepted range for Y is -#x800000 through
;; #xffffff inclusive: the whole signed 24-bit range, plus (as before)
;; non-negative values that fit in 24 unsigned bits.
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (logior x (ash (cond
                  ;; -#x800000 is the most negative 24-bit value and
                  ;; must be accepted; it encodes as #x800000.
                  ((<= (- #x800000) y -1)
                   (+ y #x1000000))
                  ((<= 0 y #xffffff)
                   y)
                  (else (error "out of range" y)))
                 8)))
84
cb8054c7
AW
;; Pack a 1-bit flag X (bit 0), a 7-bit value Y (bits 1-7) and a 24-bit
;; value Z (bits 8-31) into one 32-bit word.  All fields are unsigned
;; and all three are range-checked, so a bad operand is reported with
;; its value rather than corrupting or overflowing the word.
(define-inlinable (pack-u1-u7-u24 x y z)
  (unless (<= 0 x 1)
    (error "out of range" x))
  (unless (<= 0 y 127)
    (error "out of range" y))
  (unless (<= 0 z #xffffff)
    (error "out of range" z))
  (logior x (ash y 1) (ash z 8)))
91
cb8054c7
AW
;; Pack an 8-bit value X (bits 0-7) and two 12-bit values Y (bits 8-19)
;; and Z (bits 20-31) into one 32-bit word.  All fields are unsigned and
;; all three are range-checked.
(define-inlinable (pack-u8-u12-u12 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 4095)
    (error "out of range" y))
  (unless (<= 0 z 4095)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 20)))
98
cb8054c7
AW
;; Pack two 8-bit values X (bits 0-7) and Y (bits 8-15) and a 16-bit
;; value Z (bits 16-31) into one 32-bit word.  All fields are unsigned
;; and all three are range-checked.
(define-inlinable (pack-u8-u8-u16 x y z)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z 65535)
    (error "out of range" z))
  (logior x (ash y 8) (ash z 16)))
105
cb8054c7
AW
;; Pack four unsigned 8-bit values into one 32-bit word: X in bits 0-7,
;; Y in bits 8-15, Z in bits 16-23 and W in bits 24-31.  Every component
;; is range-checked.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= 0 y 255)
    (error "out of range" y))
  (unless (<= 0 z 255)
    (error "out of range" z))
  (unless (<= 0 w 255)
    (error "out of range" w))
  (logior x (ash y 8) (ash z 16) (ash w 24)))
114
07c05279
AW
;; Combine boolean flags into a bitfield: the first flag contributes
;; bit 0 (value 1), the second bit 1 (value 2).  Any true value sets the
;; corresponding bit.
(define-syntax pack-flags
  (syntax-rules ()
    ;; Only the two-flag form exists so far; add clauses as needed.
    ((_ f1 f2)
     (logior (if f1 1 0)
             (if f2 2 0)))))
120
e78991aa
AW
121;;; Helpers to read and write 32-bit units in a buffer.
122
;; Word-indexed accessors: N counts 32-bit units, so the byte offset
;; into BUF is N * 4.  All four use the native byte order.

;; Read the unsigned 32-bit word at word index N.
(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))

;; Store VAL as an unsigned 32-bit word at word index N.
(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))

;; Read the signed 32-bit word at word index N.
(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))

;; Store VAL as a signed 32-bit word at word index N.
(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
134
135
136\f
137
138;;; A <meta> entry collects metadata for one procedure. Procedures are
691697de 139;;; written as contiguous ranges of bytecode.
e78991aa 140;;;
2a4daafd
AW
;; Evaluate ARG once and check it against the `match' PATTERN.  If it
;; does not match, signal an error whose message is "expected " followed
;; by KIND, with the offending value as irritant.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((value arg))
       (unless (match value
                 (pattern #t)
                 (_ #f))
         (error (string-append "expected " kind) value))))))
145
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; The label passed to begin-program for this procedure.
  (label meta-label)
  ;; Property alist; make-meta checks that its keys are symbols.
  (properties meta-properties set-meta-properties!)
  ;; asm-start at the point where begin-program was assembled.
  (low-pc meta-low-pc)
  ;; #f until end-program records asm-start at the procedure's end.
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; List of <arity> records.  begin-kw-arity conses new ones onto the
  ;; front and end-program reverses the list.
  (arities meta-arities set-meta-arities!))
e78991aa 154
;; Create the <meta> record for a procedure that starts at LOW-PC.
;; LABEL must be an exact integer or a symbol, and PROPERTIES an alist
;; whose keys are symbols; otherwise an error is signalled.  The high-pc
;; starts out as #f and the arity list as empty.
(define (make-meta label properties low-pc)
  ;; The message names both accepted kinds of label, matching the
  ;; pattern that is actually checked.
  (assert-match label (or (? exact-integer?) (? symbol?))
                "exact integer or symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (%make-meta label properties low-pc #f '()))
2a4daafd
AW
159
;; Look up the `name' entry in META's property alist.  Yields #f when
;; there is no such entry.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
162
3185c907
AW
;; Metadata for one <lambda-case>.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc definitions)
  arity?
  ;; Argument description, as given to begin-kw-arity: lists of symbols
  ;; for REQ and OPT, a symbol or #f for REST, an alist of keyword ->
  ;; integer for KW-INDICES, and a boolean for ALLOW-OTHER-KEYS?.
  (req arity-req)
  (opt arity-opt)
  (rest arity-rest)
  (kw-indices arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  ;; asm-start where the arity begins.
  (low-pc arity-low-pc)
  ;; #f until end-arity records asm-start at the arity's end.
  (high-pc arity-high-pc set-arity-high-pc!)
  ;; Vectors pushed by the `definition' macro-assembler; end-arity
  ;; reverses the list.
  (definitions arity-definitions set-arity-definitions!))
3185c907 176
e78991aa
AW
;; Size of one code block, in 32-bit words.  Defined as identifier
;; syntax, so every reference expands to the literal 32.
(define-syntax *block-size*
  (identifier-syntax 32))
178
179;;; An assembler collects all of the words emitted during assembly, and
180;;; also maintains ancillary information such as the constant table, a
181;;; relocation list, and so on.
182;;;
691697de 183;;; Bytecode consists of 32-bit units. We emit bytecode using native
e78991aa
AW
184;;; endianness. If we're targeting a foreign endianness, we byte-swap
185;;; the bytevector as a whole instead of conditionalizing each access.
186;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources
            dead-slot-maps)
  asm?

  ;; Bytecode is written into what is logically a growable vector,
  ;; implemented as a list of fixed-size blocks.  asm-cur is the block
  ;; currently being filled, and asm-idx is the index of the next free
  ;; word in that block, in 32-bit units.
  ;;
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; asm-start is an absolute position: the offset, in u32 units, of the
  ;; beginning of the instruction being assembled.  It is updated after
  ;; all the words of one primitive instruction have been written.  It
  ;; models the position of the instruction pointer during execution,
  ;; given that the VM updates the IP only at the end of executing an
  ;; instruction, and is thus useful for computing offsets between two
  ;; points in a program.
  ;;
  (start asm-start set-asm-start!)

  ;; The list of previously written (full) blocks, most recent first.
  ;;
  (prev asm-prev set-asm-prev!)

  ;; The number of u32 words written in asm-prev, which is the same as
  ;; the offset of the current block.
  ;;
  (written asm-written set-asm-written!)

  ;; A hash table mapping labels to positions, for the labels defined in
  ;; this compilation unit.  Entries are added with hashq-set! by the
  ;; `label' macro-assembler.
  ;;
  (labels asm-labels set-asm-labels!)

  ;; A list of relocations needed by the program text.  We use an
  ;; internal representation for relocations (see make-reloc).  x8-s24
  ;; label references within the text are patched up by the assembler;
  ;; other kinds of relocations are later reified as linker relocations
  ;; and resolved by the linker.
  ;;
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  ;;
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table, as a vhash of object -> label.  All constants
  ;; get de-duplicated and written into separate sections -- either the
  ;; .rodata section, for read-only data, or .data, for constants that
  ;; need initialization at load-time (like symbols).  Constants can
  ;; depend on other constants (e.g. a symbol depending on a stringbuf),
  ;; so order in this table is important.
  ;;
  (constants asm-constants set-asm-constants!)

  ;; A list of instructions needed to initialize the constants, kept in
  ;; reverse order.  Will run in a thunk with 2 local variables.
  ;;
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, for section names.
  ;;
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; The section number for the next section to be written.
  ;;
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta>, corresponding to procedure metadata.  The most
  ;; recently begun procedure is first.
  ;;
  (meta asm-meta set-asm-meta!)

  ;; A list of (pos . source) pairs, indicating source information.  POS
  ;; is relative to the beginning of the text section, and SOURCE is in
  ;; the same format that source-properties returns.
  ;;
  (sources asm-sources set-asm-sources!)

  ;; A list of (pos proc-slot . dead-slot-map) entries, indicating dead
  ;; slot maps.  POS is relative to the beginning of the text section.
  ;; DEAD-SLOT-MAP is a bitfield of slots that are dead at call sites,
  ;; as an integer.
  ;;
  (dead-slot-maps asm-dead-slot-maps set-asm-dead-slot-maps!))
e78991aa
AW
278
;; Allocate a new code block: a u32vector holding *block-size* words.
(define-inlinable (fresh-block)
  (make-u32vector *block-size*))
281
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a fresh assembler for a target with the given @var{word-size}
and @var{endianness}.  Both default to the values of the configured
target."
  (let ((first-block (fresh-block))
        (labels (make-hash-table))
        (shstrtab (make-string-table)))
    ;; Positional arguments, in <asm> constructor order:
    (make-asm first-block 0 0 '() 0       ; cur idx start prev written
              labels '()                  ; labels relocs
              word-size endianness
              vlist-null '()              ; constants inits
              shstrtab 1                  ; shstrtab next-section-number
              '() '() '())))              ; meta sources dead-slot-maps
e78991aa
AW
293
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
297
(define-inlinable (asm-pos asm)
  "Return the absolute offset, in 32-bit units, at which the next word
will be written: the words in already-closed blocks plus the index into
the current block."
  (+ (asm-written asm) (asm-idx asm)))
302
(define (allocate-new-block asm)
  "Retire the current block onto the list of previous blocks and make a
fresh, empty block current, so that the next word goes there."
  ;; Capture the retiring block and the absolute position before any
  ;; field is changed: asm-pos depends on asm-idx and asm-written.
  (let ((closed (asm-cur asm))
        (end-pos (asm-pos asm)))
    (set-asm-prev! asm (cons closed (asm-prev asm)))
    (set-asm-written! asm end-pos)
    (set-asm-cur! asm (fresh-block))
    (set-asm-idx! asm 0)))
311
(define-inlinable (emit asm u32)
  "Write the 32-bit word @var{u32} at the current position of the
instruction stream.  The current block is assumed to have room for it;
if this word fills the block, a new block is allocated so that the next
word has room too."
  (let* ((slot (asm-idx asm))
         (next (1+ slot)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm next)
    (when (= next *block-size*)
      (allocate-new-block asm))))
320
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} that references
@var{label} and applies to the word located @var{word} words after
position @var{base}.  @var{type} is @code{x8-s24} for a 24-bit relative
label reference that the assembler can fix up itself, or @code{s32} for
a 32-bit relative reference that is left to the linker.  The relocation
is represented as a four-element list."
  (list type label base word))
328
(define-inlinable (reset-asm-start! asm)
  "Move asm-start up to the current write position.  Called once all the
words of one instruction have been written."
  (let ((pos (asm-pos asm)))
    (set-asm-start! asm pos)))
332
e78991aa
AW
(define (record-label-reference asm label)
  "Push an @code{x8-s24} relocation for a reference to the local label
@var{label} from the word about to be written.  The assembler patches
these up itself later."
  (let* ((base (asm-start asm))
         ;; Index of the referring word within the current instruction.
         (word (- (asm-pos asm) base))
         (reloc (make-reloc 'x8-s24 label base word)))
    (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
340
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Push an @code{s32} relocation for a far reference to @var{label} from
the word about to be written.  The relocation base is asm-start minus
@var{offset}.  These relocations are patched up later by the linker."
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base))
         (reloc (make-reloc 's32 label base word)))
    (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
348
349
350\f
351
352;;;
353;;; Primitive assemblers are defined by expanding `assembler' for each
1b780c13 354;;; opcode in `(instruction-list)'.
e78991aa
AW
355;;;
356
;; Available at expansion time as well, because the macros below call
;; it from their transformers.
(eval-when (expand compile load eval)
  ;; Build an identifier whose name is the names of A and B joined
  ;; together, in the lexical context of CTX.
  (define (id-append ctx a b)
    (let ((joined (symbol-append (syntax->datum a) (syntax->datum b))))
      (datum->syntax ctx joined))))
360
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure that
;; assembles one primitive instruction.  WORD0 and WORD* name the
;; encoding of each 32-bit word of the instruction.  The resulting
;; procedure takes the assembler followed by one argument per operand,
;; in word order.
(define-syntax assembler
  (lambda (x)
    ;; Expansion-time dispatch on a word-type symbol.  For the clause
    ;; whose type matches NAME, the result is a syntax object of the form
    ;; ((FORMAL ...) CODE ...), in which the clause's operand names have
    ;; been replaced by fresh temporaries so that the formals of the
    ;; different words of one instruction cannot clash.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, which
    ;; carries the opcode in its low 8 bits.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          ;; The label field is left zero here; a relocation is recorded
          ;; for it.
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; Two words: the high 32 bits, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ;; B32 takes no operand and emits nothing itself.
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        ;; OFFSET is scaled by the target word size expressed in 32-bit
        ;; units.
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         ;; The generated assembler: check the assembler argument, write
         ;; every word, then advance asm-start past the instruction.
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
459
;; Table mapping instruction names (symbols) to their emit-NAME
;; procedures.  Filled in by define-assembler and
;; define-macro-assembler, and consulted by emit-text.
(define assemblers
  (make-hash-table))
461
;; (define-assembler NAME OPCODE KIND ARG ...) defines and exports the
;; procedure emit-NAME, built by `assembler' from OPCODE and the word
;; descriptions ARG ..., and registers it under NAME in `assemblers'.
;; KIND is accepted but not used.
(define-syntax define-assembler
  (lambda (x)
    (syntax-case x ()
      ((_ name opcode kind arg ...)
       ;; `emit' stands for the identifier emit-NAME in the template.
       (with-syntax ((emit (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit
               (let ((emit (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit)
                 emit))
             (export emit)))))))
e78991aa
AW
473
;; (visit-opcodes MACRO ARG ...) expands to one form
;; (MACRO ARG ... . INST) for every entry INST of (instruction-list),
;; which is consulted at expansion time.
(define-syntax visit-opcodes
  (lambda (x)
    (syntax-case x ()
      ((visit-opcodes macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (x) (datum->syntax #'macro x))
                           (instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))

;; Define emit-NAME for every primitive instruction.
(visit-opcodes define-assembler)
486
(define (emit-text asm instructions)
  "Assemble @var{instructions} into the assembler @var{asm}.
@var{instructions} is a list of instructions, each of which is a list
whose head names the instruction and whose tail holds its operands.  An
unknown instruction name signals an error.  This procedure can be called
any number of times before calling @code{link-assembly}."
  (for-each
   (lambda (inst)
     ;; Look up the emitter registered for the instruction name.
     (let ((emitter (or (hashq-ref assemblers (car inst))
                        (error 'bad-instruction inst))))
       (apply emitter asm (cdr inst))))
   instructions))
498
499\f
500
501;;;
502;;; The constant table records a topologically sorted set of literal
503;;; constants used by a program. For example, a pair uses its car and
504;;; cdr, a string uses its stringbuf, etc.
505;;;
506;;; Some things we want to add to the constant table are not actually
507;;; Scheme objects: for example, stringbufs, cache cells for toplevel
508;;; references, or cache cells for non-closure procedures. For these we
509;;; define special record types and add instances of those record types
510;;; to the table.
511;;;
512
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is an immediate value, and @code{#f}
otherwise.  The test is whether bit 1 or bit 2 of the object's address
word is set."
  (let ((tag-bits (logand (object-address x) 6)))
    (not (zero? tag-bits))))
516
;; Stand-in for the buffer of a string constant.  intern-constant adds
;; one to the constant table for every string it interns.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  ;; The string whose contents the buffer holds.
  (string stringbuf-string))
521
;; Stand-in for a statically allocated procedure object in the constant
;; table; see load-static-procedure.
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  ;; Label of the procedure's code.
  (code static-procedure-code))
526
;; Stand-in for the raw storage of a uniform vector constant.
(define-record-type <uniform-vector-backing-store>
  (make-uniform-vector-backing-store bytes element-size)
  uniform-vector-backing-store?
  ;; The contents, as produced by uniform-array->bytevector.
  (bytes uniform-vector-backing-store-bytes)
  ;; Width in bytes chosen by intern-constant for the vector's element
  ;; type (1, 2, 4 or 8).
  (element-size uniform-vector-backing-store-element-size))
7bfbc7b1 532
e78991aa
AW
;; Stand-in for a cache cell in the constant table, identified by the
;; combination of SCOPE and KEY.  See intern-cache-cell.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key cache-cell-key))
538
7bfbc7b1
AW
;; True if OBJ is a vector whose array shape is a single dimension
;; running from 0 to length - 1.
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last-index (1- (vector-length obj))))
         (equal? (array-shape obj)
                 (list (list 0 last-index))))))
542
;; True if OBJ is an array whose element type is named by a symbol and
;; whose shape is a single dimension running from 0 to length - 1.
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last-index (1- (array-length obj))))
         (equal? (array-shape obj)
                 (list (list 0 last-index))))))
547
e78991aa
AW
(define (statically-allocatable? x)
  "Return @code{#t} if the non-immediate constant @var{x} can be laid
out statically, and @code{#f} if it has to be allocated at run time.
Pairs, strings, stringbufs, static procedures and arrays are statically
allocatable."
  (cond
   ((pair? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else (array? x))))
e78991aa
AW
553
(define (intern-constant asm obj)
  "Add @var{obj} to the constant table of @var{asm} and return a label
that can be used to refer to it.  If @var{obj} is already in the table,
its existing label is returned.  Immediates are not interned; for them
the result is @code{#f}."
  (define (recur obj)
    (intern-constant asm obj))
  ;; Instructions that store OBJ into field N of the constant labelled
  ;; DST.  Nothing is needed when OBJ is an immediate (no label).  A
  ;; statically allocatable OBJ is patched in directly; anything else
  ;; is loaded into slot 1 first and then stored.
  (define (field dst n obj)
    (let ((src (recur obj)))
      (if src
          (if (statically-allocatable? obj)
              `((static-patch! ,dst ,n ,src))
              `((static-ref 1 ,src)
                (static-set! 1 ,dst ,n)))
          '())))
  ;; The instructions needed to initialize OBJ, which is to live at
  ;; LABEL.  Interns the constants OBJ depends on as a side effect.
  (define (intern obj label)
    (cond
     ((pair? obj)
      (append (field label 0 (car obj))
              (field label 1 (cdr obj))))
     ((simple-vector? obj)
      ;; Element I goes into field I + 1.
      (let lp ((i 0) (inits '()))
        (if (< i (vector-length obj))
            (lp (1+ i)
                (append-reverse (field label (1+ i) (vector-ref obj i))
                                inits))
            (reverse inits))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((static-patch! ,label 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      ;; Built at load time from the symbol's name.
      `((make-non-immediate 1 ,(recur (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((static-patch! ,label 1 ,(recur (make-stringbuf obj)))))
     ((keyword? obj)
      ;; Built at load time from the corresponding symbol.
      `((static-ref 1 ,(recur (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      ;; Built at load time by parsing the number's printed form.
      `((make-non-immediate 1 ,(recur (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? obj) '())
     ((simple-uniform-vector? obj)
      (let ((width (case (array-type obj)
                     ((vu8 u8 s8) 1)
                     ((u16 s16) 2)
                     ;; Bitvectors are addressed in 32-bit units.
                     ;; Although a complex number is 8 or 16 bytes wide,
                     ;; it should be byteswapped in 4 or 8 byte units.
                     ((u32 s32 f32 c32 b) 4)
                     ((u64 s64 f64 c64) 8)
                     (else
                      (error "unhandled array type" obj)))))
        `((static-patch! ,label 2
                         ,(recur (make-uniform-vector-backing-store
                                  (uniform-array->bytevector obj)
                                  width))))))
     (else
      (error "don't know how to intern" obj))))
  (cond
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Note that calling intern may mutate asm-constants and asm-inits,
    ;; so both are re-read afterwards; OBJ's dependencies end up in the
    ;; table before OBJ itself.
    (let* ((label (gensym "constant"))
           (inits (intern obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
      label))))
627
(define (intern-non-immediate asm obj)
  "Intern @var{obj}, which must not be an immediate, into the constant
table, and return its label.  An immediate @var{obj} signals an error."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
634
(define (intern-cache-cell asm scope key)
  "Intern the cache cell identified by @var{scope} and @var{key} into
the constant table and return its label.  If a cache cell with the same
scope and key is already present, its label is returned instead."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
640
;; Return the label of the cell that holds the module for a scope.  That
;; cell is the cache cell of SCOPE whose key is #t.
(define (intern-module-cache-cell asm scope)
  "Intern the cache cell for the module of @var{scope}, and return its
label."
  (intern-cache-cell asm scope #t))
645
646
647\f
648
649;;;
650;;; Macro assemblers bridge the gap between primitive instructions and
651;;; some higher-level operations.
652;;;
653
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; the procedure emit-NAME with the given formals and body, and
;; registers it under NAME in `assemblers' so that emit-text can
;; dispatch to it like a primitive instruction.
(define-syntax define-macro-assembler
  (lambda (x)
    (syntax-case x ()
      ((_ (name arg ...) body body* ...)
       ;; `emit' stands for the identifier emit-NAME in the template.
       (with-syntax ((emit (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit
               (let ((emit (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit)
                 emit))
             (export emit)))))))
e78991aa
AW
665
;; Load the constant OBJ into slot DST, choosing the instruction by the
;; kind of constant.
(define-macro-assembler (load-constant asm dst obj)
  (cond
   ((immediate? obj)
    ;; Pick the shortest immediate form that can hold the object's
    ;; bits.  The short form additionally needs DST below 256.
    (let ((bits (object-address obj)))
      (cond
       ((and (< dst 256) (zero? (ash bits -16)))
        (emit-make-short-immediate asm dst obj))
       ((zero? (ash bits -32))
        (emit-make-long-immediate asm dst obj))
       (else
        (emit-make-long-long-immediate asm dst obj)))))
   ((statically-allocatable? obj)
    ;; The object itself is laid out statically.
    (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
   (else
    ;; The object is created by the init code; fetch it from its cell.
    (emit-static-ref asm dst (intern-non-immediate asm obj)))))
681
;; Load into slot DST the statically allocated procedure whose code is
;; at LABEL.
(define-macro-assembler (load-static-procedure asm dst label)
  (let* ((proc (make-static-procedure label))
         (loc (intern-constant asm proc)))
    (emit-make-non-immediate asm dst loc)))
685
be8b62ca
AW
;; Define the macro-assembler NAME, taking (asm slot invert? label), as
;; br-if-tc7 specialized to the type code TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
689
;; Keep in sync with tags.h.  Part of Guile's ABI.  Macro assemblers
;; that are currently unused are commented out.  See also
;; *branching-primcall-arities* in (language cps primitives), the set of
;; macro-instructions in assembly.scm, and
;; disassembler.scm:code-annotation.
;;
;; FIXME: Define all tc7 values in Scheme in one place, derived from
;; tags.h.
(define-tc7-macro-assembler br-if-symbol 5)
(define-tc7-macro-assembler br-if-variable 7)
(define-tc7-macro-assembler br-if-vector 13)
;(define-tc7-macro-assembler br-if-weak-vector 13)
(define-tc7-macro-assembler br-if-string 21)
;(define-tc7-macro-assembler br-if-heap-number 23)
;(define-tc7-macro-assembler br-if-stringbuf 39)
(define-tc7-macro-assembler br-if-bytevector 77)
;(define-tc7-macro-assembler br-if-pointer 31)
;(define-tc7-macro-assembler br-if-hashtable 29)
;(define-tc7-macro-assembler br-if-fluid 37)
;(define-tc7-macro-assembler br-if-dynamic-state 45)
;(define-tc7-macro-assembler br-if-frame 47)
;(define-tc7-macro-assembler br-if-vm 55)
;(define-tc7-macro-assembler br-if-vm-cont 71)
;(define-tc7-macro-assembler br-if-rtl-program 69)
;(define-tc7-macro-assembler br-if-weak-set 85)
;(define-tc7-macro-assembler br-if-weak-table 87)
;(define-tc7-macro-assembler br-if-array 93)
(define-tc7-macro-assembler br-if-bitvector 95)
;(define-tc7-macro-assembler br-if-port 125)
;(define-tc7-macro-assembler br-if-smob 127)
720
;; Start a procedure: define LABEL at the current position and push a
;; new <meta> record for it, with PROPERTIES, onto asm-meta.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
725
;; Finish the procedure most recently begun: record where it ends and
;; put its arities, which were accumulated newest-first, back into the
;; order in which they were begun.
(define-macro-assembler (end-program asm)
  (let* ((meta (car (asm-meta asm)))
         (arities (reverse (meta-arities meta))))
    (set-meta-high-pc! meta (asm-start asm))
    (set-meta-arities! meta arities)))
730
;; Begin an arity that has only required arguments: no optionals and no
;; rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
733
;; Begin an arity without keyword arguments: no keyword indices, and
;; other keys not allowed.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
736
;; Begin an arity (one <lambda-case>) of the current procedure.  The
;; arguments are validated, a new <arity> record starting at the current
;; position is pushed onto the procedure's arities, and the matching
;; prelude is emitted.
;;
;; REQ and OPT are lists of symbols, REST is a symbol or #f, KW-INDICES
;; is an alist of keyword -> integer, ALLOW-OTHER-KEYS? is a boolean,
;; NLOCALS is an integer, and ALTERNATE is #f or the label (an exact
;; integer or a symbol) to branch to when the arguments do not fit this
;; arity.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  ;; The message names every accepted kind of label, matching the
  ;; pattern that is actually checked.
  (assert-match alternate (or #f (? exact-integer?) (? symbol?))
                "#f, exact integer or symbol")
  (let* ((meta (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f '()))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (rest? (->bool rest)))
    (set-meta-arities! meta (cons arity (meta-arities meta)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
765
;; Finish the arity most recently begun: put its definitions, which
;; were accumulated newest-first, back in order, and record where the
;; arity ends.
(define-macro-assembler (end-arity asm)
  (let* ((meta (car (asm-meta asm)))
         (arity (car (meta-arities meta)))
         (definitions (reverse (arity-definitions arity))))
    (set-arity-definitions! arity definitions)
    (set-arity-high-pc! arity (asm-start asm))))
e78991aa 770
07c05279
AW
;; Prelude for an arity taking exactly NREQ arguments.  With an
;; ALTERNATE, a wrong argument count branches there; without one it is
;; an error.  The frame is then sized to NLOCALS.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (if (and (< nreq (ash 1 12))
               (< (- nlocals nreq) (ash 1 12)))
          ;; Both counts fit in 12 bits: use the combined instruction,
          ;; which takes the number of locals beyond the arguments.
          (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
          (begin
            (emit-assert-nargs-ee asm nreq)
            (emit-alloc-frame asm nlocals)))))
07c05279
AW
781
;; Prelude for an arity with optional and/or rest arguments but no
;; keywords.  Argument-count mismatches branch to ALTERNATE when there
;; is one and are errors otherwise.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  ;; Lower bound on the argument count.
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate))
   (else
    (emit-assert-nargs-ge asm nreq)))
  ;; Either collect the surplus into a rest list, or enforce the upper
  ;; bound.
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
07c05279
AW
794
;; Prelude for an arity with keyword arguments.  Argument-count
;; mismatches branch to ALTERNATE when there is one; otherwise too few
;; arguments is an error.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate)
    (unless rest?
      (emit-br-if-npos-gt asm nreq (+ nreq nopt) alternate)))
   (else
    (emit-assert-nargs-ge asm nreq)))
  ;; NTOTAL is one more than the highest slot index used by a keyword
  ;; argument, but at least the number of positional slots.
  (let* ((npositional (+ nreq nopt))
         (ntotal (fold (lambda (entry highest)
                         (match entry
                           (((? keyword?) . idx)
                            (max (1+ idx) highest))))
                       npositional
                       kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
07c05279 815
;; Define the label SYM at the start of the next instruction.
(define-macro-assembler (label asm sym)
  (let ((table (asm-labels asm))
        (pos (asm-start asm)))
    (hashq-set! table sym pos)))
e78991aa 818
e675e9bd
AW
;; Record SOURCE as the source information for the position of the next
;; instruction, by pushing a (pos . source) pair onto asm-sources.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
821
78351d10
AW
;; Record that NAME is bound to SLOT from the current position on, in
;; the arity being assembled.  The record is a vector #(name slot
;; offset), where OFFSET is the distance from the start of the arity
;; multiplied by 4, i.e. in bytes.
(define-macro-assembler (definition asm name slot)
  (let* ((meta (car (asm-meta asm)))
         (arity (car (meta-arities meta)))
         (words (- (asm-start asm) (arity-low-pc arity)))
         (def (vector name slot (* words 4))))
    (set-arity-definitions! arity (cons def (arity-definitions arity)))))
828
;; Store the value in slot MODULE into field 0 of the module cache cell
;; for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (let ((cell (intern-module-cache-cell asm scope)))
    (emit-static-set! asm module cell 0)))
e78991aa 832
;; Emit toplevel-box for the variable SYM in SCOPE, with destination
;; slot DST.  The symbol, the scope's module cache cell and the
;; per-variable cache cell are interned first.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((sym-label (intern-non-immediate asm sym))
        (mod-label (intern-module-cache-cell asm scope))
        (cell-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm
                       dst
                       cell-label
                       mod-label
                       sym-label
                       bound?)))
e78991aa 838
;; Emit module-box for the variable SYM of the module MODULE-NAME, with
;; destination slot DST.  The pair (PUBLIC? . MODULE-NAME) serves both
;; as the module-name constant and as the scope of the cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((sym-label (intern-non-immediate asm sym))
         (key (cons public? module-name))
         (mod-name-label (intern-constant asm key))
         (cell-label (intern-cache-cell asm key sym)))
    (emit-module-box asm
                     dst
                     cell-label
                     mod-name-label
                     sym-label
                     bound?)))
e78991aa 845
02c624fc
AW
;; Record DEAD-SLOT-MAP, a bitfield given as an integer, for the
;; position of the next instruction.  The entry pushed onto
;; asm-dead-slot-maps has the form (pos proc-slot . dead-slot-map).  A
;; map of zero is not recorded.
(define-macro-assembler (dead-slot-map asm proc-slot dead-slot-map)
  (unless (zero? dead-slot-map)
    (let ((entry (cons (asm-start asm)
                       (cons proc-slot dead-slot-map))))
      (set-asm-dead-slot-maps! asm
                               (cons entry (asm-dead-slot-maps asm))))))
e78991aa
AW
852
853\f
854
855;;;
856;;; Helper for linking objects.
857;;;
858
(define (make-object asm name bv relocs labels . kwargs)
  "Build a linker object for the section called @var{name} (a symbol)
whose contents are the bytevector @var{bv}.  The section name is
interned in the shstrtab, the section receives the next free section
number and a size equal to the length of @var{bv}, and a linker symbol
named @var{name} at offset 0 is prepended to @var{labels}.  Any extra
@var{kwargs} are passed on to @code{make-elf-section}."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    ;; Claim this section number before building the section itself.
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
873
874
875\f
876
877;;;
878;;; Linking the constant table. This code is somewhat intertwingled
879;;; with the intern-constant code above, as that procedure also
880;;; residualizes instructions to initialize constants at load time.
881;;;
882
;; Store the address bits of X (as given by object-address) into BUF at
;; byte offset POS, as one word of the assembler's word size and
;; endianness.  Signals an error for word sizes other than 4 or 8.
(define (write-immediate asm buf pos x)
  (let* ((bits (object-address x))
         (order (asm-endianness asm))
         (store! (case (asm-word-size asm)
                   ((4) bytevector-u32-set!)
                   ((8) bytevector-u64-set!)
                   (else (error "bad word size" asm)))))
    (store! buf pos bits order)))
890
(define (emit-init-constants asm)
  "When the assembler has accumulated initialization instructions for
writable data, emit a procedure that runs them and return that
procedure's label.  Return @code{#f} when there is nothing to
initialize."
  (match (asm-inits asm)
    (() #f)
    (inits
     (let* ((label (gensym "init-constants"))
            ;; asm-inits is reversed before being spliced in, so the
            ;; instructions run in the opposite order to how they are
            ;; stored.
            (body (reverse inits))
            (program `((begin-program ,label ())
                       (assert-nargs-ee/locals 1 1)
                       ,@body
                       (load-constant 1 ,*unspecified*)
                       (return 1)
                       (end-program))))
       (emit-text asm program)
       label))))
906
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels.  Every object is laid out at an 8-byte-aligned
offset, and a linker symbol is defined for each object's label."
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Type tags written into the first word of heap objects.
  ;; NOTE(review): presumably these mirror the tc7 codes in libguile --
  ;; confirm against the C headers when changing them.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Store the unsigned integer VAL as one target word at POS.  This
    ;; is the single place that dispatches on the word size.
    (define (write-uword! buf pos val)
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    ;; Number of bytes that X occupies in the section.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Serialize OBJ into BUF starting at byte offset POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (write-uword! buf pos tag)
          (write-uword! buf (+ pos word-size) len)
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     ;; Terminating zero byte.
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     ;; Terminating zero character.
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (write-uword! buf pos tc7-program)
        (write-uword! buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: tag, stringbuf reference (written as #f here),
        ;; a zero word, and the length.  The tag word is the bare
        ;; tc7-ro-string; the length lives only in the fourth word.
        (write-uword! buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-uword! buf (+ pos (* 2 word-size)) 0)
        (write-uword! buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-uword! buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ;; Symbols, keywords and numbers only get a placeholder word.
       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (array-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7)))))
              (len (if (bitvector? obj)
                       (bitvector-length obj)
                       (bytevector-length obj))))
          (write-uword! buf pos tag)
          (write-uword! buf (+ pos word-size) len)                 ; length
          (write-uword! buf (+ pos (* 2 word-size)) 0)             ; pointer
          (write-immediate asm buf (+ pos (* 3 word-size)) #f)))   ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Each object starts on an 8-byte boundary; the running length
      ;; is aligned before the next object's size is added.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (symbols '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) symbols)))
              (make-object asm name buf '() symbols
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
e78991aa
AW
1105
(define (link-constants asm)
  "Partition the constants interned in @var{asm} into read-only and
writable sets and link each set into its own section.

Returns three values: the .rodata object, the .data object, and the
label of the initialization procedure.  Any of these may be
@code{#f}."
  ;; True when every element of the vector V is an immediate.
  (define (all-immediate? v)
    (let ((n (vector-length v)))
      (let scan ((k 0))
        (cond
         ((= k n) #t)
         ((immediate? (vector-ref v k)) (scan (1+ k)))
         (else #f)))))
  ;; An object can go into .rodata only if it needs no load-time fixup.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x)
      (if (immediate? (car x))
          (immediate? (cdr x))
          #f))
     ((simple-vector? x) (all-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let partition ((k 0)
                    (read-only vlist-null)
                    (writable vlist-null))
      (cond
       ((= k count)
        (values (link-data asm read-only '.rodata)
                (link-data asm writable '.data)
                (emit-init-constants asm)))
       (else
        (let* ((entry (vlist-ref constants k))
               (obj (car entry))
               (obj-label (cdr entry)))
          (if (shareable? obj)
              (partition (1+ k)
                         (vhash-consq obj obj-label read-only)
                         writable)
              (partition (1+ k)
                         read-only
                         (vhash-consq obj obj-label writable)))))))))
1138
1139\f
1140
1141;;;
1142;;; Linking program text.
1143;;;
1144
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label table
@var{labels}, patching @var{buf} in place.  x8-s24 relocations must
refer to a known label; s32 relocations whose label is unknown are
turned into linker relocations instead.  Return the list of those
linker relocations."
  ;; Handle one relocation, returning the updated list of relocations
  ;; that are left for the linker.
  (define (resolve reloc pending)
    (match reloc
      ((type label base word)
       (let ((target (hashq-ref labels label))
             (slot (+ base word)))
         (cond
          ((eq? type 's32)
           (cond
            (target
             (s32-set! buf slot (- target base))
             pending)
            (else
             ;; Not defined in this text section: defer to the linker.
             (cons (make-linker-reloc 'rel32/4 (* slot 4) word label)
                   pending))))
          ((eq? type 'x8-s24)
           (unless target
             (error "unbound near relocation" reloc))
           ;; Keep the low 8 bits already in the word and fill in the
           ;; 24-bit displacement.
           (let ((opcode (logand (u32-ref buf slot) #xff)))
             (u32-set! buf slot (pack-u8-s24 opcode (- target base)))
             pending))
          (else (error "bad relocation kind" reloc)))))))
  (let walk ((remaining relocs) (pending '()))
    (if (null? remaining)
        pending
        (walk (cdr remaining) (resolve (car remaining) pending)))))
1173
(define (process-labels labels)
  "Return a list with one linker symbol per entry of the label table
@var{labels}.  Label offsets are expected to be in words; the symbols
carry the offset multiplied by 4."
  (hash-fold (lambda (label word-offset symbols)
               (cons (make-linker-symbol label (* word-offset 4))
                     symbols))
             '()
             labels))
e78991aa
AW
1180
(define (swap-bytes! buf)
  "Reverse the byte order of every 32-bit unit of @var{buf}, in place.
Signals an error if the length of @var{buf} is not a multiple of 4."
  (let ((size (bytevector-length buf)))
    (unless (zero? (remainder size 4))
      (error "unexpected length"))
    (do ((offset 0 (+ offset 4)))
        ((= offset size))
      ;; Reading as big-endian and writing back as little-endian
      ;; reverses the four bytes.
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1194
(define (link-text-object asm)
  "Assemble the .rtl-text section: concatenate the completed blocks and
the current block into one buffer, swap the bytes of each 32-bit unit
if the target endianness differs from the native one, resolve
relocations, and return the resulting linker object."
  (let* ((buf (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; asm-prev is reversed before copying; every block in it is
         ;; copied in full.
         (filled (fold (lambda (block offset)
                         (bytevector-copy! block 0 buf offset block-bytes)
                         (+ offset block-bytes))
                       0
                       (reverse (asm-prev asm)))))
    ;; Only the first asm-idx words of the current block are copied.
    (bytevector-copy! (asm-cur asm) 0 buf filled (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! buf))
    (let* ((labels (asm-labels asm))
           (relocs (process-relocs buf (asm-relocs asm) labels))
           (symbols (process-labels labels)))
      (make-object asm '.rtl-text buf relocs symbols))))
1213
1214
1215\f
1216
02c624fc
AW
;;;
;;; Create the frame maps.  These maps are used by GC to identify dead
;;; slots in pending call frames, to avoid marking them.  We only do
;;; this when a frame makes a non-tail call, as that is the common case.
;;; Only the topmost frame will see a GC at any other point, but we mark
;;; top frames conservatively as serializing live slot maps at every
;;; instruction would take up too much space in the object file.
;;;
1225
1226;; The .guile.frame-maps section starts with two packed u32 values: one
1227;; indicating the offset of the first byte of the .rtl-text section, and
1228;; another indicating the relative offset in bytes of the slots data.
1229(define frame-maps-prefix-len 8)
1230
;; Each header is 8 bytes: 4 for the offset from .rtl-text, and 4 for
;; the offset of the slot map from the beginning of the
;; .guile.frame-maps section.  The length of a frame map depends on the
;; frame size at the call site, and is not encoded into this section as
;; it is available at run-time.
1236(define frame-map-header-len 8)
1237
;; Build the .guile.frame-maps section from the dead slot maps recorded
;; in ASM.  Returns the linker object, or #f when no maps were recorded.
(define (link-frame-maps asm)
  ;; Bytes needed for the slot map of a call whose procedure is in
  ;; PROC-SLOT: one bit per slot, not counting two slots.
  (define (map-byte-length proc-slot)
    (ceiling-quotient (- proc-slot 2) 8))
  ;; Write the low NBYTES bytes of the integer BITS into BV starting at
  ;; START, least significant byte first.  Returns the position just
  ;; past the last byte written.
  (define (write-map-bytes! bv start bits nbytes)
    (do ((k 0 (1+ k))
         (bits bits (ash bits -8)))
        ((= k nbytes) (+ start nbytes))
      (bytevector-u8-set! bv (+ start k) (logand bits #xff))))
  ;; ENTRIES is a list of (pos proc-slot . bits) in output order, COUNT
  ;; its length and TOTAL-MAP-BYTES the sum of the map lengths.
  (define (make-frame-maps entries count total-map-bytes)
    (let* ((endianness (asm-endianness asm))
           (maps-start (+ frame-maps-prefix-len
                          (* count frame-map-header-len)))
           (bv (make-bytevector (+ maps-start total-map-bytes) 0)))
      ;; Second prefix word: where the slot map data begins.  The first
      ;; prefix word is filled in through the relocation below.
      (bytevector-u32-set! bv 4 maps-start endianness)
      (let fill ((entries entries)
                 (header-pos frame-maps-prefix-len)
                 (map-pos maps-start))
        (if (null? entries)
            (make-object asm '.guile.frame-maps bv
                         (list (make-linker-reloc 'abs32/1 0 0 '.rtl-text))
                         '() #:type SHT_PROGBITS #:flags SHF_ALLOC)
            (match (car entries)
              ((pos proc-slot . bits)
               (bytevector-u32-set! bv header-pos (* pos 4) endianness)
               (bytevector-u32-set! bv (+ header-pos 4) map-pos endianness)
               (fill (cdr entries)
                     (+ header-pos frame-map-header-len)
                     (write-map-bytes! bv map-pos bits
                                       (map-byte-length proc-slot)))))))))
  (let ((recorded (asm-dead-slot-maps asm)))
    (if (null? recorded)
        #f
        (let ((total-map-bytes
               (fold (lambda (entry total)
                       (match entry
                         ((pos proc-slot . bits)
                          (+ (map-byte-length proc-slot) total))))
                     0
                     recorded)))
          ;; The recorded list is reversed before it is written out.
          (make-frame-maps (reverse recorded)
                           (length recorded)
                           total-map-bytes)))))
1275
1276\f
1277
e78991aa
AW
1278;;;
1279;;; Linking other sections of the ELF file, like the dynamic segment,
1280;;; the symbol table, etc.
1281;;;
1282
4c906ad5
AW
1283;; FIXME: Define these somewhere central, shared with C.
1284(define *bytecode-major-version* #x0202)
1a82c201 1285(define *bytecode-minor-version* 5)
4c906ad5 1286
(define (link-dynamic-section asm text rw rw-init frame-maps)
  "Link the dynamic section for an ELF image with bytecode @var{text},
given the writable data section @var{rw} needing fixup from the
procedure with label @var{rw-init}.  @var{rw-init} may be false.  If
@var{rw} is true, it will be added to the GC roots at runtime.  If
@var{frame-maps} is true, an entry pointing at the .guile.frame-maps
section is added as well."
  (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Each entry is a (tag, value) pair of words.  The version,
           ;; entry and terminating null entries are always present.
           (words 6)
           (words (if rw (+ words 4) words))
           (words (if rw-init (+ words 2) words))
           (words (if frame-maps (+ words 2) words))
           (bv (make-bytevector (* word-size words) 0))
           (set-uword!
            (lambda (i uword)
              (%set-uword! bv (* i word-size) uword endianness)))
           (relocs '())
           ;; Record a relocation against LABEL for word I and leave a
           ;; zero placeholder in the buffer.
           (set-label!
            (lambda (i label)
              (set! relocs (cons (make-linker-reloc 'reloc-type
                                                    (* i word-size) 0 label)
                                 relocs))
              (%set-uword! bv (* i word-size) 0 endianness))))
      (set-uword! 0 DT_GUILE_VM_VERSION)
      (set-uword! 1 (logior (ash *bytecode-major-version* 16)
                            *bytecode-minor-version*))
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (when rw
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw))))
      (when rw-init
        ;; The size computed above reserves this entry whenever rw-init
        ;; is true, so write it whenever rw-init is true.  It directly
        ;; follows the GC root entries when they are present, and the
        ;; entry-point entry otherwise; no zeroed hole is left in the
        ;; middle of the table.
        (let ((i (if rw 8 4)))
          (set-uword! i DT_INIT)        ; constants
          (set-label! (1+ i) rw-init)))
      (when frame-maps
        (set-uword! (- words 4) DT_GUILE_FRAME_MAPS)
        (set-label! (- words 3) '.guile.frame-maps))
      (set-uword! (- words 2) DT_NULL)
      (set-uword! (- words 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
    (else (error "bad word size" asm))))
1334
(define (link-shstrtab asm)
  "Link the section header string table and return its linker object."
  ;; Intern this section's own name first, so that it is already in the
  ;; table when the table is linked into bytes below.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table-bytes
                 '() '()
                 #:type SHT_STRTAB #:flags 0)))
1342
;; Build the ELF symbol table for the procedures recorded in ASM, one
;; hidden function symbol per meta, all placed in TEXT-SECTION.
;; Returns two values: the .symtab object and its .strtab object.
(define (link-symtab text-section asm)
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (bv (make-bytevector (* (length metas) entry-size) 0)))
    ;; Unnamed procedures are interned as the empty string.
    (define (name-index meta)
      (let ((name (meta-name meta)))
        (string-table-intern! names (if name (symbol->string name) ""))))
    (let write-symbols ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name (name-index meta))
               (low (meta-low-pc meta))
               (high (meta-high-pc meta)))
          (write-elf-symbol bv offset endianness word-size
                            (make-elf-symbol
                             #:name name
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 low)
                             #:size (* 4 (- high low))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (write-symbols (cdr metas) (+ offset entry-size)))))
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1379
b2006c19
AW
1380;;; The .guile.arities section describes the arities that a function can
1381;;; have. It is in two parts: a sorted array of headers describing
1382;;; basic arities, and an array of links out to a string table (and in
1383;;; the case of keyword arguments, to the data section) for argument
1384;;; names. The whole thing is prefixed by a uint32 indicating the
1385;;; offset of the end of the headers array.
1386;;;
1387;;; The arity headers array is a packed array of structures of the form:
1388;;;
1389;;; struct arity_header {
1390;;; uint32_t low_pc;
1391;;; uint32_t high_pc;
1392;;; uint32_t offset;
1393;;; uint32_t flags;
1394;;; uint32_t nreq;
1395;;; uint32_t nopt;
c3651bd5 1396;;; uint32_t nlocals;
b2006c19
AW
1397;;; }
1398;;;
1399;;; All of the offsets and addresses are 32 bits. We can expand in the
1400;;; future to use 64-bit offsets if appropriate, but there are other
691697de
AW
1401;;; aspects of bytecode that constrain us to a total image that fits in
1402;;; 32 bits, so for the moment we'll simplify the problem space.
b2006c19
AW
1403;;;
1404;;; The following flags values are defined:
1405;;;
1406;;; #x1: has-rest?
1407;;; #x2: allow-other-keys?
1408;;; #x4: has-keyword-args?
1409;;; #x8: is-case-lambda?
d8595af5 1410;;; #x10: is-in-case-lambda?
b2006c19
AW
1411;;;
1412;;; Functions with a single arity specify their number of required and
1413;;; optional arguments in nreq and nopt, and do not have the
1414;;; is-case-lambda? flag set. Their "offset" member links to an array
1415;;; of pointers into the associated .guile.arities.strtab string table,
1416;;; identifying the argument names. This offset is relative to the
cade4c8f
AW
1417;;; start of the .guile.arities section.
1418;;;
;;; If the arity has keyword arguments -- if has-keyword-args? is set in
;;; the flags -- the first uint32 pointed to by offset encodes a link to
;;; the "keyword indices" literal, in the data section.  Then follow the
;;; names for all locals, in order, as uleb128 values.  The required
;;; arguments will be the first locals, followed by the optionals,
;;; followed by the rest argument if has-rest? is set.  The names
;;; point into the associated string table section.
b2006c19
AW
1426;;;
1427;;; Functions with no arities have no arities information present in the
1428;;; .guile.arities section.
1429;;;
1430;;; Functions with multiple arities are preceded by a header with
1431;;; is-case-lambda? set. All other fields are 0, except low-pc and
1432;;; high-pc which should be the bounds of the whole function. Headers
d8595af5
AW
1433;;; for the individual arities follow, with the is-in-case-lambda? flag
1434;;; set. In this way the whole headers array is sorted in increasing
1435;;; low-pc order, and case-lambda clauses are contained within the
1436;;; [low-pc, high-pc] of the case-lambda header.
b2006c19
AW
1437
1438;; Length of the prefix to the arities section, in bytes.
1439(define arities-prefix-len 4)
1440
1441;; Length of an arity header, in bytes.
c3651bd5
AW
1442(define arity-header-len (* 7 4))
1443
1444;; Some helpers.
;; Write the non-negative integer VAL to the binary output port PORT in
;; unsigned LEB128 form: seven bits per byte, least significant group
;; first, with the high bit set on every byte except the last.
;;
;; A negative VAL is rejected with an error.  Arithmetic right shift
;; never turns a negative number into zero, so the loop below would not
;; terminate for one.
(define (put-uleb128 port val)
  (when (negative? val)
    (error "cannot encode negative value as uleb128" val))
  (let lp ((val val))
    (let ((next (ash val -7)))
      (if (zero? next)
          ;; Last group: it fits in seven bits, no continuation bit.
          (put-u8 port val)
          (begin
            (put-u8 port (logior #x80 (logand val #x7f)))
            (lp next))))))
b2006c19 1453
c3651bd5
AW
;; Write the integer VAL to the binary output port PORT in signed
;; LEB128 form.  Groups of seven bits are emitted least significant
;; first; the final group is the one whose remaining value lies in
;; [-64, 63].
(define (put-sleb128 port val)
  (let emit ((rest val))
    (let ((low7 (logand rest #x7f)))
      (cond
       ((and (>= rest -64) (<= rest 63))
        (put-u8 port low7))
       (else
        (put-u8 port (logior #x80 low7))
        (emit (ash rest -7)))))))
1461
;; Return the current position of PORT, obtained by seeking zero bytes
;; relative to the current position.
(define (port-position port)
  (let ((displacement 0))
    (seek port displacement SEEK_CUR)))
b2006c19
AW
1464
;; Pack five booleans into the flags word of an arity header:
;;   #x1  has-rest?
;;   #x2  allow-other-keys?
;;   #x4  has-keyword-args?
;;   #x8  is-case-lambda?
;;   #x10 is-in-case-lambda?
;; The bits are disjoint, so adding them is the same as or-ing them.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?
                                      is-in-case-lambda?)
  (+ (if has-rest? #x1 0)
     (if allow-other-keys? #x2 0)
     (if has-keyword-args? #x4 0)
     (if is-case-lambda? #x8 0)
     (if is-in-case-lambda? #x10 0)))
b2006c19 1473
c3651bd5
AW
;; Fill in the arity headers in the bytevector HEADERS for every meta in
;; METAS, and write the per-arity name and definition data to
;; NAMES-PORT, interning local names in STRTAB.  Returns the list of
;; linker relocations created for keyword-index literals.  Signals an
;; error if the headers do not exactly fill HEADERS.
(define (write-arities asm metas headers names-port strtab)
  ;; Write one seven-word header at byte offset POS.  LOW-PC and HIGH-PC
  ;; are multiplied by 4 on the way out.
  (define (write-header pos low-pc high-pc offset flags nreq nopt nlocals)
    (unless (<= (+ nreq nopt) nlocals)
      (error "forgot to emit definition instructions?"))
    (let ((order (asm-endianness asm)))
      (let store ((at pos)
                  (fields (list (* low-pc 4) (* high-pc 4) offset flags
                                nreq nopt nlocals)))
        (unless (null? fields)
          (bytevector-u32-set! headers at (car fields) order)
          (store (+ at 4) (cdr fields))))))
  ;; Position that the next byte written to NAMES-PORT will have once
  ;; the names data is appended after HEADERS.
  (define (names-offset)
    (+ (bytevector-length headers) (port-position names-port)))
  ;; When there are keyword indices, reserve four zero bytes for a link
  ;; to the interned literal and add a relocation for them to RELOCS.
  (define (write-kw-indices kw-indices relocs)
    ;; FIXME: Assert that kw-indices is already interned.
    (cond
     ((pair? kw-indices)
      (let ((pos (names-offset))
            (label (intern-constant asm kw-indices)))
        (put-bytevector names-port #vu8(0 0 0 0))
        (cons (make-linker-reloc 'abs32/1 pos 0 label) relocs)))
     (else relocs)))
  ;; Write the header for ARITY at POS followed by its names data.
  ;; Returns the updated relocation list.
  (define (write-arity pos arity in-case-lambda? relocs)
    (let ((definitions (arity-definitions arity))
          (kw-indices (arity-kw-indices arity)))
      (write-header pos
                    (arity-low-pc arity)
                    (arity-high-pc arity)
                    ;; FIXME: Seems silly to add on bytevector-length of
                    ;; headers, given the arities-prefix.
                    (names-offset)
                    (pack-arity-flags (arity-rest arity)
                                      (arity-allow-other-keys? arity)
                                      (pair? kw-indices)
                                      #f
                                      in-case-lambda?)
                    (length (arity-req arity))
                    (length (arity-opt arity))
                    (length definitions))
      (let ((relocs (write-kw-indices kw-indices relocs)))
        ;; First the names of all locals; non-symbol names are written
        ;; as 0.
        (for-each (lambda (definition)
                    (match definition
                      (#(name slot def)
                       (put-uleb128 names-port
                                    (if (symbol? name)
                                        (string-table-intern!
                                         strtab (symbol->string name))
                                        0)))))
                  definitions)
        ;; Then, for each local, its definition offset and its slot.
        (for-each (lambda (definition)
                    (match definition
                      (#(name slot def)
                       (put-uleb128 names-port def)
                       (put-uleb128 names-port slot))))
                  definitions)
        relocs)))
  ;; Write all headers for META starting at POS.  Returns two values:
  ;; the position after the last header written, and the relocations.
  (define (write-meta meta pos relocs)
    (match (meta-arities meta)
      (() (values pos relocs))
      ((arity)
       (values (+ pos arity-header-len)
               (write-arity pos arity #f relocs)))
      (arities
       ;; Write a case-lambda header, then individual arities.
       ;; The case-lambda header's offset link is 0.
       (write-header pos (meta-low-pc meta) (meta-high-pc meta) 0
                     (pack-arity-flags #f #f #f #t #f) 0 0 0)
       (let clauses ((arities arities)
                     (pos (+ pos arity-header-len))
                     (relocs relocs))
         (if (null? arities)
             (values pos relocs)
             (clauses (cdr arities)
                      (+ pos arity-header-len)
                      (write-arity pos (car arities) #t relocs)))))))
  (let walk ((metas metas) (pos arities-prefix-len) (relocs '()))
    (cond
     ((null? metas)
      (unless (= pos (bytevector-length headers))
        (error "expected to fully fill the bytevector"
               pos (bytevector-length headers)))
      relocs)
     (else
      (call-with-values
          (lambda () (write-meta (car metas) pos relocs))
        (lambda (pos relocs)
          (walk (cdr metas) pos relocs)))))))
b2006c19
AW
1554
;; Build the .guile.arities section and its string table from the metas
;; recorded in ASM.  Returns two values: the .guile.arities object and
;; the .guile.arities.strtab object.
(define (link-arities asm)
  ;; Bytes of header space needed for META: nothing without arities, one
  ;; header for a single arity, and otherwise one case-lambda header
  ;; plus one header per clause.
  (define (meta-arities-header-size meta)
    (match (meta-arities meta)
      (() 0)
      ((arity) arity-header-len)
      (arities (* arity-header-len (1+ (length arities))))))

  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         (header-size (apply +
                             arities-prefix-len
                             (map meta-arities-header-size metas)))
         (names (make-string-table))
         (headers (make-bytevector header-size 0)))
    ;; The prefix word holds the offset of the end of the headers array.
    (bytevector-u32-set! headers 0 (bytevector-length headers) endianness)
    (call-with-values open-bytevector-output-port
      (lambda (names-port get-name-bv)
        (let* ((relocs (write-arities asm metas headers names-port names))
               (strtab (make-object asm '.guile.arities.strtab
                                    (link-string-table! names)
                                    '() '()
                                    #:type SHT_STRTAB #:flags 0))
               ;; Section contents: the headers followed by the names
               ;; data.
               (name-bv (get-name-bv))
               (headers-len (bytevector-length headers))
               (names-len (bytevector-length name-bv))
               (contents (make-bytevector (+ headers-len names-len))))
          (bytevector-copy! headers 0 contents 0 headers-len)
          (bytevector-copy! name-bv 0 contents headers-len names-len)
          (values (make-object asm '.guile.arities
                               contents
                               relocs '()
                               #:type SHT_PROGBITS #:flags 0
                               #:link (elf-section-index
                                       (linker-object-section strtab)))
                  strtab))))))
1597
9128b1a1
AW
1598;;;
1599;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1600;;; values. Pc and str are both 32 bits wide. (Either could change to
1601;;; 64 bits if appropriate in the future.) Pc is the address of the
0a1d52ac
AW
1602;;; entry to a program, relative to the start of the text section, in
1603;;; bytes, and str is an index into the associated .guile.docstrs.strtab
1604;;; string table section.
9128b1a1
AW
1605;;;
1606
1607;; The size of a docstrs entry, in bytes.
1608(define docstr-size 8)
1609
;; Build the .guile.docstrs section and its string table.  Returns two
;; values: the .guile.docstrs object and the .guile.docstrs.strtab
;; object.
(define (link-docstrs asm)
  (define (documentation-entry? pair)
    (eq? (car pair) 'documentation))
  ;; Return (byte-pc . docstring) for META, or #f unless its properties
  ;; hold exactly one documentation entry and that entry is a string.
  (define (meta->docstring meta)
    (let ((tail (find-tail documentation-entry? (meta-properties meta))))
      (and tail
           (not (find-tail documentation-entry? (cdr tail)))
           (string? (cdar tail))
           (cons (* 4 (meta-low-pc meta)) (cdar tail)))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->docstring (reverse (asm-meta asm))))
         (strings (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    ;; One (pc, string-table index) pair of u32s per docstring.
    (let write-entries ((entries docstrings) (pos 0))
      (match entries
        (() #t)
        (((pc . string) . entries)
         (bytevector-u32-set! bv pos pc endianness)
         (bytevector-u32-set! bv (+ pos 4)
                              (string-table-intern! strings string)
                              endianness)
         (write-entries entries (+ pos docstr-size)))))
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! strings)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1647
c4c098e3
AW
;;;
;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
;;; values.  Pc and addr are both 32 bits wide.  (Either could change to
;;; 64 bits if appropriate in the future.)  Pc is the address of the
;;; entry to a program, relative to the start of the text section, in
;;; bytes, and addr is the address of the associated properties alist,
;;; relative to the start of the ELF image.
;;;
1656;;; Since procedure properties are stored in the data sections, we need
1657;;; to link the procedures property section first. (Note that this
1658;;; constraint does not apply to the arities section, which may
1659;;; reference the data sections via the kw-indices literal, because
1660;;; assembling the text section already makes sure that the kw-indices
1661;;; are interned.)
1662;;;
1663
1664;; The size of a procprops entry, in bytes.
1665(define procprops-size 8)
1666
;; Build the .guile.procprops section: one (pc, addr) pair of u32s for
;; every meta that has properties other than its name and its string
;; documentation.  The addr word is left zero and filled in through a
;; relocation against the interned property list.
(define (link-procprops asm)
  ;; Copy ALIST up to the first entry whose key is KEY.  That entry is
  ;; dropped if VALUE-PRED accepts its value and kept otherwise; either
  ;; way the rest of the list is returned unexamined.
  (define (assoc-remove-one alist key value-pred)
    (if (null? alist)
        '()
        (match (car alist)
          ((k . v)
           (cond
            ((eq? k key)
             (if (value-pred v)
                 (cdr alist)
                 (acons key v (cdr alist))))
            (else
             (acons k v (assoc-remove-one (cdr alist) key value-pred))))))))
  (define (props-without-name-or-docstring meta)
    (let ((unnamed (assoc-remove-one (meta-properties meta)
                                     'name
                                     (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))
  ;; (byte-pc . props) for META, or #f when nothing is left to record.
  (define (meta->procprops meta)
    (let ((props (props-without-name-or-docstring meta)))
      (and (pair? props)
           (cons (* 4 (meta-low-pc meta)) props))))
  (let* ((endianness (asm-endianness asm))
         (entries (filter-map meta->procprops (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length entries) procprops-size) 0))
         (relocs
          (fold (lambda (entry index relocs)
                  (match entry
                    ((pc . props)
                     (let ((pos (* index procprops-size)))
                       (bytevector-u32-set! bv pos pc endianness)
                       (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                                (intern-constant asm props))
                             relocs)))))
                '()
                entries
                (iota (length entries)))))
    (make-object asm '.guile.procprops
                 bv
                 relocs '()
                 #:type SHT_PROGBITS #:flags 0)))
1705
a862d8c1
AW
;;;
;;; The DWARF .debug_info, .debug_abbrev, .debug_str, .debug_loc, and
;;; .debug_line sections provide line number and local variable
;;; liveness information.  Their format is defined by the DWARF
;;; specifications.
;;;
1712
;; The source language recorded for ASM's compilation unit.  The
;; argument is currently ignored and the answer is always `scheme'.
;; FIXME: Plumb language through to the assembler.
(define asm-language
  (lambda (asm)
    'scheme))
1716
;; Build the DWARF debugging sections for ASM: one compile-unit DIE with
;; a subprogram child per entry in (asm-meta asm), the abbreviation
;; table describing those DIEs, the string table they refer to, an empty
;; location list section, and a DWARF 2 line number program generated
;; from (asm-sources asm).
;;
;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
(define (link-debug asm)
  ;; Fixed-width integer writers.  The multi-byte ones use the
  ;; endianness of ASM.
  (define (put-s8 port val)
    (let ((bv (make-bytevector 1)))
      (bytevector-s8-set! bv 0 val)
      (put-bytevector port bv)))

  (define (put-u16 port val)
    (let ((bv (make-bytevector 2)))
      (bytevector-u16-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u32 port val)
    (let ((bv (make-bytevector 4)))
      (bytevector-u32-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u64 port val)
    (let ((bv (make-bytevector 8)))
      (bytevector-u64-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  ;; Describe META as a (subprogram (@ (attr val) ...)) DIE.  The name
  ;; attribute is present only if META has a name.  High-pc is given as
  ;; a size: the pc range of META multiplied by 4.
  (define (meta->subprogram-die meta)
    `(subprogram
      (@ ,@(cond
            ((meta-name meta)
             => (lambda (name) `((name ,(symbol->string name)))))
            (else
             '()))
         (low-pc ,(meta-label meta))
         (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

  ;; The root DIE: a compile-unit spanning the text emitted so far, with
  ;; one subprogram child per meta, in emission order.
  (define (make-compile-unit-die asm)
    `(compile-unit
      (@ (producer ,(string-append "Guile " (version)))
         (language ,(asm-language asm))
         (low-pc .rtl-text)
         (high-pc ,(* 4 (asm-pos asm)))
         (stmt-list 0))
      ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

  (let-values (((die-port get-die-bv) (open-bytevector-output-port))
               ((die-relocs) '())
               ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
               ;; (tag has-kids? attrs forms) -> code
               ((abbrevs) vlist-null)
               ((strtab) (make-string-table))
               ((line-port get-line-bv) (open-bytevector-output-port))
               ((line-relocs) '())
               ;; file -> code
               ((files) vlist-null))

    ;; Append one abbreviation declaration to .debug_abbrev: its code,
    ;; tag, children flag, and (attribute, form) pairs, terminated by a
    ;; (0, 0) pair.
    (define (write-abbrev code tag has-children? attrs forms)
      (put-uleb128 abbrev-port code)
      (put-uleb128 abbrev-port (tag-name->code tag))
      (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
      (for-each (lambda (attr form)
                  (put-uleb128 abbrev-port (attribute-name->code attr))
                  (put-uleb128 abbrev-port (form-name->code form)))
                attrs forms)
      (put-uleb128 abbrev-port 0)
      (put-uleb128 abbrev-port 0))

    ;; Return the abbreviation code for this DIE shape, allocating and
    ;; writing a new abbreviation the first time the shape is seen.
    ;; Codes start at 1.
    (define (intern-abbrev tag has-children? attrs forms)
      (let ((key (list tag has-children? attrs forms)))
        (match (vhash-assoc key abbrevs)
          ((_ . code) code)
          (#f (let ((code (1+ (vlist-length abbrevs))))
                (set! abbrevs (vhash-cons key code abbrevs))
                (write-abbrev code tag has-children? attrs forms)
                code)))))

    ;; Return the file-table index for FILE, allocating the next index
    ;; the first time FILE is seen.  Indices start at 1.
    (define (intern-file file)
      (match (vhash-assoc file files)
        ((_ . code) code)
        (#f (let ((code (1+ (vlist-length files))))
              (set! files (vhash-cons file code files))
              code))))

    ;; Write the whole .debug_line contents to line-port: the header,
    ;; the file table, and the statement program.
    (define (write-sources)
      ;; Choose line base and line range values that will allow for an
      ;; address advance range of 16 words.  The special opcode range is
      ;; from 10 to 255, so 246 values.
      (define base -4)
      (define range 15)

      ;; First pass: walk (asm-sources asm), consing each usable entry
      ;; onto OUT as (pc file-code line column) and interning file
      ;; names along the way.  Consing reverses the order of the input.
      (let lp ((sources (asm-sources asm)) (out '()))
        (match sources
          (((pc . s) . sources)
           (let ((file (assq-ref s 'filename))
                 (line (assq-ref s 'line))
                 (col (assq-ref s 'column)))
             (lp sources
                 ;; Guile line and column numbers are 0-indexed, but
                 ;; they are 1-indexed for DWARF.
                 ;;
                 ;; An entry lacking a line or a column cannot be
                 ;; expressed as a row of the line table, so skip it
                 ;; rather than letting a non-number reach the
                 ;; arithmetic in the statement program below.
                 (if (and line col)
                     (cons (list pc
                                 (if file (intern-file file) 0)
                                 (1+ line)
                                 (1+ col))
                           out)
                     out))))
          (()
           ;; Compilation unit header for .debug_line.  We write in
           ;; DWARF 2 format because more tools understand it than DWARF
           ;; 4, which incompatibly adds another field to this header.

           (put-u32 line-port 0) ; Length; will patch later.
           (put-u16 line-port 2) ; DWARF 2 format.
           (put-u32 line-port 0) ; Prologue length; will patch later.
           (put-u8 line-port 4)  ; Minimum instruction length: 4 bytes.
           (put-u8 line-port 1)  ; Default is-stmt: true.

           (put-s8 line-port base)  ; Line base.  See the DWARF standard.
           (put-u8 line-port range) ; Line range.  See the DWARF standard.
           (put-u8 line-port 10)    ; Opcode base: the first "special" opcode.

           ;; A table of the number of uleb128 arguments taken by each
           ;; of the standard opcodes.
           (put-u8 line-port 0) ; 1: copy
           (put-u8 line-port 1) ; 2: advance-pc
           (put-u8 line-port 1) ; 3: advance-line
           (put-u8 line-port 1) ; 4: set-file
           (put-u8 line-port 1) ; 5: set-column
           (put-u8 line-port 0) ; 6: negate-stmt
           (put-u8 line-port 0) ; 7: set-basic-block
           (put-u8 line-port 0) ; 8: const-add-pc
           (put-u8 line-port 1) ; 9: fixed-advance-pc

           ;; Include directories, as a zero-terminated sequence of
           ;; nul-terminated strings.  Nothing, for the moment.
           (put-u8 line-port 0)

           ;; File table.  For each file that contributes to this
           ;; compilation unit, a nul-terminated file name string, and a
           ;; uleb128 for each of directory the file was found in, the
           ;; modification time, and the file's size in bytes.  We pass
           ;; zero for the latter three fields.
           (vlist-fold-right
            (lambda (pair seed)
              (match pair
                ((file . code)
                 (put-bytevector line-port (string->utf8 file))
                 (put-u8 line-port 0)
                 (put-uleb128 line-port 0)   ; directory
                 (put-uleb128 line-port 0)   ; mtime
                 (put-uleb128 line-port 0))) ; size
              seed)
            #f
            files)
           (put-u8 line-port 0) ; 0 byte terminating file list.

           ;; Patch prologue length.  The field lives at offset 6, after
           ;; the u32 length and the u16 version, and counts the bytes
           ;; that follow the field itself, hence the 10 (= 6 + 4).
           (let ((offset (port-position line-port)))
             (seek line-port 6 SEEK_SET)
             (put-u32 line-port (- offset 10))
             (seek line-port offset SEEK_SET))

           ;; Now write the statement program.
           (let ()
             (define (extended-op opcode payload-len)
               (put-u8 line-port 0)                     ; extended op
               (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
               (put-uleb128 line-port opcode))
             ;; Extended opcode 2 with a word-sized operand that is left
             ;; zero and covered by a relocation against SYM.
             (define (set-address sym)
               (define (add-reloc! kind)
                 (set! line-relocs
                       (cons (make-linker-reloc kind
                                                (port-position line-port)
                                                0
                                                sym)
                             line-relocs)))
               (match (asm-word-size asm)
                 (4
                  (extended-op 2 4)
                  (add-reloc! 'abs32/1)
                  (put-u32 line-port 0))
                 (8
                  (extended-op 2 8)
                  (add-reloc! 'abs64/1)
                  (put-u64 line-port 0))))
             ;; Advance from PC to the end of the text, then emit
             ;; extended opcode 1 with no payload.
             (define (end-sequence pc)
               (let ((pc-inc (- (asm-pos asm) pc)))
                 (put-u8 line-port 2)   ; advance-pc
                 (put-uleb128 line-port pc-inc))
               (extended-op 1 0))
             ;; Advance the pc by PC-INC and the line by LINE-INC, ending
             ;; with a special opcode.  Increments too large for a single
             ;; special opcode are first reduced with advance-line,
             ;; const-add-pc, or advance-pc.
             (define (advance-pc pc-inc line-inc)
               (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
                 (cond
                  ((or (< line-inc base) (>= line-inc (+ base range)))
                   (advance-line line-inc)
                   (advance-pc pc-inc 0))
                  ((<= spec 255)
                   (put-u8 line-port spec))
                  ((< spec 500)
                   (put-u8 line-port 8) ; const-advance-pc
                   ;; const-add-pc advances by the address increment of
                   ;; special opcode 255.  Only the quotient is wanted,
                   ;; so use `floor-quotient' rather than the two-valued
                   ;; `floor/'.
                   (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                               line-inc))
                  (else
                   (put-u8 line-port 2) ; advance-pc
                   (put-uleb128 line-port pc-inc)
                   (advance-pc 0 line-inc)))))
             (define (advance-line inc)
               (put-u8 line-port 3)
               (put-sleb128 line-port inc))
             (define (set-file file)
               (put-u8 line-port 4)
               (put-uleb128 line-port file))
             (define (set-column col)
               (put-u8 line-port 5)
               (put-uleb128 line-port col))

             (set-address '.rtl-text)

             ;; The loop variables start at pc 0, file 1, line 1 and
             ;; column 0; a row is emitted only when file, line or
             ;; column differs from the previous entry.
             (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
               (match in
                 (()
                  (when (null? out)
                    ;; There was no source info in the first place.  Set
                    ;; file register to 0 before adding final row.
                    (set-file 0))
                  (end-sequence pc))
                 (((pc* file* line* col*) . in*)
                  (cond
                   ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
                    (lp in* pc file line col))
                   (else
                    (unless (eqv? col col*)
                      (set-column col*))
                    (unless (eqv? file file*)
                      (set-file file*))
                    (advance-pc (- pc* pc) (- line* line))
                    (lp in* pc* file* line* col*)))))))))))

    ;; Turn attribute value VAL into what gets written: strings are
    ;; interned into the string table, the language name is mapped to
    ;; its code, and the rest pass through.
    (define (compute-code attr val)
      (match attr
        ('name (string-table-intern! strtab val))
        ('low-pc val)
        ('high-pc val)
        ('producer (string-table-intern! strtab val))
        ('language (language-name->code val))
        ('stmt-list val)))

    ;; Pick the form used to encode an attribute.  Integer codes get the
    ;; narrowest fixed-width data form that holds them.
    (define (choose-form attr val code)
      (cond
       ((string? val) 'strp)
       ((eq? attr 'stmt-list) 'sec-offset)
       ((eq? attr 'low-pc) 'addr)
       ((exact-integer? code)
        (cond
         ((< code 0) 'sleb128)
         ((<= code #xff) 'data1)
         ((<= code #xffff) 'data2)
         ((<= code #xffffffff) 'data4)
         ((<= code #xffffffffffffffff) 'data8)
         (else 'uleb128)))
       (else (error "unhandled case" attr val code))))

    ;; Record a relocation of KIND against SYM at the current position
    ;; of die-port.
    (define (add-die-relocation! kind sym)
      (set! die-relocs
            (cons (make-linker-reloc kind (port-position die-port) 0 sym)
                  die-relocs)))

    ;; Write CODE to die-port encoded as FORM.  For `addr', CODE is the
    ;; symbol to relocate against and a zero word is written in its
    ;; place.
    (define (write-value code form)
      (match form
        ('data1 (put-u8 die-port code))
        ('data2 (put-u16 die-port code))
        ('data4 (put-u32 die-port code))
        ('data8 (put-u64 die-port code))
        ('uleb128 (put-uleb128 die-port code))
        ('sleb128 (put-sleb128 die-port code))
        ('addr
         (match (asm-word-size asm)
           (4
            (add-die-relocation! 'abs32/1 code)
            (put-u32 die-port 0))
           (8
            (add-die-relocation! 'abs64/1 code)
            (put-u64 die-port 0))))
        ('sec-offset (put-u32 die-port code))
        ('strp (put-u32 die-port code))))

    ;; Write DIE, of the shape (tag (@ (attr val) ...) child ...): its
    ;; abbreviation code, its attribute values, and then, if it has
    ;; children, each child followed by a terminating 0.
    (define (write-die die)
      (match die
        ((tag ('@ (attrs vals) ...) children ...)
         (let* ((codes (map compute-code attrs vals))
                (forms (map choose-form attrs vals codes))
                (has-children? (not (null? children)))
                (abbrev-code (intern-abbrev tag has-children? attrs forms)))
           (put-uleb128 die-port abbrev-code)
           (for-each write-value codes forms)
           (when has-children?
             (for-each write-die children)
             (put-uleb128 die-port 0))))))

    ;; Compilation unit header.
    (put-u32 die-port 0) ; Length; will patch later.
    (put-u16 die-port 4) ; DWARF 4.
    (put-u32 die-port 0) ; Abbrevs offset.
    (put-u8 die-port (asm-word-size asm)) ; Address size.

    (write-die (make-compile-unit-die asm))

    ;; Terminate the abbrevs list.
    (put-uleb128 abbrev-port 0)

    (write-sources)

    (values (let ((bv (get-die-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_info bv die-relocs '()
                           #:type SHT_PROGBITS #:flags 0))
            (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_str (link-string-table! strtab) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_loc #vu8() '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (let ((bv (get-line-bv)))
              ;; Patch DWARF32 length.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_line bv line-relocs '()
                           #:type SHT_PROGBITS #:flags 0)))))
a862d8c1 2041
;; Link every section of ASM and return the resulting linker objects as
;; a list, leaving out any that are #f.  The bindings below are
;; evaluated in sequence and that sequence is significant; see the
;; comments on the individual steps.
(define (link-objects asm)
  (let*-values (;; Procprops go before the constants, because linking
                ;; them probably interns more constants.
                ((procprops-obj) (link-procprops asm))
                ((ro-obj rw-obj rw-init) (link-constants asm))
                ;; The text object comes after the constants, so that
                ;; the constants initializer gets included.
                ((text-obj) (link-text-object asm))
                ((frame-maps-obj) (link-frame-maps asm))
                ((dynamic-obj) (link-dynamic-section asm text-obj rw-obj
                                                     rw-init frame-maps-obj))
                ((symtab-obj strtab-obj)
                 (link-symtab (linker-object-section text-obj) asm))
                ((arities-obj arities-strtab-obj) (link-arities asm))
                ((docstrs-obj docstrs-strtab-obj) (link-docstrs asm))
                ((debug-info debug-abbrev debug-str debug-loc debug-line)
                 (link-debug asm))
                ;; The section header string table has to be last,
                ;; because linking other sections adds entries to the
                ;; string table.
                ((shstrtab-obj) (link-shstrtab asm)))
    (let ((candidates
           (list text-obj ro-obj frame-maps-obj rw-obj dynamic-obj
                 symtab-obj strtab-obj
                 arities-obj arities-strtab-obj
                 docstrs-obj docstrs-strtab-obj procprops-obj
                 debug-info debug-abbrev debug-str debug-loc debug-line
                 shstrtab-obj)))
      (filter identity candidates))))
e78991aa
AW
2065
2066
2067\f
2068
2069;;;
2070;;; High-level public interfaces.
2071;;;
2072
(define* (link-assembly asm #:key (page-aligned? #t))
  "Produce an ELF image from the code and data emitted into @var{asm}.
The result is a bytevector, by default linked so that read-only and
writable data are on separate pages. Pass @code{#:page-aligned? #f} to
disable this behavior."
  ;; Link the individual sections first, then hand them to the ELF
  ;; linker along with the caller's alignment preference.
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))