RTL compiler supports static bitvectors
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile RTL assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
26 ;;;
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm dwarf)
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
51 #:use-module (system vm objcode)
52 #:use-module (rnrs bytevectors)
53 #:use-module (ice-9 binary-ports)
54 #:use-module (ice-9 vlist)
55 #:use-module (ice-9 match)
56 #:use-module (srfi srfi-1)
57 #:use-module (srfi srfi-4)
58 #:use-module (srfi srfi-9)
59 #:use-module (srfi srfi-11)
60 #:export (make-assembler
61 emit-text
62 link-assembly
63 assemble-program))
64
65
66 \f
67
68 ;;; RTL code consists of 32-bit units, often subdivided in some way.
69 ;;; These helpers create one 32-bit unit from multiple components.
70
;; Pack X into bits 0-7 and Y into bits 8 and up of one word.  Only X is
;; range-checked here.
(define-inlinable (pack-u8-u24 x y)
  (if (<= 0 x 255)
      (logior x (ash y 8))
      (error "out of range" x)))
75
;; Pack an unsigned 8-bit value X into bits 0-7 and a signed 24-bit
;; value Y, in two's complement, into bits 8-31 of one 32-bit word.
;;
;; X must be in [0, 255] and Y in [-#x800000, #x7fffff]; anything else
;; signals an "out of range" error.  Values of Y at or above #x800000
;; are rejected because their 24-bit encoding would read back as a
;; negative number.
(define-inlinable (pack-u8-s24 x y)
  (unless (<= 0 x 255)
    (error "out of range" x))
  (unless (<= (- #x800000) y #x7fffff)
    (error "out of range" y))
  ;; Masking to 24 bits yields the two's complement encoding of Y.
  (logior x (ash (logand y #xffffff) 8)))
86
;; Pack X into bit 0, Y into bits 1-7 and Z into bits 8 and up of one
;; word.  X and Y are range-checked, in that order; Z is not checked
;; here.
(define-inlinable (pack-u1-u7-u24 x y z)
  (cond
   ((not (<= 0 x 1)) (error "out of range" x))
   ((not (<= 0 y 127)) (error "out of range" y))
   (else (logior x (ash y 1) (ash z 8)))))
93
;; Pack X into bits 0-7, Y into bits 8-19 and Z into bits 20 and up of
;; one word.  X and Y are range-checked, in that order; Z is not
;; checked here.
(define-inlinable (pack-u8-u12-u12 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 4095)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 20)))))
100
;; Pack X into bits 0-7, Y into bits 8-15 and Z into bits 16 and up of
;; one word.  X and Y are range-checked, in that order; Z is not
;; checked here.
(define-inlinable (pack-u8-u8-u16 x y z)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   (else (logior x (ash y 8) (ash z 16)))))
107
;; Pack four values into one word, X lowest: X in bits 0-7, Y in bits
;; 8-15, Z in bits 16-23 and W in bits 24 and up.  X, Y and Z are
;; range-checked, in that order; W is not checked here.
(define-inlinable (pack-u8-u8-u8-u8 x y z w)
  (cond
   ((not (<= 0 x 255)) (error "out of range" x))
   ((not (<= 0 y 255)) (error "out of range" y))
   ((not (<= 0 z 255)) (error "out of range" z))
   (else (logior x (ash y 8) (ash z 16) (ash w 24)))))
116
;; Turn a fixed number of boolean flags into a bit mask: the first flag
;; contributes bit 0 (value 1), the second bit 1 (value 2).
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ flag0 flag1)
     (logior (if flag0 1 0)
             (if flag1 2 0)))))
122
123 ;;; Helpers to read and write 32-bit units in a buffer.
124
;; Each accessor takes an index N counted in 32-bit units and converts
;; it to a byte offset (N * 4) into the bytevector BUF, using native
;; endianness.

(define-syntax u32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-u32-native-ref buf (* n 4)))))

(define-syntax u32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-u32-native-set! buf (* n 4) val))))

(define-syntax s32-ref
  (syntax-rules ()
    ((_ buf n)
     (bytevector-s32-native-ref buf (* n 4)))))

(define-syntax s32-set!
  (syntax-rules ()
    ((_ buf n val)
     (bytevector-s32-native-set! buf (* n 4) val))))
136
137
138 \f
139
140 ;;; A <meta> entry collects metadata for one procedure. Procedures are
141 ;;; written as contiguous ranges of RTL code.
142 ;;;
;; Evaluate ARG once and signal an error "expected KIND", with the
;; value as irritant, unless it matches the (ice-9 match) PATTERN.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (if (not (match val (pattern #t) (_ #f)))
           (error (string-append "expected " kind) val))))))
147
;; Per-procedure metadata record.  The label and low-pc are fixed at
;; construction; properties, high-pc and arities have setters.
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; Label of the procedure.
  (label meta-label)
  ;; Property alist; `meta-name' looks up the `name' key in it.
  (properties meta-properties set-meta-properties!)
  ;; Start and end positions of the procedure's code.
  (low-pc meta-low-pc)
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; List of <arity> records for the procedure.
  (arities meta-arities set-meta-arities!))
156
;; Validate LABEL (a symbol) and PROPERTIES (an alist with symbol
;; keys), then build a <meta> starting at LOW-PC whose high-pc is not
;; yet known (#f) and whose arity list is empty.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
161
;; Return the value stored under the `name' key of META's properties,
;; or #f when there is no such key.
(define (meta-name meta)
  (let ((props (meta-properties meta)))
    (assq-ref props 'name)))
164
;; Metadata for one <lambda-case>: its argument specification and the
;; code range it covers.  Only high-pc has a setter.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc)
  arity?
  ;; Argument specification.
  (req arity-req)
  (opt arity-opt)
  (rest arity-rest)
  (kw-indices arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  ;; Code range.
  (low-pc arity-low-pc)
  (high-pc arity-high-pc set-arity-high-pc!))
177
;; Number of 32-bit words in one code block.  Defined as identifier
;; syntax, so each use expands to the literal 32.
(define-syntax *block-size*
  (identifier-syntax 32))
179
180 ;;; An assembler collects all of the words emitted during assembly, and
181 ;;; also maintains ancillary information such as the constant table, a
182 ;;; relocation list, and so on.
183 ;;;
184 ;;; RTL code consists of 32-bit units. We emit RTL code using native
185 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
186 ;;; the bytevector as a whole instead of conditionalizing each access.
187 ;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta sources)
  asm?

  ;; RTL code goes into what is logically a growable vector, kept as a
  ;; list of blocks.  `cur' is the block being filled and `idx' is the
  ;; next free slot in it, counted in 32-bit units.
  ;;
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; Absolute offset, in u32 units, of the start of the current
  ;; instruction.  It is updated after all the words of a primitive
  ;; instruction have been written, mirroring the RTL VM, which only
  ;; advances its IP at the end of an instruction.  That makes it the
  ;; right base for computing offsets between two program points.
  ;;
  (start asm-start set-asm-start!)

  ;; Blocks that have already been filled.
  ;;
  (prev asm-prev set-asm-prev!)

  ;; Count of u32 words held in `prev'; equivalently, the offset of the
  ;; current block.
  ;;
  (written asm-written set-asm-written!)

  ;; Alist of symbol -> position for the labels defined in this
  ;; compilation unit.
  ;;
  (labels asm-labels set-asm-labels!)

  ;; Relocations needed by the program text, in an internal
  ;; representation.  Textual relative relocations are handled by the
  ;; assembler itself; the other kinds are later reified as linker
  ;; relocations and resolved by the linker.
  ;;
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  ;;
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table: a vhash of object -> label.  Constants are
  ;; de-duplicated and written to separate sections: .rodata for
  ;; read-only data, or .data for constants that need load-time
  ;; initialization (like symbols).  Because a constant can depend on
  ;; another (e.g. a symbol on its stringbuf), the order of entries in
  ;; this table matters.
  ;;
  (constants asm-constants set-asm-constants!)

  ;; RTL instructions that initialize the constants.  They will run in
  ;; a thunk with 2 local variables.
  ;;
  (inits asm-inits set-asm-inits!)

  ;; String table holding section names (the shstrtab).
  ;;
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; Number to give the next section that gets written.
  ;;
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; List of <meta> records holding procedure metadata.
  ;;
  (meta asm-meta set-asm-meta!)

  ;; List of (pos . source) pairs giving source information.  POS is
  ;; relative to the beginning of the text section; SOURCE has the same
  ;; format as the result of source-properties.
  ;;
  (sources asm-sources set-asm-sources!))
271
;; Allocate a new code block: a u32vector with *block-size* elements.
(define-inlinable (fresh-block)
  (make-u32vector *block-size*))
274
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Create an assembler for a given target @var{word-size} and
@var{endianness}, falling back to appropriate values for the configured
target."
  (let ((first-block (fresh-block))
        (section-names (make-string-table)))
    (make-asm first-block             ; cur
              0                       ; idx
              0                       ; start
              '()                     ; prev
              0                       ; written
              '()                     ; labels
              '()                     ; relocs
              word-size
              endianness
              vlist-null              ; constants
              '()                     ; inits
              section-names           ; shstrtab
              1                       ; next-section-number
              '()                     ; meta
              '())))                  ; sources
286
(define (intern-section-name! asm string)
  "Add a string to the section name table (shstrtab)."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
290
(define-inlinable (asm-pos asm)
  "The offset of the next word to be written into the code buffer, in
32-bit units."
  ;; Words in the already-closed blocks plus the index into the current
  ;; block.
  (+ (asm-written asm) (asm-idx asm)))
295
(define (allocate-new-block asm)
  "Close off the current block, and arrange for the next word to be
written to a fresh block."
  (let ((next (fresh-block))
        (closed (asm-cur asm))
        (offset (asm-pos asm)))
    ;; The closed block joins the front of the list of written blocks,
    ;; and the running word count is brought up to the current position.
    (set-asm-prev! asm (cons closed (asm-prev asm)))
    (set-asm-written! asm offset)
    (set-asm-cur! asm next)
    (set-asm-idx! asm 0)))
304
(define-inlinable (emit asm u32)
  "Emit one 32-bit word into the instruction stream.  Assumes that there
is space for the word, and ensures that there is space for the next
word."
  (let* ((slot (asm-idx asm))
         (next (1+ slot)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm next)
    ;; The current block is full: switch to a fresh one right away.
    (when (= next *block-size*)
      (allocate-new-block asm))))
313
(define-inlinable (make-reloc type label base word)
  "Make an internal relocation of type @var{type} referencing symbol
@var{label}, @var{word} words after position @var{base}.  @var{type}
may be x8-s24, indicating a 24-bit relative label reference that can be
fixed up by the assembler, or s32, indicating a 32-bit relative
reference that needs to be fixed up by the linker."
  ;; A relocation is represented as the plain list (TYPE LABEL BASE WORD).
  (list type label base word))
321
(define-inlinable (reset-asm-start! asm)
  "Reset the asm-start after writing the words for one instruction."
  (let ((here (asm-pos asm)))
    (set-asm-start! asm here)))
325
(define (emit-exported-label asm label)
  "Define a linker symbol associating @var{label} with the current
asm-start."
  (let ((entry (cons label (asm-start asm))))
    (set-asm-labels! asm (cons entry (asm-labels asm)))))
330
(define (record-label-reference asm label)
  "Record an x8-s24 local label reference.  This value will get patched
up later by the assembler."
  (let ((base (asm-start asm)))
    ;; The relocation's word offset is the distance from the start of
    ;; the current instruction to the word about to be written.
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label base
                                       (- (asm-pos asm) base))
                           (asm-relocs asm)))))
338
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Record an s32 far label reference.  This value will get patched up
later by the linker."
  ;; OFFSET is subtracted from the instruction start to give the base
  ;; of the relocation; the word offset is then relative to that base.
  (let ((base (- (asm-start asm) offset)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base
                                       (- (asm-pos asm) base))
                           (asm-relocs asm)))))
346
347
348 \f
349
350 ;;;
351 ;;; Primitive assemblers are defined by expanding `assembler' for each
352 ;;; opcode in `(rtl-instruction-list)'.
353 ;;;
354
(eval-when (expand compile load eval)
  ;; Build an identifier whose name is the concatenation of the names
  ;; of A and B, in the lexical context of CTX.
  (define (id-append ctx a b)
    (let ((joined (symbol-append (syntax->datum a) (syntax->datum b))))
      (datum->syntax ctx joined))))
358
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure
;; (lambda (asm operand ...) ...) that emits the 32-bit words of one
;; primitive instruction and then resets asm-start.  WORD0 and WORD*
;; are symbols naming the layout of each word; the operands of every
;; word are concatenated, in order, to form the procedure's formals.
(define-syntax assembler
  (lambda (x)
    ;; Expansion-time dispatch on a word-type symbol.  The matching
    ;; clause yields a syntax object of the form ((formal ...) code ...),
    ;; where the formals are fresh temporaries standing for the
    ;; clause's operand names.  An unknown type is an error.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, whose low
    ;; 8 bits always hold OPCODE.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.  Such a word has
    ;; no opcode field, so all of its fields are operands.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; Three fields, hence three operands.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          ;; The immediate's bits have to fit in one 32-bit word.
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; Two words: the high 32 bits first, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ;; B32 takes no operands and emits nothing.
       ((B32))
       ;; Far label references: a zero placeholder is emitted and a
       ;; relocation is recorded for it.
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        ;; OFFSET is scaled by the number of 32-bit units per target
        ;; word before it is handed to the relocation.
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
465
;; Hash table mapping an instruction name (a symbol) to the procedure
;; that assembles it.  It is filled in by define-assembler and
;; define-macro-assembler, and consulted by emit-text.
(define assemblers
  (make-hash-table))
467
;; (define-assembler NAME OPCODE KIND WORD ...) defines and exports
;; emit-NAME as the primitive assembler for NAME, and registers the
;; same procedure in the `assemblers' table under NAME.  KIND is
;; accepted but not used.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind word ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((proc (assembler name opcode word ...)))
                 (hashq-set! assemblers 'name proc)
                 proc))
             (export emit-name)))))))
479
;; (visit-opcodes MACRO ARG ...) expands to one (MACRO ARG ... . INST)
;; form for every entry INST of (rtl-instruction-list), which is
;; evaluated at expansion time.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((visit-opcodes macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (datum) (datum->syntax #'macro datum))
                           (rtl-instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))

;; Define an emit-OP procedure for every primitive instruction.
(visit-opcodes define-assembler)
492
(define (emit-text asm instructions)
  "Assemble @var{instructions} using the assembler @var{asm}.
@var{instructions} is a sequence of RTL instructions, expressed as a
list of lists.  This procedure can be called many times before calling
@code{link-assembly}."
  ;; Look up the assembler for the instruction's name and apply it to
  ;; ASM followed by the instruction's operands.
  (define (assemble-one inst)
    (let ((proc (or (hashq-ref assemblers (car inst))
                    (error 'bad-instruction inst))))
      (apply proc asm (cdr inst))))
  (for-each assemble-one instructions))
504
505 \f
506
507 ;;;
508 ;;; The constant table records a topologically sorted set of literal
509 ;;; constants used by a program. For example, a pair uses its car and
510 ;;; cdr, a string uses its stringbuf, etc.
511 ;;;
512 ;;; Some things we want to add to the constant table are not actually
513 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
514 ;;; references, or cache cells for non-closure procedures. For these we
515 ;;; define special record types and add instances of those record types
516 ;;; to the table.
517 ;;;
518
(define-inlinable (immediate? x)
  "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
  ;; An object is treated as immediate when bit 1 or bit 2 of its
  ;; address word is set.
  (logtest (object-address x) 6))
522
;; Constant-table entry standing for the stringbuf of a string; it
;; simply wraps the string.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string stringbuf-string))
527
;; Constant-table entry for a non-closure procedure.  `code' is the
;; value patched into the procedure's second word when constants are
;; initialized.
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code static-procedure-code))
532
;; Constant-table entry for the raw contents of a uniform vector: the
;; bytes as a bytevector, plus the element size.
(define-record-type <uniform-vector-backing-store>
  (make-uniform-vector-backing-store bytes element-size)
  uniform-vector-backing-store?
  (bytes uniform-vector-backing-store-bytes)
  (element-size uniform-vector-backing-store-element-size))
538
;; Constant-table entry for a cache cell, identified by a scope and a
;; key.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key cache-cell-key))
544
;; True when OBJ is a vector whose array shape is exactly
;; ((0 LENGTH-1)).
(define (simple-vector? obj)
  (and (vector? obj)
       (let ((last (1- (vector-length obj))))
         (equal? (array-shape obj) (list (list 0 last))))))
548
;; True when OBJ is an array whose element type is a symbol and whose
;; shape is exactly ((0 LENGTH-1)).
(define (simple-uniform-vector? obj)
  (and (array? obj)
       (symbol? (array-type obj))
       (let ((last (1- (array-length obj))))
         (equal? (array-shape obj) (list (list 0 last))))))
553
(define (statically-allocatable? x)
  "Return @code{#t} if a non-immediate constant can be allocated
statically, and @code{#f} if it would need some kind of runtime
allocation."
  (or (pair? x)
      (string? x)
      (stringbuf? x)
      (static-procedure? x)
      (array? x)))
559
(define (intern-constant asm obj)
  "Add an object to the constant table, and return a label that can be
used to reference it.  If the object is already present in the constant
table, its existing label is used directly."
  ;; Intern a constant that OBJ depends on.
  (define (intern-child child)
    (intern-constant asm child))
  ;; Instructions that store the constant CHILD into field N of the
  ;; object labelled DST.  Immediates need none; statically allocatable
  ;; children are patched in directly, anything else goes through
  ;; local 1.
  (define (field-inits dst n child)
    (let ((src (intern-child child)))
      (cond
       ((not src) '())
       ((statically-allocatable? child)
        `((static-patch! ,dst ,n ,src)))
       (else
        `((static-ref 1 ,src)
          (static-set! 1 ,dst ,n))))))
  ;; Initialization instructions for X, which lives at LABEL.  This
  ;; interns whatever X depends on as a side effect.
  (define (init-code x label)
    (cond
     ((pair? x)
      (append (field-inits label 0 (car x))
              (field-inits label 1 (cdr x))))
     ((simple-vector? x)
      ;; Element I of the vector is field I+1 of the object.
      (do ((i 0 (1+ i))
           (rev '() (append-reverse
                     (field-inits label (1+ i) (vector-ref x i))
                     rev)))
          ((not (< i (vector-length x))) (reverse rev))))
     ((stringbuf? x) '())
     ((static-procedure? x)
      `((static-patch! ,label 1 ,(static-procedure-code x))))
     ((cache-cell? x) '())
     ((symbol? x)
      `((make-non-immediate 1 ,(intern-child (symbol->string x)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? x)
      `((static-patch! ,label 1 ,(intern-child (make-stringbuf x)))))
     ((keyword? x)
      `((static-ref 1 ,(intern-child (keyword->symbol x)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? x)
      `((make-non-immediate 1 ,(intern-child (number->string x)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     ((uniform-vector-backing-store? x) '())
     ((simple-uniform-vector? x)
      `((static-patch! ,label 2
                       ,(intern-child
                         (make-uniform-vector-backing-store
                          (uniform-array->bytevector x)
                          (if (bitvector? x)
                              ;; Bitvectors are addressed in
                              ;; 32-bit units.
                              4
                              (uniform-vector-element-size x)))))))
     (else
      (error "don't know how to intern" x))))
  (cond
   ;; Immediates need no table entry and have no label.
   ((immediate? obj) #f)
   ((vhash-assoc obj (asm-constants asm)) => cdr)
   (else
    ;; Note that computing the init code may mutate asm-constants and
    ;; asm-inits, so both are re-read afterwards.
    (let* ((label (gensym "constant"))
           (code (init-code obj label)))
      (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
      (set-asm-inits! asm (append-reverse code (asm-inits asm)))
      label))))
627
(define (intern-non-immediate asm obj)
  "Intern a non-immediate into the constant table, and return its
label."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
634
(define (intern-cache-cell asm scope key)
  "Intern a cache cell into the constant table, and return its label.
If there is already a cache cell with the given scope and key, it is
returned instead."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
640
;; The module cell of a scope is the cache cell stored under the key
;; #t in that scope.
(define (intern-module-cache-cell asm scope)
  "Intern a cache cell for a module, and return its label."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
645
646
647 \f
648
649 ;;;
650 ;;; Macro assemblers bridge the gap between primitive instructions and
651 ;;; some higher-level operations.
652 ;;;
653
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; emit-NAME as a procedure of ARG ... with the given body, and
;; registers the same procedure in the `assemblers' table under NAME.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((proc (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name proc)
                 proc))
             (export emit-name)))))))
665
;; Load the constant OBJ into slot DST, choosing the instruction from
;; the kind of constant.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      ;; Immediates: use the shortest encoding that holds the bits.
      (let ((bits (object-address obj)))
        (cond
         ((and (< dst 256) (zero? (ash bits -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash bits -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      ;; Non-immediates live in the constant table.
      (let ((label (intern-non-immediate asm obj)))
        (if (statically-allocatable? obj)
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
681
;; Load into DST the statically allocated procedure whose code is at
;; LABEL, interning a <static-procedure> for it.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate
   asm dst
   (intern-constant asm (make-static-procedure label))))
685
;; Define the macro-assembler NAME as br-if-tc7 with the type code
;; fixed to TC7.
(define-syntax define-tc7-macro-assembler
  (syntax-rules ()
    ((_ name tc7)
     (define-macro-assembler (name asm slot invert? label)
       (emit-br-if-tc7 asm slot invert? tc7 label)))))
689
;; The tc7 codes below are part of Guile's ABI and have to be kept in
;; sync with tags.h.  Macro assemblers that are currently unused are
;; commented out.  See also *branching-primcall-arities* in (language
;; cps primitives), the set of macro-instructions in assembly.scm, and
;; disassembler.scm:code-annotation.
;;
;; FIXME: Define all tc7 values in Scheme in one place, derived from
;; tags.h.
;;
;; NOTE(review): the commented-out br-if-weak-vector entry uses 13, the
;; same code as br-if-vector; check it against tags.h before enabling.
(define-tc7-macro-assembler  br-if-symbol          5)
(define-tc7-macro-assembler  br-if-variable        7)
(define-tc7-macro-assembler  br-if-vector         13)
;(define-tc7-macro-assembler br-if-weak-vector    13)
(define-tc7-macro-assembler  br-if-string         21)
;(define-tc7-macro-assembler br-if-heap-number    23)
;(define-tc7-macro-assembler br-if-stringbuf      39)
(define-tc7-macro-assembler  br-if-bytevector     77)
;(define-tc7-macro-assembler br-if-pointer        31)
;(define-tc7-macro-assembler br-if-hashtable      29)
;(define-tc7-macro-assembler br-if-fluid          37)
;(define-tc7-macro-assembler br-if-dynamic-state  45)
;(define-tc7-macro-assembler br-if-frame          47)
;(define-tc7-macro-assembler br-if-objcode        53)
;(define-tc7-macro-assembler br-if-vm             55)
;(define-tc7-macro-assembler br-if-vm-cont        71)
;(define-tc7-macro-assembler br-if-rtl-program    69)
;(define-tc7-macro-assembler br-if-program        79)
;(define-tc7-macro-assembler br-if-weak-set       85)
;(define-tc7-macro-assembler br-if-weak-table     87)
;(define-tc7-macro-assembler br-if-array          93)
(define-tc7-macro-assembler  br-if-bitvector      95)
;(define-tc7-macro-assembler br-if-port          125)
;(define-tc7-macro-assembler br-if-smob          127)
722
;; Start a procedure: define LABEL at the current position and push a
;; fresh <meta> for it onto the assembler's metadata list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
727
;; Finish the most recently begun procedure: record its end position
;; and reverse its arity list, which was accumulated newest-first.
(define-macro-assembler (end-program asm)
  (let ((current (car (asm-meta asm))))
    (set-meta-high-pc! current (asm-start asm))
    (let ((in-order (reverse (meta-arities current))))
      (set-meta-arities! current in-order))))
732
;; Begin an arity with required arguments only: no optionals and no
;; rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
735
;; Begin an arity without keyword arguments: empty keyword index list,
;; and other keys not allowed.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
738
;; Begin an arity of the current procedure: validate the argument
;; specification, record an <arity> starting at the current position,
;; and emit the prelude that matches the kinds of arguments present.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  ;; The new arity has no end position yet; end-arity fills it in.
  (let ((current (car (asm-meta asm)))
        (arity (make-arity req opt rest kw-indices allow-other-keys?
                           (asm-start asm) #f)))
    (set-meta-arities! current (cons arity (meta-arities current))))
  ;; The procedure itself is in slot 0, in the standard calling
  ;; convention.  For procedure prologues, nreq includes the
  ;; procedure, so here we add 1.
  (let ((nreq (1+ (length req)))
        (nopt (length opt))
        (rest? (->bool rest)))
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
767
;; Close the most recently begun arity of the current procedure by
;; recording its end position.
(define-macro-assembler (end-arity asm)
  (let* ((current (car (asm-meta asm)))
         (arity (car (meta-arities current))))
    (set-arity-high-pc! arity (asm-start asm))))
771
;; Prelude for a procedure taking exactly NREQ arguments.  With an
;; ALTERNATE, a mismatch branches there; otherwise the count is
;; asserted, using the combined assert-nargs-ee/locals instruction
;; when both of its operands are below 2^12.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (let ((limit (ash 1 12)))
    (cond
     (alternate
      (emit-br-if-nargs-ne asm nreq alternate)
      (emit-alloc-frame asm nlocals))
     ((and (< nreq limit) (< (- nlocals nreq) limit))
      (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
     (else
      (emit-assert-nargs-ee asm nreq)
      (emit-alloc-frame asm nlocals)))))
782
;; Prelude for a procedure with optional and/or rest arguments.  The
;; lower bound on the argument count is checked first, then either the
;; rest argument is bound or the upper bound is checked, and finally
;; the frame is allocated.  With an ALTERNATE, failed checks branch
;; there instead of being assertions.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (if alternate
      (emit-br-if-nargs-lt asm nreq alternate)
      (emit-assert-nargs-ge asm nreq))
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
795
;; Prelude for a procedure with keyword arguments: check the lower
;; bound on the argument count, bind the keyword arguments, and
;; allocate the frame.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (if alternate
      (emit-br-if-nargs-lt asm nreq alternate)
      (emit-assert-nargs-ge asm nreq))
  ;; NTOTAL is one more than the highest keyword slot index, and at
  ;; least the number of positional arguments.
  (let* ((npositional (+ nreq nopt))
         (ntotal (fold (lambda (entry highest)
                         (match entry
                           (((? keyword?) . idx)
                            (max (1+ idx) highest))))
                       npositional
                       kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      (intern-constant asm kw-indices))
    (emit-alloc-frame asm nlocals)))
813
;; Define the label SYM at the start of the current instruction.
(define-macro-assembler (label asm sym)
  (let ((entry (cons sym (asm-start asm))))
    (set-asm-labels! asm (cons entry (asm-labels asm)))))
816
;; Associate the source information SOURCE with the start of the
;; current instruction.
(define-macro-assembler (source asm source)
  (let ((entry (cons (asm-start asm) source)))
    (set-asm-sources! asm (cons entry (asm-sources asm)))))
819
;; Store the value in slot MODULE into field 0 of the module cache
;; cell for SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module
                    (intern-module-cache-cell asm scope)
                    0))
823
;; Emit toplevel-box for SYM in SCOPE, interning the symbol, the
;; scope's module cache cell and the (SCOPE, SYM) cache cell for it.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((symbol-label (intern-non-immediate asm sym))
        (module-label (intern-module-cache-cell asm scope))
        (cache-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cache-label module-label symbol-label
                       bound?)))
829
;; Emit module-box for SYM in the module MODULE-NAME.  The pair
;; (PUBLIC? . MODULE-NAME) is interned as a constant and also serves
;; as the scope of the cache cell.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((symbol-label (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-label (intern-constant asm scope))
         (cache-label (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst cache-label scope-label symbol-label bound?)))
836
837
838 \f
839
840 ;;;
841 ;;; Helper for linking objects.
842 ;;;
843
(define (make-object asm name bv relocs labels . kwargs)
  "Make a linker object.  This helper handles interning the name in the
shstrtab, assigning the size, allocating a fresh index, and defining a
corresponding linker symbol for the start of the section."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    (set-asm-next-section-number! asm (1+ index))
    ;; KWARGS are passed through to make-elf-section after the keyword
    ;; arguments supplied here.
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          ;; A symbol named after the section, at offset 0, goes in
          ;; front of the caller's labels.
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
858
859
860 \f
861
862 ;;;
863 ;;; Linking the constant table. This code is somewhat intertwingled
864 ;;; with the intern-constant code above, as that procedure also
865 ;;; residualizes instructions to initialize constants at load time.
866 ;;;
867
;; Write the bits of X, as given by object-address, into BUF at byte
;; offset POS as one target word (4 or 8 bytes) in the target
;; endianness.
(define (write-immediate asm buf pos x)
  (let ((bits (object-address x))
        (order (asm-endianness asm))
        (word-size (asm-word-size asm)))
    (cond
     ((eqv? word-size 4) (bytevector-u32-set! buf pos bits order))
     ((eqv? word-size 8) (bytevector-u64-set! buf pos bits order))
     (else (error "bad word size" asm)))))
875
(define (emit-init-constants asm)
  "If there is writable data that needs initialization at runtime, emit
a procedure to do that and return its label.  Otherwise return
@code{#f}."
  (let ((inits (asm-inits asm)))
    (if (null? inits)
        #f
        (let ((label (gensym "init-constants")))
          ;; asm-inits is accumulated in reverse, so it is flipped back
          ;; into execution order here.
          (emit-text asm
                     `((begin-program ,label ())
                       (assert-nargs-ee/locals 1 1)
                       ,@(reverse inits)
                       (load-constant 1 ,*unspecified*)
                       (return 1)
                       (end-program)))
          label))))
891
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels.  Each object is written at an 8-byte-aligned offset,
and a linker symbol is defined for its label at that offset."
  (define (align address alignment)
    ;; Round ADDRESS up to the next multiple of ALIGNMENT.
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Type tags written into the first word of statically allocated
  ;; objects.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)
  (define tc7-bytevector 77)
  (define tc7-bitvector 95)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    (define (write-uword! buf pos val)
      ;; Write VAL as one unsigned target word at byte offset POS.
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    (define (byte-length x)
      ;; Number of bytes that `write' below emits for X.
      (cond
       ((stringbuf? x)
        (let ((x (stringbuf-string x)))
          ;; Two header words, then the characters plus a terminating
          ;; zero character.
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((simple-vector? x)
        (* (1+ (vector-length x)) word-size))
       ((simple-uniform-vector? x)
        (* 4 word-size))
       ((uniform-vector-backing-store? x)
        (bytevector-length (uniform-vector-backing-store-bytes x)))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (write-uword! buf pos tag)
          (write-uword! buf (+ pos word-size) len)
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     ;; Terminating zero byte.
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     ;; Terminating zero character.
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        (write-uword! buf pos tc7-rtl-program)
        (write-uword! buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        (write-uword! buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-uword! buf (+ pos (* 2 word-size)) 0)
        (write-uword! buf (+ pos (* 3 word-size)) (string-length obj))) ; length

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((simple-vector? obj)
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-uword! buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              ;; Element I lives one word past the header word.
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       ((simple-uniform-vector? obj)
        (let ((tag (if (bitvector? obj)
                       tc7-bitvector
                       (let ((type-code (uniform-vector-element-type-code obj)))
                         (logior tc7-bytevector (ash type-code 7)))))
              (len (if (bitvector? obj)
                       (bitvector-length obj)
                       (bytevector-length obj))))
          (write-uword! buf pos tag)
          (write-uword! buf (+ pos word-size) len)              ; length
          (write-uword! buf (+ pos (* 2 word-size)) 0)          ; pointer
          (write-immediate asm buf (+ pos (* 3 word-size)) #f))) ; owner

       ((uniform-vector-backing-store? obj)
        (let ((bv (uniform-vector-backing-store-bytes obj)))
          (bytevector-copy! bv 0 buf pos (bytevector-length bv))
          (unless (or (= 1 (uniform-vector-backing-store-element-size obj))
                      (eq? endianness (native-endianness)))
            ;; Need to swap units of element-size bytes
            (error "FIXME: Implement byte order swap"))))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Total size: each object starts at the 8-byte-aligned end of
      ;; the objects before it.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (labels '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) labels)))
              (make-object asm name buf '() labels
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC)))))))))
1090
(define (link-constants asm)
  "Partition the constants recorded in @var{asm} between a read-only
and a writable data section, and link both.

Returns three values: the linker object for .rodata, the linker object
for .data, and the label of an initialization procedure.  Any of these
may be @code{#f}."
  (define (all-immediate? vec)
    (let lp ((i 0))
      (or (= i (vector-length vec))
          (and (immediate? (vector-ref vec i))
               (lp (1+ i))))))
  ;; An object goes to .rodata only if this predicate accepts it; all
  ;; other objects go to .data.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x) (and (immediate? (car x)) (immediate? (cdr x))))
     ((simple-vector? x) (all-immediate? x))
     ((uniform-vector-backing-store? x) #t)
     (else #f)))
  (let* ((constants (asm-constants asm))
         (len (vlist-length constants)))
    (let lp ((i 0) (ro vlist-null) (rw vlist-null))
      (cond
       ((= i len)
        (values (link-data asm ro '.rodata)
                (link-data asm rw '.data)
                (emit-init-constants asm)))
       (else
        (let* ((pair (vlist-ref constants i))
               (obj (car pair))
               (label (cdr pair)))
          (if (shareable? obj)
              (lp (1+ i) (vhash-consq obj label ro) rw)
              (lp (1+ i) ro (vhash-consq obj label rw)))))))))
1123
1124 \f
1125
1126 ;;;
1127 ;;; Linking program text.
1128 ;;;
1129
(define (process-relocs buf relocs labels)
  "Resolve in @var{buf} every x8-s24 relocation, and every s32
relocation whose label is found in @var{labels}.  Return a list of
linker relocations for the s32 references whose labels are not in
@var{labels}.  An x8-s24 relocation with an unknown label is an error."
  (define (process-one reloc external)
    (match reloc
      ((type label base word)
       (let ((abs (assq-ref labels label))
             (dst (+ base word)))
         (cond
          ((eq? type 's32)
           (cond
            (abs
             (s32-set! buf dst (- abs base))
             external)
            (else
             ;; DST counts 32-bit words; the linker reloc takes bytes.
             (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
                   external))))
          ((eq? type 'x8-s24)
           (unless abs
             (error "unbound near relocation" reloc))
           ;; Keep the low 8 bits of the existing word and pack the
           ;; relative offset alongside them.
           (let ((low-byte (logand (u32-ref buf dst) #xff)))
             (u32-set! buf dst (pack-u8-s24 low-byte (- abs base)))
             external))
          (else (error "bad relocation kind" reloc)))))))
  (let lp ((relocs relocs) (external '()))
    (if (null? relocs)
        external
        (lp (cdr relocs) (process-one (car relocs) external)))))
1158
(define (process-labels labels)
  "Return a list of linker symbols, one for each label-offset pair in
@var{labels}.  Offsets in @var{labels} count 32-bit words; the symbols
carry the corresponding byte offsets."
  (define (label->symbol entry)
    (let ((name (car entry))
          (byte-offset (* (cdr entry) 4)))
      (make-linker-symbol name byte-offset)))
  (map label->symbol labels))
1165
(define (swap-bytes! buf)
  "Reverse, in place, the byte order of every 32-bit unit of the
bytevector @var{buf}.  The length of @var{buf} must be a multiple of 4."
  (let ((byte-len (bytevector-length buf)))
    (when (not (zero? (modulo byte-len 4)))
      (error "unexpected length"))
    (do ((pos 0 (+ pos 4)))
        ((= pos byte-len))
      ;; Reading as big-endian and storing as little-endian reverses the
      ;; four bytes.
      (let ((word (bytevector-u32-ref buf pos (endianness big))))
        (bytevector-u32-set! buf pos word (endianness little))))))
1179
(define (link-text-object asm)
  "Concatenate the instruction blocks accumulated in @var{asm} into a
single buffer and return it as the .rtl-text linker object.  The buffer
is byte-swapped if the target endianness is not the native one."
  (let ((buf (make-u32vector (asm-pos asm))))
    ;; The completed blocks are stored most-recent-first.
    (let lp ((pos 0) (blocks (reverse (asm-prev asm))))
      (cond
       ((null? blocks)
        ;; Only the first (asm-idx asm) words of the current block are
        ;; copied.
        (bytevector-copy! (asm-cur asm) 0 buf pos (* (asm-idx asm) 4))
        (unless (eq? (asm-endianness asm) (native-endianness))
          (swap-bytes! buf))
        (let* ((labels (asm-labels asm))
               (relocs (process-relocs buf (asm-relocs asm) labels))
               (symbols (process-labels labels)))
          (make-object asm '.rtl-text buf relocs symbols)))
       (else
        (let ((block-bytes (* *block-size* 4)))
          (bytevector-copy! (car blocks) 0 buf pos block-bytes)
          (lp (+ pos block-bytes) (cdr blocks))))))))
1198
1199
1200 \f
1201
1202 ;;;
1203 ;;; Linking other sections of the ELF file, like the dynamic segment,
1204 ;;; the symbol table, etc.
1205 ;;;
1206
(define (link-dynamic-section asm text rw rw-init)
  "Link the .dynamic section for an ELF image with RTL text.  @var{rw}
is the linker object of the writable data section, or false if there is
none; when present, its address and size are recorded as a GC root.
@var{rw-init} is the label of the procedure that initializes @var{rw},
or false.  @var{text} is not referenced here."
  (define (emit-dynamic-section word-size %set-uword! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Number of words in the section, including the two-word
           ;; DT_NULL terminator.
           (nwords (cond ((not rw) 6)
                         (rw-init 12)
                         (else 10)))
           (bv (make-bytevector (* word-size nwords) 0))
           (relocs '()))
      (define (set-uword! i uword)
        (%set-uword! bv (* i word-size) uword endianness))
      (define (set-label! i label)
        ;; The word itself is zeroed; the linker reloc fills it in.
        (set! relocs (cons (make-linker-reloc reloc-type
                                              (* i word-size) 0 label)
                           relocs))
        (%set-uword! bv (* i word-size) 0 endianness))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 #x02020000)
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      (when rw
        ;; Add roots to GC.
        (set-uword! 4 DT_GUILE_GC_ROOT)
        (set-label! 5 '.data)
        (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
        (set-uword! 7 (bytevector-length (linker-object-bv rw)))
        (when rw-init
          (set-uword! 8 DT_INIT)        ; constants
          (set-label! 9 rw-init)))
      ;; The terminator occupies the last two words.
      (set-uword! (- nwords 2) DT_NULL)
      (set-uword! (- nwords 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (emit-dynamic-section 4 bytevector-u32-set! 'abs32/1))
    ((8) (emit-dynamic-section 8 bytevector-u64-set! 'abs64/1))
    (else (error "bad word size" asm))))
1254
(define (link-shstrtab asm)
  "Return the linker object for .shstrtab, the string table holding the
section names of @var{asm}."
  ;; Intern this section's own name before the table is linked;
  ;; make-object interns it again only after its arguments, including
  ;; the linked table, have been evaluated.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table-bytes '() '()
                 #:type SHT_STRTAB #:flags 0)))
1262
(define (link-symtab text-section asm)
  ;; Write one hidden STT_FUNC ELF symbol for each meta recorded in
  ;; ASM, indexed against TEXT-SECTION.  Returns two values: the
  ;; .symtab linker object and the .strtab linker object that holds the
  ;; symbol names.
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (size (elf-symbol-len word-size))
         (meta (reverse (asm-meta asm)))
         (n (length meta))
         (strtab (make-string-table))
         (bv (make-bytevector (* n size) 0)))
    (define (intern-string! name)
      ;; A meta without a name is given the empty string.
      (string-table-intern! strtab (if name (symbol->string name) "")))
    (let lp ((metas meta) (idx 0))
      (unless (null? metas)
        (let* ((m (car metas))
               (name (intern-string! (meta-name m)))
               (low (meta-low-pc m))
               (high (meta-high-pc m)))
          (write-elf-symbol bv (* idx size) endianness word-size
                            (make-elf-symbol
                             #:name name
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 low)
                             #:size (* 4 (- high low))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (lp (cdr metas) (1+ idx)))))
    ;; The string table object is made first, since .symtab links to
    ;; its section index.
    (let* ((strtab-object (make-object asm '.strtab
                                       (link-string-table! strtab)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (strtab-index (elf-section-index
                          (linker-object-section strtab-object))))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize size
                           #:link strtab-index)
              strtab-object))))
1299
1300 ;;; The .guile.arities section describes the arities that a function can
1301 ;;; have. It is in two parts: a sorted array of headers describing
1302 ;;; basic arities, and an array of links out to a string table (and in
1303 ;;; the case of keyword arguments, to the data section) for argument
1304 ;;; names. The whole thing is prefixed by a uint32 indicating the
1305 ;;; offset of the end of the headers array.
1306 ;;;
1307 ;;; The arity headers array is a packed array of structures of the form:
1308 ;;;
1309 ;;; struct arity_header {
1310 ;;; uint32_t low_pc;
1311 ;;; uint32_t high_pc;
1312 ;;; uint32_t offset;
1313 ;;; uint32_t flags;
1314 ;;; uint32_t nreq;
1315 ;;; uint32_t nopt;
1316 ;;; }
1317 ;;;
1318 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1319 ;;; future to use 64-bit offsets if appropriate, but there are other
1320 ;;; aspects of RTL that constrain us to a total image that fits in 32
1321 ;;; bits, so for the moment we'll simplify the problem space.
1322 ;;;
1323 ;;; The following flags values are defined:
1324 ;;;
1325 ;;; #x1: has-rest?
1326 ;;; #x2: allow-other-keys?
1327 ;;; #x4: has-keyword-args?
1328 ;;; #x8: is-case-lambda?
1329 ;;;
1330 ;;; Functions with a single arity specify their number of required and
1331 ;;; optional arguments in nreq and nopt, and do not have the
1332 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1333 ;;; of pointers into the associated .guile.arities.strtab string table,
1334 ;;; identifying the argument names. This offset is relative to the
1335 ;;; start of the .guile.arities section. Links for required arguments
1336 ;;; are first, in order, as uint32 values. Next follow the optionals,
1337 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1338 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1339 ;;; links, the kw-indices link points into the data section, and is
1340 ;;; relative to the ELF image as a whole.
1341 ;;;
1342 ;;; Functions with no arities have no arities information present in the
1343 ;;; .guile.arities section.
1344 ;;;
1345 ;;; Functions with multiple arities are preceded by a header with
1346 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1347 ;;; high-pc which should be the bounds of the whole function. Headers
1348 ;;; for the individual arities follow. In this way the whole headers
1349 ;;; array is sorted in increasing low-pc order, and case-lambda clauses
1350 ;;; are contained within the [low-pc, high-pc] of the case-lambda
1351 ;;; header.
1352
;; Size in bytes of the uint32 that prefixes the arities section.
(define arities-prefix-len 4)

;; Size in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte offset of the "offset" field within an arity header; it is
;; preceded by two uint32 fields.
(define arity-header-offset-offset (* 4 2))
1361
;; Combine four booleans into the arity header's flags word:
;; #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;; #x8 is-case-lambda?.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)))
1368
(define (meta-arities-size meta)
  ;; Number of bytes that the arities of META occupy in the
  ;; .guile.arities section: headers plus 4-byte links.
  (define (link-count arity)
    ;; One link per required and optional argument, one for the rest
    ;; argument if any, and one if kw-indices is a pair.
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (lambda-size arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (match (meta-arities meta)
    (() 0)
    ((arity) (lambda-size arity))
    ;; Several arities: one extra header for the case-lambda itself.
    (arities (apply + arity-header-len (map lambda-size arities)))))
1385
(define (write-arity-headers metas bv endianness)
  ;; Write the arity headers for METAS into BV, starting just after the
  ;; section prefix, then store the end position of the headers in the
  ;; prefix.  The "offset" field of every header is written as 0.
  ;; Returns two values: the position just past the last header, and an
  ;; alist, in writing order, that maps each arity to the byte position
  ;; of its header's "offset" field.
  (define (put-u32! pos val)
    (bytevector-u32-set! bv pos val endianness))
  (define (write-arity-header* pos low-pc high-pc flags nreq nopt)
    (put-u32! pos low-pc)
    (put-u32! (+ pos 4) high-pc)
    (put-u32! (+ pos 8) 0)              ; offset
    (put-u32! (+ pos 12) flags)
    (put-u32! (+ pos 16) nreq)
    (put-u32! (+ pos 20) nopt))
  (define (write-arity-header pos arity)
    (let ((flags (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f)))
      (write-arity-header* pos
                           (arity-low-pc arity)
                           (arity-high-pc arity)
                           flags
                           (length (arity-req arity))
                           (length (arity-opt arity)))))
  (define (write-clauses arities pos offsets)
    ;; Write one header per arity; return the new position and the
    ;; extended offsets alist as two values.
    (let lp ((arities arities) (pos pos) (offsets offsets))
      (if (null? arities)
          (values pos offsets)
          (let ((arity (car arities)))
            (write-arity-header pos arity)
            (lp (cdr arities)
                (+ pos arity-header-len)
                (acons arity (+ pos arity-header-offset-offset) offsets))))))
  (define (single-or-none? arities)
    (or (null? arities)
        (and (pair? arities) (null? (cdr arities)))))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (if (null? metas)
        (begin
          ;; Fill in the prefix.
          (put-u32! 0 pos)
          (values pos (reverse offsets)))
        (let* ((meta (car metas))
               (arities (meta-arities meta))
               (case-lambda? (not (single-or-none? arities))))
          (when case-lambda?
            ;; Several arities get a leading case-lambda header.  Its
            ;; offset link is 0.
            (write-arity-header* pos (meta-low-pc meta) (meta-high-pc meta)
                                 (pack-arity-flags #f #f #f #t) 0 0))
          (call-with-values
              (lambda ()
                (write-clauses arities
                               (if case-lambda?
                                   (+ pos arity-header-len)
                                   pos)
                               offsets))
            (lambda (pos offsets)
              (lp (cdr metas) pos offsets)))))))
1433
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  ;; Starting at byte position POS of BV, write the links for each
  ;; arity in ARITY-OFFSET-PAIRS, and patch the position of an arity's
  ;; first link into its header's "offset" field.  Argument names are
  ;; interned in STRTAB.  Returns the list of linker relocations made
  ;; for kw-indices links.  It is an error if BV is not exactly filled.
  (define order (asm-endianness asm))
  (define (write-symbol sym pos)
    (let ((name-idx (string-table-intern! strtab (symbol->string sym))))
      (bytevector-u32-set! bv pos name-idx order)
      (+ pos 4)))
  (define (arity-names arity)
    ;; Required arguments, then optionals, then the rest argument if
    ;; there is one.
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  (define (write-kw-indices pos kw-indices)
    ;; FIXME: Assert that kw-indices is already interned.
    (make-linker-reloc 'abs32/1 pos 0
                       (intern-constant asm kw-indices)))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (match pairs
      (()
       (unless (= pos (bytevector-length bv))
         (error "expected to fully fill the bytevector"
                pos (bytevector-length bv)))
       relocs)
      (((arity . offset) . pairs)
       (bytevector-u32-set! bv offset pos order)
       (let ((pos (fold write-symbol pos (arity-names arity)))
             (kw-indices (arity-kw-indices arity)))
         (if (null? kw-indices)
             (lp pos pairs relocs)
             ;; The kw-indices link takes one more 4-byte slot, which
             ;; is filled in through a relocation.
             (lp (+ pos 4)
                 pairs
                 (cons (write-kw-indices pos kw-indices) relocs))))))))
1466
(define (link-arities asm)
  ;; Build the arities section for the metas recorded in ASM.  Returns
  ;; two values: the .guile.arities linker object and the
  ;; .guile.arities.strtab linker object that it links to.
  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         (size (apply + arities-prefix-len (map meta-arities-size metas)))
         (strtab (make-string-table))
         (bv (make-bytevector size 0)))
    (let-values (((pos arity-offset-pairs)
                  (write-arity-headers metas bv endianness)))
      ;; The links are written, and the names thereby interned, before
      ;; the string table is linked.
      (let* ((kw-indices-relocs
              (write-arity-links asm bv pos arity-offset-pairs strtab))
             (strtab-object (make-object asm '.guile.arities.strtab
                                         (link-string-table! strtab)
                                         '() '()
                                         #:type SHT_STRTAB #:flags 0))
             (strtab-index (elf-section-index
                            (linker-object-section strtab-object))))
        (values (make-object asm '.guile.arities
                             bv
                             kw-indices-relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link strtab-index)
                strtab-object)))))
1493
1494 ;;;
1495 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1496 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1497 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1498 ;;; entry to a program, relative to the start of the text section, and
1499 ;;; str is an index into the associated .guile.docstrs.strtab string
1500 ;;; table section.
1501 ;;;
1502
;; Size in bytes of one docstrs entry: two 32-bit fields, pc and str.
(define docstr-size (* 2 4))
1505
(define (link-docstrs asm)
  ;; Build the docstrs section for the metas recorded in ASM.  Returns
  ;; two values: the .guile.docstrs linker object and the
  ;; .guile.docstrs.strtab linker object that it links to.
  (define (is-documentation? pair)
    (eq? (car pair) 'documentation))
  (define (meta->entry meta)
    ;; Return (low-pc . docstring) if META has exactly one
    ;; `documentation' property and its value is a string; otherwise
    ;; return #f.
    (let ((tail (find-tail is-documentation? (meta-properties meta))))
      (and tail
           (not (find-tail is-documentation? (cdr tail)))
           (string? (cdar tail))
           (cons (meta-low-pc meta) (cdar tail)))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (filter-map meta->entry (reverse (asm-meta asm))))
         (strtab (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    (let lp ((entries docstrings) (pos 0))
      (unless (null? entries)
        (match (car entries)
          ((pc . string)
           (bytevector-u32-set! bv pos pc endianness)
           (bytevector-u32-set! bv (+ pos 4)
                                (string-table-intern! strtab string)
                                endianness)))
        (lp (cdr entries) (+ pos docstr-size))))
    (let* ((strtab-object (make-object asm '.guile.docstrs.strtab
                                       (link-string-table! strtab)
                                       '() '()
                                       #:type SHT_STRTAB #:flags 0))
           (strtab-index (elf-section-index
                          (linker-object-section strtab-object))))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link strtab-index)
              strtab-object))))
1543
1544 ;;;
1545 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1546 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1547 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1548 ;;; entry to a program, relative to the start of the text section, and
1549 ;;; addr is the address of the associated properties alist, relative to
1550 ;;; the start of the ELF image.
1551 ;;;
1552 ;;; Since procedure properties are stored in the data sections, we need
1553 ;;; to link the procedures property section first. (Note that this
1554 ;;; constraint does not apply to the arities section, which may
1555 ;;; reference the data sections via the kw-indices literal, because
1556 ;;; assembling the text section already makes sure that the kw-indices
1557 ;;; are interned.)
1558 ;;;
1559
;; Size in bytes of one procprops entry: two 32-bit fields, pc and addr.
(define procprops-size (* 2 4))
1562
(define (link-procprops asm)
  ;; Build the .guile.procprops linker object for the metas recorded in
  ;; ASM.  Each entry holds a pc and a 32-bit slot; the slot is filled
  ;; through a relocation against the interned properties alist.
  (define (assoc-remove-one alist key value-pred)
    ;; Find the first entry of ALIST whose key is KEY.  If VALUE-PRED
    ;; accepts its value, return ALIST without that entry; otherwise
    ;; return an equal copy of ALIST.  Later entries are never examined.
    (if (null? alist)
        '()
        (let ((k (caar alist))
              (v (cdar alist))
              (rest (cdr alist)))
          (cond
           ((not (eq? k key))
            (acons k v (assoc-remove-one rest key value-pred)))
           ((value-pred v) rest)
           (else (acons key v rest))))))
  (define (props-without-name-or-docstring meta)
    (let ((unnamed (assoc-remove-one (meta-properties meta)
                                     'name
                                     (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))
  (define (meta->entry meta)
    ;; Return (low-pc . props), or #f when no properties remain.
    (let ((props (props-without-name-or-docstring meta)))
      (and (pair? props)
           (cons (meta-low-pc meta) props))))
  (let* ((endianness (asm-endianness asm))
         (procprops (filter-map meta->entry (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length procprops) procprops-size) 0)))
    (let lp ((entries procprops) (pos 0) (relocs '()))
      (if (null? entries)
          (make-object asm '.guile.procprops
                       bv
                       relocs '()
                       #:type SHT_PROGBITS #:flags 0)
          (let ((pc (caar entries))
                (props (cdar entries)))
            (bytevector-u32-set! bv pos pc endianness)
            (lp (cdr entries)
                (+ pos procprops-size)
                ;; The address slot follows the pc within the entry.
                (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                         (intern-constant asm props))
                      relocs)))))))
1601
1602 ;;;
1603 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1604 ;;; sections provide line number and local variable liveness
1605 ;;; information. Their format is defined by the DWARF
1606 ;;; specifications.
1607 ;;;
1608
(define (asm-language asm)
  ;; FIXME: Plumb language through to the assembler.  Until then the
  ;; result is the symbol `scheme', whatever ASM is.
  'scheme)
1612
;; -> 5 values: .debug_info, .debug_abbrev, .debug_str, .debug_loc, .debug_line
1614 (define (link-debug asm)
1615 (define (put-s8 port val)
1616 (let ((bv (make-bytevector 1)))
1617 (bytevector-s8-set! bv 0 val)
1618 (put-bytevector port bv)))
1619
1620 (define (put-u16 port val)
1621 (let ((bv (make-bytevector 2)))
1622 (bytevector-u16-set! bv 0 val (asm-endianness asm))
1623 (put-bytevector port bv)))
1624
1625 (define (put-u32 port val)
1626 (let ((bv (make-bytevector 4)))
1627 (bytevector-u32-set! bv 0 val (asm-endianness asm))
1628 (put-bytevector port bv)))
1629
1630 (define (put-u64 port val)
1631 (let ((bv (make-bytevector 8)))
1632 (bytevector-u64-set! bv 0 val (asm-endianness asm))
1633 (put-bytevector port bv)))
1634
1635 (define (put-uleb128 port val)
1636 (let lp ((val val))
1637 (let ((next (ash val -7)))
1638 (if (zero? next)
1639 (put-u8 port val)
1640 (begin
1641 (put-u8 port (logior #x80 (logand val #x7f)))
1642 (lp next))))))
1643
1644 (define (put-sleb128 port val)
1645 (let lp ((val val))
1646 (if (<= 0 (+ val 64) 128)
1647 (put-u8 port (logand val #x7f))
1648 (begin
1649 (put-u8 port (logior #x80 (logand val #x7f)))
1650 (lp (ash val -7))))))
1651
1652 (define (port-position port)
1653 (seek port 0 SEEK_CUR))
1654
1655 (define (meta->subprogram-die meta)
1656 `(subprogram
1657 (@ ,@(cond
1658 ((meta-name meta)
1659 => (lambda (name) `((name ,(symbol->string name)))))
1660 (else
1661 '()))
1662 (low-pc ,(meta-label meta))
1663 (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))
1664
1665 (define (make-compile-unit-die asm)
1666 `(compile-unit
1667 (@ (producer ,(string-append "Guile " (version)))
1668 (language ,(asm-language asm))
1669 (low-pc .rtl-text)
1670 (high-pc ,(* 4 (asm-pos asm)))
1671 (stmt-list 0))
1672 ,@(map meta->subprogram-die (reverse (asm-meta asm)))))
1673
1674 (let-values (((die-port get-die-bv) (open-bytevector-output-port))
1675 ((die-relocs) '())
1676 ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
1677 ;; (tag has-kids? attrs forms) -> code
1678 ((abbrevs) vlist-null)
1679 ((strtab) (make-string-table))
1680 ((line-port get-line-bv) (open-bytevector-output-port))
1681 ((line-relocs) '())
1682 ;; file -> code
1683 ((files) vlist-null))
1684
1685 (define (write-abbrev code tag has-children? attrs forms)
1686 (put-uleb128 abbrev-port code)
1687 (put-uleb128 abbrev-port (tag-name->code tag))
1688 (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
1689 (for-each (lambda (attr form)
1690 (put-uleb128 abbrev-port (attribute-name->code attr))
1691 (put-uleb128 abbrev-port (form-name->code form)))
1692 attrs forms)
1693 (put-uleb128 abbrev-port 0)
1694 (put-uleb128 abbrev-port 0))
1695
1696 (define (intern-abbrev tag has-children? attrs forms)
1697 (let ((key (list tag has-children? attrs forms)))
1698 (match (vhash-assoc key abbrevs)
1699 ((_ . code) code)
1700 (#f (let ((code (1+ (vlist-length abbrevs))))
1701 (set! abbrevs (vhash-cons key code abbrevs))
1702 (write-abbrev code tag has-children? attrs forms)
1703 code)))))
1704
1705 (define (intern-file file)
1706 (match (vhash-assoc file files)
1707 ((_ . code) code)
1708 (#f (let ((code (1+ (vlist-length files))))
1709 (set! files (vhash-cons file code files))
1710 code))))
1711
1712 (define (write-sources)
1713 ;; Choose line base and line range values that will allow for an
1714 ;; address advance range of 16 words. The special opcode range is
1715 ;; from 10 to 255, so 246 values.
1716 (define base -4)
1717 (define range 15)
1718
1719 (let lp ((sources (asm-sources asm)) (out '()))
1720 (match sources
1721 (((pc . s) . sources)
1722 (let ((file (assq-ref s 'filename))
1723 (line (assq-ref s 'line))
1724 (col (assq-ref s 'column)))
1725 (lp sources
1726 ;; Guile line and column numbers are 0-indexed, but
1727 ;; they are 1-indexed for DWARF.
1728 (cons (list pc
1729 (if file (intern-file file) 0)
1730 (if line (1+ line))
1731 (if col (1+ col)))
1732 out))))
1733 (()
1734 ;; Compilation unit header for .debug_line. We write in
1735 ;; DWARF 2 format because more tools understand it than DWARF
1736 ;; 4, which incompatibly adds another field to this header.
1737
1738 (put-u32 line-port 0) ; Length; will patch later.
1739 (put-u16 line-port 2) ; DWARF 2 format.
1740 (put-u32 line-port 0) ; Prologue length; will patch later.
1741 (put-u8 line-port 4) ; Minimum instruction length: 4 bytes.
1742 (put-u8 line-port 1) ; Default is-stmt: true.
1743
1744 (put-s8 line-port base) ; Line base. See the DWARF standard.
1745 (put-u8 line-port range) ; Line range. See the DWARF standard.
1746 (put-u8 line-port 10) ; Opcode base: the first "special" opcode.
1747
1748 ;; A table of the number of uleb128 arguments taken by each
1749 ;; of the standard opcodes.
1750 (put-u8 line-port 0) ; 1: copy
1751 (put-u8 line-port 1) ; 2: advance-pc
1752 (put-u8 line-port 1) ; 3: advance-line
1753 (put-u8 line-port 1) ; 4: set-file
1754 (put-u8 line-port 1) ; 5: set-column
1755 (put-u8 line-port 0) ; 6: negate-stmt
1756 (put-u8 line-port 0) ; 7: set-basic-block
1757 (put-u8 line-port 0) ; 8: const-add-pc
1758 (put-u8 line-port 1) ; 9: fixed-advance-pc
1759
1760 ;; Include directories, as a zero-terminated sequence of
1761 ;; nul-terminated strings. Nothing, for the moment.
1762 (put-u8 line-port 0)
1763
1764 ;; File table. For each file that contributes to this
1765 ;; compilation unit, a nul-terminated file name string, and a
1766 ;; uleb128 for each of directory the file was found in, the
1767 ;; modification time, and the file's size in bytes. We pass
1768 ;; zero for the latter three fields.
1769 (vlist-for-each (match-lambda
1770 ((file . code)
1771 (put-bytevector line-port (string->utf8 file))
1772 (put-u8 line-port 0)
1773 (put-uleb128 line-port 0) ; directory
1774 (put-uleb128 line-port 0) ; mtime
1775 (put-uleb128 line-port 0) ; size
1776 ))
1777 files)
1778 (put-u8 line-port 0) ; 0 byte terminating file list.
1779
1780 ;; Patch prologue length.
1781 (let ((offset (port-position line-port)))
1782 (seek line-port 6 SEEK_SET)
1783 (put-u32 line-port (- offset 10))
1784 (seek line-port offset SEEK_SET))
1785
1786 ;; Now write the statement program.
1787 (let ()
1788 (define (extended-op opcode payload-len)
1789 (put-u8 line-port 0) ; extended op
1790 (put-uleb128 line-port (1+ payload-len)) ; payload-len + opcode
1791 (put-uleb128 line-port opcode))
1792 (define (set-address sym)
1793 (define (add-reloc! kind)
1794 (set! line-relocs
1795 (cons (make-linker-reloc kind
1796 (port-position line-port)
1797 0
1798 sym)
1799 line-relocs)))
1800 (match (asm-word-size asm)
1801 (4
1802 (extended-op 2 4)
1803 (add-reloc! 'abs32/1)
1804 (put-u32 line-port 0))
1805 (8
1806 (extended-op 2 8)
1807 (add-reloc! 'abs64/1)
1808 (put-u64 line-port 0))))
1809 (define (end-sequence pc)
1810 (let ((pc-inc (- (asm-pos asm) pc)))
1811 (put-u8 line-port 2) ; advance-pc
1812 (put-uleb128 line-port pc-inc))
1813 (extended-op 1 0))
;; Emit opcodes that advance the address register by PC-INC and the
;; line register by LINE-INC, then append a row to the line table.
;;
;; The preferred encoding is a single "special opcode" byte SPEC,
;; computed from both increments; 10 is the first special opcode
;; value (standard opcodes occupy 1 through 9).  BASE and RANGE are
;; bound outside this definition -- presumably the line base and
;; line range declared in the program header; confirm against the
;; enclosing write-sources.
;;
;; When the increments do not fit in one special opcode:
;;  * a LINE-INC outside [BASE, BASE+RANGE) is emitted separately via
;;    advance-line, and we retry with a line increment of 0;
;;  * a SPEC below 500 is brought into range with opcode 8
;;    (const-add-pc), which accounts for as much address advance as
;;    special opcode 255 would, namely (255 - 10) / RANGE, rounded
;;    down;
;;  * anything larger uses opcode 2 (advance-pc) with an explicit
;;    uleb128 operand, and we retry with an address increment of 0.
(define (advance-pc pc-inc line-inc)
  (let ((spec (+ (- line-inc base) (* pc-inc range) 10)))
    (cond
     ((or (< line-inc base) (>= line-inc (+ base range)))
      (advance-line line-inc)
      (advance-pc pc-inc 0))
     ((<= spec 255)
      (put-u8 line-port spec))
     ((< spec 500)
      (put-u8 line-port 8)              ; const-add-pc
      ;; Use floor-quotient rather than floor/: the latter returns
      ;; two values (quotient and remainder), and only the quotient
      ;; is a meaningful operand to the subtraction.
      (advance-pc (- pc-inc (floor-quotient (- 255 10) range))
                  line-inc))
     (else
      (put-u8 line-port 2)              ; advance-pc
      (put-uleb128 line-port pc-inc)
      (advance-pc 0 line-inc)))))
;; Emit standard opcode 3 (advance-line), followed by INC encoded as
;; a signed LEB128 operand.
(define (advance-line inc)
  (let ((advance-line-opcode 3))
    (put-u8 line-port advance-line-opcode)
    (put-sleb128 line-port inc)))
;; Emit standard opcode 4 (set-file), followed by FILE encoded as an
;; unsigned LEB128 operand.
(define (set-file file)
  (let ((set-file-opcode 4))
    (put-u8 line-port set-file-opcode)
    (put-uleb128 line-port file)))
;; Emit standard opcode 5 (set-column), followed by COL encoded as an
;; unsigned LEB128 operand.
(define (set-column col)
  (let ((set-column-opcode 5))
    (put-u8 line-port set-column-opcode)
    (put-uleb128 line-port col)))
1839
1840 (set-address '.rtl-text)
1841
1842 (let lp ((in out) (pc 0) (file 1) (line 1) (col 0))
1843 (match in
1844 (()
1845 (when (null? out)
1846 ;; There was no source info in the first place. Set
1847 ;; file register to 0 before adding final row.
1848 (set-file 0))
1849 (end-sequence pc))
1850 (((pc* file* line* col*) . in*)
1851 (cond
1852 ((and (eqv? file file*) (eqv? line line*) (eqv? col col*))
1853 (lp in* pc file line col))
1854 (else
1855 (unless (eqv? col col*)
1856 (set-column col*))
1857 (unless (eqv? file file*)
1858 (set-file file*))
1859 (advance-pc (- pc* pc) (- line* line))
1860 (lp in* pc* file* line* col*)))))))))))
1861
;; Map the value VAL of DIE attribute ATTR to the datum that gets
;; written for it.  Names and producers are interned into STRTAB and
;; replaced by the result of that interning; the language name is
;; translated with language-name->code; the remaining known
;; attributes pass through unchanged.  Any other ATTR is a match
;; failure.
(define (compute-code attr val)
  (match attr
    ((or 'name 'producer) (string-table-intern! strtab val))
    ('language (language-name->code val))
    ((or 'low-pc 'high-pc 'stmt-list) val)))
1870
;; Return #t if VAL is a number that is both an integer and exact,
;; and #f for anything else (including non-numbers).
(define (exact-integer? val)
  (cond
   ((not (number? val)) #f)
   ((not (integer? val)) #f)
   (else (exact? val))))
1873
;; Pick the symbolic form used to encode attribute ATTR, given its
;; source value VAL and the datum CODE computed for it.  The checks
;; are ordered: string values are 'strp, the stmt-list attribute is
;; 'sec-offset, exact integer codes get the narrowest fitting integer
;; form, and symbol values are 'addr.  Anything else signals an error.
(define (choose-form attr val code)
  ;; Narrowest form able to hold the exact integer N: sleb128 for
  ;; negatives, fixed-width data forms up to 64 bits, else uleb128.
  (define (integer-form n)
    (cond
     ((negative? n) 'sleb128)
     ((<= n #xff) 'data1)
     ((<= n #xffff) 'data2)
     ((<= n #xffffffff) 'data4)
     ((<= n #xffffffffffffffff) 'data8)
     (else 'uleb128)))
  (cond
   ((string? val) 'strp)
   ((eq? attr 'stmt-list) 'sec-offset)
   ((exact-integer? code) (integer-form code))
   ((symbol? val) 'addr)
   (else (error "unhandled case" attr val code))))
1888
;; Record a linker relocation of type KIND against SYM, located at
;; the current write position of DIE-PORT, by pushing it onto
;; DIE-RELOCS.
(define (add-die-relocation! kind sym)
  (let* ((offset (port-position die-port))
         (reloc (make-linker-reloc kind offset 0 sym)))
    (set! die-relocs (cons reloc die-relocs))))
1893
;; Write CODE to DIE-PORT using the encoding named by FORM.  For the
;; 'addr form, CODE is the symbol the address refers to: a word-sized
;; zero is written and a relocation against CODE is recorded at that
;; position.  An unknown FORM, or a word size other than 4 or 8, is a
;; match failure.
(define (write-value code form)
  (match form
    ('data1 (put-u8 die-port code))
    ('data2 (put-u16 die-port code))
    ;; Section offsets and string-table offsets are written as 32-bit
    ;; values, just like data4.
    ((or 'data4 'sec-offset 'strp) (put-u32 die-port code))
    ('data8 (put-u64 die-port code))
    ('uleb128 (put-uleb128 die-port code))
    ('sleb128 (put-sleb128 die-port code))
    ('addr
     (let ((word-size (asm-word-size asm)))
       (match word-size
         (4
          (add-die-relocation! 'abs32/1 code)
          (put-u32 die-port 0))
         (8
          (add-die-relocation! 'abs64/1 code)
          (put-u64 die-port 0)))))))
1912
;; Serialize DIE, an s-expression of the shape
;;   (TAG (@ (ATTR VAL) ...) CHILD ...)
;; to DIE-PORT: first the uleb128 abbreviation code obtained from
;; intern-abbrev, then each attribute value in its chosen form.  If
;; there are children, they are written recursively and followed by a
;; terminating zero.
(define (write-die die)
  (match die
    ((tag ('@ (attrs vals) ...) children ...)
     (let* ((codes (map compute-code attrs vals))
            (forms (map choose-form attrs vals codes))
            (parent? (pair? children))
            (abbrev (intern-abbrev tag parent? attrs forms)))
       (put-uleb128 die-port abbrev)
       (for-each write-value codes forms)
       (when parent?
         (for-each write-die children)
         ;; A zero entry ends the list of children.
         (put-uleb128 die-port 0))))))
1925
1926 ;; Compilation unit header.
1927 (put-u32 die-port 0) ; Length; will patch later.
1928 (put-u16 die-port 4) ; DWARF 4.
1929 (put-u32 die-port 0) ; Abbrevs offset.
1930 (put-u8 die-port (asm-word-size asm)) ; Address size.
1931
1932 (write-die (make-compile-unit-die asm))
1933
1934 ;; Terminate the abbrevs list.
1935 (put-uleb128 abbrev-port 0)
1936
1937 (write-sources)
1938
1939 (values (let ((bv (get-die-bv)))
1940 ;; Patch DWARF32 length.
1941 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1942 (asm-endianness asm))
1943 (make-object asm '.debug_info bv die-relocs '()
1944 #:type SHT_PROGBITS #:flags 0))
1945 (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
1946 #:type SHT_PROGBITS #:flags 0)
1947 (make-object asm '.debug_str (link-string-table! strtab) '() '()
1948 #:type SHT_PROGBITS #:flags 0)
1949 (make-object asm '.debug_loc #vu8() '() '()
1950 #:type SHT_PROGBITS #:flags 0)
1951 (let ((bv (get-line-bv)))
1952 ;; Patch DWARF32 length.
1953 (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
1954 (asm-endianness asm))
1955 (make-object asm '.debug_line bv line-relocs '()
1956 #:type SHT_PROGBITS #:flags 0)))))
1957
;; Link every section of ASM and return the resulting objects as a
;; list, with false entries dropped.  The bindings below are
;; sequential and their order is significant; see the notes on the
;; individual steps.
(define (link-objects asm)
  (let*-values (;; Procedure properties come first: linking them
                ;; probably interns additional constants.
                ((procprops) (link-procprops asm))
                ((ro rw rw-init) (link-constants asm))
                ;; The text object follows the constants so that the
                ;; constants initializer is included in it.
                ((text) (link-text-object asm))
                ((dt) (link-dynamic-section asm text rw rw-init))
                ((symtab strtab) (link-symtab (linker-object-section text) asm))
                ((arities arities-strtab) (link-arities asm))
                ((docstrs docstrs-strtab) (link-docstrs asm))
                ((dinfo dabbrev dstrtab dloc dline) (link-debug asm))
                ;; The section-header string table is linked last,
                ;; since linking the other sections adds entries to it.
                ((shstrtab) (link-shstrtab asm)))
    (let ((sections (list text ro rw dt
                          symtab strtab
                          arities arities-strtab
                          docstrs docstrs-strtab
                          procprops
                          dinfo dabbrev dstrtab dloc dline
                          shstrtab)))
      (filter identity sections))))
1979
1980
1981 \f
1982
1983 ;;;
1984 ;;; High-level public interfaces.
1985 ;;;
1986
(define* (link-assembly asm #:key (page-aligned? #t))
  "Link the code and data that have been emitted into @var{asm} and
return the resulting ELF image as a bytevector.  By default the image is
laid out so that read-only and writable data are on separate pages; pass
@code{#:page-aligned? #f} to disable this behavior."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))
1993
(define (assemble-program instructions)
  "Assemble the sequence of instructions @var{instructions} into RTL
code, link the result into an image without page alignment, and load
that image from memory.  Returns a procedure."
  (let ((asm (make-assembler)))
    (emit-text asm instructions)
    (let ((image (link-assembly asm #:page-aligned? #f)))
      (load-thunk-from-memory image))))