Export the assembler procedures
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile RTL assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
;;; s-expression form, like ((OP ARG ...) ...).  There is also a
;;; procedural interface, the emit-OP procedures; each one is exported
;;; as it is defined.
26 ;;;
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
50 #:use-module (system vm objcode)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 vlist)
53 #:use-module (ice-9 match)
54 #:use-module (srfi srfi-1)
55 #:use-module (srfi srfi-4)
56 #:use-module (srfi srfi-9)
57 #:use-module (srfi srfi-11)
58 #:export (make-assembler
59 emit-text
60 link-assembly
61 assemble-program))
62
63
64 \f
65
66 ;;; RTL code consists of 32-bit units, often subdivided in some way.
67 ;;; These helpers create one 32-bit unit from multiple components.
68
;; Build one 32-bit unit from two fields: LOW lands in bits 0-7 and
;; HIGH is shifted up into bits 8-31.  No range checking is done.
(define-syntax pack-u8-u24
  (syntax-rules ()
    ((_ low high)
     (logior low (ash high 8)))))
71
;; Build one 32-bit unit from an 8-bit field X (bits 0-7) and a signed
;; 24-bit field Y (bits 8-31, stored in two's complement).
;;
;; Y is evaluated exactly once, so passing an expression with side
;; effects is safe.  Negative values down to and including -#x800000
;; (the most negative s24) are wrapped by adding #x1000000.  As before,
;; non-negative values up to #xffffff are stored unchanged.  Anything
;; else signals an "out of range" error.
(define-syntax-rule (pack-u8-s24 x y)
  (let ((s24 y))
    (logior x (ash (cond
                    ((<= 1 (- s24) #x800000)
                     (+ s24 #x1000000))
                    ((<= 0 s24 #xffffff)
                     s24)
                    (else (error "out of range" s24)))
                   8))))
80
;; Build one 32-bit unit from three fields: FLAG in bit 0, MID shifted
;; to start at bit 1, and HIGH shifted to start at bit 8.
(define-syntax pack-u1-u7-u24
  (syntax-rules ()
    ((_ flag mid high)
     (logior flag (ash mid 1) (ash high 8)))))
83
;; Build one 32-bit unit from three fields: LOW in bits 0-7, MID
;; starting at bit 8, and HIGH starting at bit 20.
(define-syntax pack-u8-u12-u12
  (syntax-rules ()
    ((_ low mid high)
     (logior low (ash mid 8) (ash high 20)))))
86
;; Build one 32-bit unit from three fields: LOW in bits 0-7, MID
;; starting at bit 8, and HIGH starting at bit 16.
(define-syntax pack-u8-u8-u16
  (syntax-rules ()
    ((_ low mid high)
     (logior low (ash mid 8) (ash high 16)))))
89
;; Build one 32-bit unit from four byte-sized fields, B0 being the
;; least significant byte and B3 the most significant.
(define-syntax pack-u8-u8-u8-u8
  (syntax-rules ()
    ((_ b0 b1 b2 b3)
     (logior b0 (ash b1 8) (ash b2 16) (ash b3 24)))))
92
;; Combine boolean flags into an integer bit mask: the first flag
;; contributes 1 and the second contributes 2.  Only the two-flag form
;; exists; add clauses as needed.
(define-syntax pack-flags
  (syntax-rules ()
    ((_ first second)
     (logior (if first 1 0)
             (if second 2 0)))))
98
99 ;;; Helpers to read and write 32-bit units in a buffer.
100
;; Read the unsigned 32-bit unit at word index WORD of BV, using the
;; native byte order.  The byte offset is four times the word index.
(define-syntax u32-ref
  (syntax-rules ()
    ((_ bv word)
     (bytevector-u32-native-ref bv (* 4 word)))))
103
;; Store VALUE as the unsigned 32-bit unit at word index WORD of BV,
;; using the native byte order.
(define-syntax u32-set!
  (syntax-rules ()
    ((_ bv word value)
     (bytevector-u32-native-set! bv (* 4 word) value))))
106
;; Read the signed 32-bit unit at word index WORD of BV, using the
;; native byte order.
(define-syntax s32-ref
  (syntax-rules ()
    ((_ bv word)
     (bytevector-s32-native-ref bv (* 4 word)))))
109
;; Store VALUE as the signed 32-bit unit at word index WORD of BV,
;; using the native byte order.
(define-syntax s32-set!
  (syntax-rules ()
    ((_ bv word value)
     (bytevector-s32-native-set! bv (* 4 word) value))))
112
113
114 \f
115
116 ;;; A <meta> entry collects metadata for one procedure. Procedures are
117 ;;; written as contiguous ranges of RTL code.
118 ;;;
;; Check that the value of ARG matches the (ice-9 match) pattern
;; PATTERN.  ARG is evaluated once.  On mismatch, signal an error whose
;; message is "expected " followed by KIND, with the offending value as
;; irritant.
(define-syntax assert-match
  (syntax-rules ()
    ((_ arg pattern kind)
     (let ((val arg))
       (if (not (match val
                  (pattern #t)
                  (_ #f)))
           (error (string-append "expected " kind) val))))))
123
;; Metadata record for one procedure.  Use `make-meta' rather than the
;; raw constructor: it validates LABEL and PROPERTIES and starts the
;; record with no high-pc and no arities.
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  ;; The label the procedure was begun with.
  (label meta-label)
  ;; Alist with symbolic keys; `meta-name' looks up the `name' key.
  (properties meta-properties set-meta-properties!)
  ;; The asm-start value when the procedure was begun.
  (low-pc meta-low-pc)
  ;; #f until `end-program' stores the asm-start value at the end.
  (high-pc meta-high-pc set-meta-high-pc!)
  ;; List of <arity> records; consed in reverse while assembling and
  ;; put back in order by `end-program'.
  (arities meta-arities set-meta-arities!))
132
;; Create the <meta> record for a procedure starting at LOW-PC.  LABEL
;; must be a symbol and PROPERTIES an alist with symbolic keys;
;; otherwise an error is signalled.  The new record has no high-pc yet
;; and an empty arity list.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
137
;; Return the value stored under the `name' key in the properties of
;; META, or #f if there is no such key.
(define (meta-name meta)
  (let ((entry (assq 'name (meta-properties meta))))
    (and entry (cdr entry))))
140
;; Metadata for one <lambda-case>.
;;
;; `begin-kw-arity' creates these with the validated argument
;; description and the asm-start value at that point as LOW-PC;
;; HIGH-PC starts out as #f and is filled in by `end-arity'.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc)
  arity?
  ;; List of symbols.
  (req arity-req)
  ;; List of symbols.
  (opt arity-opt)
  ;; #f or a symbol.
  (rest arity-rest)
  ;; Alist of keyword arguments to integers.
  (kw-indices arity-kw-indices)
  ;; Boolean.
  (allow-other-keys? arity-allow-other-keys?)
  (low-pc arity-low-pc)
  (high-pc arity-high-pc set-arity-high-pc!))
153
;; Number of 32-bit words in each code block: `fresh-block' allocates
;; this many, and `emit' starts a new block once the index reaches it.
(define-syntax *block-size* (identifier-syntax 32))
155
156 ;;; An assembler collects all of the words emitted during assembly, and
157 ;;; also maintains ancillary information such as the constant table, a
158 ;;; relocation list, and so on.
159 ;;;
160 ;;; RTL code consists of 32-bit units. We emit RTL code using native
161 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
162 ;;; the bytevector as a whole instead of conditionalizing each access.
163 ;;;
;; The assembler state.  Instances are created by `make-assembler'.
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta)
  asm?

  ;; We write RTL code into what is logically a growable vector,
  ;; implemented as a list of blocks.  asm-cur is the current block, and
  ;; asm-idx is the current index into that block, in 32-bit units.
  ;;
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; asm-start is an absolute position, indicating the offset of the
  ;; beginning of an instruction (in u32 units).  It is updated after
  ;; writing all the words for one primitive instruction.  It models the
  ;; position of the instruction pointer during execution, given that
  ;; the RTL VM updates the IP only at the end of executing the
  ;; instruction, and is thus useful for computing offsets between two
  ;; points in a program.
  ;;
  (start asm-start set-asm-start!)

  ;; The list of previously written blocks, most recent first.
  ;;
  (prev asm-prev set-asm-prev!)

  ;; The number of u32 words written in asm-prev, which is the same as
  ;; the offset of the current block.
  ;;
  (written asm-written set-asm-written!)

  ;; An alist of symbol -> position pairs, indicating the labels defined
  ;; in this compilation unit.
  ;;
  (labels asm-labels set-asm-labels!)

  ;; A list of relocations needed by the program text.  We use an
  ;; internal representation for relocations (see `make-reloc'), and
  ;; handle relative relocations within the text in the assembler.
  ;; Other kinds of relocations are later reified as linker relocations
  ;; and resolved by the linker.
  ;;
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information.
  ;;
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table, as a vhash of object -> label.  All constants
  ;; get de-duplicated and written into separate sections -- either the
  ;; .rodata section, for read-only data, or .data, for constants that
  ;; need initialization at load-time (like symbols).  Constants can
  ;; depend on other constants (e.g. a symbol depending on a stringbuf),
  ;; so order in this table is important.
  ;;
  (constants asm-constants set-asm-constants!)

  ;; A list of RTL instructions needed to initialize the constants,
  ;; kept in reverse order (`emit-init-constants' reverses it).  Will
  ;; run in a thunk with 2 local variables.
  ;;
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, for section names.
  ;;
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; The section number for the next section to be written.
  ;;
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta>, corresponding to procedure metadata.  The most
  ;; recently begun procedure comes first.
  ;;
  (meta asm-meta set-asm-meta!))
241
;; Allocate a new code block: a u32vector holding *block-size* words.
;; No fill value is given, so the initial contents are whatever
;; `make-u32vector' leaves there.
(define-inlinable (fresh-block)
  (make-u32vector *block-size*))
244
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a fresh assembler.  @var{word-size} and @var{endianness}
describe the target; when omitted they are taken from the currently
configured target."
  (let ((first-block (fresh-block))
        (section-names (make-string-table)))
    (make-asm first-block            ; cur
              0                      ; idx
              0                      ; start
              '()                    ; prev
              0                      ; written
              '()                    ; labels
              '()                    ; relocs
              word-size
              endianness
              vlist-null             ; constants
              '()                    ; inits
              section-names          ; shstrtab
              1                      ; next-section-number
              '())))                 ; meta
256
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
260
(define-inlinable (asm-pos asm)
  "Return the absolute offset, in 32-bit units, at which the next word
will be written: the words in already-closed blocks plus the index into
the current block."
  (let ((closed (asm-written asm))
        (in-current (asm-idx asm)))
    (+ in-current closed)))
265
(define (allocate-new-block asm)
  "Retire the current block onto the list of previous blocks and make a
fresh, empty block current, so that the next word goes at index 0 of the
new block."
  (let* ((replacement (fresh-block))
         (retired (cons (asm-cur asm) (asm-prev asm))))
    (set-asm-prev! asm retired)
    ;; Must be computed before the index is reset below.
    (set-asm-written! asm (asm-pos asm))
    (set-asm-cur! asm replacement)
    (set-asm-idx! asm 0)))
274
(define-inlinable (emit asm u32)
  "Append the 32-bit word @var{u32} to the instruction stream.  The
current block is assumed to have room; if this word fills it, a new
block is allocated so that the next call has room too."
  (let ((slot (asm-idx asm)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm (1+ slot)))
  (when (= (asm-idx asm) *block-size*)
    (allocate-new-block asm)))
283
(define-inlinable (make-reloc type label base word)
  "Return an internal relocation of type @var{type} that refers to the
symbol @var{label} from the word located @var{word} words after position
@var{base}.  @var{type} is either @code{x8-s24}, a 24-bit relative label
reference that the assembler itself can fix up, or @code{s32}, a 32-bit
relative reference that is left for the linker."
  `(,type ,label ,base ,word))
291
(define-inlinable (reset-asm-start! asm)
  "Move asm-start to the current write position.  Called once all the
words of an instruction have been written."
  (let ((here (asm-pos asm)))
    (set-asm-start! asm here)))
295
(define (emit-exported-label asm label)
  "Record @var{label} in the label alist of @var{asm}, associated with
the current asm-start."
  (let ((binding (cons label (asm-start asm))))
    (set-asm-labels! asm (cons binding (asm-labels asm)))))
300
(define (record-label-reference asm label)
  "Note that the word about to be written holds an x8-s24 reference to
the local label @var{label}.  The assembler patches the value later."
  (let* ((insn-start (asm-start asm))
         (word-offset (- (asm-pos asm) insn-start)))
    (set-asm-relocs! asm
                     (cons (make-reloc 'x8-s24 label insn-start word-offset)
                           (asm-relocs asm)))))
308
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Note that the word about to be written holds an s32 reference to the
far label @var{label}.  The relocation base is asm-start minus
@var{offset} (default 0).  The linker patches the value later."
  (let* ((base (- (asm-start asm) offset))
         (word-offset (- (asm-pos asm) base)))
    (set-asm-relocs! asm
                     (cons (make-reloc 's32 label base word-offset)
                           (asm-relocs asm)))))
316
317
318 \f
319
320 ;;;
321 ;;; Primitive assemblers are defined by expanding `assembler' for each
322 ;;; opcode in `(rtl-instruction-list)'.
323 ;;;
324
(eval-when (expand compile load eval)
  ;; Return an identifier whose name is the names of identifiers A and
  ;; B joined together, with the lexical context of CTX.
  (define (id-append ctx a b)
    (let ((joined (symbol-append (syntax->datum a)
                                 (syntax->datum b))))
      (datum->syntax ctx joined))))
328
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure
;; (lambda (asm operand ...) ...) that emits one primitive instruction.
;; WORD0 names the layout of the first 32-bit word, which carries
;; OPCODE; each WORD* names the layout of a following word.  The
;; operands of the generated procedure are the operands of all the
;; words, in order.  After emitting every word the procedure resets
;; asm-start.
(define-syntax assembler
  (lambda (x)
    ;; Expansion-time dispatch on the word layout NAME.  The clause
    ;; whose TYPE matches yields a syntax object of the shape
    ;; ((formal ...) code ...), where the formals are fresh temporaries
    ;; standing for the ARGs used in CODE.  An unknown layout is an
    ;; error.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word, which always holds OPCODE
    ;; in its low byte.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for a word after the first.  These carry no
    ;; opcode, so every field is an operand.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; All three fields are operands here; the clause must bind C
       ;; as well as A and B.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; High 32 bits first, then the low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
435
;; Table from instruction name (a symbol) to its emit procedure.  It is
;; filled in by `define-assembler' and `define-macro-assembler' and
;; consulted by `emit-text'.
(define assemblers (make-hash-table))
437
;; (define-assembler NAME OPCODE KIND ARG ...) defines and exports the
;; procedure emit-NAME, built by `assembler' from NAME, OPCODE and the
;; ARGs, and registers it under NAME in the `assemblers' table.  KIND
;; is accepted but not used.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               ;; Bound under the same name inside the `let' so the
               ;; procedure is registered before it is returned.
               (let ((emit-name (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
449
;; (visit-opcodes MACRO ARG ...) expands to one use of MACRO per entry
;; of (rtl-instruction-list).  Each use receives the ARGs followed by
;; the elements of that entry, spliced in as further operands.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (with-syntax (((inst ...)
                      (map (lambda (desc) (datum->syntax #'macro desc))
                           (rtl-instruction-list))))
         #'(begin
             (macro arg ... . inst)
             ...))))))
460
;; Define, register and export an emit-NAME procedure for every entry
;; of (rtl-instruction-list).
(visit-opcodes define-assembler)
462
(define (emit-text asm instructions)
  "Assemble each element of @var{instructions} with the assembler
@var{asm}.  Every instruction is a list whose head names a primitive or
macro instruction and whose tail holds its operands.  An unknown name
signals a @code{bad-instruction} error.  May be called repeatedly before
@code{link-assembly}."
  (define (assemble-one inst)
    (let ((emitter (or (hashq-ref assemblers (car inst))
                       (error 'bad-instruction inst))))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
474
475 \f
476
477 ;;;
478 ;;; The constant table records a topologically sorted set of literal
479 ;;; constants used by a program. For example, a pair uses its car and
480 ;;; cdr, a string uses its stringbuf, etc.
481 ;;;
482 ;;; Some things we want to add to the constant table are not actually
483 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
484 ;;; references, or cache cells for non-closure procedures. For these we
485 ;;; define special record types and add instances of those record types
486 ;;; to the table.
487 ;;;
488
(define-inlinable (immediate? x)
  "Return @code{#t} when @var{x} is an immediate, that is when bit 1 or
bit 2 (mask 6) of its object address is set, and @code{#f} otherwise."
  (logtest 6 (object-address x)))
492
;; Stands for the stringbuf of a string in the constant table; wraps
;; the string itself.  `intern-constant' creates one for every string
;; it interns.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string stringbuf-string))
497
;; Constant-table entry for a statically allocated procedure.  CODE is
;; the label handed to `link-procedure!' by the initialization code
;; (see `load-static-procedure' and `intern-constant').
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code static-procedure-code))
502
;; Constant-table entry for a cache cell, identified by SCOPE and KEY.
;; `intern-module-cache-cell' uses #t as the key for the cell that
;; holds a scope's module.
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope cache-cell-scope)
  (key cache-cell-key))
508
(define (statically-allocatable? x)
  "Return @code{#t} when the non-immediate constant @var{x} can be laid
out statically (pairs, vectors, strings, stringbufs and static
procedures), and @code{#f} when it needs some allocation at run time."
  (cond
   ((pair? x) #t)
   ((vector? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   ((static-procedure? x) #t)
   (else #f)))
514
(define (intern-constant asm obj)
  "Return the constant-table label for @var{obj}, first adding
@var{obj} and the constants it depends on to the table of @var{asm} if
it is not there yet.  Immediates are never entered in the table; for
them the result is @code{#f}."
  (define (intern-child child)
    (intern-constant asm child))

  ;; Instructions that store CHILD into word SLOT of the object
  ;; labelled OWNER.  Empty when CHILD is an immediate, since then
  ;; there is nothing to patch at load time.
  (define (init-field owner slot child)
    (let ((child-label (intern-child child)))
      (cond
       ((not child-label) '())
       ((statically-allocatable? child)
        `((make-non-immediate 1 ,child-label)
          (static-set! 1 ,owner ,slot)))
       (else
        `((static-ref 1 ,child-label)
          (static-set! 1 ,owner ,slot))))))

  ;; Intern whatever OBJ depends on and return the instructions that
  ;; initialize the object labelled LABEL.
  (define (init-instructions obj label)
    (cond
     ((pair? obj)
      (append (init-field label 0 (car obj))
              (init-field label 1 (cdr obj))))
     ((vector? obj)
      ;; Element I lives in word I+1; word 0 is the vector's tag.
      (let ((len (vector-length obj)))
        (let lp ((i 0) (acc '()))
          (if (= i len)
              (reverse acc)
              (lp (1+ i)
                  (append-reverse (init-field label (1+ i)
                                              (vector-ref obj i))
                                  acc))))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((make-non-immediate 1 ,label)
        (link-procedure! 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      `((make-non-immediate 1 ,(intern-child (symbol->string obj)))
        (string->symbol 1 1)
        (static-set! 1 ,label 0)))
     ((string? obj)
      `((make-non-immediate 1 ,(intern-child (make-stringbuf obj)))
        (static-set! 1 ,label 1)))
     ((keyword? obj)
      `((static-ref 1 ,(intern-child (keyword->symbol obj)))
        (symbol->keyword 1 1)
        (static-set! 1 ,label 0)))
     ((number? obj)
      `((make-non-immediate 1 ,(intern-child (number->string obj)))
        (string->number 1 1)
        (static-set! 1 ,label 0)))
     (else
      (error "don't know how to intern" obj))))

  (if (immediate? obj)
      #f
      (let ((known (vhash-assoc obj (asm-constants asm))))
        (if known
            (cdr known)
            ;; Computing the initializers may itself intern dependencies,
            ;; mutating asm-constants and asm-inits, so it must happen
            ;; before OBJ is added.
            (let* ((label (gensym "constant"))
                   (inits (init-instructions obj label)))
              (set-asm-constants! asm
                                  (vhash-cons obj label (asm-constants asm)))
              (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
              label)))))
574
(define (intern-non-immediate asm obj)
  "Intern @var{obj}, which must not be an immediate, into the constant
table and return its label.  Signals an error for immediates."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
581
(define (intern-cache-cell asm scope key)
  "Return the label of the cache cell identified by @var{scope} and
@var{key}, interning a cell into the constant table unless an equal one
is already there."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
587
;; The cell holding the module for a scope is the cache cell whose key
;; is #t.
(define (intern-module-cache-cell asm scope)
  "Return the label of the cache cell that holds the module for
@var{scope}, interning the cell if needed."
  (intern-constant asm (make-cache-cell scope #t)))
592
593
594 \f
595
596 ;;;
597 ;;; Macro assemblers bridge the gap between primitive instructions and
598 ;;; some higher-level operations.
599 ;;;
600
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; the procedure emit-NAME with the given formals and body, and
;; registers it under NAME in the `assemblers' table so that
;; `emit-text' can find it.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((emit-name (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
612
;; Load the constant OBJ into DST.  An immediate uses the shortest
;; make-*-immediate instruction that can hold its bits.  Any other
;; object is interned in the constant table and referenced from there.
(define-macro-assembler (load-constant asm dst obj)
  (define (fits-in-bits? value nbits)
    (zero? (ash value (- nbits))))
  (if (immediate? obj)
      (let ((bits (object-address obj)))
        (cond
         ((and (< dst 256) (fits-in-bits? bits 16))
          (emit-make-short-immediate asm dst obj))
         ((fits-in-bits? bits 32)
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (if (statically-allocatable? obj)
          (emit-make-non-immediate asm dst (intern-non-immediate asm obj))
          (emit-static-ref asm dst (intern-non-immediate asm obj)))))
628
;; Load into DST the statically allocated procedure whose code is at
;; LABEL, interning a <static-procedure> constant for it.
(define-macro-assembler (load-static-procedure asm dst label)
  (emit-make-non-immediate asm dst
                           (intern-constant asm
                                            (make-static-procedure label))))
632
;; Start a procedure: define LABEL at the current position and push a
;; new <meta> record for it, whose low-pc is the current asm-start.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (set-asm-meta! asm
                 (cons (make-meta label properties (asm-start asm))
                       (asm-meta asm))))
637
;; Finish the most recently begun procedure: record the current
;; asm-start as its high-pc and put its arities, which were collected
;; newest first, back in order.
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (ordered (reverse (meta-arities current))))
    (set-meta-high-pc! current (asm-start asm))
    (set-meta-arities! current ordered)))
642
;; Begin an arity with required arguments only: an opt arity with no
;; optional arguments and no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
645
;; Begin an arity without keyword arguments: a kw arity with an empty
;; keyword alist that does not allow other keys.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
648
;; Begin a <lambda-case>: validate the argument description, record an
;; <arity> for the current procedure, and emit the matching prelude.
;;
;;   REQ, OPT           lists of symbols
;;   REST               #f or a symbol
;;   KW-INDICES         alist of keyword -> integer; the keys must be
;;                      keywords because `kw-prelude' matches them
;;                      with (? keyword?)
;;   ALLOW-OTHER-KEYS?  boolean
;;   NLOCALS            integer
;;   ALTERNATE          #f or a symbol
;;
;; An argument of the wrong shape signals an "expected ..." error.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  (let* ((meta (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (rest? (->bool rest)))
    (set-meta-arities! meta (cons arity (meta-arities meta)))
    ;; Use the simplest prelude that can handle this arity.
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
677
;; Close the most recently begun arity of the current procedure by
;; recording the current asm-start as its high-pc.
(define-macro-assembler (end-arity asm)
  (let* ((current-meta (car (asm-meta asm)))
         (current-arity (car (meta-arities current-meta))))
    (set-arity-high-pc! current-arity (asm-start asm))))
681
;; Prelude for an arity with exactly NREQ arguments.  With an ALTERNATE
;; label, branch there on an argument-count mismatch.  Otherwise
;; assert the count, using the combined assert-and-allocate instruction
;; when both the count and the number of extra locals are below 2^12.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (define u12-limit (ash 1 12))
  (define (fits-combined-op?)
    (and (< nreq u12-limit)
         (< (- nlocals nreq) u12-limit)))
  (if alternate
      (begin
        (emit-br-if-nargs-ne asm nreq alternate)
        (emit-alloc-frame asm nlocals))
      (if (fits-combined-op?)
          (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq))
          (begin
            (emit-assert-nargs-ee asm nreq)
            (emit-alloc-frame asm nlocals)))))
692
;; Prelude for an arity with optional and/or rest arguments.  Too few
;; arguments either branches to ALTERNATE or fails an assertion.  Too
;; many are collected by bind-rest when REST? is true, and otherwise
;; branch to ALTERNATE or fail an assertion.  The frame is allocated
;; last.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
705
;; Prelude for an arity with keyword arguments.  Checks the minimum
;; argument count, binds the keyword arguments, then allocates the
;; frame.  The total passed to bind-kwargs is one more than the largest
;; keyword index, and at least the number of positional arguments.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (cond
   (alternate
    (emit-br-if-nargs-lt asm nreq alternate))
   (else
    (emit-assert-nargs-ge asm nreq)))
  (let* ((npositional (+ nreq nopt))
         (ntotal (fold (lambda (entry highest)
                         (match entry
                           (((? keyword?) . idx)
                            (max (1+ idx) highest))))
                       npositional
                       kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      kw-indices)
    (emit-alloc-frame asm nlocals)))
723
;; Define the label SYM at the current asm-start.
(define-macro-assembler (label asm sym)
  (let ((binding (cons sym (asm-start asm))))
    (set-asm-labels! asm (cons binding (asm-labels asm)))))
726
;; Store MODULE into word 0 of the module cache cell for SCOPE,
;; interning that cell if needed.
(define-macro-assembler (cache-current-module! asm module scope)
  (emit-static-set! asm module (intern-module-cache-cell asm scope) 0))
730
;; Emit a toplevel-box instruction for SYM in SCOPE.  This interns the
;; symbol, the scope's module cache cell, and the cache cell keyed by
;; SYM in that scope.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((symbol-label (intern-non-immediate asm sym))
        (module-label (intern-module-cache-cell asm scope))
        (cache-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst cache-label module-label symbol-label bound?)))
736
;; Emit a module-box instruction for SYM in the module MODULE-NAME.
;; The pair (PUBLIC? . MODULE-NAME) is interned as a constant and also
;; serves as the scope of the cache cell keyed by SYM.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((symbol-label (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-label (intern-constant asm scope))
         (cache-label (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst cache-label scope-label symbol-label bound?)))
743
744
745 \f
746
747 ;;;
748 ;;; Helper for linking objects.
749 ;;;
750
(define (make-object asm name bv relocs labels . kwargs)
  "Return a linker object for a section called @var{name} (a symbol)
whose contents are the bytevector @var{bv}.  The name is interned in the
shstrtab, the section takes the next free section number and the length
of @var{bv} as its size, and a linker symbol for @var{name} at offset 0
is added in front of @var{labels}.  @var{kwargs} are passed on to
@code{make-elf-section}."
  (let ((name-idx (intern-section-name! asm (symbol->string name)))
        (index (asm-next-section-number asm)))
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
765
766
767 \f
768
769 ;;;
770 ;;; Linking the constant table. This code is somewhat intertwingled
771 ;;; with the intern-constant code above, as that procedure also
772 ;;; residualizes instructions to initialize constants at load time.
773 ;;;
774
;; Write the object address of X into BUF at byte offset POS, as one
;; word of the target's size and endianness.  A word size other than 4
;; or 8 is an error.
(define (write-immediate asm buf pos x)
  (let* ((bits (object-address x))
         (order (asm-endianness asm))
         (store! (case (asm-word-size asm)
                   ((4) bytevector-u32-set!)
                   ((8) bytevector-u64-set!)
                   (else (error "bad word size" asm)))))
    (store! buf pos bits order)))
782
(define (emit-init-constants asm)
  "Emit a procedure that runs the pending constant initializers of
@var{asm}, and return its label.  Return @code{#f}, emitting nothing,
when there are no initializers."
  (let ((pending (asm-inits asm)))
    (if (null? pending)
        #f
        (let ((label (gensym "init-constants")))
          ;; asm-inits is kept newest first, so reverse it to run the
          ;; initializers in the order they were recorded.
          (emit-text asm
                     `((begin-program ,label ())
                       (assert-nargs-ee/locals 1 1)
                       ,@(reverse pending)
                       (load-constant 1 ,*unspecified*)
                       (return 1)
                       (end-program)))
          label))))
798
(define (link-data asm data name)
  "Lay out the constants in @var{data}, a vhash mapping objects to
labels, into a bytevector for the section @var{name}, which should be
@code{.data} or @code{.rodata}.  Return the resulting linker object, or
@code{#f} when @var{data} is empty.

Each object starts on an 8-byte boundary and gets a linker symbol at its
offset.  References to non-immediate constants are written as @code{#f}
and patched at load time by the initializers in asm-inits."
  ;; Round ADDRESS up to the next multiple of ALIGNMENT.
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Type tags written into object headers.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Write the unsigned integer VAL into BUF at byte offset POS as
    ;; one target word.  The single place where the target word size
    ;; is validated for raw words.
    (define (write-word buf pos val)
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    ;; Number of bytes X occupies in the section, before alignment.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((vector? x)
        (* (1+ (vector-length x)) word-size))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Write the static image of OBJ into BUF at byte offset POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          ;; Header: tag word, then length word.
          (write-word buf pos tag)
          (write-word buf (+ pos word-size) len)
          ;; Characters, followed by a zero terminator.
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        ;; Tag word, then a zero word that the initializers fill in
        ;; with link-procedure!.
        (write-word buf pos tc7-rtl-program)
        (write-word buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: tag, stringbuf (patched at load time), a zero
        ;; word, and the length.
        (write-word buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-word buf (+ pos (* 2 word-size)) 0)
        (write-word buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((vector? obj)
        ;; Tag word carrying the length, then one word per element.
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-word buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ;; These are created entirely by the initializers; reserve one
       ;; word holding #f.
       ((symbol? obj)
        (write-immediate asm buf pos #f))

       ((keyword? obj)
        (write-immediate asm buf pos #f))

       ((number? obj)
        (write-immediate asm buf pos #f))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; The size is accumulated the same way positions advance in the
      ;; loop below: align what came before, then add the object.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0)))
        (let lp ((i 0) (pos 0) (labels '()))
          (if (< i (vlist-length data))
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) labels)))
              (make-object asm name buf '() labels
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
957
(define (link-constants asm)
  "Split the constants interned in @var{asm} into a read-only set and a
writable set, and link one data section for each.

Returns three values: the object for the .rodata section, the object
for the .data section, and the label of the constants initialization
procedure.  Any of these may be @code{#f}."
  ;; True when every element of VEC satisfies `immediate?'.
  (define (vector-of-immediates? vec)
    (let ((n (vector-length vec)))
      (let scan ((k 0))
        (cond
         ((= k n) #t)
         ((immediate? (vector-ref vec k)) (scan (1+ k)))
         (else #f)))))
  ;; A constant goes to .rodata when it is a stringbuf, or a pair or
  ;; vector whose fields are all immediates.  Everything else goes to
  ;; .data.
  (define (shareable? x)
    (or (stringbuf? x)
        (and (pair? x)
             (immediate? (car x))
             (immediate? (cdr x)))
        (and (vector? x)
             (vector-of-immediates? x))))
  (let* ((all (asm-constants asm))
         (count (vlist-length all)))
    (let classify ((k 0)
                   (read-only vlist-null)
                   (writable vlist-null))
      (cond
       ((= k count)
        (values (link-data asm read-only '.rodata)
                (link-data asm writable '.data)
                (emit-init-constants asm)))
       (else
        ;; Each entry of the constant table is an (object . label) pair.
        (match (vlist-ref all k)
          ((obj . label)
           (if (shareable? obj)
               (classify (1+ k)
                         (vhash-consq obj label read-only)
                         writable)
               (classify (1+ k)
                         read-only
                         (vhash-consq obj label writable))))))))))
989
990 \f
991
992 ;;;
993 ;;; Linking program text.
994 ;;;
995
(define (process-relocs buf relocs labels)
  "Resolve the relocations in @var{relocs} against the label alist
@var{labels}, patching @var{buf} in place.  x8-s24 relocations must
refer to a known label; s32 relocations are patched when their label is
known.  Return the list of linker relocations for the s32 references
whose labels are not in @var{labels}."
  (let resolve ((pending relocs) (external '()))
    (if (null? pending)
        external
        (let ((reloc (car pending)))
          (match reloc
            ((kind label base word)
             (let ((target (assq-ref labels label))
                   (slot (+ base word)))
               (case kind
                 ((s32)
                  (cond
                   (target
                    ;; Label defined here: store the offset relative
                    ;; to BASE directly.
                    (s32-set! buf slot (- target base))
                    (resolve (cdr pending) external))
                   (else
                    ;; Unknown label: leave it to the linker.  SLOT
                    ;; counts 32-bit words, the linker wants bytes.
                    (resolve (cdr pending)
                             (cons (make-linker-reloc 'rel32/4 (* slot 4)
                                                      word label)
                                   external)))))
                 ((x8-s24)
                  (unless target
                    (error "unbound near relocation" reloc))
                  ;; Keep the low 8 bits of the existing word and
                  ;; replace the remaining 24 with the offset.
                  (let ((low-byte (logand (u32-ref buf slot) #xff)))
                    (u32-set! buf slot
                              (pack-u8-s24 low-byte (- target base)))
                    (resolve (cdr pending) external)))
                 (else
                  (error "bad relocation kind" reloc))))))))))
1024
(define (process-labels labels)
  "Return one linker symbol for each (label . offset) pair in
@var{labels}.  Offsets are taken to be counts of 32-bit words and are
scaled to bytes."
  (map (lambda (entry)
         (match entry
           ((name . word-offset)
            (make-linker-symbol name (* word-offset 4)))))
       labels))
1031
(define (swap-bytes! buf)
  "Reverse, in place, the byte order of every 32-bit unit of the
bytevector @var{buf}.  Signal an error if the length of @var{buf} is
not a multiple of 4."
  (let ((total (bytevector-length buf)))
    (unless (zero? (modulo total 4))
      (error "unexpected length"))
    ;; Reading a word as big-endian and storing it back as
    ;; little-endian reverses its four bytes.
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1045
(define (link-text-object asm)
  "Link the .rtl-text section.  The blocks in @code{(asm-prev asm)} are
copied in reverse list order, followed by the used part of the current
block; the bytes of each 32-bit unit are then swapped if the target
endianness differs from the native one, and relocations and labels are
processed."
  (let ((text (make-u32vector (asm-pos asm)))
        (block-bytes (* *block-size* 4)))
    (let copy-blocks ((offset 0)
                      (blocks (reverse (asm-prev asm))))
      (cond
       ((pair? blocks)
        ;; A previous block is copied in full.
        (bytevector-copy! (car blocks) 0 text offset block-bytes)
        (copy-blocks (+ offset block-bytes) (cdr blocks)))
       (else
        ;; Only the first (asm-idx asm) words of the current block are
        ;; copied.
        (bytevector-copy! (asm-cur asm) 0 text offset
                          (* (asm-idx asm) 4))
        (unless (eq? (asm-endianness asm) (native-endianness))
          (swap-bytes! text))
        (make-object asm '.rtl-text
                     text
                     (process-relocs text
                                     (asm-relocs asm)
                                     (asm-labels asm))
                     (process-labels (asm-labels asm))))))))
1064
1065
1066 \f
1067
1068 ;;;
1069 ;;; Linking other sections of the ELF file, like the dynamic segment,
1070 ;;; the symbol table, etc.
1071 ;;;
1072
(define (link-dynamic-section asm text rw rw-init)
  "Link the .dynamic section for an ELF image with RTL text.  The
section is a sequence of (tag, value) word pairs terminated by a
DT_NULL entry.  When the writable data object @var{rw} is true, its
address and size are recorded under DT_GUILE_GC_ROOT and
DT_GUILE_GC_ROOT_SZ.  When in addition @var{rw-init} is true, it is the
label of the procedure recorded under DT_INIT."
  ;; Build the section for words of WORD-SIZE bytes.  SET-WORD! stores
  ;; one word into a bytevector; RELOC-TYPE is the linker relocation
  ;; kind used for slots that hold the address of a label.
  (define (build word-size set-word! reloc-type)
    (let* ((endianness (asm-endianness asm))
           ;; Two slots per entry: 2 fixed entries plus the DT_NULL
           ;; terminator, 2 more for the GC roots, 1 more for DT_INIT.
           (slots (cond ((not rw) 6)
                        (rw-init 12)
                        (else 10)))
           (bv (make-bytevector (* word-size slots) 0))
           (relocs '()))
      (define (put-value! slot value)
        (set-word! bv (* slot word-size) value endianness))
      (define (put-label! slot label)
        ;; The slot itself is zeroed; the linker fills it in through
        ;; the recorded relocation.
        (set! relocs
              (cons (make-linker-reloc reloc-type (* slot word-size) 0 label)
                    relocs))
        (put-value! slot 0))
      (put-value! 0 DT_GUILE_RTL_VERSION)
      (put-value! 1 #x02020000)
      (put-value! 2 DT_GUILE_ENTRY)
      (put-label! 3 '.rtl-text)
      (when rw
        ;; Add roots to GC.
        (put-value! 4 DT_GUILE_GC_ROOT)
        (put-label! 5 '.data)
        (put-value! 6 DT_GUILE_GC_ROOT_SZ)
        (put-value! 7 (bytevector-length (linker-object-bv rw)))
        (when rw-init
          (put-value! 8 DT_INIT)        ; constants
          (put-label! 9 rw-init)))
      ;; The terminating entry always occupies the last two slots.
      (put-value! (- slots 2) DT_NULL)
      (put-value! (- slots 1) 0)
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (case (asm-word-size asm)
    ((4) (build 4 bytevector-u32-set! 'abs32/1))
    ((8) (build 8 bytevector-u64-set! 'abs64/1))
    (else (error "bad word size" asm))))
1120
(define (link-shstrtab asm)
  "Link the string table that holds the section header names."
  ;; Intern the table's own name before serializing the table.
  (intern-section-name! asm ".shstrtab")
  (let ((table-bytes (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab
                 table-bytes
                 '() '()
                 #:type SHT_STRTAB #:flags 0)))
1128
(define (link-symtab text-section asm)
  "Link the ELF symbol table for @var{asm}.  One hidden STT_FUNC symbol,
defined relative to @var{text-section}, is written for each element of
@code{(reverse (asm-meta asm))}.

Returns two values: the object for the .symtab section and the object
for the associated .strtab section."
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (bv (make-bytevector (* (length metas) entry-size) 0)))
    (let emit ((remaining metas) (index 0))
      (unless (null? remaining)
        (let* ((m (car remaining))
               (sym (meta-name m))
               ;; A meta without a name gets the empty string.
               (name-index (string-table-intern!
                            names
                            (if sym (symbol->string sym) "")))
               (low (meta-low-pc m)))
          (write-elf-symbol bv (* index entry-size) endianness word-size
                            (make-elf-symbol
                             #:name name-index
                             ;; Symbol value and size are measured in
                             ;; bytes, not u32s.
                             #:value (* 4 low)
                             #:size (* 4 (- (meta-high-pc m) low))
                             #:type STT_FUNC
                             #:visibility STV_HIDDEN
                             #:shndx (elf-section-index text-section)))
          (emit (cdr remaining) (1+ index)))))
    ;; The string table object is created first because the symbol
    ;; table links to its section index.
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0
                           #:entsize entry-size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1165
1166 ;;; The .guile.arities section describes the arities that a function can
1167 ;;; have. It is in two parts: a sorted array of headers describing
1168 ;;; basic arities, and an array of links out to a string table (and in
1169 ;;; the case of keyword arguments, to the data section) for argument
1170 ;;; names. The whole thing is prefixed by a uint32 indicating the
1171 ;;; offset of the end of the headers array.
1172 ;;;
1173 ;;; The arity headers array is a packed array of structures of the form:
1174 ;;;
1175 ;;; struct arity_header {
1176 ;;; uint32_t low_pc;
1177 ;;; uint32_t high_pc;
1178 ;;; uint32_t offset;
1179 ;;; uint32_t flags;
1180 ;;; uint32_t nreq;
1181 ;;; uint32_t nopt;
1182 ;;; }
1183 ;;;
1184 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1185 ;;; future to use 64-bit offsets if appropriate, but there are other
1186 ;;; aspects of RTL that constrain us to a total image that fits in 32
1187 ;;; bits, so for the moment we'll simplify the problem space.
1188 ;;;
1189 ;;; The following flags values are defined:
1190 ;;;
1191 ;;; #x1: has-rest?
1192 ;;; #x2: allow-other-keys?
1193 ;;; #x4: has-keyword-args?
1194 ;;; #x8: is-case-lambda?
1195 ;;;
1196 ;;; Functions with a single arity specify their number of required and
1197 ;;; optional arguments in nreq and nopt, and do not have the
1198 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1199 ;;; of pointers into the associated .guile.arities.strtab string table,
1200 ;;; identifying the argument names. This offset is relative to the
1201 ;;; start of the .guile.arities section. Links for required arguments
1202 ;;; are first, in order, as uint32 values. Next follow the optionals,
1203 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1204 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1205 ;;; links, the kw-indices link points into the data section, and is
1206 ;;; relative to the ELF image as a whole.
1207 ;;;
1208 ;;; Functions with no arities have no arities information present in the
1209 ;;; .guile.arities section.
1210 ;;;
1211 ;;; Functions with multiple arities are preceded by a header with
1212 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1213 ;;; high-pc which should be the bounds of the whole function. Headers
1214 ;;; for the individual arities follow. In this way the whole headers
1215 ;;; array is sorted in increasing low-pc order, and case-lambda clauses
1216 ;;; are contained within the [low-pc, high-pc] of the case-lambda
1217 ;;; header.
1218
;; Size in bytes of the uint32 that prefixes the arities section.
(define arities-prefix-len 4)

;; Size in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte offset of the "offset" field inside an arity header; it is the
;; third uint32 field.
(define arity-header-offset-offset (* 4 2))
1227
;; Combine four boolean expressions into the flags word of an arity
;; header: #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;; #x8 is-case-lambda?.
(define-syntax-rule (pack-arity-flags has-rest? allow-other-keys?
                                      has-keyword-args? is-case-lambda?)
  (logior (if has-rest? #x1 0)
          (if allow-other-keys? #x2 0)
          (if has-keyword-args? #x4 0)
          (if is-case-lambda? #x8 0)))
1234
(define (meta-arities-size meta)
  "Return the number of bytes that the arities of @var{meta} take up in
the arities section: nothing when there are no arities, one header plus
links for a single arity, and an extra case-lambda header in front when
there are several."
  ;; Number of uint32 links that follow the headers for ARITY: one per
  ;; required and optional argument, one for the rest argument if any,
  ;; and one for the kw-indices if they are a non-empty list.
  (define (link-count arity)
    (+ (length (arity-req arity))
       (length (arity-opt arity))
       (if (arity-rest arity) 1 0)
       (if (pair? (arity-kw-indices arity)) 1 0)))
  (define (arity-size arity)
    (+ arity-header-len (* 4 (link-count arity))))
  (let ((arities (meta-arities meta)))
    (cond
     ((null? arities) 0)
     ((null? (cdr arities)) (arity-size (car arities)))
     (else
      ;; The leading arity-header-len accounts for the case-lambda
      ;; header.
      (apply + arity-header-len (map arity-size arities))))))
1251
(define (write-arity-headers metas bv endianness)
  "Write the arity headers for @var{metas} into @var{bv}, starting
right after the section prefix, and then store the end position of the
headers array in the prefix.

Returns two values: the position just past the last header, and an
alist of (arity . position) pairs, in the order written, where each
position is the byte offset in @var{bv} of that header's offset field."
  (define (put-u32! at value)
    (bytevector-u32-set! bv at value endianness))
  (define (put-header! at low-pc high-pc flags nreq nopt)
    (put-u32! at low-pc)
    (put-u32! (+ at 4) high-pc)
    (put-u32! (+ at 8) 0)               ; offset
    (put-u32! (+ at 12) flags)
    (put-u32! (+ at 16) nreq)
    (put-u32! (+ at 20) nopt))
  (define (put-arity-header! at arity)
    (put-header! at
                 (arity-low-pc arity)
                 (arity-high-pc arity)
                 (pack-arity-flags (arity-rest arity)
                                   (arity-allow-other-keys? arity)
                                   (pair? (arity-kw-indices arity))
                                   #f)
                 (length (arity-req arity))
                 (length (arity-opt arity))))
  ;; Write one header per element of ARITIES starting at AT.  Returns
  ;; the position after the last header and LINKS extended with one
  ;; (arity . offset-field-position) pair per header, newest first.
  (define (put-arities! arities at links)
    (let next ((arities arities) (at at) (links links))
      (cond
       ((null? arities)
        (values at links))
       (else
        (put-arity-header! at (car arities))
        (next (cdr arities)
              (+ at arity-header-len)
              (acons (car arities)
                     (+ at arity-header-offset-offset)
                     links))))))
  (let next-meta ((metas metas) (at arities-prefix-len) (links '()))
    (cond
     ((null? metas)
      ;; Fill in the prefix.
      (put-u32! 0 at)
      (values at (reverse links)))
     (else
      (let* ((meta (car metas))
             (arities (meta-arities meta)))
        (call-with-values
            (lambda ()
              (cond
               ((or (null? arities) (null? (cdr arities)))
                ;; Zero or one arity: no case-lambda header.
                (put-arities! arities at links))
               (else
                ;; Several arities: a case-lambda header covering the
                ;; bounds of the meta comes first.  Its offset, nreq
                ;; and nopt fields are 0.
                (put-header! at
                             (meta-low-pc meta)
                             (meta-high-pc meta)
                             (pack-arity-flags #f #f #f #t)
                             0 0)
                (put-arities! arities (+ at arity-header-len) links))))
          (lambda (at links)
            (next-meta (cdr metas) at links))))))))
1299
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  "Write the links for each arity in @var{arity-offset-pairs} into
@var{bv}, starting at byte position @var{pos}.  For each (arity .
offset) pair, the position of the arity's first link is stored at
@var{offset}; then a uint32 index into @var{strtab} is written for each
required argument, each optional argument, and the rest argument if
any.  When the arity's kw-indices are not the empty list, one further
uint32 slot is reserved and a relocation for it is recorded.

Returns the list of relocations.  Signals an error if the links do not
end exactly at the end of @var{bv}."
  (define endianness (asm-endianness asm))
  ;; Intern each of NAMES in STRTAB and write the resulting indices as
  ;; consecutive uint32 values starting at AT.  Returns the position
  ;; after the last one.
  (define (put-name-links! names at)
    (let next ((names names) (at at))
      (cond
       ((null? names) at)
       (else
        (bytevector-u32-set! bv at
                             (string-table-intern!
                              strtab
                              (symbol->string (car names)))
                             endianness)
        (next (cdr names) (+ at 4))))))
  (define (argument-names arity)
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  (let next-arity ((at pos) (pending arity-offset-pairs) (relocs '()))
    (cond
     ((null? pending)
      (let ((capacity (bytevector-length bv)))
        (unless (= at capacity)
          (error "expected to fully fill the bytevector"
                 at capacity)))
      relocs)
     (else
      (match (car pending)
        ((arity . header-slot)
         ;; Point the header's offset field at this arity's links.
         (bytevector-u32-set! bv header-slot at endianness)
         (let ((at (put-name-links! (argument-names arity) at))
               (kw-indices (arity-kw-indices arity)))
           (if (null? kw-indices)
               (next-arity at (cdr pending) relocs)
               ;; FIXME: Assert that kw-indices is already interned.
               (next-arity (+ at 4)
                           (cdr pending)
                           (cons (make-linker-reloc
                                  'abs32/1 at 0
                                  (intern-constant asm kw-indices))
                                 relocs))))))))))
1332
(define (link-arities asm)
  "Link the .guile.arities section for @var{asm} along with its string
table.

Returns two values: the object for the .guile.arities section and the
object for the .guile.arities.strtab section."
  (let*-values (((metas) (reverse (asm-meta asm)))
                ((names) (make-string-table))
                ;; The prefix plus the space needed by every meta.
                ((bv) (make-bytevector
                       (apply + arities-prefix-len
                              (map meta-arities-size metas))
                       0))
                ;; Headers first; the links follow them.
                ((links-start header-links)
                 (write-arity-headers metas bv (asm-endianness asm)))
                ((kw-indices-relocs)
                 (write-arity-links asm bv links-start header-links names))
                ((strtab)
                 (make-object asm '.guile.arities.strtab
                              (link-string-table! names)
                              '() '()
                              #:type SHT_STRTAB #:flags 0)))
    (values (make-object asm '.guile.arities
                         bv
                         kw-indices-relocs '()
                         #:type SHT_PROGBITS #:flags 0
                         #:link (elf-section-index
                                 (linker-object-section strtab)))
            strtab)))
1359
1360 ;;;
1361 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1362 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1363 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1364 ;;; entry to a program, relative to the start of the text section, and
1365 ;;; str is an index into the associated .guile.docstrs.strtab string
1366 ;;; table section.
1367 ;;;
1368
;; Size in bytes of one docstrs entry: a uint32 pc followed by a uint32
;; string table index.
(define docstr-size 8)

(define (link-docstrs asm)
  "Link the .guile.docstrs section for @var{asm} along with its string
table.  An entry is written for each meta whose properties contain
exactly one @code{documentation} entry and whose value is a string.

Returns two values: the object for the .guile.docstrs section and the
object for the .guile.docstrs.strtab section."
  (define (documentation-entry? entry)
    (eq? (car entry) 'documentation))
  ;; Return (low-pc . docstring) for META, or #f if META does not
  ;; qualify.
  (define (meta->docstring meta)
    (let ((first (find-tail documentation-entry? (meta-properties meta))))
      (and first
           ;; Skip metas that have a second documentation property.
           (not (find-tail documentation-entry? (cdr first)))
           (let ((doc (cdar first)))
             (and (string? doc)
                  (cons (meta-low-pc meta) doc))))))
  (let* ((endianness (asm-endianness asm))
         (entries (filter-map meta->docstring (reverse (asm-meta asm))))
         (names (make-string-table))
         (bv (make-bytevector (* (length entries) docstr-size) 0)))
    (let fill ((pending entries) (at 0))
      (unless (null? pending)
        (let ((entry (car pending)))
          (bytevector-u32-set! bv at (car entry) endianness)
          (bytevector-u32-set! bv (+ at 4)
                               (string-table-intern! names (cdr entry))
                               endianness)
          (fill (cdr pending) (+ at docstr-size)))))
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1409
1410 ;;;
1411 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1412 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1413 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1414 ;;; entry to a program, relative to the start of the text section, and
1415 ;;; addr is the address of the associated properties alist, relative to
1416 ;;; the start of the ELF image.
1417 ;;;
1418 ;;; Since procedure properties are stored in the data sections, we need
1419 ;;; to link the procedure properties section before them. (Note that this
1420 ;;; constraint does not apply to the arities section, which may
1421 ;;; reference the data sections via the kw-indices literal, because
1422 ;;; assembling the text section already makes sure that the kw-indices
1423 ;;; are interned.)
1424 ;;;
1425
;; Size in bytes of one procprops entry: a uint32 pc followed by a
;; uint32 address.
(define procprops-size 8)

(define (link-procprops asm)
  "Link the .guile.procprops section for @var{asm}.  An entry is written
for each meta that still has properties once its name and its string
docstring have been removed.  The properties alist is interned as a
constant, and a relocation is recorded for the address half of the
entry.

Returns the object for the .guile.procprops section."
  ;; Copy ALIST up to its first entry whose key is KEY.  That entry is
  ;; dropped if VALUE-PRED accepts its value and kept otherwise; either
  ;; way the entries after it are left alone.
  (define (assoc-remove-one alist key value-pred)
    (cond
     ((null? alist) '())
     ((eq? (caar alist) key)
      (let ((value (cdar alist)))
        (if (value-pred value)
            (cdr alist)
            (acons key value (cdr alist)))))
     (else
      (let ((entry (car alist)))
        (acons (car entry) (cdr entry)
               (assoc-remove-one (cdr alist) key value-pred))))))
  (define (props-without-name-or-docstring meta)
    (let* ((props (meta-properties meta))
           (unnamed (assoc-remove-one props 'name (lambda (x) #t))))
      (assoc-remove-one unnamed 'documentation string?)))
  ;; Return (low-pc . props) for META, or #f if no properties remain.
  (define (meta->procprops meta)
    (let ((props (props-without-name-or-docstring meta)))
      (and (pair? props)
           (cons (meta-low-pc meta) props))))
  (let* ((endianness (asm-endianness asm))
         (entries (filter-map meta->procprops (reverse (asm-meta asm))))
         (bv (make-bytevector (* (length entries) procprops-size) 0)))
    (let fill ((pending entries) (at 0) (relocs '()))
      (if (null? pending)
          (make-object asm '.guile.procprops
                       bv
                       relocs '()
                       #:type SHT_PROGBITS #:flags 0)
          (let* ((entry (car pending))
                 (pc (car entry))
                 (props (cdr entry)))
            (bytevector-u32-set! bv at pc endianness)
            ;; The address word at AT + 4 is left as 0 here and is
            ;; covered by the relocation.
            (fill (cdr pending)
                  (+ at procprops-size)
                  (cons (make-linker-reloc 'abs32/1 (+ at 4) 0
                                           (intern-constant asm props))
                        relocs)))))))
1467
(define (link-objects asm)
  "Link every section of the image for @var{asm} and return the
resulting objects as a list, leaving out any that are @code{#f}.  The
order in which the sections are linked matters; see the comments
below."
  (let*-values (;; Procprops come before constants, because linking
                ;; them probably interns more constants.
                ((procprops) (link-procprops asm))
                ((ro rw rw-init) (link-constants asm))
                ;; Text comes after constants, so that the constants
                ;; initializer gets included.
                ((text) (link-text-object asm))
                ((dynamic) (link-dynamic-section asm text rw rw-init))
                ((symtab strtab)
                 (link-symtab (linker-object-section text) asm))
                ((arities arities-strtab) (link-arities asm))
                ((docstrs docstrs-strtab) (link-docstrs asm))
                ;; The section header string table must be linked
                ;; last, because linking other sections adds entries
                ;; to it.
                ((shstrtab) (link-shstrtab asm)))
    (let ((sections (list text ro rw dynamic symtab strtab
                          arities arities-strtab
                          docstrs docstrs-strtab
                          procprops shstrtab)))
      (filter (lambda (section) section) sections))))
1486
1487
1488 \f
1489
1490 ;;;
1491 ;;; High-level public interfaces.
1492 ;;;
1493
(define* (link-assembly asm #:key (page-aligned? #t))
  "Link the code and data emitted into @var{asm} and return the result
of @code{link-elf} on the linked objects.  @var{page-aligned?}, which
defaults to @code{#t}, is passed through to @code{link-elf}; pass
@code{#:page-aligned? #f} to turn it off."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))
1500
(define (assemble-program instructions)
  "Assemble @var{instructions} with a fresh assembler, link the result
into an image without page alignment, and load that image from memory.
Returns the result of @code{load-thunk-from-memory}."
  (let ((asm (make-assembler)))
    (emit-text asm instructions)
    (let ((image (link-assembly asm #:page-aligned? #f)))
      (load-thunk-from-memory image))))