DWARF linker: encode strings using the correct form
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile RTL assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
26 ;;;
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm dwarf)
49 #:use-module (system vm elf)
50 #:use-module (system vm linker)
51 #:use-module (system vm objcode)
52 #:use-module (rnrs bytevectors)
53 #:use-module (ice-9 binary-ports)
54 #:use-module (ice-9 vlist)
55 #:use-module (ice-9 match)
56 #:use-module (srfi srfi-1)
57 #:use-module (srfi srfi-4)
58 #:use-module (srfi srfi-9)
59 #:use-module (srfi srfi-11)
60 #:export (make-assembler
61 emit-text
62 link-assembly
63 assemble-program))
64
65
66 \f
67
68 ;;; RTL code consists of 32-bit units, often subdivided in some way.
69 ;;; These helpers create one 32-bit unit from multiple components.
70
;; Build one 32-bit unit: X occupies the low byte and Y is shifted left
;; by 8 bits above it.  No range checking is done on either operand.
(define-syntax pack-u8-u24
  (syntax-rules ()
    ((_ low high)
     (logior low (ash high 8)))))
73
;; Build one 32-bit unit with X in the low byte and the signed value Y
;; encoded in 24-bit two's complement in the bits above it.
;;
;; Y is evaluated exactly once.  Negative values down to and including
;; -#x800000 are wrapped by adding #x1000000; values from 0 through
;; #xffffff are used unchanged.  Anything else signals an "out of range"
;; error with the offending value as irritant.
(define-syntax-rule (pack-u8-s24 x y)
  (let ((val y))
    (logior x (ash (cond
                    ((<= (- #x800000) val -1)
                     (+ val #x1000000))
                    ((<= 0 val #xffffff)
                     val)
                    (else (error "out of range" val)))
                   8))))
82
;; Build one 32-bit unit: X in bit 0, Y shifted left by 1 bit, and Z
;; shifted left by 8 bits.  Operands are not range-checked.
(define-syntax pack-u1-u7-u24
  (syntax-rules ()
    ((_ bit mid high)
     (logior bit (ash mid 1) (ash high 8)))))
85
;; Build one 32-bit unit: X in the low byte, Y shifted left by 8 bits,
;; and Z shifted left by 20 bits.  Operands are not range-checked.
(define-syntax pack-u8-u12-u12
  (syntax-rules ()
    ((_ low mid high)
     (logior low (ash mid 8) (ash high 20)))))
88
;; Build one 32-bit unit: X in the low byte, Y shifted left by 8 bits,
;; and Z shifted left by 16 bits.  Operands are not range-checked.
(define-syntax pack-u8-u8-u16
  (syntax-rules ()
    ((_ low mid high)
     (logior low (ash mid 8) (ash high 16)))))
91
;; Build one 32-bit unit from four operands, placed at bit offsets 0,
;; 8, 16 and 24 respectively.  Operands are not range-checked.
(define-syntax pack-u8-u8-u8-u8
  (syntax-rules ()
    ((_ b0 b1 b2 b3)
     (logior b0 (ash b1 8) (ash b2 16) (ash b3 24)))))
94
;; Turn two boolean flags into an integer bit mask: the first flag
;; contributes 1 and the second contributes 2.
(define-syntax pack-flags
  (syntax-rules ()
    ;; Add clauses as needed.
    ((_ flag0 flag1)
     (logior (if flag0 1 0)
             (if flag1 2 0)))))
100
101 ;;; Helpers to read and write 32-bit units in a buffer.
102
;; Read the N'th 32-bit unit of BUF (byte offset N*4) as an unsigned
;; integer in native byte order.
(define-syntax u32-ref
  (syntax-rules ()
    ((_ bv word-index)
     (bytevector-u32-native-ref bv (* word-index 4)))))
105
;; Store VAL as the N'th 32-bit unit of BUF (byte offset N*4), unsigned,
;; in native byte order.
(define-syntax u32-set!
  (syntax-rules ()
    ((_ bv word-index word)
     (bytevector-u32-native-set! bv (* word-index 4) word))))
108
;; Read the N'th 32-bit unit of BUF (byte offset N*4) as a signed
;; integer in native byte order.
(define-syntax s32-ref
  (syntax-rules ()
    ((_ bv word-index)
     (bytevector-s32-native-ref bv (* word-index 4)))))
111
;; Store VAL as the N'th 32-bit unit of BUF (byte offset N*4), signed,
;; in native byte order.
(define-syntax s32-set!
  (syntax-rules ()
    ((_ bv word-index word)
     (bytevector-s32-native-set! bv (* word-index 4) word))))
114
115
116 \f
117
118 ;;; A <meta> entry collects metadata for one procedure. Procedures are
119 ;;; written as contiguous ranges of RTL code.
120 ;;;
;; Evaluate ARG once and check it against the `match' pattern PATTERN.
;; When it does not match, signal an error whose message is "expected "
;; followed by KIND, with the offending value as irritant.
(define-syntax assert-match
  (syntax-rules ()
    ((assert-match arg pattern kind)
     (let ((val arg))
       (if (not (match val (pattern #t) (_ #f)))
           (error (string-append "expected " kind) val))))))
125
;; Per-procedure metadata record.  The label and low-pc fields are
;; read-only; properties, high-pc and arities can be updated after
;; construction.
(define-record-type <meta>
  (%make-meta label properties low-pc high-pc arities)
  meta?
  (label      meta-label)
  (properties meta-properties set-meta-properties!)
  (low-pc     meta-low-pc)
  (high-pc    meta-high-pc    set-meta-high-pc!)
  (arities    meta-arities    set-meta-arities!))
134
;; Create a <meta> for a procedure starting at LOW-PC.  LABEL must be a
;; symbol and PROPERTIES an alist whose keys are symbols; otherwise an
;; error is signalled.  The high-pc starts out as #f and the arity list
;; as empty.
(define (make-meta label properties low-pc)
  (assert-match label (? symbol?) "symbol")
  (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
  (let ((high-pc #f)
        (arities '()))
    (%make-meta label properties low-pc high-pc arities)))
139
;; Return the value stored under the `name' key of META's property
;; alist, or #f when there is no such key.
(define (meta-name meta)
  (let ((entry (assq 'name (meta-properties meta))))
    (and entry (cdr entry))))
142
143 ;; Metadata for one <lambda-case>.
;; Arity record.  Every field is read-only except high-pc, which can be
;; set after construction.
(define-record-type <arity>
  (make-arity req opt rest kw-indices allow-other-keys?
              low-pc high-pc)
  arity?
  (req               arity-req)
  (opt               arity-opt)
  (rest              arity-rest)
  (kw-indices        arity-kw-indices)
  (allow-other-keys? arity-allow-other-keys?)
  (low-pc            arity-low-pc)
  (high-pc           arity-high-pc set-arity-high-pc!))
155
;; Number of 32-bit words in one code block; expands to the literal 32
;; wherever the identifier is used.
(define-syntax *block-size*
  (identifier-syntax 32))
157
158 ;;; An assembler collects all of the words emitted during assembly, and
159 ;;; also maintains ancillary information such as the constant table, a
160 ;;; relocation list, and so on.
161 ;;;
162 ;;; RTL code consists of 32-bit units. We emit RTL code using native
163 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
164 ;;; the bytevector as a whole instead of conditionalizing each access.
165 ;;;
(define-record-type <asm>
  (make-asm cur idx start prev written
            labels relocs
            word-size endianness
            constants inits
            shstrtab next-section-number
            meta)
  asm?

  ;; Code is written into a logically growable vector that is
  ;; implemented as a list of blocks.  `cur' is the block currently
  ;; being filled and `idx' is the next free index in that block,
  ;; counted in 32-bit units.
  ;;
  (cur asm-cur set-asm-cur!)
  (idx asm-idx set-asm-idx!)

  ;; Absolute offset (in u32 units) of the beginning of the instruction
  ;; being written.  It is updated after all the words of one primitive
  ;; instruction have been written.  It models the position of the
  ;; instruction pointer during execution, given that the RTL VM
  ;; updates the IP only at the end of executing the instruction, and
  ;; is thus useful for computing offsets between two points in a
  ;; program.
  ;;
  (start asm-start set-asm-start!)

  ;; The blocks that have already been filled.
  ;;
  (prev asm-prev set-asm-prev!)

  ;; How many u32 words are held in `prev'; equivalently, the offset of
  ;; the current block.
  ;;
  (written asm-written set-asm-written!)

  ;; Alist of symbol -> position pairs for the labels defined in this
  ;; compilation unit.
  ;;
  (labels asm-labels set-asm-labels!)

  ;; The relocations needed by the program text.  We use an internal
  ;; representation for relocations, and handle textual relative
  ;; relocations in the assembler.  Other kinds of relocations are
  ;; later reified as linker relocations and resolved by the linker.
  ;;
  (relocs asm-relocs set-asm-relocs!)

  ;; Target information (both read-only).
  ;;
  (word-size asm-word-size)
  (endianness asm-endianness)

  ;; The constant table, as a vhash of object -> label.  All constants
  ;; get de-duplicated and written into separate sections -- either
  ;; the .rodata section, for read-only data, or .data, for constants
  ;; that need initialization at load-time (like symbols).  Constants
  ;; can depend on other constants (e.g. a symbol depending on a
  ;; stringbuf), so order in this table is important.
  ;;
  (constants asm-constants set-asm-constants!)

  ;; The RTL instructions needed to initialize the constants.  Will run
  ;; in a thunk with 2 local variables.
  ;;
  (inits asm-inits set-asm-inits!)

  ;; The shstrtab, holding section names.
  ;;
  (shstrtab asm-shstrtab set-asm-shstrtab!)

  ;; The number that the next section to be written will receive.
  ;;
  (next-section-number asm-next-section-number set-asm-next-section-number!)

  ;; A list of <meta> records holding procedure metadata.
  ;;
  (meta asm-meta set-asm-meta!))
243
;; Allocate a new u32vector holding *block-size* words.
(define-inlinable (fresh-block)
  (let ((nwords *block-size*))
    (make-u32vector nwords)))
246
(define* (make-assembler #:key (word-size (target-word-size))
                         (endianness (target-endianness)))
  "Return a new, empty assembler.  @var{word-size} and @var{endianness}
describe the target and default to the values of the configured
target."
  (let ((first-block (fresh-block))
        (section-names (make-string-table)))
    ;; Positional arguments follow the <asm> constructor: cur idx start
    ;; prev written / labels relocs / word-size endianness / constants
    ;; inits / shstrtab next-section-number / meta.
    (make-asm first-block 0 0 '() 0
              '() '()
              word-size endianness
              vlist-null '()
              section-names 1
              '())))
258
(define (intern-section-name! asm string)
  "Intern @var{string} in the section name table (shstrtab) of
@var{asm}, returning whatever @code{string-table-intern!} returns."
  (let ((table (asm-shstrtab asm)))
    (string-table-intern! table string)))
262
(define-inlinable (asm-pos asm)
  "Return the offset, in 32-bit units, at which the next word will be
written: the words in previous blocks plus the index into the current
block."
  (let ((before (asm-written asm))
        (within (asm-idx asm)))
    (+ within before)))
267
(define (allocate-new-block asm)
  "Retire the current block onto the list of previous blocks and make a
fresh block current, so that the next word lands at index 0 of it."
  (let ((replacement (fresh-block))
        (retired (asm-cur asm))
        ;; Captured before idx/written are touched, so it is the total
        ;; number of words written so far.
        (total (asm-pos asm)))
    (set-asm-prev! asm (cons retired (asm-prev asm)))
    (set-asm-written! asm total)
    (set-asm-cur! asm replacement)
    (set-asm-idx! asm 0)))
276
(define-inlinable (emit asm u32)
  "Append the 32-bit word @var{u32} to the instruction stream.  There
must already be room for it in the current block; afterwards a new
block is allocated if the current one has become full, so there is
always room for the following word."
  (let* ((slot (asm-idx asm))
         (next (1+ slot)))
    (u32-set! (asm-cur asm) slot u32)
    (set-asm-idx! asm next)
    (when (= next *block-size*)
      (allocate-new-block asm))))
285
(define-inlinable (make-reloc type label base word)
  "Build an internal relocation, represented as the four-element list
@code{(@var{type} @var{label} @var{base} @var{word})}: a reference of
kind @var{type} to symbol @var{label}, located @var{word} words after
position @var{base}.  @var{type} may be x8-s24, a 24-bit relative label
reference that the assembler can fix up, or s32, a 32-bit relative
reference that the linker has to fix up."
  (cons* type label base word '()))
293
(define-inlinable (reset-asm-start! asm)
  "Move asm-start to the current write position.  Called once all the
words of an instruction have been written."
  (let ((here (asm-pos asm)))
    (set-asm-start! asm here)))
297
(define (emit-exported-label asm label)
  "Record @var{label} at the current asm-start by pushing a
@code{(@var{label} . start)} pair onto the label alist of @var{asm}."
  (let ((binding (cons label (asm-start asm))))
    (set-asm-labels! asm (cons binding (asm-labels asm)))))
302
(define (record-label-reference asm label)
  "Push an x8-s24 relocation for local label @var{label} onto the
relocation list.  Its base is the current asm-start and its word offset
is the distance from there to the current write position.  The
assembler patches the value up later."
  (let* ((base (asm-start asm))
         (word (- (asm-pos asm) base))
         (entry (make-reloc 'x8-s24 label base word)))
    (set-asm-relocs! asm (cons entry (asm-relocs asm)))))
310
(define* (record-far-label-reference asm label #:optional (offset 0))
  "Push an s32 relocation for far label @var{label} onto the relocation
list.  Its base is the current asm-start minus @var{offset} (default 0)
and its word offset is the distance from that base to the current write
position.  The linker patches the value up later."
  (let* ((base (- (asm-start asm) offset))
         (word (- (asm-pos asm) base))
         (entry (make-reloc 's32 label base word)))
    (set-asm-relocs! asm (cons entry (asm-relocs asm)))))
318
319
320 \f
321
322 ;;;
323 ;;; Primitive assemblers are defined by expanding `assembler' for each
324 ;;; opcode in `(rtl-instruction-list)'.
325 ;;;
326
(eval-when (expand compile load eval)
  ;; Return an identifier whose name is the names of identifiers A and
  ;; B joined together, and whose lexical context is taken from CTX.
  (define (id-append ctx a b)
    (let ((joined (symbol-append (syntax->datum a)
                                 (syntax->datum b))))
      (datum->syntax ctx joined))))
330
;; (assembler NAME OPCODE WORD0 WORD* ...) expands to a procedure
;; (lambda (asm operand ...) ...) that emits the words of one primitive
;; instruction and then resets asm-start.
;;
;; WORD0 and each WORD* name a word layout.  For every layout, the
;; helpers below return a syntax object of the form
;; ((formal ...) code ...): the formals that the layout contributes to
;; the generated procedure, and the code that emits that word.
(define-syntax assembler
  (lambda (x)
    ;; (op-case asm name ((TYPE arg ...) code ...) ...) compares NAME
    ;; against each TYPE in turn.  For the first that matches, fresh
    ;; temporaries are generated for the args and ((arg ...) code ...)
    ;; is returned as syntax.  If none matches an error is signalled.
    (define-syntax op-case
      (lambda (x)
        (syntax-case x ()
          ((_ asm name ((type arg ...) code ...) clause ...)
           #`(if (eq? name 'type)
                 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
                   #'((arg ...)
                      code ...))
                 (op-case asm name clause ...)))
          ((_ asm name)
           #'(error "unmatched name" name)))))

    ;; Formals and code for the first word of an instruction, which
    ;; carries OPCODE in its low byte.
    (define (pack-first-word asm opcode type)
      (with-syntax ((opcode opcode))
        (op-case
         asm type
         ((U8_X24)
          (emit asm opcode))
         ((U8_U24 arg)
          (emit asm (pack-u8-u24 opcode arg)))
         ((U8_L24 label)
          (record-label-reference asm label)
          (emit asm opcode))
         ((U8_U8_I16 a imm)
          (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
         ((U8_U12_U12 a b)
          (emit asm (pack-u8-u12-u12 opcode a b)))
         ((U8_U8_U8_U8 a b c)
          (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))

    ;; Formals and code for any word after the first.
    (define (pack-tail-word asm type)
      (op-case
       asm type
       ((U8_U24 a b)
        (emit asm (pack-u8-u24 a b)))
       ((U8_L24 a label)
        (record-label-reference asm label)
        (emit asm a))
       ((U8_U8_I16 a b imm)
        (emit asm (pack-u8-u8-u16 a b (object-address imm))))
       ;; A tail word has no opcode, so this layout takes three
       ;; operands; all three must be declared as formals.
       ((U8_U12_U12 a b c)
        (emit asm (pack-u8-u12-u12 a b c)))
       ((U8_U8_U8_U8 a b c d)
        (emit asm (pack-u8-u8-u8-u8 a b c d)))
       ((U32 a)
        (emit asm a))
       ((I32 imm)
        (let ((val (object-address imm)))
          (unless (zero? (ash val -32))
            (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
          (emit asm val)))
       ((A32 imm)
        (unless (= (asm-word-size asm) 8)
          (error "make-long-immediate unavailable for this target"))
        ;; High 32 bits first, then low 32 bits.
        (emit asm (ash (object-address imm) -32))
        (emit asm (logand (object-address imm) (1- (ash 1 32)))))
       ;; B32 takes no operands and emits nothing.
       ((B32))
       ((N32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((S32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((L32 label)
        (record-far-label-reference asm label)
        (emit asm 0))
       ((LO32 label offset)
        (record-far-label-reference asm label
                                    (* offset (/ (asm-word-size asm) 4)))
        (emit asm 0))
       ((X8_U24 a)
        (emit asm (pack-u8-u24 0 a)))
       ((X8_U12_U12 a b)
        (emit asm (pack-u8-u12-u12 0 a b)))
       ((X8_L24 label)
        (record-label-reference asm label)
        (emit asm 0))
       ((B1_X7_L24 a label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_U7_L24 a b label)
        (record-label-reference asm label)
        (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))
       ((B1_X31 a)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
       ((B1_X7_U24 a b)
        (emit asm (pack-u1-u7-u24 (if a 1 0) 0 b)))))

    (syntax-case x ()
      ((_ name opcode word0 word* ...)
       (with-syntax ((((formal0 ...)
                       code0 ...)
                      (pack-first-word #'asm
                                       (syntax->datum #'opcode)
                                       (syntax->datum #'word0)))
                     ((((formal* ...)
                        code* ...) ...)
                      (map (lambda (word) (pack-tail-word #'asm word))
                           (syntax->datum #'(word* ...)))))
         ;; The formals of all words are concatenated, in word order,
         ;; after the leading `asm' argument.
         #'(lambda (asm formal0 ... formal* ... ...)
             (unless (asm? asm) (error "not an asm"))
             code0 ...
             code* ... ...
             (reset-asm-start! asm)))))))
437
;; Table mapping instruction names (symbols) to the procedures that
;; emit them.  Filled in by define-assembler and define-macro-assembler
;; and consulted by emit-text.
(define assemblers
  (make-hash-table))
439
;; (define-assembler NAME OPCODE KIND WORD ...) defines and exports
;; emit-NAME, a procedure built by `assembler' from NAME, OPCODE and the
;; WORDs, and registers it under NAME in the `assemblers' table.  KIND
;; is accepted but not used.
(define-syntax define-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ name opcode kind arg ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((emit-name (assembler name opcode arg ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
451
;; (visit-opcodes MACRO ARG ...) expands to a `begin' containing one
;; form (MACRO ARG ... . INST) for each element INST of
;; (rtl-instruction-list), which is read at expansion time.
(define-syntax visit-opcodes
  (lambda (stx)
    (syntax-case stx ()
      ((_ macro arg ...)
       (let ((descriptions (map (lambda (desc)
                                  (datum->syntax #'macro desc))
                                (rtl-instruction-list))))
         (with-syntax (((inst ...) descriptions))
           #'(begin
               (macro arg ... . inst)
               ...)))))))
462
;; Define and register an emit-OP procedure for every entry of
;; (rtl-instruction-list).
(visit-opcodes define-assembler)
464
(define (emit-text asm instructions)
  "Assemble each element of @var{instructions} with the assembler
@var{asm}.  @var{instructions} is a list of instructions, each one a
list whose head names the instruction and whose tail holds its
operands.  An unknown instruction name signals a
@code{bad-instruction} error.  May be called any number of times
before @code{link-assembly}."
  (define (assemble-one inst)
    (let ((emitter (hashq-ref assemblers (car inst))))
      (unless emitter
        (error 'bad-instruction inst))
      (apply emitter asm (cdr inst))))
  (for-each assemble-one instructions))
476
477 \f
478
479 ;;;
480 ;;; The constant table records a topologically sorted set of literal
481 ;;; constants used by a program. For example, a pair uses its car and
482 ;;; cdr, a string uses its stringbuf, etc.
483 ;;;
484 ;;; Some things we want to add to the constant table are not actually
485 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
486 ;;; references, or cache cells for non-closure procedures. For these we
487 ;;; define special record types and add instances of those record types
488 ;;; to the table.
489 ;;;
490
(define-inlinable (immediate? x)
  "Return @code{#t} when @var{x} is an immediate value, i.e. when bit 1
or bit 2 (mask 6) of its object address is set; otherwise return
@code{#f}."
  (logtest 6 (object-address x)))
494
;; Constant-table wrapper around a string; the wrapped string is
;; read-only through stringbuf-string.
(define-record-type <stringbuf>
  (make-stringbuf string)
  stringbuf?
  (string
   stringbuf-string))
499
;; Constant-table entry for a statically allocated procedure; `code' is
;; the value handed to make-static-procedure (load-static-procedure
;; passes a label).
(define-record-type <static-procedure>
  (make-static-procedure code)
  static-procedure?
  (code
   static-procedure-code))
504
;; Constant-table entry for a cache cell, identified by its scope and
;; key (both read-only).
(define-record-type <cache-cell>
  (make-cache-cell scope key)
  cache-cell?
  (scope
   cache-cell-scope)
  (key
   cache-cell-key))
510
(define (statically-allocatable? x)
  "Return @code{#t} when the non-immediate constant @var{x} can be
allocated statically -- that is, when it is a pair, vector, string,
stringbuf or static procedure -- and @code{#f} when it would need some
kind of runtime allocation."
  (cond
   ((pair? x) #t)
   ((vector? x) #t)
   ((string? x) #t)
   ((stringbuf? x) #t)
   (else (static-procedure? x))))
516
(define (intern-constant asm obj)
  "Enter @var{obj} into the constant table of @var{asm} and return the
label that refers to it.  An object that is already in the table keeps
its existing label.  Immediates are not entered; @code{#f} is returned
for them."
  ;; Intern a sub-object and return its label (#f for an immediate).
  (define (sub-label value)
    (intern-constant asm value))

  ;; Instructions that store VALUE into slot SLOT of the object labelled
  ;; OWNER at load time.  Empty when VALUE is an immediate.
  (define (field-inits owner slot value)
    (cond
     ((sub-label value)
      => (lambda (src)
           (let ((load (if (statically-allocatable? value)
                           'make-non-immediate
                           'static-ref)))
             `((,load 1 ,src)
               (static-set! 1 ,owner ,slot)))))
     (else '())))

  ;; Intern whatever OBJ depends on, and return the instructions needed
  ;; to initialize the object labelled LABEL at load time.
  (define (initializers obj label)
    (cond
     ((pair? obj)
      (let* ((car-inits (field-inits label 0 (car obj)))
             (cdr-inits (field-inits label 1 (cdr obj))))
        (append car-inits cdr-inits)))
     ((vector? obj)
      ;; Element I is stored in slot I+1.
      (let loop ((i 0) (chunks '()))
        (if (< i (vector-length obj))
            (loop (1+ i)
                  (cons (field-inits label (1+ i) (vector-ref obj i))
                        chunks))
            (apply append (reverse chunks)))))
     ((stringbuf? obj) '())
     ((static-procedure? obj)
      `((make-non-immediate 1 ,label)
        (link-procedure! 1 ,(static-procedure-code obj))))
     ((cache-cell? obj) '())
     ((symbol? obj)
      (let ((name-label (sub-label (symbol->string obj))))
        `((make-non-immediate 1 ,name-label)
          (string->symbol 1 1)
          (static-set! 1 ,label 0))))
     ((string? obj)
      (let ((buf-label (sub-label (make-stringbuf obj))))
        `((make-non-immediate 1 ,buf-label)
          (static-set! 1 ,label 1))))
     ((keyword? obj)
      (let ((sym-label (sub-label (keyword->symbol obj))))
        `((static-ref 1 ,sym-label)
          (symbol->keyword 1 1)
          (static-set! 1 ,label 0))))
     ((number? obj)
      (let ((text-label (sub-label (number->string obj))))
        `((make-non-immediate 1 ,text-label)
          (string->number 1 1)
          (static-set! 1 ,label 0))))
     (else
      (error "don't know how to intern" obj))))

  (if (immediate? obj)
      #f
      (let ((existing (vhash-assoc obj (asm-constants asm))))
        (if existing
            (cdr existing)
            ;; Computing the initializers may itself add entries to
            ;; asm-constants and asm-inits, so both are re-read below
            ;; rather than captured beforehand.
            (let* ((label (gensym "constant"))
                   (inits (initializers obj label)))
              (set-asm-constants! asm
                                  (vhash-cons obj label (asm-constants asm)))
              ;; asm-inits is kept in reverse order.
              (set-asm-inits! asm (append (reverse inits) (asm-inits asm)))
              label)))))
576
(define (intern-non-immediate asm obj)
  "Enter the non-immediate @var{obj} into the constant table and return
its label.  Signals an error if @var{obj} is an immediate."
  (if (immediate? obj)
      (error "expected a non-immediate" obj)
      (intern-constant asm obj)))
583
(define (intern-cache-cell asm scope key)
  "Enter a cache cell for @var{scope} and @var{key} into the constant
table and return its label.  A cell with the same scope and key that is
already in the table is reused."
  (let ((cell (make-cache-cell scope key)))
    (intern-constant asm cell)))
589
590 ;; Return the label of the cell that holds the module for a scope.
(define (intern-module-cache-cell asm scope)
  "Enter the module cache cell of @var{scope} -- the cache cell whose
key is @code{#t} -- into the constant table and return its label."
  (let ((module-key #t))
    (intern-cache-cell asm scope module-key)))
594
595
596 \f
597
598 ;;;
599 ;;; Macro assemblers bridge the gap between primitive instructions and
600 ;;; some higher-level operations.
601 ;;;
602
;; (define-macro-assembler (NAME ARG ...) BODY ...) defines and exports
;; emit-NAME as (lambda (ARG ...) BODY ...) and registers that procedure
;; under NAME in the `assemblers' table.
(define-syntax define-macro-assembler
  (lambda (stx)
    (syntax-case stx ()
      ((_ (name arg ...) body body* ...)
       (with-syntax ((emit-name (id-append #'name #'emit- #'name)))
         #'(begin
             (define emit-name
               (let ((emit-name (lambda (arg ...) body body* ...)))
                 (hashq-set! assemblers 'name emit-name)
                 emit-name))
             (export emit-name)))))))
614
;; Load the constant OBJ into DST.  Immediates use the shortest
;; make-*-immediate instruction that can hold their bits; anything else
;; is interned in the constant table and loaded by label.
(define-macro-assembler (load-constant asm dst obj)
  (if (immediate? obj)
      (let ((bits (object-address obj)))
        (cond
         ;; The short form is used only when DST is below 256 and the
         ;; bits fit in 16 bits.
         ((and (< dst 256) (zero? (ash bits -16)))
          (emit-make-short-immediate asm dst obj))
         ((zero? (ash bits -32))
          (emit-make-long-immediate asm dst obj))
         (else
          (emit-make-long-long-immediate asm dst obj))))
      (let ((label (intern-non-immediate asm obj)))
        (if (statically-allocatable? obj)
            (emit-make-non-immediate asm dst label)
            (emit-static-ref asm dst label)))))
630
;; Intern a static procedure whose code is at LABEL and load it into
;; DST with make-non-immediate.
(define-macro-assembler (load-static-procedure asm dst label)
  (let* ((proc (make-static-procedure label))
         (proc-label (intern-constant asm proc)))
    (emit-make-non-immediate asm dst proc-label)))
634
;; Start a procedure: define LABEL at the current position and push a
;; new <meta>, with the given PROPERTIES and the current asm-start as
;; its low-pc, onto the assembler's meta list.
(define-macro-assembler (begin-program asm label properties)
  (emit-label asm label)
  (let* ((low-pc (asm-start asm))
         (entry (make-meta label properties low-pc)))
    (set-asm-meta! asm (cons entry (asm-meta asm)))))
639
;; Finish the most recently begun procedure: set its high-pc to the
;; current asm-start and reverse its arity list (arities are pushed
;; onto the front as they are begun).
(define-macro-assembler (end-program asm)
  (let* ((current (car (asm-meta asm)))
         (end-pc (asm-start asm)))
    (set-meta-high-pc! current end-pc)
    (set-meta-arities! current (reverse (meta-arities current)))))
644
;; Begin an arity that has only required arguments: delegates to
;; begin-opt-arity with no optional arguments and no rest argument.
(define-macro-assembler (begin-standard-arity asm req nlocals alternate)
  (let ((opt '())
        (rest #f))
    (emit-begin-opt-arity asm req opt rest nlocals alternate)))
647
;; Begin an arity without keyword arguments: delegates to
;; begin-kw-arity with an empty keyword alist and allow-other-keys?
;; set to #f.
(define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
  (let ((kw-indices '())
        (allow-other-keys? #f))
    (emit-begin-kw-arity asm req opt rest kw-indices allow-other-keys?
                         nlocals alternate)))
650
;; Begin an arity of the current procedure: validate the arguments,
;; push an <arity> record (low-pc = current asm-start, high-pc not yet
;; known) onto the current <meta>, and emit the matching prelude.
;;
;;   req, opt           lists of symbols
;;   rest               #f or a symbol
;;   kw-indices         alist of keyword -> integer
;;   allow-other-keys?  boolean
;;   nlocals            integer
;;   alternate          #f or a symbol
;;
;; Any argument of the wrong shape signals an "expected ..." error.
(define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
                                        allow-other-keys? nlocals alternate)
  (assert-match req ((? symbol?) ...) "list of symbols")
  (assert-match opt ((? symbol?) ...) "list of symbols")
  (assert-match rest (or #f (? symbol?)) "#f or symbol")
  ;; Keys must be keywords: kw-indices is handed unchanged to
  ;; kw-prelude, which only matches (keyword . index) entries.
  (assert-match kw-indices (((? keyword?) . (? integer?)) ...)
                "alist of keyword -> integer")
  (assert-match allow-other-keys? (? boolean?) "boolean")
  (assert-match nlocals (? integer?) "integer")
  (assert-match alternate (or #f (? symbol?)) "#f or symbol")
  (let* ((meta (car (asm-meta asm)))
         (arity (make-arity req opt rest kw-indices allow-other-keys?
                            (asm-start asm) #f))
         ;; The procedure itself is in slot 0, in the standard calling
         ;; convention.  For procedure prologues, nreq includes the
         ;; procedure, so here we add 1.
         (nreq (1+ (length req)))
         (nopt (length opt))
         (rest? (->bool rest)))
    (set-meta-arities! meta (cons arity (meta-arities meta)))
    ;; Pick the simplest prelude that can handle this arity.
    (cond
     ((or allow-other-keys? (pair? kw-indices))
      (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
                       nlocals alternate))
     ((or rest? (pair? opt))
      (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
     (else
      (emit-standard-prelude asm nreq nlocals alternate)))))
679
;; Close the most recently begun arity of the current procedure by
;; setting its high-pc to the current asm-start.
(define-macro-assembler (end-arity asm)
  (let* ((meta (car (asm-meta asm)))
         (current (car (meta-arities meta))))
    (set-arity-high-pc! current (asm-start asm))))
683
;; Prelude for an arity with only required arguments.  With an
;; ALTERNATE, branch there when the argument count differs from NREQ
;; and then allocate the frame.  Without one, assert the count, using
;; the combined assert-nargs-ee/locals instruction when both NREQ and
;; the number of extra locals are below 2^12.
(define-macro-assembler (standard-prelude asm nreq nlocals alternate)
  (define (fits-in-12-bits? n)
    (< n (ash 1 12)))
  (cond
   (alternate
    (emit-br-if-nargs-ne asm nreq alternate)
    (emit-alloc-frame asm nlocals))
   ((and (fits-in-12-bits? nreq)
         (fits-in-12-bits? (- nlocals nreq)))
    (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
   (else
    (emit-assert-nargs-ee asm nreq)
    (emit-alloc-frame asm nlocals))))
694
;; Prelude for an arity with optional and/or rest arguments.  First
;; check for too few arguments (branching to ALTERNATE if there is one,
;; asserting otherwise); then either bind the rest argument or check
;; for too many arguments; finally allocate the frame.
(define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
  (if alternate
      (emit-br-if-nargs-lt asm nreq alternate)
      (emit-assert-nargs-ge asm nreq))
  (let ((npositional (+ nreq nopt)))
    (cond
     (rest?
      (emit-bind-rest asm npositional))
     (alternate
      (emit-br-if-nargs-gt asm npositional alternate))
     (else
      (emit-assert-nargs-le asm npositional))))
  (emit-alloc-frame asm nlocals))
707
;; Prelude for an arity with keyword arguments.  First check for too
;; few arguments (branching to ALTERNATE if there is one, asserting
;; otherwise); then bind the keyword arguments and allocate the frame.
;; Every entry of KW-INDICES must be a (keyword . index) pair.
(define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
                                    allow-other-keys? nlocals alternate)
  (if alternate
      (emit-br-if-nargs-lt asm nreq alternate)
      (emit-assert-nargs-ge asm nreq))
  (let* ((npositional (+ nreq nopt))
         ;; One more than the highest keyword index, but never less
         ;; than the number of positional arguments.
         (ntotal (fold (lambda (entry highest)
                         (match entry
                           (((? keyword?) . idx)
                            (max (1+ idx) highest))))
                       npositional
                       kw-indices)))
    ;; FIXME: port 581f410f
    (emit-bind-kwargs asm nreq
                      (pack-flags allow-other-keys? rest?)
                      npositional
                      ntotal
                      kw-indices)
    (emit-alloc-frame asm nlocals)))
725
;; Define the label SYM at the current asm-start by pushing a
;; (SYM . start) pair onto the assembler's label alist.
(define-macro-assembler (label asm sym)
  (let ((binding (cons sym (asm-start asm))))
    (set-asm-labels! asm (cons binding (asm-labels asm)))))
728
;; Emit a static-set! that stores MODULE into field 0 of the module
;; cache cell of SCOPE.
(define-macro-assembler (cache-current-module! asm module scope)
  (let ((cell-label (intern-module-cache-cell asm scope)))
    (emit-static-set! asm module cell-label 0)))
732
;; Emit a toplevel-box instruction for SYM in SCOPE with destination
;; DST, after interning the symbol, the scope's module cache cell and
;; the (scope, sym) cache cell.
(define-macro-assembler (cached-toplevel-box asm dst scope sym bound?)
  (let ((symbol-label (intern-non-immediate asm sym))
        (module-label (intern-module-cache-cell asm scope))
        (box-label (intern-cache-cell asm scope sym)))
    (emit-toplevel-box asm dst box-label module-label symbol-label bound?)))
738
;; Emit a module-box instruction for SYM in the module named
;; MODULE-NAME with destination DST.  The pair (PUBLIC? . MODULE-NAME)
;; is interned as a constant and also serves as the cache cell scope.
(define-macro-assembler (cached-module-box asm dst module-name sym public? bound?)
  (let* ((symbol-label (intern-non-immediate asm sym))
         (scope (cons public? module-name))
         (scope-label (intern-constant asm scope))
         (box-label (intern-cache-cell asm scope sym)))
    (emit-module-box asm dst box-label scope-label symbol-label bound?)))
745
746
747 \f
748
749 ;;;
750 ;;; Helper for linking objects.
751 ;;;
752
(define (make-object asm name bv relocs labels . kwargs)
  "Build a linker object for the section @var{name} (a symbol) with
contents @var{bv}.  The section name is interned in the shstrtab, the
size is taken from @var{bv}, the next free section index is claimed,
and a linker symbol for @var{name} at offset 0 is put in front of
@var{labels}.  @var{kwargs} are passed on to @code{make-elf-section}."
  (let* ((section-name (symbol->string name))
         (name-idx (intern-section-name! asm section-name))
         (index (asm-next-section-number asm)))
    (set-asm-next-section-number! asm (1+ index))
    (let ((section (apply make-elf-section
                          #:index index
                          #:name name-idx
                          #:size (bytevector-length bv)
                          kwargs))
          (symbols (cons (make-linker-symbol name 0) labels)))
      (make-linker-object section bv relocs symbols))))
767
768
769 \f
770
771 ;;;
772 ;;; Linking the constant table. This code is somewhat intertwingled
773 ;;; with the intern-constant code above, as that procedure also
774 ;;; residualizes instructions to initialize constants at load time.
775 ;;;
776
;; Store the object address of X into BUF at byte offset POS as one
;; target word (4 or 8 bytes, per the word size of ASM) in the target
;; endianness.  Any other word size signals an error.
(define (write-immediate asm buf pos x)
  (let ((bits (object-address x))
        (order (asm-endianness asm))
        (size (asm-word-size asm)))
    (cond
     ((eqv? size 4) (bytevector-u32-set! buf pos bits order))
     ((eqv? size 8) (bytevector-u64-set! buf pos bits order))
     (else (error "bad word size" asm)))))
784
(define (emit-init-constants asm)
  "When some constants need to be initialized at runtime, assemble a
procedure that runs the recorded initialization instructions and return
its label.  Return @code{#f} when there is nothing to initialize."
  (let ((pending (asm-inits asm)))
    (if (null? pending)
        #f
        (let* ((label (gensym "init-constants"))
               (prologue `((begin-program ,label ())
                           (assert-nargs-ee/locals 1 1)))
               (epilogue `((load-constant 1 ,*unspecified*)
                           (return 1)
                           (end-program))))
          ;; asm-inits is kept in reverse order, so flip it back.
          (emit-text asm (append prologue (reverse pending) epilogue))
          label))))
800
(define (link-data asm data name)
  "Link the static data for a program into the @var{name} section (which
should be .data or .rodata), and return the resulting linker object, or
@code{#f} if @var{data} is empty.  @var{data} should be a vhash mapping
objects to labels."
  ;; Round ADDRESS up to the next multiple of ALIGNMENT.
  (define (align address alignment)
    (+ address
       (modulo (- alignment (modulo address alignment)) alignment)))

  ;; Tag values written into the first word of statically allocated
  ;; objects.
  (define tc7-vector 13)
  (define stringbuf-shared-flag #x100)
  (define stringbuf-wide-flag #x400)
  (define tc7-stringbuf 39)
  (define tc7-narrow-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag))
  (define tc7-wide-stringbuf
    (+ tc7-stringbuf stringbuf-shared-flag stringbuf-wide-flag))
  (define tc7-ro-string (+ 21 #x200))
  (define tc7-rtl-program 69)

  (let ((word-size (asm-word-size asm))
        (endianness (asm-endianness asm)))
    ;; Store the unsigned integer VAL as one target word at byte offset
    ;; POS of BUF, in the target endianness.
    (define (write-word buf pos val)
      (case word-size
        ((4) (bytevector-u32-set! buf pos val endianness))
        ((8) (bytevector-u64-set! buf pos val endianness))
        (else (error "bad word size" asm))))

    ;; Number of bytes that `write' stores for X.
    (define (byte-length x)
      (cond
       ((stringbuf? x)
        ;; Two header words, then the characters plus a terminating
        ;; zero character.
        (let ((x (stringbuf-string x)))
          (+ (* 2 word-size)
             (case (string-bytes-per-char x)
               ((1) (1+ (string-length x)))
               ((4) (* (1+ (string-length x)) 4))
               (else (error "bad string bytes per char" x))))))
       ((static-procedure? x)
        (* 2 word-size))
       ((string? x)
        (* 4 word-size))
       ((pair? x)
        (* 2 word-size))
       ((vector? x)
        (* (1+ (vector-length x)) word-size))
       (else
        word-size)))

    (define (write-constant-reference buf pos x)
      ;; The asm-inits will fix up any reference to a non-immediate, so
      ;; #f is written as a placeholder for those.
      (write-immediate asm buf pos (if (immediate? x) x #f)))

    ;; Write the static representation of OBJ into BUF at offset POS.
    (define (write buf pos obj)
      (cond
       ((stringbuf? obj)
        (let* ((x (stringbuf-string obj))
               (len (string-length x))
               (tag (if (= (string-bytes-per-char x) 1)
                        tc7-narrow-stringbuf
                        tc7-wide-stringbuf)))
          (write-word buf pos tag)
          (write-word buf (+ pos word-size) len)
          (let ((pos (+ pos (* word-size 2))))
            (case (string-bytes-per-char x)
              ((1)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u8 (char->integer (string-ref x i))))
                       (bytevector-u8-set! buf (+ pos i) u8)
                       (lp (1+ i)))
                     (bytevector-u8-set! buf (+ pos i) 0))))
              ((4)
               (let lp ((i 0))
                 (if (< i len)
                     (let ((u32 (char->integer (string-ref x i))))
                       (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
                       (lp (1+ i)))
                     (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
              (else (error "bad string bytes per char" x))))))

       ((static-procedure? obj)
        ;; Tag word followed by a zero word.
        (write-word buf pos tc7-rtl-program)
        (write-word buf (+ pos word-size) 0))

       ((cache-cell? obj)
        (write-immediate asm buf pos #f))

       ((string? obj)
        ;; Four words: tag, stringbuf placeholder, zero, length.
        (write-word buf pos tc7-ro-string)
        (write-immediate asm buf (+ pos word-size) #f) ; stringbuf
        (write-word buf (+ pos (* 2 word-size)) 0)
        (write-word buf (+ pos (* 3 word-size)) (string-length obj)))

       ((pair? obj)
        (write-constant-reference buf pos (car obj))
        (write-constant-reference buf (+ pos word-size) (cdr obj)))

       ((vector? obj)
        ;; Tag word carrying the length, then one word per element.
        (let* ((len (vector-length obj))
               (tag (logior tc7-vector (ash len 8))))
          (write-word buf pos tag)
          (let lp ((i 0))
            (when (< i len)
              (let ((pos (+ pos word-size (* i word-size)))
                    (elt (vector-ref obj i)))
                (write-constant-reference buf pos elt)
                (lp (1+ i)))))))

       ;; These get a single placeholder word holding #f.
       ((or (symbol? obj) (keyword? obj) (number? obj))
        (write-immediate asm buf pos #f))

       (else
        (error "unrecognized object" obj))))

    (cond
     ((vlist-null? data) #f)
     (else
      ;; Every object starts at an 8-byte-aligned offset.
      (let* ((byte-len (vhash-fold (lambda (k v len)
                                     (+ (byte-length k) (align len 8)))
                                   0 data))
             (buf (make-bytevector byte-len 0))
             (count (vlist-length data)))
        (let lp ((i 0) (pos 0) (labels '()))
          (if (< i count)
              (let* ((pair (vlist-ref data i))
                     (obj (car pair))
                     (obj-label (cdr pair)))
                (write buf pos obj)
                (lp (1+ i)
                    (align (+ (byte-length obj) pos) 8)
                    (cons (make-linker-symbol obj-label pos) labels)))
              (make-object asm name buf '() labels
                           #:flags (match name
                                     ('.data (logior SHF_ALLOC SHF_WRITE))
                                     ('.rodata SHF_ALLOC))))))))))
959
(define (link-constants asm)
  "Split the constants recorded in @var{asm} into a read-only set and a
writable set, and link a data section for each.

Returns three values: the object for the .rodata section, the object
for the .data section, and a label for an initialization procedure.
Any of the three may be @code{#f}."
  ;; True if every element of VEC is an immediate.
  (define (all-immediate? vec)
    (let scan ((idx 0))
      (cond
       ((= idx (vector-length vec)) #t)
       ((immediate? (vector-ref vec idx)) (scan (1+ idx)))
       (else #f))))
  ;; Shareable constants go to .rodata; everything else goes to .data.
  (define (shareable? x)
    (cond
     ((stringbuf? x) #t)
     ((pair? x) (if (immediate? (car x)) (immediate? (cdr x)) #f))
     ((vector? x) (all-immediate? x))
     (else #f)))
  (let* ((constants (asm-constants asm))
         (count (vlist-length constants)))
    (let partition ((idx 0) (read-only vlist-null) (writable vlist-null))
      (cond
       ((< idx count)
        (match (vlist-ref constants idx)
          ((obj . label)
           (if (shareable? obj)
               (partition (1+ idx) (vhash-consq obj label read-only) writable)
               (partition (1+ idx) read-only (vhash-consq obj label writable))))))
       (else
        (values (link-data asm read-only '.rodata)
                (link-data asm writable '.data)
                (emit-init-constants asm)))))))
991
992 \f
993
994 ;;;
995 ;;; Linking program text.
996 ;;;
997
(define (process-relocs buf relocs labels)
  "Resolve @var{relocs} against the label alist @var{labels}, patching
@var{buf} in place.  An x8-s24 relocation must name a label found in
@var{labels}; an s32 relocation is patched if its label is found there.
Returns a list of linker relocations for the s32 references whose
labels were not found."
  ;; Handle one relocation; PENDING is the list of linker relocations
  ;; accumulated so far.
  (define (resolve reloc pending)
    (match reloc
      ((kind label base word)
       (let ((target (assq-ref labels label))
             (slot (+ base word)))
         (match kind
           ('s32
            (cond
             (target
              (s32-set! buf slot (- target base))
              pending)
             (else
              (cons (make-linker-reloc 'rel32/4 (* slot 4) word label)
                    pending))))
           ('x8-s24
            (if target
                ;; Keep the low byte of the word; replace the rest with
                ;; the relative offset.
                (let ((low-byte (logand (u32-ref buf slot) #xff)))
                  (u32-set! buf slot (pack-u8-s24 low-byte (- target base)))
                  pending)
                (error "unbound near relocation" reloc)))
           (_ (error "bad relocation kind" reloc)))))))
  (fold resolve '() relocs))
1026
(define (process-labels labels)
  "Return a list of linker symbols, one for each (label . offset) pair
in @var{labels}.  The offsets are expected to be expressed in words;
each is multiplied by 4 for its linker symbol."
  (map (match-lambda
         ((label . word-offset)
          (make-linker-symbol label (* 4 word-offset))))
       labels))
1033
(define (swap-bytes! buf)
  "Reverse the byte order of every 32-bit unit of the text buffer
@var{buf}, in place.  Signals an error if the length of @var{buf} is not
a multiple of 4."
  (let ((total (bytevector-length buf)))
    (unless (zero? (remainder total 4))
      (error "unexpected length"))
    (do ((offset 0 (+ offset 4)))
        ((= offset total))
      ;; Reading with one byte order and writing back with the other
      ;; reverses the four bytes.
      (let ((word (bytevector-u32-ref buf offset (endianness big))))
        (bytevector-u32-set! buf offset word (endianness little))))))
1047
(define (link-text-object asm)
  "Gather the instruction blocks held by @var{asm} into one buffer and
link it as the .rtl-text section, swapping the endianness of the bytes
if the target endianness is not the native one."
  (let* ((buf (make-u32vector (asm-pos asm)))
         (block-bytes (* *block-size* 4))
         ;; Copy the full blocks, taking `asm-prev' in reverse order,
         ;; and note where the current block has to go.
         (tail-pos (fold (lambda (block pos)
                           (bytevector-copy! block 0 buf pos block-bytes)
                           (+ pos block-bytes))
                         0
                         (reverse (asm-prev asm)))))
    ;; Only the first `asm-idx' words of the current block are copied.
    (bytevector-copy! (asm-cur asm) 0 buf tail-pos (* (asm-idx asm) 4))
    (unless (eq? (asm-endianness asm) (native-endianness))
      (swap-bytes! buf))
    (make-object asm '.rtl-text
                 buf
                 (process-relocs buf (asm-relocs asm)
                                 (asm-labels asm))
                 (process-labels (asm-labels asm)))))
1066
1067
1068 \f
1069
1070 ;;;
1071 ;;; Linking other sections of the ELF file, like the dynamic segment,
1072 ;;; the symbol table, etc.
1073 ;;;
1074
(define (link-dynamic-section asm text rw rw-init)
  "Link the dynamic section for an ELF image with RTL text.  @var{rw}
is the writable data section, or false; if it is true it will be added
to the GC roots at runtime.  @var{rw-init} is the label of the procedure
that fixes up @var{rw}, or false.  @var{text} is not used here."
  ;; Build the section for one word size.  WRITE-WORD! is the
  ;; bytevector setter for a word of that size and RELOC-KIND the
  ;; relocation kind used for label slots.
  (define (emit word-size write-word! reloc-kind)
    (let* ((endianness (asm-endianness asm))
           (nwords (cond ((not rw) 6) (rw-init 12) (else 10)))
           (bv (make-bytevector (* word-size nwords) 0))
           (relocs '()))
      ;; Store the unsigned word UWORD in slot I.
      (define (set-uword! i uword)
        (write-word! bv (* i word-size) uword endianness))
      ;; Zero slot I and record a relocation against LABEL for it.
      (define (set-label! i label)
        (set! relocs
              (cons (make-linker-reloc reloc-kind (* i word-size) 0 label)
                    relocs))
        (write-word! bv (* i word-size) 0 endianness))
      (set-uword! 0 DT_GUILE_RTL_VERSION)
      (set-uword! 1 #x02020000)
      (set-uword! 2 DT_GUILE_ENTRY)
      (set-label! 3 '.rtl-text)
      ;; Write the optional entries; END is the slot at which the
      ;; terminating DT_NULL entry goes.
      (let ((end (cond
                  ((not rw) 4)
                  (else
                   ;; Add roots to GC.
                   (set-uword! 4 DT_GUILE_GC_ROOT)
                   (set-label! 5 '.data)
                   (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
                   (set-uword! 7 (bytevector-length (linker-object-bv rw)))
                   (cond
                    (rw-init
                     (set-uword! 8 DT_INIT) ; constants
                     (set-label! 9 rw-init)
                     10)
                    (else 8))))))
        (set-uword! end DT_NULL)
        (set-uword! (1+ end) 0))
      (make-object asm '.dynamic bv relocs '()
                   #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
  (match (asm-word-size asm)
    (4 (emit 4 bytevector-u32-set! 'abs32/1))
    (8 (emit 8 bytevector-u64-set! 'abs64/1))
    (_ (error "bad word size" asm))))
1122
(define (link-shstrtab asm)
  "Link the section-header string table, .shstrtab."
  ;; Intern the table's own name before the table is serialized.
  (intern-section-name! asm ".shstrtab")
  (let ((table (link-string-table! (asm-shstrtab asm))))
    (make-object asm '.shstrtab table '() '()
                 #:type SHT_STRTAB #:flags 0)))
1130
(define (link-symtab text-section asm)
  "Link the ELF symbol table: one function symbol for each entry of
@code{(asm-meta asm)}, defined in @var{text-section}.  Returns two
values: the .symtab object and the .strtab object holding the symbol
names."
  (let* ((endianness (asm-endianness asm))
         (word-size (asm-word-size asm))
         (entry-size (elf-symbol-len word-size))
         (metas (reverse (asm-meta asm)))
         (names (make-string-table))
         (bv (make-bytevector (* (length metas) entry-size) 0)))
    (let fill ((metas metas) (offset 0))
      (unless (null? metas)
        (let* ((meta (car metas))
               (name (meta-name meta))
               ;; A meta without a name gets the empty string.
               (name-idx (string-table-intern!
                          names
                          (if name (symbol->string name) ""))))
          (write-elf-symbol
           bv offset endianness word-size
           (make-elf-symbol
            #:name name-idx
            ;; Symbol value and size are measured in bytes, not u32s.
            #:value (* 4 (meta-low-pc meta))
            #:size (* 4 (- (meta-high-pc meta) (meta-low-pc meta)))
            #:type STT_FUNC
            #:visibility STV_HIDDEN
            #:shndx (elf-section-index text-section)))
          (fill (cdr metas) (+ offset entry-size)))))
    ;; Make the string table object first; the symbol table links to
    ;; its section index.
    (let ((strtab (make-object asm '.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.symtab
                           bv
                           '() '()
                           #:type SHT_SYMTAB #:flags 0 #:entsize entry-size
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1167
1168 ;;; The .guile.arities section describes the arities that a function can
1169 ;;; have. It is in two parts: a sorted array of headers describing
1170 ;;; basic arities, and an array of links out to a string table (and in
1171 ;;; the case of keyword arguments, to the data section) for argument
1172 ;;; names. The whole thing is prefixed by a uint32 indicating the
1173 ;;; offset of the end of the headers array.
1174 ;;;
1175 ;;; The arity headers array is a packed array of structures of the form:
1176 ;;;
1177 ;;; struct arity_header {
1178 ;;; uint32_t low_pc;
1179 ;;; uint32_t high_pc;
1180 ;;; uint32_t offset;
1181 ;;; uint32_t flags;
1182 ;;; uint32_t nreq;
1183 ;;; uint32_t nopt;
1184 ;;; }
1185 ;;;
1186 ;;; All of the offsets and addresses are 32 bits. We can expand in the
1187 ;;; future to use 64-bit offsets if appropriate, but there are other
1188 ;;; aspects of RTL that constrain us to a total image that fits in 32
1189 ;;; bits, so for the moment we'll simplify the problem space.
1190 ;;;
1191 ;;; The following flags values are defined:
1192 ;;;
1193 ;;; #x1: has-rest?
1194 ;;; #x2: allow-other-keys?
1195 ;;; #x4: has-keyword-args?
1196 ;;; #x8: is-case-lambda?
1197 ;;;
1198 ;;; Functions with a single arity specify their number of required and
1199 ;;; optional arguments in nreq and nopt, and do not have the
1200 ;;; is-case-lambda? flag set. Their "offset" member links to an array
1201 ;;; of pointers into the associated .guile.arities.strtab string table,
1202 ;;; identifying the argument names. This offset is relative to the
1203 ;;; start of the .guile.arities section. Links for required arguments
1204 ;;; are first, in order, as uint32 values. Next follow the optionals,
1205 ;;; then the rest link if has-rest? is set, then a link to the "keyword
1206 ;;; indices" literal if has-keyword-args? is set. Unlike the other
1207 ;;; links, the kw-indices link points into the data section, and is
1208 ;;; relative to the ELF image as a whole.
1209 ;;;
1210 ;;; Functions with no arities have no arities information present in the
1211 ;;; .guile.arities section.
1212 ;;;
1213 ;;; Functions with multiple arities are preceded by a header with
1214 ;;; is-case-lambda? set. All other fields are 0, except low-pc and
1215 ;;; high-pc which should be the bounds of the whole function. Headers
1216 ;;; for the individual arities follow. In this way the whole headers
1217 ;;; array is sorted in increasing low-pc order, and case-lambda clauses
1218 ;;; are contained within the [low-pc, high-pc] of the case-lambda
1219 ;;; header.
1220
;; Size in bytes of the uint32 prefix that starts the arities section.
(define arities-prefix-len 4)

;; Size in bytes of one arity header: six uint32 fields.
(define arity-header-len (* 4 6))

;; Byte offset of the "offset" field, the third uint32 of an arity
;; header.
(define arity-header-offset-offset (* 4 2))
1229
;; Combine four booleans into the flags word of an arity header:
;; #x1 has-rest?, #x2 allow-other-keys?, #x4 has-keyword-args?,
;; #x8 is-case-lambda?.
(define-syntax pack-arity-flags
  (syntax-rules ()
    ((_ has-rest? allow-other-keys? has-keyword-args? is-case-lambda?)
     (logior (if has-rest? #x1 0)
             (if allow-other-keys? #x2 0)
             (if has-keyword-args? #x4 0)
             (if is-case-lambda? #x8 0)))))
1236
(define (meta-arities-size meta)
  "Return the number of bytes needed in the .guile.arities section for
the arities of @var{meta}, counting both headers and links."
  ;; One header, plus a 4-byte link per required and optional
  ;; argument, one for a rest argument, and one for the keyword
  ;; indices when that list is non-empty.
  (define (arity-size arity)
    (let ((nlinks (+ (length (arity-req arity))
                     (length (arity-opt arity))
                     (if (arity-rest arity) 1 0)
                     (if (pair? (arity-kw-indices arity)) 1 0))))
      (+ arity-header-len (* 4 nlinks))))
  (match (meta-arities meta)
    (() 0)
    ((arity) (arity-size arity))
    (arities
     ;; Several arities: an extra case-lambda header precedes the
     ;; individual cases.
     (fold (lambda (arity total) (+ total (arity-size arity)))
           arity-header-len
           arities))))
1253
(define (write-arity-headers metas bv endianness)
  "Write the arity headers for @var{metas} into @var{bv}, starting right
after the section prefix, then store the end position of the headers in
the prefix.  Returns two values: that end position, and an alist, in
the order the headers were written, mapping each arity to the byte
position of the \"offset\" field of its header."
  (define (put-u32! pos val)
    (bytevector-u32-set! bv pos val endianness))
  (define (write-raw-header! pos low-pc high-pc flags nreq nopt)
    (put-u32! pos low-pc)
    (put-u32! (+ pos 4) high-pc)
    (put-u32! (+ pos 8) 0)              ; offset, left as 0 here
    (put-u32! (+ pos 12) flags)
    (put-u32! (+ pos 16) nreq)
    (put-u32! (+ pos 20) nopt))
  (define (write-arity-header! pos arity)
    (write-raw-header! pos
                       (arity-low-pc arity)
                       (arity-high-pc arity)
                       (pack-arity-flags (arity-rest arity)
                                         (arity-allow-other-keys? arity)
                                         (pair? (arity-kw-indices arity))
                                         #f)
                       (length (arity-req arity))
                       (length (arity-opt arity))))
  ;; Write one header per element of ARITIES, starting at POS.  Returns
  ;; two values: the position after the last header, and OFFSETS with
  ;; an entry pushed for each arity.
  (define (write-arities! arities pos offsets)
    (match arities
      (() (values pos offsets))
      ((arity . rest)
       (write-arity-header! pos arity)
       (write-arities! rest
                       (+ pos arity-header-len)
                       (acons arity (+ pos arity-header-offset-offset)
                              offsets)))))
  (let lp ((metas metas) (pos arities-prefix-len) (offsets '()))
    (match metas
      (()
       ;; Fill in the prefix.
       (put-u32! 0 pos)
       (values pos (reverse offsets)))
      ((meta . metas)
       (let-values (((pos offsets)
                     (match (meta-arities meta)
                       (() (values pos offsets))
                       ((arity)
                        (write-arities! (list arity) pos offsets))
                       (arities
                        ;; Write a case-lambda header, then the
                        ;; individual arities.  The case-lambda
                        ;; header's offset link is 0.
                        (write-raw-header! pos
                                           (meta-low-pc meta)
                                           (meta-high-pc meta)
                                           (pack-arity-flags #f #f #f #t)
                                           0 0)
                        (write-arities! arities
                                        (+ pos arity-header-len)
                                        offsets)))))
         (lp metas pos offsets))))))
1301
(define (write-arity-links asm bv pos arity-offset-pairs strtab)
  "Write the links for each arity into @var{bv}, starting at @var{pos},
and store the start position of each arity's links at the position
paired with it in @var{arity-offset-pairs}, a list of (arity . position)
pairs.  Argument names are interned in @var{strtab}.  Returns the list
of linker relocations made for keyword-indices links.  Signals an error
unless the links end exactly at the end of @var{bv}."
  (define (put-u32! at val)
    (bytevector-u32-set! bv at val (asm-endianness asm)))
  ;; Required names, then optional names, then the rest name if any.
  (define (arity-names arity)
    (let ((rest (arity-rest arity)))
      (append (arity-req arity)
              (arity-opt arity)
              (if rest (list rest) '()))))
  ;; Write a string-table link for each symbol in NAMES, starting at
  ;; AT.  Returns the position after the last link.
  (define (write-names! names at)
    (if (null? names)
        at
        (begin
          (put-u32! at (string-table-intern! strtab
                                             (symbol->string (car names))))
          (write-names! (cdr names) (+ at 4)))))
  (let lp ((pos pos) (pairs arity-offset-pairs) (relocs '()))
    (match pairs
      (()
       (if (= pos (bytevector-length bv))
           relocs
           (error "expected to fully fill the bytevector"
                  pos (bytevector-length bv))))
      (((arity . header-slot) . pairs)
       (put-u32! header-slot pos)
       (let* ((links-end (write-names! (arity-names arity) pos))
              (kw-indices (arity-kw-indices arity)))
         (if (null? kw-indices)
             (lp links-end pairs relocs)
             ;; The keyword-indices slot is filled in by the linker.
             ;; FIXME: Assert that kw-indices is already interned.
             (lp (+ links-end 4)
                 pairs
                 (cons (make-linker-reloc 'abs32/1 links-end 0
                                          (intern-constant asm kw-indices))
                       relocs))))))))
1334
(define (link-arities asm)
  "Link the .guile.arities section and its string table.  Returns two
values: the .guile.arities object and the .guile.arities.strtab object."
  (let* ((endianness (asm-endianness asm))
         (metas (reverse (asm-meta asm)))
         (size (fold (lambda (meta total)
                       (+ total (meta-arities-size meta)))
                     arities-prefix-len
                     metas))
         (names (make-string-table))
         (bv (make-bytevector size 0)))
    ;; Headers first, then the links, which start where the headers
    ;; end.
    (let*-values (((links-start arity-offset-pairs)
                   (write-arity-headers metas bv endianness))
                  ((kw-indices-relocs)
                   (write-arity-links asm bv links-start
                                      arity-offset-pairs names)))
      (let ((strtab (make-object asm '.guile.arities.strtab
                                 (link-string-table! names)
                                 '() '()
                                 #:type SHT_STRTAB #:flags 0)))
        (values (make-object asm '.guile.arities
                             bv
                             kw-indices-relocs '()
                             #:type SHT_PROGBITS #:flags 0
                             #:link (elf-section-index
                                     (linker-object-section strtab)))
                strtab)))))
1361
1362 ;;;
1363 ;;; The .guile.docstrs section is a packed, sorted array of (pc, str)
1364 ;;; values. Pc and str are both 32 bits wide. (Either could change to
1365 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1366 ;;; entry to a program, relative to the start of the text section, and
1367 ;;; str is an index into the associated .guile.docstrs.strtab string
1368 ;;; table section.
1369 ;;;
1370
;; Each docstrs entry is a pair of uint32 values: 8 bytes.
(define docstr-size 8)

(define (link-docstrs asm)
  "Link the .guile.docstrs section and its string table.  Returns two
values: the .guile.docstrs object and the .guile.docstrs.strtab object."
  ;; Collect a (low-pc . docstring) pair for every meta whose
  ;; properties hold exactly one `documentation' entry and whose value
  ;; there is a string.
  (define (find-docstrings)
    (define (documentation? prop)
      (eq? (car prop) 'documentation))
    (filter-map
     (lambda (meta)
       (match (filter documentation? (meta-properties meta))
         (((_ . (? string? doc)))
          (cons (meta-low-pc meta) doc))
         (_ #f)))
     (reverse (asm-meta asm))))
  (let* ((endianness (asm-endianness asm))
         (docstrings (find-docstrings))
         (names (make-string-table))
         (bv (make-bytevector (* (length docstrings) docstr-size) 0)))
    ;; Each entry is the pc followed by the string-table index of the
    ;; docstring.
    (let fill ((entries docstrings) (pos 0))
      (match entries
        (() #t)
        (((pc . doc) . rest)
         (bytevector-u32-set! bv pos pc endianness)
         (bytevector-u32-set! bv (+ pos 4)
                              (string-table-intern! names doc)
                              endianness)
         (fill rest (+ pos docstr-size)))))
    (let ((strtab (make-object asm '.guile.docstrs.strtab
                               (link-string-table! names)
                               '() '()
                               #:type SHT_STRTAB #:flags 0)))
      (values (make-object asm '.guile.docstrs
                           bv
                           '() '()
                           #:type SHT_PROGBITS #:flags 0
                           #:link (elf-section-index
                                   (linker-object-section strtab)))
              strtab))))
1411
1412 ;;;
1413 ;;; The .guile.procprops section is a packed, sorted array of (pc, addr)
1414 ;;; values. Pc and addr are both 32 bits wide. (Either could change to
1415 ;;; 64 bits if appropriate in the future.) Pc is the address of the
1416 ;;; entry to a program, relative to the start of the text section, and
1417 ;;; addr is the address of the associated properties alist, relative to
1418 ;;; the start of the ELF image.
1419 ;;;
1420 ;;; Since procedure properties are stored in the data sections, we need
1421 ;;; to link the procedure properties section before the data sections.
1422 ;;; (Note that this constraint does not apply to the arities section,
1423 ;;; which may reference the data sections via the kw-indices literal,
1424 ;;; because assembling the text section already makes sure that the
1425 ;;; kw-indices are interned.)
1426 ;;;
1427
;; Each procprops entry is a pair of uint32 values: 8 bytes.
(define procprops-size 8)

(define (link-procprops asm)
  "Link the .guile.procprops section.  Each meta that still has
properties once its name and docstring are removed gets an entry: its
low pc, followed by a slot relocated to the interned properties alist."
  ;; Find the first entry of ALIST whose key is KEY.  Drop it if
  ;; VALUE-PRED accepts its value, otherwise keep it; either way the
  ;; entries after it are returned unexamined.
  (define (assoc-remove-one alist key value-pred)
    (if (null? alist)
        '()
        (match (car alist)
          ((k . v)
           (cond
            ((not (eq? k key))
             (acons k v (assoc-remove-one (cdr alist) key value-pred)))
            ((value-pred v) (cdr alist))
            (else (acons key v (cdr alist))))))))
  (define (props-without-name-or-docstring meta)
    (let ((nameless (assoc-remove-one (meta-properties meta) 'name
                                      (lambda (x) #t))))
      (assoc-remove-one nameless 'documentation string?)))
  (define (find-procprops)
    (filter-map (lambda (meta)
                  (let ((props (props-without-name-or-docstring meta)))
                    (if (pair? props)
                        (cons (meta-low-pc meta) props)
                        #f)))
                (reverse (asm-meta asm))))
  (let* ((endianness (asm-endianness asm))
         (procprops (find-procprops))
         (bv (make-bytevector (* (length procprops) procprops-size) 0))
         (relocs
          (fold (lambda (entry index relocs)
                  (match entry
                    ((pc . props)
                     (let ((pos (* index procprops-size)))
                       (bytevector-u32-set! bv pos pc endianness)
                       ;; The second word is filled in by the linker.
                       (cons (make-linker-reloc 'abs32/1 (+ pos 4) 0
                                                (intern-constant asm props))
                             relocs)))))
                '()
                procprops
                (iota (length procprops)))))
    (make-object asm '.guile.procprops
                 bv
                 relocs '()
                 #:type SHT_PROGBITS #:flags 0)))
1469
1470 ;;;
1471 ;;; The DWARF .debug_info, .debug_abbrev, .debug_str, and .debug_loc
1472 ;;; sections provide line number and local variable liveness
1473 ;;; information. Their format is defined by the DWARF
1474 ;;; specifications.
1475 ;;;
1476
(define (asm-language asm)
  "Return the language symbol for @var{asm}.  Currently this is always
@code{scheme}, whatever @var{asm} is."
  ;; FIXME: Plumb language through to the assembler.
  (quote scheme))
1480
;; -> 4 values: .debug_info, .debug_abbrev, .debug_str, and .debug_loc
(define (link-debug asm)
  "Link the DWARF debugging sections for @var{asm}.  Returns four
values: the .debug_info, .debug_abbrev, .debug_str and .debug_loc
objects.  The .debug_loc object is always empty."
  ;; Fixed-width writers; all use the endianness of ASM.
  (define (put-u16 port val)
    (let ((bv (make-bytevector 2)))
      (bytevector-u16-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u32 port val)
    (let ((bv (make-bytevector 4)))
      (bytevector-u32-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  (define (put-u64 port val)
    (let ((bv (make-bytevector 8)))
      (bytevector-u64-set! bv 0 val (asm-endianness asm))
      (put-bytevector port bv)))

  ;; Write the non-negative integer VAL as unsigned LEB128: seven bits
  ;; per byte, least significant group first, with the high bit set on
  ;; every byte but the last.
  (define (put-uleb128 port val)
    ;; A negative VAL never shifts down to zero, so without this check
    ;; the loop below would not terminate.
    (when (negative? val)
      (error "cannot encode negative value as uleb128" val))
    (let lp ((val val))
      (let ((next (ash val -7)))
        (if (zero? next)
            (put-u8 port val)
            (begin
              (put-u8 port (logior #x80 (logand val #x7f)))
              (lp next))))))

  ;; Write the integer VAL as signed LEB128.  The last byte is reached
  ;; once the remaining value fits in seven bits as a signed number,
  ;; that is, once it lies in [-64, 63].
  (define (put-sleb128 port val)
    (let lp ((val val))
      (if (<= -64 val 63)
          (put-u8 port (logand val #x7f))
          (begin
            (put-u8 port (logior #x80 (logand val #x7f)))
            (lp (ash val -7))))))

  ;; DIEs are built as s-expressions of the form
  ;; (TAG (@ (ATTR VAL) ...) CHILD ...).
  (define (meta->subprogram-die meta)
    `(subprogram
      (@ ,@(cond
            ((meta-name meta)
             => (lambda (name) `((name ,(symbol->string name)))))
            (else
             '()))
         (low-pc ,(meta-label meta))
         ;; Given as a size in bytes, not as an address.
         (high-pc ,(* 4 (- (meta-high-pc meta) (meta-low-pc meta)))))))

  (define (make-compile-unit-die asm)
    `(compile-unit
      (@ (producer ,(string-append "Guile " (version)))
         (language ,(asm-language asm))
         (low-pc .rtl-text)
         (high-pc ,(* 4 (asm-pos asm))))
      ,@(map meta->subprogram-die (reverse (asm-meta asm)))))

  (let-values (((die-port get-die-bv) (open-bytevector-output-port))
               ((die-relocs) '())
               ((abbrev-port get-abbrev-bv) (open-bytevector-output-port))
               ;; (tag has-kids? attrs forms) -> code
               ((abbrevs) vlist-null)
               ((next-abbrev-code) 1)
               ((strtab) (make-string-table)))

    ;; Write one abbreviation declaration to the abbrev port.
    (define (write-abbrev code tag has-children? attrs forms)
      (put-uleb128 abbrev-port code)
      (put-uleb128 abbrev-port (tag-name->code tag))
      (put-u8 abbrev-port (children-name->code (if has-children? 'yes 'no)))
      (for-each (lambda (attr form)
                  (put-uleb128 abbrev-port (attribute-name->code attr))
                  (put-uleb128 abbrev-port (form-name->code form)))
                attrs forms)
      ;; A (0, 0) pair ends the attribute list.
      (put-uleb128 abbrev-port 0)
      (put-uleb128 abbrev-port 0))

    ;; Return the abbreviation code for this combination of tag,
    ;; children flag, attributes and forms, writing a new declaration
    ;; the first time a combination is seen.
    (define (intern-abbrev tag has-children? attrs forms)
      (let ((key (list tag has-children? attrs forms)))
        (match (vhash-assoc key abbrevs)
          ((_ . code) code)
          (#f (let ((code next-abbrev-code))
                (set! next-abbrev-code (1+ next-abbrev-code))
                (set! abbrevs (vhash-cons key code abbrevs))
                (write-abbrev code tag has-children? attrs forms)
                code)))))

    ;; Turn an attribute value into what gets written: a string-table
    ;; offset for strings, a language code for the language, and the
    ;; value itself for the pc attributes.
    (define (compute-code attr val)
      (match attr
        ('name (string-table-intern! strtab val))
        ('low-pc val)
        ('high-pc val)
        ('producer (string-table-intern! strtab val))
        ('language (language-name->code val))))

    (define (exact-integer? val)
      (and (number? val) (integer? val) (exact? val)))

    ;; Pick the form used to encode CODE: `strp' for strings, the
    ;; narrowest fitting data form for integers, `addr' for symbols.
    (define (choose-form attr val code)
      (cond
       ((string? val) 'strp)
       ((exact-integer? code)
        (cond
         ((< code 0) 'sleb128)
         ((<= code #xff) 'data1)
         ((<= code #xffff) 'data2)
         ((<= code #xffffffff) 'data4)
         ((<= code #xffffffffffffffff) 'data8)
         (else 'uleb128)))
       ((symbol? val) 'addr)
       (else (error "unhandled case" attr val code))))

    ;; Record a relocation of kind KIND against SYM at the current
    ;; position of the DIE port.
    (define (add-die-relocation! kind sym)
      (set! die-relocs
            (cons (make-linker-reloc kind (seek die-port 0 SEEK_CUR) 0 sym)
                  die-relocs)))

    (define (write-value code form)
      (match form
        ('data1 (put-u8 die-port code))
        ('data2 (put-u16 die-port code))
        ('data4 (put-u32 die-port code))
        ('data8 (put-u64 die-port code))
        ('uleb128 (put-uleb128 die-port code))
        ('sleb128 (put-sleb128 die-port code))
        ('addr
         ;; Addresses are written as zero and filled in by the linker.
         (match (asm-word-size asm)
           (4
            (add-die-relocation! 'abs32/1 code)
            (put-u32 die-port 0))
           (8
            (add-die-relocation! 'abs64/1 code)
            (put-u64 die-port 0))))
        ('strp (put-u32 die-port code))))

    ;; Write DIE: its abbreviation code, its attribute values, then its
    ;; children followed by a 0 if it has any.
    (define (write-die die)
      (match die
        ((tag ('@ (attrs vals) ...) children ...)
         (let* ((codes (map compute-code attrs vals))
                (forms (map choose-form attrs vals codes))
                (has-children? (not (null? children)))
                (abbrev-code (intern-abbrev tag has-children? attrs forms)))
           (put-uleb128 die-port abbrev-code)
           (for-each write-value codes forms)
           (when has-children?
             (for-each write-die children)
             (put-uleb128 die-port 0))))))

    ;; Compilation unit header.
    (put-u32 die-port 0) ; Length; will patch later.
    (put-u16 die-port 4) ; DWARF 4.
    (put-u32 die-port 0) ; Abbrevs offset.
    (put-u8 die-port (asm-word-size asm)) ; Address size.

    (write-die (make-compile-unit-die asm))

    ;; Terminate the abbrevs list.
    (put-uleb128 abbrev-port 0)

    (values (let ((bv (get-die-bv)))
              ;; Patch DWARF32 length; it does not count the length
              ;; field itself.
              (bytevector-u32-set! bv 0 (- (bytevector-length bv) 4)
                                   (asm-endianness asm))
              (make-object asm '.debug_info bv die-relocs '()
                           #:type SHT_PROGBITS #:flags 0))
            (make-object asm '.debug_abbrev (get-abbrev-bv) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_str (link-string-table! strtab) '() '()
                         #:type SHT_PROGBITS #:flags 0)
            (make-object asm '.debug_loc #vu8() '() '()
                         #:type SHT_PROGBITS #:flags 0))))
1638
(define (link-objects asm)
  "Link every section of the image for @var{asm}.  Returns the linked
objects as a list, leaving out any that are false."
  (let (;; Linked before the constants, because it probably interns
        ;; more constants.
        (procprops (link-procprops asm)))
    (let-values (((ro rw rw-init) (link-constants asm)))
      (let* (;; Linked after the constants, so that the constants
             ;; initializer gets included.
             (text (link-text-object asm))
             (dt (link-dynamic-section asm text rw rw-init)))
        (let*-values (((symtab strtab)
                       (link-symtab (linker-object-section text) asm))
                      ((arities arities-strtab) (link-arities asm))
                      ((docstrs docstrs-strtab) (link-docstrs asm))
                      ((dinfo dabbrev dstrtab dloc) (link-debug asm)))
          ;; Linked last, because linking other sections adds entries
          ;; to the string table.
          (let ((shstrtab (link-shstrtab asm)))
            (filter (lambda (object) object)
                    (list text ro rw dt symtab strtab arities arities-strtab
                          docstrs docstrs-strtab procprops
                          dinfo dabbrev dstrtab dloc
                          shstrtab))))))))
1660
1661
1662 \f
1663
1664 ;;;
1665 ;;; High-level public interfaces.
1666 ;;;
1667
(define* (link-assembly asm #:key (page-aligned? #t))
  "Link the code and data emitted into @var{asm} into an ELF image and
return it as a bytevector.  By default the image is linked so that
read-only and writable data are on separate pages; pass
@code{#:page-aligned? #f} to disable this behavior."
  (let ((objects (link-objects asm)))
    (link-elf objects #:page-aligned? page-aligned?)))
1674
(define (assemble-program instructions)
  "Assemble the sequence of instructions @var{instructions} into RTL
code, link an image that is not page-aligned, and load that image from
memory.  Returns a procedure."
  (let ((asm (make-assembler)))
    (emit-text asm instructions)
    (let ((image (link-assembly asm #:page-aligned? #f)))
      (load-thunk-from-memory image))))