Beginnings of tracking of procedure arities in assembler
[bpt/guile.git] / module / system / vm / assembler.scm
1 ;;; Guile RTL assembler
2
3 ;;; Copyright (C) 2001, 2009, 2010, 2012, 2013 Free Software Foundation, Inc.
4 ;;;
5 ;;; This library is free software; you can redistribute it and/or
6 ;;; modify it under the terms of the GNU Lesser General Public
7 ;;; License as published by the Free Software Foundation; either
8 ;;; version 3 of the License, or (at your option) any later version.
9 ;;;
10 ;;; This library is distributed in the hope that it will be useful,
11 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;; Lesser General Public License for more details.
14 ;;;
15 ;;; You should have received a copy of the GNU Lesser General Public
16 ;;; License along with this library; if not, write to the Free Software
17 ;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
19 ;;; Commentary:
20 ;;;
21 ;;; This module implements an assembler that creates an ELF image from
22 ;;; RTL assembly and macro-assembly. The input can be given in
23 ;;; s-expression form, like ((OP ARG ...) ...). Internally there is a
24 ;;; procedural interface, the emit-OP procedures, but that is not
25 ;;; currently exported.
26 ;;;
27 ;;; "Primitive instructions" correspond to RTL VM operations.
28 ;;; Assemblers for primitive instructions are generated programmatically
29 ;;; from (rtl-instruction-list), which itself is derived from the VM
30 ;;; sources. There are also "macro-instructions" like "label" or
31 ;;; "load-constant" that expand to 0 or more primitive instructions.
32 ;;;
33 ;;; The assembler also handles some higher-level tasks, like creating
34 ;;; the symbol table, other metadata sections, creating a constant table
35 ;;; for the whole compilation unit, and writing the dynamic section of
36 ;;; the ELF file along with the appropriate initialization routines.
37 ;;;
38 ;;; Most compilers will want to use the trio of make-assembler,
39 ;;; emit-text, and link-assembly. That will result in the creation of
40 ;;; an ELF image as a bytevector, which can then be loaded using
41 ;;; load-thunk-from-memory, or written to disk as a .go file.
42 ;;;
43 ;;; Code:
44
45 (define-module (system vm assembler)
46 #:use-module (system base target)
47 #:use-module (system vm instruction)
48 #:use-module (system vm elf)
49 #:use-module (system vm linker)
50 #:use-module (system vm objcode)
51 #:use-module (rnrs bytevectors)
52 #:use-module (ice-9 vlist)
53 #:use-module (ice-9 match)
54 #:use-module (srfi srfi-1)
55 #:use-module (srfi srfi-4)
56 #:use-module (srfi srfi-9)
57 #:use-module (srfi srfi-11)
58 #:export (make-assembler
59 emit-text
60 link-assembly
61 assemble-program))
62
63
64 \f
65
66 ;;; RTL code consists of 32-bit units, often subdivided in some way.
67 ;;; These helpers create one 32-bit unit from multiple components.
68
69 (define-syntax-rule (pack-u8-u24 x y)
70 (logior x (ash y 8)))
71
72 (define-syntax-rule (pack-u8-s24 x y)
73 (logior x (ash (cond
74 ((< 0 (- y) #x800000)
75 (+ y #x1000000))
76 ((<= 0 y #xffffff)
77 y)
78 (else (error "out of range" y)))
79 8)))
80
81 (define-syntax-rule (pack-u1-u7-u24 x y z)
82 (logior x (ash y 1) (ash z 8)))
83
84 (define-syntax-rule (pack-u8-u12-u12 x y z)
85 (logior x (ash y 8) (ash z 20)))
86
87 (define-syntax-rule (pack-u8-u8-u16 x y z)
88 (logior x (ash y 8) (ash z 16)))
89
90 (define-syntax-rule (pack-u8-u8-u8-u8 x y z w)
91 (logior x (ash y 8) (ash z 16) (ash w 24)))
92
93 (define-syntax pack-flags
94 (syntax-rules ()
95 ;; Add clauses as needed.
96 ((pack-flags f1 f2) (logior (if f1 (ash 1 0) 0)
97 (if f2 (ash 2 0) 0)))))
98
99 ;;; Helpers to read and write 32-bit units in a buffer.
100
101 (define-syntax-rule (u32-ref buf n)
102 (bytevector-u32-native-ref buf (* n 4)))
103
104 (define-syntax-rule (u32-set! buf n val)
105 (bytevector-u32-native-set! buf (* n 4) val))
106
107 (define-syntax-rule (s32-ref buf n)
108 (bytevector-s32-native-ref buf (* n 4)))
109
110 (define-syntax-rule (s32-set! buf n val)
111 (bytevector-s32-native-set! buf (* n 4) val))
112
113
114 \f
115
116 ;;; A <meta> entry collects metadata for one procedure. Procedures are
117 ;;; written as contiguous ranges of RTL code.
118 ;;;
119 (define-syntax-rule (assert-match arg pattern kind)
120 (let ((x arg))
121 (unless (match x (pattern #t) (_ #f))
122 (error (string-append "expected " kind) x))))
123
124 (define-record-type <meta>
125 (%make-meta label properties low-pc high-pc arities)
126 meta?
127 (label meta-label)
128 (properties meta-properties set-meta-properties!)
129 (low-pc meta-low-pc)
130 (high-pc meta-high-pc set-meta-high-pc!)
131 (arities meta-arities set-meta-arities!))
132
133 (define (make-meta label properties low-pc)
134 (assert-match label (? symbol?) "symbol")
135 (assert-match properties (((? symbol?) . _) ...) "alist with symbolic keys")
136 (%make-meta label properties low-pc #f '()))
137
138 (define (meta-name meta)
139 (assq-ref (meta-properties meta) 'name))
140
141 ;; Metadata for one <lambda-case>.
142 (define-record-type <arity>
143 (make-arity req opt rest kw-indices allow-other-keys?
144 low-pc high-pc)
145 arity?
146 (req arity-req)
147 (opt arity-opt)
148 (rest arity-rest)
149 (kw-indices arity-kw-indices)
150 (allow-other-keys? arity-allow-other-keys?)
151 (low-pc arity-low-pc)
152 (high-pc arity-high-pc set-arity-high-pc!))
153
154 (define-syntax *block-size* (identifier-syntax 32))
155
156 ;;; An assembler collects all of the words emitted during assembly, and
157 ;;; also maintains ancillary information such as the constant table, a
158 ;;; relocation list, and so on.
159 ;;;
160 ;;; RTL code consists of 32-bit units. We emit RTL code using native
161 ;;; endianness. If we're targeting a foreign endianness, we byte-swap
162 ;;; the bytevector as a whole instead of conditionalizing each access.
163 ;;;
164 (define-record-type <asm>
165 (make-asm cur idx start prev written
166 labels relocs
167 word-size endianness
168 constants inits
169 shstrtab next-section-number
170 meta)
171 asm?
172
173 ;; We write RTL code into what is logically a growable vector,
174 ;; implemented as a list of blocks. asm-cur is the current block, and
175 ;; asm-idx is the current index into that block, in 32-bit units.
176 ;;
177 (cur asm-cur set-asm-cur!)
178 (idx asm-idx set-asm-idx!)
179
180 ;; asm-start is an absolute position, indicating the offset of the
181 ;; beginning of an instruction (in u32 units). It is updated after
182 ;; writing all the words for one primitive instruction. It models the
183 ;; position of the instruction pointer during execution, given that
184 ;; the RTL VM updates the IP only at the end of executing the
185 ;; instruction, and is thus useful for computing offsets between two
186 ;; points in a program.
187 ;;
188 (start asm-start set-asm-start!)
189
190 ;; The list of previously written blocks.
191 ;;
192 (prev asm-prev set-asm-prev!)
193
194 ;; The number of u32 words written in asm-prev, which is the same as
195 ;; the offset of the current block.
196 ;;
197 (written asm-written set-asm-written!)
198
199 ;; An alist of symbol -> position pairs, indicating the labels defined
200 ;; in this compilation unit.
201 ;;
202 (labels asm-labels set-asm-labels!)
203
204 ;; A list of relocations needed by the program text. We use an
205 ;; internal representation for relocations, and handle textualn
206 ;; relative relocations in the assembler. Other kinds of relocations
207 ;; are later reified as linker relocations and resolved by the linker.
208 ;;
209 (relocs asm-relocs set-asm-relocs!)
210
211 ;; Target information.
212 ;;
213 (word-size asm-word-size)
214 (endianness asm-endianness)
215
216 ;; The constant table, as a vhash of object -> label. All constants
217 ;; get de-duplicated and written into separate sections -- either the
218 ;; .rodata section, for read-only data, or .data, for constants that
219 ;; need initialization at load-time (like symbols). Constants can
220 ;; depend on other constants (e.g. a symbol depending on a stringbuf),
221 ;; so order in this table is important.
222 ;;
223 (constants asm-constants set-asm-constants!)
224
225 ;; A list of RTL instructions needed to initialize the constants.
226 ;; Will run in a thunk with 2 local variables.
227 ;;
228 (inits asm-inits set-asm-inits!)
229
230 ;; The shstrtab, for section names.
231 ;;
232 (shstrtab asm-shstrtab set-asm-shstrtab!)
233
234 ;; The section number for the next section to be written.
235 ;;
236 (next-section-number asm-next-section-number set-asm-next-section-number!)
237
238 ;; A list of <meta>, corresponding to procedure metadata.
239 ;;
240 (meta asm-meta set-asm-meta!))
241
242 (define-inlinable (fresh-block)
243 (make-u32vector *block-size*))
244
245 (define* (make-assembler #:key (word-size (target-word-size))
246 (endianness (target-endianness)))
247 "Create an assembler for a given target @var{word-size} and
248 @var{endianness}, falling back to appropriate values for the configured
249 target."
250 (make-asm (fresh-block) 0 0 '() 0
251 '() '()
252 word-size endianness
253 vlist-null '()
254 (make-string-table) 1
255 '()))
256
257 (define (intern-section-name! asm string)
258 "Add a string to the section name table (shstrtab)."
259 (string-table-intern! (asm-shstrtab asm) string))
260
261 (define-inlinable (asm-pos asm)
262 "The offset of the next word to be written into the code buffer, in
263 32-bit units."
264 (+ (asm-idx asm) (asm-written asm)))
265
266 (define (allocate-new-block asm)
267 "Close off the current block, and arrange for the next word to be
268 written to a fresh block."
269 (let ((new (fresh-block)))
270 (set-asm-prev! asm (cons (asm-cur asm) (asm-prev asm)))
271 (set-asm-written! asm (asm-pos asm))
272 (set-asm-cur! asm new)
273 (set-asm-idx! asm 0)))
274
275 (define-inlinable (emit asm u32)
276 "Emit one 32-bit word into the instruction stream. Assumes that there
277 is space for the word, and ensures that there is space for the next
278 word."
279 (u32-set! (asm-cur asm) (asm-idx asm) u32)
280 (set-asm-idx! asm (1+ (asm-idx asm)))
281 (if (= (asm-idx asm) *block-size*)
282 (allocate-new-block asm)))
283
284 (define-inlinable (make-reloc type label base word)
285 "Make an internal relocation of type @var{type} referencing symbol
286 @var{label}, @var{word} words after position @var{start}. @var{type}
287 may be x8-s24, indicating a 24-bit relative label reference that can be
288 fixed up by the assembler, or s32, indicating a 32-bit relative
289 reference that needs to be fixed up by the linker."
290 (list type label base word))
291
292 (define-inlinable (reset-asm-start! asm)
293 "Reset the asm-start after writing the words for one instruction."
294 (set-asm-start! asm (asm-pos asm)))
295
296 (define (emit-exported-label asm label)
297 "Define a linker symbol associating @var{label} with the current
298 asm-start."
299 (set-asm-labels! asm (acons label (asm-start asm) (asm-labels asm))))
300
301 (define (record-label-reference asm label)
302 "Record an x8-s24 local label reference. This value will get patched
303 up later by the assembler."
304 (let* ((start (asm-start asm))
305 (pos (asm-pos asm))
306 (reloc (make-reloc 'x8-s24 label start (- pos start))))
307 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
308
309 (define* (record-far-label-reference asm label #:optional (offset 0))
310 "Record an s32 far label reference. This value will get patched up
311 later by the linker."
312 (let* ((start (- (asm-start asm) offset))
313 (pos (asm-pos asm))
314 (reloc (make-reloc 's32 label start (- pos start))))
315 (set-asm-relocs! asm (cons reloc (asm-relocs asm)))))
316
317
318 \f
319
320 ;;;
321 ;;; Primitive assemblers are defined by expanding `assembler' for each
322 ;;; opcode in `(rtl-instruction-list)'.
323 ;;;
324
325 (eval-when (expand compile load eval)
326 (define (id-append ctx a b)
327 (datum->syntax ctx (symbol-append (syntax->datum a) (syntax->datum b)))))
328
329 (define-syntax assembler
330 (lambda (x)
331 (define-syntax op-case
332 (lambda (x)
333 (syntax-case x ()
334 ((_ asm name ((type arg ...) code ...) clause ...)
335 #`(if (eq? name 'type)
336 (with-syntax (((arg ...) (generate-temporaries #'(arg ...))))
337 #'((arg ...)
338 code ...))
339 (op-case asm name clause ...)))
340 ((_ asm name)
341 #'(error "unmatched name" name)))))
342
343 (define (pack-first-word asm opcode type)
344 (with-syntax ((opcode opcode))
345 (op-case
346 asm type
347 ((U8_X24)
348 (emit asm opcode))
349 ((U8_U24 arg)
350 (emit asm (pack-u8-u24 opcode arg)))
351 ((U8_L24 label)
352 (record-label-reference asm label)
353 (emit asm opcode))
354 ((U8_R24 rest)
355 (emit asm (pack-u8-u24 opcode (list rest)))
356 (for-each (lambda (x) (emit asm x)) rest))
357 ((U8_U8_I16 a imm)
358 (emit asm (pack-u8-u8-u16 opcode a (object-address imm))))
359 ((U8_U12_U12 a b)
360 (emit asm (pack-u8-u12-u12 opcode a b)))
361 ((U8_U8_U8_U8 a b c)
362 (emit asm (pack-u8-u8-u8-u8 opcode a b c))))))
363
364 (define (pack-tail-word asm type)
365 (op-case
366 asm type
367 ((U8_U24 a b)
368 (emit asm (pack-u8-u24 a b)))
369 ((U8_L24 a label)
370 (record-label-reference asm label)
371 (emit asm a))
372 ((U8_R24 rest)
373 (emit asm (pack-u8-u24 a (length rest)))
374 (for-each (lambda (x) (emit asm x)) rest))
375 ((U8_U8_I16 a b imm)
376 (emit asm (pack-u8-u8-u16 a b (object-address imm))))
377 ((U8_U12_U12 a b)
378 (emit asm (pack-u8-u12-u12 a b c)))
379 ((U8_U8_U8_U8 a b c d)
380 (emit asm (pack-u8-u8-u8-u8 a b c d)))
381 ((U32 a)
382 (emit asm a))
383 ((I32 imm)
384 (let ((val (object-address imm)))
385 (unless (zero? (ash val -32))
386 (error "FIXME: enable truncation of negative fixnums when cross-compiling"))
387 (emit asm val)))
388 ((A32 imm)
389 (unless (= (asm-word-size asm) 8)
390 (error "make-long-immediate unavailable for this target"))
391 (emit asm (ash (object-address imm) -32))
392 (emit asm (logand (object-address imm) (1- (ash 1 32)))))
393 ((B32))
394 ((N32 label)
395 (record-far-label-reference asm label)
396 (emit asm 0))
397 ((S32 label)
398 (record-far-label-reference asm label)
399 (emit asm 0))
400 ((L32 label)
401 (record-far-label-reference asm label)
402 (emit asm 0))
403 ((LO32 label offset)
404 (record-far-label-reference asm label
405 (* offset (/ (asm-word-size asm) 4)))
406 (emit asm 0))
407 ((X8_U24 a)
408 (emit asm (pack-u8-u24 0 a)))
409 ((X8_U12_U12 a b)
410 (emit asm (pack-u8-u12-u12 0 a b)))
411 ((X8_R24 rest)
412 (emit asm (pack-u8-u24 0 (length rest)))
413 (for-each (lambda (x) (emit asm x)) rest))
414 ((X8_L24 label)
415 (record-label-reference asm label)
416 (emit asm 0))
417 ((B1_X7_L24 a label)
418 (record-label-reference asm label)
419 (emit asm (pack-u1-u7-u24 (if a 1 0) 0 0)))
420 ((B1_U7_L24 a b label)
421 (record-label-reference asm label)
422 (emit asm (pack-u1-u7-u24 (if a 1 0) b 0)))))
423
424 (syntax-case x ()
425 ((_ name opcode word0 word* ...)
426 (with-syntax ((((formal0 ...)
427 code0 ...)
428 (pack-first-word #'asm
429 (syntax->datum #'opcode)
430 (syntax->datum #'word0)))
431 ((((formal* ...)
432 code* ...) ...)
433 (map (lambda (word) (pack-tail-word #'asm word))
434 (syntax->datum #'(word* ...)))))
435 #'(lambda (asm formal0 ... formal* ... ...)
436 (unless (asm? asm) (error "not an asm"))
437 code0 ...
438 code* ... ...
439 (reset-asm-start! asm)))))))
440
441 (define assemblers (make-hash-table))
442
443 (define-syntax define-assembler
444 (lambda (x)
445 (syntax-case x ()
446 ((_ name opcode arg ...)
447 (with-syntax ((emit (id-append #'name #'emit- #'name)))
448 #'(define emit
449 (let ((emit (assembler name opcode arg ...)))
450 (hashq-set! assemblers 'name emit)
451 emit)))))))
452
453 (define-syntax visit-opcodes
454 (lambda (x)
455 (syntax-case x ()
456 ((visit-opcodes macro arg ...)
457 (with-syntax (((inst ...)
458 (map (lambda (x) (datum->syntax #'macro x))
459 (rtl-instruction-list))))
460 #'(begin
461 (macro arg ... . inst)
462 ...))))))
463
464 (visit-opcodes define-assembler)
465
466 (define (emit-text asm instructions)
467 "Assemble @var{instructions} using the assembler @var{asm}.
468 @var{instructions} is a sequence of RTL instructions, expressed as a
469 list of lists. This procedure can be called many times before calling
470 @code{link-assembly}."
471 (for-each (lambda (inst)
472 (apply (or (hashq-ref assemblers (car inst))
473 (error 'bad-instruction inst))
474 asm
475 (cdr inst)))
476 instructions))
477
478 \f
479
480 ;;;
481 ;;; The constant table records a topologically sorted set of literal
482 ;;; constants used by a program. For example, a pair uses its car and
483 ;;; cdr, a string uses its stringbuf, etc.
484 ;;;
485 ;;; Some things we want to add to the constant table are not actually
486 ;;; Scheme objects: for example, stringbufs, cache cells for toplevel
487 ;;; references, or cache cells for non-closure procedures. For these we
488 ;;; define special record types and add instances of those record types
489 ;;; to the table.
490 ;;;
491
492 (define-inlinable (immediate? x)
493 "Return @code{#t} if @var{x} is immediate, and @code{#f} otherwise."
494 (not (zero? (logand (object-address x) 6))))
495
496 (define-record-type <stringbuf>
497 (make-stringbuf string)
498 stringbuf?
499 (string stringbuf-string))
500
501 (define-record-type <static-procedure>
502 (make-static-procedure code)
503 static-procedure?
504 (code static-procedure-code))
505
506 (define-record-type <cache-cell>
507 (make-cache-cell scope key)
508 cache-cell?
509 (scope cache-cell-scope)
510 (key cache-cell-key))
511
512 (define (statically-allocatable? x)
513 "Return @code{#t} if a non-immediate constant can be allocated
514 statically, and @code{#f} if it would need some kind of runtime
515 allocation."
516 (or (pair? x) (vector? x) (string? x) (stringbuf? x) (static-procedure? x)))
517
518 (define (intern-constant asm obj)
519 "Add an object to the constant table, and return a label that can be
520 used to reference it. If the object is already present in the constant
521 table, its existing label is used directly."
522 (define (recur obj)
523 (intern-constant asm obj))
524 (define (field dst n obj)
525 (let ((src (recur obj)))
526 (if src
527 (list (if (statically-allocatable? obj)
528 `(make-non-immediate 0 ,src)
529 `(static-ref 0 ,src))
530 `(static-set! 0 ,dst ,n))
531 '())))
532 (define (intern obj label)
533 (cond
534 ((pair? obj)
535 (append (field label 0 (car obj))
536 (field label 1 (cdr obj))))
537 ((vector? obj)
538 (let lp ((i 0) (inits '()))
539 (if (< i (vector-length obj))
540 (lp (1+ i)
541 (append-reverse (field label (1+ i) (vector-ref obj i))
542 inits))
543 (reverse inits))))
544 ((stringbuf? obj) '())
545 ((static-procedure? obj)
546 `((make-non-immediate 0 ,label)
547 (link-procedure! 0 ,(static-procedure-code obj))))
548 ((cache-cell? obj) '())
549 ((symbol? obj)
550 `((make-non-immediate 0 ,(recur (symbol->string obj)))
551 (string->symbol 0 0)
552 (static-set! 0 ,label 0)))
553 ((string? obj)
554 `((make-non-immediate 0 ,(recur (make-stringbuf obj)))
555 (static-set! 0 ,label 1)))
556 ((keyword? obj)
557 `((static-ref 0 ,(recur (keyword->symbol obj)))
558 (symbol->keyword 0 0)
559 (static-set! 0 ,label 0)))
560 ((number? obj)
561 `((make-non-immediate 0 ,(recur (number->string obj)))
562 (string->number 0 0)
563 (static-set! 0 ,label 0)))
564 (else
565 (error "don't know how to intern" obj))))
566 (cond
567 ((immediate? obj) #f)
568 ((vhash-assoc obj (asm-constants asm)) => cdr)
569 (else
570 ;; Note that calling intern may mutate asm-constants and
571 ;; asm-constant-inits.
572 (let* ((label (gensym "constant"))
573 (inits (intern obj label)))
574 (set-asm-constants! asm (vhash-cons obj label (asm-constants asm)))
575 (set-asm-inits! asm (append-reverse inits (asm-inits asm)))
576 label))))
577
578 (define (intern-non-immediate asm obj)
579 "Intern a non-immediate into the constant table, and return its
580 label."
581 (when (immediate? obj)
582 (error "expected a non-immediate" obj))
583 (intern-constant asm obj))
584
585 (define (intern-cache-cell asm scope key)
586 "Intern a cache cell into the constant table, and return its label.
587 If there is already a cache cell with the given scope and key, it is
588 returned instead."
589 (intern-constant asm (make-cache-cell scope key)))
590
591 ;; Return the label of the cell that holds the module for a scope.
592 (define (intern-module-cache-cell asm scope)
593 "Intern a cache cell for a module, and return its label."
594 (intern-cache-cell asm scope #t))
595
596
597 \f
598
599 ;;;
600 ;;; Macro assemblers bridge the gap between primitive instructions and
601 ;;; some higher-level operations.
602 ;;;
603
604 (define-syntax define-macro-assembler
605 (lambda (x)
606 (syntax-case x ()
607 ((_ (name arg ...) body body* ...)
608 (with-syntax ((emit (id-append #'name #'emit- #'name)))
609 #'(define emit
610 (let ((emit (lambda (arg ...) body body* ...)))
611 (hashq-set! assemblers 'name emit)
612 emit)))))))
613
614 (define-macro-assembler (load-constant asm dst obj)
615 (cond
616 ((immediate? obj)
617 (let ((bits (object-address obj)))
618 (cond
619 ((and (< dst 256) (zero? (ash bits -16)))
620 (emit-make-short-immediate asm dst obj))
621 ((zero? (ash bits -32))
622 (emit-make-long-immediate asm dst obj))
623 (else
624 (emit-make-long-long-immediate asm dst obj)))))
625 ((statically-allocatable? obj)
626 (emit-make-non-immediate asm dst (intern-non-immediate asm obj)))
627 (else
628 (emit-static-ref asm dst (intern-non-immediate asm obj)))))
629
630 (define-macro-assembler (load-static-procedure asm dst label)
631 (let ((loc (intern-constant asm (make-static-procedure label))))
632 (emit-make-non-immediate asm dst loc)))
633
634 (define-macro-assembler (begin-program asm label properties)
635 (emit-label asm label)
636 (let ((meta (make-meta label properties (asm-start asm))))
637 (set-asm-meta! asm (cons meta (asm-meta asm)))))
638
639 (define-macro-assembler (end-program asm)
640 (let ((meta (car (asm-meta asm))))
641 (set-meta-high-pc! meta (asm-start asm))
642 (set-meta-arities! meta (reverse (meta-arities meta)))))
643
644 (define-macro-assembler (begin-standard-arity asm req nlocals alternate)
645 (emit-begin-opt-arity asm req '() #f nlocals alternate))
646
647 (define-macro-assembler (begin-opt-arity asm req opt rest nlocals alternate)
648 (emit-begin-kw-arity asm req opt rest '() #f nlocals alternate))
649
650 (define-macro-assembler (begin-kw-arity asm req opt rest kw-indices
651 allow-other-keys? nlocals alternate)
652 (assert-match req ((? symbol?) ...) "list of symbols")
653 (assert-match opt ((? symbol?) ...) "list of symbols")
654 (assert-match rest (or #f (? symbol?)) "#f or symbol")
655 (assert-match kw-indices (((? symbol?) . (? integer?)) ...)
656 "alist of symbol -> integer")
657 (assert-match allow-other-keys? (? boolean?) "boolean")
658 (assert-match nlocals (? integer?) "integer")
659 (assert-match alternate (or #f (? symbol?)) "#f or symbol")
660 (let* ((meta (car (asm-meta asm)))
661 (arity (make-arity req opt rest kw-indices allow-other-keys?
662 (asm-start asm) #f))
663 (nreq (length req))
664 (nopt (length opt))
665 (rest? (->bool rest)))
666 (set-meta-arities! meta (cons arity (meta-arities meta)))
667 (cond
668 ((or allow-other-keys? (pair? kw-indices))
669 (emit-kw-prelude asm nreq nopt rest? kw-indices allow-other-keys?
670 nlocals alternate))
671 ((or rest? (pair? opt))
672 (emit-opt-prelude asm nreq nopt rest? nlocals alternate))
673 (else
674 (emit-standard-prelude asm nreq nlocals alternate)))))
675
676 (define-macro-assembler (end-arity asm)
677 (let ((arity (car (meta-arities (car (asm-meta asm))))))
678 (set-arity-high-pc! arity (asm-start asm))))
679
680 (define-macro-assembler (standard-prelude asm nreq nlocals alternate)
681 (cond
682 (alternate
683 (emit-br-if-nargs-ne asm nreq alternate)
684 (emit-reserve-locals asm nlocals))
685 ((and (< nreq (ash 1 12)) (< (- nlocals nreq) (ash 1 12)))
686 (emit-assert-nargs-ee/locals asm nreq (- nlocals nreq)))
687 (else
688 (emit-assert-nargs-ee asm nreq)
689 (emit-reserve-locals asm nlocals))))
690
691 (define-macro-assembler (opt-prelude asm nreq nopt rest? nlocals alternate)
692 (if alternate
693 (emit-br-if-nargs-lt asm nreq alternate)
694 (emit-assert-nargs-ge asm nreq))
695 (cond
696 (rest?
697 (emit-bind-rest asm (+ nreq nopt)))
698 (alternate
699 (emit-br-if-nargs-gt asm (+ nreq nopt) alternate))
700 (else
701 (emit-assert-nargs-le asm (+ nreq nopt))))
702 (emit-reserve-locals asm nlocals))
703
704 (define-macro-assembler (kw-prelude asm nreq nopt rest? kw-indices
705 allow-other-keys? nlocals alternate)
706 (if alternate
707 (emit-br-if-nargs-lt asm nreq alternate)
708 (emit-assert-nargs-ge asm nreq))
709 (let ((ntotal (fold (lambda (kw ntotal)
710 (match kw
711 (((? keyword?) . idx)
712 (max (1+ idx) ntotal))))
713 (+ nreq nopt) kw-indices)))
714 ;; FIXME: port 581f410f
715 (emit-bind-kwargs asm nreq
716 (pack-flags allow-other-keys? rest?)
717 (+ nreq nopt)
718 ntotal
719 kw-indices)
720 (emit-reserve-locals asm nlocals)))
721
722 (define-macro-assembler (label asm sym)
723 (set-asm-labels! asm (acons sym (asm-start asm) (asm-labels asm))))
724
725 (define-macro-assembler (cache-current-module! asm tmp scope)
726 (let ((mod-label (intern-module-cache-cell asm scope)))
727 (emit-current-module asm tmp)
728 (emit-static-set! asm tmp mod-label 0)))
729
730 (define-macro-assembler (cached-toplevel-ref asm dst scope sym)
731 (let ((sym-label (intern-non-immediate asm sym))
732 (mod-label (intern-module-cache-cell asm scope))
733 (cell-label (intern-cache-cell asm scope sym)))
734 (emit-toplevel-ref asm dst cell-label mod-label sym-label)))
735
736 (define-macro-assembler (cached-toplevel-set! asm src scope sym)
737 (let ((sym-label (intern-non-immediate asm sym))
738 (mod-label (intern-module-cache-cell asm scope))
739 (cell-label (intern-cache-cell asm scope sym)))
740 (emit-toplevel-set! asm src cell-label mod-label sym-label)))
741
742 (define-macro-assembler (cached-module-ref asm dst module-name public? sym)
743 (let* ((sym-label (intern-non-immediate asm sym))
744 (key (cons public? module-name))
745 (mod-name-label (intern-constant asm key))
746 (cell-label (intern-cache-cell asm key sym)))
747 (emit-module-ref asm dst cell-label mod-name-label sym-label)))
748
749 (define-macro-assembler (cached-module-set! asm src module-name public? sym)
750 (let* ((sym-label (intern-non-immediate asm sym))
751 (key (cons public? module-name))
752 (mod-name-label (intern-non-immediate asm key))
753 (cell-label (intern-cache-cell asm key sym)))
754 (emit-module-set! asm src cell-label mod-name-label sym-label)))
755
756
757 \f
758
759 ;;;
760 ;;; Helper for linking objects.
761 ;;;
762
763 (define (make-object asm name bv relocs labels . kwargs)
764 "Make a linker object. This helper handles interning the name in the
765 shstrtab, assigning the size, allocating a fresh index, and defining a
766 corresponding linker symbol for the start of the section."
767 (let ((name-idx (intern-section-name! asm (symbol->string name)))
768 (index (asm-next-section-number asm)))
769 (set-asm-next-section-number! asm (1+ index))
770 (make-linker-object (apply make-elf-section
771 #:index index
772 #:name name-idx
773 #:size (bytevector-length bv)
774 kwargs)
775 bv relocs
776 (cons (make-linker-symbol name 0) labels))))
777
778
779 \f
780
781 ;;;
782 ;;; Linking the constant table. This code is somewhat intertwingled
783 ;;; with the intern-constant code above, as that procedure also
784 ;;; residualizes instructions to initialize constants at load time.
785 ;;;
786
787 (define (write-immediate asm buf pos x)
788 (let ((val (object-address x))
789 (endianness (asm-endianness asm)))
790 (case (asm-word-size asm)
791 ((4) (bytevector-u32-set! buf pos val endianness))
792 ((8) (bytevector-u64-set! buf pos val endianness))
793 (else (error "bad word size" asm)))))
794
795 (define (emit-init-constants asm)
796 "If there is writable data that needs initialization at runtime, emit
797 a procedure to do that and return its label. Otherwise return
798 @code{#f}."
799 (let ((inits (asm-inits asm)))
800 (and (not (null? inits))
801 (let ((label (gensym "init-constants")))
802 (emit-text asm
803 `((begin-program ,label ())
804 (assert-nargs-ee/locals 0 1)
805 ,@(reverse inits)
806 (load-constant 0 ,*unspecified*)
807 (return 0)
808 (end-program)))
809 label))))
810
811 (define (link-data asm data name)
812 "Link the static data for a program into the @var{name} section (which
813 should be .data or .rodata), and return the resulting linker object.
814 @var{data} should be a vhash mapping objects to labels."
815 (define (align address alignment)
816 (+ address
817 (modulo (- alignment (modulo address alignment)) alignment)))
818
819 (define tc7-vector 13)
820 (define tc7-narrow-stringbuf 39)
821 (define tc7-wide-stringbuf (+ 39 #x400))
822 (define tc7-ro-string (+ 21 #x200))
823 (define tc7-rtl-program 69)
824
825 (let ((word-size (asm-word-size asm))
826 (endianness (asm-endianness asm)))
827 (define (byte-length x)
828 (cond
829 ((stringbuf? x)
830 (let ((x (stringbuf-string x)))
831 (+ (* 2 word-size)
832 (case (string-bytes-per-char x)
833 ((1) (1+ (string-length x)))
834 ((4) (* (1+ (string-length x)) 4))
835 (else (error "bad string bytes per char" x))))))
836 ((static-procedure? x)
837 (* 2 word-size))
838 ((string? x)
839 (* 4 word-size))
840 ((pair? x)
841 (* 2 word-size))
842 ((vector? x)
843 (* (1+ (vector-length x)) word-size))
844 (else
845 word-size)))
846
847 (define (write-constant-reference buf pos x)
848 ;; The asm-inits will fix up any reference to a non-immediate.
849 (write-immediate asm buf pos (if (immediate? x) x #f)))
850
851 (define (write buf pos obj)
852 (cond
853 ((stringbuf? obj)
854 (let* ((x (stringbuf-string obj))
855 (len (string-length x))
856 (tag (if (= (string-bytes-per-char x) 1)
857 tc7-narrow-stringbuf
858 tc7-wide-stringbuf)))
859 (case word-size
860 ((4)
861 (bytevector-u32-set! buf pos tag endianness)
862 (bytevector-u32-set! buf (+ pos 4) len endianness))
863 ((8)
864 (bytevector-u64-set! buf pos tag endianness)
865 (bytevector-u64-set! buf (+ pos 8) len endianness))
866 (else
867 (error "bad word size" asm)))
868 (let ((pos (+ pos (* word-size 2))))
869 (case (string-bytes-per-char x)
870 ((1)
871 (let lp ((i 0))
872 (if (< i len)
873 (let ((u8 (char->integer (string-ref x i))))
874 (bytevector-u8-set! buf (+ pos i) u8)
875 (lp (1+ i)))
876 (bytevector-u8-set! buf (+ pos i) 0))))
877 ((4)
878 (let lp ((i 0))
879 (if (< i len)
880 (let ((u32 (char->integer (string-ref x i))))
881 (bytevector-u32-set! buf (+ pos (* i 4)) u32 endianness)
882 (lp (1+ i)))
883 (bytevector-u32-set! buf (+ pos (* i 4)) 0 endianness))))
884 (else (error "bad string bytes per char" x))))))
885
886 ((static-procedure? obj)
887 (case word-size
888 ((4)
889 (bytevector-u32-set! buf pos tc7-rtl-program endianness)
890 (bytevector-u32-set! buf (+ pos 4) 0 endianness))
891 ((8)
892 (bytevector-u64-set! buf pos tc7-rtl-program endianness)
893 (bytevector-u64-set! buf (+ pos 8) 0 endianness))
894 (else (error "bad word size"))))
895
896 ((cache-cell? obj)
897 (write-immediate asm buf pos #f))
898
899 ((string? obj)
900 (let ((tag (logior tc7-ro-string (ash (string-length obj) 8))))
901 (case word-size
902 ((4)
903 (bytevector-u32-set! buf pos tc7-ro-string endianness)
904 (write-immediate asm buf (+ pos 4) #f) ; stringbuf
905 (bytevector-u32-set! buf (+ pos 8) 0 endianness)
906 (bytevector-u32-set! buf (+ pos 12) (string-length obj) endianness))
907 ((8)
908 (bytevector-u64-set! buf pos tc7-ro-string endianness)
909 (write-immediate asm buf (+ pos 8) #f) ; stringbuf
910 (bytevector-u64-set! buf (+ pos 16) 0 endianness)
911 (bytevector-u64-set! buf (+ pos 24) (string-length obj) endianness))
912 (else (error "bad word size")))))
913
914 ((pair? obj)
915 (write-constant-reference buf pos (car obj))
916 (write-constant-reference buf (+ pos word-size) (cdr obj)))
917
918 ((vector? obj)
919 (let* ((len (vector-length obj))
920 (tag (logior tc7-vector (ash len 8))))
921 (case word-size
922 ((4) (bytevector-u32-set! buf pos tag endianness))
923 ((8) (bytevector-u64-set! buf pos tag endianness))
924 (else (error "bad word size")))
925 (let lp ((i 0))
926 (when (< i (vector-length obj))
927 (let ((pos (+ pos word-size (* i word-size)))
928 (elt (vector-ref obj i)))
929 (write-constant-reference buf pos elt)
930 (lp (1+ i)))))))
931
932 ((symbol? obj)
933 (write-immediate asm buf pos #f))
934
935 ((keyword? obj)
936 (write-immediate asm buf pos #f))
937
938 ((number? obj)
939 (write-immediate asm buf pos #f))
940
941 (else
942 (error "unrecognized object" obj))))
943
944 (cond
945 ((vlist-null? data) #f)
946 (else
947 (let* ((byte-len (vhash-fold (lambda (k v len)
948 (+ (byte-length k) (align len 8)))
949 0 data))
950 (buf (make-bytevector byte-len 0)))
951 (let lp ((i 0) (pos 0) (labels '()))
952 (if (< i (vlist-length data))
953 (let* ((pair (vlist-ref data i))
954 (obj (car pair))
955 (obj-label (cdr pair)))
956 (write buf pos obj)
957 (lp (1+ i)
958 (align (+ (byte-length obj) pos) 8)
959 (cons (make-linker-symbol obj-label pos) labels)))
960 (make-object asm name buf '() labels))))))))
961
962 (define (link-constants asm)
963 "Link sections to hold constants needed by the program text emitted
964 using @var{asm}.
965
966 Returns three values: an object for the .rodata section, an object for
967 the .data section, and a label for an initialization procedure. Any of
968 these may be @code{#f}."
969 (define (shareable? x)
970 (cond
971 ((stringbuf? x) #t)
972 ((pair? x)
973 (and (immediate? (car x)) (immediate? (cdr x))))
974 ((vector? x)
975 (let lp ((i 0))
976 (or (= i (vector-length x))
977 (and (immediate? (vector-ref x i))
978 (lp (1+ i))))))
979 (else #f)))
980 (let* ((constants (asm-constants asm))
981 (len (vlist-length constants)))
982 (let lp ((i 0)
983 (ro vlist-null)
984 (rw vlist-null))
985 (if (= i len)
986 (values (link-data asm ro '.rodata)
987 (link-data asm rw '.data)
988 (emit-init-constants asm))
989 (let ((pair (vlist-ref constants i)))
990 (if (shareable? (car pair))
991 (lp (1+ i) (vhash-consq (car pair) (cdr pair) ro) rw)
992 (lp (1+ i) ro (vhash-consq (car pair) (cdr pair) rw))))))))
993
994 \f
995
996 ;;;
997 ;;; Linking program text.
998 ;;;
999
1000 (define (process-relocs buf relocs labels)
1001 "Patch up internal x8-s24 relocations, and any s32 relocations that
1002 reference symbols in the text section. Return a list of linker
1003 relocations for references to symbols defined outside the text section."
1004 (fold
1005 (lambda (reloc tail)
1006 (match reloc
1007 ((type label base word)
1008 (let ((abs (assq-ref labels label))
1009 (dst (+ base word)))
1010 (case type
1011 ((s32)
1012 (if abs
1013 (let ((rel (- abs base)))
1014 (s32-set! buf dst rel)
1015 tail)
1016 (cons (make-linker-reloc 'rel32/4 (* dst 4) word label)
1017 tail)))
1018 ((x8-s24)
1019 (unless abs
1020 (error "unbound near relocation" reloc))
1021 (let ((rel (- abs base))
1022 (u32 (u32-ref buf dst)))
1023 (u32-set! buf dst (pack-u8-s24 (logand u32 #xff) rel))
1024 tail))
1025 (else (error "bad relocation kind" reloc)))))))
1026 '()
1027 relocs))
1028
1029 (define (process-labels labels)
1030 "Define linker symbols for the label-offset pairs in @var{labels}.
1031 The offsets are expected to be expressed in words."
1032 (map (lambda (pair)
1033 (make-linker-symbol (car pair) (* (cdr pair) 4)))
1034 labels))
1035
1036 (define (swap-bytes! buf)
1037 "Patch up the text buffer @var{buf}, swapping the endianness of each
1038 32-bit unit."
1039 (unless (zero? (modulo (bytevector-length buf) 4))
1040 (error "unexpected length"))
1041 (let ((byte-len (bytevector-length buf)))
1042 (let lp ((pos 0))
1043 (unless (= pos byte-len)
1044 (bytevector-u32-set!
1045 buf pos
1046 (bytevector-u32-ref buf pos (endianness big))
1047 (endianness little))
1048 (lp (+ pos 4))))))
1049
1050 (define (link-text-object asm)
1051 "Link the .rtl-text section, swapping the endianness of the bytes if
1052 needed."
1053 (let ((buf (make-u32vector (asm-pos asm))))
1054 (let lp ((pos 0) (prev (reverse (asm-prev asm))))
1055 (if (null? prev)
1056 (let ((byte-size (* (asm-idx asm) 4)))
1057 (bytevector-copy! (asm-cur asm) 0 buf pos byte-size)
1058 (unless (eq? (asm-endianness asm) (native-endianness))
1059 (swap-bytes! buf))
1060 (make-object asm '.rtl-text
1061 buf
1062 (process-relocs buf (asm-relocs asm)
1063 (asm-labels asm))
1064 (process-labels (asm-labels asm))))
1065 (let ((len (* *block-size* 4)))
1066 (bytevector-copy! (car prev) 0 buf pos len)
1067 (lp (+ pos len) (cdr prev)))))))
1068
1069
1070 \f
1071
1072 ;;;
1073 ;;; Linking other sections of the ELF file, like the dynamic segment,
1074 ;;; the symbol table, etc.
1075 ;;;
1076
1077 (define (link-dynamic-section asm text rw rw-init)
1078 "Link the dynamic section for an ELF image with RTL text, given the
1079 writable data section @var{rw} needing fixup from the procedure with
1080 label @var{rw-init}. @var{rw-init} may be false. If @var{rw} is true,
1081 it will be added to the GC roots at runtime."
1082 (define-syntax-rule (emit-dynamic-section word-size %set-uword! reloc-type)
1083 (let* ((endianness (asm-endianness asm))
1084 (bv (make-bytevector (* word-size (if rw (if rw-init 12 10) 6)) 0))
1085 (set-uword!
1086 (lambda (i uword)
1087 (%set-uword! bv (* i word-size) uword endianness)))
1088 (relocs '())
1089 (set-label!
1090 (lambda (i label)
1091 (set! relocs (cons (make-linker-reloc 'reloc-type
1092 (* i word-size) 0 label)
1093 relocs))
1094 (%set-uword! bv (* i word-size) 0 endianness))))
1095 (set-uword! 0 DT_GUILE_RTL_VERSION)
1096 (set-uword! 1 #x02020000)
1097 (set-uword! 2 DT_GUILE_ENTRY)
1098 (set-label! 3 '.rtl-text)
1099 (cond
1100 (rw
1101 ;; Add roots to GC.
1102 (set-uword! 4 DT_GUILE_GC_ROOT)
1103 (set-label! 5 '.data)
1104 (set-uword! 6 DT_GUILE_GC_ROOT_SZ)
1105 (set-uword! 7 (bytevector-length (linker-object-bv rw)))
1106 (cond
1107 (rw-init
1108 (set-uword! 8 DT_INIT) ; constants
1109 (set-label! 9 rw-init)
1110 (set-uword! 10 DT_NULL)
1111 (set-uword! 11 0))
1112 (else
1113 (set-uword! 8 DT_NULL)
1114 (set-uword! 9 0))))
1115 (else
1116 (set-uword! 4 DT_NULL)
1117 (set-uword! 5 0)))
1118 (make-object asm '.dynamic bv relocs '()
1119 #:type SHT_DYNAMIC #:flags SHF_ALLOC)))
1120 (case (asm-word-size asm)
1121 ((4) (emit-dynamic-section 4 bytevector-u32-set! abs32/1))
1122 ((8) (emit-dynamic-section 8 bytevector-u64-set! abs64/1))
1123 (else (error "bad word size" asm))))
1124
1125 (define (link-shstrtab asm)
1126 "Link the string table for the section headers."
1127 (intern-section-name! asm ".shstrtab")
1128 (make-object asm '.shstrtab
1129 (link-string-table! (asm-shstrtab asm))
1130 '() '()
1131 #:type SHT_STRTAB #:flags 0))
1132
1133 (define (link-symtab text-section asm)
1134 (let* ((endianness (asm-endianness asm))
1135 (word-size (asm-word-size asm))
1136 (size (elf-symbol-len word-size))
1137 (meta (reverse (asm-meta asm)))
1138 (n (length meta))
1139 (strtab (make-string-table))
1140 (bv (make-bytevector (* n size) 0)))
1141 (define (intern-string! name)
1142 (string-table-intern! strtab (if name (symbol->string name) "")))
1143 (for-each
1144 (lambda (meta n)
1145 (let ((name (intern-string! (meta-name meta))))
1146 (write-elf-symbol bv (* n size) endianness word-size
1147 (make-elf-symbol
1148 #:name name
1149 ;; Symbol value and size are measured in
1150 ;; bytes, not u32s.
1151 #:value (* 4 (meta-low-pc meta))
1152 #:size (* 4 (- (meta-high-pc meta)
1153 (meta-low-pc meta)))
1154 #:type STT_FUNC
1155 #:visibility STV_HIDDEN
1156 #:shndx (elf-section-index text-section)))))
1157 meta (iota n))
1158 (let ((strtab (make-object asm '.strtab
1159 (link-string-table! strtab)
1160 '() '()
1161 #:type SHT_STRTAB #:flags 0)))
1162 (values (make-object asm '.symtab
1163 bv
1164 '() '()
1165 #:type SHT_SYMTAB #:flags 0 #:entsize size
1166 #:link (elf-section-index
1167 (linker-object-section strtab)))
1168 strtab))))
1169
1170 (define (link-objects asm)
1171 (let*-values (((ro rw rw-init) (link-constants asm))
1172 ;; Link text object after constants, so that the
1173 ;; constants initializer gets included.
1174 ((text) (link-text-object asm))
1175 ((dt) (link-dynamic-section asm text rw rw-init))
1176 ((symtab strtab) (link-symtab (linker-object-section text) asm))
1177 ;; This needs to be linked last, because linking other
1178 ;; sections adds entries to the string table.
1179 ((shstrtab) (link-shstrtab asm)))
1180 (filter identity (list text ro rw dt symtab strtab shstrtab))))
1181
1182
1183 \f
1184
1185 ;;;
1186 ;;; High-level public interfaces.
1187 ;;;
1188
1189 (define* (link-assembly asm #:key (page-aligned? #t))
1190 "Produce an ELF image from the code and data emitted into @var{asm}.
1191 The result is a bytevector, by default linked so that read-only and
1192 writable data are on separate pages. Pass @code{#:page-aligned? #f} to
1193 disable this behavior."
1194 (link-elf (link-objects asm) #:page-aligned? page-aligned?))
1195
1196 (define (assemble-program instructions)
1197 "Take the sequence of instructions @var{instructions}, assemble them
1198 into RTL code, link an image, and load that image from memory. Returns
1199 a procedure."
1200 (let ((asm (make-assembler)))
1201 (emit-text asm instructions)
1202 (load-thunk-from-memory (link-assembly asm #:page-aligned? #f))))