3 ;; Copyright (C) 2011, 2012, 2013 Free Software Foundation, Inc.
5 ;;;; This library is free software; you can redistribute it and/or
6 ;;;; modify it under the terms of the GNU Lesser General Public
7 ;;;; License as published by the Free Software Foundation; either
8 ;;;; version 3 of the License, or (at your option) any later version.
10 ;;;; This library is distributed in the hope that it will be useful,
11 ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;;;; Lesser General Public License for more details.
15 ;;;; You should have received a copy of the GNU Lesser General Public
16 ;;;; License along with this library; if not, write to the Free Software
17 ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;;; A linker combines several linker objects into an executable or a
24 ;;; There are several common formats for libraries out there. Since
25 ;;; Guile includes its own linker and loader, we are free to choose any
26 ;;; format, or make up our own.
28 ;;; There are essentially two requirements for a linker format:
29 ;;; libraries should be able to be loaded with the minimal amount of
30 ;;; work; and they should support introspection in some way, in order to
31 ;;; enable good debugging.
33 ;;; These requirements are somewhat at odds, as loading should not have
34 ;;; to stumble over features related to introspection. It so happens
35 ;;; that a lot of smart people have thought about this situation, and
36 ;;; the ELF format embodies the outcome of their thinking. Guile uses
37 ;;; ELF as its format, regardless of the platform's native library
38 ;;; format. It's not inconceivable that Guile could interoperate with
39 ;;; the native dynamic loader at some point, but it's not a near-term
42 ;;; Guile's linker takes a list of objects, sorts them according to
43 ;;; similarity from the perspective of the loader, then writes them out
44 ;;; into one big bytevector in ELF format.
46 ;;; It is often the case that different parts of a library need to refer
47 ;;; to each other. For example, program text may need to refer to a
48 ;;; constant from writable memory. When the linker places sections
49 ;;; (linker objects) into specific locations in the linked bytevector,
50 ;;; it needs to fix up those references. This process is called
51 ;;; /relocation/. References needing relocations are recorded in
52 ;;; "linker-reloc" objects, and collected in a list in each
53 ;;; "linker-object". The actual definitions of the references are
54 ;;; stored in "linker-symbol" objects, also collected in a list in each
57 ;;; By default, the ELF files created by the linker include some padding
58 ;;; so that different parts of the file can be loaded in with different
59 ;;; permissions. For example, some parts of the file are read-only and
60 ;;; thus can be shared between processes. Some parts of the file don't
61 ;;; need to be loaded at all. However this padding can be too much for
62 ;;; interactive compilation, when the code is never written out to disk;
63 ;;; in that case, pass #:page-aligned? #f to `link-elf'.
67 (define-module (system vm linker)
68 #:use-module (rnrs bytevectors)
69 #:use-module (system foreign)
70 #:use-module (system base target)
71 #:use-module ((srfi srfi-1) #:select (append-map))
72 #:use-module (srfi srfi-9)
73 #:use-module (ice-9 receive)
74 #:use-module (ice-9 vlist)
75 #:use-module (ice-9 match)
76 #:use-module (system vm elf)
77 #:export (make-linker-reloc
85 (linker-object-symbols* . linker-object-symbols)
93 (define-syntax fold-values
96 ((_ proc list seed ...)
97 (with-syntax (((s ...) (generate-temporaries #'(seed ...))))
99 (let lp ((l list) (s seed) ...)
103 (call-with-values (lambda () (p elt s ...))
104 (lambda (s ...) (lp l s ...))))))))))))
106 ;; A relocation records a reference to a symbol. When the symbol is
107 ;; resolved to an address, the reloc location will be updated to point
110 ;; Two types. Abs32/1 and Abs64/1 are absolute offsets in bytes.
111 ;; Rel32/4 is a relative signed offset in 32-bit units. Either can have
112 ;; an arbitrary addend as well.
114 (define-record-type <linker-reloc>
115 (make-linker-reloc type loc addend symbol)
117 (type linker-reloc-type) ;; rel32/4, abs32/1, abs64/1
118 (loc linker-reloc-loc)
119 (addend linker-reloc-addend)
120 (symbol linker-reloc-symbol))
122 ;; A symbol is an association between a name and an address. The
123 ;; address is always in regard to some particular address space. When
124 ;; objects come into the linker, their symbols live in the object
125 ;; address space. When the objects are allocated into ELF segments, the
126 ;; symbols will be relocated into memory address space, corresponding to
127 ;; the position the ELF will be loaded at.
129 (define-record-type <linker-symbol>
130 (make-linker-symbol name address)
132 (name linker-symbol-name)
133 (address linker-symbol-address))
135 (define-record-type <linker-object>
136 (%make-linker-object section bv relocs symbols)
138 (section linker-object-section)
139 (bv linker-object-bv)
140 (relocs linker-object-relocs)
141 (symbols linker-object-symbols))
143 (define (make-linker-object section bv relocs symbols)
144 "Create a linker object with the @code{<elf-section>} header
145 @var{section}, bytevector contents @var{bv}, list of linker relocations
146 @var{relocs}, and list of linker symbols @var{symbols}."
147 (%make-linker-object section bv relocs
148 ;; Hide a symbol to the beginning of the section
150 (cons (make-linker-symbol (gensym "*section*") 0)
152 (define (linker-object-section-symbol object)
153 "Return the linker symbol corresponding to the start of this section."
154 (car (linker-object-symbols object)))
155 (define (linker-object-symbols* object)
156 "Return the linker symbols defined by the user for this this section."
157 (cdr (linker-object-symbols object)))
159 (define-record-type <string-table>
160 (%make-string-table strings linked?)
162 (strings string-table-strings set-string-table-strings!)
163 (linked? string-table-linked? set-string-table-linked?!))
165 (define (make-string-table)
166 "Return a string table with one entry: the empty string."
167 (%make-string-table '(("" 0 #vu8())) #f))
169 (define (string-table-length strings)
170 "Return the number of bytes needed for the @var{strings}."
172 (((str pos bytes) . _)
173 ;; The + 1 is for the trailing NUL byte.
174 (+ pos (bytevector-length bytes) 1))))
176 (define (string-table-intern! table str)
177 "Ensure that @var{str} is present in the string table @var{table}.
178 Returns the byte index of the string in that table."
180 (($ <string-table> strings linked?)
181 (match (assoc str strings)
184 (let ((next (string-table-length strings)))
186 (error "string table already linked, can't intern" table str))
187 (set-string-table-strings! table
188 (cons (list str next (string->utf8 str))
192 (define (link-string-table! table)
193 "Link the functional string table @var{table} into a sequence of
194 bytes, suitable for use as the contents of an ELF string table section."
196 (($ <string-table> strings #f)
197 (let ((out (make-bytevector (string-table-length strings) 0)))
201 (bytevector-copy! bytes 0 out pos (bytevector-length bytes))))
203 (set-string-table-linked?! table #t)
206 (define (segment-kind section)
207 "Return the type of segment needed to store @var{section}, as a pair.
208 The car is the @code{PT_} segment type, or @code{#f} if the section
209 doesn't need to be present in a loadable segment. The cdr is a bitfield
210 of associated @code{PF_} permissions."
211 (let ((flags (elf-section-flags section)))
213 ((= (elf-section-type section) SHT_DYNAMIC) PT_DYNAMIC)
214 ;; Sections without SHF_ALLOC don't go in segments.
217 (logior (if (zero? (logand SHF_ALLOC flags))
220 (if (zero? (logand SHF_EXECINSTR flags))
223 (if (zero? (logand SHF_WRITE flags))
227 (define (count-segments objects)
228 "Return the total number of segments needed to represent the linker
229 objects in @var{objects}, including the segment needed for the ELF
230 header and segment table."
232 (fold-values (lambda (object kinds)
233 (let ((kind (segment-kind (linker-object-section object))))
234 (if (and (car kind) (not (member kind kinds)))
238 ;; We know there will be at least one segment,
239 ;; containing at least the header and segment table.
240 (list (cons PT_LOAD PF_R)))))
242 (define (group-by-cars ls)
243 (let lp ((ls ls) (k #f) (group #f) (out '()))
248 (cons (cons k (reverse! group)) out)
251 (if (and group (equal? k k*))
252 (lp ls k (cons v group) out)
255 (cons (cons k (reverse! group)) out)
258 (define (collate-objects-into-segments objects)
259 "Given the list of linker objects @var{objects}, group them into
260 contiguous ELF segments of the same type and flags. The result is an
261 alist that maps segment types to lists of linker objects. See
262 @code{segment-type} for a description of segment types. Within a
263 segment, the order of the linker objects is preserved."
267 (cons (segment-kind (linker-object-section o)) o))
270 (let* ((x-kind (car x)) (y-kind (car y))
271 (x-type (car x-kind)) (y-type (car y-kind))
272 (x-flags (cdr x-kind)) (y-flags (cdr y-kind))
273 (x-section (linker-object-section (cdr x)))
274 (y-section (linker-object-section (cdr y))))
276 ((not (equal? x-kind y-kind))
280 ((not (equal? x-flags y-flags))
286 ((not (equal? (elf-section-type x-section)
287 (elf-section-type y-section)))
289 ((equal? (elf-section-type x-section) SHT_NOBITS) #t)
290 ((equal? (elf-section-type y-section) SHT_NOBITS) #f)
291 (else (< (elf-section-type x-section)
292 (elf-section-type y-section)))))
294 ;; Leave them in the initial order. This allows us to ensure
295 ;; that the ELF header is written first.
298 (define (align address alignment)
299 (if (zero? alignment)
302 (modulo (- alignment (modulo address alignment)) alignment))))
304 (define (relocate-section-header sec offset)
305 "Return a new section header, just like @var{sec} but with its
306 @code{offset} (and @code{addr} if it is loadable) set to @var{offset}."
307 (make-elf-section #:index (elf-section-index sec)
308 #:name (elf-section-name sec)
309 #:type (elf-section-type sec)
310 #:flags (elf-section-flags sec)
311 #:addr (if (zero? (logand SHF_ALLOC
312 (elf-section-flags sec)))
316 #:size (elf-section-size sec)
317 #:link (elf-section-link sec)
318 #:info (elf-section-info sec)
319 #:addralign (elf-section-addralign sec)
320 #:entsize (elf-section-entsize sec)))
322 (define *page-size* 4096)
324 (define (add-symbols symbols offset symtab)
325 "Add @var{symbols} to the symbol table @var{symtab}, relocating them
326 from object address space to memory address space. Returns a new symbol
329 (lambda (symbol symtab)
330 (let ((name (linker-symbol-name symbol))
331 (addr (linker-symbol-address symbol)))
332 (when (vhash-assq name symtab)
333 (error "duplicate symbol" name))
334 (vhash-consq name (make-linker-symbol name (+ addr offset)) symtab)))
338 (define (allocate-segment write-segment-header!
339 phidx type flags objects addr symtab alignment)
340 "Given a list of linker objects that should go in a segment, the type
341 and flags that the segment should have, and the address at which the
342 segment should start, compute the positions that each object should have
345 Returns three values: the address of the next byte after the segment, a
346 list of relocated objects, and the symbol table. The symbol table is
347 the same as @var{symtab}, augmented with the symbols defined in
348 @var{objects}, relocated to their positions in the image.
350 In what is something of a quirky interface, this routine also patches up
351 the segment table using @code{write-segment-header!}."
352 (let* ((alignment (fold-values (lambda (o alignment)
353 (lcm (elf-section-addralign
354 (linker-object-section o))
358 (addr (align addr alignment)))
359 (receive (objects endaddr symtab)
361 (lambda (o out addr symtab)
362 (let* ((section (linker-object-section o))
363 (addr (align addr (elf-section-addralign section))))
365 (cons (make-linker-object
366 (relocate-section-header section addr)
368 (linker-object-relocs o)
369 (linker-object-symbols o))
371 (+ addr (elf-section-size section))
372 (add-symbols (linker-object-symbols o) addr symtab))))
376 (write-segment-header!
377 (make-elf-segment #:index phidx #:type type
378 #:offset addr #:vaddr addr
379 #:filesz (- endaddr addr) #:memsz (- endaddr addr)
380 #:flags flags #:align alignment)))
385 (define (process-reloc reloc bv section-offset symtab endianness)
386 "Process a relocation. Given that a section containing @var{reloc}
387 was just written into the image @var{bv} at offset @var{section-offset},
388 fix it up so that its reference points to the correct position of its
389 symbol, as present in @var{symtab}."
390 (match (vhash-assq (linker-reloc-symbol reloc) symtab)
392 (error "Undefined symbol" (linker-reloc-symbol reloc)))
394 ;; The reloc was written at LOC bytes after SECTION-OFFSET.
395 (let* ((offset (+ (linker-reloc-loc reloc) section-offset))
396 (target (linker-symbol-address symbol)))
397 (case (linker-reloc-type reloc)
399 (let ((diff (- target offset)))
400 (unless (zero? (modulo diff 4))
401 (error "Bad offset" reloc symbol offset))
402 (bytevector-s32-set! bv offset
403 (+ (/ diff 4) (linker-reloc-addend reloc))
406 (bytevector-u32-set! bv offset target endianness))
408 (bytevector-u64-set! bv offset target endianness))
410 (error "bad reloc type" reloc)))))))
412 (define (write-linker-object bv o symtab endianness)
413 "Write the bytevector for the section wrapped by the linker object
414 @var{o} into the image @var{bv}. The section header in @var{o} should
415 already be relocated its final position in the image. Any relocations
416 in the section will be processed to point to the correct symbol
417 locations, as given in @var{symtab}."
418 (let* ((section (linker-object-section o))
419 (offset (elf-section-offset section))
420 (len (elf-section-size section))
421 (bytes (linker-object-bv o))
422 (relocs (linker-object-relocs o)))
423 (if (zero? (logand SHF_ALLOC (elf-section-flags section)))
424 (unless (zero? (elf-section-addr section))
425 (error "non-loadable section has non-zero addr" section))
426 (unless (= offset (elf-section-addr section))
427 (error "loadable section has offset != addr" section)))
428 (if (not (= (elf-section-type section) SHT_NOBITS))
430 (if (not (= len (bytevector-length bytes)))
431 (error "unexpected length" section bytes))
432 (bytevector-copy! bytes 0 bv offset len)
433 (for-each (lambda (reloc)
434 (process-reloc reloc bv offset symtab endianness))
437 (define (find-shstrndx objects)
438 "Find the section name string table in @var{objects}, and return its
440 (or-map (lambda (object)
441 (let* ((section (linker-object-section object))
442 (bv (linker-object-bv object))
443 (name (elf-section-name section)))
444 (and (= (elf-section-type section) SHT_STRTAB)
445 (equal? (false-if-exception (string-table-ref bv name))
447 (elf-section-index section))))
450 (define (add-elf-objects objects endianness word-size)
451 "Given the list of linker objects supplied by the user, add linker
452 objects corresponding to parts of the ELF file: the null object, the ELF
453 header, and the section table.
455 Both of these internal objects include relocs, allowing their
456 inter-object references to be patched up when the final image allocation
457 is known. There is special support for patching up the segment table,
458 however. Because the segment table needs to know the segment sizes,
459 which is the difference between two symbols in image space, and there is
460 no reloc kind that is the difference between two symbols, we make a hack
461 and return a closure that patches up segment table entries. It seems to
464 Returns two values: the procedure to patch the segment table, and the
465 list of objects, augmented with objects for the special ELF sections."
466 (define phoff (elf-header-len word-size))
467 (define phentsize (elf-program-header-len word-size))
468 (define shentsize (elf-section-header-len word-size))
469 (define shnum (+ (length objects) 3))
474 (else (error "bad word size" word-size))))
476 ;; ELF requires that the first entry in the section table be of type
479 (define (make-null-section)
480 (make-linker-object (make-elf-section #:index 0 #:type SHT_NULL
481 #:flags 0 #:addralign 0)
484 ;; The ELF header and the segment table.
486 (define (make-header phnum index shoff-label)
487 (let* ((header (make-elf #:byte-order endianness #:word-size word-size
488 #:phoff phoff #:phnum phnum #:phentsize phentsize
489 #:shoff 0 #:shnum shnum #:shentsize shentsize
490 #:shstrndx (or (find-shstrndx objects) SHN_UNDEF)))
491 (shoff-reloc (make-linker-reloc reloc-kind
492 (elf-header-shoff-offset word-size)
495 (size (+ phoff (* phnum phentsize)))
496 (bv (make-bytevector size 0)))
497 (write-elf-header bv header)
498 ;; Leave the segment table uninitialized; it will be filled in
499 ;; later by calls to the write-segment-header! closure.
500 (make-linker-object (make-elf-section #:index index #:type SHT_PROGBITS
501 #:flags SHF_ALLOC #:size size)
506 ;; The section table.
508 (define (make-footer objects shoff-label)
509 (let* ((size (* shentsize shnum))
510 (bv (make-bytevector size 0))
511 (section-table (make-elf-section #:index (length objects)
515 (define (write-and-reloc section-label section relocs)
516 (let ((offset (* shentsize (elf-section-index section))))
517 (write-elf-section-header bv offset endianness word-size section)
518 (if (= (elf-section-type section) SHT_NULL)
521 (cons (make-linker-reloc
524 (elf-section-header-offset-offset word-size))
528 (if (zero? (logand SHF_ALLOC (elf-section-flags section)))
530 (cons (make-linker-reloc
533 (elf-section-header-addr-offset word-size))
537 (let ((relocs (fold-values
538 (lambda (object relocs)
541 (linker-object-section-symbol object))
542 (linker-object-section object)
545 (write-and-reloc shoff-label section-table '()))))
546 (%make-linker-object section-table bv relocs
547 (list (make-linker-symbol shoff-label 0))))))
549 (let* ((null-section (make-null-section))
550 (objects (cons null-section objects))
552 (shoff (gensym "*section-table*"))
553 (header (make-header (count-segments objects) (length objects) shoff))
554 (objects (cons header objects))
556 (footer (make-footer objects shoff))
557 (objects (cons footer objects)))
559 ;; The header includes the segment table, which needs offsets and
560 ;; sizes of the segments. Normally we would use relocs to rewrite
561 ;; these values, but there is no reloc type that would allow us to
562 ;; compute size. Such a reloc would need to take the difference
563 ;; between two symbols, and it's probably a bad idea architecturally
566 ;; So instead we return a closure to patch up the segment table.
567 ;; Normally we'd shy away from such destructive interfaces, but it's
568 ;; OK as we create the header section ourselves.
570 (define (write-segment-header! segment)
571 (let ((bv (linker-object-bv header))
572 (offset (+ phoff (* (elf-segment-index segment) phentsize))))
573 (write-elf-program-header bv offset endianness word-size segment)))
575 (values write-segment-header! objects)))
577 (define (allocate-elf objects page-aligned? endianness word-size)
578 "Lay out @var{objects} into an ELF image, computing the size of the
579 file, the positions of the objects, and the global symbol table.
581 If @var{page-aligned?} is true, read-only and writable data are
582 separated so that only those writable parts of the image need be mapped
583 with writable permissions. This makes the resulting image larger. It
584 is more suitable to situations where you would write a file out to disk
585 and read it in with mmap. Otherwise if @var{page-aligned?} is false,
586 sections default to 8-byte alignment.
588 Returns three values: the total image size, a list of objects with
589 relocated headers, and the global symbol table."
590 (receive (write-segment-header! objects)
591 (add-elf-objects objects endianness word-size)
592 (let lp ((seglists (collate-objects-into-segments objects))
599 ((((type . flags) objs-in ...) seglists ...)
600 (receive (addr objs-out symtab)
602 write-segment-header!
603 phidx type flags objs-in addr symtab
604 (if (and page-aligned?
605 (not (= flags prev-flags))
606 ;; Allow sections that are not in
607 ;; loadable segments to share pages
608 ;; with PF_R segments.
609 (not (and (not type) (= PF_R prev-flags))))
613 (fold-values cons objs-out objects)
614 (if type (1+ phidx) phidx)
623 (define (check-section-numbers objects)
624 "Verify that taken as a whole, that all objects have distinct,
625 contiguous section numbers, starting from 1. (Section 0 is the null
627 (let* ((nsections (1+ (length objects))) ; 1+ for initial NULL section.
628 (sections (make-vector nsections #f)))
629 (for-each (lambda (object)
630 (let ((n (elf-section-index (linker-object-section object))))
633 (error "Invalid section number" object))
635 (error "Invalid section number" object))
636 ((vector-ref sections n)
637 (error "Duplicate section" (vector-ref sections n) object))
639 (vector-set! sections n object)))))
642 ;; Given a list of linker objects, collate the objects into segments,
643 ;; allocate the segments, allocate the ELF bytevector, and write the
644 ;; segments into the bytevector, relocating as we go.
646 (define* (link-elf objects #:key
648 (endianness (target-endianness))
649 (word-size (target-word-size)))
650 "Create an ELF image from the linker objects, @var{objects}.
652 If @var{page-aligned?} is true, read-only and writable data are
653 separated so that only those writable parts of the image need be mapped
654 with writable permissions. This is suitable for situations where you
655 would write a file out to disk and read it in with @code{mmap}.
656 Otherwise if @var{page-aligned?} is false, sections default to 8-byte
659 Returns a bytevector."
660 (check-section-numbers objects)
661 (receive (size objects symtab)
662 (allocate-elf objects page-aligned? endianness word-size)
663 (let ((bv (make-bytevector size 0)))
666 (write-linker-object bv object symtab endianness))