/* Extracted from bpt/guile.git, file libguile/loader.c
   (merge of branch 'stable-2.0').  */
1 /* Copyright (C) 2001, 2009, 2010, 2011, 2012
2 * 2013 Free Software Foundation, Inc.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public License
6 * as published by the Free Software Foundation; either version 3 of
7 * the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17 * 02110-1301 USA
18 */
19
20 #if HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <string.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27
28 #ifdef HAVE_SYS_MMAN_H
29 #include <sys/mman.h>
30 #endif
31
32 #include <sys/stat.h>
33 #include <sys/types.h>
34 #include <assert.h>
35 #include <alignof.h>
36 #include <byteswap.h>
37
38 #include <full-read.h>
39
40 #include "_scm.h"
41 #include "elf.h"
42 #include "programs.h"
43 #include "loader.h"
44
45 /* This file contains the loader for Guile's on-disk format: ELF with
46 some custom tags in the dynamic segment. */
47
48 #if SIZEOF_SCM_T_BITS == 4
49 #define Elf_Half Elf32_Half
50 #define Elf_Word Elf32_Word
51 #define Elf_Ehdr Elf32_Ehdr
52 #define ELFCLASS ELFCLASS32
53 #define Elf_Phdr Elf32_Phdr
54 #define Elf_Dyn Elf32_Dyn
55 #elif SIZEOF_SCM_T_BITS == 8
56 #define Elf_Half Elf64_Half
57 #define Elf_Word Elf64_Word
58 #define Elf_Ehdr Elf64_Ehdr
59 #define ELFCLASS ELFCLASS64
60 #define Elf_Phdr Elf64_Phdr
61 #define Elf_Dyn Elf64_Dyn
62 #else
63 #error
64 #endif
65
66 #define DT_LOGUILE 0x37146000 /* Start of Guile-specific */
67 #define DT_GUILE_GC_ROOT 0x37146000 /* Offset of GC roots */
68 #define DT_GUILE_GC_ROOT_SZ 0x37146001 /* Size in machine words of GC
69 roots */
70 #define DT_GUILE_ENTRY 0x37146002 /* Address of entry thunk */
71 #define DT_GUILE_VM_VERSION 0x37146003 /* Bytecode version */
72 #define DT_HIGUILE 0x37146fff /* End of Guile-specific */
73
74 #ifdef WORDS_BIGENDIAN
75 #define ELFDATA ELFDATA2MSB
76 #else
77 #define ELFDATA ELFDATA2LSB
78 #endif
79
80 static void register_elf (char *data, size_t len);
81
82 enum bytecode_kind
83 {
84 BYTECODE_KIND_NONE,
85 BYTECODE_KIND_GUILE_2_2
86 };
87
88 static SCM
89 pointer_to_procedure (enum bytecode_kind bytecode_kind, char *ptr)
90 {
91 switch (bytecode_kind)
92 {
93 case BYTECODE_KIND_GUILE_2_2:
94 {
95 return scm_i_make_program ((scm_t_uint32 *) ptr);
96 }
97 case BYTECODE_KIND_NONE:
98 default:
99 abort ();
100 }
101 }
102
103 static const char*
104 check_elf_header (const Elf_Ehdr *header)
105 {
106 if (!(header->e_ident[EI_MAG0] == ELFMAG0
107 && header->e_ident[EI_MAG1] == ELFMAG1
108 && header->e_ident[EI_MAG2] == ELFMAG2
109 && header->e_ident[EI_MAG3] == ELFMAG3))
110 return "not an ELF file";
111
112 if (header->e_ident[EI_CLASS] != ELFCLASS)
113 return "ELF file does not have native word size";
114
115 if (header->e_ident[EI_DATA] != ELFDATA)
116 return "ELF file does not have native byte order";
117
118 if (header->e_ident[EI_VERSION] != EV_CURRENT)
119 return "bad ELF version";
120
121 if (header->e_ident[EI_OSABI] != ELFOSABI_STANDALONE)
122 return "unexpected OS ABI";
123
124 if (header->e_ident[EI_ABIVERSION] != 0)
125 return "unexpected ABI version";
126
127 if (header->e_type != ET_DYN)
128 return "unexpected ELF type";
129
130 if (header->e_machine != EM_NONE)
131 return "unexpected machine";
132
133 if (header->e_version != EV_CURRENT)
134 return "unexpected ELF version";
135
136 if (header->e_ehsize != sizeof *header)
137 return "unexpected header size";
138
139 if (header->e_phentsize != sizeof (Elf_Phdr))
140 return "unexpected program header size";
141
142 return NULL;
143 }
144
/* True if OFFSET is a multiple of ALIGNMENT (which must be a power of
   two).  */
#define IS_ALIGNED(offset, alignment) \
  (!((offset) & ((alignment) - 1)))
/* Round OFFSET up to the next multiple of ALIGNMENT (a power of two).
   Both arguments are fully parenthesized: the previous definition
   expanded compound arguments such as `1 << n' incorrectly because of
   operator precedence.  */
#define ALIGN(offset, alignment) \
  (((offset) + ((alignment) - 1)) & ~((alignment) - 1))
149
150 /* Return the alignment required by the ELF at DATA, of LEN bytes. */
151 static size_t
152 elf_alignment (const char *data, size_t len)
153 {
154 Elf_Ehdr *header;
155 int i;
156 size_t alignment = 8;
157
158 if (len < sizeof(Elf_Ehdr))
159 return alignment;
160 header = (Elf_Ehdr *) data;
161 if (header->e_phoff + header->e_phnum * header->e_phentsize >= len)
162 return alignment;
163 for (i = 0; i < header->e_phnum; i++)
164 {
165 Elf_Phdr *phdr;
166 const char *phdr_addr = data + header->e_phoff + i * header->e_phentsize;
167
168 if (!IS_ALIGNED ((scm_t_uintptr) phdr_addr, alignof_type (Elf_Phdr)))
169 return alignment;
170 phdr = (Elf_Phdr *) phdr_addr;
171
172 if (phdr->p_align & (phdr->p_align - 1))
173 return alignment;
174
175 if (phdr->p_align > alignment)
176 alignment = phdr->p_align;
177 }
178
179 return alignment;
180 }
181
182 /* This function leaks the memory that it allocates. */
183 static char*
184 alloc_aligned (size_t len, unsigned alignment)
185 {
186 char *ret;
187
188 if (alignment == 8)
189 {
190 /* FIXME: Assert that we actually have an 8-byte-aligned malloc. */
191 ret = malloc (len);
192 }
193 #if defined(HAVE_SYS_MMAN_H) && defined(MMAP_ANONYMOUS)
194 else if (alignment == SCM_PAGE_SIZE)
195 {
196 ret = mmap (NULL, len, PROT_READ | PROT_WRITE, -1, 0);
197 if (ret == MAP_FAILED)
198 SCM_SYSERROR;
199 }
200 #endif
201 else
202 {
203 if (len + alignment < len)
204 abort ();
205
206 ret = malloc (len + alignment - 1);
207 if (!ret)
208 abort ();
209 ret = (char *) ALIGN ((scm_t_uintptr) ret, alignment);
210 }
211
212 return ret;
213 }
214
/* Return a fresh copy of the LEN bytes at DATA, aligned as the ELF
   image they contain requires.  The copy is never freed (see
   alloc_aligned).  */
static char*
copy_and_align_elf_data (const char *data, size_t len)
{
  char *copy = alloc_aligned (len, elf_alignment (data, len));

  memcpy (copy, data, len);
  return copy;
}
227
#ifdef HAVE_SYS_MMAN_H
/* Translate ELF segment flags (PF_*) into mmap/mprotect protection
   bits (PROT_*).  */
static int
segment_flags_to_prot (Elf_Word flags)
{
  int prot = 0;

  if (flags & PF_R)
    prot |= PROT_READ;
  if (flags & PF_W)
    prot |= PROT_WRITE;
  if (flags & PF_X)
    prot |= PROT_EXEC;

  return prot;
}
#endif
244
245 static char*
246 process_dynamic_segment (char *base, Elf_Phdr *dyn_phdr,
247 SCM *init_out, SCM *entry_out)
248 {
249 char *dyn_addr = base + dyn_phdr->p_vaddr;
250 Elf_Dyn *dyn = (Elf_Dyn *) dyn_addr;
251 size_t i, dyn_size = dyn_phdr->p_memsz / sizeof (Elf_Dyn);
252 char *init = 0, *gc_root = 0, *entry = 0;
253 scm_t_ptrdiff gc_root_size = 0;
254 enum bytecode_kind bytecode_kind = BYTECODE_KIND_NONE;
255
256 for (i = 0; i < dyn_size; i++)
257 {
258 if (dyn[i].d_tag == DT_NULL)
259 break;
260
261 switch (dyn[i].d_tag)
262 {
263 case DT_INIT:
264 if (init)
265 return "duplicate DT_INIT";
266 init = base + dyn[i].d_un.d_val;
267 break;
268 case DT_GUILE_GC_ROOT:
269 if (gc_root)
270 return "duplicate DT_GUILE_GC_ROOT";
271 gc_root = base + dyn[i].d_un.d_val;
272 break;
273 case DT_GUILE_GC_ROOT_SZ:
274 if (gc_root_size)
275 return "duplicate DT_GUILE_GC_ROOT_SZ";
276 gc_root_size = dyn[i].d_un.d_val;
277 break;
278 case DT_GUILE_ENTRY:
279 if (entry)
280 return "duplicate DT_GUILE_ENTRY";
281 entry = base + dyn[i].d_un.d_val;
282 break;
283 case DT_GUILE_VM_VERSION:
284 if (bytecode_kind != BYTECODE_KIND_NONE)
285 return "duplicate DT_GUILE_VM_VERSION";
286 {
287 scm_t_uint16 major = dyn[i].d_un.d_val >> 16;
288 scm_t_uint16 minor = dyn[i].d_un.d_val & 0xffff;
289 switch (major)
290 {
291 case 0x0202:
292 bytecode_kind = BYTECODE_KIND_GUILE_2_2;
293 /* As we get closer to 2.2, we will allow for backwards
294 compatibility and we can change this test to ">"
295 instead of "!=". However until then, to deal with VM
296 churn it's best to keep these things in
297 lock-step. */
298 if (minor != SCM_OBJCODE_MINOR_VERSION)
299 return "incompatible bytecode version";
300 break;
301 default:
302 return "incompatible bytecode kind";
303 }
304 break;
305 }
306 }
307 }
308
309 if (!entry)
310 return "missing DT_GUILE_ENTRY";
311
312 switch (bytecode_kind)
313 {
314 case BYTECODE_KIND_GUILE_2_2:
315 if ((scm_t_uintptr) init % 4)
316 return "unaligned DT_INIT";
317 if ((scm_t_uintptr) entry % 4)
318 return "unaligned DT_GUILE_ENTRY";
319 break;
320 case BYTECODE_KIND_NONE:
321 default:
322 return "missing DT_GUILE_VM_VERSION";
323 }
324
325 if (gc_root)
326 GC_add_roots (gc_root, gc_root + gc_root_size);
327
328 *init_out = init ? pointer_to_procedure (bytecode_kind, init) : SCM_BOOL_F;
329 *entry_out = pointer_to_procedure (bytecode_kind, entry);
330 return NULL;
331 }
332
333 #define ABORT(msg) do { err_msg = msg; goto cleanup; } while (0)
334
335 static SCM
336 load_thunk_from_memory (char *data, size_t len, int is_read_only)
337 #define FUNC_NAME "load-thunk-from-memory"
338 {
339 Elf_Ehdr *header;
340 Elf_Phdr *ph;
341 const char *err_msg = 0;
342 size_t n, alignment = 8;
343 int i;
344 int dynamic_segment = -1;
345 SCM init = SCM_BOOL_F, entry = SCM_BOOL_F;
346
347 if (len < sizeof *header)
348 ABORT ("object file too small");
349
350 header = (Elf_Ehdr*) data;
351
352 if ((err_msg = check_elf_header (header)))
353 goto cleanup;
354
355 if (header->e_phnum == 0)
356 ABORT ("no loadable segments");
357 n = header->e_phnum;
358
359 if (len < header->e_phoff + n * sizeof (Elf_Phdr))
360 ABORT ("object file too small");
361
362 ph = (Elf_Phdr*) (data + header->e_phoff);
363
364 /* Check that the segment table is sane. */
365 for (i = 0; i < n; i++)
366 {
367 if (ph[i].p_filesz != ph[i].p_memsz)
368 ABORT ("expected p_filesz == p_memsz");
369
370 if (!ph[i].p_flags)
371 ABORT ("expected nonzero segment flags");
372
373 if (ph[i].p_align < alignment)
374 {
375 if (ph[i].p_align % alignment)
376 ABORT ("expected new alignment to be multiple of old");
377 alignment = ph[i].p_align;
378 }
379
380 if (ph[i].p_type == PT_DYNAMIC)
381 {
382 if (dynamic_segment >= 0)
383 ABORT ("expected only one PT_DYNAMIC segment");
384 dynamic_segment = i;
385 }
386
387 if (i == 0)
388 {
389 if (ph[i].p_vaddr != 0)
390 ABORT ("first loadable vaddr is not 0");
391 }
392 else
393 {
394 if (ph[i].p_vaddr < ph[i-1].p_vaddr + ph[i-1].p_memsz)
395 ABORT ("overlapping segments");
396
397 if (ph[i].p_offset + ph[i].p_filesz > len)
398 ABORT ("segment beyond end of byte array");
399 }
400 }
401
402 if (dynamic_segment < 0)
403 ABORT ("no PT_DYNAMIC segment");
404
405 if (!IS_ALIGNED ((scm_t_uintptr) data, alignment))
406 ABORT ("incorrectly aligned base");
407
408 /* Allow writes to writable pages. */
409 if (is_read_only)
410 {
411 #ifdef HAVE_SYS_MMAN_H
412 for (i = 0; i < n; i++)
413 {
414 if (ph[i].p_flags == PF_R)
415 continue;
416 if (ph[i].p_align != 4096)
417 continue;
418
419 if (mprotect (data + ph[i].p_vaddr,
420 ph[i].p_memsz,
421 segment_flags_to_prot (ph[i].p_flags)))
422 goto cleanup;
423 }
424 #else
425 ABORT ("expected writable pages");
426 #endif
427 }
428
429 if ((err_msg = process_dynamic_segment (data, &ph[dynamic_segment],
430 &init, &entry)))
431 goto cleanup;
432
433 if (scm_is_true (init))
434 scm_call_0 (init);
435
436 register_elf (data, len);
437
438 /* Finally! Return the thunk. */
439 return entry;
440
441 cleanup:
442 {
443 if (errno)
444 SCM_SYSERROR;
445 scm_misc_error (FUNC_NAME, err_msg ? err_msg : "error loading ELF file",
446 SCM_EOL);
447 }
448 }
449 #undef FUNC_NAME
450
451 #define SCM_PAGE_SIZE 4096
452
453 static char*
454 map_file_contents (int fd, size_t len, int *is_read_only)
455 #define FUNC_NAME "load-thunk-from-file"
456 {
457 char *data;
458
459 #ifdef HAVE_SYS_MMAN_H
460 data = mmap (NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
461 if (data == MAP_FAILED)
462 SCM_SYSERROR;
463 *is_read_only = 1;
464 #else
465 if (lseek (fd, 0, SEEK_START) < 0)
466 {
467 int errno_save = errno;
468 (void) close (fd);
469 errno = errno_save;
470 SCM_SYSERROR;
471 }
472
473 /* Given that we are using the read fallback, optimistically assume
474 that the .go files were made with 8-byte alignment.
475 alignment. */
476 data = malloc (end);
477 if (!data)
478 {
479 (void) close (fd);
480 scm_misc_error (FUNC_NAME, "failed to allocate ~A bytes",
481 scm_list_1 (scm_from_size_t (end)));
482 }
483
484 if (full_read (fd, data, end) != end)
485 {
486 int errno_save = errno;
487 (void) close (fd);
488 errno = errno_save;
489 if (errno)
490 SCM_SYSERROR;
491 scm_misc_error (FUNC_NAME, "short read while loading objcode",
492 SCM_EOL);
493 }
494
495 /* If our optimism failed, fall back. */
496 {
497 unsigned alignment = sniff_elf_alignment (data, end);
498
499 if (alignment != 8)
500 {
501 char *copy = copy_and_align_elf_data (data, end, alignment);
502 free (data);
503 data = copy;
504 }
505 }
506
507 *is_read_only = 0;
508 #endif
509
510 return data;
511 }
512 #undef FUNC_NAME
513
514 SCM_DEFINE (scm_load_thunk_from_file, "load-thunk-from-file", 1, 0, 0,
515 (SCM filename),
516 "")
517 #define FUNC_NAME s_scm_load_thunk_from_file
518 {
519 char *c_filename;
520 int fd, is_read_only;
521 off_t end;
522 char *data;
523
524 SCM_VALIDATE_STRING (1, filename);
525
526 c_filename = scm_to_locale_string (filename);
527 fd = open (c_filename, O_RDONLY | O_BINARY | O_CLOEXEC);
528 free (c_filename);
529 if (fd < 0) SCM_SYSERROR;
530
531 end = lseek (fd, 0, SEEK_END);
532 if (end < 0)
533 SCM_SYSERROR;
534
535 data = map_file_contents (fd, end, &is_read_only);
536
537 (void) close (fd);
538
539 return load_thunk_from_memory (data, end, is_read_only);
540 }
541 #undef FUNC_NAME
542
543 SCM_DEFINE (scm_load_thunk_from_memory, "load-thunk-from-memory", 1, 0, 0,
544 (SCM bv),
545 "")
546 #define FUNC_NAME s_scm_load_thunk_from_memory
547 {
548 char *data;
549 size_t len;
550
551 SCM_VALIDATE_BYTEVECTOR (1, bv);
552
553 data = (char *) SCM_BYTEVECTOR_CONTENTS (bv);
554 len = SCM_BYTEVECTOR_LENGTH (bv);
555
556 /* Copy data in order to align it, to trace its GC roots and
557 writable sections, and to keep it in memory. */
558
559 data = copy_and_align_elf_data (data, len);
560
561 return load_thunk_from_memory (data, len, 0);
562 }
563 #undef FUNC_NAME
564
565 \f
566
567 struct mapped_elf_image
568 {
569 char *start;
570 char *end;
571 };
572
573 static struct mapped_elf_image *mapped_elf_images = NULL;
574 static size_t mapped_elf_images_count = 0;
575 static size_t mapped_elf_images_allocated = 0;
576
577 static size_t
578 find_mapped_elf_insertion_index (char *ptr)
579 {
580 /* "mapped_elf_images_count" must never be dereferenced. */
581 size_t start = 0, end = mapped_elf_images_count;
582
583 while (start < end)
584 {
585 size_t n = start + (end - start) / 2;
586
587 if (ptr < mapped_elf_images[n].end)
588 end = n;
589 else
590 start = n + 1;
591 }
592
593 return start;
594 }
595
596 static void
597 register_elf (char *data, size_t len)
598 {
599 scm_i_pthread_mutex_lock (&scm_i_misc_mutex);
600 {
601 /* My kingdom for a generic growable sorted vector library. */
602 if (mapped_elf_images_count == mapped_elf_images_allocated)
603 {
604 struct mapped_elf_image *prev;
605 size_t n;
606
607 if (mapped_elf_images_allocated)
608 mapped_elf_images_allocated *= 2;
609 else
610 mapped_elf_images_allocated = 16;
611
612 prev = mapped_elf_images;
613 mapped_elf_images =
614 scm_gc_malloc_pointerless (sizeof (*mapped_elf_images)
615 * mapped_elf_images_allocated,
616 "mapped elf images");
617
618 for (n = 0; n < mapped_elf_images_count; n++)
619 {
620 mapped_elf_images[n].start = prev[n].start;
621 mapped_elf_images[n].end = prev[n].end;
622 }
623 }
624
625 {
626 size_t end;
627 size_t n = find_mapped_elf_insertion_index (data);
628
629 for (end = mapped_elf_images_count; n < end; end--)
630 {
631 mapped_elf_images[end].start = mapped_elf_images[end - 1].start;
632 mapped_elf_images[end].end = mapped_elf_images[end - 1].end;
633 }
634 mapped_elf_images_count++;
635
636 mapped_elf_images[n].start = data;
637 mapped_elf_images[n].end = data + len;
638 }
639 }
640 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
641 }
642
643 static SCM
644 scm_find_mapped_elf_image (SCM ip)
645 {
646 char *ptr = (char *) scm_to_uintptr_t (ip);
647 SCM result;
648
649 scm_i_pthread_mutex_lock (&scm_i_misc_mutex);
650 {
651 size_t n = find_mapped_elf_insertion_index ((char *) ptr);
652 if (n < mapped_elf_images_count
653 && mapped_elf_images[n].start <= ptr
654 && ptr < mapped_elf_images[n].end)
655 {
656 signed char *data = (signed char *) mapped_elf_images[n].start;
657 size_t len = mapped_elf_images[n].end - mapped_elf_images[n].start;
658 result = scm_c_take_gc_bytevector (data, len, SCM_BOOL_F);
659 }
660 else
661 result = SCM_BOOL_F;
662 }
663 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
664
665 return result;
666 }
667
668 static SCM
669 scm_all_mapped_elf_images (void)
670 {
671 SCM result = SCM_EOL;
672
673 scm_i_pthread_mutex_lock (&scm_i_misc_mutex);
674 {
675 size_t n;
676 for (n = 0; n < mapped_elf_images_count; n++)
677 {
678 signed char *data = (signed char *) mapped_elf_images[n].start;
679 size_t len = mapped_elf_images[n].end - mapped_elf_images[n].start;
680 result = scm_cons (scm_c_take_gc_bytevector (data, len, SCM_BOOL_F),
681 result);
682 }
683 }
684 scm_i_pthread_mutex_unlock (&scm_i_misc_mutex);
685
686 return result;
687 }
688
689 \f
690 void
691 scm_bootstrap_loader (void)
692 {
693 scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
694 "scm_init_loader",
695 (scm_t_extension_init_func)scm_init_loader, NULL);
696 }
697
698 void
699 scm_init_loader (void)
700 {
701 #ifndef SCM_MAGIC_SNARFER
702 #include "libguile/loader.x"
703 #endif
704
705 scm_c_define_gsubr ("find-mapped-elf-image", 1, 0, 0,
706 (scm_t_subr) scm_find_mapped_elf_image);
707 scm_c_define_gsubr ("all-mapped-elf-images", 0, 0, 0,
708 (scm_t_subr) scm_all_mapped_elf_images);
709 }
710
711 /*
712 Local Variables:
713 c-file-style: "gnu"
714 End:
715 */