Optimize 'string-hash'.
[bpt/guile.git] / libguile / objcodes.c
CommitLineData
5a4a4454 1/* Copyright (C) 2001, 2009, 2010, 2011, 2013 Free Software Foundation, Inc.
8f5cfc81 2 *
560b9c25 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8f5cfc81 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
560b9c25
AW
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
8f5cfc81 12 *
560b9c25
AW
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
560b9c25 17 */
8f5cfc81 18
13c47753
AW
19#if HAVE_CONFIG_H
20# include <config.h>
21#endif
22
8f5cfc81
KN
23#include <string.h>
24#include <fcntl.h>
25#include <unistd.h>
13a78b0f
AW
26
27#ifdef HAVE_SYS_MMAN_H
8f5cfc81 28#include <sys/mman.h>
13a78b0f
AW
29#endif
30
8f5cfc81
KN
31#include <sys/stat.h>
32#include <sys/types.h>
054599f1 33#include <assert.h>
1119e493 34#include <alignof.h>
de2c0a10 35#include <byteswap.h>
8f5cfc81 36
13a78b0f
AW
37#include <full-read.h>
38
560b9c25 39#include "_scm.h"
8f5cfc81
KN
40#include "programs.h"
41#include "objcodes.h"
42
4b02bf47
AW
/* SCM_OBJCODE_COOKIE, defined in _scm.h, is a magic value prepended
   to objcode on disk but not in memory.

   The length of the header must be a multiple of 8 bytes.  The check
   below tests the cookie length modulo 8; the - 1 discounts the
   trailing NUL of the string literal.  */
verify (((sizeof (SCM_OBJCODE_COOKIE) - 1) & 7) == 0);
8f5cfc81 48
de2c0a10
LC
/* Endianness and word size of the compilation target.  Both start out
   as #f and are set in scm_init_objcodes to the `target-endianness'
   and `target-word-size' variables of (system base target).  While
   they are still #f, the accessors below fall back to the values of
   the build machine.  */
static SCM target_endianness_var = SCM_BOOL_F;
static SCM target_word_size_var = SCM_BOOL_F;
52
8f5cfc81
KN
53\f
54/*
55 * Objcode type
56 */
57
de2c0a10
LC
/* Endianness of the build machine, as a single character: 'B' for
   big-endian, 'L' for little-endian.  The same encoding is returned
   by target_endianness and expected by to_native_order.  */
#ifdef WORDS_BIGENDIAN
# define NATIVE_ENDIANNESS 'B'
#else
# define NATIVE_ENDIANNESS 'L'
#endif
64
65/* Return the endianness of the compilation target. */
66static char
67target_endianness (void)
68{
69 if (scm_is_true (target_endianness_var))
70 return scm_is_eq (scm_call_0 (scm_variable_ref (target_endianness_var)),
71 scm_endianness_big) ? 'B' : 'L';
72 else
73 return NATIVE_ENDIANNESS;
74}
75
76/* Return the word size in bytes of the compilation target. */
77static size_t
78target_word_size (void)
79{
80 if (scm_is_true (target_word_size_var))
81 return scm_to_size_t (scm_call_0
82 (scm_variable_ref (target_word_size_var)));
83 else
84 return sizeof (void *);
85}
86
87/* Convert X, which is in byte order ENDIANNESS, to its native
88 representation. */
89static inline uint32_t
90to_native_order (uint32_t x, char endianness)
91{
92 if (endianness == NATIVE_ENDIANNESS)
93 return x;
94 else
95 return bswap_32 (x);
96}
97
13a78b0f
AW
98static void
99verify_cookie (char *cookie, struct stat *st, int map_fd, void *map_addr)
100#define FUNC_NAME "make_objcode_from_file"
101{
102 /* The cookie ends with a version of the form M.N, where M is the
103 major version and N is the minor version. For this Guile to be
104 able to load an objcode, M must be SCM_OBJCODE_MAJOR_VERSION, and N
105 must be less than or equal to SCM_OBJCODE_MINOR_VERSION. Since N
106 is the last character, we do a strict comparison on all but the
107 last, then a <= on the last one. */
108 if (memcmp (cookie, SCM_OBJCODE_COOKIE, strlen (SCM_OBJCODE_COOKIE) - 1))
109 {
110 SCM args = scm_list_1 (scm_from_latin1_stringn
111 (cookie, strlen (SCM_OBJCODE_COOKIE)));
112 if (map_fd >= 0)
113 {
114 (void) close (map_fd);
115#ifdef HAVE_SYS_MMAN_H
116 (void) munmap (map_addr, st->st_size);
117#endif
118 }
119 scm_misc_error (FUNC_NAME, "bad header on object file: ~s", args);
120 }
121
122 {
123 char minor_version = cookie[strlen (SCM_OBJCODE_COOKIE) - 1];
124
125 if (minor_version > SCM_OBJCODE_MINOR_VERSION_STRING[0])
126 {
127 if (map_fd >= 0)
128 {
129 (void) close (map_fd);
130#ifdef HAVE_SYS_MMAN_H
131 (void) munmap (map_addr, st->st_size);
132#endif
133 }
134
135 scm_misc_error (FUNC_NAME, "objcode minor version too new (~a > ~a)",
136 scm_list_2 (scm_from_latin1_stringn (&minor_version, 1),
137 scm_from_latin1_string
138 (SCM_OBJCODE_MINOR_VERSION_STRING)));
139 }
140 }
141}
142#undef FUNC_NAME
143
4b02bf47 144/* The words in an objcode SCM object are as follows:
6ce3666f 145 - scm_tc7_objcode | type | flags
4b02bf47 146 - the struct scm_objcode C object
6ce3666f 147 - the parent of this objcode: either another objcode, a bytevector,
213544e0 148 or, in the case of mmap types, #f
6ce3666f 149 - "native code" -- not currently used.
4b02bf47
AW
150 */
151
8f5cfc81 152static SCM
13a78b0f
AW
153make_objcode_from_file (int fd)
154#define FUNC_NAME "make_objcode_from_file"
8f5cfc81
KN
155{
156 int ret;
13a78b0f
AW
157 /* The SCM_OBJCODE_COOKIE is a string literal, and thus has an extra
158 trailing NUL, hence the - 1. */
159 char cookie[sizeof (SCM_OBJCODE_COOKIE) - 1];
8f5cfc81 160 struct stat st;
8f5cfc81
KN
161
162 ret = fstat (fd, &st);
0b5f0e49 163 if (ret < 0)
62082959 164 SCM_SYSERROR;
8f5cfc81 165
13a78b0f 166 if (st.st_size <= sizeof (struct scm_objcode) + sizeof cookie)
0b5f0e49 167 scm_misc_error (FUNC_NAME, "object file too small (~a bytes)",
da8b4747 168 scm_list_1 (SCM_I_MAKINUM (st.st_size)));
0b5f0e49 169
13a78b0f 170#ifdef HAVE_SYS_MMAN_H
e8ab529d 171 {
13a78b0f
AW
172 char *addr;
173 struct scm_objcode *data;
174
175 addr = mmap (0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
176
177 if (addr == MAP_FAILED)
178 {
179 int errno_save = errno;
180 (void) close (fd);
181 errno = errno_save;
182 SCM_SYSERROR;
183 }
184 else
185 {
186 memcpy (cookie, addr, sizeof cookie);
187 data = (struct scm_objcode *) (addr + sizeof cookie);
188 }
189
190 verify_cookie (cookie, &st, fd, addr);
191
192
193 if (data->len + data->metalen
194 != (st.st_size - sizeof (*data) - sizeof cookie))
195 {
196 size_t total_len = sizeof (*data) + data->len + data->metalen;
197
198 (void) close (fd);
199 (void) munmap (addr, st.st_size);
200
201 scm_misc_error (FUNC_NAME, "bad length header (~a, ~a)",
202 scm_list_2 (scm_from_size_t (st.st_size),
203 scm_from_size_t (total_len)));
204 }
205
213544e0 206 (void) close (fd);
13a78b0f
AW
207 return scm_permanent_object
208 (scm_double_cell (SCM_MAKE_OBJCODE_TAG (SCM_OBJCODE_TYPE_MMAP, 0),
209 (scm_t_bits)(addr + strlen (SCM_OBJCODE_COOKIE)),
213544e0 210 SCM_BOOL_F_BITS, 0));
e8ab529d 211 }
13a78b0f
AW
212#else
213 {
214 SCM bv = scm_c_make_bytevector (st.st_size - sizeof cookie);
e8ab529d 215
13a78b0f
AW
216 if (full_read (fd, cookie, sizeof cookie) != sizeof cookie
217 || full_read (fd, SCM_BYTEVECTOR_CONTENTS (bv),
218 SCM_BYTEVECTOR_LENGTH (bv)) != SCM_BYTEVECTOR_LENGTH (bv))
219 {
220 int errno_save = errno;
221 (void) close (fd);
222 errno = errno_save;
e716f441
AW
223 if (errno)
224 SCM_SYSERROR;
225 scm_misc_error (FUNC_NAME, "file truncated while reading", SCM_EOL);
13a78b0f 226 }
53e28ed9 227
13a78b0f 228 (void) close (fd);
53e28ed9 229
13a78b0f 230 verify_cookie (cookie, &st, -1, NULL);
53e28ed9 231
de2c0a10 232 return scm_bytecode_to_native_objcode (bv);
13a78b0f
AW
233 }
234#endif
8f5cfc81
KN
235}
236#undef FUNC_NAME
237
13a78b0f 238
53e28ed9 239SCM
b67cb286 240scm_c_make_objcode_slice (SCM parent, const scm_t_uint8 *ptr)
53e28ed9 241#define FUNC_NAME "make-objcode-slice"
8f5cfc81 242{
b67cb286 243 const struct scm_objcode *data, *parent_data;
3dbbe28d 244 const scm_t_uint8 *parent_base;
53e28ed9
AW
245
246 SCM_VALIDATE_OBJCODE (1, parent);
247 parent_data = SCM_OBJCODE_DATA (parent);
3dbbe28d
LC
248 parent_base = SCM_C_OBJCODE_BASE (parent_data);
249
250 if (ptr < parent_base
251 || ptr >= (parent_base + parent_data->len + parent_data->metalen
53e28ed9 252 - sizeof (struct scm_objcode)))
3d27ef4b
AW
253 scm_misc_error
254 (FUNC_NAME, "offset out of bounds (~a vs ~a + ~a + ~a)",
255 scm_list_4 (scm_from_unsigned_integer ((scm_t_bits) ptr),
256 scm_from_unsigned_integer ((scm_t_bits) parent_base),
257 scm_from_uint32 (parent_data->len),
258 scm_from_uint32 (parent_data->metalen)));
53e28ed9 259
e3c9c676
LC
260 /* Make sure bytecode for the objcode-meta is suitable aligned. Failing to
261 do so leads to SIGBUS/SIGSEGV on some arches (e.g., SPARC). */
1119e493
LC
262 assert ((((scm_t_bits) ptr) &
263 (alignof_type (struct scm_objcode) - 1UL)) == 0);
53e28ed9 264
3dbbe28d
LC
265 data = (struct scm_objcode*) ptr;
266 assert (SCM_C_OBJCODE_BASE (data) + data->len + data->metalen
267 <= parent_base + parent_data->len + parent_data->metalen);
53e28ed9 268
f9654187 269 return scm_double_cell (SCM_MAKE_OBJCODE_TAG (SCM_OBJCODE_TYPE_SLICE, 0),
6f3b0cc2 270 (scm_t_bits)data, SCM_UNPACK (parent), 0);
8f5cfc81
KN
271}
272#undef FUNC_NAME
273
274\f
275/*
276 * Scheme interface
277 */
278
279SCM_DEFINE (scm_objcode_p, "objcode?", 1, 0, 0,
280 (SCM obj),
281 "")
282#define FUNC_NAME s_scm_objcode_p
283{
5c8cefe5 284 return scm_from_bool (SCM_OBJCODE_P (obj));
8f5cfc81
KN
285}
286#undef FUNC_NAME
287
1f1ec13b
AW
288SCM_DEFINE (scm_objcode_meta, "objcode-meta", 1, 0, 0,
289 (SCM objcode),
290 "")
291#define FUNC_NAME s_scm_objcode_meta
292{
293 SCM_VALIDATE_OBJCODE (1, objcode);
294
295 if (SCM_OBJCODE_META_LEN (objcode) == 0)
296 return SCM_BOOL_F;
297 else
298 return scm_c_make_objcode_slice (objcode, (SCM_OBJCODE_BASE (objcode)
299 + SCM_OBJCODE_LEN (objcode)));
300}
301#undef FUNC_NAME
302
de2c0a10
LC
303/* Turn BYTECODE into objcode encoded for ENDIANNESS and WORD_SIZE. */
304static SCM
305bytecode_to_objcode (SCM bytecode, char endianness, size_t word_size)
306#define FUNC_NAME "bytecode->objcode"
8f5cfc81 307{
de2c0a10 308 size_t size, len, metalen;
b6368dbb 309 const scm_t_uint8 *c_bytecode;
53e28ed9 310 struct scm_objcode *data;
8f5cfc81 311
a2689737 312 if (!scm_is_bytevector (bytecode))
054599f1 313 scm_wrong_type_arg (FUNC_NAME, 1, bytecode);
8f5cfc81 314
a2689737
AW
315 size = SCM_BYTEVECTOR_LENGTH (bytecode);
316 c_bytecode = (const scm_t_uint8*)SCM_BYTEVECTOR_CONTENTS (bytecode);
de2c0a10 317
a2689737 318 SCM_ASSERT_RANGE (0, bytecode, size >= sizeof(struct scm_objcode));
53e28ed9 319 data = (struct scm_objcode*)c_bytecode;
ac47d5f6 320
de2c0a10
LC
321 len = to_native_order (data->len, endianness);
322 metalen = to_native_order (data->metalen, endianness);
323
324 if (len + metalen != (size - sizeof (*data)))
a2689737 325 scm_misc_error (FUNC_NAME, "bad bytevector size (~a != ~a)",
da8b4747 326 scm_list_2 (scm_from_size_t (size),
de2c0a10 327 scm_from_uint32 (sizeof (*data) + len + metalen)));
a2689737 328
53e28ed9
AW
329 /* foolishly, we assume that as long as bytecode is around, that c_bytecode
330 will be of the same length; perhaps a bad assumption? */
f9654187 331 return scm_double_cell (SCM_MAKE_OBJCODE_TAG (SCM_OBJCODE_TYPE_BYTEVECTOR, 0),
6f3b0cc2 332 (scm_t_bits)data, SCM_UNPACK (bytecode), 0);
8f5cfc81
KN
333}
334#undef FUNC_NAME
335
de2c0a10
LC
336SCM_DEFINE (scm_bytecode_to_objcode, "bytecode->objcode", 1, 0, 0,
337 (SCM bytecode),
338 "")
339#define FUNC_NAME s_scm_bytecode_to_objcode
340{
341 /* Assume we're called from Scheme, which known that to do with
342 `target-type'. */
343 return bytecode_to_objcode (bytecode, target_endianness (),
344 target_word_size ());
345}
346#undef FUNC_NAME
347
348/* Like `bytecode->objcode', but ignore the `target-type' fluid. This
349 is useful for native compilation that happens lazily---e.g., direct
350 calls to this function from libguile itself. */
351SCM
352scm_bytecode_to_native_objcode (SCM bytecode)
353{
354 return bytecode_to_objcode (bytecode, NATIVE_ENDIANNESS, sizeof (void *));
355}
356
8f5cfc81
KN
357SCM_DEFINE (scm_load_objcode, "load-objcode", 1, 0, 0,
358 (SCM file),
359 "")
360#define FUNC_NAME s_scm_load_objcode
361{
362 int fd;
2d80426a 363 char *c_file;
8f5cfc81
KN
364
365 SCM_VALIDATE_STRING (1, file);
366
2d80426a 367 c_file = scm_to_locale_string (file);
5a4a4454 368 fd = open (c_file, O_RDONLY | O_BINARY | O_CLOEXEC);
2d80426a 369 free (c_file);
8f5cfc81
KN
370 if (fd < 0) SCM_SYSERROR;
371
13a78b0f 372 return make_objcode_from_file (fd);
8f5cfc81
KN
373}
374#undef FUNC_NAME
375
9bb8012d 376SCM_DEFINE (scm_objcode_to_bytecode, "objcode->bytecode", 1, 0, 0,
8f5cfc81
KN
377 (SCM objcode),
378 "")
9bb8012d 379#define FUNC_NAME s_scm_objcode_to_bytecode
8f5cfc81 380{
a2689737 381 scm_t_int8 *s8vector;
53e28ed9 382 scm_t_uint32 len;
054599f1 383
8f5cfc81 384 SCM_VALIDATE_OBJCODE (1, objcode);
054599f1 385
9a690dfb 386 len = sizeof (struct scm_objcode) + SCM_OBJCODE_TOTAL_LEN (objcode);
d7e7a02a 387
fb031aba 388 s8vector = scm_gc_malloc_pointerless (len, FUNC_NAME);
a2689737 389 memcpy (s8vector, SCM_OBJCODE_DATA (objcode), len);
054599f1 390
fb031aba 391 return scm_c_take_gc_bytevector (s8vector, len);
8f5cfc81
KN
392}
393#undef FUNC_NAME
394
53e28ed9
AW
395SCM_DEFINE (scm_write_objcode, "write-objcode", 2, 0, 0,
396 (SCM objcode, SCM port),
8f5cfc81 397 "")
53e28ed9 398#define FUNC_NAME s_scm_write_objcode
8f5cfc81 399{
f0b7c3c6 400 char cookie[sizeof (SCM_OBJCODE_COOKIE) - 1];
de2c0a10
LC
401 char endianness, word_size;
402 size_t total_size;
f0b7c3c6 403
8f5cfc81 404 SCM_VALIDATE_OBJCODE (1, objcode);
53e28ed9 405 SCM_VALIDATE_OUTPUT_PORT (2, port);
de2c0a10
LC
406 endianness = target_endianness ();
407 switch (target_word_size ())
f0b7c3c6 408 {
de2c0a10
LC
409 case 4:
410 word_size = '4';
411 break;
412 case 8:
413 word_size = '8';
414 break;
415 default:
416 abort ();
f0b7c3c6
AW
417 }
418
419 memcpy (cookie, SCM_OBJCODE_COOKIE, strlen (SCM_OBJCODE_COOKIE));
420 cookie[SCM_OBJCODE_ENDIANNESS_OFFSET] = endianness;
421 cookie[SCM_OBJCODE_WORD_SIZE_OFFSET] = word_size;
422
de2c0a10
LC
423 total_size =
424 to_native_order (SCM_OBJCODE_LEN (objcode), target_endianness ())
425 + to_native_order (SCM_OBJCODE_META_LEN (objcode), target_endianness ());
426
f0b7c3c6 427 scm_c_write (port, cookie, strlen (SCM_OBJCODE_COOKIE));
53e28ed9 428 scm_c_write (port, SCM_OBJCODE_DATA (objcode),
de2c0a10 429 sizeof (struct scm_objcode) + total_size);
53e28ed9
AW
430
431 return SCM_UNSPECIFIED;
8f5cfc81
KN
432}
433#undef FUNC_NAME
434
6f3b0cc2
AW
435void
436scm_i_objcode_print (SCM objcode, SCM port, scm_print_state *pstate)
437{
438 scm_puts ("#<objcode ", port);
439 scm_uintprint ((scm_t_bits)SCM_OBJCODE_BASE (objcode), 16, port);
440 scm_puts (">", port);
441}
442
8f5cfc81
KN
443\f
444void
07e56b27 445scm_bootstrap_objcodes (void)
8f5cfc81 446{
44602b08
AW
447 scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
448 "scm_init_objcodes",
60ae5ca2 449 (scm_t_extension_init_func)scm_init_objcodes, NULL);
07e56b27
AW
450}
451
8992a9e3
AW
/* Before, we used __BYTE_ORDER, but that is not defined on all
   systems.  So punt and use automake, PDP endianness be damned.

   SCM_BYTE_ORDER is 4321 on big-endian builds and 1234 otherwise; it
   is exported to Scheme as `byte-order' by scm_init_objcodes.  */
#ifdef WORDS_BIGENDIAN
#define SCM_BYTE_ORDER 4321
#else
#define SCM_BYTE_ORDER 1234
#endif
459
07e56b27
AW
460void
461scm_init_objcodes (void)
462{
8f5cfc81 463#ifndef SCM_MAGIC_SNARFER
aeeff258 464#include "libguile/objcodes.x"
8f5cfc81 465#endif
53e28ed9
AW
466
467 scm_c_define ("word-size", scm_from_size_t (sizeof(SCM)));
8992a9e3 468 scm_c_define ("byte-order", scm_from_uint16 (SCM_BYTE_ORDER));
de2c0a10
LC
469
470 target_endianness_var = scm_c_public_variable ("system base target",
471 "target-endianness");
472 target_word_size_var = scm_c_public_variable ("system base target",
473 "target-word-size");
8f5cfc81
KN
474}
475
476/*
477 Local Variables:
478 c-file-style: "gnu"
479 End:
480*/