Avoid signed overflow and use size_t in bytevectors.c.
[bpt/guile.git] / libguile / bytevectors.c
1 /* Copyright (C) 2009-2014 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <assert.h>
26
27 #include <gmp.h>
28
29 #include "libguile/_scm.h"
30 #include "libguile/extensions.h"
31 #include "libguile/bytevectors.h"
32 #include "libguile/strings.h"
33 #include "libguile/validate.h"
34 #include "libguile/arrays.h"
35 #include "libguile/array-handle.h"
36 #include "libguile/uniform.h"
37 #include "libguile/srfi-4.h"
38
39 #include <byteswap.h>
40 #include <striconveh.h>
41 #include <uniconv.h>
42 #include <unistr.h>
43
44 #ifdef HAVE_LIMITS_H
45 # include <limits.h>
46 #endif
47
48 #include <string.h>
49
50
51 \f
52 /* Utilities. */
53
54 /* Convenience macros. These are used by the various templates (macros) that
55 are parameterized by integer signedness. */
56 #define INT8_T_signed scm_t_int8
57 #define INT8_T_unsigned scm_t_uint8
58 #define INT16_T_signed scm_t_int16
59 #define INT16_T_unsigned scm_t_uint16
60 #define INT32_T_signed scm_t_int32
61 #define INT32_T_unsigned scm_t_uint32
62 #define is_signed_int8(_x) (((_x) >= -128L) && ((_x) <= 127L))
63 #define is_unsigned_int8(_x) ((_x) <= 255UL)
64 #define is_signed_int16(_x) (((_x) >= -32768L) && ((_x) <= 32767L))
65 #define is_unsigned_int16(_x) ((_x) <= 65535UL)
66 #define is_signed_int32(_x) (((_x) >= -2147483648L) && ((_x) <= 2147483647L))
67 #define is_unsigned_int32(_x) ((_x) <= 4294967295UL)
68 #define SIGNEDNESS_signed 1
69 #define SIGNEDNESS_unsigned 0
70
71 #define INT_TYPE(_size, _sign) INT ## _size ## _T_ ## _sign
72 #define INT_SWAP(_size) bswap_ ## _size
73 #define INT_VALID_P(_size, _sign) is_ ## _sign ## _int ## _size
74 #define SIGNEDNESS(_sign) SIGNEDNESS_ ## _sign
75
76
77 #define INTEGER_ACCESSOR_PROLOGUE(_len, _sign) \
78 size_t c_len, c_index; \
79 _sign char *c_bv; \
80 \
81 SCM_VALIDATE_BYTEVECTOR (1, bv); \
82 c_index = scm_to_uint (index); \
83 \
84 c_len = SCM_BYTEVECTOR_LENGTH (bv); \
85 c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv); \
86 \
87 if (SCM_UNLIKELY (c_index + ((_len) >> 3UL) - 1 >= c_len)) \
88 scm_out_of_range (FUNC_NAME, index);
89
90 /* Template for fixed-size integer access (only 8, 16 or 32-bit). */
91 #define INTEGER_REF(_len, _sign) \
92 SCM result; \
93 \
94 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
95 SCM_VALIDATE_SYMBOL (3, endianness); \
96 \
97 { \
98 INT_TYPE (_len, _sign) c_result; \
99 \
100 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
101 if (!scm_is_eq (endianness, scm_i_native_endianness)) \
102 c_result = INT_SWAP (_len) (c_result); \
103 \
104 result = SCM_I_MAKINUM (c_result); \
105 } \
106 \
107 return result;
108
109 /* Template for fixed-size integer access using the native endianness. */
110 #define INTEGER_NATIVE_REF(_len, _sign) \
111 SCM result; \
112 \
113 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
114 \
115 { \
116 INT_TYPE (_len, _sign) c_result; \
117 \
118 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
119 result = SCM_I_MAKINUM (c_result); \
120 } \
121 \
122 return result;
123
124 /* Template for fixed-size integer modification (only 8, 16 or 32-bit). */
125 #define INTEGER_SET(_len, _sign) \
126 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
127 SCM_VALIDATE_SYMBOL (3, endianness); \
128 \
129 { \
130 scm_t_signed_bits c_value; \
131 INT_TYPE (_len, _sign) c_value_short; \
132 \
133 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
134 scm_wrong_type_arg (FUNC_NAME, 3, value); \
135 \
136 c_value = SCM_I_INUM (value); \
137 if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value))) \
138 scm_out_of_range (FUNC_NAME, value); \
139 \
140 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
141 if (!scm_is_eq (endianness, scm_i_native_endianness)) \
142 c_value_short = INT_SWAP (_len) (c_value_short); \
143 \
144 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
145 } \
146 \
147 return SCM_UNSPECIFIED;
148
149 /* Template for fixed-size integer modification using the native
150 endianness. */
151 #define INTEGER_NATIVE_SET(_len, _sign) \
152 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
153 \
154 { \
155 scm_t_signed_bits c_value; \
156 INT_TYPE (_len, _sign) c_value_short; \
157 \
158 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
159 scm_wrong_type_arg (FUNC_NAME, 3, value); \
160 \
161 c_value = SCM_I_INUM (value); \
162 if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value))) \
163 scm_out_of_range (FUNC_NAME, value); \
164 \
165 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
166 \
167 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
168 } \
169 \
170 return SCM_UNSPECIFIED;
171
172
173 \f
174 /* Bytevector type. */
175
176 #define SCM_BYTEVECTOR_HEADER_BYTES \
177 (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (scm_t_bits))
178
179 #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len) \
180 SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len))
181 #define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _contents) \
182 SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_contents))
183 #define SCM_BYTEVECTOR_SET_CONTIGUOUS_P(bv, contiguous_p) \
184 SCM_SET_BYTEVECTOR_FLAGS ((bv), \
185 SCM_BYTEVECTOR_ELEMENT_TYPE (bv) \
186 | ((contiguous_p) << 8UL))
187
188 #define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint) \
189 SCM_SET_BYTEVECTOR_FLAGS ((bv), \
190 (hint) \
191 | (SCM_BYTEVECTOR_CONTIGUOUS_P (bv) << 8UL))
192 #define SCM_BYTEVECTOR_TYPE_SIZE(var) \
193 (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8)
194 #define SCM_BYTEVECTOR_TYPED_LENGTH(var) \
195 (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var))
196
197 /* The empty bytevector. */
198 SCM scm_null_bytevector = SCM_UNSPECIFIED;
199
200
201 static inline SCM
202 make_bytevector (size_t len, scm_t_array_element_type element_type)
203 {
204 SCM ret;
205 size_t c_len;
206
207 if (SCM_UNLIKELY (element_type > SCM_ARRAY_ELEMENT_TYPE_LAST
208 || scm_i_array_element_type_sizes[element_type] < 8
209 || len >= (SCM_I_SIZE_MAX
210 / (scm_i_array_element_type_sizes[element_type]/8))))
211 /* This would be an internal Guile programming error */
212 abort ();
213
214 if (SCM_UNLIKELY (len == 0 && element_type == SCM_ARRAY_ELEMENT_TYPE_VU8
215 && SCM_BYTEVECTOR_P (scm_null_bytevector)))
216 ret = scm_null_bytevector;
217 else
218 {
219 signed char *contents;
220
221 c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
222
223 contents = scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
224 SCM_GC_BYTEVECTOR);
225 ret = PTR2SCM (contents);
226 contents += SCM_BYTEVECTOR_HEADER_BYTES;
227
228 SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
229 SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
230 SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 1);
231 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
232 }
233
234 return ret;
235 }
236
237 /* Return a bytevector of LEN elements of type ELEMENT_TYPE, with element
238 values taken from CONTENTS. Assume that the storage for CONTENTS will be
239 automatically reclaimed when it becomes unreachable. */
240 static inline SCM
241 make_bytevector_from_buffer (size_t len, void *contents,
242 scm_t_array_element_type element_type)
243 {
244 SCM ret;
245
246 if (SCM_UNLIKELY (len == 0))
247 ret = make_bytevector (len, element_type);
248 else
249 {
250 size_t c_len;
251
252 ret = PTR2SCM (scm_gc_malloc (SCM_BYTEVECTOR_HEADER_BYTES,
253 SCM_GC_BYTEVECTOR));
254
255 c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
256
257 SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
258 SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
259 SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 0);
260 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
261 }
262
263 return ret;
264 }
265
266
267 /* Return a new bytevector of size LEN octets. */
268 SCM
269 scm_c_make_bytevector (size_t len)
270 {
271 return make_bytevector (len, SCM_ARRAY_ELEMENT_TYPE_VU8);
272 }
273
274 /* Return a new bytevector of size LEN elements. */
275 SCM
276 scm_i_make_typed_bytevector (size_t len, scm_t_array_element_type element_type)
277 {
278 return make_bytevector (len, element_type);
279 }
280
281 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
282 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
283 SCM
284 scm_c_take_gc_bytevector (signed char *contents, size_t len)
285 {
286 return make_bytevector_from_buffer (len, contents, SCM_ARRAY_ELEMENT_TYPE_VU8);
287 }
288
289 SCM
290 scm_c_take_typed_bytevector (signed char *contents, size_t len,
291 scm_t_array_element_type element_type)
292 {
293 return make_bytevector_from_buffer (len, contents, element_type);
294 }
295
296 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
297 size) and return the new bytevector (possibly different from BV). */
298 SCM
299 scm_c_shrink_bytevector (SCM bv, size_t c_new_len)
300 {
301 SCM new_bv;
302 size_t c_len;
303
304 if (SCM_UNLIKELY (c_new_len % SCM_BYTEVECTOR_TYPE_SIZE (bv)))
305 /* This would be an internal Guile programming error */
306 abort ();
307
308 c_len = SCM_BYTEVECTOR_LENGTH (bv);
309 if (SCM_UNLIKELY (c_new_len > c_len))
310 abort ();
311
312 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
313
314 if (SCM_BYTEVECTOR_CONTIGUOUS_P (bv))
315 {
316 signed char *c_bv;
317
318 c_bv = scm_gc_realloc (SCM2PTR (bv),
319 c_len + SCM_BYTEVECTOR_HEADER_BYTES,
320 c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
321 SCM_GC_BYTEVECTOR);
322 new_bv = PTR2SCM (c_bv);
323 SCM_BYTEVECTOR_SET_CONTENTS (new_bv, c_bv + SCM_BYTEVECTOR_HEADER_BYTES);
324 }
325 else
326 {
327 signed char *c_bv;
328
329 c_bv = scm_gc_realloc (SCM_BYTEVECTOR_CONTENTS (bv),
330 c_len, c_new_len, SCM_GC_BYTEVECTOR);
331 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_bv);
332
333 new_bv = bv;
334 }
335
336 return new_bv;
337 }
338
339 int
340 scm_is_bytevector (SCM obj)
341 {
342 return SCM_BYTEVECTOR_P (obj);
343 }
344
345 size_t
346 scm_c_bytevector_length (SCM bv)
347 #define FUNC_NAME "scm_c_bytevector_length"
348 {
349 SCM_VALIDATE_BYTEVECTOR (1, bv);
350
351 return SCM_BYTEVECTOR_LENGTH (bv);
352 }
353 #undef FUNC_NAME
354
355 scm_t_uint8
356 scm_c_bytevector_ref (SCM bv, size_t index)
357 #define FUNC_NAME "scm_c_bytevector_ref"
358 {
359 size_t c_len;
360 const scm_t_uint8 *c_bv;
361
362 SCM_VALIDATE_BYTEVECTOR (1, bv);
363
364 c_len = SCM_BYTEVECTOR_LENGTH (bv);
365 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
366
367 if (SCM_UNLIKELY (index >= c_len))
368 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
369
370 return c_bv[index];
371 }
372 #undef FUNC_NAME
373
374 void
375 scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
376 #define FUNC_NAME "scm_c_bytevector_set_x"
377 {
378 size_t c_len;
379 scm_t_uint8 *c_bv;
380
381 SCM_VALIDATE_BYTEVECTOR (1, bv);
382
383 c_len = SCM_BYTEVECTOR_LENGTH (bv);
384 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
385
386 if (SCM_UNLIKELY (index >= c_len))
387 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
388
389 c_bv[index] = value;
390 }
391 #undef FUNC_NAME
392
393
394 \f
395 int
396 scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
397 {
398 ssize_t ubnd, inc, i;
399 scm_t_array_handle h;
400
401 scm_array_get_handle (bv, &h);
402
403 scm_putc ('#', port);
404 scm_write (scm_array_handle_element_type (&h), port);
405 scm_putc ('(', port);
406 for (i = h.dims[0].lbnd, ubnd = h.dims[0].ubnd, inc = h.dims[0].inc;
407 i <= ubnd; i += inc)
408 {
409 if (i > 0)
410 scm_putc (' ', port);
411 scm_write (scm_array_handle_ref (&h, i), port);
412 }
413 scm_putc (')', port);
414
415 return 1;
416 }
417
418 \f
419 /* General operations. */
420
421 SCM_SYMBOL (scm_sym_big, "big");
422 SCM_SYMBOL (scm_sym_little, "little");
423
424 SCM scm_endianness_big, scm_endianness_little;
425
426 /* Host endianness (a symbol). */
427 SCM scm_i_native_endianness = SCM_UNSPECIFIED;
428
429 /* Byte-swapping. */
430 #ifndef bswap_24
431 # define bswap_24(_x) \
432 ((((_x) & 0xff0000) >> 16) | \
433 (((_x) & 0x00ff00)) | \
434 (((_x) & 0x0000ff) << 16))
435 #endif
436
437
438 SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
439 (void),
440 "Return a symbol denoting the machine's native endianness.")
441 #define FUNC_NAME s_scm_native_endianness
442 {
443 return scm_i_native_endianness;
444 }
445 #undef FUNC_NAME
446
447 SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
448 (SCM obj),
449 "Return true if @var{obj} is a bytevector.")
450 #define FUNC_NAME s_scm_bytevector_p
451 {
452 return scm_from_bool (scm_is_bytevector (obj));
453 }
454 #undef FUNC_NAME
455
456 SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
457 (SCM len, SCM fill),
458 "Return a newly allocated bytevector of @var{len} bytes, "
459 "optionally filled with @var{fill}.")
460 #define FUNC_NAME s_scm_make_bytevector
461 {
462 SCM bv;
463 size_t c_len;
464 scm_t_uint8 c_fill = 0;
465
466 SCM_VALIDATE_SIZE_COPY (1, len, c_len);
467 if (!scm_is_eq (fill, SCM_UNDEFINED))
468 {
469 int value;
470
471 value = scm_to_int (fill);
472 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
473 scm_out_of_range (FUNC_NAME, fill);
474 c_fill = (scm_t_uint8) value;
475 }
476
477 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
478 if (!scm_is_eq (fill, SCM_UNDEFINED))
479 {
480 size_t i;
481 scm_t_uint8 *contents;
482
483 contents = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
484 for (i = 0; i < c_len; i++)
485 contents[i] = c_fill;
486 }
487 else
488 memset (SCM_BYTEVECTOR_CONTENTS (bv), 0, c_len);
489
490 return bv;
491 }
492 #undef FUNC_NAME
493
494 SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
495 (SCM bv),
496 "Return the length (in bytes) of @var{bv}.")
497 #define FUNC_NAME s_scm_bytevector_length
498 {
499 return scm_from_uint (scm_c_bytevector_length (bv));
500 }
501 #undef FUNC_NAME
502
503 SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
504 (SCM bv1, SCM bv2),
505 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
506 "have the same length and contents.")
507 #define FUNC_NAME s_scm_bytevector_eq_p
508 {
509 SCM result = SCM_BOOL_F;
510 size_t c_len1, c_len2;
511
512 SCM_VALIDATE_BYTEVECTOR (1, bv1);
513 SCM_VALIDATE_BYTEVECTOR (2, bv2);
514
515 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
516 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
517
518 if (c_len1 == c_len2 && (SCM_BYTEVECTOR_ELEMENT_TYPE (bv1)
519 == SCM_BYTEVECTOR_ELEMENT_TYPE (bv2)))
520 {
521 signed char *c_bv1, *c_bv2;
522
523 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
524 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
525
526 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
527 }
528
529 return result;
530 }
531 #undef FUNC_NAME
532
533 SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
534 (SCM bv, SCM fill),
535 "Fill bytevector @var{bv} with @var{fill}, a byte.")
536 #define FUNC_NAME s_scm_bytevector_fill_x
537 {
538 size_t c_len, i;
539 scm_t_uint8 *c_bv, c_fill;
540
541 SCM_VALIDATE_BYTEVECTOR (1, bv);
542 c_fill = scm_to_int8 (fill);
543
544 c_len = SCM_BYTEVECTOR_LENGTH (bv);
545 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
546
547 for (i = 0; i < c_len; i++)
548 c_bv[i] = c_fill;
549
550 return SCM_UNSPECIFIED;
551 }
552 #undef FUNC_NAME
553
554 SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
555 (SCM source, SCM source_start, SCM target, SCM target_start,
556 SCM len),
557 "Copy @var{len} bytes from @var{source} into @var{target}, "
558 "starting reading from @var{source_start} (a positive index "
559 "within @var{source}) and start writing at "
560 "@var{target_start}.")
561 #define FUNC_NAME s_scm_bytevector_copy_x
562 {
563 size_t c_len, c_source_len, c_target_len;
564 size_t c_source_start, c_target_start;
565 signed char *c_source, *c_target;
566
567 SCM_VALIDATE_BYTEVECTOR (1, source);
568 SCM_VALIDATE_BYTEVECTOR (3, target);
569
570 c_len = scm_to_size_t (len);
571 c_source_start = scm_to_size_t (source_start);
572 c_target_start = scm_to_size_t (target_start);
573
574 c_source = SCM_BYTEVECTOR_CONTENTS (source);
575 c_target = SCM_BYTEVECTOR_CONTENTS (target);
576 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
577 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
578
579 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
580 scm_out_of_range (FUNC_NAME, source_start);
581 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
582 scm_out_of_range (FUNC_NAME, target_start);
583
584 memmove (c_target + c_target_start,
585 c_source + c_source_start,
586 c_len);
587
588 return SCM_UNSPECIFIED;
589 }
590 #undef FUNC_NAME
591
592 SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
593 (SCM bv),
594 "Return a newly allocated copy of @var{bv}.")
595 #define FUNC_NAME s_scm_bytevector_copy
596 {
597 SCM copy;
598 size_t c_len;
599 signed char *c_bv, *c_copy;
600
601 SCM_VALIDATE_BYTEVECTOR (1, bv);
602
603 c_len = SCM_BYTEVECTOR_LENGTH (bv);
604 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
605
606 copy = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
607 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
608 memcpy (c_copy, c_bv, c_len);
609
610 return copy;
611 }
612 #undef FUNC_NAME
613
614 SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
615 1, 0, 0, (SCM array),
616 "Return a newly allocated bytevector whose contents\n"
617 "will be copied from the uniform array @var{array}.")
618 #define FUNC_NAME s_scm_uniform_array_to_bytevector
619 {
620 SCM contents, ret;
621 size_t len, sz, byte_len;
622 scm_t_array_handle h;
623 const void *elts;
624
625 contents = scm_array_contents (array, SCM_BOOL_T);
626 if (scm_is_false (contents))
627 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
628
629 scm_array_get_handle (contents, &h);
630 assert (h.base == 0);
631
632 elts = h.elements;
633 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
634 sz = scm_array_handle_uniform_element_bit_size (&h);
635 if (sz >= 8 && ((sz % 8) == 0))
636 byte_len = len * (sz / 8);
637 else if (sz < 8)
638 /* byte_len = ceil (len * sz / 8) */
639 byte_len = (len * sz + 7) / 8;
640 else
641 /* an internal guile error, really */
642 SCM_MISC_ERROR ("uniform elements larger than 8 bits must fill whole bytes", SCM_EOL);
643
644 ret = make_bytevector (byte_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
645 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), elts, byte_len);
646
647 scm_array_handle_release (&h);
648
649 return ret;
650 }
651 #undef FUNC_NAME
652
653 \f
654 /* Operations on bytes and octets. */
655
656 SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
657 (SCM bv, SCM index),
658 "Return the octet located at @var{index} in @var{bv}.")
659 #define FUNC_NAME s_scm_bytevector_u8_ref
660 {
661 INTEGER_NATIVE_REF (8, unsigned);
662 }
663 #undef FUNC_NAME
664
665 SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
666 (SCM bv, SCM index),
667 "Return the byte located at @var{index} in @var{bv}.")
668 #define FUNC_NAME s_scm_bytevector_s8_ref
669 {
670 INTEGER_NATIVE_REF (8, signed);
671 }
672 #undef FUNC_NAME
673
674 SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
675 (SCM bv, SCM index, SCM value),
676 "Return the octet located at @var{index} in @var{bv}.")
677 #define FUNC_NAME s_scm_bytevector_u8_set_x
678 {
679 INTEGER_NATIVE_SET (8, unsigned);
680 }
681 #undef FUNC_NAME
682
683 SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
684 (SCM bv, SCM index, SCM value),
685 "Return the octet located at @var{index} in @var{bv}.")
686 #define FUNC_NAME s_scm_bytevector_s8_set_x
687 {
688 INTEGER_NATIVE_SET (8, signed);
689 }
690 #undef FUNC_NAME
691
692 #undef OCTET_ACCESSOR_PROLOGUE
693
694
695 SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
696 (SCM bv),
697 "Return a newly allocated list of octets containing the "
698 "contents of @var{bv}.")
699 #define FUNC_NAME s_scm_bytevector_to_u8_list
700 {
701 SCM lst, pair;
702 size_t c_len, i;
703 scm_t_uint8 *c_bv;
704
705 SCM_VALIDATE_BYTEVECTOR (1, bv);
706
707 c_len = SCM_BYTEVECTOR_LENGTH (bv);
708 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
709
710 lst = scm_make_list (scm_from_size_t (c_len), SCM_UNSPECIFIED);
711 for (i = 0, pair = lst;
712 i < c_len;
713 i++, pair = SCM_CDR (pair))
714 {
715 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
716 }
717
718 return lst;
719 }
720 #undef FUNC_NAME
721
722 SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
723 (SCM lst),
724 "Turn @var{lst}, a list of octets, into a bytevector.")
725 #define FUNC_NAME s_scm_u8_list_to_bytevector
726 {
727 SCM bv, item;
728 size_t c_len, i;
729 scm_t_uint8 *c_bv;
730
731 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
732
733 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
734 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
735
736 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
737 {
738 item = SCM_CAR (lst);
739
740 if (SCM_LIKELY (SCM_I_INUMP (item)))
741 {
742 scm_t_signed_bits c_item;
743
744 c_item = SCM_I_INUM (item);
745 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
746 c_bv[i] = (scm_t_uint8) c_item;
747 else
748 goto type_error;
749 }
750 else
751 goto type_error;
752 }
753
754 return bv;
755
756 type_error:
757 scm_wrong_type_arg (FUNC_NAME, 1, item);
758
759 return SCM_BOOL_F;
760 }
761 #undef FUNC_NAME
762
763 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
764 using (2^(SIZE * 8) - VALUE). */
765 static inline void
766 twos_complement (mpz_t value, size_t size)
767 {
768 unsigned long bit_count;
769
770 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
771 checking on SIZE performed earlier. */
772 bit_count = (unsigned long) size << 3UL;
773
774 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
775 mpz_ui_sub (value, 1UL << bit_count, value);
776 else
777 {
778 mpz_t max;
779
780 mpz_init (max);
781 mpz_ui_pow_ui (max, 2, bit_count);
782 mpz_sub (value, max, value);
783 mpz_clear (max);
784 }
785 }
786
787 static inline SCM
788 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
789 SCM endianness)
790 {
791 SCM result;
792 mpz_t c_mpz;
793 int c_endianness, negative_p = 0;
794
795 if (signed_p)
796 {
797 if (scm_is_eq (endianness, scm_sym_big))
798 negative_p = c_bv[0] & 0x80;
799 else
800 negative_p = c_bv[c_size - 1] & 0x80;
801 }
802
803 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
804
805 mpz_init (c_mpz);
806 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
807 c_size /* word is C_SIZE-byte long */,
808 c_endianness,
809 0 /* nails */, c_bv);
810
811 if (signed_p && negative_p)
812 {
813 twos_complement (c_mpz, c_size);
814 mpz_neg (c_mpz, c_mpz);
815 }
816
817 result = scm_from_mpz (c_mpz);
818 mpz_clear (c_mpz); /* FIXME: Needed? */
819
820 return result;
821 }
822
823 static inline int
824 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
825 SCM value, SCM endianness)
826 {
827 mpz_t c_mpz;
828 int c_endianness, c_sign, err = 0;
829
830 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
831
832 mpz_init (c_mpz);
833 scm_to_mpz (value, c_mpz);
834
835 c_sign = mpz_sgn (c_mpz);
836 if (c_sign < 0)
837 {
838 if (SCM_LIKELY (signed_p))
839 {
840 mpz_neg (c_mpz, c_mpz);
841 twos_complement (c_mpz, c_size);
842 }
843 else
844 {
845 err = -1;
846 goto finish;
847 }
848 }
849
850 if (c_sign == 0)
851 /* Zero. */
852 memset (c_bv, 0, c_size);
853 else
854 {
855 size_t word_count, value_size;
856
857 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
858 if (SCM_UNLIKELY (value_size > c_size))
859 {
860 err = -2;
861 goto finish;
862 }
863
864
865 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
866 c_size, c_endianness,
867 0 /* nails */, c_mpz);
868 if (SCM_UNLIKELY (word_count != 1))
869 /* Shouldn't happen since we already checked with VALUE_SIZE. */
870 abort ();
871 }
872
873 finish:
874 mpz_clear (c_mpz);
875
876 return err;
877 }
878
879 #define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign) \
880 size_t c_len, c_index, c_size; \
881 char *c_bv; \
882 \
883 SCM_VALIDATE_BYTEVECTOR (1, bv); \
884 c_index = scm_to_size_t (index); \
885 c_size = scm_to_size_t (size); \
886 \
887 c_len = SCM_BYTEVECTOR_LENGTH (bv); \
888 c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
889 \
890 /* C_SIZE must have its 3 higher bits set to zero so that \
891 multiplying it by 8 yields a number that fits in a \
892 size_t. */ \
893 if (SCM_UNLIKELY (c_size == 0 || c_size >= (SCM_I_SIZE_MAX >> 3))) \
894 scm_out_of_range (FUNC_NAME, size); \
895 if (SCM_UNLIKELY (c_index + c_size > c_len)) \
896 scm_out_of_range (FUNC_NAME, index);
897
898
899 /* Template of an integer reference function. */
900 #define GENERIC_INTEGER_REF(_sign) \
901 SCM result; \
902 \
903 if (c_size < 3) \
904 { \
905 int swap; \
906 _sign int value; \
907 \
908 swap = !scm_is_eq (endianness, scm_i_native_endianness); \
909 switch (c_size) \
910 { \
911 case 1: \
912 { \
913 _sign char c_value8; \
914 memcpy (&c_value8, c_bv, 1); \
915 value = c_value8; \
916 } \
917 break; \
918 case 2: \
919 { \
920 INT_TYPE (16, _sign) c_value16; \
921 memcpy (&c_value16, c_bv, 2); \
922 if (swap) \
923 value = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
924 else \
925 value = c_value16; \
926 } \
927 break; \
928 default: \
929 abort (); \
930 } \
931 \
932 result = SCM_I_MAKINUM ((_sign int) value); \
933 } \
934 else \
935 result = bytevector_large_ref ((char *) c_bv, \
936 c_size, SIGNEDNESS (_sign), \
937 endianness); \
938 \
939 return result;
940
941 static inline SCM
942 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
943 {
944 GENERIC_INTEGER_REF (signed);
945 }
946
947 static inline SCM
948 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
949 {
950 GENERIC_INTEGER_REF (unsigned);
951 }
952
953
954 /* Template of an integer assignment function. */
955 #define GENERIC_INTEGER_SET(_sign) \
956 if (c_size < 3) \
957 { \
958 scm_t_signed_bits c_value; \
959 \
960 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
961 goto range_error; \
962 \
963 c_value = SCM_I_INUM (value); \
964 switch (c_size) \
965 { \
966 case 1: \
967 if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value))) \
968 { \
969 _sign char c_value8; \
970 c_value8 = (_sign char) c_value; \
971 memcpy (c_bv, &c_value8, 1); \
972 } \
973 else \
974 goto range_error; \
975 break; \
976 \
977 case 2: \
978 if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value))) \
979 { \
980 int swap; \
981 INT_TYPE (16, _sign) c_value16; \
982 \
983 swap = !scm_is_eq (endianness, scm_i_native_endianness); \
984 \
985 if (swap) \
986 c_value16 = (INT_TYPE (16, _sign)) bswap_16 (c_value); \
987 else \
988 c_value16 = c_value; \
989 \
990 memcpy (c_bv, &c_value16, 2); \
991 } \
992 else \
993 goto range_error; \
994 break; \
995 \
996 default: \
997 abort (); \
998 } \
999 } \
1000 else \
1001 { \
1002 int err; \
1003 \
1004 err = bytevector_large_set (c_bv, c_size, \
1005 SIGNEDNESS (_sign), \
1006 value, endianness); \
1007 if (err) \
1008 goto range_error; \
1009 } \
1010 \
1011 return; \
1012 \
1013 range_error: \
1014 scm_out_of_range (FUNC_NAME, value); \
1015 return;
1016
1017 static inline void
1018 bytevector_signed_set (char *c_bv, size_t c_size,
1019 SCM value, SCM endianness,
1020 const char *func_name)
1021 #define FUNC_NAME func_name
1022 {
1023 GENERIC_INTEGER_SET (signed);
1024 }
1025 #undef FUNC_NAME
1026
1027 static inline void
1028 bytevector_unsigned_set (char *c_bv, size_t c_size,
1029 SCM value, SCM endianness,
1030 const char *func_name)
1031 #define FUNC_NAME func_name
1032 {
1033 GENERIC_INTEGER_SET (unsigned);
1034 }
1035 #undef FUNC_NAME
1036
1037 #undef GENERIC_INTEGER_SET
1038 #undef GENERIC_INTEGER_REF
1039
1040
1041 SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
1042 (SCM bv, SCM index, SCM endianness, SCM size),
1043 "Return the @var{size}-octet long unsigned integer at index "
1044 "@var{index} in @var{bv}.")
1045 #define FUNC_NAME s_scm_bytevector_uint_ref
1046 {
1047 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1048
1049 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
1050 }
1051 #undef FUNC_NAME
1052
1053 SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1054 (SCM bv, SCM index, SCM endianness, SCM size),
1055 "Return the @var{size}-octet long unsigned integer at index "
1056 "@var{index} in @var{bv}.")
1057 #define FUNC_NAME s_scm_bytevector_sint_ref
1058 {
1059 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1060
1061 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1062 }
1063 #undef FUNC_NAME
1064
1065 SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
1066 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1067 "Set the @var{size}-octet long unsigned integer at @var{index} "
1068 "to @var{value}.")
1069 #define FUNC_NAME s_scm_bytevector_uint_set_x
1070 {
1071 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1072
1073 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
1074 FUNC_NAME);
1075
1076 return SCM_UNSPECIFIED;
1077 }
1078 #undef FUNC_NAME
1079
1080 SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
1081 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1082 "Set the @var{size}-octet long signed integer at @var{index} "
1083 "to @var{value}.")
1084 #define FUNC_NAME s_scm_bytevector_sint_set_x
1085 {
1086 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1087
1088 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
1089 FUNC_NAME);
1090
1091 return SCM_UNSPECIFIED;
1092 }
1093 #undef FUNC_NAME
1094
1095
1096 \f
1097 /* Operations on integers of arbitrary size. */
1098
1099 #define INTEGERS_TO_LIST(_sign) \
1100 SCM lst, pair; \
1101 size_t i, c_len, c_size; \
1102 \
1103 SCM_VALIDATE_BYTEVECTOR (1, bv); \
1104 SCM_VALIDATE_SYMBOL (2, endianness); \
1105 c_size = scm_to_unsigned_integer (size, 1, (size_t) -1); \
1106 \
1107 c_len = SCM_BYTEVECTOR_LENGTH (bv); \
1108 if (SCM_UNLIKELY (c_len < c_size)) \
1109 lst = SCM_EOL; \
1110 else \
1111 { \
1112 const char *c_bv; \
1113 \
1114 c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
1115 \
1116 lst = scm_make_list (scm_from_size_t (c_len / c_size), \
1117 SCM_UNSPECIFIED); \
1118 for (i = 0, pair = lst; \
1119 i <= c_len - c_size; \
1120 i += c_size, c_bv += c_size, pair = SCM_CDR (pair)) \
1121 { \
1122 SCM_SETCAR (pair, \
1123 bytevector_ ## _sign ## _ref (c_bv, c_size, \
1124 endianness)); \
1125 } \
1126 } \
1127 \
1128 return lst;
1129
1130 SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
1131 3, 0, 0,
1132 (SCM bv, SCM endianness, SCM size),
1133 "Return a list of signed integers of @var{size} octets "
1134 "representing the contents of @var{bv}.")
1135 #define FUNC_NAME s_scm_bytevector_to_sint_list
1136 {
1137 INTEGERS_TO_LIST (signed);
1138 }
1139 #undef FUNC_NAME
1140
1141 SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1142 3, 0, 0,
1143 (SCM bv, SCM endianness, SCM size),
1144 "Return a list of unsigned integers of @var{size} octets "
1145 "representing the contents of @var{bv}.")
1146 #define FUNC_NAME s_scm_bytevector_to_uint_list
1147 {
1148 INTEGERS_TO_LIST (unsigned);
1149 }
1150 #undef FUNC_NAME
1151
1152 #undef INTEGER_TO_LIST
1153
1154
1155 #define INTEGER_LIST_TO_BYTEVECTOR(_sign) \
1156 SCM bv; \
1157 size_t c_len; \
1158 size_t c_size; \
1159 char *c_bv, *c_bv_ptr; \
1160 \
1161 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len); \
1162 SCM_VALIDATE_SYMBOL (2, endianness); \
1163 c_size = scm_to_size_t (size); \
1164 \
1165 if (SCM_UNLIKELY (c_size == 0 || c_size >= (SCM_I_SIZE_MAX >> 3))) \
1166 scm_out_of_range (FUNC_NAME, size); \
1167 \
1168 bv = make_bytevector (c_len * c_size, SCM_ARRAY_ELEMENT_TYPE_VU8); \
1169 c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
1170 \
1171 for (c_bv_ptr = c_bv; \
1172 !scm_is_null (lst); \
1173 lst = SCM_CDR (lst), c_bv_ptr += c_size) \
1174 { \
1175 bytevector_ ## _sign ## _set (c_bv_ptr, c_size, \
1176 SCM_CAR (lst), endianness, \
1177 FUNC_NAME); \
1178 } \
1179 \
1180 return bv;
1181
1182
1183 SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
1184 3, 0, 0,
1185 (SCM lst, SCM endianness, SCM size),
1186 "Return a bytevector containing the unsigned integers "
1187 "listed in @var{lst} and encoded on @var{size} octets "
1188 "according to @var{endianness}.")
1189 #define FUNC_NAME s_scm_uint_list_to_bytevector
1190 {
1191 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1192 }
1193 #undef FUNC_NAME
1194
1195 SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
1196 3, 0, 0,
1197 (SCM lst, SCM endianness, SCM size),
1198 "Return a bytevector containing the signed integers "
1199 "listed in @var{lst} and encoded on @var{size} octets "
1200 "according to @var{endianness}.")
1201 #define FUNC_NAME s_scm_sint_list_to_bytevector
1202 {
1203 INTEGER_LIST_TO_BYTEVECTOR (signed);
1204 }
1205 #undef FUNC_NAME
1206
1207 #undef INTEGER_LIST_TO_BYTEVECTOR
1208
1209
1210 \f
1211 /* Operations on 16-bit integers. */
1212
1213 SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
1214 3, 0, 0,
1215 (SCM bv, SCM index, SCM endianness),
1216 "Return the unsigned 16-bit integer from @var{bv} at "
1217 "@var{index}.")
1218 #define FUNC_NAME s_scm_bytevector_u16_ref
1219 {
1220 INTEGER_REF (16, unsigned);
1221 }
1222 #undef FUNC_NAME
1223
1224 SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
1225 3, 0, 0,
1226 (SCM bv, SCM index, SCM endianness),
1227 "Return the signed 16-bit integer from @var{bv} at "
1228 "@var{index}.")
1229 #define FUNC_NAME s_scm_bytevector_s16_ref
1230 {
1231 INTEGER_REF (16, signed);
1232 }
1233 #undef FUNC_NAME
1234
1235 SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1236 2, 0, 0,
1237 (SCM bv, SCM index),
1238 "Return the unsigned 16-bit integer from @var{bv} at "
1239 "@var{index} using the native endianness.")
1240 #define FUNC_NAME s_scm_bytevector_u16_native_ref
1241 {
1242 INTEGER_NATIVE_REF (16, unsigned);
1243 }
1244 #undef FUNC_NAME
1245
1246 SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1247 2, 0, 0,
1248 (SCM bv, SCM index),
1249 "Return the unsigned 16-bit integer from @var{bv} at "
1250 "@var{index} using the native endianness.")
1251 #define FUNC_NAME s_scm_bytevector_s16_native_ref
1252 {
1253 INTEGER_NATIVE_REF (16, signed);
1254 }
1255 #undef FUNC_NAME
1256
1257 SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
1258 4, 0, 0,
1259 (SCM bv, SCM index, SCM value, SCM endianness),
1260 "Store @var{value} in @var{bv} at @var{index} according to "
1261 "@var{endianness}.")
1262 #define FUNC_NAME s_scm_bytevector_u16_set_x
1263 {
1264 INTEGER_SET (16, unsigned);
1265 }
1266 #undef FUNC_NAME
1267
1268 SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
1269 4, 0, 0,
1270 (SCM bv, SCM index, SCM value, SCM endianness),
1271 "Store @var{value} in @var{bv} at @var{index} according to "
1272 "@var{endianness}.")
1273 #define FUNC_NAME s_scm_bytevector_s16_set_x
1274 {
1275 INTEGER_SET (16, signed);
1276 }
1277 #undef FUNC_NAME
1278
1279 SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1280 3, 0, 0,
1281 (SCM bv, SCM index, SCM value),
1282 "Store the unsigned integer @var{value} at index @var{index} "
1283 "of @var{bv} using the native endianness.")
1284 #define FUNC_NAME s_scm_bytevector_u16_native_set_x
1285 {
1286 INTEGER_NATIVE_SET (16, unsigned);
1287 }
1288 #undef FUNC_NAME
1289
1290 SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1291 3, 0, 0,
1292 (SCM bv, SCM index, SCM value),
1293 "Store the signed integer @var{value} at index @var{index} "
1294 "of @var{bv} using the native endianness.")
1295 #define FUNC_NAME s_scm_bytevector_s16_native_set_x
1296 {
1297 INTEGER_NATIVE_SET (16, signed);
1298 }
1299 #undef FUNC_NAME
1300
1301
1302 \f
1303 /* Operations on 32-bit integers. */
1304
1305 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1306 arbitrary 32-bit integers. Thus we fall back to using the
1307 `large_{ref,set}' variants on 32-bit machines. */
1308
1309 #define LARGE_INTEGER_REF(_len, _sign) \
1310 INTEGER_ACCESSOR_PROLOGUE(_len, _sign); \
1311 SCM_VALIDATE_SYMBOL (3, endianness); \
1312 \
1313 return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8, \
1314 SIGNEDNESS (_sign), endianness));
1315
1316 #define LARGE_INTEGER_SET(_len, _sign) \
1317 int err; \
1318 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
1319 SCM_VALIDATE_SYMBOL (4, endianness); \
1320 \
1321 err = bytevector_large_set ((char *) c_bv + c_index, _len / 8, \
1322 SIGNEDNESS (_sign), value, endianness); \
1323 if (SCM_UNLIKELY (err)) \
1324 scm_out_of_range (FUNC_NAME, value); \
1325 \
1326 return SCM_UNSPECIFIED;
1327
1328 #define LARGE_INTEGER_NATIVE_REF(_len, _sign) \
1329 INTEGER_ACCESSOR_PROLOGUE(_len, _sign); \
1330 return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8, \
1331 SIGNEDNESS (_sign), scm_i_native_endianness));
1332
1333 #define LARGE_INTEGER_NATIVE_SET(_len, _sign) \
1334 int err; \
1335 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
1336 \
1337 err = bytevector_large_set ((char *) c_bv + c_index, _len / 8, \
1338 SIGNEDNESS (_sign), value, \
1339 scm_i_native_endianness); \
1340 if (SCM_UNLIKELY (err)) \
1341 scm_out_of_range (FUNC_NAME, value); \
1342 \
1343 return SCM_UNSPECIFIED;
1344
1345
1346 SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
1347 3, 0, 0,
1348 (SCM bv, SCM index, SCM endianness),
1349 "Return the unsigned 32-bit integer from @var{bv} at "
1350 "@var{index}.")
1351 #define FUNC_NAME s_scm_bytevector_u32_ref
1352 {
1353 #if SIZEOF_VOID_P > 4
1354 INTEGER_REF (32, unsigned);
1355 #else
1356 LARGE_INTEGER_REF (32, unsigned);
1357 #endif
1358 }
1359 #undef FUNC_NAME
1360
1361 SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
1362 3, 0, 0,
1363 (SCM bv, SCM index, SCM endianness),
1364 "Return the signed 32-bit integer from @var{bv} at "
1365 "@var{index}.")
1366 #define FUNC_NAME s_scm_bytevector_s32_ref
1367 {
1368 #if SIZEOF_VOID_P > 4
1369 INTEGER_REF (32, signed);
1370 #else
1371 LARGE_INTEGER_REF (32, signed);
1372 #endif
1373 }
1374 #undef FUNC_NAME
1375
1376 SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1377 2, 0, 0,
1378 (SCM bv, SCM index),
1379 "Return the unsigned 32-bit integer from @var{bv} at "
1380 "@var{index} using the native endianness.")
1381 #define FUNC_NAME s_scm_bytevector_u32_native_ref
1382 {
1383 #if SIZEOF_VOID_P > 4
1384 INTEGER_NATIVE_REF (32, unsigned);
1385 #else
1386 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1387 #endif
1388 }
1389 #undef FUNC_NAME
1390
1391 SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1392 2, 0, 0,
1393 (SCM bv, SCM index),
1394 "Return the unsigned 32-bit integer from @var{bv} at "
1395 "@var{index} using the native endianness.")
1396 #define FUNC_NAME s_scm_bytevector_s32_native_ref
1397 {
1398 #if SIZEOF_VOID_P > 4
1399 INTEGER_NATIVE_REF (32, signed);
1400 #else
1401 LARGE_INTEGER_NATIVE_REF (32, signed);
1402 #endif
1403 }
1404 #undef FUNC_NAME
1405
1406 SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
1407 4, 0, 0,
1408 (SCM bv, SCM index, SCM value, SCM endianness),
1409 "Store @var{value} in @var{bv} at @var{index} according to "
1410 "@var{endianness}.")
1411 #define FUNC_NAME s_scm_bytevector_u32_set_x
1412 {
1413 #if SIZEOF_VOID_P > 4
1414 INTEGER_SET (32, unsigned);
1415 #else
1416 LARGE_INTEGER_SET (32, unsigned);
1417 #endif
1418 }
1419 #undef FUNC_NAME
1420
1421 SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
1422 4, 0, 0,
1423 (SCM bv, SCM index, SCM value, SCM endianness),
1424 "Store @var{value} in @var{bv} at @var{index} according to "
1425 "@var{endianness}.")
1426 #define FUNC_NAME s_scm_bytevector_s32_set_x
1427 {
1428 #if SIZEOF_VOID_P > 4
1429 INTEGER_SET (32, signed);
1430 #else
1431 LARGE_INTEGER_SET (32, signed);
1432 #endif
1433 }
1434 #undef FUNC_NAME
1435
1436 SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1437 3, 0, 0,
1438 (SCM bv, SCM index, SCM value),
1439 "Store the unsigned integer @var{value} at index @var{index} "
1440 "of @var{bv} using the native endianness.")
1441 #define FUNC_NAME s_scm_bytevector_u32_native_set_x
1442 {
1443 #if SIZEOF_VOID_P > 4
1444 INTEGER_NATIVE_SET (32, unsigned);
1445 #else
1446 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1447 #endif
1448 }
1449 #undef FUNC_NAME
1450
1451 SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1452 3, 0, 0,
1453 (SCM bv, SCM index, SCM value),
1454 "Store the signed integer @var{value} at index @var{index} "
1455 "of @var{bv} using the native endianness.")
1456 #define FUNC_NAME s_scm_bytevector_s32_native_set_x
1457 {
1458 #if SIZEOF_VOID_P > 4
1459 INTEGER_NATIVE_SET (32, signed);
1460 #else
1461 LARGE_INTEGER_NATIVE_SET (32, signed);
1462 #endif
1463 }
1464 #undef FUNC_NAME
1465
1466
1467 \f
1468 /* Operations on 64-bit integers. */
1469
1470 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1471
1472 SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
1473 3, 0, 0,
1474 (SCM bv, SCM index, SCM endianness),
1475 "Return the unsigned 64-bit integer from @var{bv} at "
1476 "@var{index}.")
1477 #define FUNC_NAME s_scm_bytevector_u64_ref
1478 {
1479 LARGE_INTEGER_REF (64, unsigned);
1480 }
1481 #undef FUNC_NAME
1482
1483 SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
1484 3, 0, 0,
1485 (SCM bv, SCM index, SCM endianness),
1486 "Return the signed 64-bit integer from @var{bv} at "
1487 "@var{index}.")
1488 #define FUNC_NAME s_scm_bytevector_s64_ref
1489 {
1490 LARGE_INTEGER_REF (64, signed);
1491 }
1492 #undef FUNC_NAME
1493
1494 SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1495 2, 0, 0,
1496 (SCM bv, SCM index),
1497 "Return the unsigned 64-bit integer from @var{bv} at "
1498 "@var{index} using the native endianness.")
1499 #define FUNC_NAME s_scm_bytevector_u64_native_ref
1500 {
1501 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1502 }
1503 #undef FUNC_NAME
1504
1505 SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1506 2, 0, 0,
1507 (SCM bv, SCM index),
1508 "Return the unsigned 64-bit integer from @var{bv} at "
1509 "@var{index} using the native endianness.")
1510 #define FUNC_NAME s_scm_bytevector_s64_native_ref
1511 {
1512 LARGE_INTEGER_NATIVE_REF (64, signed);
1513 }
1514 #undef FUNC_NAME
1515
1516 SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
1517 4, 0, 0,
1518 (SCM bv, SCM index, SCM value, SCM endianness),
1519 "Store @var{value} in @var{bv} at @var{index} according to "
1520 "@var{endianness}.")
1521 #define FUNC_NAME s_scm_bytevector_u64_set_x
1522 {
1523 LARGE_INTEGER_SET (64, unsigned);
1524 }
1525 #undef FUNC_NAME
1526
1527 SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
1528 4, 0, 0,
1529 (SCM bv, SCM index, SCM value, SCM endianness),
1530 "Store @var{value} in @var{bv} at @var{index} according to "
1531 "@var{endianness}.")
1532 #define FUNC_NAME s_scm_bytevector_s64_set_x
1533 {
1534 LARGE_INTEGER_SET (64, signed);
1535 }
1536 #undef FUNC_NAME
1537
1538 SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1539 3, 0, 0,
1540 (SCM bv, SCM index, SCM value),
1541 "Store the unsigned integer @var{value} at index @var{index} "
1542 "of @var{bv} using the native endianness.")
1543 #define FUNC_NAME s_scm_bytevector_u64_native_set_x
1544 {
1545 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1546 }
1547 #undef FUNC_NAME
1548
1549 SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1550 3, 0, 0,
1551 (SCM bv, SCM index, SCM value),
1552 "Store the signed integer @var{value} at index @var{index} "
1553 "of @var{bv} using the native endianness.")
1554 #define FUNC_NAME s_scm_bytevector_s64_native_set_x
1555 {
1556 LARGE_INTEGER_NATIVE_SET (64, signed);
1557 }
1558 #undef FUNC_NAME
1559
1560
1561 \f
1562 /* Operations on IEEE-754 numbers. */
1563
1564 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1565 However, in R6RS, when the endianness is `little', little endian is
1566 assumed for both the byte order and the word order. This is clear from
1567 Section 2.1 of R6RS-lib (in response to
1568 http://www.r6rs.org/formal-comments/comment-187.txt). */
1569
1570 union scm_ieee754_float
1571 {
1572 float f;
1573 scm_t_uint32 i;
1574 };
1575
1576 union scm_ieee754_double
1577 {
1578 double d;
1579 scm_t_uint64 i;
1580 };
1581
1582
1583 /* Convert to/from a floating-point number with different endianness. This
1584 method is probably not the most efficient but it should be portable. */
1585
1586 static inline void
1587 float_to_foreign_endianness (union scm_ieee754_float *target,
1588 float source)
1589 {
1590 union scm_ieee754_float input;
1591
1592 input.f = source;
1593 target->i = bswap_32 (input.i);
1594 }
1595
1596 static inline float
1597 float_from_foreign_endianness (const union scm_ieee754_float *source)
1598 {
1599 union scm_ieee754_float result;
1600
1601 result.i = bswap_32 (source->i);
1602
1603 return (result.f);
1604 }
1605
1606 static inline void
1607 double_to_foreign_endianness (union scm_ieee754_double *target,
1608 double source)
1609 {
1610 union scm_ieee754_double input;
1611
1612 input.d = source;
1613 target->i = bswap_64 (input.i);
1614 }
1615
1616 static inline double
1617 double_from_foreign_endianness (const union scm_ieee754_double *source)
1618 {
1619 union scm_ieee754_double result;
1620
1621 result.i = bswap_64 (source->i);
1622
1623 return (result.d);
1624 }
1625
1626 /* Template macros to abstract over doubles and floats.
1627 XXX: Guile can only convert to/from doubles. */
1628 #define IEEE754_UNION(_c_type) union scm_ieee754_ ## _c_type
1629 #define IEEE754_TO_SCM(_c_type) scm_from_double
1630 #define IEEE754_FROM_SCM(_c_type) scm_to_double
1631 #define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) \
1632 _c_type ## _from_foreign_endianness
1633 #define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) \
1634 _c_type ## _to_foreign_endianness
1635
1636
1637 /* FIXME: SCM_VALIDATE_REAL rejects integers, etc. grrr */
1638 #define VALIDATE_REAL(pos, v) \
1639 do { \
1640 SCM_ASSERT_TYPE (scm_is_real (v), v, pos, FUNC_NAME, "real"); \
1641 } while (0)
1642
1643 /* Templace getters and setters. */
1644
1645 #define IEEE754_ACCESSOR_PROLOGUE(_type) \
1646 INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1647
1648 #define IEEE754_REF(_type) \
1649 _type c_result; \
1650 \
1651 IEEE754_ACCESSOR_PROLOGUE (_type); \
1652 SCM_VALIDATE_SYMBOL (3, endianness); \
1653 \
1654 if (scm_is_eq (endianness, scm_i_native_endianness)) \
1655 memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
1656 else \
1657 { \
1658 IEEE754_UNION (_type) c_raw; \
1659 \
1660 memcpy (&c_raw, &c_bv[c_index], sizeof (c_raw)); \
1661 c_result = \
1662 IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_raw); \
1663 } \
1664 \
1665 return (IEEE754_TO_SCM (_type) (c_result));
1666
1667 #define IEEE754_NATIVE_REF(_type) \
1668 _type c_result; \
1669 \
1670 IEEE754_ACCESSOR_PROLOGUE (_type); \
1671 \
1672 memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
1673 return (IEEE754_TO_SCM (_type) (c_result));
1674
1675 #define IEEE754_SET(_type) \
1676 _type c_value; \
1677 \
1678 IEEE754_ACCESSOR_PROLOGUE (_type); \
1679 VALIDATE_REAL (3, value); \
1680 SCM_VALIDATE_SYMBOL (4, endianness); \
1681 c_value = IEEE754_FROM_SCM (_type) (value); \
1682 \
1683 if (scm_is_eq (endianness, scm_i_native_endianness)) \
1684 memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
1685 else \
1686 { \
1687 IEEE754_UNION (_type) c_raw; \
1688 \
1689 IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_raw, c_value); \
1690 memcpy (&c_bv[c_index], &c_raw, sizeof (c_raw)); \
1691 } \
1692 \
1693 return SCM_UNSPECIFIED;
1694
1695 #define IEEE754_NATIVE_SET(_type) \
1696 _type c_value; \
1697 \
1698 IEEE754_ACCESSOR_PROLOGUE (_type); \
1699 VALIDATE_REAL (3, value); \
1700 c_value = IEEE754_FROM_SCM (_type) (value); \
1701 \
1702 memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
1703 return SCM_UNSPECIFIED;
1704
1705
1706 /* Single precision. */
1707
1708 SCM_DEFINE (scm_bytevector_ieee_single_ref,
1709 "bytevector-ieee-single-ref",
1710 3, 0, 0,
1711 (SCM bv, SCM index, SCM endianness),
1712 "Return the IEEE-754 single from @var{bv} at "
1713 "@var{index}.")
1714 #define FUNC_NAME s_scm_bytevector_ieee_single_ref
1715 {
1716 IEEE754_REF (float);
1717 }
1718 #undef FUNC_NAME
1719
1720 SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
1721 "bytevector-ieee-single-native-ref",
1722 2, 0, 0,
1723 (SCM bv, SCM index),
1724 "Return the IEEE-754 single from @var{bv} at "
1725 "@var{index} using the native endianness.")
1726 #define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
1727 {
1728 IEEE754_NATIVE_REF (float);
1729 }
1730 #undef FUNC_NAME
1731
1732 SCM_DEFINE (scm_bytevector_ieee_single_set_x,
1733 "bytevector-ieee-single-set!",
1734 4, 0, 0,
1735 (SCM bv, SCM index, SCM value, SCM endianness),
1736 "Store real @var{value} in @var{bv} at @var{index} according to "
1737 "@var{endianness}.")
1738 #define FUNC_NAME s_scm_bytevector_ieee_single_set_x
1739 {
1740 IEEE754_SET (float);
1741 }
1742 #undef FUNC_NAME
1743
1744 SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
1745 "bytevector-ieee-single-native-set!",
1746 3, 0, 0,
1747 (SCM bv, SCM index, SCM value),
1748 "Store the real @var{value} at index @var{index} "
1749 "of @var{bv} using the native endianness.")
1750 #define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
1751 {
1752 IEEE754_NATIVE_SET (float);
1753 }
1754 #undef FUNC_NAME
1755
1756
1757 /* Double precision. */
1758
1759 SCM_DEFINE (scm_bytevector_ieee_double_ref,
1760 "bytevector-ieee-double-ref",
1761 3, 0, 0,
1762 (SCM bv, SCM index, SCM endianness),
1763 "Return the IEEE-754 double from @var{bv} at "
1764 "@var{index}.")
1765 #define FUNC_NAME s_scm_bytevector_ieee_double_ref
1766 {
1767 IEEE754_REF (double);
1768 }
1769 #undef FUNC_NAME
1770
1771 SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
1772 "bytevector-ieee-double-native-ref",
1773 2, 0, 0,
1774 (SCM bv, SCM index),
1775 "Return the IEEE-754 double from @var{bv} at "
1776 "@var{index} using the native endianness.")
1777 #define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
1778 {
1779 IEEE754_NATIVE_REF (double);
1780 }
1781 #undef FUNC_NAME
1782
1783 SCM_DEFINE (scm_bytevector_ieee_double_set_x,
1784 "bytevector-ieee-double-set!",
1785 4, 0, 0,
1786 (SCM bv, SCM index, SCM value, SCM endianness),
1787 "Store real @var{value} in @var{bv} at @var{index} according to "
1788 "@var{endianness}.")
1789 #define FUNC_NAME s_scm_bytevector_ieee_double_set_x
1790 {
1791 IEEE754_SET (double);
1792 }
1793 #undef FUNC_NAME
1794
1795 SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
1796 "bytevector-ieee-double-native-set!",
1797 3, 0, 0,
1798 (SCM bv, SCM index, SCM value),
1799 "Store the real @var{value} at index @var{index} "
1800 "of @var{bv} using the native endianness.")
1801 #define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
1802 {
1803 IEEE754_NATIVE_SET (double);
1804 }
1805 #undef FUNC_NAME
1806
1807
1808 #undef IEEE754_UNION
1809 #undef IEEE754_TO_SCM
1810 #undef IEEE754_FROM_SCM
1811 #undef IEEE754_FROM_FOREIGN_ENDIANNESS
1812 #undef IEEE754_TO_FOREIGN_ENDIANNESS
1813 #undef IEEE754_REF
1814 #undef IEEE754_NATIVE_REF
1815 #undef IEEE754_SET
1816 #undef IEEE754_NATIVE_SET
1817
1818 \f
1819 /* Operations on strings. */
1820
1821
1822 /* Produce a function that returns the length of a UTF-encoded string. */
1823 #define UTF_STRLEN_FUNCTION(_utf_width) \
1824 static inline size_t \
1825 utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str) \
1826 { \
1827 size_t len = 0; \
1828 const uint ## _utf_width ## _t *ptr; \
1829 for (ptr = str; \
1830 *ptr != 0; \
1831 ptr++) \
1832 { \
1833 len++; \
1834 } \
1835 \
1836 return (len * ((_utf_width) / 8)); \
1837 }
1838
1839 UTF_STRLEN_FUNCTION (8)
1840
1841
1842 /* Return the length (in bytes) of STR, a UTF-(UTF_WIDTH) encoded string. */
1843 #define UTF_STRLEN(_utf_width, _str) \
1844 utf ## _utf_width ## _strlen (_str)
1845
1846 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1847 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1848 encoding name). */
1849 static inline void
1850 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1851 {
1852 strcpy (name, "UTF-");
1853 strcat (name, ((utf_width == 8)
1854 ? "8"
1855 : ((utf_width == 16)
1856 ? "16"
1857 : ((utf_width == 32)
1858 ? "32"
1859 : "??"))));
1860 strcat (name,
1861 ((scm_is_eq (endianness, scm_sym_big))
1862 ? "BE"
1863 : ((scm_is_eq (endianness, scm_sym_little))
1864 ? "LE"
1865 : "unknown")));
1866 }
1867
1868 /* Maximum length of a UTF encoding name. */
1869 #define MAX_UTF_ENCODING_NAME_LEN 16
1870
1871 /* Produce the body of a `string->utf' function. */
1872 #define STRING_TO_UTF(_utf_width) \
1873 SCM utf; \
1874 int err; \
1875 char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
1876 char *c_utf = NULL; \
1877 size_t c_strlen, c_utf_len = 0; \
1878 \
1879 SCM_VALIDATE_STRING (1, str); \
1880 if (scm_is_eq (endianness, SCM_UNDEFINED)) \
1881 endianness = scm_sym_big; \
1882 else \
1883 SCM_VALIDATE_SYMBOL (2, endianness); \
1884 \
1885 utf_encoding_name (c_utf_name, (_utf_width), endianness); \
1886 \
1887 c_strlen = scm_i_string_length (str); \
1888 if (scm_i_is_narrow_string (str)) \
1889 { \
1890 err = mem_iconveh (scm_i_string_chars (str), c_strlen, \
1891 "ISO-8859-1", c_utf_name, \
1892 iconveh_question_mark, NULL, \
1893 &c_utf, &c_utf_len); \
1894 if (SCM_UNLIKELY (err)) \
1895 scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
1896 scm_list_1 (str), err); \
1897 } \
1898 else \
1899 { \
1900 const scm_t_wchar *wbuf = scm_i_string_wide_chars (str); \
1901 c_utf = u32_conv_to_encoding (c_utf_name, \
1902 iconveh_question_mark, \
1903 (scm_t_uint32 *) wbuf, \
1904 c_strlen, NULL, NULL, &c_utf_len); \
1905 if (SCM_UNLIKELY (c_utf == NULL)) \
1906 scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
1907 scm_list_1 (str), errno); \
1908 } \
1909 scm_dynwind_begin (0); \
1910 scm_dynwind_free (c_utf); \
1911 utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); \
1912 memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); \
1913 scm_dynwind_end (); \
1914 \
1915 return (utf);
1916
1917
1918
1919 SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1920 1, 0, 0,
1921 (SCM str),
1922 "Return a newly allocated bytevector that contains the UTF-8 "
1923 "encoding of @var{str}.")
1924 #define FUNC_NAME s_scm_string_to_utf8
1925 {
1926 SCM utf;
1927 scm_t_uint8 *c_utf;
1928 size_t c_utf_len = 0;
1929
1930 SCM_VALIDATE_STRING (1, str);
1931
1932 c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
1933 utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
1934 memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
1935 free (c_utf);
1936
1937 return (utf);
1938 }
1939 #undef FUNC_NAME
1940
1941 SCM_DEFINE (scm_string_to_utf16, "string->utf16",
1942 1, 1, 0,
1943 (SCM str, SCM endianness),
1944 "Return a newly allocated bytevector that contains the UTF-16 "
1945 "encoding of @var{str}.")
1946 #define FUNC_NAME s_scm_string_to_utf16
1947 {
1948 STRING_TO_UTF (16);
1949 }
1950 #undef FUNC_NAME
1951
1952 static void
1953 swap_u32 (scm_t_wchar *vals, size_t len)
1954 {
1955 size_t n;
1956 for (n = 0; n < len; n++)
1957 vals[n] = bswap_32 (vals[n]);
1958 }
1959
1960 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1961 1, 1, 0,
1962 (SCM str, SCM endianness),
1963 "Return a newly allocated bytevector that contains the UTF-32 "
1964 "encoding of @var{str}.")
1965 #define FUNC_NAME s_scm_string_to_utf32
1966 {
1967 SCM bv;
1968 scm_t_wchar *wchars;
1969 size_t wchar_len, bytes_len;
1970
1971 wchars = scm_to_utf32_stringn (str, &wchar_len);
1972 bytes_len = wchar_len * sizeof (scm_t_wchar);
1973 if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
1974 scm_i_native_endianness))
1975 swap_u32 (wchars, wchar_len);
1976
1977 bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
1978 memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
1979 free (wchars);
1980
1981 return bv;
1982 }
1983 #undef FUNC_NAME
1984
1985
1986 /* Produce the body of a function that converts a UTF-encoded bytevector to a
1987 string. */
1988 #define UTF_TO_STRING(_utf_width) \
1989 SCM str = SCM_BOOL_F; \
1990 int err; \
1991 char *c_str = NULL; \
1992 char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
1993 char *c_utf; \
1994 size_t c_strlen = 0, c_utf_len = 0; \
1995 \
1996 SCM_VALIDATE_BYTEVECTOR (1, utf); \
1997 if (scm_is_eq (endianness, SCM_UNDEFINED)) \
1998 endianness = scm_sym_big; \
1999 else \
2000 SCM_VALIDATE_SYMBOL (2, endianness); \
2001 \
2002 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf); \
2003 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf); \
2004 utf_encoding_name (c_utf_name, (_utf_width), endianness); \
2005 \
2006 err = mem_iconveh (c_utf, c_utf_len, \
2007 c_utf_name, "UTF-8", \
2008 iconveh_question_mark, NULL, \
2009 &c_str, &c_strlen); \
2010 if (SCM_UNLIKELY (err)) \
2011 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", \
2012 scm_list_1 (utf), err); \
2013 else \
2014 { \
2015 str = scm_from_stringn (c_str, c_strlen, "UTF-8", \
2016 SCM_FAILED_CONVERSION_ERROR); \
2017 free (c_str); \
2018 } \
2019 return (str);
2020
2021
2022 SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2023 1, 0, 0,
2024 (SCM utf),
2025 "Return a newly allocate string that contains from the UTF-8-"
2026 "encoded contents of bytevector @var{utf}.")
2027 #define FUNC_NAME s_scm_utf8_to_string
2028 {
2029 SCM str;
2030 const char *c_utf;
2031 size_t c_utf_len = 0;
2032
2033 SCM_VALIDATE_BYTEVECTOR (1, utf);
2034
2035 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2036 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2037 str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
2038 SCM_FAILED_CONVERSION_ERROR);
2039
2040 return (str);
2041 }
2042 #undef FUNC_NAME
2043
2044 SCM_DEFINE (scm_utf16_to_string, "utf16->string",
2045 1, 1, 0,
2046 (SCM utf, SCM endianness),
2047 "Return a newly allocate string that contains from the UTF-16-"
2048 "encoded contents of bytevector @var{utf}.")
2049 #define FUNC_NAME s_scm_utf16_to_string
2050 {
2051 UTF_TO_STRING (16);
2052 }
2053 #undef FUNC_NAME
2054
2055 SCM_DEFINE (scm_utf32_to_string, "utf32->string",
2056 1, 1, 0,
2057 (SCM utf, SCM endianness),
2058 "Return a newly allocate string that contains from the UTF-32-"
2059 "encoded contents of bytevector @var{utf}.")
2060 #define FUNC_NAME s_scm_utf32_to_string
2061 {
2062 UTF_TO_STRING (32);
2063 }
2064 #undef FUNC_NAME
2065
2066 \f
2067 /* Bytevectors as generalized vectors & arrays. */
2068
2069 #define COMPLEX_ACCESSOR_PROLOGUE(_type) \
2070 size_t c_len, c_index; \
2071 char *c_bv; \
2072 \
2073 SCM_VALIDATE_BYTEVECTOR (1, bv); \
2074 c_index = scm_to_size_t (index); \
2075 \
2076 c_len = SCM_BYTEVECTOR_LENGTH (bv); \
2077 c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
2078 \
2079 if (SCM_UNLIKELY (c_index + 2 * sizeof (_type) - 1 >= c_len)) \
2080 scm_out_of_range (FUNC_NAME, index);
2081
2082 /* Template for native access to complex numbers of type TYPE. */
2083 #define COMPLEX_NATIVE_REF(_type) \
2084 SCM result; \
2085 \
2086 COMPLEX_ACCESSOR_PROLOGUE (_type); \
2087 \
2088 { \
2089 _type real, imag; \
2090 \
2091 memcpy (&real, &c_bv[c_index], sizeof (_type)); \
2092 memcpy (&imag, &c_bv[c_index + sizeof (_type)], sizeof (_type)); \
2093 \
2094 result = scm_c_make_rectangular (real, imag); \
2095 } \
2096 \
2097 return result;
2098
2099 static SCM
2100 bytevector_ref_c32 (SCM bv, SCM index)
2101 #define FUNC_NAME "bytevector_ref_c32"
2102 {
2103 COMPLEX_NATIVE_REF (float);
2104 }
2105 #undef FUNC_NAME
2106
2107 static SCM
2108 bytevector_ref_c64 (SCM bv, SCM index)
2109 #define FUNC_NAME "bytevector_ref_c64"
2110 {
2111 COMPLEX_NATIVE_REF (double);
2112 }
2113 #undef FUNC_NAME
2114
2115 typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM);
2116
2117 static const scm_t_bytevector_ref_fn
2118 bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
2119 {
2120 NULL, /* SCM */
2121 NULL, /* CHAR */
2122 NULL, /* BIT */
2123 scm_bytevector_u8_ref, /* VU8 */
2124 scm_bytevector_u8_ref, /* U8 */
2125 scm_bytevector_s8_ref,
2126 scm_bytevector_u16_native_ref,
2127 scm_bytevector_s16_native_ref,
2128 scm_bytevector_u32_native_ref,
2129 scm_bytevector_s32_native_ref,
2130 scm_bytevector_u64_native_ref,
2131 scm_bytevector_s64_native_ref,
2132 scm_bytevector_ieee_single_native_ref,
2133 scm_bytevector_ieee_double_native_ref,
2134 bytevector_ref_c32,
2135 bytevector_ref_c64
2136 };
2137
2138 static SCM
2139 bv_handle_ref (scm_t_array_handle *h, size_t index)
2140 {
2141 SCM byte_index;
2142 scm_t_bytevector_ref_fn ref_fn;
2143
2144 ref_fn = bytevector_ref_fns[h->element_type];
2145 byte_index =
2146 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2147 return ref_fn (h->array, byte_index);
2148 }
2149
2150 /* Template for native modification of complex numbers of type TYPE. */
2151 #define COMPLEX_NATIVE_SET(_type) \
2152 COMPLEX_ACCESSOR_PROLOGUE (_type); \
2153 \
2154 { \
2155 _type real, imag; \
2156 real = scm_c_real_part (value); \
2157 imag = scm_c_imag_part (value); \
2158 \
2159 memcpy (&c_bv[c_index], &real, sizeof (_type)); \
2160 memcpy (&c_bv[c_index + sizeof (_type)], &imag, sizeof (_type)); \
2161 } \
2162 \
2163 return SCM_UNSPECIFIED;
2164
2165 static SCM
2166 bytevector_set_c32 (SCM bv, SCM index, SCM value)
2167 #define FUNC_NAME "bytevector_set_c32"
2168 {
2169 COMPLEX_NATIVE_SET (float);
2170 }
2171 #undef FUNC_NAME
2172
2173 static SCM
2174 bytevector_set_c64 (SCM bv, SCM index, SCM value)
2175 #define FUNC_NAME "bytevector_set_c64"
2176 {
2177 COMPLEX_NATIVE_SET (double);
2178 }
2179 #undef FUNC_NAME
2180
2181 typedef SCM (*scm_t_bytevector_set_fn)(SCM, SCM, SCM);
2182
2183 const scm_t_bytevector_set_fn bytevector_set_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
2184 {
2185 NULL, /* SCM */
2186 NULL, /* CHAR */
2187 NULL, /* BIT */
2188 scm_bytevector_u8_set_x, /* VU8 */
2189 scm_bytevector_u8_set_x, /* U8 */
2190 scm_bytevector_s8_set_x,
2191 scm_bytevector_u16_native_set_x,
2192 scm_bytevector_s16_native_set_x,
2193 scm_bytevector_u32_native_set_x,
2194 scm_bytevector_s32_native_set_x,
2195 scm_bytevector_u64_native_set_x,
2196 scm_bytevector_s64_native_set_x,
2197 scm_bytevector_ieee_single_native_set_x,
2198 scm_bytevector_ieee_double_native_set_x,
2199 bytevector_set_c32,
2200 bytevector_set_c64
2201 };
2202
2203 static void
2204 bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2205 {
2206 SCM byte_index;
2207 scm_t_bytevector_set_fn set_fn;
2208
2209 set_fn = bytevector_set_fns[h->element_type];
2210 byte_index =
2211 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2212 set_fn (h->array, byte_index, val);
2213 }
2214
2215 static void
2216 bytevector_get_handle (SCM v, scm_t_array_handle *h)
2217 {
2218 h->array = v;
2219 h->ndims = 1;
2220 h->dims = &h->dim0;
2221 h->dim0.lbnd = 0;
2222 h->dim0.ubnd = SCM_BYTEVECTOR_TYPED_LENGTH (v) - 1;
2223 h->dim0.inc = 1;
2224 h->element_type = SCM_BYTEVECTOR_ELEMENT_TYPE (v);
2225 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2226 }
2227
2228 \f
2229 /* Initialization. */
2230
2231 void
2232 scm_bootstrap_bytevectors (void)
2233 {
2234 /* This must be instantiated here because the generalized-vector API may
2235 want to access bytevectors even though `(rnrs bytevectors)' hasn't been
2236 loaded. */
2237 scm_null_bytevector = make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8);
2238
2239 #ifdef WORDS_BIGENDIAN
2240 scm_i_native_endianness = scm_from_latin1_symbol ("big");
2241 #else
2242 scm_i_native_endianness = scm_from_latin1_symbol ("little");
2243 #endif
2244
2245 scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
2246 "scm_init_bytevectors",
2247 (scm_t_extension_init_func) scm_init_bytevectors,
2248 NULL);
2249
2250 {
2251 scm_t_array_implementation impl;
2252
2253 impl.tag = scm_tc7_bytevector;
2254 impl.mask = 0x7f;
2255 impl.vref = bv_handle_ref;
2256 impl.vset = bv_handle_set_x;
2257 impl.get_handle = bytevector_get_handle;
2258 scm_i_register_array_implementation (&impl);
2259 scm_i_register_vector_constructor
2260 (scm_i_array_element_types[SCM_ARRAY_ELEMENT_TYPE_VU8],
2261 scm_make_bytevector);
2262 }
2263 }
2264
2265 void
2266 scm_init_bytevectors (void)
2267 {
2268 #include "libguile/bytevectors.x"
2269
2270 scm_endianness_big = scm_sym_big;
2271 scm_endianness_little = scm_sym_little;
2272 }