dc326f526bb4c30323e214902da05255a9b359db
[bpt/guile.git] / libguile / bytevectors.c
1 /* Copyright (C) 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25 #include <assert.h>
26
27 #include <gmp.h>
28
29 #include "libguile/_scm.h"
30 #include "libguile/extensions.h"
31 #include "libguile/bytevectors.h"
32 #include "libguile/strings.h"
33 #include "libguile/validate.h"
34 #include "libguile/ieee-754.h"
35 #include "libguile/arrays.h"
36 #include "libguile/array-handle.h"
37 #include "libguile/uniform.h"
38 #include "libguile/srfi-4.h"
39
40 #include <byteswap.h>
41 #include <striconveh.h>
42 #include <uniconv.h>
43 #include <unistr.h>
44
45 #ifdef HAVE_LIMITS_H
46 # include <limits.h>
47 #else
48 /* Assuming 32-bit longs. */
49 # define ULONG_MAX 4294967295UL
50 #endif
51
52 #include <string.h>
53
54
55 \f
56 /* Utilities. */
57
/* Convenience macros.  These are used by the various templates (macros) that
   are parameterized by integer signedness.  */

/* C integer type for each supported width and signedness; selected by
   INT_TYPE through token pasting.  */
#define INT8_T_signed           scm_t_int8
#define INT8_T_unsigned         scm_t_uint8
#define INT16_T_signed          scm_t_int16
#define INT16_T_unsigned        scm_t_uint16
#define INT32_T_signed          scm_t_int32
#define INT32_T_unsigned        scm_t_uint32

/* Range predicates selected by INT_VALID_P.  The unsigned variants only test
   an upper bound against an `unsigned long' constant.
   NOTE(review): they presumably rely on a negative signed argument being
   converted to a large unsigned value by that comparison -- confirm on
   platforms where the argument type is wider than `unsigned long'.  */
#define is_signed_int8(_x)      (((_x) >= -128L) && ((_x) <= 127L))
#define is_unsigned_int8(_x)    ((_x) <= 255UL)
#define is_signed_int16(_x)     (((_x) >= -32768L) && ((_x) <= 32767L))
#define is_unsigned_int16(_x)   ((_x) <= 65535UL)
#define is_signed_int32(_x)     (((_x) >= -2147483648L) && ((_x) <= 2147483647L))
#define is_unsigned_int32(_x)   ((_x) <= 4294967295UL)

/* Integer flag for each signedness; selected by SIGNEDNESS.  */
#define SIGNEDNESS_signed       1
#define SIGNEDNESS_unsigned     0

/* Selectors: paste the width (8, 16 or 32) and/or the signedness keyword
   (`signed' or `unsigned') into the name of one of the macros above, or
   into a `bswap_NN' name for INT_SWAP.  */
#define INT_TYPE(_size, _sign)  INT ## _size ## _T_ ## _sign
#define INT_SWAP(_size)         bswap_ ## _size
#define INT_VALID_P(_size, _sign) is_ ## _sign ## _int ## _size
#define SIGNEDNESS(_sign)       SIGNEDNESS_ ## _sign
79
80
/* Common prologue of the fixed-size accessor templates below.  It expects
   `bv' and `index' (both SCM) and FUNC_NAME to be in scope.  It declares
   C_LEN, C_INDEX and C_BV, validates BV, converts INDEX, and signals an
   out-of-range error unless the _LEN-bit integer starting at C_INDEX lies
   entirely within BV.

   The bounds test is written as a subtraction from C_LEN rather than as
   `c_index + size - 1 >= c_len': the addition can wrap around when `size_t'
   is no wider than `unsigned', which would let an out-of-bounds index
   through.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                  \
  size_t c_len, c_index;                                        \
  _sign char *c_bv;                                             \
                                                                \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                              \
  c_index = scm_to_uint (index);                                \
                                                                \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                           \
  c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv);           \
                                                                \
  if (SCM_UNLIKELY (c_index > c_len                             \
                    || c_len - c_index < ((_len) >> 3UL)))      \
    scm_out_of_range (FUNC_NAME, index);
93
/* Template for fixed-size integer access (only 8, 16 or 32-bit).

   Expands to the complete body of a function whose parameters are named
   `bv', `index' and `endianness'.  After the common prologue has validated
   BV and INDEX, it checks that ENDIANNESS is a symbol, copies the _LEN-bit
   integer at INDEX with `memcpy' (so no alignment is assumed), byte-swaps it
   when ENDIANNESS is not `scm_i_native_endianness', and returns the value
   wrapped with SCM_I_MAKINUM.  */
#define INTEGER_REF(_len, _sign)                                \
  SCM result;                                                   \
                                                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
  SCM_VALIDATE_SYMBOL (3, endianness);                          \
                                                                \
  {                                                             \
    INT_TYPE (_len, _sign)  c_result;                           \
                                                                \
    memcpy (&c_result, &c_bv[c_index], (_len) / 8);             \
    if (!scm_is_eq (endianness, scm_i_native_endianness))       \
      c_result = INT_SWAP (_len) (c_result);                    \
                                                                \
    result = SCM_I_MAKINUM (c_result);                          \
  }                                                             \
                                                                \
  return result;
112
/* Template for fixed-size integer access using the native endianness.

   Same as INTEGER_REF except that the enclosing function has no
   `endianness' parameter and the value is never byte-swapped.  */
#define INTEGER_NATIVE_REF(_len, _sign)                         \
  SCM result;                                                   \
                                                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
                                                                \
  {                                                             \
    INT_TYPE (_len, _sign)  c_result;                           \
                                                                \
    memcpy (&c_result, &c_bv[c_index], (_len) / 8);             \
    result = SCM_I_MAKINUM (c_result);                          \
  }                                                             \
                                                                \
  return result;
127
/* Template for fixed-size integer modification (only 8, 16 or 32-bit).

   Expands to the complete body of a function whose parameters are named
   `bv', `index', `value' and `endianness'.  VALUE must satisfy SCM_I_INUMP,
   otherwise a wrong-type error is signalled; it must also pass the range
   predicate for the requested width and signedness, otherwise an
   out-of-range error is signalled.  The value is narrowed, byte-swapped when
   ENDIANNESS is not `scm_i_native_endianness', and stored with `memcpy'.
   The expansion returns SCM_UNSPECIFIED.  */
#define INTEGER_SET(_len, _sign)                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
  SCM_VALIDATE_SYMBOL (3, endianness);                          \
                                                                \
  {                                                             \
    scm_t_signed_bits c_value;                                  \
    INT_TYPE (_len, _sign) c_value_short;                       \
                                                                \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                    \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                 \
                                                                \
    c_value = SCM_I_INUM (value);                               \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))    \
      scm_out_of_range (FUNC_NAME, value);                      \
                                                                \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;           \
    if (!scm_is_eq (endianness, scm_i_native_endianness))       \
      c_value_short = INT_SWAP (_len) (c_value_short);          \
                                                                \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);        \
  }                                                             \
                                                                \
  return SCM_UNSPECIFIED;
152
/* Template for fixed-size integer modification using the native
   endianness.

   Same as INTEGER_SET except that the enclosing function has no
   `endianness' parameter and the value is stored without byte-swapping.  */
#define INTEGER_NATIVE_SET(_len, _sign)                         \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
                                                                \
  {                                                             \
    scm_t_signed_bits c_value;                                  \
    INT_TYPE (_len, _sign) c_value_short;                       \
                                                                \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                    \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                 \
                                                                \
    c_value = SCM_I_INUM (value);                               \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))    \
      scm_out_of_range (FUNC_NAME, value);                      \
                                                                \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;           \
                                                                \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);        \
  }                                                             \
                                                                \
  return SCM_UNSPECIFIED;
175
176
177 \f
178 /* Bytevector type. */
179
/* Size in bytes of the bytevector header: SCM_BYTEVECTOR_HEADER_SIZE words
   of `scm_t_bits'.  */
#define SCM_BYTEVECTOR_HEADER_BYTES             \
  (SCM_BYTEVECTOR_HEADER_SIZE * sizeof (scm_t_bits))

/* Store the length _LEN in cell word 1 of _BV.  The callers in this file
   pass a length in bytes.  */
#define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len)            \
  SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len))
/* Store the pointer _CONTENTS in cell word 2 of _BV.  */
#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _contents)     \
  SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_contents))
/* Rewrite the flags of BV, keeping its element type and putting
   CONTIGUOUS_P at bit 8.  */
#define SCM_BYTEVECTOR_SET_CONTIGUOUS_P(bv, contiguous_p)       \
  SCM_SET_BYTEVECTOR_FLAGS ((bv),                               \
                            SCM_BYTEVECTOR_ELEMENT_TYPE (bv)    \
                            | ((contiguous_p) << 8UL))

/* Rewrite the flags of BV, keeping its contiguity bit and replacing the
   element type with HINT.  */
#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)                       \
  SCM_SET_BYTEVECTOR_FLAGS ((bv),                                       \
                            (hint)                                      \
                            | (SCM_BYTEVECTOR_CONTIGUOUS_P (bv) << 8UL))
/* Size of one element of VAR, in bytes: the entry of
   `scm_i_array_element_type_sizes' for its element type, divided by 8.  */
#define SCM_BYTEVECTOR_TYPE_SIZE(var)                           \
  (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8)
/* Length of VAR counted in elements rather than in bytes.  */
#define SCM_BYTEVECTOR_TYPED_LENGTH(var)                        \
  (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var))

/* The empty bytevector.  It starts out as SCM_UNSPECIFIED; once it holds a
   real bytevector, `make_bytevector' returns it for every zero-length
   request of element type SCM_ARRAY_ELEMENT_TYPE_VU8.  */
SCM scm_null_bytevector = SCM_UNSPECIFIED;
203
204
205 static inline SCM
206 make_bytevector (size_t len, scm_t_array_element_type element_type)
207 {
208 SCM ret;
209 size_t c_len;
210
211 if (SCM_UNLIKELY (element_type > SCM_ARRAY_ELEMENT_TYPE_LAST
212 || scm_i_array_element_type_sizes[element_type] < 8
213 || len >= (SCM_I_SIZE_MAX
214 / (scm_i_array_element_type_sizes[element_type]/8))))
215 /* This would be an internal Guile programming error */
216 abort ();
217
218 if (SCM_UNLIKELY (len == 0 && element_type == SCM_ARRAY_ELEMENT_TYPE_VU8
219 && SCM_BYTEVECTOR_P (scm_null_bytevector)))
220 ret = scm_null_bytevector;
221 else
222 {
223 signed char *contents;
224
225 c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
226
227 contents = scm_gc_malloc_pointerless (SCM_BYTEVECTOR_HEADER_BYTES + c_len,
228 SCM_GC_BYTEVECTOR);
229 ret = PTR2SCM (contents);
230 contents += SCM_BYTEVECTOR_HEADER_BYTES;
231
232 SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
233 SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
234 SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 1);
235 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
236 }
237
238 return ret;
239 }
240
241 /* Return a bytevector of LEN elements of type ELEMENT_TYPE, with element
242 values taken from CONTENTS. Assume that the storage for CONTENTS will be
243 automatically reclaimed when it becomes unreachable. */
244 static inline SCM
245 make_bytevector_from_buffer (size_t len, void *contents,
246 scm_t_array_element_type element_type)
247 {
248 SCM ret;
249
250 if (SCM_UNLIKELY (len == 0))
251 ret = make_bytevector (len, element_type);
252 else
253 {
254 size_t c_len;
255
256 ret = PTR2SCM (scm_gc_malloc (SCM_BYTEVECTOR_HEADER_BYTES,
257 SCM_GC_BYTEVECTOR));
258
259 c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
260
261 SCM_BYTEVECTOR_SET_LENGTH (ret, c_len);
262 SCM_BYTEVECTOR_SET_CONTENTS (ret, contents);
263 SCM_BYTEVECTOR_SET_CONTIGUOUS_P (ret, 0);
264 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
265 }
266
267 return ret;
268 }
269
270
/* Return a new bytevector of size LEN octets.  This is `make_bytevector'
   with the element type fixed to SCM_ARRAY_ELEMENT_TYPE_VU8; the contents
   are not initialized here.  */
SCM
scm_c_make_bytevector (size_t len)
{
  return make_bytevector (len, SCM_ARRAY_ELEMENT_TYPE_VU8);
}
277
/* Return a new bytevector of size LEN elements of type ELEMENT_TYPE.  LEN
   counts elements, not bytes; `make_bytevector' derives the byte size.  */
SCM
scm_i_make_typed_bytevector (size_t len, scm_t_array_element_type element_type)
{
  return make_bytevector (len, element_type);
}
284
/* Return a bytevector of size LEN made up of CONTENTS.  The area pointed to
   by CONTENTS must have been allocated using `scm_gc_malloc ()'.  CONTENTS
   is wrapped, not copied, and the element type is
   SCM_ARRAY_ELEMENT_TYPE_VU8.  */
SCM
scm_c_take_gc_bytevector (signed char *contents, size_t len)
{
  return make_bytevector_from_buffer (len, contents, SCM_ARRAY_ELEMENT_TYPE_VU8);
}
292
/* Return a bytevector of LEN elements of type ELEMENT_TYPE that wraps
   CONTENTS without copying it.  LEN counts elements, not bytes.  */
SCM
scm_c_take_typed_bytevector (signed char *contents, size_t len,
                             scm_t_array_element_type element_type)
{
  return make_bytevector_from_buffer (len, contents, element_type);
}
299
300 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
301 size) and return the new bytevector (possibly different from BV). */
302 SCM
303 scm_c_shrink_bytevector (SCM bv, size_t c_new_len)
304 {
305 SCM new_bv;
306 size_t c_len;
307
308 if (SCM_UNLIKELY (c_new_len % SCM_BYTEVECTOR_TYPE_SIZE (bv)))
309 /* This would be an internal Guile programming error */
310 abort ();
311
312 c_len = SCM_BYTEVECTOR_LENGTH (bv);
313 if (SCM_UNLIKELY (c_new_len > c_len))
314 abort ();
315
316 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
317
318 if (SCM_BYTEVECTOR_CONTIGUOUS_P (bv))
319 new_bv = PTR2SCM (scm_gc_realloc (SCM2PTR (bv),
320 c_len + SCM_BYTEVECTOR_HEADER_BYTES,
321 c_new_len + SCM_BYTEVECTOR_HEADER_BYTES,
322 SCM_GC_BYTEVECTOR));
323 else
324 {
325 signed char *c_bv;
326
327 c_bv = scm_gc_realloc (SCM_BYTEVECTOR_CONTENTS (bv),
328 c_len, c_new_len, SCM_GC_BYTEVECTOR);
329 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_bv);
330
331 new_bv = bv;
332 }
333
334 return new_bv;
335 }
336
/* Return the value of SCM_BYTEVECTOR_P (OBJ), i.e. non-zero if OBJ is a
   bytevector.  */
int
scm_is_bytevector (SCM obj)
{
  return SCM_BYTEVECTOR_P (obj);
}
342
/* Return the length of BV as given by SCM_BYTEVECTOR_LENGTH, after checking
   that BV is a bytevector.  */
size_t
scm_c_bytevector_length (SCM bv)
#define FUNC_NAME "scm_c_bytevector_length"
{
  SCM_VALIDATE_BYTEVECTOR (1, bv);

  return SCM_BYTEVECTOR_LENGTH (bv);
}
#undef FUNC_NAME
352
353 scm_t_uint8
354 scm_c_bytevector_ref (SCM bv, size_t index)
355 #define FUNC_NAME "scm_c_bytevector_ref"
356 {
357 size_t c_len;
358 const scm_t_uint8 *c_bv;
359
360 SCM_VALIDATE_BYTEVECTOR (1, bv);
361
362 c_len = SCM_BYTEVECTOR_LENGTH (bv);
363 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
364
365 if (SCM_UNLIKELY (index >= c_len))
366 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
367
368 return c_bv[index];
369 }
370 #undef FUNC_NAME
371
372 void
373 scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
374 #define FUNC_NAME "scm_c_bytevector_set_x"
375 {
376 size_t c_len;
377 scm_t_uint8 *c_bv;
378
379 SCM_VALIDATE_BYTEVECTOR (1, bv);
380
381 c_len = SCM_BYTEVECTOR_LENGTH (bv);
382 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
383
384 if (SCM_UNLIKELY (index >= c_len))
385 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
386
387 c_bv[index] = value;
388 }
389 #undef FUNC_NAME
390
391
392 \f
393 int
394 scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
395 {
396 ssize_t ubnd, inc, i;
397 scm_t_array_handle h;
398
399 scm_array_get_handle (bv, &h);
400
401 scm_putc ('#', port);
402 scm_write (scm_array_handle_element_type (&h), port);
403 scm_putc ('(', port);
404 for (i = h.dims[0].lbnd, ubnd = h.dims[0].ubnd, inc = h.dims[0].inc;
405 i <= ubnd; i += inc)
406 {
407 if (i > 0)
408 scm_putc (' ', port);
409 scm_write (scm_array_handle_ref (&h, i), port);
410 }
411 scm_putc (')', port);
412
413 return 1;
414 }
415
416 \f
417 /* General operations. */
418
/* The symbols `big' and `little', compared against `endianness' arguments
   by the accessors in this file.  */
SCM_SYMBOL (scm_sym_big, "big");
SCM_SYMBOL (scm_sym_little, "little");

/* NOTE(review): not assigned anywhere in the visible part of this file;
   presumably initialized elsewhere -- confirm.  */
SCM scm_endianness_big, scm_endianness_little;

/* Host endianness (a symbol).  Holds SCM_UNSPECIFIED until it is assigned
   (the assignment is not in the visible part of this file).  */
SCM scm_i_native_endianness = SCM_UNSPECIFIED;

/* Byte-swapping.  */
/* Fallback 24-bit swap: exchanges the low and high octets of the low 24
   bits of _X and keeps the middle octet in place.  _X is evaluated three
   times.  */
#ifndef bswap_24
# define bswap_24(_x)                           \
  ((((_x) & 0xff0000) >> 16)  |                 \
   (((_x) & 0x00ff00))        |                 \
   (((_x) & 0x0000ff) << 16))
#endif
434
435
/* Scheme procedure `native-endianness': returns the symbol stored in
   `scm_i_native_endianness'.  */
SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
            (void),
            "Return a symbol denoting the machine's native endianness.")
#define FUNC_NAME s_scm_native_endianness
{
  return scm_i_native_endianness;
}
#undef FUNC_NAME
444
/* Scheme procedure `bytevector?': the result of `scm_is_bytevector'
   converted to a Scheme boolean.  */
SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
            (SCM obj),
            "Return true if @var{obj} is a bytevector.")
#define FUNC_NAME s_scm_bytevector_p
{
  return scm_from_bool (scm_is_bytevector (obj));
}
#undef FUNC_NAME
453
454 SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
455 (SCM len, SCM fill),
456 "Return a newly allocated bytevector of @var{len} bytes, "
457 "optionally filled with @var{fill}.")
458 #define FUNC_NAME s_scm_make_bytevector
459 {
460 SCM bv;
461 unsigned c_len;
462 signed char c_fill = '\0';
463
464 SCM_VALIDATE_UINT_COPY (1, len, c_len);
465 if (!scm_is_eq (fill, SCM_UNDEFINED))
466 {
467 int value;
468
469 value = scm_to_int (fill);
470 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
471 scm_out_of_range (FUNC_NAME, fill);
472 c_fill = (signed char) value;
473 }
474
475 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
476 if (!scm_is_eq (fill, SCM_UNDEFINED))
477 {
478 unsigned i;
479 signed char *contents;
480
481 contents = SCM_BYTEVECTOR_CONTENTS (bv);
482 for (i = 0; i < c_len; i++)
483 contents[i] = c_fill;
484 }
485 else
486 memset (SCM_BYTEVECTOR_CONTENTS (bv), 0, c_len);
487
488 return bv;
489 }
490 #undef FUNC_NAME
491
492 SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
493 (SCM bv),
494 "Return the length (in bytes) of @var{bv}.")
495 #define FUNC_NAME s_scm_bytevector_length
496 {
497 return scm_from_uint (scm_c_bytevector_length (bv));
498 }
499 #undef FUNC_NAME
500
501 SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
502 (SCM bv1, SCM bv2),
503 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
504 "have the same length and contents.")
505 #define FUNC_NAME s_scm_bytevector_eq_p
506 {
507 SCM result = SCM_BOOL_F;
508 unsigned c_len1, c_len2;
509
510 SCM_VALIDATE_BYTEVECTOR (1, bv1);
511 SCM_VALIDATE_BYTEVECTOR (2, bv2);
512
513 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
514 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
515
516 if (c_len1 == c_len2 && (SCM_BYTEVECTOR_ELEMENT_TYPE (bv1)
517 == SCM_BYTEVECTOR_ELEMENT_TYPE (bv2)))
518 {
519 signed char *c_bv1, *c_bv2;
520
521 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
522 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
523
524 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
525 }
526
527 return result;
528 }
529 #undef FUNC_NAME
530
531 SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
532 (SCM bv, SCM fill),
533 "Fill bytevector @var{bv} with @var{fill}, a byte.")
534 #define FUNC_NAME s_scm_bytevector_fill_x
535 {
536 unsigned c_len, i;
537 signed char *c_bv, c_fill;
538
539 SCM_VALIDATE_BYTEVECTOR (1, bv);
540 c_fill = scm_to_int8 (fill);
541
542 c_len = SCM_BYTEVECTOR_LENGTH (bv);
543 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
544
545 for (i = 0; i < c_len; i++)
546 c_bv[i] = c_fill;
547
548 return SCM_UNSPECIFIED;
549 }
550 #undef FUNC_NAME
551
552 SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
553 (SCM source, SCM source_start, SCM target, SCM target_start,
554 SCM len),
555 "Copy @var{len} bytes from @var{source} into @var{target}, "
556 "starting reading from @var{source_start} (a positive index "
557 "within @var{source}) and start writing at "
558 "@var{target_start}.")
559 #define FUNC_NAME s_scm_bytevector_copy_x
560 {
561 unsigned c_len, c_source_len, c_target_len;
562 unsigned c_source_start, c_target_start;
563 signed char *c_source, *c_target;
564
565 SCM_VALIDATE_BYTEVECTOR (1, source);
566 SCM_VALIDATE_BYTEVECTOR (3, target);
567
568 c_len = scm_to_uint (len);
569 c_source_start = scm_to_uint (source_start);
570 c_target_start = scm_to_uint (target_start);
571
572 c_source = SCM_BYTEVECTOR_CONTENTS (source);
573 c_target = SCM_BYTEVECTOR_CONTENTS (target);
574 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
575 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
576
577 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
578 scm_out_of_range (FUNC_NAME, source_start);
579 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
580 scm_out_of_range (FUNC_NAME, target_start);
581
582 memmove (c_target + c_target_start,
583 c_source + c_source_start,
584 c_len);
585
586 return SCM_UNSPECIFIED;
587 }
588 #undef FUNC_NAME
589
590 SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
591 (SCM bv),
592 "Return a newly allocated copy of @var{bv}.")
593 #define FUNC_NAME s_scm_bytevector_copy
594 {
595 SCM copy;
596 unsigned c_len;
597 signed char *c_bv, *c_copy;
598
599 SCM_VALIDATE_BYTEVECTOR (1, bv);
600
601 c_len = SCM_BYTEVECTOR_LENGTH (bv);
602 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
603
604 copy = make_bytevector (c_len, SCM_BYTEVECTOR_ELEMENT_TYPE (bv));
605 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
606 memcpy (c_copy, c_bv, c_len);
607
608 return copy;
609 }
610 #undef FUNC_NAME
611
612 SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
613 1, 0, 0, (SCM array),
614 "Return a newly allocated bytevector whose contents\n"
615 "will be copied from the uniform array @var{array}.")
616 #define FUNC_NAME s_scm_uniform_array_to_bytevector
617 {
618 SCM contents, ret;
619 size_t len, sz, byte_len;
620 scm_t_array_handle h;
621 const void *elts;
622
623 contents = scm_array_contents (array, SCM_BOOL_T);
624 if (scm_is_false (contents))
625 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
626
627 scm_array_get_handle (contents, &h);
628 assert (h.base == 0);
629
630 elts = h.elements;
631 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
632 sz = scm_array_handle_uniform_element_bit_size (&h);
633 if (sz >= 8 && ((sz % 8) == 0))
634 byte_len = len * (sz / 8);
635 else if (sz < 8)
636 /* byte_len = ceil (len * sz / 8) */
637 byte_len = (len * sz + 7) / 8;
638 else
639 /* an internal guile error, really */
640 SCM_MISC_ERROR ("uniform elements larger than 8 bits must fill whole bytes", SCM_EOL);
641
642 ret = make_bytevector (byte_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
643 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), elts, byte_len);
644
645 scm_array_handle_release (&h);
646
647 return ret;
648 }
649 #undef FUNC_NAME
650
651 \f
652 /* Operations on bytes and octets. */
653
/* Scheme procedure `bytevector-u8-ref'; its body is the INTEGER_NATIVE_REF
   template instantiated for unsigned 8-bit integers.  */
SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
            (SCM bv, SCM index),
            "Return the octet located at @var{index} in @var{bv}.")
#define FUNC_NAME s_scm_bytevector_u8_ref
{
  INTEGER_NATIVE_REF (8, unsigned);
}
#undef FUNC_NAME
662
/* Scheme procedure `bytevector-s8-ref'; its body is the INTEGER_NATIVE_REF
   template instantiated for signed 8-bit integers.  */
SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
            (SCM bv, SCM index),
            "Return the byte located at @var{index} in @var{bv}.")
#define FUNC_NAME s_scm_bytevector_s8_ref
{
  INTEGER_NATIVE_SET_UNUSED_GUARD_PLACEHOLDER
}
#undef FUNC_NAME
671
672 SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
673 (SCM bv, SCM index, SCM value),
674 "Return the octet located at @var{index} in @var{bv}.")
675 #define FUNC_NAME s_scm_bytevector_u8_set_x
676 {
677 INTEGER_NATIVE_SET (8, unsigned);
678 }
679 #undef FUNC_NAME
680
681 SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
682 (SCM bv, SCM index, SCM value),
683 "Return the octet located at @var{index} in @var{bv}.")
684 #define FUNC_NAME s_scm_bytevector_s8_set_x
685 {
686 INTEGER_NATIVE_SET (8, signed);
687 }
688 #undef FUNC_NAME
689
690 #undef OCTET_ACCESSOR_PROLOGUE
691
692
693 SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
694 (SCM bv),
695 "Return a newly allocated list of octets containing the "
696 "contents of @var{bv}.")
697 #define FUNC_NAME s_scm_bytevector_to_u8_list
698 {
699 SCM lst, pair;
700 unsigned c_len, i;
701 unsigned char *c_bv;
702
703 SCM_VALIDATE_BYTEVECTOR (1, bv);
704
705 c_len = SCM_BYTEVECTOR_LENGTH (bv);
706 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
707
708 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
709 for (i = 0, pair = lst;
710 i < c_len;
711 i++, pair = SCM_CDR (pair))
712 {
713 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
714 }
715
716 return lst;
717 }
718 #undef FUNC_NAME
719
720 SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
721 (SCM lst),
722 "Turn @var{lst}, a list of octets, into a bytevector.")
723 #define FUNC_NAME s_scm_u8_list_to_bytevector
724 {
725 SCM bv, item;
726 long c_len, i;
727 unsigned char *c_bv;
728
729 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
730
731 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
732 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
733
734 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
735 {
736 item = SCM_CAR (lst);
737
738 if (SCM_LIKELY (SCM_I_INUMP (item)))
739 {
740 scm_t_signed_bits c_item;
741
742 c_item = SCM_I_INUM (item);
743 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
744 c_bv[i] = (unsigned char) c_item;
745 else
746 goto type_error;
747 }
748 else
749 goto type_error;
750 }
751
752 return bv;
753
754 type_error:
755 scm_wrong_type_arg (FUNC_NAME, 1, item);
756
757 return SCM_BOOL_F;
758 }
759 #undef FUNC_NAME
760
761 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
762 using (2^(SIZE * 8) - VALUE). */
763 static inline void
764 twos_complement (mpz_t value, size_t size)
765 {
766 unsigned long bit_count;
767
768 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
769 checking on SIZE performed earlier. */
770 bit_count = (unsigned long) size << 3UL;
771
772 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
773 mpz_ui_sub (value, 1UL << bit_count, value);
774 else
775 {
776 mpz_t max;
777
778 mpz_init (max);
779 mpz_ui_pow_ui (max, 2, bit_count);
780 mpz_sub (value, max, value);
781 mpz_clear (max);
782 }
783 }
784
785 static inline SCM
786 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
787 SCM endianness)
788 {
789 SCM result;
790 mpz_t c_mpz;
791 int c_endianness, negative_p = 0;
792
793 if (signed_p)
794 {
795 if (scm_is_eq (endianness, scm_sym_big))
796 negative_p = c_bv[0] & 0x80;
797 else
798 negative_p = c_bv[c_size - 1] & 0x80;
799 }
800
801 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
802
803 mpz_init (c_mpz);
804 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
805 c_size /* word is C_SIZE-byte long */,
806 c_endianness,
807 0 /* nails */, c_bv);
808
809 if (signed_p && negative_p)
810 {
811 twos_complement (c_mpz, c_size);
812 mpz_neg (c_mpz, c_mpz);
813 }
814
815 result = scm_from_mpz (c_mpz);
816 mpz_clear (c_mpz); /* FIXME: Needed? */
817
818 return result;
819 }
820
821 static inline int
822 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
823 SCM value, SCM endianness)
824 {
825 mpz_t c_mpz;
826 int c_endianness, c_sign, err = 0;
827
828 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
829
830 mpz_init (c_mpz);
831 scm_to_mpz (value, c_mpz);
832
833 c_sign = mpz_sgn (c_mpz);
834 if (c_sign < 0)
835 {
836 if (SCM_LIKELY (signed_p))
837 {
838 mpz_neg (c_mpz, c_mpz);
839 twos_complement (c_mpz, c_size);
840 }
841 else
842 {
843 err = -1;
844 goto finish;
845 }
846 }
847
848 if (c_sign == 0)
849 /* Zero. */
850 memset (c_bv, 0, c_size);
851 else
852 {
853 size_t word_count, value_size;
854
855 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
856 if (SCM_UNLIKELY (value_size > c_size))
857 {
858 err = -2;
859 goto finish;
860 }
861
862
863 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
864 c_size, c_endianness,
865 0 /* nails */, c_mpz);
866 if (SCM_UNLIKELY (word_count != 1))
867 /* Shouldn't happen since we already checked with VALUE_SIZE. */
868 abort ();
869 }
870
871 finish:
872 mpz_clear (c_mpz);
873
874 return err;
875 }
876
/* Common prologue of the arbitrary-size accessors.  It expects `bv',
   `index' and `size' (all SCM) and FUNC_NAME to be in scope.  It declares
   C_LEN, C_INDEX, C_SIZE and C_BV, validates BV, converts INDEX and SIZE,
   and signals an out-of-range error if SIZE is zero or too large, or if the
   C_SIZE octets starting at C_INDEX do not lie entirely within BV.  _SIGN
   is unused.

   The bounds test subtracts from C_LEN instead of computing
   `c_index + c_size', which could wrap around and let an out-of-bounds
   access through.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned long c_len, c_index, c_size;                                 \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_ulong (index);                                       \
  c_size = scm_to_ulong (size);                                         \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that             \
     multiplying it by 8 yields a number that fits in an                \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  if (SCM_UNLIKELY (c_index > c_len || c_size > c_len - c_index))       \
    scm_out_of_range (FUNC_NAME, index);
895
896
/* Template of an integer reference function.

   Expands to the complete body of a function with `c_bv', `c_size' and
   `endianness' in scope.  Sizes 1 and 2 are read with `memcpy' into a
   native integer (the 2-octet case is byte-swapped when ENDIANNESS is not
   `scm_i_native_endianness') and returned via SCM_I_MAKINUM.  Sizes of 3
   and above are delegated to `bytevector_large_ref'.  A size of 0 reaches
   the `default' case and aborts.  */
#define GENERIC_INTEGER_REF(_sign)                                      \
  SCM result;                                                           \
                                                                        \
  if (c_size < 3)                                                       \
    {                                                                   \
      int swap;                                                         \
      _sign int value;                                                  \
                                                                        \
      swap = !scm_is_eq (endianness, scm_i_native_endianness);          \
      switch (c_size)                                                   \
        {                                                               \
        case 1:                                                         \
          {                                                             \
            _sign char c_value8;                                        \
            memcpy (&c_value8, c_bv, 1);                                \
            value = c_value8;                                           \
          }                                                             \
          break;                                                        \
        case 2:                                                         \
          {                                                             \
            INT_TYPE (16, _sign)  c_value16;                            \
            memcpy (&c_value16, c_bv, 2);                               \
            if (swap)                                                   \
              value = (INT_TYPE (16, _sign)) bswap_16 (c_value16);      \
            else                                                        \
              value = c_value16;                                        \
          }                                                             \
          break;                                                        \
        default:                                                        \
          abort ();                                                     \
        }                                                               \
                                                                        \
      result = SCM_I_MAKINUM ((_sign int) value);                       \
    }                                                                   \
  else                                                                  \
    result = bytevector_large_ref ((char *) c_bv,                       \
                                   c_size, SIGNEDNESS (_sign),          \
                                   endianness);                         \
                                                                        \
  return result;
938
/* Return the signed C_SIZE-octet integer stored at C_BV, read according to
   ENDIANNESS.  The body is GENERIC_INTEGER_REF instantiated for `signed'.
   No bounds checking is done here; C_SIZE must not be 0.  */
static inline SCM
bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
{
  GENERIC_INTEGER_REF (signed);
}
944
/* Return the unsigned C_SIZE-octet integer stored at C_BV, read according
   to ENDIANNESS.  The body is GENERIC_INTEGER_REF instantiated for
   `unsigned'.  No bounds checking is done here; C_SIZE must not be 0.  */
static inline SCM
bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
{
  GENERIC_INTEGER_REF (unsigned);
}
950
951
/* Template of an integer assignment function.

   Expands to the complete body of a `void' function with `c_bv', `c_size',
   `value', `endianness' and FUNC_NAME in scope.  For sizes 1 and 2, VALUE
   must satisfy SCM_I_INUMP and pass the range predicate for that width and
   signedness; it is then stored with `memcpy' (the 2-octet case is
   byte-swapped when ENDIANNESS is not `scm_i_native_endianness').  Sizes of
   3 and above are delegated to `bytevector_large_set'.  Every failure,
   including a non-fixnum VALUE for the small sizes, is reported as an
   out-of-range error on VALUE.  A size of 0 reaches the `default' case and
   aborts.  */
#define GENERIC_INTEGER_SET(_sign)                                      \
  if (c_size < 3)                                                       \
    {                                                                   \
      scm_t_signed_bits c_value;                                        \
                                                                        \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                          \
        goto range_error;                                               \
                                                                        \
      c_value = SCM_I_INUM (value);                                     \
      switch (c_size)                                                   \
        {                                                               \
        case 1:                                                         \
          if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value)))            \
            {                                                           \
              _sign char c_value8;                                      \
              c_value8 = (_sign char) c_value;                          \
              memcpy (c_bv, &c_value8, 1);                              \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value)))           \
            {                                                           \
              int swap;                                                 \
              INT_TYPE (16, _sign)  c_value16;                          \
                                                                        \
              swap = !scm_is_eq (endianness, scm_i_native_endianness);  \
                                                                        \
              if (swap)                                                 \
                c_value16 = (INT_TYPE (16, _sign)) bswap_16 (c_value);  \
              else                                                      \
                c_value16 = c_value;                                    \
                                                                        \
              memcpy (c_bv, &c_value16, 2);                             \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        default:                                                        \
          abort ();                                                     \
        }                                                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      int err;                                                          \
                                                                        \
      err = bytevector_large_set (c_bv, c_size,                         \
                                  SIGNEDNESS (_sign),                   \
                                  value, endianness);                   \
      if (err)                                                          \
        goto range_error;                                               \
    }                                                                   \
                                                                        \
  return;                                                               \
                                                                        \
 range_error:                                                           \
  scm_out_of_range (FUNC_NAME, value);                                  \
  return;
1014
/* Store VALUE as a signed C_SIZE-octet integer at C_BV according to
   ENDIANNESS.  The body is GENERIC_INTEGER_SET instantiated for `signed'.
   FUNC_NAME is bound to the FUNC_NAME parameter so that range errors are
   reported under the caller's name.  No bounds checking is done here.  */
static inline void
bytevector_signed_set (char *c_bv, size_t c_size,
                       SCM value, SCM endianness,
                       const char *func_name)
#define FUNC_NAME func_name
{
  GENERIC_INTEGER_SET (signed);
}
#undef FUNC_NAME
1024
/* Store VALUE as an unsigned C_SIZE-octet integer at C_BV according to
   ENDIANNESS.  The body is GENERIC_INTEGER_SET instantiated for `unsigned'.
   FUNC_NAME is bound to the FUNC_NAME parameter so that range errors are
   reported under the caller's name.  No bounds checking is done here.  */
static inline void
bytevector_unsigned_set (char *c_bv, size_t c_size,
                         SCM value, SCM endianness,
                         const char *func_name)
#define FUNC_NAME func_name
{
  GENERIC_INTEGER_SET (unsigned);
}
#undef FUNC_NAME
1034
1035 #undef GENERIC_INTEGER_SET
1036 #undef GENERIC_INTEGER_REF
1037
1038
/* Scheme procedure `bytevector-uint-ref'.  The prologue validates BV,
   INDEX and SIZE; the read itself is done by `bytevector_unsigned_ref'.  */
SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
            (SCM bv, SCM index, SCM endianness, SCM size),
            "Return the @var{size}-octet long unsigned integer at index "
            "@var{index} in @var{bv}.")
#define FUNC_NAME s_scm_bytevector_uint_ref
{
  GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);

  return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
}
#undef FUNC_NAME
1050
1051 SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1052 (SCM bv, SCM index, SCM endianness, SCM size),
1053 "Return the @var{size}-octet long unsigned integer at index "
1054 "@var{index} in @var{bv}.")
1055 #define FUNC_NAME s_scm_bytevector_sint_ref
1056 {
1057 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1058
1059 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1060 }
1061 #undef FUNC_NAME
1062
/* Scheme procedure `bytevector-uint-set!'.  The prologue validates BV,
   INDEX and SIZE; the store itself, including the range check on VALUE, is
   done by `bytevector_unsigned_set'.  Returns SCM_UNSPECIFIED.  */
SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
            "Set the @var{size}-octet long unsigned integer at @var{index} "
            "to @var{value}.")
#define FUNC_NAME s_scm_bytevector_uint_set_x
{
  GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);

  bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
                           FUNC_NAME);

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
1077
/* Scheme procedure `bytevector-sint-set!'.  The prologue validates BV,
   INDEX and SIZE; the store itself, including the range check on VALUE, is
   done by `bytevector_signed_set'.  Returns SCM_UNSPECIFIED.  */
SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
            "Set the @var{size}-octet long signed integer at @var{index} "
            "to @var{value}.")
#define FUNC_NAME s_scm_bytevector_sint_set_x
{
  GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);

  bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
                         FUNC_NAME);

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
1092
1093
1094 \f
1095 /* Operations on integers of arbitrary size. */
1096
/* Template of a function converting a bytevector to a list of integers.

   Expands to the complete body of a function with `bv', `endianness' and
   `size' parameters.  The result holds one integer for every complete group
   of SIZE octets of BV, read with bytevector_<_sign>_ref; trailing octets
   that do not form a complete group are ignored.  An empty bytevector
   yields the empty list.  An out-of-range error on SIZE is signalled when
   SIZE is zero (it is used as a divisor and as the loop stride below) or
   larger than a non-empty BV.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
  if (SCM_UNLIKELY (c_size == 0))                                       \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  else if (SCM_UNLIKELY (c_len < c_size))                               \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                     \
                                                                        \
      lst = scm_make_list (scm_from_uint (c_len / c_size),              \
                           SCM_UNSPECIFIED);                            \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
1129
/* Scheme procedure `bytevector->sint-list'; its body is the
   INTEGERS_TO_LIST template instantiated for signed integers.  */
SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
            3, 0, 0,
            (SCM bv, SCM endianness, SCM size),
            "Return a list of signed integers of @var{size} octets "
            "representing the contents of @var{bv}.")
#define FUNC_NAME s_scm_bytevector_to_sint_list
{
  INTEGERS_TO_LIST (signed);
}
#undef FUNC_NAME
1140
1141 SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1142 3, 0, 0,
1143 (SCM bv, SCM endianness, SCM size),
1144 "Return a list of unsigned integers of @var{size} octets "
1145 "representing the contents of @var{bv}.")
1146 #define FUNC_NAME s_scm_bytevector_to_uint_list
1147 {
1148 INTEGERS_TO_LIST (unsigned);
1149 }
1150 #undef FUNC_NAME
1151
/* The template is spelled INTEGERS_TO_LIST (plural); undefining the
   singular name left it defined for the rest of the file.  */
#undef INTEGERS_TO_LIST
1153
1154
/* Body shared by `uint-list->bytevector' and `sint-list->bytevector'.
   Return a fresh bytevector in which every element of LST is encoded on
   SIZE octets by `bytevector_ ## _SIGN ## _set' according to ENDIANNESS.
   LST, ENDIANNESS and SIZE are the arguments of the enclosing function,
   which must also define FUNC_NAME.

   SIZE is out of range when it is zero, when it reaches ULONG_MAX >> 3,
   or when the total length (list length times SIZE) does not fit in a
   `size_t'.  Without the last check the product could wrap around and
   the loop would write past the end of an undersized bytevector.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign) \
  SCM bv; \
  long c_len; \
  size_t c_size; \
  char *c_bv, *c_bv_ptr; \
  \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len); \
  SCM_VALIDATE_SYMBOL (2, endianness); \
  c_size = scm_to_uint (size); \
  \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
    scm_out_of_range (FUNC_NAME, size); \
  \
  /* C_SIZE is non-zero here, so the division is safe.  */ \
  if (SCM_UNLIKELY ((size_t) c_len > ((size_t) -1) / c_size)) \
    scm_out_of_range (FUNC_NAME, size); \
  \
  bv = make_bytevector (c_len * c_size, SCM_ARRAY_ELEMENT_TYPE_VU8); \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
  \
  for (c_bv_ptr = c_bv; \
       !scm_is_null (lst); \
       lst = SCM_CDR (lst), c_bv_ptr += c_size) \
    { \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size, \
                                    SCM_CAR (lst), endianness, \
                                    FUNC_NAME); \
    } \
  \
  return bv;
1181
1182
1183 SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
1184 3, 0, 0,
1185 (SCM lst, SCM endianness, SCM size),
1186 "Return a bytevector containing the unsigned integers "
1187 "listed in @var{lst} and encoded on @var{size} octets "
1188 "according to @var{endianness}.")
1189 #define FUNC_NAME s_scm_uint_list_to_bytevector
1190 {
1191 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1192 }
1193 #undef FUNC_NAME
1194
1195 SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
1196 3, 0, 0,
1197 (SCM lst, SCM endianness, SCM size),
1198 "Return a bytevector containing the signed integers "
1199 "listed in @var{lst} and encoded on @var{size} octets "
1200 "according to @var{endianness}.")
1201 #define FUNC_NAME s_scm_sint_list_to_bytevector
1202 {
1203 INTEGER_LIST_TO_BYTEVECTOR (signed);
1204 }
1205 #undef FUNC_NAME
1206
1207 #undef INTEGER_LIST_TO_BYTEVECTOR
1208
1209
1210 \f
1211 /* Operations on 16-bit integers. */
1212
1213 SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
1214 3, 0, 0,
1215 (SCM bv, SCM index, SCM endianness),
1216 "Return the unsigned 16-bit integer from @var{bv} at "
1217 "@var{index}.")
1218 #define FUNC_NAME s_scm_bytevector_u16_ref
1219 {
1220 INTEGER_REF (16, unsigned);
1221 }
1222 #undef FUNC_NAME
1223
1224 SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
1225 3, 0, 0,
1226 (SCM bv, SCM index, SCM endianness),
1227 "Return the signed 16-bit integer from @var{bv} at "
1228 "@var{index}.")
1229 #define FUNC_NAME s_scm_bytevector_s16_ref
1230 {
1231 INTEGER_REF (16, signed);
1232 }
1233 #undef FUNC_NAME
1234
1235 SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1236 2, 0, 0,
1237 (SCM bv, SCM index),
1238 "Return the unsigned 16-bit integer from @var{bv} at "
1239 "@var{index} using the native endianness.")
1240 #define FUNC_NAME s_scm_bytevector_u16_native_ref
1241 {
1242 INTEGER_NATIVE_REF (16, unsigned);
1243 }
1244 #undef FUNC_NAME
1245
1246 SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1247 2, 0, 0,
1248 (SCM bv, SCM index),
1249 "Return the unsigned 16-bit integer from @var{bv} at "
1250 "@var{index} using the native endianness.")
1251 #define FUNC_NAME s_scm_bytevector_s16_native_ref
1252 {
1253 INTEGER_NATIVE_REF (16, signed);
1254 }
1255 #undef FUNC_NAME
1256
1257 SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
1258 4, 0, 0,
1259 (SCM bv, SCM index, SCM value, SCM endianness),
1260 "Store @var{value} in @var{bv} at @var{index} according to "
1261 "@var{endianness}.")
1262 #define FUNC_NAME s_scm_bytevector_u16_set_x
1263 {
1264 INTEGER_SET (16, unsigned);
1265 }
1266 #undef FUNC_NAME
1267
1268 SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
1269 4, 0, 0,
1270 (SCM bv, SCM index, SCM value, SCM endianness),
1271 "Store @var{value} in @var{bv} at @var{index} according to "
1272 "@var{endianness}.")
1273 #define FUNC_NAME s_scm_bytevector_s16_set_x
1274 {
1275 INTEGER_SET (16, signed);
1276 }
1277 #undef FUNC_NAME
1278
1279 SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1280 3, 0, 0,
1281 (SCM bv, SCM index, SCM value),
1282 "Store the unsigned integer @var{value} at index @var{index} "
1283 "of @var{bv} using the native endianness.")
1284 #define FUNC_NAME s_scm_bytevector_u16_native_set_x
1285 {
1286 INTEGER_NATIVE_SET (16, unsigned);
1287 }
1288 #undef FUNC_NAME
1289
1290 SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1291 3, 0, 0,
1292 (SCM bv, SCM index, SCM value),
1293 "Store the signed integer @var{value} at index @var{index} "
1294 "of @var{bv} using the native endianness.")
1295 #define FUNC_NAME s_scm_bytevector_s16_native_set_x
1296 {
1297 INTEGER_NATIVE_SET (16, signed);
1298 }
1299 #undef FUNC_NAME
1300
1301
1302 \f
1303 /* Operations on 32-bit integers. */
1304
1305 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1306 arbitrary 32-bit integers. Thus we fall back to using the
1307 `large_{ref,set}' variants on 32-bit machines. */
1308
/* Body of a `-ref' accessor that goes through `bytevector_large_ref':
   run the common accessor prologue, require ENDIANNESS (argument 3) to be
   a symbol, then decode the _LEN / 8 octets found at C_INDEX.  */
#define LARGE_INTEGER_REF(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  SCM_VALIDATE_SYMBOL (3, endianness); \
  \
  return bytevector_large_ref ((char *) c_bv + c_index, (_len) / 8, \
                               SIGNEDNESS (_sign), endianness);
1315
/* Body of a `-set!' accessor that goes through `bytevector_large_set':
   run the common accessor prologue, require ENDIANNESS (argument 4) to be
   a symbol, then encode VALUE on _LEN / 8 octets at C_INDEX.  A non-zero
   status from `bytevector_large_set' is reported as VALUE being out of
   range.  */
#define LARGE_INTEGER_SET(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  SCM_VALIDATE_SYMBOL (4, endianness); \
  \
  if (SCM_UNLIKELY (bytevector_large_set ((char *) c_bv + c_index, \
                                          (_len) / 8, \
                                          SIGNEDNESS (_sign), \
                                          value, endianness) != 0)) \
    scm_out_of_range (FUNC_NAME, value); \
  \
  return SCM_UNSPECIFIED;
1327
/* Like LARGE_INTEGER_REF, but without an ENDIANNESS argument: the value
   is decoded using `scm_i_native_endianness'.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  \
  return bytevector_large_ref ((char *) c_bv + c_index, (_len) / 8, \
                               SIGNEDNESS (_sign), \
                               scm_i_native_endianness);
1332
/* Like LARGE_INTEGER_SET, but without an ENDIANNESS argument: VALUE is
   encoded using `scm_i_native_endianness'.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  \
  if (SCM_UNLIKELY (bytevector_large_set ((char *) c_bv + c_index, \
                                          (_len) / 8, \
                                          SIGNEDNESS (_sign), value, \
                                          scm_i_native_endianness) != 0)) \
    scm_out_of_range (FUNC_NAME, value); \
  \
  return SCM_UNSPECIFIED;
1344
1345
1346 SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
1347 3, 0, 0,
1348 (SCM bv, SCM index, SCM endianness),
1349 "Return the unsigned 32-bit integer from @var{bv} at "
1350 "@var{index}.")
1351 #define FUNC_NAME s_scm_bytevector_u32_ref
1352 {
1353 #if SIZEOF_VOID_P > 4
1354 INTEGER_REF (32, unsigned);
1355 #else
1356 LARGE_INTEGER_REF (32, unsigned);
1357 #endif
1358 }
1359 #undef FUNC_NAME
1360
1361 SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
1362 3, 0, 0,
1363 (SCM bv, SCM index, SCM endianness),
1364 "Return the signed 32-bit integer from @var{bv} at "
1365 "@var{index}.")
1366 #define FUNC_NAME s_scm_bytevector_s32_ref
1367 {
1368 #if SIZEOF_VOID_P > 4
1369 INTEGER_REF (32, signed);
1370 #else
1371 LARGE_INTEGER_REF (32, signed);
1372 #endif
1373 }
1374 #undef FUNC_NAME
1375
1376 SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1377 2, 0, 0,
1378 (SCM bv, SCM index),
1379 "Return the unsigned 32-bit integer from @var{bv} at "
1380 "@var{index} using the native endianness.")
1381 #define FUNC_NAME s_scm_bytevector_u32_native_ref
1382 {
1383 #if SIZEOF_VOID_P > 4
1384 INTEGER_NATIVE_REF (32, unsigned);
1385 #else
1386 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1387 #endif
1388 }
1389 #undef FUNC_NAME
1390
1391 SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1392 2, 0, 0,
1393 (SCM bv, SCM index),
1394 "Return the unsigned 32-bit integer from @var{bv} at "
1395 "@var{index} using the native endianness.")
1396 #define FUNC_NAME s_scm_bytevector_s32_native_ref
1397 {
1398 #if SIZEOF_VOID_P > 4
1399 INTEGER_NATIVE_REF (32, signed);
1400 #else
1401 LARGE_INTEGER_NATIVE_REF (32, signed);
1402 #endif
1403 }
1404 #undef FUNC_NAME
1405
1406 SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
1407 4, 0, 0,
1408 (SCM bv, SCM index, SCM value, SCM endianness),
1409 "Store @var{value} in @var{bv} at @var{index} according to "
1410 "@var{endianness}.")
1411 #define FUNC_NAME s_scm_bytevector_u32_set_x
1412 {
1413 #if SIZEOF_VOID_P > 4
1414 INTEGER_SET (32, unsigned);
1415 #else
1416 LARGE_INTEGER_SET (32, unsigned);
1417 #endif
1418 }
1419 #undef FUNC_NAME
1420
1421 SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
1422 4, 0, 0,
1423 (SCM bv, SCM index, SCM value, SCM endianness),
1424 "Store @var{value} in @var{bv} at @var{index} according to "
1425 "@var{endianness}.")
1426 #define FUNC_NAME s_scm_bytevector_s32_set_x
1427 {
1428 #if SIZEOF_VOID_P > 4
1429 INTEGER_SET (32, signed);
1430 #else
1431 LARGE_INTEGER_SET (32, signed);
1432 #endif
1433 }
1434 #undef FUNC_NAME
1435
1436 SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1437 3, 0, 0,
1438 (SCM bv, SCM index, SCM value),
1439 "Store the unsigned integer @var{value} at index @var{index} "
1440 "of @var{bv} using the native endianness.")
1441 #define FUNC_NAME s_scm_bytevector_u32_native_set_x
1442 {
1443 #if SIZEOF_VOID_P > 4
1444 INTEGER_NATIVE_SET (32, unsigned);
1445 #else
1446 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1447 #endif
1448 }
1449 #undef FUNC_NAME
1450
1451 SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1452 3, 0, 0,
1453 (SCM bv, SCM index, SCM value),
1454 "Store the signed integer @var{value} at index @var{index} "
1455 "of @var{bv} using the native endianness.")
1456 #define FUNC_NAME s_scm_bytevector_s32_native_set_x
1457 {
1458 #if SIZEOF_VOID_P > 4
1459 INTEGER_NATIVE_SET (32, signed);
1460 #else
1461 LARGE_INTEGER_NATIVE_SET (32, signed);
1462 #endif
1463 }
1464 #undef FUNC_NAME
1465
1466
1467 \f
1468 /* Operations on 64-bit integers. */
1469
1470 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1471
1472 SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
1473 3, 0, 0,
1474 (SCM bv, SCM index, SCM endianness),
1475 "Return the unsigned 64-bit integer from @var{bv} at "
1476 "@var{index}.")
1477 #define FUNC_NAME s_scm_bytevector_u64_ref
1478 {
1479 LARGE_INTEGER_REF (64, unsigned);
1480 }
1481 #undef FUNC_NAME
1482
1483 SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
1484 3, 0, 0,
1485 (SCM bv, SCM index, SCM endianness),
1486 "Return the signed 64-bit integer from @var{bv} at "
1487 "@var{index}.")
1488 #define FUNC_NAME s_scm_bytevector_s64_ref
1489 {
1490 LARGE_INTEGER_REF (64, signed);
1491 }
1492 #undef FUNC_NAME
1493
1494 SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1495 2, 0, 0,
1496 (SCM bv, SCM index),
1497 "Return the unsigned 64-bit integer from @var{bv} at "
1498 "@var{index} using the native endianness.")
1499 #define FUNC_NAME s_scm_bytevector_u64_native_ref
1500 {
1501 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1502 }
1503 #undef FUNC_NAME
1504
1505 SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1506 2, 0, 0,
1507 (SCM bv, SCM index),
1508 "Return the unsigned 64-bit integer from @var{bv} at "
1509 "@var{index} using the native endianness.")
1510 #define FUNC_NAME s_scm_bytevector_s64_native_ref
1511 {
1512 LARGE_INTEGER_NATIVE_REF (64, signed);
1513 }
1514 #undef FUNC_NAME
1515
1516 SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
1517 4, 0, 0,
1518 (SCM bv, SCM index, SCM value, SCM endianness),
1519 "Store @var{value} in @var{bv} at @var{index} according to "
1520 "@var{endianness}.")
1521 #define FUNC_NAME s_scm_bytevector_u64_set_x
1522 {
1523 LARGE_INTEGER_SET (64, unsigned);
1524 }
1525 #undef FUNC_NAME
1526
1527 SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
1528 4, 0, 0,
1529 (SCM bv, SCM index, SCM value, SCM endianness),
1530 "Store @var{value} in @var{bv} at @var{index} according to "
1531 "@var{endianness}.")
1532 #define FUNC_NAME s_scm_bytevector_s64_set_x
1533 {
1534 LARGE_INTEGER_SET (64, signed);
1535 }
1536 #undef FUNC_NAME
1537
1538 SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1539 3, 0, 0,
1540 (SCM bv, SCM index, SCM value),
1541 "Store the unsigned integer @var{value} at index @var{index} "
1542 "of @var{bv} using the native endianness.")
1543 #define FUNC_NAME s_scm_bytevector_u64_native_set_x
1544 {
1545 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1546 }
1547 #undef FUNC_NAME
1548
1549 SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1550 3, 0, 0,
1551 (SCM bv, SCM index, SCM value),
1552 "Store the signed integer @var{value} at index @var{index} "
1553 "of @var{bv} using the native endianness.")
1554 #define FUNC_NAME s_scm_bytevector_s64_native_set_x
1555 {
1556 LARGE_INTEGER_NATIVE_SET (64, signed);
1557 }
1558 #undef FUNC_NAME
1559
1560
1561 \f
1562 /* Operations on IEEE-754 numbers. */
1563
1564 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1565 However, in R6RS, when the endianness is `little', little endian is
1566 assumed for both the byte order and the word order. This is clear from
1567 Section 2.1 of R6RS-lib (in response to
1568 http://www.r6rs.org/formal-comments/comment-187.txt). */
1569
1570
1571 /* Convert to/from a floating-point number with different endianness. This
1572 method is probably not the most efficient but it should be portable. */
1573
1574 static inline void
1575 float_to_foreign_endianness (union scm_ieee754_float *target,
1576 float source)
1577 {
1578 union scm_ieee754_float src;
1579
1580 src.f = source;
1581
1582 #ifdef WORDS_BIGENDIAN
1583 /* Assuming little endian for both byte and word order. */
1584 target->little_endian.negative = src.big_endian.negative;
1585 target->little_endian.exponent = src.big_endian.exponent;
1586 target->little_endian.mantissa = src.big_endian.mantissa;
1587 #else
1588 target->big_endian.negative = src.little_endian.negative;
1589 target->big_endian.exponent = src.little_endian.exponent;
1590 target->big_endian.mantissa = src.little_endian.mantissa;
1591 #endif
1592 }
1593
1594 static inline float
1595 float_from_foreign_endianness (const union scm_ieee754_float *source)
1596 {
1597 union scm_ieee754_float result;
1598
1599 #ifdef WORDS_BIGENDIAN
1600 /* Assuming little endian for both byte and word order. */
1601 result.big_endian.negative = source->little_endian.negative;
1602 result.big_endian.exponent = source->little_endian.exponent;
1603 result.big_endian.mantissa = source->little_endian.mantissa;
1604 #else
1605 result.little_endian.negative = source->big_endian.negative;
1606 result.little_endian.exponent = source->big_endian.exponent;
1607 result.little_endian.mantissa = source->big_endian.mantissa;
1608 #endif
1609
1610 return (result.f);
1611 }
1612
1613 static inline void
1614 double_to_foreign_endianness (union scm_ieee754_double *target,
1615 double source)
1616 {
1617 union scm_ieee754_double src;
1618
1619 src.d = source;
1620
1621 #ifdef WORDS_BIGENDIAN
1622 /* Assuming little endian for both byte and word order. */
1623 target->little_little_endian.negative = src.big_endian.negative;
1624 target->little_little_endian.exponent = src.big_endian.exponent;
1625 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1626 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1627 #else
1628 target->big_endian.negative = src.little_little_endian.negative;
1629 target->big_endian.exponent = src.little_little_endian.exponent;
1630 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1631 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1632 #endif
1633 }
1634
1635 static inline double
1636 double_from_foreign_endianness (const union scm_ieee754_double *source)
1637 {
1638 union scm_ieee754_double result;
1639
1640 #ifdef WORDS_BIGENDIAN
1641 /* Assuming little endian for both byte and word order. */
1642 result.big_endian.negative = source->little_little_endian.negative;
1643 result.big_endian.exponent = source->little_little_endian.exponent;
1644 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1645 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1646 #else
1647 result.little_little_endian.negative = source->big_endian.negative;
1648 result.little_little_endian.exponent = source->big_endian.exponent;
1649 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1650 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1651 #endif
1652
1653 return (result.d);
1654 }
1655
/* Helpers that let the accessor templates be written once for both
   `float' and `double'; _C_TYPE is the C type name.
   XXX: Guile can only convert to/from doubles, so the two SCM conversion
   helpers ignore _C_TYPE.  */
#define IEEE754_UNION(_c_type) \
  union scm_ieee754_ ## _c_type
#define IEEE754_TO_SCM(_c_type) \
  scm_from_double
#define IEEE754_FROM_SCM(_c_type) \
  scm_to_double
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) \
  _c_type ## _from_foreign_endianness
#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) \
  _c_type ## _to_foreign_endianness
1665
1666
/* Signal a wrong-type error for argument number _POS unless _VAL
   satisfies `scm_is_real'.
   FIXME: SCM_VALIDATE_REAL rejects integers, etc., hence this local
   replacement.  */
#define VALIDATE_REAL(_pos, _val) \
  do \
    { \
      SCM_ASSERT_TYPE (scm_is_real (_val), _val, _pos, FUNC_NAME, "real"); \
    } \
  while (0)
1672
/* Template getters and setters. */
1674
/* Accessor prologue for a floating-point value of C type _TYPE: reuse the
   integer prologue with the width of _TYPE expressed in bits.  */
#define IEEE754_ACCESSOR_PROLOGUE(_type) \
  INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1677
/* Body of an IEEE-754 `-ref' accessor taking an ENDIANNESS argument.
   When ENDIANNESS is the native one the octets at C_INDEX are copied
   as is; otherwise they are copied into the matching union and converted
   by the `_from_foreign_endianness' helper.  */
#define IEEE754_REF(_type) \
  _type c_result; \
  \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  SCM_VALIDATE_SYMBOL (3, endianness); \
  \
  if (!scm_is_eq (endianness, scm_i_native_endianness)) \
    { \
      IEEE754_UNION (_type) c_foreign; \
      \
      memcpy (&c_foreign, &c_bv[c_index], sizeof (c_foreign)); \
      c_result = IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_foreign); \
    } \
  else \
    memcpy (&c_result, &c_bv[c_index], sizeof (_type)); \
  \
  return (IEEE754_TO_SCM (_type) (c_result));
1696
/* Body of an IEEE-754 `-native-ref' accessor: copy the octets at C_INDEX
   into a _TYPE and convert it to a Scheme number.  */
#define IEEE754_NATIVE_REF(_type) \
  _type c_native; \
  \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  \
  memcpy (&c_native, &c_bv[c_index], sizeof (_type)); \
  return (IEEE754_TO_SCM (_type) (c_native));
1704
/* Body of an IEEE-754 `-set!' accessor taking an ENDIANNESS argument.
   VALUE must be a real and ENDIANNESS a symbol.  When ENDIANNESS is the
   native one the C value is copied as is; otherwise it first goes
   through the `_to_foreign_endianness' helper.  */
#define IEEE754_SET(_type) \
  _type c_value; \
  \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  VALIDATE_REAL (3, value); \
  SCM_VALIDATE_SYMBOL (4, endianness); \
  c_value = IEEE754_FROM_SCM (_type) (value); \
  \
  if (!scm_is_eq (endianness, scm_i_native_endianness)) \
    { \
      IEEE754_UNION (_type) c_foreign; \
      \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_foreign, c_value); \
      memcpy (&c_bv[c_index], &c_foreign, sizeof (c_foreign)); \
    } \
  else \
    memcpy (&c_bv[c_index], &c_value, sizeof (_type)); \
  \
  return SCM_UNSPECIFIED;
1724
/* Body of an IEEE-754 `-native-set!' accessor: VALUE must be a real; it
   is converted to a _TYPE and its octets are copied to C_INDEX.  */
#define IEEE754_NATIVE_SET(_type) \
  _type c_native; \
  \
  IEEE754_ACCESSOR_PROLOGUE (_type); \
  VALIDATE_REAL (3, value); \
  c_native = IEEE754_FROM_SCM (_type) (value); \
  \
  memcpy (&c_bv[c_index], &c_native, sizeof (_type)); \
  return SCM_UNSPECIFIED;
1734
1735
1736 /* Single precision. */
1737
1738 SCM_DEFINE (scm_bytevector_ieee_single_ref,
1739 "bytevector-ieee-single-ref",
1740 3, 0, 0,
1741 (SCM bv, SCM index, SCM endianness),
1742 "Return the IEEE-754 single from @var{bv} at "
1743 "@var{index}.")
1744 #define FUNC_NAME s_scm_bytevector_ieee_single_ref
1745 {
1746 IEEE754_REF (float);
1747 }
1748 #undef FUNC_NAME
1749
1750 SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
1751 "bytevector-ieee-single-native-ref",
1752 2, 0, 0,
1753 (SCM bv, SCM index),
1754 "Return the IEEE-754 single from @var{bv} at "
1755 "@var{index} using the native endianness.")
1756 #define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
1757 {
1758 IEEE754_NATIVE_REF (float);
1759 }
1760 #undef FUNC_NAME
1761
1762 SCM_DEFINE (scm_bytevector_ieee_single_set_x,
1763 "bytevector-ieee-single-set!",
1764 4, 0, 0,
1765 (SCM bv, SCM index, SCM value, SCM endianness),
1766 "Store real @var{value} in @var{bv} at @var{index} according to "
1767 "@var{endianness}.")
1768 #define FUNC_NAME s_scm_bytevector_ieee_single_set_x
1769 {
1770 IEEE754_SET (float);
1771 }
1772 #undef FUNC_NAME
1773
1774 SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
1775 "bytevector-ieee-single-native-set!",
1776 3, 0, 0,
1777 (SCM bv, SCM index, SCM value),
1778 "Store the real @var{value} at index @var{index} "
1779 "of @var{bv} using the native endianness.")
1780 #define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
1781 {
1782 IEEE754_NATIVE_SET (float);
1783 }
1784 #undef FUNC_NAME
1785
1786
1787 /* Double precision. */
1788
1789 SCM_DEFINE (scm_bytevector_ieee_double_ref,
1790 "bytevector-ieee-double-ref",
1791 3, 0, 0,
1792 (SCM bv, SCM index, SCM endianness),
1793 "Return the IEEE-754 double from @var{bv} at "
1794 "@var{index}.")
1795 #define FUNC_NAME s_scm_bytevector_ieee_double_ref
1796 {
1797 IEEE754_REF (double);
1798 }
1799 #undef FUNC_NAME
1800
1801 SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
1802 "bytevector-ieee-double-native-ref",
1803 2, 0, 0,
1804 (SCM bv, SCM index),
1805 "Return the IEEE-754 double from @var{bv} at "
1806 "@var{index} using the native endianness.")
1807 #define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
1808 {
1809 IEEE754_NATIVE_REF (double);
1810 }
1811 #undef FUNC_NAME
1812
1813 SCM_DEFINE (scm_bytevector_ieee_double_set_x,
1814 "bytevector-ieee-double-set!",
1815 4, 0, 0,
1816 (SCM bv, SCM index, SCM value, SCM endianness),
1817 "Store real @var{value} in @var{bv} at @var{index} according to "
1818 "@var{endianness}.")
1819 #define FUNC_NAME s_scm_bytevector_ieee_double_set_x
1820 {
1821 IEEE754_SET (double);
1822 }
1823 #undef FUNC_NAME
1824
1825 SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
1826 "bytevector-ieee-double-native-set!",
1827 3, 0, 0,
1828 (SCM bv, SCM index, SCM value),
1829 "Store the real @var{value} at index @var{index} "
1830 "of @var{bv} using the native endianness.")
1831 #define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
1832 {
1833 IEEE754_NATIVE_SET (double);
1834 }
1835 #undef FUNC_NAME
1836
1837
/* Drop the IEEE-754 accessor templates...  */
#undef IEEE754_NATIVE_SET
#undef IEEE754_SET
#undef IEEE754_NATIVE_REF
#undef IEEE754_REF
/* ... and the type-abstraction helpers they relied on.  */
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_FROM_SCM
#undef IEEE754_TO_SCM
#undef IEEE754_UNION
1847
1848 \f
1849 /* Operations on strings. */
1850
1851
/* Define `utfN_strlen', where N is _UTF_WIDTH: return the length in
   bytes, terminator excluded, of STR, a zero-terminated sequence of
   N-bit code units.  */
#define UTF_STRLEN_FUNCTION(_utf_width) \
static inline size_t \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str) \
{ \
  const uint ## _utf_width ## _t *end = str; \
  \
  /* Walk to the terminating zero code unit.  */ \
  while (*end != 0) \
    end++; \
  \
  return ((size_t) (end - str) * ((_utf_width) / 8)); \
}

UTF_STRLEN_FUNCTION (8)
1870
1871
/* Expand to a call to the `utfN_strlen' function generated for
   _UTF_WIDTH, i.e. to the length (in bytes) of _STR, a UTF-(_UTF_WIDTH)
   encoded string.  */
#define UTF_STRLEN(_utf_width, _str) utf ## _utf_width ## _strlen (_str)
1875
1876 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1877 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1878 encoding name). */
1879 static inline void
1880 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1881 {
1882 strcpy (name, "UTF-");
1883 strcat (name, ((utf_width == 8)
1884 ? "8"
1885 : ((utf_width == 16)
1886 ? "16"
1887 : ((utf_width == 32)
1888 ? "32"
1889 : "??"))));
1890 strcat (name,
1891 ((scm_is_eq (endianness, scm_sym_big))
1892 ? "BE"
1893 : ((scm_is_eq (endianness, scm_sym_little))
1894 ? "LE"
1895 : "unknown")));
1896 }
1897
1898 /* Maximum length of a UTF encoding name. */
1899 #define MAX_UTF_ENCODING_NAME_LEN 16
1900
/* Produce the body of a `string->utf' function: return a fresh bytevector
   holding STR encoded as UTF-(_UTF_WIDTH) with ENDIANNESS, which defaults
   to `big' when unspecified.  STR and ENDIANNESS are the arguments of the
   enclosing function, which must also define FUNC_NAME.

   Narrow strings are converted from ISO-8859-1 with `mem_iconveh', wide
   ones with `u32_conv_to_encoding'.  Both report failure through errno;
   it is saved right away and passed to `scm_syserror_msg'.  (The return
   value of `mem_iconveh' is only a -1 status, not an error code.)  */
#define STRING_TO_UTF(_utf_width) \
  SCM utf; \
  int err; \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
  char *c_utf = NULL; \
  size_t c_strlen, c_utf_len = 0; \
  \
  SCM_VALIDATE_STRING (1, str); \
  if (scm_is_eq (endianness, SCM_UNDEFINED)) \
    endianness = scm_sym_big; \
  else \
    SCM_VALIDATE_SYMBOL (2, endianness); \
  \
  utf_encoding_name (c_utf_name, (_utf_width), endianness); \
  \
  c_strlen = scm_i_string_length (str); \
  if (scm_i_is_narrow_string (str)) \
    { \
      err = mem_iconveh (scm_i_string_chars (str), c_strlen, \
                         "ISO-8859-1", c_utf_name, \
                         iconveh_question_mark, NULL, \
                         &c_utf, &c_utf_len); \
      if (SCM_UNLIKELY (err)) \
        { \
          /* Save errno before `scm_list_1' gets a chance to change it.  */ \
          int c_errno = errno; \
          scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
                            scm_list_1 (str), c_errno); \
        } \
    } \
  else \
    { \
      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str); \
      c_utf = u32_conv_to_encoding (c_utf_name, \
                                    iconveh_question_mark, \
                                    (scm_t_uint32 *) wbuf, \
                                    c_strlen, NULL, NULL, &c_utf_len); \
      if (SCM_UNLIKELY (c_utf == NULL)) \
        { \
          int c_errno = errno; \
          scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A", \
                            scm_list_1 (str), c_errno); \
        } \
    } \
  /* Have C_UTF freed even if `make_bytevector' exits non-locally.  */ \
  scm_dynwind_begin (0); \
  scm_dynwind_free (c_utf); \
  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8); \
  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len); \
  scm_dynwind_end (); \
  \
  return (utf);
1946
1947
1948
1949 SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1950 1, 0, 0,
1951 (SCM str),
1952 "Return a newly allocated bytevector that contains the UTF-8 "
1953 "encoding of @var{str}.")
1954 #define FUNC_NAME s_scm_string_to_utf8
1955 {
1956 SCM utf;
1957 scm_t_uint8 *c_utf;
1958 size_t c_utf_len = 0;
1959
1960 SCM_VALIDATE_STRING (1, str);
1961
1962 c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
1963 utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
1964 memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
1965 free (c_utf);
1966
1967 return (utf);
1968 }
1969 #undef FUNC_NAME
1970
1971 SCM_DEFINE (scm_string_to_utf16, "string->utf16",
1972 1, 1, 0,
1973 (SCM str, SCM endianness),
1974 "Return a newly allocated bytevector that contains the UTF-16 "
1975 "encoding of @var{str}.")
1976 #define FUNC_NAME s_scm_string_to_utf16
1977 {
1978 STRING_TO_UTF (16);
1979 }
1980 #undef FUNC_NAME
1981
1982 static void
1983 swap_u32 (scm_t_wchar *vals, size_t len)
1984 {
1985 size_t n;
1986 for (n = 0; n < len; n++)
1987 vals[n] = bswap_32 (vals[n]);
1988 }
1989
1990 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1991 1, 1, 0,
1992 (SCM str, SCM endianness),
1993 "Return a newly allocated bytevector that contains the UTF-32 "
1994 "encoding of @var{str}.")
1995 #define FUNC_NAME s_scm_string_to_utf32
1996 {
1997 SCM bv;
1998 scm_t_wchar *wchars;
1999 size_t wchar_len, bytes_len;
2000
2001 wchars = scm_to_utf32_stringn (str, &wchar_len);
2002 bytes_len = wchar_len * sizeof (scm_t_wchar);
2003 if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
2004 scm_i_native_endianness))
2005 swap_u32 (wchars, wchar_len);
2006
2007 bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
2008 memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
2009 free (wchars);
2010
2011 return bv;
2012 }
2013 #undef FUNC_NAME
2014
2015
/* Produce the body of a function that converts a UTF-encoded bytevector to
   a string.  UTF and ENDIANNESS are the arguments of the enclosing
   function, which must also define FUNC_NAME; ENDIANNESS defaults to `big'
   when unspecified.

   The contents of UTF are first converted from UTF-(_UTF_WIDTH) to UTF-8
   with `mem_iconveh', then turned into a string.  `mem_iconveh' reports
   failure through errno (its return value is only a -1 status), so errno
   is what gets passed to `scm_syserror_msg'.  The intermediate UTF-8
   buffer is registered with the dynwind context so that it is freed even
   if `scm_from_stringn' exits non-locally.  */
#define UTF_TO_STRING(_utf_width) \
  SCM str = SCM_BOOL_F; \
  int err; \
  char *c_str = NULL; \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN]; \
  char *c_utf; \
  size_t c_strlen = 0, c_utf_len = 0; \
  \
  SCM_VALIDATE_BYTEVECTOR (1, utf); \
  if (scm_is_eq (endianness, SCM_UNDEFINED)) \
    endianness = scm_sym_big; \
  else \
    SCM_VALIDATE_SYMBOL (2, endianness); \
  \
  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf); \
  c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf); \
  utf_encoding_name (c_utf_name, (_utf_width), endianness); \
  \
  err = mem_iconveh (c_utf, c_utf_len, \
                     c_utf_name, "UTF-8", \
                     iconveh_question_mark, NULL, \
                     &c_str, &c_strlen); \
  if (SCM_UNLIKELY (err)) \
    { \
      /* Save errno before `scm_list_1' gets a chance to change it.  */ \
      int c_errno = errno; \
      scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A", \
                        scm_list_1 (utf), c_errno); \
    } \
  else \
    { \
      scm_dynwind_begin (0); \
      scm_dynwind_free (c_str); \
      str = scm_from_stringn (c_str, c_strlen, "UTF-8", \
                              SCM_FAILED_CONVERSION_ERROR); \
      scm_dynwind_end (); \
    } \
  return (str);
2050
2051
2052 SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2053 1, 0, 0,
2054 (SCM utf),
2055 "Return a newly allocate string that contains from the UTF-8-"
2056 "encoded contents of bytevector @var{utf}.")
2057 #define FUNC_NAME s_scm_utf8_to_string
2058 {
2059 SCM str;
2060 const char *c_utf;
2061 size_t c_utf_len = 0;
2062
2063 SCM_VALIDATE_BYTEVECTOR (1, utf);
2064
2065 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2066 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2067 str = scm_from_stringn (c_utf, c_utf_len, "UTF-8",
2068 SCM_FAILED_CONVERSION_ERROR);
2069
2070 return (str);
2071 }
2072 #undef FUNC_NAME
2073
2074 SCM_DEFINE (scm_utf16_to_string, "utf16->string",
2075 1, 1, 0,
2076 (SCM utf, SCM endianness),
2077 "Return a newly allocate string that contains from the UTF-16-"
2078 "encoded contents of bytevector @var{utf}.")
2079 #define FUNC_NAME s_scm_utf16_to_string
2080 {
2081 UTF_TO_STRING (16);
2082 }
2083 #undef FUNC_NAME
2084
2085 SCM_DEFINE (scm_utf32_to_string, "utf32->string",
2086 1, 1, 0,
2087 (SCM utf, SCM endianness),
2088 "Return a newly allocate string that contains from the UTF-32-"
2089 "encoded contents of bytevector @var{utf}.")
2090 #define FUNC_NAME s_scm_utf32_to_string
2091 {
2092 UTF_TO_STRING (32);
2093 }
2094 #undef FUNC_NAME
2095
2096 \f
2097 /* Bytevectors as generalized vectors & arrays. */
2098
/* Common prologue of the complex-number accessors.  Declare and set
   C_LEN, C_INDEX and C_BV from the BV and INDEX arguments of the enclosing
   function, and signal an out-of-range error unless the two consecutive
   _TYPE values starting at C_INDEX lie entirely within BV.

   The bounds check is written without adding to C_INDEX, so that a huge
   index cannot wrap around and slip through.  */
#define COMPLEX_ACCESSOR_PROLOGUE(_type) \
  size_t c_len, c_index; \
  char *c_bv; \
  \
  SCM_VALIDATE_BYTEVECTOR (1, bv); \
  c_index = scm_to_size_t (index); \
  \
  c_len = SCM_BYTEVECTOR_LENGTH (bv); \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
  \
  if (SCM_UNLIKELY (c_len < 2 * sizeof (_type) \
                    || c_index > c_len - 2 * sizeof (_type))) \
    scm_out_of_range (FUNC_NAME, index);
2111
/* Template for native access to complex numbers of type TYPE: read the
   real part at C_INDEX and the imaginary part right after it, and return
   the corresponding rectangular number.  */
#define COMPLEX_NATIVE_REF(_type) \
  _type c_re, c_im; \
  \
  COMPLEX_ACCESSOR_PROLOGUE (_type); \
  \
  memcpy (&c_re, &c_bv[c_index], sizeof (c_re)); \
  memcpy (&c_im, &c_bv[c_index + sizeof (c_re)], sizeof (c_im)); \
  \
  return scm_c_make_rectangular (c_re, c_im);
2128
2129 static SCM
2130 bytevector_ref_c32 (SCM bv, SCM index)
2131 #define FUNC_NAME "bytevector_ref_c32"
2132 {
2133 COMPLEX_NATIVE_REF (float);
2134 }
2135 #undef FUNC_NAME
2136
2137 static SCM
2138 bytevector_ref_c64 (SCM bv, SCM index)
2139 #define FUNC_NAME "bytevector_ref_c64"
2140 {
2141 COMPLEX_NATIVE_REF (double);
2142 }
2143 #undef FUNC_NAME
2144
2145 typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM);
2146
2147 static const scm_t_bytevector_ref_fn
2148 bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
2149 {
2150 NULL, /* SCM */
2151 NULL, /* CHAR */
2152 NULL, /* BIT */
2153 scm_bytevector_u8_ref, /* VU8 */
2154 scm_bytevector_u8_ref, /* U8 */
2155 scm_bytevector_s8_ref,
2156 scm_bytevector_u16_native_ref,
2157 scm_bytevector_s16_native_ref,
2158 scm_bytevector_u32_native_ref,
2159 scm_bytevector_s32_native_ref,
2160 scm_bytevector_u64_native_ref,
2161 scm_bytevector_s64_native_ref,
2162 scm_bytevector_ieee_single_native_ref,
2163 scm_bytevector_ieee_double_native_ref,
2164 bytevector_ref_c32,
2165 bytevector_ref_c64
2166 };
2167
2168 static SCM
2169 bv_handle_ref (scm_t_array_handle *h, size_t index)
2170 {
2171 SCM byte_index;
2172 scm_t_bytevector_ref_fn ref_fn;
2173
2174 ref_fn = bytevector_ref_fns[h->element_type];
2175 byte_index =
2176 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2177 return ref_fn (h->array, byte_index);
2178 }
2179
/* Template for native modification of complex numbers of type TYPE: store
   the real part of VALUE at C_INDEX and its imaginary part right after
   it.  */
#define COMPLEX_NATIVE_SET(_type) \
  _type c_re, c_im; \
  \
  COMPLEX_ACCESSOR_PROLOGUE (_type); \
  \
  c_re = scm_c_real_part (value); \
  c_im = scm_c_imag_part (value); \
  \
  memcpy (&c_bv[c_index], &c_re, sizeof (c_re)); \
  memcpy (&c_bv[c_index + sizeof (c_re)], &c_im, sizeof (c_im)); \
  \
  return SCM_UNSPECIFIED;
2194
2195 static SCM
2196 bytevector_set_c32 (SCM bv, SCM index, SCM value)
2197 #define FUNC_NAME "bytevector_set_c32"
2198 {
2199 COMPLEX_NATIVE_SET (float);
2200 }
2201 #undef FUNC_NAME
2202
2203 static SCM
2204 bytevector_set_c64 (SCM bv, SCM index, SCM value)
2205 #define FUNC_NAME "bytevector_set_c64"
2206 {
2207 COMPLEX_NATIVE_SET (double);
2208 }
2209 #undef FUNC_NAME
2210
2211 typedef SCM (*scm_t_bytevector_set_fn)(SCM, SCM, SCM);
2212
2213 const scm_t_bytevector_set_fn bytevector_set_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
2214 {
2215 NULL, /* SCM */
2216 NULL, /* CHAR */
2217 NULL, /* BIT */
2218 scm_bytevector_u8_set_x, /* VU8 */
2219 scm_bytevector_u8_set_x, /* U8 */
2220 scm_bytevector_s8_set_x,
2221 scm_bytevector_u16_native_set_x,
2222 scm_bytevector_s16_native_set_x,
2223 scm_bytevector_u32_native_set_x,
2224 scm_bytevector_s32_native_set_x,
2225 scm_bytevector_u64_native_set_x,
2226 scm_bytevector_s64_native_set_x,
2227 scm_bytevector_ieee_single_native_set_x,
2228 scm_bytevector_ieee_double_native_set_x,
2229 bytevector_set_c32,
2230 bytevector_set_c64
2231 };
2232
2233 static void
2234 bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2235 {
2236 SCM byte_index;
2237 scm_t_bytevector_set_fn set_fn;
2238
2239 set_fn = bytevector_set_fns[h->element_type];
2240 byte_index =
2241 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2242 set_fn (h->array, byte_index, val);
2243 }
2244
2245 static void
2246 bytevector_get_handle (SCM v, scm_t_array_handle *h)
2247 {
2248 h->array = v;
2249 h->ndims = 1;
2250 h->dims = &h->dim0;
2251 h->dim0.lbnd = 0;
2252 h->dim0.ubnd = SCM_BYTEVECTOR_TYPED_LENGTH (v) - 1;
2253 h->dim0.inc = 1;
2254 h->element_type = SCM_BYTEVECTOR_ELEMENT_TYPE (v);
2255 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2256 }
2257
2258 \f
2259 /* Initialization. */
2260
2261 void
2262 scm_bootstrap_bytevectors (void)
2263 {
2264 /* This must be instantiated here because the generalized-vector API may
2265 want to access bytevectors even though `(rnrs bytevectors)' hasn't been
2266 loaded. */
2267 scm_null_bytevector = make_bytevector (0, SCM_ARRAY_ELEMENT_TYPE_VU8);
2268
2269 #ifdef WORDS_BIGENDIAN
2270 scm_i_native_endianness = scm_from_latin1_symbol ("big");
2271 #else
2272 scm_i_native_endianness = scm_from_latin1_symbol ("little");
2273 #endif
2274
2275 scm_c_register_extension ("libguile-" SCM_EFFECTIVE_VERSION,
2276 "scm_init_bytevectors",
2277 (scm_t_extension_init_func) scm_init_bytevectors,
2278 NULL);
2279
2280 {
2281 scm_t_array_implementation impl;
2282
2283 impl.tag = scm_tc7_bytevector;
2284 impl.mask = 0x7f;
2285 impl.vref = bv_handle_ref;
2286 impl.vset = bv_handle_set_x;
2287 impl.get_handle = bytevector_get_handle;
2288 scm_i_register_array_implementation (&impl);
2289 scm_i_register_vector_constructor
2290 (scm_i_array_element_types[SCM_ARRAY_ELEMENT_TYPE_VU8],
2291 scm_make_bytevector);
2292 }
2293 }
2294
2295 void
2296 scm_init_bytevectors (void)
2297 {
2298 #include "libguile/bytevectors.x"
2299
2300 scm_endianness_big = scm_sym_big;
2301 scm_endianness_little = scm_sym_little;
2302 }