9e9586d754781117d8d98486bf986d38631eb636
[bpt/guile.git] / libguile / bytevectors.c
1 /* Copyright (C) 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25
26 #include <gmp.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/extensions.h"
30 #include "libguile/bytevectors.h"
31 #include "libguile/strings.h"
32 #include "libguile/validate.h"
33 #include "libguile/ieee-754.h"
34 #include "libguile/arrays.h"
35 #include "libguile/array-handle.h"
36 #include "libguile/uniform.h"
37 #include "libguile/srfi-4.h"
38
39 #include <byteswap.h>
40 #include <striconveh.h>
41 #include <uniconv.h>
42
43 #ifdef HAVE_LIMITS_H
44 # include <limits.h>
45 #else
46 /* Assuming 32-bit longs. */
47 # define ULONG_MAX 4294967295UL
48 #endif
49
50 #include <string.h>
51
52
53 \f
54 /* Utilities. */
55
/* Convenience macros.  These are used by the various templates (macros)
   that are parameterized by integer size and signedness.  */

/* C type to use for each (size, signedness) pair.  */
#define INT8_T_signed     scm_t_int8
#define INT8_T_unsigned   scm_t_uint8
#define INT16_T_signed    scm_t_int16
#define INT16_T_unsigned  scm_t_uint16
#define INT32_T_signed    scm_t_int32
#define INT32_T_unsigned  scm_t_uint32

/* Range predicates.  The unsigned variants compare against `unsigned long'
   constants, so a negative `long' argument is converted to a large
   unsigned value and rejected.  */
#define is_signed_int8(_x)     (((_x) >= -128L) && ((_x) <= 127L))
#define is_unsigned_int8(_x)   ((_x) <= 255UL)
#define is_signed_int16(_x)    (((_x) >= -32768L) && ((_x) <= 32767L))
#define is_unsigned_int16(_x)  ((_x) <= 65535UL)
/* The lower bound is spelled `-2147483647L - 1L': where `long' is 32-bit
   wide, the literal `2147483648L' does not fit in a `long' and is unsigned
   in C90, which would turn the comparison into an unsigned one.  */
#define is_signed_int32(_x)                                     \
  (((_x) >= (-2147483647L - 1L)) && ((_x) <= 2147483647L))
#define is_unsigned_int32(_x)  ((_x) <= 4294967295UL)

/* Value passed as the SIGNED_P flag of the arbitrary-size helpers.  */
#define SIGNEDNESS_signed    1
#define SIGNEDNESS_unsigned  0

/* Token-pasting selectors: _SIZE is 8, 16 or 32 and _SIGN is `signed' or
   `unsigned'.  */
#define INT_TYPE(_size, _sign)     INT ## _size ## _T_ ## _sign
#define INT_SWAP(_size)            bswap_ ## _size
#define INT_VALID_P(_size, _sign)  is_ ## _sign ## _int ## _size
#define SIGNEDNESS(_sign)          SIGNEDNESS_ ## _sign
77
78
/* Common prologue of the fixed-size accessor templates.  Expects `bv' and
   `index' (both SCM) and FUNC_NAME to be in scope.  Declares and sets
   C_LEN (length of BV), C_INDEX (INDEX as a C integer) and C_BV (contents
   of BV), and signals an out-of-range error unless the _LEN-bit integer at
   C_INDEX lies entirely within BV.

   The bounds check is written without adding to C_INDEX so that it cannot
   wrap around when C_INDEX is close to the maximum `size_t' value.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                  \
  size_t c_len, c_index;                                        \
  _sign char *c_bv;                                             \
                                                                \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                              \
  c_index = scm_to_uint (index);                                \
                                                                \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                           \
  c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv);           \
                                                                \
  if (SCM_UNLIKELY ((c_len < ((_len) >> 3UL))                   \
                    || (c_index > c_len - ((_len) >> 3UL))))    \
    scm_out_of_range (FUNC_NAME, index);
91
/* Body template of the fixed-size readers that take an explicit
   endianness (only 8, 16 or 32-bit).  Expects `bv', `index' and
   `endianness' to be in scope.  The bytes are copied out with `memcpy',
   byte-swapped when ENDIANNESS is not the native endianness symbol, and
   returned as a fixnum.  */
#define INTEGER_REF(_len, _sign)                                \
  INT_TYPE (_len, _sign) c_raw;                                 \
                                                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
  SCM_VALIDATE_SYMBOL (3, endianness);                          \
                                                                \
  memcpy (&c_raw, c_bv + c_index, (_len) / 8);                  \
  if (!scm_is_eq (endianness, scm_i_native_endianness))         \
    c_raw = INT_SWAP (_len) (c_raw);                            \
                                                                \
  return SCM_I_MAKINUM (c_raw);
110
/* Body template of the fixed-size readers that use the native endianness.
   Expects `bv' and `index' to be in scope.  The bytes are copied out with
   `memcpy' and returned, unswapped, as a fixnum.  */
#define INTEGER_NATIVE_REF(_len, _sign)                         \
  INT_TYPE (_len, _sign) c_raw;                                 \
                                                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
                                                                \
  memcpy (&c_raw, c_bv + c_index, (_len) / 8);                  \
                                                                \
  return SCM_I_MAKINUM (c_raw);
125
/* Body template of the fixed-size writers that take an explicit
   endianness (only 8, 16 or 32-bit).  Expects `bv', `index', `value' and
   `endianness' to be in scope, in that argument order.

   VALUE must be a fixnum, otherwise a wrong-type error is signaled for
   argument 3; it must also satisfy the range predicate for (_LEN, _SIGN),
   otherwise an out-of-range error is signaled.  The value is byte-swapped
   when ENDIANNESS is not the native endianness symbol and then stored with
   `memcpy'.  */
#define INTEGER_SET(_len, _sign)                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
  /* ENDIANNESS is the 4th argument of the setters.  */         \
  SCM_VALIDATE_SYMBOL (4, endianness);                          \
                                                                \
  {                                                             \
    _sign long c_value;                                         \
    INT_TYPE (_len, _sign) c_value_short;                       \
                                                                \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                    \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                 \
                                                                \
    c_value = SCM_I_INUM (value);                               \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))    \
      scm_out_of_range (FUNC_NAME, value);                      \
                                                                \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;           \
    if (!scm_is_eq (endianness, scm_i_native_endianness))       \
      c_value_short = INT_SWAP (_len) (c_value_short);          \
                                                                \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);        \
  }                                                             \
                                                                \
  return SCM_UNSPECIFIED;
150
/* Body template of the fixed-size writers that use the native endianness.
   Expects `bv', `index' and `value' to be in scope.  VALUE must be a
   fixnum (wrong-type error on argument 3 otherwise) that satisfies the
   range predicate for (_LEN, _SIGN) (out-of-range error otherwise); it is
   stored, unswapped, with `memcpy'.  */
#define INTEGER_NATIVE_SET(_len, _sign)                         \
  _sign long c_wide;                                            \
  INT_TYPE (_len, _sign) c_narrow;                              \
                                                                \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                      \
                                                                \
  if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                      \
    scm_wrong_type_arg (FUNC_NAME, 3, value);                   \
                                                                \
  c_wide = SCM_I_INUM (value);                                  \
  if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_wide)))       \
    scm_out_of_range (FUNC_NAME, value);                        \
                                                                \
  c_narrow = (INT_TYPE (_len, _sign)) c_wide;                   \
  memcpy (c_bv + c_index, &c_narrow, (_len) / 8);               \
                                                                \
  return SCM_UNSPECIFIED;
173
174
175 \f
176 /* Bytevector type. */
177
178 scm_t_bits scm_tc16_bytevector;
179
180 #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len) \
181 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
182 #define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
183 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
184
185 /* The empty bytevector. */
186 SCM scm_null_bytevector = SCM_UNSPECIFIED;
187
188
189 static inline SCM
190 make_bytevector_from_buffer (size_t len, signed char *contents)
191 {
192 /* Assuming LEN > SCM_BYTEVECTOR_INLINE_THRESHOLD. */
193 SCM_RETURN_NEWSMOB2 (scm_tc16_bytevector, len, contents);
194 }
195
196 static inline SCM
197 make_bytevector (size_t len)
198 {
199 SCM bv;
200
201 if (SCM_UNLIKELY (len == 0))
202 bv = scm_null_bytevector;
203 else
204 {
205 signed char *contents = NULL;
206
207 if (!SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len))
208 contents = (signed char *) scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
209
210 bv = make_bytevector_from_buffer (len, contents);
211 }
212
213 return bv;
214 }
215
216 /* Return a new bytevector of size LEN octets. */
217 SCM
218 scm_c_make_bytevector (size_t len)
219 {
220 return (make_bytevector (len));
221 }
222
223 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
224 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
225 SCM
226 scm_c_take_bytevector (signed char *contents, size_t len)
227 {
228 SCM bv;
229
230 if (SCM_UNLIKELY (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len)))
231 {
232 /* Copy CONTENTS into an "in-line" buffer, then free CONTENTS. */
233 signed char *c_bv;
234
235 bv = make_bytevector (len);
236 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
237 memcpy (c_bv, contents, len);
238 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
239 }
240 else
241 bv = make_bytevector_from_buffer (len, contents);
242
243 return bv;
244 }
245
246 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
247 size) and return BV. */
248 SCM
249 scm_i_shrink_bytevector (SCM bv, size_t c_new_len)
250 {
251 if (!SCM_BYTEVECTOR_INLINE_P (bv))
252 {
253 size_t c_len;
254 signed char *c_bv, *c_new_bv;
255
256 c_len = SCM_BYTEVECTOR_LENGTH (bv);
257 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
258
259 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
260
261 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
262 {
263 /* Copy to the in-line buffer and free the current buffer. */
264 c_new_bv = SCM_BYTEVECTOR_CONTENTS (bv);
265 memcpy (c_new_bv, c_bv, c_new_len);
266 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
267 }
268 else
269 {
270 /* Resize the existing buffer. */
271 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
272 SCM_GC_BYTEVECTOR);
273 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
274 }
275 }
276
277 return bv;
278 }
279
280 int
281 scm_is_bytevector (SCM obj)
282 {
283 return SCM_SMOB_PREDICATE (scm_tc16_bytevector, obj);
284 }
285
286 size_t
287 scm_c_bytevector_length (SCM bv)
288 #define FUNC_NAME "scm_c_bytevector_length"
289 {
290 SCM_VALIDATE_BYTEVECTOR (1, bv);
291
292 return SCM_BYTEVECTOR_LENGTH (bv);
293 }
294 #undef FUNC_NAME
295
296 scm_t_uint8
297 scm_c_bytevector_ref (SCM bv, size_t index)
298 #define FUNC_NAME "scm_c_bytevector_ref"
299 {
300 size_t c_len;
301 const scm_t_uint8 *c_bv;
302
303 SCM_VALIDATE_BYTEVECTOR (1, bv);
304
305 c_len = SCM_BYTEVECTOR_LENGTH (bv);
306 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
307
308 if (SCM_UNLIKELY (index >= c_len))
309 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
310
311 return c_bv[index];
312 }
313 #undef FUNC_NAME
314
315 void
316 scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
317 #define FUNC_NAME "scm_c_bytevector_set_x"
318 {
319 size_t c_len;
320 scm_t_uint8 *c_bv;
321
322 SCM_VALIDATE_BYTEVECTOR (1, bv);
323
324 c_len = SCM_BYTEVECTOR_LENGTH (bv);
325 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
326
327 if (SCM_UNLIKELY (index >= c_len))
328 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
329
330 c_bv[index] = value;
331 }
332 #undef FUNC_NAME
333
334 /* This procedure is used by `scm_c_generalized_vector_set_x ()'. */
335 void
336 scm_i_bytevector_generalized_set_x (SCM bv, size_t index, SCM value)
337 #define FUNC_NAME "scm_i_bytevector_generalized_set_x"
338 {
339 scm_c_bytevector_set_x (bv, index, scm_to_uint8 (value));
340 }
341 #undef FUNC_NAME
342
343 static int
344 print_bytevector (SCM bv, SCM port, scm_print_state *pstate)
345 {
346 unsigned c_len, i;
347 unsigned char *c_bv;
348
349 c_len = SCM_BYTEVECTOR_LENGTH (bv);
350 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
351
352 scm_puts ("#vu8(", port);
353 for (i = 0; i < c_len; i++)
354 {
355 if (i > 0)
356 scm_putc (' ', port);
357
358 scm_uintprint (c_bv[i], 10, port);
359 }
360
361 scm_putc (')', port);
362
363 /* Make GCC think we use it. */
364 scm_remember_upto_here ((SCM) pstate);
365
366 return 1;
367 }
368
369 static SCM
370 bytevector_equal_p (SCM bv1, SCM bv2)
371 {
372 return scm_bytevector_eq_p (bv1, bv2);
373 }
374
375 static size_t
376 free_bytevector (SCM bv)
377 {
378
379 if (!SCM_BYTEVECTOR_INLINE_P (bv))
380 {
381 unsigned c_len;
382 signed char *c_bv;
383
384 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
385 c_len = SCM_BYTEVECTOR_LENGTH (bv);
386
387 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
388 }
389
390 return 0;
391 }
392
393
394 \f
395 /* General operations. */
396
397 SCM_SYMBOL (scm_sym_big, "big");
398 SCM_SYMBOL (scm_sym_little, "little");
399
400 SCM scm_endianness_big, scm_endianness_little;
401
402 /* Host endianness (a symbol). */
403 SCM scm_i_native_endianness = SCM_UNSPECIFIED;
404
/* Byte-swapping.  Fallback for a 24-bit swap, used only when `bswap_24' is
   not already defined as a macro: exchange the lowest and highest of the
   three low-order octets of _X and keep the middle one in place.  */
#ifndef bswap_24
# define bswap_24(_x)                           \
  ((((_x) & 0x0000ff) << 16)                    \
   | ((_x) & 0x00ff00)                          \
   | (((_x) & 0xff0000) >> 16))
#endif
412
413
414 SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
415 (void),
416 "Return a symbol denoting the machine's native endianness.")
417 #define FUNC_NAME s_scm_native_endianness
418 {
419 return scm_i_native_endianness;
420 }
421 #undef FUNC_NAME
422
423 SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
424 (SCM obj),
425 "Return true if @var{obj} is a bytevector.")
426 #define FUNC_NAME s_scm_bytevector_p
427 {
428 return scm_from_bool (scm_is_bytevector (obj));
429 }
430 #undef FUNC_NAME
431
432 SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
433 (SCM len, SCM fill),
434 "Return a newly allocated bytevector of @var{len} bytes, "
435 "optionally filled with @var{fill}.")
436 #define FUNC_NAME s_scm_make_bytevector
437 {
438 SCM bv;
439 unsigned c_len;
440 signed char c_fill = '\0';
441
442 SCM_VALIDATE_UINT_COPY (1, len, c_len);
443 if (fill != SCM_UNDEFINED)
444 {
445 int value;
446
447 value = scm_to_int (fill);
448 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
449 scm_out_of_range (FUNC_NAME, fill);
450 c_fill = (signed char) value;
451 }
452
453 bv = make_bytevector (c_len);
454 if (fill != SCM_UNDEFINED)
455 {
456 unsigned i;
457 signed char *contents;
458
459 contents = SCM_BYTEVECTOR_CONTENTS (bv);
460 for (i = 0; i < c_len; i++)
461 contents[i] = c_fill;
462 }
463
464 return bv;
465 }
466 #undef FUNC_NAME
467
468 SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
469 (SCM bv),
470 "Return the length (in bytes) of @var{bv}.")
471 #define FUNC_NAME s_scm_bytevector_length
472 {
473 return scm_from_uint (scm_c_bytevector_length (bv));
474 }
475 #undef FUNC_NAME
476
477 SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
478 (SCM bv1, SCM bv2),
479 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
480 "have the same length and contents.")
481 #define FUNC_NAME s_scm_bytevector_eq_p
482 {
483 SCM result = SCM_BOOL_F;
484 unsigned c_len1, c_len2;
485
486 SCM_VALIDATE_BYTEVECTOR (1, bv1);
487 SCM_VALIDATE_BYTEVECTOR (2, bv2);
488
489 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
490 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
491
492 if (c_len1 == c_len2)
493 {
494 signed char *c_bv1, *c_bv2;
495
496 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
497 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
498
499 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
500 }
501
502 return result;
503 }
504 #undef FUNC_NAME
505
506 SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
507 (SCM bv, SCM fill),
508 "Fill bytevector @var{bv} with @var{fill}, a byte.")
509 #define FUNC_NAME s_scm_bytevector_fill_x
510 {
511 unsigned c_len, i;
512 signed char *c_bv, c_fill;
513
514 SCM_VALIDATE_BYTEVECTOR (1, bv);
515 c_fill = scm_to_int8 (fill);
516
517 c_len = SCM_BYTEVECTOR_LENGTH (bv);
518 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
519
520 for (i = 0; i < c_len; i++)
521 c_bv[i] = c_fill;
522
523 return SCM_UNSPECIFIED;
524 }
525 #undef FUNC_NAME
526
527 SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
528 (SCM source, SCM source_start, SCM target, SCM target_start,
529 SCM len),
530 "Copy @var{len} bytes from @var{source} into @var{target}, "
531 "starting reading from @var{source_start} (a positive index "
532 "within @var{source}) and start writing at "
533 "@var{target_start}.")
534 #define FUNC_NAME s_scm_bytevector_copy_x
535 {
536 unsigned c_len, c_source_len, c_target_len;
537 unsigned c_source_start, c_target_start;
538 signed char *c_source, *c_target;
539
540 SCM_VALIDATE_BYTEVECTOR (1, source);
541 SCM_VALIDATE_BYTEVECTOR (3, target);
542
543 c_len = scm_to_uint (len);
544 c_source_start = scm_to_uint (source_start);
545 c_target_start = scm_to_uint (target_start);
546
547 c_source = SCM_BYTEVECTOR_CONTENTS (source);
548 c_target = SCM_BYTEVECTOR_CONTENTS (target);
549 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
550 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
551
552 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
553 scm_out_of_range (FUNC_NAME, source_start);
554 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
555 scm_out_of_range (FUNC_NAME, target_start);
556
557 memcpy (c_target + c_target_start,
558 c_source + c_source_start,
559 c_len);
560
561 return SCM_UNSPECIFIED;
562 }
563 #undef FUNC_NAME
564
565 SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
566 (SCM bv),
567 "Return a newly allocated copy of @var{bv}.")
568 #define FUNC_NAME s_scm_bytevector_copy
569 {
570 SCM copy;
571 unsigned c_len;
572 signed char *c_bv, *c_copy;
573
574 SCM_VALIDATE_BYTEVECTOR (1, bv);
575
576 c_len = SCM_BYTEVECTOR_LENGTH (bv);
577 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
578
579 copy = make_bytevector (c_len);
580 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
581 memcpy (c_copy, c_bv, c_len);
582
583 return copy;
584 }
585 #undef FUNC_NAME
586
587 SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
588 1, 0, 0, (SCM array),
589 "Return a newly allocated bytevector whose contents\n"
590 "will be copied from the uniform array @var{array}.")
591 #define FUNC_NAME s_scm_uniform_array_to_bytevector
592 {
593 SCM contents, ret;
594 size_t len;
595 scm_t_array_handle h;
596 const void *base;
597 size_t sz;
598
599 contents = scm_array_contents (array, SCM_BOOL_T);
600 if (scm_is_false (contents))
601 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
602
603 scm_array_get_handle (contents, &h);
604
605 base = scm_array_handle_uniform_elements (&h);
606 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
607 sz = scm_array_handle_uniform_element_size (&h);
608
609 ret = make_bytevector (len * sz);
610 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
611
612 scm_array_handle_release (&h);
613
614 return ret;
615 }
616 #undef FUNC_NAME
617
618 \f
619 /* Operations on bytes and octets. */
620
621 SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
622 (SCM bv, SCM index),
623 "Return the octet located at @var{index} in @var{bv}.")
624 #define FUNC_NAME s_scm_bytevector_u8_ref
625 {
626 INTEGER_NATIVE_REF (8, unsigned);
627 }
628 #undef FUNC_NAME
629
630 SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
631 (SCM bv, SCM index),
632 "Return the byte located at @var{index} in @var{bv}.")
633 #define FUNC_NAME s_scm_bytevector_s8_ref
634 {
635 INTEGER_NATIVE_REF (8, signed);
636 }
637 #undef FUNC_NAME
638
639 SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
640 (SCM bv, SCM index, SCM value),
641 "Return the octet located at @var{index} in @var{bv}.")
642 #define FUNC_NAME s_scm_bytevector_u8_set_x
643 {
644 INTEGER_NATIVE_SET (8, unsigned);
645 }
646 #undef FUNC_NAME
647
648 SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
649 (SCM bv, SCM index, SCM value),
650 "Return the octet located at @var{index} in @var{bv}.")
651 #define FUNC_NAME s_scm_bytevector_s8_set_x
652 {
653 INTEGER_NATIVE_SET (8, signed);
654 }
655 #undef FUNC_NAME
656
657 #undef OCTET_ACCESSOR_PROLOGUE
658
659
660 SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
661 (SCM bv),
662 "Return a newly allocated list of octets containing the "
663 "contents of @var{bv}.")
664 #define FUNC_NAME s_scm_bytevector_to_u8_list
665 {
666 SCM lst, pair;
667 unsigned c_len, i;
668 unsigned char *c_bv;
669
670 SCM_VALIDATE_BYTEVECTOR (1, bv);
671
672 c_len = SCM_BYTEVECTOR_LENGTH (bv);
673 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
674
675 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
676 for (i = 0, pair = lst;
677 i < c_len;
678 i++, pair = SCM_CDR (pair))
679 {
680 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
681 }
682
683 return lst;
684 }
685 #undef FUNC_NAME
686
687 SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
688 (SCM lst),
689 "Turn @var{lst}, a list of octets, into a bytevector.")
690 #define FUNC_NAME s_scm_u8_list_to_bytevector
691 {
692 SCM bv, item;
693 long c_len, i;
694 unsigned char *c_bv;
695
696 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
697
698 bv = make_bytevector (c_len);
699 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
700
701 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
702 {
703 item = SCM_CAR (lst);
704
705 if (SCM_LIKELY (SCM_I_INUMP (item)))
706 {
707 long c_item;
708
709 c_item = SCM_I_INUM (item);
710 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
711 c_bv[i] = (unsigned char) c_item;
712 else
713 goto type_error;
714 }
715 else
716 goto type_error;
717 }
718
719 return bv;
720
721 type_error:
722 scm_wrong_type_arg (FUNC_NAME, 1, item);
723
724 return SCM_BOOL_F;
725 }
726 #undef FUNC_NAME
727
728 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
729 using (2^(SIZE * 8) - VALUE). */
730 static inline void
731 twos_complement (mpz_t value, size_t size)
732 {
733 unsigned long bit_count;
734
735 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
736 checking on SIZE performed earlier. */
737 bit_count = (unsigned long) size << 3UL;
738
739 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
740 mpz_ui_sub (value, 1UL << bit_count, value);
741 else
742 {
743 mpz_t max;
744
745 mpz_init (max);
746 mpz_ui_pow_ui (max, 2, bit_count);
747 mpz_sub (value, max, value);
748 mpz_clear (max);
749 }
750 }
751
752 static inline SCM
753 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
754 SCM endianness)
755 {
756 SCM result;
757 mpz_t c_mpz;
758 int c_endianness, negative_p = 0;
759
760 if (signed_p)
761 {
762 if (scm_is_eq (endianness, scm_sym_big))
763 negative_p = c_bv[0] & 0x80;
764 else
765 negative_p = c_bv[c_size - 1] & 0x80;
766 }
767
768 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
769
770 mpz_init (c_mpz);
771 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
772 c_size /* word is C_SIZE-byte long */,
773 c_endianness,
774 0 /* nails */, c_bv);
775
776 if (signed_p && negative_p)
777 {
778 twos_complement (c_mpz, c_size);
779 mpz_neg (c_mpz, c_mpz);
780 }
781
782 result = scm_from_mpz (c_mpz);
783 mpz_clear (c_mpz); /* FIXME: Needed? */
784
785 return result;
786 }
787
788 static inline int
789 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
790 SCM value, SCM endianness)
791 {
792 mpz_t c_mpz;
793 int c_endianness, c_sign, err = 0;
794
795 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
796
797 mpz_init (c_mpz);
798 scm_to_mpz (value, c_mpz);
799
800 c_sign = mpz_sgn (c_mpz);
801 if (c_sign < 0)
802 {
803 if (SCM_LIKELY (signed_p))
804 {
805 mpz_neg (c_mpz, c_mpz);
806 twos_complement (c_mpz, c_size);
807 }
808 else
809 {
810 err = -1;
811 goto finish;
812 }
813 }
814
815 if (c_sign == 0)
816 /* Zero. */
817 memset (c_bv, 0, c_size);
818 else
819 {
820 size_t word_count, value_size;
821
822 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
823 if (SCM_UNLIKELY (value_size > c_size))
824 {
825 err = -2;
826 goto finish;
827 }
828
829
830 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
831 c_size, c_endianness,
832 0 /* nails */, c_mpz);
833 if (SCM_UNLIKELY (word_count != 1))
834 /* Shouldn't happen since we already checked with VALUE_SIZE. */
835 abort ();
836 }
837
838 finish:
839 mpz_clear (c_mpz);
840
841 return err;
842 }
843
/* Common prologue of the arbitrary-size accessors.  Expects `bv', `index'
   and `size' (all SCM) and FUNC_NAME to be in scope.  Declares and sets
   C_LEN, C_INDEX, C_SIZE and C_BV, and signals an out-of-range error when
   SIZE is zero or too large, or when the C_SIZE-octet integer at C_INDEX
   does not lie entirely within BV.  The _SIGN parameter is unused.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned long c_len, c_index, c_size;                                 \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_ulong (index);                                       \
  c_size = scm_to_ulong (size);                                         \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that             \
     multiplying it by 8 yields a number that fits in an                \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Written without computing C_INDEX + C_SIZE, which could wrap       \
     around and let an out-of-bounds index through.  */                 \
  if (SCM_UNLIKELY ((c_size > c_len) || (c_index > c_len - c_size)))    \
    scm_out_of_range (FUNC_NAME, index);
862
863
/* Body template of an integer reference function.  Expects `c_bv',
   `c_size' and `endianness' to be in scope.  Integers of 3 octets or more
   are delegated to `bytevector_large_ref'; 1- and 2-octet integers are
   read directly (the latter byte-swapped when ENDIANNESS is not the native
   endianness symbol) and returned as fixnums.  Any other size aborts.  */
#define GENERIC_INTEGER_REF(_sign)                                      \
  if (c_size >= 3)                                                      \
    return bytevector_large_ref ((char *) c_bv,                         \
                                 c_size, SIGNEDNESS (_sign),            \
                                 endianness);                           \
  else                                                                  \
    {                                                                   \
      _sign int small;                                                  \
                                                                        \
      if (c_size == 1)                                                  \
        {                                                               \
          _sign char c_value8;                                          \
          memcpy (&c_value8, c_bv, 1);                                  \
          small = c_value8;                                             \
        }                                                               \
      else if (c_size == 2)                                             \
        {                                                               \
          INT_TYPE (16, _sign) c_value16;                               \
          memcpy (&c_value16, c_bv, 2);                                 \
          if (scm_is_eq (endianness, scm_i_native_endianness))          \
            small = c_value16;                                          \
          else                                                          \
            small = (INT_TYPE (16, _sign)) bswap_16 (c_value16);        \
        }                                                               \
      else                                                              \
        abort ();                                                       \
                                                                        \
      return SCM_I_MAKINUM ((_sign int) small);                         \
    }
905
906 static inline SCM
907 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
908 {
909 GENERIC_INTEGER_REF (signed);
910 }
911
912 static inline SCM
913 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
914 {
915 GENERIC_INTEGER_REF (unsigned);
916 }
917
918
/* Body template of an integer assignment function.  Expects `c_bv',
   `c_size', `value', `endianness' and FUNC_NAME to be in scope.

   1- and 2-octet integers must be fixnums within the range of the target
   type; they are stored directly, the 2-octet ones byte-swapped when
   ENDIANNESS is not the native endianness symbol.  Larger integers are
   delegated to `bytevector_large_set'.  Any failure is reported as an
   out-of-range error on VALUE.

   The fixnum is held in a `long', as in INTEGER_SET, so that the range
   check sees the whole value rather than one narrowed to `int'.  */
#define GENERIC_INTEGER_SET(_sign)                                      \
  if (c_size < 3)                                                       \
    {                                                                   \
      _sign long c_value;                                               \
                                                                        \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                          \
        goto range_error;                                               \
                                                                        \
      c_value = SCM_I_INUM (value);                                     \
      switch (c_size)                                                   \
        {                                                               \
        case 1:                                                         \
          if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value)))            \
            {                                                           \
              _sign char c_value8;                                      \
              c_value8 = (_sign char) c_value;                          \
              memcpy (c_bv, &c_value8, 1);                              \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value)))           \
            {                                                           \
              int swap;                                                 \
              INT_TYPE (16, _sign) c_value16;                           \
                                                                        \
              swap = !scm_is_eq (endianness, scm_i_native_endianness);  \
                                                                        \
              c_value16 = (INT_TYPE (16, _sign)) c_value;               \
              if (swap)                                                 \
                c_value16 = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
                                                                        \
              memcpy (c_bv, &c_value16, 2);                             \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        default:                                                        \
          abort ();                                                     \
        }                                                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      int err;                                                          \
                                                                        \
      err = bytevector_large_set (c_bv, c_size,                         \
                                  SIGNEDNESS (_sign),                   \
                                  value, endianness);                   \
      if (err)                                                          \
        goto range_error;                                               \
    }                                                                   \
                                                                        \
  return;                                                               \
                                                                        \
 range_error:                                                           \
  scm_out_of_range (FUNC_NAME, value);                                  \
  return;
981
982 static inline void
983 bytevector_signed_set (char *c_bv, size_t c_size,
984 SCM value, SCM endianness,
985 const char *func_name)
986 #define FUNC_NAME func_name
987 {
988 GENERIC_INTEGER_SET (signed);
989 }
990 #undef FUNC_NAME
991
992 static inline void
993 bytevector_unsigned_set (char *c_bv, size_t c_size,
994 SCM value, SCM endianness,
995 const char *func_name)
996 #define FUNC_NAME func_name
997 {
998 GENERIC_INTEGER_SET (unsigned);
999 }
1000 #undef FUNC_NAME
1001
1002 #undef GENERIC_INTEGER_SET
1003 #undef GENERIC_INTEGER_REF
1004
1005
1006 SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
1007 (SCM bv, SCM index, SCM endianness, SCM size),
1008 "Return the @var{size}-octet long unsigned integer at index "
1009 "@var{index} in @var{bv}.")
1010 #define FUNC_NAME s_scm_bytevector_uint_ref
1011 {
1012 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1013
1014 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
1015 }
1016 #undef FUNC_NAME
1017
1018 SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1019 (SCM bv, SCM index, SCM endianness, SCM size),
1020 "Return the @var{size}-octet long unsigned integer at index "
1021 "@var{index} in @var{bv}.")
1022 #define FUNC_NAME s_scm_bytevector_sint_ref
1023 {
1024 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1025
1026 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1027 }
1028 #undef FUNC_NAME
1029
1030 SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
1031 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1032 "Set the @var{size}-octet long unsigned integer at @var{index} "
1033 "to @var{value}.")
1034 #define FUNC_NAME s_scm_bytevector_uint_set_x
1035 {
1036 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1037
1038 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
1039 FUNC_NAME);
1040
1041 return SCM_UNSPECIFIED;
1042 }
1043 #undef FUNC_NAME
1044
1045 SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
1046 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1047 "Set the @var{size}-octet long signed integer at @var{index} "
1048 "to @var{value}.")
1049 #define FUNC_NAME s_scm_bytevector_sint_set_x
1050 {
1051 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1052
1053 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
1054 FUNC_NAME);
1055
1056 return SCM_UNSPECIFIED;
1057 }
1058 #undef FUNC_NAME
1059
1060
1061 \f
1062 /* Operations on integers of arbitrary size. */
1063
/* Body template of the bytevector-to-integer-list procedures.  Expects
   `bv', `endianness' and `size' to be in scope.  Returns a list of the
   successive SIZE-octet integers found in BV; trailing octets that do not
   make up a whole integer are ignored.

   SIZE is checked up front: zero would lead to a division by zero and to
   a loop that never advances, and the upper bound is the one the other
   arbitrary-size templates enforce so that SIZE * 8 fits in an unsigned
   long.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  else if (SCM_UNLIKELY (c_len < c_size))                               \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                     \
                                                                        \
      lst = scm_make_list (scm_from_size_t (c_len / c_size),            \
                           SCM_UNSPECIFIED);                            \
      /* C_LEN >= C_SIZE here, so the subtraction cannot wrap.  */      \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
1096
1097 SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
1098 3, 0, 0,
1099 (SCM bv, SCM endianness, SCM size),
1100 "Return a list of signed integers of @var{size} octets "
1101 "representing the contents of @var{bv}.")
1102 #define FUNC_NAME s_scm_bytevector_to_sint_list
1103 {
1104 INTEGERS_TO_LIST (signed);
1105 }
1106 #undef FUNC_NAME
1107
1108 SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1109 3, 0, 0,
1110 (SCM bv, SCM endianness, SCM size),
1111 "Return a list of unsigned integers of @var{size} octets "
1112 "representing the contents of @var{bv}.")
1113 #define FUNC_NAME s_scm_bytevector_to_uint_list
1114 {
1115 INTEGERS_TO_LIST (unsigned);
1116 }
1117 #undef FUNC_NAME
1118
/* The template is named INTEGERS_TO_LIST (plural).  */
#undef INTEGERS_TO_LIST
1120
1121
/* Body template of the integer-list-to-bytevector procedures.  Expects
   `lst', `endianness' and `size' to be in scope.  Returns a bytevector
   holding each element of LST encoded on SIZE octets according to
   ENDIANNESS.  Elements that cannot be encoded are reported by the
   per-element setter as out-of-range errors.

   The total size is checked against the largest `size_t' value before the
   allocation, so that a wrapped-around product cannot yield a buffer
   smaller than what the loop writes.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign)                               \
  SCM bv;                                                               \
  long c_len;                                                           \
  size_t c_size;                                                        \
  char *c_bv, *c_bv_ptr;                                                \
                                                                        \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);                            \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  if (SCM_UNLIKELY ((c_len > 0)                                         \
                    && (c_size > ((size_t) -1) / (size_t) c_len)))      \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  bv = make_bytevector (c_len * c_size);                                \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  for (c_bv_ptr = c_bv;                                                 \
       !scm_is_null (lst);                                              \
       lst = SCM_CDR (lst), c_bv_ptr += c_size)                         \
    {                                                                   \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size,                   \
                                    SCM_CAR (lst), endianness,          \
                                    FUNC_NAME);                         \
    }                                                                   \
                                                                        \
  return bv;
1148
1149
1150 SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
1151 3, 0, 0,
1152 (SCM lst, SCM endianness, SCM size),
1153 "Return a bytevector containing the unsigned integers "
1154 "listed in @var{lst} and encoded on @var{size} octets "
1155 "according to @var{endianness}.")
1156 #define FUNC_NAME s_scm_uint_list_to_bytevector
1157 {
1158 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1159 }
1160 #undef FUNC_NAME
1161
1162 SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
1163 3, 0, 0,
1164 (SCM lst, SCM endianness, SCM size),
1165 "Return a bytevector containing the signed integers "
1166 "listed in @var{lst} and encoded on @var{size} octets "
1167 "according to @var{endianness}.")
1168 #define FUNC_NAME s_scm_sint_list_to_bytevector
1169 {
1170 INTEGER_LIST_TO_BYTEVECTOR (signed);
1171 }
1172 #undef FUNC_NAME
1173
1174 #undef INTEGER_LIST_TO_BYTEVECTOR
1175
1176
1177 \f
1178 /* Operations on 16-bit integers. */
1179
1180 SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
1181 3, 0, 0,
1182 (SCM bv, SCM index, SCM endianness),
1183 "Return the unsigned 16-bit integer from @var{bv} at "
1184 "@var{index}.")
1185 #define FUNC_NAME s_scm_bytevector_u16_ref
1186 {
1187 INTEGER_REF (16, unsigned);
1188 }
1189 #undef FUNC_NAME
1190
1191 SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
1192 3, 0, 0,
1193 (SCM bv, SCM index, SCM endianness),
1194 "Return the signed 16-bit integer from @var{bv} at "
1195 "@var{index}.")
1196 #define FUNC_NAME s_scm_bytevector_s16_ref
1197 {
1198 INTEGER_REF (16, signed);
1199 }
1200 #undef FUNC_NAME
1201
1202 SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1203 2, 0, 0,
1204 (SCM bv, SCM index),
1205 "Return the unsigned 16-bit integer from @var{bv} at "
1206 "@var{index} using the native endianness.")
1207 #define FUNC_NAME s_scm_bytevector_u16_native_ref
1208 {
1209 INTEGER_NATIVE_REF (16, unsigned);
1210 }
1211 #undef FUNC_NAME
1212
1213 SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1214 2, 0, 0,
1215 (SCM bv, SCM index),
1216 "Return the unsigned 16-bit integer from @var{bv} at "
1217 "@var{index} using the native endianness.")
1218 #define FUNC_NAME s_scm_bytevector_s16_native_ref
1219 {
1220 INTEGER_NATIVE_REF (16, signed);
1221 }
1222 #undef FUNC_NAME
1223
1224 SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
1225 4, 0, 0,
1226 (SCM bv, SCM index, SCM value, SCM endianness),
1227 "Store @var{value} in @var{bv} at @var{index} according to "
1228 "@var{endianness}.")
1229 #define FUNC_NAME s_scm_bytevector_u16_set_x
1230 {
1231 INTEGER_SET (16, unsigned);
1232 }
1233 #undef FUNC_NAME
1234
1235 SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
1236 4, 0, 0,
1237 (SCM bv, SCM index, SCM value, SCM endianness),
1238 "Store @var{value} in @var{bv} at @var{index} according to "
1239 "@var{endianness}.")
1240 #define FUNC_NAME s_scm_bytevector_s16_set_x
1241 {
1242 INTEGER_SET (16, signed);
1243 }
1244 #undef FUNC_NAME
1245
1246 SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1247 3, 0, 0,
1248 (SCM bv, SCM index, SCM value),
1249 "Store the unsigned integer @var{value} at index @var{index} "
1250 "of @var{bv} using the native endianness.")
1251 #define FUNC_NAME s_scm_bytevector_u16_native_set_x
1252 {
1253 INTEGER_NATIVE_SET (16, unsigned);
1254 }
1255 #undef FUNC_NAME
1256
1257 SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1258 3, 0, 0,
1259 (SCM bv, SCM index, SCM value),
1260 "Store the signed integer @var{value} at index @var{index} "
1261 "of @var{bv} using the native endianness.")
1262 #define FUNC_NAME s_scm_bytevector_s16_native_set_x
1263 {
1264 INTEGER_NATIVE_SET (16, signed);
1265 }
1266 #undef FUNC_NAME
1267
1268
1269 \f
1270 /* Operations on 32-bit integers. */
1271
1272 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1273 arbitrary 32-bit integers. Thus we fall back to using the
1274 `large_{ref,set}' variants on 32-bit machines. */
1275
/* Body of a getter taking (BV, INDEX, ENDIANNESS): after the common
   accessor prologue and a check that ENDIANNESS is a symbol, hand the
   _LEN / 8 bytes at C_BV + C_INDEX to `bytevector_large_ref' and return
   its result.  */
#define LARGE_INTEGER_REF(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  return bytevector_large_ref ((char *) c_bv + c_index, _len / 8,       \
                               SIGNEDNESS (_sign), endianness);
1282
/* Body of a setter taking (BV, INDEX, VALUE, ENDIANNESS): store VALUE via
   `bytevector_large_set' and signal an out-of-range error on VALUE when
   that helper reports failure (non-zero).  */
#define LARGE_INTEGER_SET(_len, _sign)                                  \
  int failed;                                                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  failed = bytevector_large_set ((char *) c_bv + c_index, _len / 8,     \
                                 SIGNEDNESS (_sign), value,             \
                                 endianness);                           \
  if (SCM_UNLIKELY (failed))                                            \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1294
/* Like LARGE_INTEGER_REF, but for getters taking only (BV, INDEX): the
   endianness passed to `bytevector_large_ref' is always
   `scm_i_native_endianness'.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  return bytevector_large_ref ((char *) c_bv + c_index, _len / 8,       \
                               SIGNEDNESS (_sign),                      \
                               scm_i_native_endianness);
1299
/* Like LARGE_INTEGER_SET, but for setters taking only (BV, INDEX, VALUE):
   the endianness passed to `bytevector_large_set' is always
   `scm_i_native_endianness'.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign)                           \
  int failed;                                                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  failed = bytevector_large_set ((char *) c_bv + c_index, _len / 8,     \
                                 SIGNEDNESS (_sign), value,             \
                                 scm_i_native_endianness);              \
  if (SCM_UNLIKELY (failed))                                            \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1311
1312
1313 SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
1314 3, 0, 0,
1315 (SCM bv, SCM index, SCM endianness),
1316 "Return the unsigned 32-bit integer from @var{bv} at "
1317 "@var{index}.")
1318 #define FUNC_NAME s_scm_bytevector_u32_ref
1319 {
1320 #if SIZEOF_VOID_P > 4
1321 INTEGER_REF (32, unsigned);
1322 #else
1323 LARGE_INTEGER_REF (32, unsigned);
1324 #endif
1325 }
1326 #undef FUNC_NAME
1327
1328 SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
1329 3, 0, 0,
1330 (SCM bv, SCM index, SCM endianness),
1331 "Return the signed 32-bit integer from @var{bv} at "
1332 "@var{index}.")
1333 #define FUNC_NAME s_scm_bytevector_s32_ref
1334 {
1335 #if SIZEOF_VOID_P > 4
1336 INTEGER_REF (32, signed);
1337 #else
1338 LARGE_INTEGER_REF (32, signed);
1339 #endif
1340 }
1341 #undef FUNC_NAME
1342
1343 SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1344 2, 0, 0,
1345 (SCM bv, SCM index),
1346 "Return the unsigned 32-bit integer from @var{bv} at "
1347 "@var{index} using the native endianness.")
1348 #define FUNC_NAME s_scm_bytevector_u32_native_ref
1349 {
1350 #if SIZEOF_VOID_P > 4
1351 INTEGER_NATIVE_REF (32, unsigned);
1352 #else
1353 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1354 #endif
1355 }
1356 #undef FUNC_NAME
1357
1358 SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1359 2, 0, 0,
1360 (SCM bv, SCM index),
1361 "Return the unsigned 32-bit integer from @var{bv} at "
1362 "@var{index} using the native endianness.")
1363 #define FUNC_NAME s_scm_bytevector_s32_native_ref
1364 {
1365 #if SIZEOF_VOID_P > 4
1366 INTEGER_NATIVE_REF (32, signed);
1367 #else
1368 LARGE_INTEGER_NATIVE_REF (32, signed);
1369 #endif
1370 }
1371 #undef FUNC_NAME
1372
1373 SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
1374 4, 0, 0,
1375 (SCM bv, SCM index, SCM value, SCM endianness),
1376 "Store @var{value} in @var{bv} at @var{index} according to "
1377 "@var{endianness}.")
1378 #define FUNC_NAME s_scm_bytevector_u32_set_x
1379 {
1380 #if SIZEOF_VOID_P > 4
1381 INTEGER_SET (32, unsigned);
1382 #else
1383 LARGE_INTEGER_SET (32, unsigned);
1384 #endif
1385 }
1386 #undef FUNC_NAME
1387
1388 SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
1389 4, 0, 0,
1390 (SCM bv, SCM index, SCM value, SCM endianness),
1391 "Store @var{value} in @var{bv} at @var{index} according to "
1392 "@var{endianness}.")
1393 #define FUNC_NAME s_scm_bytevector_s32_set_x
1394 {
1395 #if SIZEOF_VOID_P > 4
1396 INTEGER_SET (32, signed);
1397 #else
1398 LARGE_INTEGER_SET (32, signed);
1399 #endif
1400 }
1401 #undef FUNC_NAME
1402
1403 SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1404 3, 0, 0,
1405 (SCM bv, SCM index, SCM value),
1406 "Store the unsigned integer @var{value} at index @var{index} "
1407 "of @var{bv} using the native endianness.")
1408 #define FUNC_NAME s_scm_bytevector_u32_native_set_x
1409 {
1410 #if SIZEOF_VOID_P > 4
1411 INTEGER_NATIVE_SET (32, unsigned);
1412 #else
1413 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1414 #endif
1415 }
1416 #undef FUNC_NAME
1417
1418 SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1419 3, 0, 0,
1420 (SCM bv, SCM index, SCM value),
1421 "Store the signed integer @var{value} at index @var{index} "
1422 "of @var{bv} using the native endianness.")
1423 #define FUNC_NAME s_scm_bytevector_s32_native_set_x
1424 {
1425 #if SIZEOF_VOID_P > 4
1426 INTEGER_NATIVE_SET (32, signed);
1427 #else
1428 LARGE_INTEGER_NATIVE_SET (32, signed);
1429 #endif
1430 }
1431 #undef FUNC_NAME
1432
1433
1434 \f
1435 /* Operations on 64-bit integers. */
1436
1437 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1438
1439 SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
1440 3, 0, 0,
1441 (SCM bv, SCM index, SCM endianness),
1442 "Return the unsigned 64-bit integer from @var{bv} at "
1443 "@var{index}.")
1444 #define FUNC_NAME s_scm_bytevector_u64_ref
1445 {
1446 LARGE_INTEGER_REF (64, unsigned);
1447 }
1448 #undef FUNC_NAME
1449
1450 SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
1451 3, 0, 0,
1452 (SCM bv, SCM index, SCM endianness),
1453 "Return the signed 64-bit integer from @var{bv} at "
1454 "@var{index}.")
1455 #define FUNC_NAME s_scm_bytevector_s64_ref
1456 {
1457 LARGE_INTEGER_REF (64, signed);
1458 }
1459 #undef FUNC_NAME
1460
1461 SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1462 2, 0, 0,
1463 (SCM bv, SCM index),
1464 "Return the unsigned 64-bit integer from @var{bv} at "
1465 "@var{index} using the native endianness.")
1466 #define FUNC_NAME s_scm_bytevector_u64_native_ref
1467 {
1468 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1469 }
1470 #undef FUNC_NAME
1471
1472 SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1473 2, 0, 0,
1474 (SCM bv, SCM index),
1475 "Return the unsigned 64-bit integer from @var{bv} at "
1476 "@var{index} using the native endianness.")
1477 #define FUNC_NAME s_scm_bytevector_s64_native_ref
1478 {
1479 LARGE_INTEGER_NATIVE_REF (64, signed);
1480 }
1481 #undef FUNC_NAME
1482
1483 SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
1484 4, 0, 0,
1485 (SCM bv, SCM index, SCM value, SCM endianness),
1486 "Store @var{value} in @var{bv} at @var{index} according to "
1487 "@var{endianness}.")
1488 #define FUNC_NAME s_scm_bytevector_u64_set_x
1489 {
1490 LARGE_INTEGER_SET (64, unsigned);
1491 }
1492 #undef FUNC_NAME
1493
1494 SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
1495 4, 0, 0,
1496 (SCM bv, SCM index, SCM value, SCM endianness),
1497 "Store @var{value} in @var{bv} at @var{index} according to "
1498 "@var{endianness}.")
1499 #define FUNC_NAME s_scm_bytevector_s64_set_x
1500 {
1501 LARGE_INTEGER_SET (64, signed);
1502 }
1503 #undef FUNC_NAME
1504
1505 SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1506 3, 0, 0,
1507 (SCM bv, SCM index, SCM value),
1508 "Store the unsigned integer @var{value} at index @var{index} "
1509 "of @var{bv} using the native endianness.")
1510 #define FUNC_NAME s_scm_bytevector_u64_native_set_x
1511 {
1512 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1513 }
1514 #undef FUNC_NAME
1515
1516 SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1517 3, 0, 0,
1518 (SCM bv, SCM index, SCM value),
1519 "Store the signed integer @var{value} at index @var{index} "
1520 "of @var{bv} using the native endianness.")
1521 #define FUNC_NAME s_scm_bytevector_s64_native_set_x
1522 {
1523 LARGE_INTEGER_NATIVE_SET (64, signed);
1524 }
1525 #undef FUNC_NAME
1526
1527
1528 \f
1529 /* Operations on IEEE-754 numbers. */
1530
1531 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1532 However, in R6RS, when the endianness is `little', little endian is
1533 assumed for both the byte order and the word order. This is clear from
1534 Section 2.1 of R6RS-lib (in response to
1535 http://www.r6rs.org/formal-comments/comment-187.txt). */
1536
1537
1538 /* Convert to/from a floating-point number with different endianness. This
1539 method is probably not the most efficient but it should be portable. */
1540
1541 static inline void
1542 float_to_foreign_endianness (union scm_ieee754_float *target,
1543 float source)
1544 {
1545 union scm_ieee754_float src;
1546
1547 src.f = source;
1548
1549 #ifdef WORDS_BIGENDIAN
1550 /* Assuming little endian for both byte and word order. */
1551 target->little_endian.negative = src.big_endian.negative;
1552 target->little_endian.exponent = src.big_endian.exponent;
1553 target->little_endian.mantissa = src.big_endian.mantissa;
1554 #else
1555 target->big_endian.negative = src.little_endian.negative;
1556 target->big_endian.exponent = src.little_endian.exponent;
1557 target->big_endian.mantissa = src.little_endian.mantissa;
1558 #endif
1559 }
1560
1561 static inline float
1562 float_from_foreign_endianness (const union scm_ieee754_float *source)
1563 {
1564 union scm_ieee754_float result;
1565
1566 #ifdef WORDS_BIGENDIAN
1567 /* Assuming little endian for both byte and word order. */
1568 result.big_endian.negative = source->little_endian.negative;
1569 result.big_endian.exponent = source->little_endian.exponent;
1570 result.big_endian.mantissa = source->little_endian.mantissa;
1571 #else
1572 result.little_endian.negative = source->big_endian.negative;
1573 result.little_endian.exponent = source->big_endian.exponent;
1574 result.little_endian.mantissa = source->big_endian.mantissa;
1575 #endif
1576
1577 return (result.f);
1578 }
1579
1580 static inline void
1581 double_to_foreign_endianness (union scm_ieee754_double *target,
1582 double source)
1583 {
1584 union scm_ieee754_double src;
1585
1586 src.d = source;
1587
1588 #ifdef WORDS_BIGENDIAN
1589 /* Assuming little endian for both byte and word order. */
1590 target->little_little_endian.negative = src.big_endian.negative;
1591 target->little_little_endian.exponent = src.big_endian.exponent;
1592 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1593 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1594 #else
1595 target->big_endian.negative = src.little_little_endian.negative;
1596 target->big_endian.exponent = src.little_little_endian.exponent;
1597 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1598 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1599 #endif
1600 }
1601
1602 static inline double
1603 double_from_foreign_endianness (const union scm_ieee754_double *source)
1604 {
1605 union scm_ieee754_double result;
1606
1607 #ifdef WORDS_BIGENDIAN
1608 /* Assuming little endian for both byte and word order. */
1609 result.big_endian.negative = source->little_little_endian.negative;
1610 result.big_endian.exponent = source->little_little_endian.exponent;
1611 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1612 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1613 #else
1614 result.little_little_endian.negative = source->big_endian.negative;
1615 result.little_little_endian.exponent = source->big_endian.exponent;
1616 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1617 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1618 #endif
1619
1620 return (result.d);
1621 }
1622
/* Template macros that let the accessors below be written once for both
   `float' and `double'.
   XXX: Guile can only convert to/from doubles, hence the two SCM
   conversion macros ignore _C_TYPE.  */
#define IEEE754_UNION(_c_type)     union scm_ieee754_ ## _c_type
#define IEEE754_TO_SCM(_c_type)    scm_from_double
#define IEEE754_FROM_SCM(_c_type)  scm_to_double
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type)  \
  _c_type ## _from_foreign_endianness
#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type)    \
  _c_type ## _to_foreign_endianness


/* Template getters and setters.  */

/* Reuse the integer accessor prologue, passing it the width of _TYPE in
   bits (its size in bytes shifted left by 3).  */
#define IEEE754_ACCESSOR_PROLOGUE(_type)                        \
  INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1638
/* Body of a getter taking (BV, INDEX, ENDIANNESS).  When ENDIANNESS is the
   native one the bytes at C_BV + C_INDEX are copied straight into a _TYPE;
   otherwise they are copied into the IEEE-754 union and converted with the
   `_from_foreign_endianness' helper.  */
#define IEEE754_REF(_type)                                              \
  _type c_result;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                 \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      memcpy (&c_foreign, c_bv + c_index, sizeof (c_foreign));          \
      c_result =                                                        \
        IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_foreign);           \
    }                                                                   \
  else                                                                  \
    memcpy (&c_result, c_bv + c_index, sizeof (c_result));              \
                                                                        \
  return IEEE754_TO_SCM (_type) (c_result);
1657
/* Body of a getter taking (BV, INDEX): copy sizeof (_TYPE) bytes from
   C_BV + C_INDEX into a _TYPE and return it converted to a Scheme
   number.  */
#define IEEE754_NATIVE_REF(_type)                                       \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
                                                                        \
  memcpy (&c_number, c_bv + c_index, sizeof (c_number));                \
  return IEEE754_TO_SCM (_type) (c_number);
1665
/* Body of a setter taking (BV, INDEX, VALUE, ENDIANNESS).  VALUE must be a
   real and ENDIANNESS a symbol.  With the native endianness the C value is
   copied as is to C_BV + C_INDEX; otherwise it first goes through the
   `_to_foreign_endianness' helper.  */
#define IEEE754_SET(_type)                                              \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_REAL (3, value);                                         \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
  c_number = IEEE754_FROM_SCM (_type) (value);                          \
                                                                        \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                 \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_foreign, c_number);     \
      memcpy (c_bv + c_index, &c_foreign, sizeof (c_foreign));          \
    }                                                                   \
  else                                                                  \
    memcpy (c_bv + c_index, &c_number, sizeof (c_number));              \
                                                                        \
  return SCM_UNSPECIFIED;
1685
/* Body of a setter taking (BV, INDEX, VALUE): check that VALUE is a real,
   convert it to a _TYPE and copy its bytes to C_BV + C_INDEX.  */
#define IEEE754_NATIVE_SET(_type)                                       \
  _type c_number;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_REAL (3, value);                                         \
  c_number = IEEE754_FROM_SCM (_type) (value);                          \
                                                                        \
  memcpy (c_bv + c_index, &c_number, sizeof (c_number));                \
  return SCM_UNSPECIFIED;
1695
1696
1697 /* Single precision. */
1698
1699 SCM_DEFINE (scm_bytevector_ieee_single_ref,
1700 "bytevector-ieee-single-ref",
1701 3, 0, 0,
1702 (SCM bv, SCM index, SCM endianness),
1703 "Return the IEEE-754 single from @var{bv} at "
1704 "@var{index}.")
1705 #define FUNC_NAME s_scm_bytevector_ieee_single_ref
1706 {
1707 IEEE754_REF (float);
1708 }
1709 #undef FUNC_NAME
1710
1711 SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
1712 "bytevector-ieee-single-native-ref",
1713 2, 0, 0,
1714 (SCM bv, SCM index),
1715 "Return the IEEE-754 single from @var{bv} at "
1716 "@var{index} using the native endianness.")
1717 #define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
1718 {
1719 IEEE754_NATIVE_REF (float);
1720 }
1721 #undef FUNC_NAME
1722
1723 SCM_DEFINE (scm_bytevector_ieee_single_set_x,
1724 "bytevector-ieee-single-set!",
1725 4, 0, 0,
1726 (SCM bv, SCM index, SCM value, SCM endianness),
1727 "Store real @var{value} in @var{bv} at @var{index} according to "
1728 "@var{endianness}.")
1729 #define FUNC_NAME s_scm_bytevector_ieee_single_set_x
1730 {
1731 IEEE754_SET (float);
1732 }
1733 #undef FUNC_NAME
1734
1735 SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
1736 "bytevector-ieee-single-native-set!",
1737 3, 0, 0,
1738 (SCM bv, SCM index, SCM value),
1739 "Store the real @var{value} at index @var{index} "
1740 "of @var{bv} using the native endianness.")
1741 #define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
1742 {
1743 IEEE754_NATIVE_SET (float);
1744 }
1745 #undef FUNC_NAME
1746
1747
1748 /* Double precision. */
1749
1750 SCM_DEFINE (scm_bytevector_ieee_double_ref,
1751 "bytevector-ieee-double-ref",
1752 3, 0, 0,
1753 (SCM bv, SCM index, SCM endianness),
1754 "Return the IEEE-754 double from @var{bv} at "
1755 "@var{index}.")
1756 #define FUNC_NAME s_scm_bytevector_ieee_double_ref
1757 {
1758 IEEE754_REF (double);
1759 }
1760 #undef FUNC_NAME
1761
1762 SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
1763 "bytevector-ieee-double-native-ref",
1764 2, 0, 0,
1765 (SCM bv, SCM index),
1766 "Return the IEEE-754 double from @var{bv} at "
1767 "@var{index} using the native endianness.")
1768 #define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
1769 {
1770 IEEE754_NATIVE_REF (double);
1771 }
1772 #undef FUNC_NAME
1773
1774 SCM_DEFINE (scm_bytevector_ieee_double_set_x,
1775 "bytevector-ieee-double-set!",
1776 4, 0, 0,
1777 (SCM bv, SCM index, SCM value, SCM endianness),
1778 "Store real @var{value} in @var{bv} at @var{index} according to "
1779 "@var{endianness}.")
1780 #define FUNC_NAME s_scm_bytevector_ieee_double_set_x
1781 {
1782 IEEE754_SET (double);
1783 }
1784 #undef FUNC_NAME
1785
1786 SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
1787 "bytevector-ieee-double-native-set!",
1788 3, 0, 0,
1789 (SCM bv, SCM index, SCM value),
1790 "Store the real @var{value} at index @var{index} "
1791 "of @var{bv} using the native endianness.")
1792 #define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
1793 {
1794 IEEE754_NATIVE_SET (double);
1795 }
1796 #undef FUNC_NAME
1797
1798
/* The IEEE-754 template macros are private to the section above; undefine
   all of them, including the accessor prologue, which used to be left
   defined.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_ACCESSOR_PROLOGUE
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1808
1809 \f
1810 /* Operations on strings. */
1811
1812
/* Define `utf<WIDTH>_strlen', a function returning the size in bytes of the
   zero-terminated sequence of WIDTH-bit code units at STR, not counting the
   terminating unit.  */
#define UTF_STRLEN_FUNCTION(_utf_width)                                 \
static inline size_t                                                    \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str)      \
{                                                                       \
  const uint ## _utf_width ## _t *end = str;                            \
                                                                        \
  while (*end != 0)                                                     \
    end++;                                                              \
                                                                        \
  return ((size_t) (end - str)) * ((_utf_width) / 8);                   \
}

UTF_STRLEN_FUNCTION (8)


/* Return the length (in bytes) of STR, a UTF-(UTF_WIDTH) encoded string.
   Only widths for which UTF_STRLEN_FUNCTION was instantiated are
   usable.  */
#define UTF_STRLEN(_utf_width, _str)            \
  utf ## _utf_width ## _strlen (_str)
1836
1837 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1838 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1839 encoding name). */
1840 static inline void
1841 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1842 {
1843 strcpy (name, "UTF-");
1844 strcat (name, ((utf_width == 8)
1845 ? "8"
1846 : ((utf_width == 16)
1847 ? "16"
1848 : ((utf_width == 32)
1849 ? "32"
1850 : "??"))));
1851 strcat (name,
1852 ((scm_is_eq (endianness, scm_sym_big))
1853 ? "BE"
1854 : ((scm_is_eq (endianness, scm_sym_little))
1855 ? "LE"
1856 : "unknown")));
1857 }
1858
1859 /* Maximum length of a UTF encoding name. */
1860 #define MAX_UTF_ENCODING_NAME_LEN 16
1861
/* Produce the body of a `string->utf' function taking (STR, ENDIANNESS),
   where ENDIANNESS is optional and defaults to `big'.  STR is first
   converted to a locale-encoded C string on the stack, then re-encoded
   with `mem_iconveh' into the UTF encoding named by `utf_encoding_name';
   the resulting buffer is handed to `scm_c_take_bytevector'.

   Fixes relative to the previous version:
   - the retry loop now passes the real buffer capacity to
     `scm_to_locale_stringbuf' and grows it to the size that function
     reports; it used to retry forever with the same capacity whenever the
     locale encoding needed more than one byte per character;
   - on failure the actual `errno' is reported instead of the -1 returned
     by `mem_iconveh' (`errno' is expected to be visible through the
     headers this file already includes);
   - the optional argument is tested with SCM_UNBNDP rather than `==', and
     UTF has a defined initial value.  */
#define STRING_TO_UTF(_utf_width)                                       \
  SCM utf = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str;                                                          \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  char *c_utf = NULL, *c_locale;                                        \
  size_t c_strlen, c_buf_len, c_raw_strlen, c_utf_len = 0;              \
                                                                        \
  SCM_VALIDATE_STRING (1, str);                                         \
  if (SCM_UNBNDP (endianness))                                          \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_strlen = scm_c_string_length (str);                                 \
  c_buf_len = c_strlen;                                                 \
  for (;;)                                                              \
    {                                                                   \
      /* One extra byte for the terminating zero added below.  */       \
      c_str = (char *) alloca (c_buf_len + 1);                          \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_buf_len);   \
      if (c_raw_strlen <= c_buf_len)                                    \
        break;                                                          \
      /* Too small: retry with the size that was reported.  */          \
      c_buf_len = c_raw_strlen;                                         \
    }                                                                   \
  c_str[c_raw_strlen] = '\0';                                           \
                                                                        \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_str, c_raw_strlen,                               \
                     c_locale, c_utf_name,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_utf, &c_utf_len);                               \
  if (SCM_UNLIKELY (err))                                               \
    {                                                                   \
      /* Capture errno before building the argument list.  */           \
      err = errno;                                                      \
      scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",      \
                        scm_list_1 (str), err);                         \
    }                                                                   \
  else                                                                  \
    /* C_UTF was allocated by `mem_iconveh'; the bytevector presumably  \
       takes ownership of it.  */                                       \
    utf = scm_c_take_bytevector ((signed char *) c_utf,                 \
                                 c_utf_len);                            \
                                                                        \
  return (utf);
1905
1906
1907
1908 SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1909 1, 0, 0,
1910 (SCM str),
1911 "Return a newly allocated bytevector that contains the UTF-8 "
1912 "encoding of @var{str}.")
1913 #define FUNC_NAME s_scm_string_to_utf8
1914 {
1915 SCM utf;
1916 char *c_str;
1917 uint8_t *c_utf;
1918 size_t c_strlen, c_raw_strlen;
1919
1920 SCM_VALIDATE_STRING (1, str);
1921
1922 c_strlen = scm_c_string_length (str);
1923 c_raw_strlen = c_strlen;
1924 do
1925 {
1926 c_str = (char *) alloca (c_raw_strlen + 1);
1927 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1928 }
1929 while (c_raw_strlen > c_strlen);
1930 c_str[c_raw_strlen] = '\0';
1931
1932 c_utf = u8_strconv_from_locale (c_str);
1933 if (SCM_UNLIKELY (c_utf == NULL))
1934 scm_syserror (FUNC_NAME);
1935 else
1936 /* C_UTF is null-terminated. */
1937 utf = scm_c_take_bytevector ((signed char *) c_utf,
1938 UTF_STRLEN (8, c_utf));
1939
1940 return (utf);
1941 }
1942 #undef FUNC_NAME
1943
1944 SCM_DEFINE (scm_string_to_utf16, "string->utf16",
1945 1, 1, 0,
1946 (SCM str, SCM endianness),
1947 "Return a newly allocated bytevector that contains the UTF-16 "
1948 "encoding of @var{str}.")
1949 #define FUNC_NAME s_scm_string_to_utf16
1950 {
1951 STRING_TO_UTF (16);
1952 }
1953 #undef FUNC_NAME
1954
1955 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1956 1, 1, 0,
1957 (SCM str, SCM endianness),
1958 "Return a newly allocated bytevector that contains the UTF-32 "
1959 "encoding of @var{str}.")
1960 #define FUNC_NAME s_scm_string_to_utf32
1961 {
1962 STRING_TO_UTF (32);
1963 }
1964 #undef FUNC_NAME
1965
1966
/* Produce the body of a function that converts a UTF-encoded bytevector to
   a string.  The function takes (UTF, ENDIANNESS), where ENDIANNESS is
   optional and defaults to `big'.  The bytevector contents are re-encoded
   with `mem_iconveh' from the encoding named by `utf_encoding_name' to the
   locale charset, and the result is handed to `scm_take_locale_stringn'
   together with its length.

   On failure the actual `errno' is reported instead of the -1 returned by
   `mem_iconveh' (`errno' is expected to be visible through the headers
   this file already includes), and the optional argument is tested with
   SCM_UNBNDP rather than `=='.  */
#define UTF_TO_STRING(_utf_width)                                       \
  SCM str = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str = NULL, *c_locale;                                        \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  const char *c_utf;                                                    \
  size_t c_strlen = 0, c_utf_len;                                       \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, utf);                                     \
  if (SCM_UNBNDP (endianness))                                          \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);                              \
  c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                       \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_utf, c_utf_len,                                  \
                     c_utf_name, c_locale,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
  if (SCM_UNLIKELY (err))                                               \
    {                                                                   \
      /* Capture errno before building the argument list.  */           \
      err = errno;                                                      \
      scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",   \
                        scm_list_1 (utf), err);                         \
    }                                                                   \
  else                                                                  \
    /* The explicit length is what delimits C_STR here.  */             \
    str = scm_take_locale_stringn (c_str, c_strlen);                    \
                                                                        \
  return (str);
2002
2003
2004 SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2005 1, 0, 0,
2006 (SCM utf),
2007 "Return a newly allocate string that contains from the UTF-8-"
2008 "encoded contents of bytevector @var{utf}.")
2009 #define FUNC_NAME s_scm_utf8_to_string
2010 {
2011 SCM str;
2012 int err;
2013 char *c_str = NULL, *c_locale;
2014 const char *c_utf;
2015 size_t c_utf_len, c_strlen = 0;
2016
2017 SCM_VALIDATE_BYTEVECTOR (1, utf);
2018
2019 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2020
2021 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
2022 strcpy (c_locale, locale_charset ());
2023
2024 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2025 err = mem_iconveh (c_utf, c_utf_len,
2026 "UTF-8", c_locale,
2027 iconveh_question_mark, NULL,
2028 &c_str, &c_strlen);
2029 if (SCM_UNLIKELY (err))
2030 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
2031 scm_list_1 (utf), err);
2032 else
2033 /* C_STR is null-terminated. */
2034 str = scm_take_locale_stringn (c_str, c_strlen);
2035
2036 return (str);
2037 }
2038 #undef FUNC_NAME
2039
2040 SCM_DEFINE (scm_utf16_to_string, "utf16->string",
2041 1, 1, 0,
2042 (SCM utf, SCM endianness),
2043 "Return a newly allocate string that contains from the UTF-16-"
2044 "encoded contents of bytevector @var{utf}.")
2045 #define FUNC_NAME s_scm_utf16_to_string
2046 {
2047 UTF_TO_STRING (16);
2048 }
2049 #undef FUNC_NAME
2050
2051 SCM_DEFINE (scm_utf32_to_string, "utf32->string",
2052 1, 1, 0,
2053 (SCM utf, SCM endianness),
2054 "Return a newly allocate string that contains from the UTF-32-"
2055 "encoded contents of bytevector @var{utf}.")
2056 #define FUNC_NAME s_scm_utf32_to_string
2057 {
2058 UTF_TO_STRING (32);
2059 }
2060 #undef FUNC_NAME
2061
2062
2063 \f
2064 /* Bytevectors as generalized vectors & arrays. */
2065
2066 static SCM
2067 bv_handle_ref (scm_t_array_handle *h, size_t index)
2068 {
2069 return SCM_I_MAKINUM (scm_c_bytevector_ref (h->array, index));
2070 }
2071
2072 static void
2073 bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2074 {
2075 scm_c_bytevector_set_x (h->array, index, scm_to_uint8 (val));
2076 }
2077
2078 static void
2079 bytevector_get_handle (SCM v, scm_t_array_handle *h)
2080 {
2081 h->array = v;
2082 h->ndims = 1;
2083 h->dims = &h->dim0;
2084 h->dim0.lbnd = 0;
2085 h->dim0.ubnd = SCM_BYTEVECTOR_LENGTH (v) - 1;
2086 h->dim0.inc = 1;
2087 h->element_type = SCM_ARRAY_ELEMENT_TYPE_VU8;
2088 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2089 }
2090
2091 \f
2092 /* Initialization. */
2093
2094 void
2095 scm_bootstrap_bytevectors (void)
2096 {
2097 /* The SMOB type must be instantiated here because the
2098 generalized-vector API may want to access bytevectors even though
2099 `(rnrs bytevector)' hasn't been loaded. */
2100 scm_tc16_bytevector = scm_make_smob_type ("bytevector", 0);
2101 scm_set_smob_free (scm_tc16_bytevector, free_bytevector);
2102 scm_set_smob_print (scm_tc16_bytevector, print_bytevector);
2103 scm_set_smob_equalp (scm_tc16_bytevector, bytevector_equal_p);
2104
2105 scm_null_bytevector =
2106 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));
2107
2108 #ifdef WORDS_BIGENDIAN
2109 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
2110 #else
2111 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
2112 #endif
2113
2114 scm_c_register_extension ("libguile", "scm_init_bytevectors",
2115 (scm_t_extension_init_func) scm_init_bytevectors,
2116 NULL);
2117
2118 {
2119 scm_t_array_implementation impl;
2120
2121 impl.tag = scm_tc16_bytevector;
2122 impl.mask = 0xffff;
2123 impl.vref = bv_handle_ref;
2124 impl.vset = bv_handle_set_x;
2125 impl.get_handle = bytevector_get_handle;
2126 scm_i_register_array_implementation (&impl);
2127 scm_i_register_vector_constructor
2128 (scm_i_array_element_types[SCM_ARRAY_ELEMENT_TYPE_VU8],
2129 scm_make_bytevector);
2130 }
2131 }
2132
2133 void
2134 scm_init_bytevectors (void)
2135 {
2136 #include "libguile/bytevectors.x"
2137
2138 scm_endianness_big = scm_sym_big;
2139 scm_endianness_little = scm_sym_little;
2140 }