bytevector inlinedness indicated by flag, not length
[bpt/guile.git] / libguile / bytevectors.c
1 /* Copyright (C) 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25
26 #include <gmp.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/extensions.h"
30 #include "libguile/bytevectors.h"
31 #include "libguile/strings.h"
32 #include "libguile/validate.h"
33 #include "libguile/ieee-754.h"
34 #include "libguile/arrays.h"
35 #include "libguile/array-handle.h"
36 #include "libguile/uniform.h"
37 #include "libguile/srfi-4.h"
38
39 #include <byteswap.h>
40 #include <striconveh.h>
41 #include <uniconv.h>
42
43 #ifdef HAVE_LIMITS_H
44 # include <limits.h>
45 #else
46 /* Assuming 32-bit longs. */
47 # define ULONG_MAX 4294967295UL
48 #endif
49
50 #include <string.h>
51
52
53 \f
54 /* Utilities. */
55
/* Helper macros for the accessor templates below.  The templates are
   parameterized by a bit width (8, 16 or 32) and by a signedness token
   (`signed' or `unsigned'); the selectors at the end paste those two
   parameters together to pick one of the definitions listed here.  */

/* C integer type for each (width, signedness) pair.  */
#define INT8_T_signed      scm_t_int8
#define INT16_T_signed     scm_t_int16
#define INT32_T_signed     scm_t_int32
#define INT8_T_unsigned    scm_t_uint8
#define INT16_T_unsigned   scm_t_uint16
#define INT32_T_unsigned   scm_t_uint32

/* Range predicates: true when _X is representable in the given type.  */
#define is_signed_int8(_x)     (((_x) >= -128L) && ((_x) <= 127L))
#define is_signed_int16(_x)    (((_x) >= -32768L) && ((_x) <= 32767L))
#define is_signed_int32(_x)    (((_x) >= -2147483648L) && ((_x) <= 2147483647L))
#define is_unsigned_int8(_x)   ((_x) <= 255UL)
#define is_unsigned_int16(_x)  ((_x) <= 65535UL)
#define is_unsigned_int32(_x)  ((_x) <= 4294967295UL)

/* Boolean flag associated with each signedness token.  */
#define SIGNEDNESS_unsigned  0
#define SIGNEDNESS_signed    1

/* Selectors used by the templates.  */
#define SIGNEDNESS(_sign)          SIGNEDNESS_ ## _sign
#define INT_VALID_P(_size, _sign)  is_ ## _sign ## _int ## _size
#define INT_SWAP(_size)            bswap_ ## _size
#define INT_TYPE(_size, _sign)     INT ## _size ## _T_ ## _sign
77
78
/* Common prologue of the fixed-size accessor templates.  It expects the
   SCM variables `bv' and `index' and the macro FUNC_NAME to be in scope.
   It declares C_LEN (length of BV in octets), C_INDEX (INDEX as a C
   integer) and C_BV (the contents of BV viewed as `_sign char'), checks
   that BV is a bytevector, and signals an out-of-range error unless the
   _LEN-bit datum starting at C_INDEX lies entirely within BV.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign) \
  size_t c_len, c_index; \
  _sign char *c_bv; \
 \
  SCM_VALIDATE_BYTEVECTOR (1, bv); \
  c_index = scm_to_uint (index); \
 \
  c_len = SCM_BYTEVECTOR_LENGTH (bv); \
  c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv); \
 \
  /* The check is written with a subtraction on the length rather \
     than an addition on the index, so that a huge index cannot wrap \
     around and slip past it.  */ \
  if (SCM_UNLIKELY (c_len < (size_t) ((_len) / 8) \
                    || c_index > c_len - (size_t) ((_len) / 8))) \
    scm_out_of_range (FUNC_NAME, index);
91
/* Body template of a fixed-size integer reader (8, 16 or 32 bits only)
   that takes an explicit `endianness' symbol as its third argument.  The
   datum is copied out of the bytevector, byte-swapped when ENDIANNESS is
   not the native one, and returned as a fixnum.  */
#define INTEGER_REF(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  SCM_VALIDATE_SYMBOL (3, endianness); \
 \
  { \
    INT_TYPE (_len, _sign) c_datum; \
 \
    /* memcpy is used so that unaligned indices are fine.  */ \
    memcpy (&c_datum, c_bv + c_index, (_len) / 8); \
    if (scm_is_eq (endianness, scm_i_native_endianness)) \
      ; /* Already in host byte order.  */ \
    else \
      c_datum = INT_SWAP (_len) (c_datum); \
 \
    return SCM_I_MAKINUM (c_datum); \
  }
110
/* Body template of a fixed-size integer reader that always uses the host
   byte order: the datum is copied out of the bytevector and returned as
   a fixnum, with no byte swapping.  */
#define INTEGER_NATIVE_REF(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
 \
  { \
    INT_TYPE (_len, _sign) c_datum; \
 \
    /* memcpy is used so that unaligned indices are fine.  */ \
    memcpy (&c_datum, c_bv + c_index, (_len) / 8); \
 \
    return SCM_I_MAKINUM (c_datum); \
  }
125
/* Body template of a fixed-size integer writer (8, 16 or 32 bits only).
   The enclosing procedure takes (bv, index, value, endianness).  VALUE
   must be a fixnum that fits in the target type; it is byte-swapped when
   ENDIANNESS is not the native one and then copied into the bytevector.
   Evaluates to a `return SCM_UNSPECIFIED'.  */
#define INTEGER_SET(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
  /* ENDIANNESS is the fourth argument of the setters (VALUE is the \
     third), so errors must be reported against position 4.  */ \
  SCM_VALIDATE_SYMBOL (4, endianness); \
 \
  { \
    _sign long c_value; \
    INT_TYPE (_len, _sign) c_value_short; \
 \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
      scm_wrong_type_arg (FUNC_NAME, 3, value); \
 \
    c_value = SCM_I_INUM (value); \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value))) \
      scm_out_of_range (FUNC_NAME, value); \
 \
    c_value_short = (INT_TYPE (_len, _sign)) c_value; \
    if (!scm_is_eq (endianness, scm_i_native_endianness)) \
      c_value_short = INT_SWAP (_len) (c_value_short); \
 \
    /* memcpy is used so that unaligned indices are fine.  */ \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
  } \
 \
  return SCM_UNSPECIFIED;
150
/* Body template of a fixed-size integer writer that always uses the host
   byte order.  VALUE must be a fixnum that fits in the target type; it
   is narrowed and copied into the bytevector without byte swapping.
   Evaluates to a `return SCM_UNSPECIFIED'.  */
#define INTEGER_NATIVE_SET(_len, _sign) \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
 \
  { \
    _sign long c_wide; \
    INT_TYPE (_len, _sign) c_narrow; \
 \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
      scm_wrong_type_arg (FUNC_NAME, 3, value); \
 \
    c_wide = SCM_I_INUM (value); \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_wide))) \
      scm_out_of_range (FUNC_NAME, value); \
 \
    c_narrow = (INT_TYPE (_len, _sign)) c_wide; \
 \
    /* memcpy is used so that unaligned indices are fine.  */ \
    memcpy (c_bv + c_index, &c_narrow, (_len) / 8); \
  } \
 \
  return SCM_UNSPECIFIED;
173
174
175 \f
176 /* Bytevector type. */
177
178 scm_t_bits scm_tc16_bytevector;
179
180 #define SCM_BYTEVECTOR_INLINE_THRESHOLD (2 * sizeof (SCM))
181 #define SCM_BYTEVECTOR_INLINEABLE_SIZE_P(_size) \
182 ((_size) <= SCM_BYTEVECTOR_INLINE_THRESHOLD)
183 #define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len) \
184 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
185 #define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
186 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
187 #define SCM_BYTEVECTOR_SET_INLINE(bv) \
188 SCM_SET_SMOB_FLAGS (bv, SCM_SMOB_FLAGS (bv) | SCM_F_BYTEVECTOR_INLINE)
189
190 /* The empty bytevector. */
191 SCM scm_null_bytevector = SCM_UNSPECIFIED;
192
193
194 static inline SCM
195 make_bytevector_from_buffer (size_t len, signed char *contents)
196 {
197 SCM ret;
198 if (!SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len))
199 SCM_NEWSMOB2 (ret, scm_tc16_bytevector, len, contents);
200 else
201 {
202 SCM_NEWSMOB2 (ret, scm_tc16_bytevector, len, NULL);
203 SCM_BYTEVECTOR_SET_INLINE (ret);
204 if (contents)
205 {
206 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), contents, len);
207 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
208 }
209 }
210 return ret;
211 }
212
213 static inline SCM
214 make_bytevector (size_t len)
215 {
216 if (SCM_UNLIKELY (len == 0))
217 return scm_null_bytevector;
218
219 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len))
220 {
221 SCM ret;
222 SCM_NEWSMOB2 (ret, scm_tc16_bytevector, len, NULL);
223 SCM_BYTEVECTOR_SET_INLINE (ret);
224 return ret;
225 }
226 else
227 {
228 void *buf = scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
229 return make_bytevector_from_buffer (len, buf);
230 }
231 }
232
233 /* Return a new bytevector of size LEN octets. */
234 SCM
235 scm_c_make_bytevector (size_t len)
236 {
237 return (make_bytevector (len));
238 }
239
240 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
241 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
242 SCM
243 scm_c_take_bytevector (signed char *contents, size_t len)
244 {
245 return make_bytevector_from_buffer (len, contents);
246 }
247
248 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
249 size) and return BV. */
250 SCM
251 scm_i_shrink_bytevector (SCM bv, size_t c_new_len)
252 {
253 if (!SCM_BYTEVECTOR_INLINE_P (bv))
254 {
255 size_t c_len;
256 signed char *c_bv, *c_new_bv;
257
258 c_len = SCM_BYTEVECTOR_LENGTH (bv);
259 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
260
261 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
262
263 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
264 {
265 /* Copy to the in-line buffer and free the current buffer. */
266 SCM_BYTEVECTOR_SET_INLINE (bv);
267 c_new_bv = SCM_BYTEVECTOR_CONTENTS (bv);
268 memcpy (c_new_bv, c_bv, c_new_len);
269 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
270 }
271 else
272 {
273 /* Resize the existing buffer. */
274 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
275 SCM_GC_BYTEVECTOR);
276 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
277 }
278 }
279 else
280 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
281
282 return bv;
283 }
284
285 int
286 scm_is_bytevector (SCM obj)
287 {
288 return SCM_SMOB_PREDICATE (scm_tc16_bytevector, obj);
289 }
290
291 size_t
292 scm_c_bytevector_length (SCM bv)
293 #define FUNC_NAME "scm_c_bytevector_length"
294 {
295 SCM_VALIDATE_BYTEVECTOR (1, bv);
296
297 return SCM_BYTEVECTOR_LENGTH (bv);
298 }
299 #undef FUNC_NAME
300
301 scm_t_uint8
302 scm_c_bytevector_ref (SCM bv, size_t index)
303 #define FUNC_NAME "scm_c_bytevector_ref"
304 {
305 size_t c_len;
306 const scm_t_uint8 *c_bv;
307
308 SCM_VALIDATE_BYTEVECTOR (1, bv);
309
310 c_len = SCM_BYTEVECTOR_LENGTH (bv);
311 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
312
313 if (SCM_UNLIKELY (index >= c_len))
314 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
315
316 return c_bv[index];
317 }
318 #undef FUNC_NAME
319
320 void
321 scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
322 #define FUNC_NAME "scm_c_bytevector_set_x"
323 {
324 size_t c_len;
325 scm_t_uint8 *c_bv;
326
327 SCM_VALIDATE_BYTEVECTOR (1, bv);
328
329 c_len = SCM_BYTEVECTOR_LENGTH (bv);
330 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
331
332 if (SCM_UNLIKELY (index >= c_len))
333 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
334
335 c_bv[index] = value;
336 }
337 #undef FUNC_NAME
338
339 /* This procedure is used by `scm_c_generalized_vector_set_x ()'. */
340 void
341 scm_i_bytevector_generalized_set_x (SCM bv, size_t index, SCM value)
342 #define FUNC_NAME "scm_i_bytevector_generalized_set_x"
343 {
344 scm_c_bytevector_set_x (bv, index, scm_to_uint8 (value));
345 }
346 #undef FUNC_NAME
347
348 static int
349 print_bytevector (SCM bv, SCM port, scm_print_state *pstate)
350 {
351 unsigned c_len, i;
352 unsigned char *c_bv;
353
354 c_len = SCM_BYTEVECTOR_LENGTH (bv);
355 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
356
357 scm_puts ("#vu8(", port);
358 for (i = 0; i < c_len; i++)
359 {
360 if (i > 0)
361 scm_putc (' ', port);
362
363 scm_uintprint (c_bv[i], 10, port);
364 }
365
366 scm_putc (')', port);
367
368 /* Make GCC think we use it. */
369 scm_remember_upto_here ((SCM) pstate);
370
371 return 1;
372 }
373
374 static SCM
375 bytevector_equal_p (SCM bv1, SCM bv2)
376 {
377 return scm_bytevector_eq_p (bv1, bv2);
378 }
379
380 static size_t
381 free_bytevector (SCM bv)
382 {
383
384 if (!SCM_BYTEVECTOR_INLINE_P (bv))
385 {
386 unsigned c_len;
387 signed char *c_bv;
388
389 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
390 c_len = SCM_BYTEVECTOR_LENGTH (bv);
391
392 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
393 }
394
395 return 0;
396 }
397
398
399 \f
400 /* General operations. */
401
402 SCM_SYMBOL (scm_sym_big, "big");
403 SCM_SYMBOL (scm_sym_little, "little");
404
405 SCM scm_endianness_big, scm_endianness_little;
406
407 /* Host endianness (a symbol). */
408 SCM scm_i_native_endianness = SCM_UNSPECIFIED;
409
410 /* Byte-swapping. */
411 #ifndef bswap_24
412 # define bswap_24(_x) \
413 ((((_x) & 0xff0000) >> 16) | \
414 (((_x) & 0x00ff00)) | \
415 (((_x) & 0x0000ff) << 16))
416 #endif
417
418
419 SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
420 (void),
421 "Return a symbol denoting the machine's native endianness.")
422 #define FUNC_NAME s_scm_native_endianness
423 {
424 return scm_i_native_endianness;
425 }
426 #undef FUNC_NAME
427
428 SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
429 (SCM obj),
430 "Return true if @var{obj} is a bytevector.")
431 #define FUNC_NAME s_scm_bytevector_p
432 {
433 return scm_from_bool (scm_is_bytevector (obj));
434 }
435 #undef FUNC_NAME
436
437 SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
438 (SCM len, SCM fill),
439 "Return a newly allocated bytevector of @var{len} bytes, "
440 "optionally filled with @var{fill}.")
441 #define FUNC_NAME s_scm_make_bytevector
442 {
443 SCM bv;
444 unsigned c_len;
445 signed char c_fill = '\0';
446
447 SCM_VALIDATE_UINT_COPY (1, len, c_len);
448 if (fill != SCM_UNDEFINED)
449 {
450 int value;
451
452 value = scm_to_int (fill);
453 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
454 scm_out_of_range (FUNC_NAME, fill);
455 c_fill = (signed char) value;
456 }
457
458 bv = make_bytevector (c_len);
459 if (fill != SCM_UNDEFINED)
460 {
461 unsigned i;
462 signed char *contents;
463
464 contents = SCM_BYTEVECTOR_CONTENTS (bv);
465 for (i = 0; i < c_len; i++)
466 contents[i] = c_fill;
467 }
468
469 return bv;
470 }
471 #undef FUNC_NAME
472
473 SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
474 (SCM bv),
475 "Return the length (in bytes) of @var{bv}.")
476 #define FUNC_NAME s_scm_bytevector_length
477 {
478 return scm_from_uint (scm_c_bytevector_length (bv));
479 }
480 #undef FUNC_NAME
481
482 SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
483 (SCM bv1, SCM bv2),
484 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
485 "have the same length and contents.")
486 #define FUNC_NAME s_scm_bytevector_eq_p
487 {
488 SCM result = SCM_BOOL_F;
489 unsigned c_len1, c_len2;
490
491 SCM_VALIDATE_BYTEVECTOR (1, bv1);
492 SCM_VALIDATE_BYTEVECTOR (2, bv2);
493
494 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
495 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
496
497 if (c_len1 == c_len2)
498 {
499 signed char *c_bv1, *c_bv2;
500
501 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
502 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
503
504 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
505 }
506
507 return result;
508 }
509 #undef FUNC_NAME
510
511 SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
512 (SCM bv, SCM fill),
513 "Fill bytevector @var{bv} with @var{fill}, a byte.")
514 #define FUNC_NAME s_scm_bytevector_fill_x
515 {
516 unsigned c_len, i;
517 signed char *c_bv, c_fill;
518
519 SCM_VALIDATE_BYTEVECTOR (1, bv);
520 c_fill = scm_to_int8 (fill);
521
522 c_len = SCM_BYTEVECTOR_LENGTH (bv);
523 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
524
525 for (i = 0; i < c_len; i++)
526 c_bv[i] = c_fill;
527
528 return SCM_UNSPECIFIED;
529 }
530 #undef FUNC_NAME
531
532 SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
533 (SCM source, SCM source_start, SCM target, SCM target_start,
534 SCM len),
535 "Copy @var{len} bytes from @var{source} into @var{target}, "
536 "starting reading from @var{source_start} (a positive index "
537 "within @var{source}) and start writing at "
538 "@var{target_start}.")
539 #define FUNC_NAME s_scm_bytevector_copy_x
540 {
541 unsigned c_len, c_source_len, c_target_len;
542 unsigned c_source_start, c_target_start;
543 signed char *c_source, *c_target;
544
545 SCM_VALIDATE_BYTEVECTOR (1, source);
546 SCM_VALIDATE_BYTEVECTOR (3, target);
547
548 c_len = scm_to_uint (len);
549 c_source_start = scm_to_uint (source_start);
550 c_target_start = scm_to_uint (target_start);
551
552 c_source = SCM_BYTEVECTOR_CONTENTS (source);
553 c_target = SCM_BYTEVECTOR_CONTENTS (target);
554 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
555 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
556
557 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
558 scm_out_of_range (FUNC_NAME, source_start);
559 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
560 scm_out_of_range (FUNC_NAME, target_start);
561
562 memcpy (c_target + c_target_start,
563 c_source + c_source_start,
564 c_len);
565
566 return SCM_UNSPECIFIED;
567 }
568 #undef FUNC_NAME
569
570 SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
571 (SCM bv),
572 "Return a newly allocated copy of @var{bv}.")
573 #define FUNC_NAME s_scm_bytevector_copy
574 {
575 SCM copy;
576 unsigned c_len;
577 signed char *c_bv, *c_copy;
578
579 SCM_VALIDATE_BYTEVECTOR (1, bv);
580
581 c_len = SCM_BYTEVECTOR_LENGTH (bv);
582 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
583
584 copy = make_bytevector (c_len);
585 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
586 memcpy (c_copy, c_bv, c_len);
587
588 return copy;
589 }
590 #undef FUNC_NAME
591
592 SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
593 1, 0, 0, (SCM array),
594 "Return a newly allocated bytevector whose contents\n"
595 "will be copied from the uniform array @var{array}.")
596 #define FUNC_NAME s_scm_uniform_array_to_bytevector
597 {
598 SCM contents, ret;
599 size_t len;
600 scm_t_array_handle h;
601 const void *base;
602 size_t sz;
603
604 contents = scm_array_contents (array, SCM_BOOL_T);
605 if (scm_is_false (contents))
606 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
607
608 scm_array_get_handle (contents, &h);
609
610 base = scm_array_handle_uniform_elements (&h);
611 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
612 sz = scm_array_handle_uniform_element_size (&h);
613
614 ret = make_bytevector (len * sz);
615 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
616
617 scm_array_handle_release (&h);
618
619 return ret;
620 }
621 #undef FUNC_NAME
622
623 \f
624 /* Operations on bytes and octets. */
625
626 SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
627 (SCM bv, SCM index),
628 "Return the octet located at @var{index} in @var{bv}.")
629 #define FUNC_NAME s_scm_bytevector_u8_ref
630 {
631 INTEGER_NATIVE_REF (8, unsigned);
632 }
633 #undef FUNC_NAME
634
635 SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
636 (SCM bv, SCM index),
637 "Return the byte located at @var{index} in @var{bv}.")
638 #define FUNC_NAME s_scm_bytevector_s8_ref
639 {
640 INTEGER_NATIVE_REF (8, signed);
641 }
642 #undef FUNC_NAME
643
644 SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
645 (SCM bv, SCM index, SCM value),
646 "Return the octet located at @var{index} in @var{bv}.")
647 #define FUNC_NAME s_scm_bytevector_u8_set_x
648 {
649 INTEGER_NATIVE_SET (8, unsigned);
650 }
651 #undef FUNC_NAME
652
653 SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
654 (SCM bv, SCM index, SCM value),
655 "Return the octet located at @var{index} in @var{bv}.")
656 #define FUNC_NAME s_scm_bytevector_s8_set_x
657 {
658 INTEGER_NATIVE_SET (8, signed);
659 }
660 #undef FUNC_NAME
661
662 #undef OCTET_ACCESSOR_PROLOGUE
663
664
665 SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
666 (SCM bv),
667 "Return a newly allocated list of octets containing the "
668 "contents of @var{bv}.")
669 #define FUNC_NAME s_scm_bytevector_to_u8_list
670 {
671 SCM lst, pair;
672 unsigned c_len, i;
673 unsigned char *c_bv;
674
675 SCM_VALIDATE_BYTEVECTOR (1, bv);
676
677 c_len = SCM_BYTEVECTOR_LENGTH (bv);
678 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
679
680 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
681 for (i = 0, pair = lst;
682 i < c_len;
683 i++, pair = SCM_CDR (pair))
684 {
685 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
686 }
687
688 return lst;
689 }
690 #undef FUNC_NAME
691
692 SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
693 (SCM lst),
694 "Turn @var{lst}, a list of octets, into a bytevector.")
695 #define FUNC_NAME s_scm_u8_list_to_bytevector
696 {
697 SCM bv, item;
698 long c_len, i;
699 unsigned char *c_bv;
700
701 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
702
703 bv = make_bytevector (c_len);
704 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
705
706 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
707 {
708 item = SCM_CAR (lst);
709
710 if (SCM_LIKELY (SCM_I_INUMP (item)))
711 {
712 long c_item;
713
714 c_item = SCM_I_INUM (item);
715 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
716 c_bv[i] = (unsigned char) c_item;
717 else
718 goto type_error;
719 }
720 else
721 goto type_error;
722 }
723
724 return bv;
725
726 type_error:
727 scm_wrong_type_arg (FUNC_NAME, 1, item);
728
729 return SCM_BOOL_F;
730 }
731 #undef FUNC_NAME
732
733 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
734 using (2^(SIZE * 8) - VALUE). */
735 static inline void
736 twos_complement (mpz_t value, size_t size)
737 {
738 unsigned long bit_count;
739
740 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
741 checking on SIZE performed earlier. */
742 bit_count = (unsigned long) size << 3UL;
743
744 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
745 mpz_ui_sub (value, 1UL << bit_count, value);
746 else
747 {
748 mpz_t max;
749
750 mpz_init (max);
751 mpz_ui_pow_ui (max, 2, bit_count);
752 mpz_sub (value, max, value);
753 mpz_clear (max);
754 }
755 }
756
757 static inline SCM
758 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
759 SCM endianness)
760 {
761 SCM result;
762 mpz_t c_mpz;
763 int c_endianness, negative_p = 0;
764
765 if (signed_p)
766 {
767 if (scm_is_eq (endianness, scm_sym_big))
768 negative_p = c_bv[0] & 0x80;
769 else
770 negative_p = c_bv[c_size - 1] & 0x80;
771 }
772
773 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
774
775 mpz_init (c_mpz);
776 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
777 c_size /* word is C_SIZE-byte long */,
778 c_endianness,
779 0 /* nails */, c_bv);
780
781 if (signed_p && negative_p)
782 {
783 twos_complement (c_mpz, c_size);
784 mpz_neg (c_mpz, c_mpz);
785 }
786
787 result = scm_from_mpz (c_mpz);
788 mpz_clear (c_mpz); /* FIXME: Needed? */
789
790 return result;
791 }
792
793 static inline int
794 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
795 SCM value, SCM endianness)
796 {
797 mpz_t c_mpz;
798 int c_endianness, c_sign, err = 0;
799
800 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
801
802 mpz_init (c_mpz);
803 scm_to_mpz (value, c_mpz);
804
805 c_sign = mpz_sgn (c_mpz);
806 if (c_sign < 0)
807 {
808 if (SCM_LIKELY (signed_p))
809 {
810 mpz_neg (c_mpz, c_mpz);
811 twos_complement (c_mpz, c_size);
812 }
813 else
814 {
815 err = -1;
816 goto finish;
817 }
818 }
819
820 if (c_sign == 0)
821 /* Zero. */
822 memset (c_bv, 0, c_size);
823 else
824 {
825 size_t word_count, value_size;
826
827 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
828 if (SCM_UNLIKELY (value_size > c_size))
829 {
830 err = -2;
831 goto finish;
832 }
833
834
835 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
836 c_size, c_endianness,
837 0 /* nails */, c_mpz);
838 if (SCM_UNLIKELY (word_count != 1))
839 /* Shouldn't happen since we already checked with VALUE_SIZE. */
840 abort ();
841 }
842
843 finish:
844 mpz_clear (c_mpz);
845
846 return err;
847 }
848
/* Common prologue of the arbitrary-size accessors.  It expects the SCM
   variables `bv', `index' and `size' and the macro FUNC_NAME to be in
   scope.  It declares C_LEN, C_INDEX, C_SIZE and C_BV, checks that BV is
   a bytevector, and signals an out-of-range error unless SIZE is valid
   and the SIZE octets starting at INDEX lie entirely within BV.  The
   _SIGN parameter is not used.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign) \
  unsigned long c_len, c_index, c_size; \
  char *c_bv; \
 \
  SCM_VALIDATE_BYTEVECTOR (1, bv); \
  c_index = scm_to_ulong (index); \
  c_size = scm_to_ulong (size); \
 \
  c_len = SCM_BYTEVECTOR_LENGTH (bv); \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
 \
  /* C_SIZE must have its 3 higher bits set to zero so that \
     multiplying it by 8 yields a number that fits in an \
     unsigned long.  */ \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
    scm_out_of_range (FUNC_NAME, size); \
  /* C_INDEX + C_SIZE could wrap around for a huge index, so the \
     check is done with a subtraction on the length instead.  */ \
  if (SCM_UNLIKELY (c_size > c_len || c_index > c_len - c_size)) \
    scm_out_of_range (FUNC_NAME, index);
867
868
/* Body template of an integer reader for arbitrary sizes.  It expects
   `c_bv' (pointing at the datum), `c_size' and `endianness' to be in
   scope.  Sizes 1 and 2 are decoded directly into a fixnum; any size of
   3 or more is delegated to `bytevector_large_ref'.  */
#define GENERIC_INTEGER_REF(_sign) \
  if (c_size >= 3) \
    return bytevector_large_ref ((char *) c_bv, \
                                 c_size, SIGNEDNESS (_sign), \
                                 endianness); \
 \
  { \
    _sign int c_small; \
    const int c_swap_p = \
      !scm_is_eq (endianness, scm_i_native_endianness); \
 \
    switch (c_size) \
      { \
      case 1: \
        { \
          /* A single octet has no byte order.  */ \
          _sign char c_octet; \
          memcpy (&c_octet, c_bv, 1); \
          c_small = c_octet; \
        } \
        break; \
      case 2: \
        { \
          INT_TYPE (16, _sign) c_pair; \
          memcpy (&c_pair, c_bv, 2); \
          c_small = c_swap_p \
            ? (INT_TYPE (16, _sign)) bswap_16 (c_pair) \
            : c_pair; \
        } \
        break; \
      default: \
        abort (); \
      } \
 \
    return SCM_I_MAKINUM ((_sign int) c_small); \
  }
910
911 static inline SCM
912 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
913 {
914 GENERIC_INTEGER_REF (signed);
915 }
916
917 static inline SCM
918 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
919 {
920 GENERIC_INTEGER_REF (unsigned);
921 }
922
923
/* Body template of an integer writer for arbitrary sizes.  It expects
   `c_bv' (pointing at the destination), `c_size', `value', `endianness'
   and FUNC_NAME to be in scope.  Sizes 1 and 2 require a fixnum within
   the range of the target type and are encoded directly; any size of 3
   or more is delegated to `bytevector_large_set'.  Any failure is
   reported as an out-of-range error on VALUE.

   The fixnum is held in a `long', as in INTEGER_SET: storing it in an
   `int' truncated it where `long' is wider, so that out-of-range values
   could pass the range check and be stored silently.  */
#define GENERIC_INTEGER_SET(_sign) \
  if (c_size < 3) \
    { \
      _sign long c_value; \
 \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
        goto range_error; \
 \
      c_value = SCM_I_INUM (value); \
      switch (c_size) \
        { \
        case 1: \
          if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value))) \
            { \
              _sign char c_value8; \
              c_value8 = (_sign char) c_value; \
              memcpy (c_bv, &c_value8, 1); \
            } \
          else \
            goto range_error; \
          break; \
 \
        case 2: \
          if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value))) \
            { \
              int swap; \
              INT_TYPE (16, _sign) c_value16; \
 \
              swap = !scm_is_eq (endianness, scm_i_native_endianness); \
 \
              /* C_VALUE is known to fit in 16 bits here.  */ \
              if (swap) \
                c_value16 = (INT_TYPE (16, _sign)) \
                  bswap_16 ((scm_t_uint16) c_value); \
              else \
                c_value16 = (INT_TYPE (16, _sign)) c_value; \
 \
              memcpy (c_bv, &c_value16, 2); \
            } \
          else \
            goto range_error; \
          break; \
 \
        default: \
          abort (); \
        } \
    } \
  else \
    { \
      int err; \
 \
      err = bytevector_large_set (c_bv, c_size, \
                                  SIGNEDNESS (_sign), \
                                  value, endianness); \
      if (err) \
        goto range_error; \
    } \
 \
  return; \
 \
 range_error: \
  scm_out_of_range (FUNC_NAME, value); \
  return;
986
987 static inline void
988 bytevector_signed_set (char *c_bv, size_t c_size,
989 SCM value, SCM endianness,
990 const char *func_name)
991 #define FUNC_NAME func_name
992 {
993 GENERIC_INTEGER_SET (signed);
994 }
995 #undef FUNC_NAME
996
997 static inline void
998 bytevector_unsigned_set (char *c_bv, size_t c_size,
999 SCM value, SCM endianness,
1000 const char *func_name)
1001 #define FUNC_NAME func_name
1002 {
1003 GENERIC_INTEGER_SET (unsigned);
1004 }
1005 #undef FUNC_NAME
1006
1007 #undef GENERIC_INTEGER_SET
1008 #undef GENERIC_INTEGER_REF
1009
1010
1011 SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
1012 (SCM bv, SCM index, SCM endianness, SCM size),
1013 "Return the @var{size}-octet long unsigned integer at index "
1014 "@var{index} in @var{bv}.")
1015 #define FUNC_NAME s_scm_bytevector_uint_ref
1016 {
1017 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1018
1019 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
1020 }
1021 #undef FUNC_NAME
1022
1023 SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1024 (SCM bv, SCM index, SCM endianness, SCM size),
1025 "Return the @var{size}-octet long unsigned integer at index "
1026 "@var{index} in @var{bv}.")
1027 #define FUNC_NAME s_scm_bytevector_sint_ref
1028 {
1029 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1030
1031 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1032 }
1033 #undef FUNC_NAME
1034
1035 SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
1036 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1037 "Set the @var{size}-octet long unsigned integer at @var{index} "
1038 "to @var{value}.")
1039 #define FUNC_NAME s_scm_bytevector_uint_set_x
1040 {
1041 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1042
1043 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
1044 FUNC_NAME);
1045
1046 return SCM_UNSPECIFIED;
1047 }
1048 #undef FUNC_NAME
1049
1050 SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
1051 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1052 "Set the @var{size}-octet long signed integer at @var{index} "
1053 "to @var{value}.")
1054 #define FUNC_NAME s_scm_bytevector_sint_set_x
1055 {
1056 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1057
1058 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
1059 FUNC_NAME);
1060
1061 return SCM_UNSPECIFIED;
1062 }
1063 #undef FUNC_NAME
1064
1065
1066 \f
1067 /* Operations on integers of arbitrary size. */
1068
/* Body template of the `bytevector->{s,u}int-list' procedures.  It
   expects the SCM variables `bv', `endianness' and `size' and the macro
   FUNC_NAME to be in scope.  An empty bytevector yields the empty list.
   Otherwise SIZE must be positive and no larger than the length of BV,
   and the result lists the consecutive SIZE-octet integers of BV;
   trailing octets that do not make up a whole integer are ignored.

   A SIZE of zero used to reach the `c_len / c_size' division below; it
   is now reported as out of range.  The element count is converted with
   `scm_from_size_t' so that it is not truncated.  */
#define INTEGERS_TO_LIST(_sign) \
  SCM lst, pair; \
  size_t i, c_len, c_size; \
 \
  SCM_VALIDATE_BYTEVECTOR (1, bv); \
  SCM_VALIDATE_SYMBOL (2, endianness); \
  c_size = scm_to_uint (size); \
 \
  c_len = SCM_BYTEVECTOR_LENGTH (bv); \
  if (SCM_UNLIKELY (c_len == 0)) \
    lst = SCM_EOL; \
  else if (SCM_UNLIKELY ((c_size == 0) || (c_len < c_size))) \
    scm_out_of_range (FUNC_NAME, size); \
  else \
    { \
      const char *c_bv; \
 \
      c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
 \
      lst = scm_make_list (scm_from_size_t (c_len / c_size), \
                           SCM_UNSPECIFIED); \
      for (i = 0, pair = lst; \
           i <= c_len - c_size; \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair)) \
        { \
          SCM_SETCAR (pair, \
                      bytevector_ ## _sign ## _ref (c_bv, c_size, \
                                                    endianness)); \
        } \
    } \
 \
  return lst;
1101
1102 SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
1103 3, 0, 0,
1104 (SCM bv, SCM endianness, SCM size),
1105 "Return a list of signed integers of @var{size} octets "
1106 "representing the contents of @var{bv}.")
1107 #define FUNC_NAME s_scm_bytevector_to_sint_list
1108 {
1109 INTEGERS_TO_LIST (signed);
1110 }
1111 #undef FUNC_NAME
1112
1113 SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1114 3, 0, 0,
1115 (SCM bv, SCM endianness, SCM size),
1116 "Return a list of unsigned integers of @var{size} octets "
1117 "representing the contents of @var{bv}.")
1118 #define FUNC_NAME s_scm_bytevector_to_uint_list
1119 {
1120 INTEGERS_TO_LIST (unsigned);
1121 }
1122 #undef FUNC_NAME
1123
/* The template above is named INTEGERS_TO_LIST; the directive used to
   name a non-existent INTEGER_TO_LIST and so left the macro defined.  */
#undef INTEGERS_TO_LIST
1125
1126
/* Body template of the `{s,u}int-list->bytevector' procedures.  It
   expects the SCM variables `lst', `endianness' and `size' and the macro
   FUNC_NAME to be in scope.  LST must be a proper list; each element is
   encoded on SIZE octets, in order, into a freshly allocated bytevector,
   which is returned.

   The total length is checked for overflow before allocating: a wrapped
   `c_len * c_size' would yield a bytevector that is too small for the
   writes that follow.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign) \
  SCM bv; \
  long c_len; \
  size_t c_size; \
  char *c_bv, *c_bv_ptr; \
 \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len); \
  SCM_VALIDATE_SYMBOL (2, endianness); \
  c_size = scm_to_uint (size); \
 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
    scm_out_of_range (FUNC_NAME, size); \
  if (SCM_UNLIKELY ((size_t) c_len > ((size_t) -1) / c_size)) \
    scm_out_of_range (FUNC_NAME, size); \
 \
  bv = make_bytevector ((size_t) c_len * c_size); \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
 \
  for (c_bv_ptr = c_bv; \
       !scm_is_null (lst); \
       lst = SCM_CDR (lst), c_bv_ptr += c_size) \
    { \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size, \
                                    SCM_CAR (lst), endianness, \
                                    FUNC_NAME); \
    } \
 \
  return bv;
1153
1154
1155 SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
1156 3, 0, 0,
1157 (SCM lst, SCM endianness, SCM size),
1158 "Return a bytevector containing the unsigned integers "
1159 "listed in @var{lst} and encoded on @var{size} octets "
1160 "according to @var{endianness}.")
1161 #define FUNC_NAME s_scm_uint_list_to_bytevector
1162 {
1163 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1164 }
1165 #undef FUNC_NAME
1166
1167 SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
1168 3, 0, 0,
1169 (SCM lst, SCM endianness, SCM size),
1170 "Return a bytevector containing the signed integers "
1171 "listed in @var{lst} and encoded on @var{size} octets "
1172 "according to @var{endianness}.")
1173 #define FUNC_NAME s_scm_sint_list_to_bytevector
1174 {
1175 INTEGER_LIST_TO_BYTEVECTOR (signed);
1176 }
1177 #undef FUNC_NAME
1178
1179 #undef INTEGER_LIST_TO_BYTEVECTOR
1180
1181
1182 \f
1183 /* Operations on 16-bit integers. */
1184
1185 SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
1186 3, 0, 0,
1187 (SCM bv, SCM index, SCM endianness),
1188 "Return the unsigned 16-bit integer from @var{bv} at "
1189 "@var{index}.")
1190 #define FUNC_NAME s_scm_bytevector_u16_ref
1191 {
1192 INTEGER_REF (16, unsigned);
1193 }
1194 #undef FUNC_NAME
1195
1196 SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
1197 3, 0, 0,
1198 (SCM bv, SCM index, SCM endianness),
1199 "Return the signed 16-bit integer from @var{bv} at "
1200 "@var{index}.")
1201 #define FUNC_NAME s_scm_bytevector_s16_ref
1202 {
1203 INTEGER_REF (16, signed);
1204 }
1205 #undef FUNC_NAME
1206
1207 SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1208 2, 0, 0,
1209 (SCM bv, SCM index),
1210 "Return the unsigned 16-bit integer from @var{bv} at "
1211 "@var{index} using the native endianness.")
1212 #define FUNC_NAME s_scm_bytevector_u16_native_ref
1213 {
1214 INTEGER_NATIVE_REF (16, unsigned);
1215 }
1216 #undef FUNC_NAME
1217
1218 SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1219 2, 0, 0,
1220 (SCM bv, SCM index),
1221 "Return the unsigned 16-bit integer from @var{bv} at "
1222 "@var{index} using the native endianness.")
1223 #define FUNC_NAME s_scm_bytevector_s16_native_ref
1224 {
1225 INTEGER_NATIVE_REF (16, signed);
1226 }
1227 #undef FUNC_NAME
1228
1229 SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
1230 4, 0, 0,
1231 (SCM bv, SCM index, SCM value, SCM endianness),
1232 "Store @var{value} in @var{bv} at @var{index} according to "
1233 "@var{endianness}.")
1234 #define FUNC_NAME s_scm_bytevector_u16_set_x
1235 {
1236 INTEGER_SET (16, unsigned);
1237 }
1238 #undef FUNC_NAME
1239
1240 SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
1241 4, 0, 0,
1242 (SCM bv, SCM index, SCM value, SCM endianness),
1243 "Store @var{value} in @var{bv} at @var{index} according to "
1244 "@var{endianness}.")
1245 #define FUNC_NAME s_scm_bytevector_s16_set_x
1246 {
1247 INTEGER_SET (16, signed);
1248 }
1249 #undef FUNC_NAME
1250
1251 SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1252 3, 0, 0,
1253 (SCM bv, SCM index, SCM value),
1254 "Store the unsigned integer @var{value} at index @var{index} "
1255 "of @var{bv} using the native endianness.")
1256 #define FUNC_NAME s_scm_bytevector_u16_native_set_x
1257 {
1258 INTEGER_NATIVE_SET (16, unsigned);
1259 }
1260 #undef FUNC_NAME
1261
1262 SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1263 3, 0, 0,
1264 (SCM bv, SCM index, SCM value),
1265 "Store the signed integer @var{value} at index @var{index} "
1266 "of @var{bv} using the native endianness.")
1267 #define FUNC_NAME s_scm_bytevector_s16_native_set_x
1268 {
1269 INTEGER_NATIVE_SET (16, signed);
1270 }
1271 #undef FUNC_NAME
1272
1273
1274 \f
1275 /* Operations on 32-bit integers. */
1276
1277 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1278 arbitrary 32-bit integers. Thus we fall back to using the
1279 `large_{ref,set}' variants on 32-bit machines. */
1280
/* Getter body with explicit endianness for a _BITS-wide integer read
   through bytevector_large_ref.  ENDIANNESS is validated as argument 3
   of the enclosing function.  */
#define LARGE_INTEGER_REF(_bits, _sign)                                   \
  INTEGER_ACCESSOR_PROLOGUE (_bits, _sign);                               \
  SCM_VALIDATE_SYMBOL (3, endianness);                                    \
                                                                          \
  return bytevector_large_ref ((char *) c_bv + c_index, _bits / 8,        \
                               SIGNEDNESS (_sign), endianness);
1287
/* Setter body with explicit endianness for a _BITS-wide integer written
   through bytevector_large_set.  A non-zero result from that helper is
   reported as an out-of-range VALUE.  */
#define LARGE_INTEGER_SET(_bits, _sign)                                   \
  int c_status;                                                           \
  INTEGER_ACCESSOR_PROLOGUE (_bits, _sign);                               \
  SCM_VALIDATE_SYMBOL (4, endianness);                                    \
                                                                          \
  c_status = bytevector_large_set ((char *) c_bv + c_index, _bits / 8,    \
                                   SIGNEDNESS (_sign), value,             \
                                   endianness);                           \
  if (SCM_UNLIKELY (c_status))                                            \
    scm_out_of_range (FUNC_NAME, value);                                  \
                                                                          \
  return SCM_UNSPECIFIED;
1299
/* Native-endianness counterpart of LARGE_INTEGER_REF: there is no
   endianness argument, `scm_i_native_endianness' is passed instead.  */
#define LARGE_INTEGER_NATIVE_REF(_bits, _sign)                            \
  INTEGER_ACCESSOR_PROLOGUE (_bits, _sign);                               \
  return bytevector_large_ref ((char *) c_bv + c_index, _bits / 8,        \
                               SIGNEDNESS (_sign),                        \
                               scm_i_native_endianness);
1304
/* Native-endianness counterpart of LARGE_INTEGER_SET: writes VALUE with
   `scm_i_native_endianness' and reports a non-zero result from
   bytevector_large_set as an out-of-range VALUE.  */
#define LARGE_INTEGER_NATIVE_SET(_bits, _sign)                            \
  int c_status;                                                           \
  INTEGER_ACCESSOR_PROLOGUE (_bits, _sign);                               \
                                                                          \
  c_status = bytevector_large_set ((char *) c_bv + c_index, _bits / 8,    \
                                   SIGNEDNESS (_sign), value,             \
                                   scm_i_native_endianness);              \
  if (SCM_UNLIKELY (c_status))                                            \
    scm_out_of_range (FUNC_NAME, value);                                  \
                                                                          \
  return SCM_UNSPECIFIED;
1316
1317
1318 SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
1319 3, 0, 0,
1320 (SCM bv, SCM index, SCM endianness),
1321 "Return the unsigned 32-bit integer from @var{bv} at "
1322 "@var{index}.")
1323 #define FUNC_NAME s_scm_bytevector_u32_ref
1324 {
1325 #if SIZEOF_VOID_P > 4
1326 INTEGER_REF (32, unsigned);
1327 #else
1328 LARGE_INTEGER_REF (32, unsigned);
1329 #endif
1330 }
1331 #undef FUNC_NAME
1332
1333 SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
1334 3, 0, 0,
1335 (SCM bv, SCM index, SCM endianness),
1336 "Return the signed 32-bit integer from @var{bv} at "
1337 "@var{index}.")
1338 #define FUNC_NAME s_scm_bytevector_s32_ref
1339 {
1340 #if SIZEOF_VOID_P > 4
1341 INTEGER_REF (32, signed);
1342 #else
1343 LARGE_INTEGER_REF (32, signed);
1344 #endif
1345 }
1346 #undef FUNC_NAME
1347
1348 SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1349 2, 0, 0,
1350 (SCM bv, SCM index),
1351 "Return the unsigned 32-bit integer from @var{bv} at "
1352 "@var{index} using the native endianness.")
1353 #define FUNC_NAME s_scm_bytevector_u32_native_ref
1354 {
1355 #if SIZEOF_VOID_P > 4
1356 INTEGER_NATIVE_REF (32, unsigned);
1357 #else
1358 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1359 #endif
1360 }
1361 #undef FUNC_NAME
1362
1363 SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1364 2, 0, 0,
1365 (SCM bv, SCM index),
1366 "Return the unsigned 32-bit integer from @var{bv} at "
1367 "@var{index} using the native endianness.")
1368 #define FUNC_NAME s_scm_bytevector_s32_native_ref
1369 {
1370 #if SIZEOF_VOID_P > 4
1371 INTEGER_NATIVE_REF (32, signed);
1372 #else
1373 LARGE_INTEGER_NATIVE_REF (32, signed);
1374 #endif
1375 }
1376 #undef FUNC_NAME
1377
1378 SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
1379 4, 0, 0,
1380 (SCM bv, SCM index, SCM value, SCM endianness),
1381 "Store @var{value} in @var{bv} at @var{index} according to "
1382 "@var{endianness}.")
1383 #define FUNC_NAME s_scm_bytevector_u32_set_x
1384 {
1385 #if SIZEOF_VOID_P > 4
1386 INTEGER_SET (32, unsigned);
1387 #else
1388 LARGE_INTEGER_SET (32, unsigned);
1389 #endif
1390 }
1391 #undef FUNC_NAME
1392
1393 SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
1394 4, 0, 0,
1395 (SCM bv, SCM index, SCM value, SCM endianness),
1396 "Store @var{value} in @var{bv} at @var{index} according to "
1397 "@var{endianness}.")
1398 #define FUNC_NAME s_scm_bytevector_s32_set_x
1399 {
1400 #if SIZEOF_VOID_P > 4
1401 INTEGER_SET (32, signed);
1402 #else
1403 LARGE_INTEGER_SET (32, signed);
1404 #endif
1405 }
1406 #undef FUNC_NAME
1407
1408 SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1409 3, 0, 0,
1410 (SCM bv, SCM index, SCM value),
1411 "Store the unsigned integer @var{value} at index @var{index} "
1412 "of @var{bv} using the native endianness.")
1413 #define FUNC_NAME s_scm_bytevector_u32_native_set_x
1414 {
1415 #if SIZEOF_VOID_P > 4
1416 INTEGER_NATIVE_SET (32, unsigned);
1417 #else
1418 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1419 #endif
1420 }
1421 #undef FUNC_NAME
1422
1423 SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1424 3, 0, 0,
1425 (SCM bv, SCM index, SCM value),
1426 "Store the signed integer @var{value} at index @var{index} "
1427 "of @var{bv} using the native endianness.")
1428 #define FUNC_NAME s_scm_bytevector_s32_native_set_x
1429 {
1430 #if SIZEOF_VOID_P > 4
1431 INTEGER_NATIVE_SET (32, signed);
1432 #else
1433 LARGE_INTEGER_NATIVE_SET (32, signed);
1434 #endif
1435 }
1436 #undef FUNC_NAME
1437
1438
1439 \f
1440 /* Operations on 64-bit integers. */
1441
1442 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1443
1444 SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
1445 3, 0, 0,
1446 (SCM bv, SCM index, SCM endianness),
1447 "Return the unsigned 64-bit integer from @var{bv} at "
1448 "@var{index}.")
1449 #define FUNC_NAME s_scm_bytevector_u64_ref
1450 {
1451 LARGE_INTEGER_REF (64, unsigned);
1452 }
1453 #undef FUNC_NAME
1454
1455 SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
1456 3, 0, 0,
1457 (SCM bv, SCM index, SCM endianness),
1458 "Return the signed 64-bit integer from @var{bv} at "
1459 "@var{index}.")
1460 #define FUNC_NAME s_scm_bytevector_s64_ref
1461 {
1462 LARGE_INTEGER_REF (64, signed);
1463 }
1464 #undef FUNC_NAME
1465
1466 SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1467 2, 0, 0,
1468 (SCM bv, SCM index),
1469 "Return the unsigned 64-bit integer from @var{bv} at "
1470 "@var{index} using the native endianness.")
1471 #define FUNC_NAME s_scm_bytevector_u64_native_ref
1472 {
1473 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1474 }
1475 #undef FUNC_NAME
1476
1477 SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1478 2, 0, 0,
1479 (SCM bv, SCM index),
1480 "Return the unsigned 64-bit integer from @var{bv} at "
1481 "@var{index} using the native endianness.")
1482 #define FUNC_NAME s_scm_bytevector_s64_native_ref
1483 {
1484 LARGE_INTEGER_NATIVE_REF (64, signed);
1485 }
1486 #undef FUNC_NAME
1487
1488 SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
1489 4, 0, 0,
1490 (SCM bv, SCM index, SCM value, SCM endianness),
1491 "Store @var{value} in @var{bv} at @var{index} according to "
1492 "@var{endianness}.")
1493 #define FUNC_NAME s_scm_bytevector_u64_set_x
1494 {
1495 LARGE_INTEGER_SET (64, unsigned);
1496 }
1497 #undef FUNC_NAME
1498
1499 SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
1500 4, 0, 0,
1501 (SCM bv, SCM index, SCM value, SCM endianness),
1502 "Store @var{value} in @var{bv} at @var{index} according to "
1503 "@var{endianness}.")
1504 #define FUNC_NAME s_scm_bytevector_s64_set_x
1505 {
1506 LARGE_INTEGER_SET (64, signed);
1507 }
1508 #undef FUNC_NAME
1509
1510 SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1511 3, 0, 0,
1512 (SCM bv, SCM index, SCM value),
1513 "Store the unsigned integer @var{value} at index @var{index} "
1514 "of @var{bv} using the native endianness.")
1515 #define FUNC_NAME s_scm_bytevector_u64_native_set_x
1516 {
1517 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1518 }
1519 #undef FUNC_NAME
1520
1521 SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1522 3, 0, 0,
1523 (SCM bv, SCM index, SCM value),
1524 "Store the signed integer @var{value} at index @var{index} "
1525 "of @var{bv} using the native endianness.")
1526 #define FUNC_NAME s_scm_bytevector_s64_native_set_x
1527 {
1528 LARGE_INTEGER_NATIVE_SET (64, signed);
1529 }
1530 #undef FUNC_NAME
1531
1532
1533 \f
1534 /* Operations on IEEE-754 numbers. */
1535
1536 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1537 However, in R6RS, when the endianness is `little', little endian is
1538 assumed for both the byte order and the word order. This is clear from
1539 Section 2.1 of R6RS-lib (in response to
1540 http://www.r6rs.org/formal-comments/comment-187.txt). */
1541
1542
1543 /* Convert to/from a floating-point number with different endianness. This
1544 method is probably not the most efficient but it should be portable. */
1545
1546 static inline void
1547 float_to_foreign_endianness (union scm_ieee754_float *target,
1548 float source)
1549 {
1550 union scm_ieee754_float src;
1551
1552 src.f = source;
1553
1554 #ifdef WORDS_BIGENDIAN
1555 /* Assuming little endian for both byte and word order. */
1556 target->little_endian.negative = src.big_endian.negative;
1557 target->little_endian.exponent = src.big_endian.exponent;
1558 target->little_endian.mantissa = src.big_endian.mantissa;
1559 #else
1560 target->big_endian.negative = src.little_endian.negative;
1561 target->big_endian.exponent = src.little_endian.exponent;
1562 target->big_endian.mantissa = src.little_endian.mantissa;
1563 #endif
1564 }
1565
1566 static inline float
1567 float_from_foreign_endianness (const union scm_ieee754_float *source)
1568 {
1569 union scm_ieee754_float result;
1570
1571 #ifdef WORDS_BIGENDIAN
1572 /* Assuming little endian for both byte and word order. */
1573 result.big_endian.negative = source->little_endian.negative;
1574 result.big_endian.exponent = source->little_endian.exponent;
1575 result.big_endian.mantissa = source->little_endian.mantissa;
1576 #else
1577 result.little_endian.negative = source->big_endian.negative;
1578 result.little_endian.exponent = source->big_endian.exponent;
1579 result.little_endian.mantissa = source->big_endian.mantissa;
1580 #endif
1581
1582 return (result.f);
1583 }
1584
1585 static inline void
1586 double_to_foreign_endianness (union scm_ieee754_double *target,
1587 double source)
1588 {
1589 union scm_ieee754_double src;
1590
1591 src.d = source;
1592
1593 #ifdef WORDS_BIGENDIAN
1594 /* Assuming little endian for both byte and word order. */
1595 target->little_little_endian.negative = src.big_endian.negative;
1596 target->little_little_endian.exponent = src.big_endian.exponent;
1597 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1598 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1599 #else
1600 target->big_endian.negative = src.little_little_endian.negative;
1601 target->big_endian.exponent = src.little_little_endian.exponent;
1602 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1603 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1604 #endif
1605 }
1606
1607 static inline double
1608 double_from_foreign_endianness (const union scm_ieee754_double *source)
1609 {
1610 union scm_ieee754_double result;
1611
1612 #ifdef WORDS_BIGENDIAN
1613 /* Assuming little endian for both byte and word order. */
1614 result.big_endian.negative = source->little_little_endian.negative;
1615 result.big_endian.exponent = source->little_little_endian.exponent;
1616 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1617 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1618 #else
1619 result.little_little_endian.negative = source->big_endian.negative;
1620 result.little_little_endian.exponent = source->big_endian.exponent;
1621 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1622 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1623 #endif
1624
1625 return (result.d);
1626 }
1627
/* Helper macros that let the accessor templates below be written once
   for both `float' and `double'; _FP is one of those two type names.
   XXX: Guile can only convert to/from doubles, so the SCM conversions
   ignore _FP.  */
#define IEEE754_UNION(_fp)      union scm_ieee754_ ## _fp
#define IEEE754_TO_SCM(_fp)     scm_from_double
#define IEEE754_FROM_SCM(_fp)   scm_to_double
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_fp) \
  _fp ## _from_foreign_endianness
#define IEEE754_TO_FOREIGN_ENDIANNESS(_fp) \
  _fp ## _to_foreign_endianness
1637
1638
/* Template getters and setters.  */
1640
/* Shared prologue of the IEEE-754 accessors: reuse the integer accessor
   prologue, passing the width of _FP in bits (its size in bytes shifted
   left by 3).  */
#define IEEE754_ACCESSOR_PROLOGUE(_fp) \
  INTEGER_ACCESSOR_PROLOGUE (sizeof (_fp) << 3UL, signed);
1643
/* Getter body with explicit endianness.  When ENDIANNESS is the native
   one the bytes are copied straight into a _TYPE; otherwise they are
   copied into the matching union and converted field by field.  */
#define IEEE754_REF(_type)                                                \
  _type c_number;                                                         \
                                                                          \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                      \
  SCM_VALIDATE_SYMBOL (3, endianness);                                    \
                                                                          \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                   \
    {                                                                     \
      IEEE754_UNION (_type) c_foreign;                                    \
                                                                          \
      memcpy (&c_foreign, c_bv + c_index, sizeof (c_foreign));            \
      c_number = IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_foreign);    \
    }                                                                     \
  else                                                                    \
    memcpy (&c_number, c_bv + c_index, sizeof (c_number));                \
                                                                          \
  return IEEE754_TO_SCM (_type) (c_number);
1662
/* Getter body for the native endianness: copy the bytes at INDEX into a
   _TYPE and convert it to a Scheme number.  */
#define IEEE754_NATIVE_REF(_type)                                         \
  _type c_number;                                                         \
                                                                          \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                      \
                                                                          \
  memcpy (&c_number, c_bv + c_index, sizeof (c_number));                  \
  return IEEE754_TO_SCM (_type) (c_number);
1670
/* Setter body with explicit endianness.  VALUE must be a real; it is
   converted to a _TYPE and either copied as is (native endianness) or
   first converted to the opposite layout.  */
#define IEEE754_SET(_type)                                                \
  _type c_number;                                                         \
                                                                          \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                      \
  SCM_VALIDATE_REAL (3, value);                                           \
  SCM_VALIDATE_SYMBOL (4, endianness);                                    \
  c_number = IEEE754_FROM_SCM (_type) (value);                            \
                                                                          \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                   \
    {                                                                     \
      IEEE754_UNION (_type) c_foreign;                                    \
                                                                          \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_foreign, c_number);       \
      memcpy (c_bv + c_index, &c_foreign, sizeof (c_foreign));            \
    }                                                                     \
  else                                                                    \
    memcpy (c_bv + c_index, &c_number, sizeof (c_number));                \
                                                                          \
  return SCM_UNSPECIFIED;
1690
/* Setter body for the native endianness: VALUE must be a real; it is
   converted to a _TYPE and its bytes are copied to INDEX.  */
#define IEEE754_NATIVE_SET(_type)                                         \
  _type c_number;                                                         \
                                                                          \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                      \
  SCM_VALIDATE_REAL (3, value);                                           \
  c_number = IEEE754_FROM_SCM (_type) (value);                            \
                                                                          \
  memcpy (c_bv + c_index, &c_number, sizeof (c_number));                  \
  return SCM_UNSPECIFIED;
1700
1701
1702 /* Single precision. */
1703
1704 SCM_DEFINE (scm_bytevector_ieee_single_ref,
1705 "bytevector-ieee-single-ref",
1706 3, 0, 0,
1707 (SCM bv, SCM index, SCM endianness),
1708 "Return the IEEE-754 single from @var{bv} at "
1709 "@var{index}.")
1710 #define FUNC_NAME s_scm_bytevector_ieee_single_ref
1711 {
1712 IEEE754_REF (float);
1713 }
1714 #undef FUNC_NAME
1715
1716 SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
1717 "bytevector-ieee-single-native-ref",
1718 2, 0, 0,
1719 (SCM bv, SCM index),
1720 "Return the IEEE-754 single from @var{bv} at "
1721 "@var{index} using the native endianness.")
1722 #define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
1723 {
1724 IEEE754_NATIVE_REF (float);
1725 }
1726 #undef FUNC_NAME
1727
1728 SCM_DEFINE (scm_bytevector_ieee_single_set_x,
1729 "bytevector-ieee-single-set!",
1730 4, 0, 0,
1731 (SCM bv, SCM index, SCM value, SCM endianness),
1732 "Store real @var{value} in @var{bv} at @var{index} according to "
1733 "@var{endianness}.")
1734 #define FUNC_NAME s_scm_bytevector_ieee_single_set_x
1735 {
1736 IEEE754_SET (float);
1737 }
1738 #undef FUNC_NAME
1739
1740 SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
1741 "bytevector-ieee-single-native-set!",
1742 3, 0, 0,
1743 (SCM bv, SCM index, SCM value),
1744 "Store the real @var{value} at index @var{index} "
1745 "of @var{bv} using the native endianness.")
1746 #define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
1747 {
1748 IEEE754_NATIVE_SET (float);
1749 }
1750 #undef FUNC_NAME
1751
1752
1753 /* Double precision. */
1754
1755 SCM_DEFINE (scm_bytevector_ieee_double_ref,
1756 "bytevector-ieee-double-ref",
1757 3, 0, 0,
1758 (SCM bv, SCM index, SCM endianness),
1759 "Return the IEEE-754 double from @var{bv} at "
1760 "@var{index}.")
1761 #define FUNC_NAME s_scm_bytevector_ieee_double_ref
1762 {
1763 IEEE754_REF (double);
1764 }
1765 #undef FUNC_NAME
1766
1767 SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
1768 "bytevector-ieee-double-native-ref",
1769 2, 0, 0,
1770 (SCM bv, SCM index),
1771 "Return the IEEE-754 double from @var{bv} at "
1772 "@var{index} using the native endianness.")
1773 #define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
1774 {
1775 IEEE754_NATIVE_REF (double);
1776 }
1777 #undef FUNC_NAME
1778
1779 SCM_DEFINE (scm_bytevector_ieee_double_set_x,
1780 "bytevector-ieee-double-set!",
1781 4, 0, 0,
1782 (SCM bv, SCM index, SCM value, SCM endianness),
1783 "Store real @var{value} in @var{bv} at @var{index} according to "
1784 "@var{endianness}.")
1785 #define FUNC_NAME s_scm_bytevector_ieee_double_set_x
1786 {
1787 IEEE754_SET (double);
1788 }
1789 #undef FUNC_NAME
1790
1791 SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
1792 "bytevector-ieee-double-native-set!",
1793 3, 0, 0,
1794 (SCM bv, SCM index, SCM value),
1795 "Store the real @var{value} at index @var{index} "
1796 "of @var{bv} using the native endianness.")
1797 #define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
1798 {
1799 IEEE754_NATIVE_SET (double);
1800 }
1801 #undef FUNC_NAME
1802
1803
/* Retire every IEEE-754 template macro defined in this section,
   including IEEE754_ACCESSOR_PROLOGUE, which was previously left
   defined.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_ACCESSOR_PROLOGUE
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1813
1814 \f
1815 /* Operations on strings. */
1816
1817
/* Define `utf<_bits>_strlen', which scans STR up to (not including) the
   first zero code unit and returns the number of BYTES scanned, i.e.
   the number of code units times _BITS / 8.  */
#define UTF_STRLEN_FUNCTION(_bits)                                        \
static inline size_t                                                      \
utf ## _bits ## _strlen (const uint ## _bits ## _t *str)                  \
{                                                                         \
  const uint ## _bits ## _t *end = str;                                   \
                                                                          \
  while (*end != 0)                                                       \
    end++;                                                                \
                                                                          \
  return ((size_t) (end - str)) * ((_bits) / 8);                          \
}

/* Only the 8-bit variant is instantiated.  */
UTF_STRLEN_FUNCTION (8)
1836
1837
/* Length in bytes of _TEXT, a zero-terminated UTF-(_BITS) string;
   dispatches to the `utf<_bits>_strlen' function generated by
   UTF_STRLEN_FUNCTION.  */
#define UTF_STRLEN(_bits, _text) \
  utf ## _bits ## _strlen (_text)
1841
1842 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1843 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1844 encoding name). */
1845 static inline void
1846 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1847 {
1848 strcpy (name, "UTF-");
1849 strcat (name, ((utf_width == 8)
1850 ? "8"
1851 : ((utf_width == 16)
1852 ? "16"
1853 : ((utf_width == 32)
1854 ? "32"
1855 : "??"))));
1856 strcat (name,
1857 ((scm_is_eq (endianness, scm_sym_big))
1858 ? "BE"
1859 : ((scm_is_eq (endianness, scm_sym_little))
1860 ? "LE"
1861 : "unknown")));
1862 }
1863
1864 /* Maximum length of a UTF encoding name. */
1865 #define MAX_UTF_ENCODING_NAME_LEN 16
1866
/* Produce the body of a `string->utf' function.  STR is first converted
   to the locale encoding, then transcoded with `mem_iconveh' from the
   locale charset to the UTF encoding selected by _UTF_WIDTH and
   ENDIANNESS (big endian when ENDIANNESS is unbound).  Conversion
   failures are reported with `scm_syserror_msg'.  */
#define STRING_TO_UTF(_utf_width)                                         \
  SCM utf = SCM_BOOL_F;                                                   \
  int err;                                                                \
  char *c_str;                                                            \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                             \
  char *c_utf = NULL, *c_locale;                                          \
  size_t c_strlen, c_buf_len, c_raw_strlen, c_utf_len = 0;                \
                                                                          \
  SCM_VALIDATE_STRING (1, str);                                           \
  if (endianness == SCM_UNDEFINED)                                        \
    endianness = scm_sym_big;                                             \
  else                                                                    \
    SCM_VALIDATE_SYMBOL (2, endianness);                                  \
                                                                          \
  c_strlen = scm_c_string_length (str);                                   \
  c_buf_len = c_strlen * ((_utf_width) / 8);                              \
  for (;;)                                                                \
    {                                                                     \
      /* `scm_to_locale_stringbuf' stores at most C_BUF_LEN bytes but    \
         returns the number of bytes the whole string needs.  Pass the   \
         real capacity and, when the string did not fit, retry with the  \
         reported size.  The previous loop always passed C_STRLEN and    \
         therefore never terminated for strings whose locale encoding    \
         is longer than their character count.  */                       \
      c_str = (char *) alloca (c_buf_len + 1);                            \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_buf_len);     \
      if (c_raw_strlen <= c_buf_len)                                      \
        break;                                                            \
      c_buf_len = c_raw_strlen;                                           \
    }                                                                     \
  c_str[c_raw_strlen] = '\0';                                             \
                                                                          \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);               \
                                                                          \
  /* Keep a private copy of the locale charset name.  */                 \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);            \
  strcpy (c_locale, locale_charset ());                                   \
                                                                          \
  err = mem_iconveh (c_str, c_raw_strlen,                                 \
                     c_locale, c_utf_name,                                \
                     iconveh_question_mark, NULL,                         \
                     &c_utf, &c_utf_len);                                 \
  if (SCM_UNLIKELY (err))                                                 \
    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",          \
                      scm_list_1 (str), err);                             \
  else                                                                    \
    /* Hand the converted buffer over to a new bytevector.  */           \
    utf = scm_c_take_bytevector ((signed char *) c_utf, c_utf_len);       \
                                                                          \
  return (utf);
1909
1910
1911
1912 SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1913 1, 0, 0,
1914 (SCM str),
1915 "Return a newly allocated bytevector that contains the UTF-8 "
1916 "encoding of @var{str}.")
1917 #define FUNC_NAME s_scm_string_to_utf8
1918 {
1919 SCM utf;
1920 char *c_str;
1921 uint8_t *c_utf;
1922 size_t c_strlen, c_raw_strlen;
1923
1924 SCM_VALIDATE_STRING (1, str);
1925
1926 c_strlen = scm_c_string_length (str);
1927 c_raw_strlen = c_strlen;
1928 do
1929 {
1930 c_str = (char *) alloca (c_raw_strlen + 1);
1931 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1932 }
1933 while (c_raw_strlen > c_strlen);
1934 c_str[c_raw_strlen] = '\0';
1935
1936 c_utf = u8_strconv_from_locale (c_str);
1937 if (SCM_UNLIKELY (c_utf == NULL))
1938 scm_syserror (FUNC_NAME);
1939 else
1940 /* C_UTF is null-terminated. */
1941 utf = scm_c_take_bytevector ((signed char *) c_utf,
1942 UTF_STRLEN (8, c_utf));
1943
1944 return (utf);
1945 }
1946 #undef FUNC_NAME
1947
1948 SCM_DEFINE (scm_string_to_utf16, "string->utf16",
1949 1, 1, 0,
1950 (SCM str, SCM endianness),
1951 "Return a newly allocated bytevector that contains the UTF-16 "
1952 "encoding of @var{str}.")
1953 #define FUNC_NAME s_scm_string_to_utf16
1954 {
1955 STRING_TO_UTF (16);
1956 }
1957 #undef FUNC_NAME
1958
1959 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1960 1, 1, 0,
1961 (SCM str, SCM endianness),
1962 "Return a newly allocated bytevector that contains the UTF-32 "
1963 "encoding of @var{str}.")
1964 #define FUNC_NAME s_scm_string_to_utf32
1965 {
1966 STRING_TO_UTF (32);
1967 }
1968 #undef FUNC_NAME
1969
1970
/* Produce the body of a `utf->string' function.  The contents of the
   bytevector UTF are transcoded with `mem_iconveh' from the UTF encoding
   selected by _UTF_WIDTH and ENDIANNESS (big endian when ENDIANNESS is
   unbound) to the locale charset, and the result is turned into a
   string.  Conversion failures are reported with `scm_syserror_msg'.  */
#define UTF_TO_STRING(_utf_width)                                         \
  SCM str = SCM_BOOL_F;                                                   \
  int c_status;                                                           \
  char *c_decoded = NULL, *c_charset;                                     \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                             \
  const char *c_utf;                                                      \
  size_t c_decoded_len = 0, c_utf_len;                                    \
                                                                          \
  SCM_VALIDATE_BYTEVECTOR (1, utf);                                       \
  if (endianness == SCM_UNDEFINED)                                        \
    endianness = scm_sym_big;                                             \
  else                                                                    \
    SCM_VALIDATE_SYMBOL (2, endianness);                                  \
                                                                          \
  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);                                \
  c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                         \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);               \
                                                                          \
  /* Keep a private copy of the locale charset name.  */                 \
  c_charset = (char *) alloca (strlen (locale_charset ()) + 1);           \
  strcpy (c_charset, locale_charset ());                                  \
                                                                          \
  c_status = mem_iconveh (c_utf, c_utf_len,                               \
                          c_utf_name, c_charset,                          \
                          iconveh_question_mark, NULL,                    \
                          &c_decoded, &c_decoded_len);                    \
  if (SCM_UNLIKELY (c_status))                                            \
    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",       \
                      scm_list_1 (utf), c_status);                        \
  else                                                                    \
    /* Hand the converted buffer over to a new string.  */               \
    str = scm_take_locale_stringn (c_decoded, c_decoded_len);             \
                                                                          \
  return (str);
2006
2007
2008 SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2009 1, 0, 0,
2010 (SCM utf),
2011 "Return a newly allocate string that contains from the UTF-8-"
2012 "encoded contents of bytevector @var{utf}.")
2013 #define FUNC_NAME s_scm_utf8_to_string
2014 {
2015 SCM str;
2016 int err;
2017 char *c_str = NULL, *c_locale;
2018 const char *c_utf;
2019 size_t c_utf_len, c_strlen = 0;
2020
2021 SCM_VALIDATE_BYTEVECTOR (1, utf);
2022
2023 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2024
2025 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
2026 strcpy (c_locale, locale_charset ());
2027
2028 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2029 err = mem_iconveh (c_utf, c_utf_len,
2030 "UTF-8", c_locale,
2031 iconveh_question_mark, NULL,
2032 &c_str, &c_strlen);
2033 if (SCM_UNLIKELY (err))
2034 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
2035 scm_list_1 (utf), err);
2036 else
2037 /* C_STR is null-terminated. */
2038 str = scm_take_locale_stringn (c_str, c_strlen);
2039
2040 return (str);
2041 }
2042 #undef FUNC_NAME
2043
2044 SCM_DEFINE (scm_utf16_to_string, "utf16->string",
2045 1, 1, 0,
2046 (SCM utf, SCM endianness),
2047 "Return a newly allocate string that contains from the UTF-16-"
2048 "encoded contents of bytevector @var{utf}.")
2049 #define FUNC_NAME s_scm_utf16_to_string
2050 {
2051 UTF_TO_STRING (16);
2052 }
2053 #undef FUNC_NAME
2054
2055 SCM_DEFINE (scm_utf32_to_string, "utf32->string",
2056 1, 1, 0,
2057 (SCM utf, SCM endianness),
2058 "Return a newly allocate string that contains from the UTF-32-"
2059 "encoded contents of bytevector @var{utf}.")
2060 #define FUNC_NAME s_scm_utf32_to_string
2061 {
2062 UTF_TO_STRING (32);
2063 }
2064 #undef FUNC_NAME
2065
2066
2067 \f
2068 /* Bytevectors as generalized vectors & arrays. */
2069
2070 static SCM
2071 bv_handle_ref (scm_t_array_handle *h, size_t index)
2072 {
2073 return SCM_I_MAKINUM (scm_c_bytevector_ref (h->array, index));
2074 }
2075
2076 static void
2077 bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2078 {
2079 scm_c_bytevector_set_x (h->array, index, scm_to_uint8 (val));
2080 }
2081
2082 static void
2083 bytevector_get_handle (SCM v, scm_t_array_handle *h)
2084 {
2085 h->array = v;
2086 h->ndims = 1;
2087 h->dims = &h->dim0;
2088 h->dim0.lbnd = 0;
2089 h->dim0.ubnd = SCM_BYTEVECTOR_LENGTH (v) - 1;
2090 h->dim0.inc = 1;
2091 h->element_type = SCM_ARRAY_ELEMENT_TYPE_VU8;
2092 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2093 }
2094
2095 \f
2096 /* Initialization. */
2097
2098 void
2099 scm_bootstrap_bytevectors (void)
2100 {
2101 /* The SMOB type must be instantiated here because the
2102 generalized-vector API may want to access bytevectors even though
2103 `(rnrs bytevector)' hasn't been loaded. */
2104 scm_tc16_bytevector = scm_make_smob_type ("bytevector", 0);
2105 scm_set_smob_free (scm_tc16_bytevector, free_bytevector);
2106 scm_set_smob_print (scm_tc16_bytevector, print_bytevector);
2107 scm_set_smob_equalp (scm_tc16_bytevector, bytevector_equal_p);
2108
2109 scm_null_bytevector =
2110 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));
2111
2112 #ifdef WORDS_BIGENDIAN
2113 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
2114 #else
2115 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
2116 #endif
2117
2118 scm_c_register_extension ("libguile", "scm_init_bytevectors",
2119 (scm_t_extension_init_func) scm_init_bytevectors,
2120 NULL);
2121
2122 {
2123 scm_t_array_implementation impl;
2124
2125 impl.tag = scm_tc16_bytevector;
2126 impl.mask = 0xffff;
2127 impl.vref = bv_handle_ref;
2128 impl.vset = bv_handle_set_x;
2129 impl.get_handle = bytevector_get_handle;
2130 scm_i_register_array_implementation (&impl);
2131 scm_i_register_vector_constructor
2132 (scm_i_array_element_types[SCM_ARRAY_ELEMENT_TYPE_VU8],
2133 scm_make_bytevector);
2134 }
2135 }
2136
2137 void
2138 scm_init_bytevectors (void)
2139 {
2140 #include "libguile/bytevectors.x"
2141
2142 scm_endianness_big = scm_sym_big;
2143 scm_endianness_little = scm_sym_little;
2144 }