Use a TC7 tag instead of a SMOB for bytevectors.
[bpt/guile.git] / libguile / bytevectors.c
1 /* Copyright (C) 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <alloca.h>
25
26 #include <gmp.h>
27
28 #include "libguile/_scm.h"
29 #include "libguile/extensions.h"
30 #include "libguile/bytevectors.h"
31 #include "libguile/strings.h"
32 #include "libguile/validate.h"
33 #include "libguile/ieee-754.h"
34 #include "libguile/arrays.h"
35 #include "libguile/array-handle.h"
36 #include "libguile/uniform.h"
37 #include "libguile/srfi-4.h"
38
39 #include <byteswap.h>
40 #include <striconveh.h>
41 #include <uniconv.h>
42
43 #ifdef HAVE_LIMITS_H
44 # include <limits.h>
45 #else
46 /* Assuming 32-bit longs. */
47 # define ULONG_MAX 4294967295UL
48 #endif
49
50 #include <string.h>
51
52
53 \f
54 /* Utilities. */
55
/* Helper macros for the accessor templates (macros) below, which are
   parameterized by integer width (8, 16 or 32) and by signedness
   (`signed' or `unsigned').  */

/* C integer type for each width/signedness pair.  */
#define INT8_T_signed     scm_t_int8
#define INT8_T_unsigned   scm_t_uint8
#define INT16_T_signed    scm_t_int16
#define INT16_T_unsigned  scm_t_uint16
#define INT32_T_signed    scm_t_int32
#define INT32_T_unsigned  scm_t_uint32

/* Range predicates: true when _X is representable in the given type.  */
#define is_signed_int8(_x)     ((-128L <= (_x)) && (127L >= (_x)))
#define is_unsigned_int8(_x)   (255UL >= (_x))
#define is_signed_int16(_x)    ((-32768L <= (_x)) && (32767L >= (_x)))
#define is_unsigned_int16(_x)  (65535UL >= (_x))
#define is_signed_int32(_x)    ((-2147483648L <= (_x)) && (2147483647L >= (_x)))
#define is_unsigned_int32(_x)  (4294967295UL >= (_x))

/* Signedness expressed as a C boolean.  */
#define SIGNEDNESS_signed    1
#define SIGNEDNESS_unsigned  0

/* Token-pasting selectors used by the templates.  */
#define INT_TYPE(_size, _sign)     INT ## _size ## _T_ ## _sign
#define INT_SWAP(_size)            bswap_ ## _size
#define INT_VALID_P(_size, _sign)  is_ ## _sign ## _int ## _size
#define SIGNEDNESS(_sign)          SIGNEDNESS_ ## _sign
77
78
/* Common prologue of the fixed-size integer accessor templates.

   Expects `bv' and `index' (both SCM) and FUNC_NAME to be in scope.
   Declares C_LEN (length of BV in octets), C_INDEX (INDEX converted with
   `scm_to_uint') and C_BV (contents of BV viewed as `_sign char'), and
   signals an out-of-range error unless the _LEN-bit integer starting at
   C_INDEX lies entirely within BV.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                          \
  size_t c_len, c_index;                                                \
  _sign char *c_bv;                                                     \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_uint (index);                                        \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv);                   \
                                                                        \
  /* Compare C_INDEX against the room that is left instead of           \
     computing C_INDEX + size - 1: that sum wraps around when           \
     `size_t' is no wider than `unsigned int', which would let an       \
     out-of-bounds index through.  */                                   \
  if (SCM_UNLIKELY (c_len < ((_len) >> 3UL)                             \
                    || c_index > c_len - ((_len) >> 3UL)))              \
    scm_out_of_range (FUNC_NAME, index);
91
/* Body template of a fixed-size integer reader (8, 16 or 32 bits only).
   Expects `bv', `index' and `endianness' (all SCM) and FUNC_NAME to be in
   scope.  Reads the _LEN-bit integer stored at INDEX, byte-swaps it unless
   ENDIANNESS is the native endianness symbol, and returns it as a fixnum
   built with SCM_I_MAKINUM.
   NOTE(review): whether every 32-bit value fits in a fixnum depends on the
   platform's fixnum width, which is not visible here -- confirm.  */
#define INTEGER_REF(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_raw;                                       \
                                                                        \
    memcpy (&c_raw, c_bv + c_index, (_len) >> 3);                       \
    if (scm_is_eq (endianness, scm_i_native_endianness))                \
      return SCM_I_MAKINUM (c_raw);                                     \
                                                                        \
    /* Foreign byte order: swap before boxing.  */                      \
    c_raw = INT_SWAP (_len) (c_raw);                                    \
    return SCM_I_MAKINUM (c_raw);                                       \
  }
110
/* Body template of a fixed-size integer reader that always uses the native
   endianness.  Expects `bv' and `index' (both SCM) and FUNC_NAME to be in
   scope; returns the _LEN-bit integer at INDEX as a fixnum.  */
#define INTEGER_NATIVE_REF(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    INT_TYPE (_len, _sign) c_raw;                                       \
                                                                        \
    /* memcpy, not a pointer cast: C_INDEX may be unaligned.  */        \
    memcpy (&c_raw, c_bv + c_index, (_len) >> 3);                       \
    return SCM_I_MAKINUM (c_raw);                                       \
  }
125
/* Body template of a fixed-size integer writer (8, 16 or 32 bits only).
   Expects `bv', `index', `value' and `endianness' (all SCM) and FUNC_NAME
   to be in scope.  VALUE must be a fixnum within the range of the target
   type; it is byte-swapped unless ENDIANNESS is the native endianness
   symbol, then stored at INDEX.  Evaluates to a `return SCM_UNSPECIFIED'.  */
#define INTEGER_SET(_len, _sign)                                        \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  /* VALUE is reported as argument 3 below, so ENDIANNESS, which        \
     follows it in the setters' argument lists, is argument 4.  */      \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  {                                                                     \
    _sign long c_value;                                                 \
    INT_TYPE (_len, _sign) c_value_short;                               \
                                                                        \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_value = SCM_I_INUM (value);                                       \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value)))            \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_value_short = (INT_TYPE (_len, _sign)) c_value;                   \
    if (!scm_is_eq (endianness, scm_i_native_endianness))               \
      c_value_short = INT_SWAP (_len) (c_value_short);                  \
                                                                        \
    memcpy (&c_bv[c_index], &c_value_short, (_len) / 8);                \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
150
/* Body template of a fixed-size integer writer that always uses the native
   endianness.  Expects `bv', `index' and `value' (all SCM) and FUNC_NAME to
   be in scope.  VALUE must be a fixnum within the range of the target
   type.  Evaluates to a `return SCM_UNSPECIFIED'.  */
#define INTEGER_NATIVE_SET(_len, _sign)                                 \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  {                                                                     \
    _sign long c_wide;                                                  \
    INT_TYPE (_len, _sign) c_narrow;                                    \
                                                                        \
    /* Only fixnums are accepted.  */                                   \
    if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                            \
      scm_wrong_type_arg (FUNC_NAME, 3, value);                         \
                                                                        \
    c_wide = SCM_I_INUM (value);                                        \
    if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_wide)))             \
      scm_out_of_range (FUNC_NAME, value);                              \
                                                                        \
    c_narrow = (INT_TYPE (_len, _sign)) c_wide;                         \
    memcpy (c_bv + c_index, &c_narrow, (_len) >> 3);                    \
  }                                                                     \
                                                                        \
  return SCM_UNSPECIFIED;
173
174
175 \f
176 /* Bytevector type. */
177
/* The threshold (in octets) up to which bytevectors are stored "in-line",
   i.e., without allocating memory beside the double cell itself.
   This optimization is necessary since small bytevectors are expected to be
   common.  */
#define SCM_BYTEVECTOR_INLINE_THRESHOLD  (2 * sizeof (SCM))

/* True if a bytevector of _SIZE octets can be stored in-line.  */
#define SCM_BYTEVECTOR_INLINEABLE_SIZE_P(_size)         \
  ((_size) <= SCM_BYTEVECTOR_INLINE_THRESHOLD)

/* Store the length in octets (cell word 1) and the pointer to the
   out-of-line buffer (cell word 2) of _BV.  */
#define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len)            \
  SCM_SET_CELL_WORD_1 ((_bv), (scm_t_bits) (_len))
#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _buf)          \
  SCM_SET_CELL_WORD_2 ((_bv), (scm_t_bits) (_buf))

/* Mark BV as storing its contents in-line.  */
#define SCM_BYTEVECTOR_SET_INLINE(bv)                                   \
  SCM_SET_BYTEVECTOR_FLAGS (bv,                                         \
                            SCM_BYTEVECTOR_FLAGS (bv)                   \
                            | SCM_F_BYTEVECTOR_INLINE)

/* Record HINT as the element type of BV, keeping only the in-line flag
   among the existing flags.  */
#define SCM_BYTEVECTOR_SET_ELEMENT_TYPE(bv, hint)                       \
  SCM_SET_BYTEVECTOR_FLAGS (bv,                                         \
                            (SCM_BYTEVECTOR_FLAGS (bv) & SCM_F_BYTEVECTOR_INLINE) \
                            | ((hint) << 1UL))

/* Size in octets of one element of VAR.  */
#define SCM_BYTEVECTOR_TYPE_SIZE(var)                           \
  (scm_i_array_element_type_sizes[SCM_BYTEVECTOR_ELEMENT_TYPE (var)]/8)

/* Number of elements (not octets) in VAR.  The expansion is fully
   parenthesized so that it can be used inside larger expressions.  */
#define SCM_BYTEVECTOR_TYPED_LENGTH(var)                        \
  (SCM_BYTEVECTOR_LENGTH (var) / SCM_BYTEVECTOR_TYPE_SIZE (var))
203
204 /* The empty bytevector.  `make_bytevector' returns this object when asked
   for zero elements of element type 0.  It is defined here with the
   placeholder value SCM_UNSPECIFIED.
   NOTE(review): presumably replaced by a real bytevector during module
   initialization, which is outside this view -- confirm.  */
205 SCM scm_null_bytevector = SCM_UNSPECIFIED;
206
207
208 static inline SCM
209 make_bytevector_from_buffer (size_t len, void *contents,
210 scm_t_array_element_type element_type)
211 {
212 SCM ret;
213 size_t c_len;
214
215 if (SCM_UNLIKELY (element_type > SCM_ARRAY_ELEMENT_TYPE_LAST
216 || scm_i_array_element_type_sizes[element_type] < 8
217 || len >= (SCM_I_SIZE_MAX
218 / (scm_i_array_element_type_sizes[element_type]/8))))
219 /* This would be an internal Guile programming error */
220 abort ();
221
222 c_len = len * (scm_i_array_element_type_sizes[element_type] / 8);
223 if (!SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_len))
224 ret = scm_double_cell (scm_tc7_bytevector, (scm_t_bits) c_len,
225 (scm_t_bits) contents, 0);
226 else
227 {
228 ret = scm_double_cell (scm_tc7_bytevector, (scm_t_bits) c_len, 0, 0);
229 SCM_BYTEVECTOR_SET_INLINE (ret);
230 if (contents)
231 {
232 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), contents, c_len);
233 scm_gc_free (contents, c_len, SCM_GC_BYTEVECTOR);
234 }
235 }
236 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
237 return ret;
238 }
239
240 static inline SCM
241 make_bytevector (size_t len, scm_t_array_element_type element_type)
242 {
243 size_t c_len;
244
245 if (SCM_UNLIKELY (len == 0 && element_type == 0))
246 return scm_null_bytevector;
247 else if (SCM_UNLIKELY (element_type > SCM_ARRAY_ELEMENT_TYPE_LAST
248 || scm_i_array_element_type_sizes[element_type] < 8
249 || len >= (SCM_I_SIZE_MAX
250 / (scm_i_array_element_type_sizes[element_type]/8))))
251 /* This would be an internal Guile programming error */
252 abort ();
253
254 c_len = len * (scm_i_array_element_type_sizes[element_type]/8);
255 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_len))
256 {
257 SCM ret;
258 ret = scm_double_cell (scm_tc7_bytevector, (scm_t_bits) c_len, 0, 0);
259 SCM_BYTEVECTOR_SET_INLINE (ret);
260 SCM_BYTEVECTOR_SET_ELEMENT_TYPE (ret, element_type);
261 return ret;
262 }
263 else
264 {
265 void *buf = scm_gc_malloc_pointerless (c_len, SCM_GC_BYTEVECTOR);
266 return make_bytevector_from_buffer (len, buf, element_type);
267 }
268 }
269
270 /* Return a new bytevector of size LEN octets. */
271 SCM
272 scm_c_make_bytevector (size_t len)
273 {
274 return make_bytevector (len, SCM_ARRAY_ELEMENT_TYPE_VU8);
275 }
276
277 /* Return a new bytevector of size LEN elements. */
278 SCM
279 scm_i_make_typed_bytevector (size_t len, scm_t_array_element_type element_type)
280 {
281 return make_bytevector (len, element_type);
282 }
283
284 /* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
285 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
286 SCM
287 scm_c_take_bytevector (signed char *contents, size_t len)
288 {
289 return make_bytevector_from_buffer (len, contents, SCM_ARRAY_ELEMENT_TYPE_VU8);
290 }
291
292 SCM
293 scm_c_take_typed_bytevector (signed char *contents, size_t len,
294 scm_t_array_element_type element_type)
295 {
296 return make_bytevector_from_buffer (len, contents, element_type);
297 }
298
299 /* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
300 size) and return BV. */
301 SCM
302 scm_i_shrink_bytevector (SCM bv, size_t c_new_len)
303 {
304 if (SCM_UNLIKELY (c_new_len % SCM_BYTEVECTOR_TYPE_SIZE (bv)))
305 /* This would be an internal Guile programming error */
306 abort ();
307
308 if (!SCM_BYTEVECTOR_INLINE_P (bv))
309 {
310 size_t c_len;
311 signed char *c_bv, *c_new_bv;
312
313 c_len = SCM_BYTEVECTOR_LENGTH (bv);
314 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
315
316 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
317
318 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
319 {
320 /* Copy to the in-line buffer and free the current buffer. */
321 SCM_BYTEVECTOR_SET_INLINE (bv);
322 c_new_bv = SCM_BYTEVECTOR_CONTENTS (bv);
323 memcpy (c_new_bv, c_bv, c_new_len);
324 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
325 }
326 else
327 {
328 /* Resize the existing buffer. */
329 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
330 SCM_GC_BYTEVECTOR);
331 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
332 }
333 }
334 else
335 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
336
337 return bv;
338 }
339
340 int
341 scm_is_bytevector (SCM obj)
342 {
343 return SCM_BYTEVECTOR_P (obj);
344 }
345
346 size_t
347 scm_c_bytevector_length (SCM bv)
348 #define FUNC_NAME "scm_c_bytevector_length"
349 {
350 SCM_VALIDATE_BYTEVECTOR (1, bv);
351
352 return SCM_BYTEVECTOR_LENGTH (bv);
353 }
354 #undef FUNC_NAME
355
356 scm_t_uint8
357 scm_c_bytevector_ref (SCM bv, size_t index)
358 #define FUNC_NAME "scm_c_bytevector_ref"
359 {
360 size_t c_len;
361 const scm_t_uint8 *c_bv;
362
363 SCM_VALIDATE_BYTEVECTOR (1, bv);
364
365 c_len = SCM_BYTEVECTOR_LENGTH (bv);
366 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
367
368 if (SCM_UNLIKELY (index >= c_len))
369 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
370
371 return c_bv[index];
372 }
373 #undef FUNC_NAME
374
375 void
376 scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
377 #define FUNC_NAME "scm_c_bytevector_set_x"
378 {
379 size_t c_len;
380 scm_t_uint8 *c_bv;
381
382 SCM_VALIDATE_BYTEVECTOR (1, bv);
383
384 c_len = SCM_BYTEVECTOR_LENGTH (bv);
385 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
386
387 if (SCM_UNLIKELY (index >= c_len))
388 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
389
390 c_bv[index] = value;
391 }
392 #undef FUNC_NAME
393
394
395 \f
396 int
397 scm_i_print_bytevector (SCM bv, SCM port, scm_print_state *pstate SCM_UNUSED)
398 {
399 ssize_t ubnd, inc, i;
400 scm_t_array_handle h;
401
402 scm_array_get_handle (bv, &h);
403
404 scm_putc ('#', port);
405 scm_write (scm_array_handle_element_type (&h), port);
406 scm_putc ('(', port);
407 for (i = h.dims[0].lbnd, ubnd = h.dims[0].ubnd, inc = h.dims[0].inc;
408 i <= ubnd; i += inc)
409 {
410 if (i > 0)
411 scm_putc (' ', port);
412 scm_write (scm_array_handle_ref (&h, i), port);
413 }
414 scm_putc (')', port);
415
416 return 1;
417 }
418
419 \f
420 /* General operations. */
421
422 SCM_SYMBOL (scm_sym_big, "big");
423 SCM_SYMBOL (scm_sym_little, "little");
424
425 SCM scm_endianness_big, scm_endianness_little;
426
427 /* Host endianness (a symbol). */
428 SCM scm_i_native_endianness = SCM_UNSPECIFIED;
429
/* Byte-swapping.  Fallback for 24-bit quantities: exchange the lowest and
   the highest of the three octets of _X and keep the middle one.  Note
   that _X is evaluated three times.  */
#ifndef bswap_24
# define bswap_24(_x)                           \
  ((((_x) & 0x0000ff) << 16) |                  \
   (((_x) & 0x00ff00))       |                  \
   (((_x) & 0xff0000) >> 16))
#endif
437
438
439 SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
440 (void),
441 "Return a symbol denoting the machine's native endianness.")
442 #define FUNC_NAME s_scm_native_endianness
443 {
444 return scm_i_native_endianness;
445 }
446 #undef FUNC_NAME
447
448 SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
449 (SCM obj),
450 "Return true if @var{obj} is a bytevector.")
451 #define FUNC_NAME s_scm_bytevector_p
452 {
453 return scm_from_bool (scm_is_bytevector (obj));
454 }
455 #undef FUNC_NAME
456
457 SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
458 (SCM len, SCM fill),
459 "Return a newly allocated bytevector of @var{len} bytes, "
460 "optionally filled with @var{fill}.")
461 #define FUNC_NAME s_scm_make_bytevector
462 {
463 SCM bv;
464 unsigned c_len;
465 signed char c_fill = '\0';
466
467 SCM_VALIDATE_UINT_COPY (1, len, c_len);
468 if (fill != SCM_UNDEFINED)
469 {
470 int value;
471
472 value = scm_to_int (fill);
473 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
474 scm_out_of_range (FUNC_NAME, fill);
475 c_fill = (signed char) value;
476 }
477
478 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
479 if (fill != SCM_UNDEFINED)
480 {
481 unsigned i;
482 signed char *contents;
483
484 contents = SCM_BYTEVECTOR_CONTENTS (bv);
485 for (i = 0; i < c_len; i++)
486 contents[i] = c_fill;
487 }
488
489 return bv;
490 }
491 #undef FUNC_NAME
492
493 SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
494 (SCM bv),
495 "Return the length (in bytes) of @var{bv}.")
496 #define FUNC_NAME s_scm_bytevector_length
497 {
498 return scm_from_uint (scm_c_bytevector_length (bv));
499 }
500 #undef FUNC_NAME
501
502 SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
503 (SCM bv1, SCM bv2),
504 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
505 "have the same length and contents.")
506 #define FUNC_NAME s_scm_bytevector_eq_p
507 {
508 SCM result = SCM_BOOL_F;
509 unsigned c_len1, c_len2;
510
511 SCM_VALIDATE_BYTEVECTOR (1, bv1);
512 SCM_VALIDATE_BYTEVECTOR (2, bv2);
513
514 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
515 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
516
517 if (c_len1 == c_len2)
518 {
519 signed char *c_bv1, *c_bv2;
520
521 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
522 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
523
524 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
525 }
526
527 return result;
528 }
529 #undef FUNC_NAME
530
531 SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
532 (SCM bv, SCM fill),
533 "Fill bytevector @var{bv} with @var{fill}, a byte.")
534 #define FUNC_NAME s_scm_bytevector_fill_x
535 {
536 unsigned c_len, i;
537 signed char *c_bv, c_fill;
538
539 SCM_VALIDATE_BYTEVECTOR (1, bv);
540 c_fill = scm_to_int8 (fill);
541
542 c_len = SCM_BYTEVECTOR_LENGTH (bv);
543 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
544
545 for (i = 0; i < c_len; i++)
546 c_bv[i] = c_fill;
547
548 return SCM_UNSPECIFIED;
549 }
550 #undef FUNC_NAME
551
552 SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
553 (SCM source, SCM source_start, SCM target, SCM target_start,
554 SCM len),
555 "Copy @var{len} bytes from @var{source} into @var{target}, "
556 "starting reading from @var{source_start} (a positive index "
557 "within @var{source}) and start writing at "
558 "@var{target_start}.")
559 #define FUNC_NAME s_scm_bytevector_copy_x
560 {
561 unsigned c_len, c_source_len, c_target_len;
562 unsigned c_source_start, c_target_start;
563 signed char *c_source, *c_target;
564
565 SCM_VALIDATE_BYTEVECTOR (1, source);
566 SCM_VALIDATE_BYTEVECTOR (3, target);
567
568 c_len = scm_to_uint (len);
569 c_source_start = scm_to_uint (source_start);
570 c_target_start = scm_to_uint (target_start);
571
572 c_source = SCM_BYTEVECTOR_CONTENTS (source);
573 c_target = SCM_BYTEVECTOR_CONTENTS (target);
574 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
575 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
576
577 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
578 scm_out_of_range (FUNC_NAME, source_start);
579 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
580 scm_out_of_range (FUNC_NAME, target_start);
581
582 memcpy (c_target + c_target_start,
583 c_source + c_source_start,
584 c_len);
585
586 return SCM_UNSPECIFIED;
587 }
588 #undef FUNC_NAME
589
590 SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
591 (SCM bv),
592 "Return a newly allocated copy of @var{bv}.")
593 #define FUNC_NAME s_scm_bytevector_copy
594 {
595 SCM copy;
596 unsigned c_len;
597 signed char *c_bv, *c_copy;
598
599 SCM_VALIDATE_BYTEVECTOR (1, bv);
600
601 c_len = SCM_BYTEVECTOR_LENGTH (bv);
602 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
603
604 copy = make_bytevector (c_len, SCM_BYTEVECTOR_ELEMENT_TYPE (bv));
605 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
606 memcpy (c_copy, c_bv, c_len);
607
608 return copy;
609 }
610 #undef FUNC_NAME
611
612 SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
613 1, 0, 0, (SCM array),
614 "Return a newly allocated bytevector whose contents\n"
615 "will be copied from the uniform array @var{array}.")
616 #define FUNC_NAME s_scm_uniform_array_to_bytevector
617 {
618 SCM contents, ret;
619 size_t len;
620 scm_t_array_handle h;
621 const void *base;
622 size_t sz;
623
624 contents = scm_array_contents (array, SCM_BOOL_T);
625 if (scm_is_false (contents))
626 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
627
628 scm_array_get_handle (contents, &h);
629
630 base = scm_array_handle_uniform_elements (&h);
631 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
632 sz = scm_array_handle_uniform_element_size (&h);
633
634 ret = make_bytevector (len * sz, SCM_ARRAY_ELEMENT_TYPE_VU8);
635 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
636
637 scm_array_handle_release (&h);
638
639 return ret;
640 }
641 #undef FUNC_NAME
642
643 \f
644 /* Operations on bytes and octets. */
645
/* Scheme procedure `bytevector-u8-ref'.  The body is the native-endianness
   reader template instantiated for 8-bit unsigned integers: it validates
   BV, converts and range-checks INDEX, and returns the octet as a fixnum.  */
646 SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
647 (SCM bv, SCM index),
648 "Return the octet located at @var{index} in @var{bv}.")
649 #define FUNC_NAME s_scm_bytevector_u8_ref
650 {
651 INTEGER_NATIVE_REF (8, unsigned);
652 }
653 #undef FUNC_NAME
654
/* Scheme procedure `bytevector-s8-ref'.  The body is the native-endianness
   reader template instantiated for 8-bit signed integers: it validates BV,
   converts and range-checks INDEX, and returns the byte as a fixnum.  */
655 SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
656 (SCM bv, SCM index),
657 "Return the byte located at @var{index} in @var{bv}.")
658 #define FUNC_NAME s_scm_bytevector_s8_ref
659 {
660 INTEGER_NATIVE_REF (8, signed);
661 }
662 #undef FUNC_NAME
663
664 SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
665 (SCM bv, SCM index, SCM value),
666 "Return the octet located at @var{index} in @var{bv}.")
667 #define FUNC_NAME s_scm_bytevector_u8_set_x
668 {
669 INTEGER_NATIVE_SET (8, unsigned);
670 }
671 #undef FUNC_NAME
672
673 SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
674 (SCM bv, SCM index, SCM value),
675 "Return the octet located at @var{index} in @var{bv}.")
676 #define FUNC_NAME s_scm_bytevector_s8_set_x
677 {
678 INTEGER_NATIVE_SET (8, signed);
679 }
680 #undef FUNC_NAME
681
682 #undef OCTET_ACCESSOR_PROLOGUE
683
684
685 SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
686 (SCM bv),
687 "Return a newly allocated list of octets containing the "
688 "contents of @var{bv}.")
689 #define FUNC_NAME s_scm_bytevector_to_u8_list
690 {
691 SCM lst, pair;
692 unsigned c_len, i;
693 unsigned char *c_bv;
694
695 SCM_VALIDATE_BYTEVECTOR (1, bv);
696
697 c_len = SCM_BYTEVECTOR_LENGTH (bv);
698 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
699
700 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
701 for (i = 0, pair = lst;
702 i < c_len;
703 i++, pair = SCM_CDR (pair))
704 {
705 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
706 }
707
708 return lst;
709 }
710 #undef FUNC_NAME
711
712 SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
713 (SCM lst),
714 "Turn @var{lst}, a list of octets, into a bytevector.")
715 #define FUNC_NAME s_scm_u8_list_to_bytevector
716 {
717 SCM bv, item;
718 long c_len, i;
719 unsigned char *c_bv;
720
721 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
722
723 bv = make_bytevector (c_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
724 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
725
726 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
727 {
728 item = SCM_CAR (lst);
729
730 if (SCM_LIKELY (SCM_I_INUMP (item)))
731 {
732 long c_item;
733
734 c_item = SCM_I_INUM (item);
735 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
736 c_bv[i] = (unsigned char) c_item;
737 else
738 goto type_error;
739 }
740 else
741 goto type_error;
742 }
743
744 return bv;
745
746 type_error:
747 scm_wrong_type_arg (FUNC_NAME, 1, item);
748
749 return SCM_BOOL_F;
750 }
751 #undef FUNC_NAME
752
753 /* Compute the two's complement of VALUE (a positive integer) on SIZE octets
754 using (2^(SIZE * 8) - VALUE). */
755 static inline void
756 twos_complement (mpz_t value, size_t size)
757 {
758 unsigned long bit_count;
759
760 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
761 checking on SIZE performed earlier. */
762 bit_count = (unsigned long) size << 3UL;
763
764 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
765 mpz_ui_sub (value, 1UL << bit_count, value);
766 else
767 {
768 mpz_t max;
769
770 mpz_init (max);
771 mpz_ui_pow_ui (max, 2, bit_count);
772 mpz_sub (value, max, value);
773 mpz_clear (max);
774 }
775 }
776
777 static inline SCM
778 bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
779 SCM endianness)
780 {
781 SCM result;
782 mpz_t c_mpz;
783 int c_endianness, negative_p = 0;
784
785 if (signed_p)
786 {
787 if (scm_is_eq (endianness, scm_sym_big))
788 negative_p = c_bv[0] & 0x80;
789 else
790 negative_p = c_bv[c_size - 1] & 0x80;
791 }
792
793 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
794
795 mpz_init (c_mpz);
796 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
797 c_size /* word is C_SIZE-byte long */,
798 c_endianness,
799 0 /* nails */, c_bv);
800
801 if (signed_p && negative_p)
802 {
803 twos_complement (c_mpz, c_size);
804 mpz_neg (c_mpz, c_mpz);
805 }
806
807 result = scm_from_mpz (c_mpz);
808 mpz_clear (c_mpz); /* FIXME: Needed? */
809
810 return result;
811 }
812
813 static inline int
814 bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
815 SCM value, SCM endianness)
816 {
817 mpz_t c_mpz;
818 int c_endianness, c_sign, err = 0;
819
820 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
821
822 mpz_init (c_mpz);
823 scm_to_mpz (value, c_mpz);
824
825 c_sign = mpz_sgn (c_mpz);
826 if (c_sign < 0)
827 {
828 if (SCM_LIKELY (signed_p))
829 {
830 mpz_neg (c_mpz, c_mpz);
831 twos_complement (c_mpz, c_size);
832 }
833 else
834 {
835 err = -1;
836 goto finish;
837 }
838 }
839
840 if (c_sign == 0)
841 /* Zero. */
842 memset (c_bv, 0, c_size);
843 else
844 {
845 size_t word_count, value_size;
846
847 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
848 if (SCM_UNLIKELY (value_size > c_size))
849 {
850 err = -2;
851 goto finish;
852 }
853
854
855 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
856 c_size, c_endianness,
857 0 /* nails */, c_mpz);
858 if (SCM_UNLIKELY (word_count != 1))
859 /* Shouldn't happen since we already checked with VALUE_SIZE. */
860 abort ();
861 }
862
863 finish:
864 mpz_clear (c_mpz);
865
866 return err;
867 }
868
/* Common prologue of the arbitrary-size integer accessors.

   Expects `bv', `index' and `size' (all SCM) and FUNC_NAME to be in scope.
   Declares C_LEN (length of BV in octets), C_INDEX, C_SIZE and C_BV
   (contents of BV), and signals an out-of-range error if SIZE is zero or
   too large, or if the C_SIZE octets starting at C_INDEX do not lie
   entirely within BV.  The _SIGN argument is not used.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned long c_len, c_index, c_size;                                 \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_ulong (index);                                       \
  c_size = scm_to_ulong (size);                                         \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that             \
     multiplying it by 8 yields a number that fits in an                \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Subtract from C_LEN instead of adding C_INDEX and C_SIZE: the      \
     sum can wrap around for a huge C_INDEX and then pass the test.  */ \
  if (SCM_UNLIKELY (c_size > c_len || c_index > c_len - c_size))        \
    scm_out_of_range (FUNC_NAME, index);
887
888
889 /* Template of an integer reference function. */
890 #define GENERIC_INTEGER_REF(_sign) \
891 SCM result; \
892 \
893 if (c_size < 3) \
894 { \
895 int swap; \
896 _sign int value; \
897 \
898 swap = !scm_is_eq (endianness, scm_i_native_endianness); \
899 switch (c_size) \
900 { \
901 case 1: \
902 { \
903 _sign char c_value8; \
904 memcpy (&c_value8, c_bv, 1); \
905 value = c_value8; \
906 } \
907 break; \
908 case 2: \
909 { \
910 INT_TYPE (16, _sign) c_value16; \
911 memcpy (&c_value16, c_bv, 2); \
912 if (swap) \
913 value = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
914 else \
915 value = c_value16; \
916 } \
917 break; \
918 default: \
919 abort (); \
920 } \
921 \
922 result = SCM_I_MAKINUM ((_sign int) value); \
923 } \
924 else \
925 result = bytevector_large_ref ((char *) c_bv, \
926 c_size, SIGNEDNESS (_sign), \
927 endianness); \
928 \
929 return result;
930
931 static inline SCM
932 bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
933 {
934 GENERIC_INTEGER_REF (signed);
935 }
936
937 static inline SCM
938 bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
939 {
940 GENERIC_INTEGER_REF (unsigned);
941 }
942
943
944 /* Template of an integer assignment function. */
945 #define GENERIC_INTEGER_SET(_sign) \
946 if (c_size < 3) \
947 { \
948 _sign int c_value; \
949 \
950 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
951 goto range_error; \
952 \
953 c_value = SCM_I_INUM (value); \
954 switch (c_size) \
955 { \
956 case 1: \
957 if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value))) \
958 { \
959 _sign char c_value8; \
960 c_value8 = (_sign char) c_value; \
961 memcpy (c_bv, &c_value8, 1); \
962 } \
963 else \
964 goto range_error; \
965 break; \
966 \
967 case 2: \
968 if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value))) \
969 { \
970 int swap; \
971 INT_TYPE (16, _sign) c_value16; \
972 \
973 swap = !scm_is_eq (endianness, scm_i_native_endianness); \
974 \
975 if (swap) \
976 c_value16 = (INT_TYPE (16, _sign)) bswap_16 (c_value); \
977 else \
978 c_value16 = c_value; \
979 \
980 memcpy (c_bv, &c_value16, 2); \
981 } \
982 else \
983 goto range_error; \
984 break; \
985 \
986 default: \
987 abort (); \
988 } \
989 } \
990 else \
991 { \
992 int err; \
993 \
994 err = bytevector_large_set (c_bv, c_size, \
995 SIGNEDNESS (_sign), \
996 value, endianness); \
997 if (err) \
998 goto range_error; \
999 } \
1000 \
1001 return; \
1002 \
1003 range_error: \
1004 scm_out_of_range (FUNC_NAME, value); \
1005 return;
1006
1007 static inline void
1008 bytevector_signed_set (char *c_bv, size_t c_size,
1009 SCM value, SCM endianness,
1010 const char *func_name)
1011 #define FUNC_NAME func_name
1012 {
1013 GENERIC_INTEGER_SET (signed);
1014 }
1015 #undef FUNC_NAME
1016
1017 static inline void
1018 bytevector_unsigned_set (char *c_bv, size_t c_size,
1019 SCM value, SCM endianness,
1020 const char *func_name)
1021 #define FUNC_NAME func_name
1022 {
1023 GENERIC_INTEGER_SET (unsigned);
1024 }
1025 #undef FUNC_NAME
1026
1027 #undef GENERIC_INTEGER_SET
1028 #undef GENERIC_INTEGER_REF
1029
1030
1031 SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
1032 (SCM bv, SCM index, SCM endianness, SCM size),
1033 "Return the @var{size}-octet long unsigned integer at index "
1034 "@var{index} in @var{bv}.")
1035 #define FUNC_NAME s_scm_bytevector_uint_ref
1036 {
1037 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1038
1039 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
1040 }
1041 #undef FUNC_NAME
1042
1043 SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1044 (SCM bv, SCM index, SCM endianness, SCM size),
1045 "Return the @var{size}-octet long unsigned integer at index "
1046 "@var{index} in @var{bv}.")
1047 #define FUNC_NAME s_scm_bytevector_sint_ref
1048 {
1049 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1050
1051 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1052 }
1053 #undef FUNC_NAME
1054
1055 SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
1056 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1057 "Set the @var{size}-octet long unsigned integer at @var{index} "
1058 "to @var{value}.")
1059 #define FUNC_NAME s_scm_bytevector_uint_set_x
1060 {
1061 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1062
1063 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
1064 FUNC_NAME);
1065
1066 return SCM_UNSPECIFIED;
1067 }
1068 #undef FUNC_NAME
1069
1070 SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
1071 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1072 "Set the @var{size}-octet long signed integer at @var{index} "
1073 "to @var{value}.")
1074 #define FUNC_NAME s_scm_bytevector_sint_set_x
1075 {
1076 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1077
1078 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
1079 FUNC_NAME);
1080
1081 return SCM_UNSPECIFIED;
1082 }
1083 #undef FUNC_NAME
1084
1085
1086 \f
1087 /* Operations on integers of arbitrary size. */
1088
/* Body template of the bytevector->integer-list procedures.  Expects `bv',
   `endianness' and `size' (all SCM) and FUNC_NAME to be in scope.

   Returns the empty list for an empty bytevector.  Otherwise SIZE must be
   positive and no larger than the length of BV, or an out-of-range error
   is signalled; a zero SIZE in particular is rejected before it is used as
   a divisor.  The result holds one integer per complete group of SIZE
   octets; trailing octets that do not form a complete group are
   ignored.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  else if (SCM_UNLIKELY (c_size == 0 || c_len < c_size))                \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                     \
                                                                        \
      /* The quotient is a `size_t'; do not squeeze it through an       \
         `unsigned int'.  */                                            \
      lst = scm_make_list (scm_from_size_t (c_len / c_size),            \
                           SCM_UNSPECIFIED);                            \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
1121
/* Scheme procedure `bytevector->sint-list'.  The body is the
   INTEGERS_TO_LIST template instantiated for signed integers, so each
   group of SIZE octets is decoded with `bytevector_signed_ref'.  */
1122 SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
1123 3, 0, 0,
1124 (SCM bv, SCM endianness, SCM size),
1125 "Return a list of signed integers of @var{size} octets "
1126 "representing the contents of @var{bv}.")
1127 #define FUNC_NAME s_scm_bytevector_to_sint_list
1128 {
1129 INTEGERS_TO_LIST (signed);
1130 }
1131 #undef FUNC_NAME
1132
1133 SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1134 3, 0, 0,
1135 (SCM bv, SCM endianness, SCM size),
1136 "Return a list of unsigned integers of @var{size} octets "
1137 "representing the contents of @var{bv}.")
1138 #define FUNC_NAME s_scm_bytevector_to_uint_list
1139 {
1140 INTEGERS_TO_LIST (unsigned);
1141 }
1142 #undef FUNC_NAME
1143
1144 #undef INTEGER_TO_LIST
1145
1146
/* Body template of the integer-list->bytevector procedures.  Expects `lst',
   `endianness' and `size' (all SCM) and FUNC_NAME to be in scope.

   LST must be a proper list and SIZE a positive integer.  An out-of-range
   error on SIZE is signalled if SIZE is zero or too large, or if the total
   number of octets (list length times SIZE) would not fit in a `size_t'.
   Each element of LST is encoded on SIZE octets, in order; elements that
   do not fit are reported by the per-element setter.  */
#define INTEGER_LIST_TO_BYTEVECTOR(_sign)                               \
  SCM bv;                                                               \
  long c_len;                                                           \
  size_t c_size;                                                        \
  char *c_bv, *c_bv_ptr;                                                \
                                                                        \
  SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);                            \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  /* Refuse sizes for which C_LEN * C_SIZE would wrap around: the       \
     bytevector would be allocated too small for the stores below.  */  \
  if (SCM_UNLIKELY ((size_t) c_len >= SCM_I_SIZE_MAX / c_size))         \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  bv = make_bytevector (c_len * c_size, SCM_ARRAY_ELEMENT_TYPE_VU8);    \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  for (c_bv_ptr = c_bv;                                                 \
       !scm_is_null (lst);                                              \
       lst = SCM_CDR (lst), c_bv_ptr += c_size)                         \
    {                                                                   \
      bytevector_ ## _sign ## _set (c_bv_ptr, c_size,                   \
                                    SCM_CAR (lst), endianness,          \
                                    FUNC_NAME);                         \
    }                                                                   \
                                                                        \
  return bv;
1173
1174
/* Scheme procedure `uint-list->bytevector', taking LST, ENDIANNESS and
   SIZE.  The whole body comes from the INTEGER_LIST_TO_BYTEVECTOR template
   instantiated for unsigned integers; FUNC_NAME is defined around it
   because the template uses it for error reporting.  */
SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
            3, 0, 0,
            (SCM lst, SCM endianness, SCM size),
            "Return a bytevector containing the unsigned integers "
            "listed in @var{lst} and encoded on @var{size} octets "
            "according to @var{endianness}.")
#define FUNC_NAME s_scm_uint_list_to_bytevector
{
  INTEGER_LIST_TO_BYTEVECTOR (unsigned);
}
#undef FUNC_NAME
1186
/* Scheme procedure `sint-list->bytevector', taking LST, ENDIANNESS and
   SIZE.  The whole body comes from the INTEGER_LIST_TO_BYTEVECTOR template
   instantiated for signed integers; FUNC_NAME is defined around it because
   the template uses it for error reporting.  */
SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
            3, 0, 0,
            (SCM lst, SCM endianness, SCM size),
            "Return a bytevector containing the signed integers "
            "listed in @var{lst} and encoded on @var{size} octets "
            "according to @var{endianness}.")
#define FUNC_NAME s_scm_sint_list_to_bytevector
{
  INTEGER_LIST_TO_BYTEVECTOR (signed);
}
#undef FUNC_NAME

/* Both users of the template have been defined; retire it.  */
#undef INTEGER_LIST_TO_BYTEVECTOR
1200
1201
1202 \f
1203 /* Operations on 16-bit integers. */
1204
/* Scheme procedure `bytevector-u16-ref', taking BV, INDEX and ENDIANNESS.
   The body is the INTEGER_REF template instantiated for unsigned 16-bit
   integers.  */
SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the unsigned 16-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_u16_ref
{
  INTEGER_REF (16, unsigned);
}
#undef FUNC_NAME
1215
/* Scheme procedure `bytevector-s16-ref', taking BV, INDEX and ENDIANNESS.
   The body is the INTEGER_REF template instantiated for signed 16-bit
   integers.  */
SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the signed 16-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_s16_ref
{
  INTEGER_REF (16, signed);
}
#undef FUNC_NAME
1226
/* Scheme procedure `bytevector-u16-native-ref', taking BV and INDEX.  The
   body is the INTEGER_NATIVE_REF template instantiated for unsigned 16-bit
   integers.  This function is also an entry of the bytevector_ref_fns
   table.  */
SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
            2, 0, 0,
            (SCM bv, SCM index),
            "Return the unsigned 16-bit integer from @var{bv} at "
            "@var{index} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u16_native_ref
{
  INTEGER_NATIVE_REF (16, unsigned);
}
#undef FUNC_NAME
1237
1238 SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1239 2, 0, 0,
1240 (SCM bv, SCM index),
1241 "Return the unsigned 16-bit integer from @var{bv} at "
1242 "@var{index} using the native endianness.")
1243 #define FUNC_NAME s_scm_bytevector_s16_native_ref
1244 {
1245 INTEGER_NATIVE_REF (16, signed);
1246 }
1247 #undef FUNC_NAME
1248
/* Scheme procedure `bytevector-u16-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  The body is the INTEGER_SET template instantiated for
   unsigned 16-bit integers.  */
SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_u16_set_x
{
  INTEGER_SET (16, unsigned);
}
#undef FUNC_NAME
1259
/* Scheme procedure `bytevector-s16-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  The body is the INTEGER_SET template instantiated for
   signed 16-bit integers.  */
SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_s16_set_x
{
  INTEGER_SET (16, signed);
}
#undef FUNC_NAME
1270
/* Scheme procedure `bytevector-u16-native-set!', taking BV, INDEX and
   VALUE.  The body is the INTEGER_NATIVE_SET template instantiated for
   unsigned 16-bit integers.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the unsigned integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u16_native_set_x
{
  INTEGER_NATIVE_SET (16, unsigned);
}
#undef FUNC_NAME
1281
/* Scheme procedure `bytevector-s16-native-set!', taking BV, INDEX and
   VALUE.  The body is the INTEGER_NATIVE_SET template instantiated for
   signed 16-bit integers.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the signed integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_s16_native_set_x
{
  INTEGER_NATIVE_SET (16, signed);
}
#undef FUNC_NAME
1292
1293
1294 \f
1295 /* Operations on 32-bit integers. */
1296
1297 /* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1298 arbitrary 32-bit integers. Thus we fall back to using the
1299 `large_{ref,set}' variants on 32-bit machines. */
1300
/* Body template: return the _LEN-bit integer of signedness _SIGN stored at
   INDEX in BV, decoded by bytevector_large_ref with the caller's
   ENDIANNESS (validated as a symbol in argument position 3).  `c_bv' and
   `c_index' are not declared here; presumably INTEGER_ACCESSOR_PROLOGUE
   declares and range-checks them -- confirm against its definition.  */
#define LARGE_INTEGER_REF(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE(_len, _sign);                               \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8,      \
                                SIGNEDNESS (_sign), endianness));
1307
/* Body template: store VALUE as a _LEN-bit integer of signedness _SIGN at
   INDEX in BV using bytevector_large_set and the caller's ENDIANNESS
   (validated as a symbol in argument position 4).  A non-zero result from
   bytevector_large_set is reported as an out-of-range error on VALUE;
   otherwise SCM_UNSPECIFIED is returned.  `c_bv' and `c_index' are not
   declared here; presumably INTEGER_ACCESSOR_PROLOGUE provides them --
   confirm against its definition.  */
#define LARGE_INTEGER_SET(_len, _sign)                                  \
  int err;                                                              \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  err = bytevector_large_set ((char *) c_bv + c_index, _len / 8,        \
                              SIGNEDNESS (_sign), value, endianness);   \
  if (SCM_UNLIKELY (err))                                               \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1319
/* Body template: same as LARGE_INTEGER_REF, except that no ENDIANNESS
   argument is expected and scm_i_native_endianness is passed to
   bytevector_large_ref instead.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE(_len, _sign);                               \
  return (bytevector_large_ref ((char *) c_bv + c_index, _len / 8,      \
                                SIGNEDNESS (_sign), scm_i_native_endianness));
1324
/* Body template: same as LARGE_INTEGER_SET, except that no ENDIANNESS
   argument is expected and scm_i_native_endianness is passed to
   bytevector_large_set instead.  A non-zero result is reported as an
   out-of-range error on VALUE.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign)                           \
  int err;                                                              \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  err = bytevector_large_set ((char *) c_bv + c_index, _len / 8,        \
                              SIGNEDNESS (_sign), value,                \
                              scm_i_native_endianness);                 \
  if (SCM_UNLIKELY (err))                                               \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1336
1337
/* Scheme procedure `bytevector-u32-ref', taking BV, INDEX and ENDIANNESS.
   When pointers are wider than 4 bytes the INTEGER_REF template is used;
   otherwise the body falls back to LARGE_INTEGER_REF, which goes through
   bytevector_large_ref.  */
SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the unsigned 32-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_u32_ref
{
#if SIZEOF_VOID_P > 4
  INTEGER_REF (32, unsigned);
#else
  LARGE_INTEGER_REF (32, unsigned);
#endif
}
#undef FUNC_NAME
1352
/* Scheme procedure `bytevector-s32-ref', taking BV, INDEX and ENDIANNESS.
   When pointers are wider than 4 bytes the INTEGER_REF template is used;
   otherwise the body falls back to LARGE_INTEGER_REF, which goes through
   bytevector_large_ref.  */
SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the signed 32-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_s32_ref
{
#if SIZEOF_VOID_P > 4
  INTEGER_REF (32, signed);
#else
  LARGE_INTEGER_REF (32, signed);
#endif
}
#undef FUNC_NAME
1367
/* Scheme procedure `bytevector-u32-native-ref', taking BV and INDEX.  When
   pointers are wider than 4 bytes the INTEGER_NATIVE_REF template is used;
   otherwise the body falls back to LARGE_INTEGER_NATIVE_REF.  This
   function is also an entry of the bytevector_ref_fns table.  */
SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
            2, 0, 0,
            (SCM bv, SCM index),
            "Return the unsigned 32-bit integer from @var{bv} at "
            "@var{index} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u32_native_ref
{
#if SIZEOF_VOID_P > 4
  INTEGER_NATIVE_REF (32, unsigned);
#else
  LARGE_INTEGER_NATIVE_REF (32, unsigned);
#endif
}
#undef FUNC_NAME
1382
1383 SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1384 2, 0, 0,
1385 (SCM bv, SCM index),
1386 "Return the unsigned 32-bit integer from @var{bv} at "
1387 "@var{index} using the native endianness.")
1388 #define FUNC_NAME s_scm_bytevector_s32_native_ref
1389 {
1390 #if SIZEOF_VOID_P > 4
1391 INTEGER_NATIVE_REF (32, signed);
1392 #else
1393 LARGE_INTEGER_NATIVE_REF (32, signed);
1394 #endif
1395 }
1396 #undef FUNC_NAME
1397
/* Scheme procedure `bytevector-u32-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  When pointers are wider than 4 bytes the INTEGER_SET
   template is used; otherwise the body falls back to LARGE_INTEGER_SET,
   which goes through bytevector_large_set.  */
SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_u32_set_x
{
#if SIZEOF_VOID_P > 4
  INTEGER_SET (32, unsigned);
#else
  LARGE_INTEGER_SET (32, unsigned);
#endif
}
#undef FUNC_NAME
1412
/* Scheme procedure `bytevector-s32-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  When pointers are wider than 4 bytes the INTEGER_SET
   template is used; otherwise the body falls back to LARGE_INTEGER_SET,
   which goes through bytevector_large_set.  */
SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_s32_set_x
{
#if SIZEOF_VOID_P > 4
  INTEGER_SET (32, signed);
#else
  LARGE_INTEGER_SET (32, signed);
#endif
}
#undef FUNC_NAME
1427
/* Scheme procedure `bytevector-u32-native-set!', taking BV, INDEX and
   VALUE.  When pointers are wider than 4 bytes the INTEGER_NATIVE_SET
   template is used; otherwise the body falls back to
   LARGE_INTEGER_NATIVE_SET.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the unsigned integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u32_native_set_x
{
#if SIZEOF_VOID_P > 4
  INTEGER_NATIVE_SET (32, unsigned);
#else
  LARGE_INTEGER_NATIVE_SET (32, unsigned);
#endif
}
#undef FUNC_NAME
1442
/* Scheme procedure `bytevector-s32-native-set!', taking BV, INDEX and
   VALUE.  When pointers are wider than 4 bytes the INTEGER_NATIVE_SET
   template is used; otherwise the body falls back to
   LARGE_INTEGER_NATIVE_SET.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the signed integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_s32_native_set_x
{
#if SIZEOF_VOID_P > 4
  INTEGER_NATIVE_SET (32, signed);
#else
  LARGE_INTEGER_NATIVE_SET (32, signed);
#endif
}
#undef FUNC_NAME
1457
1458
1459 \f
1460 /* Operations on 64-bit integers. */
1461
1462 /* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1463
/* Scheme procedure `bytevector-u64-ref', taking BV, INDEX and ENDIANNESS.
   The body is the LARGE_INTEGER_REF template instantiated for unsigned
   64-bit integers.  */
SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the unsigned 64-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_u64_ref
{
  LARGE_INTEGER_REF (64, unsigned);
}
#undef FUNC_NAME
1474
/* Scheme procedure `bytevector-s64-ref', taking BV, INDEX and ENDIANNESS.
   The body is the LARGE_INTEGER_REF template instantiated for signed
   64-bit integers.  */
SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the signed 64-bit integer from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_s64_ref
{
  LARGE_INTEGER_REF (64, signed);
}
#undef FUNC_NAME
1485
/* Scheme procedure `bytevector-u64-native-ref', taking BV and INDEX.  The
   body is the LARGE_INTEGER_NATIVE_REF template instantiated for unsigned
   64-bit integers.  This function is also an entry of the
   bytevector_ref_fns table.  */
SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
            2, 0, 0,
            (SCM bv, SCM index),
            "Return the unsigned 64-bit integer from @var{bv} at "
            "@var{index} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u64_native_ref
{
  LARGE_INTEGER_NATIVE_REF (64, unsigned);
}
#undef FUNC_NAME
1496
1497 SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1498 2, 0, 0,
1499 (SCM bv, SCM index),
1500 "Return the unsigned 64-bit integer from @var{bv} at "
1501 "@var{index} using the native endianness.")
1502 #define FUNC_NAME s_scm_bytevector_s64_native_ref
1503 {
1504 LARGE_INTEGER_NATIVE_REF (64, signed);
1505 }
1506 #undef FUNC_NAME
1507
/* Scheme procedure `bytevector-u64-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  The body is the LARGE_INTEGER_SET template instantiated for
   unsigned 64-bit integers.  */
SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_u64_set_x
{
  LARGE_INTEGER_SET (64, unsigned);
}
#undef FUNC_NAME
1518
/* Scheme procedure `bytevector-s64-set!', taking BV, INDEX, VALUE and
   ENDIANNESS.  The body is the LARGE_INTEGER_SET template instantiated for
   signed 64-bit integers.  */
SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_s64_set_x
{
  LARGE_INTEGER_SET (64, signed);
}
#undef FUNC_NAME
1529
/* Scheme procedure `bytevector-u64-native-set!', taking BV, INDEX and
   VALUE.  The body is the LARGE_INTEGER_NATIVE_SET template instantiated
   for unsigned 64-bit integers.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the unsigned integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_u64_native_set_x
{
  LARGE_INTEGER_NATIVE_SET (64, unsigned);
}
#undef FUNC_NAME
1540
/* Scheme procedure `bytevector-s64-native-set!', taking BV, INDEX and
   VALUE.  The body is the LARGE_INTEGER_NATIVE_SET template instantiated
   for signed 64-bit integers.  This function is also an entry of the
   bytevector_set_fns table.  */
SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the signed integer @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_s64_native_set_x
{
  LARGE_INTEGER_NATIVE_SET (64, signed);
}
#undef FUNC_NAME
1551
1552
1553 \f
1554 /* Operations on IEEE-754 numbers. */
1555
1556 /* There are two possible word endians, visible in glibc's <ieee754.h>.
1557 However, in R6RS, when the endianness is `little', little endian is
1558 assumed for both the byte order and the word order. This is clear from
1559 Section 2.1 of R6RS-lib (in response to
1560 http://www.r6rs.org/formal-comments/comment-187.txt). */
1561
1562
/* Convert to/from a floating-point number stored with the endianness
   opposite to the host's.

   R6RS uses a single endianness for both byte and word order (see the
   comment above), so the foreign representation is the native one with its
   octets in reverse order.  The conversion therefore reverses octets
   explicitly.  Copying the sign/exponent/mantissa bit-fields between the
   `big_endian' and `little_endian' views of the union is not equivalent:
   the position a compiler gives a bit-field depends on the host, so that
   approach moves fields around within the word without producing the
   byte-swapped encoding.  It only round-trips with itself.
   NOTE(review): this assumes the `scm_ieee754_*' unions follow the layout
   of glibc's <ieee754.h> -- confirm against libguile/ieee-754.h.  */

/* Store at DST the LEN octets found at SRC, in reverse order.  The two
   regions must not overlap.  */
static inline void
ieee754_reverse_octets (unsigned char *dst, const unsigned char *src,
                        size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    dst[i] = src[len - 1 - i];
}

/* Write into TARGET the foreign-endian encoding of SOURCE.  Only the first
   sizeof (float) octets of TARGET are written.  */
static inline void
float_to_foreign_endianness (union scm_ieee754_float *target,
                             float source)
{
  unsigned char native[sizeof (float)];

  memcpy (native, &source, sizeof (native));
  ieee754_reverse_octets ((unsigned char *) target, native, sizeof (native));
}

/* Return the float whose foreign-endian encoding is stored in the first
   sizeof (float) octets of SOURCE.  */
static inline float
float_from_foreign_endianness (const union scm_ieee754_float *source)
{
  unsigned char native[sizeof (float)];
  float result;

  ieee754_reverse_octets (native, (const unsigned char *) source,
                          sizeof (native));
  memcpy (&result, native, sizeof (result));

  return result;
}

/* Write into TARGET the foreign-endian encoding of SOURCE.  Only the first
   sizeof (double) octets of TARGET are written.  */
static inline void
double_to_foreign_endianness (union scm_ieee754_double *target,
                              double source)
{
  unsigned char native[sizeof (double)];

  memcpy (native, &source, sizeof (native));
  ieee754_reverse_octets ((unsigned char *) target, native, sizeof (native));
}

/* Return the double whose foreign-endian encoding is stored in the first
   sizeof (double) octets of SOURCE.  */
static inline double
double_from_foreign_endianness (const union scm_ieee754_double *source)
{
  unsigned char native[sizeof (double)];
  double result;

  ieee754_reverse_octets (native, (const unsigned char *) source,
                          sizeof (native));
  memcpy (&result, native, sizeof (result));

  return result;
}
1647
/* Template macros to abstract over doubles and floats.
   XXX: Guile can only convert to/from doubles.  */

/* Name of the union type matching _C_TYPE (`float' or `double').  */
#define IEEE754_UNION(_c_type) union scm_ieee754_ ## _c_type
/* C-to-Scheme conversion; always scm_from_double, whatever _C_TYPE is.  */
#define IEEE754_TO_SCM(_c_type) scm_from_double
/* Scheme-to-C conversion; always scm_to_double, whatever _C_TYPE is.  */
#define IEEE754_FROM_SCM(_c_type) scm_to_double
/* Name of the `<type>_from_foreign_endianness' helper for _C_TYPE.  */
#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) \
  _c_type ## _from_foreign_endianness
/* Name of the `<type>_to_foreign_endianness' helper for _C_TYPE.  */
#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) \
  _c_type ## _to_foreign_endianness
1657
1658
/* FIXME: SCM_VALIDATE_REAL rejects integers, etc. grrr */
/* Assert, through SCM_ASSERT_TYPE, that scm_rational_p holds for V.  POS
   is the argument position and "real" the expected-type string handed to
   the assertion; FUNC_NAME must be defined by the caller.
   NOTE(review): the predicate used is `rational?', not `real?' -- confirm
   that it accepts every value these setters should accept.  */
#define VALIDATE_REAL(pos, v) \
  do { \
    SCM_ASSERT_TYPE (scm_is_true (scm_rational_p (v)), v, pos, FUNC_NAME, "real"); \
  } while (0)
1664
/* Template getters and setters. */
1666
/* Prologue shared by the IEEE-754 accessors: reuses the integer accessor
   prologue with a width of sizeof (_TYPE) * 8 bits.  The expansion already
   ends with a semicolon, so `IEEE754_ACCESSOR_PROLOGUE (x);' leaves an
   extra empty statement behind.  */
#define IEEE754_ACCESSOR_PROLOGUE(_type) \
  INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1669
/* Body template: read a _TYPE (`float' or `double') from BV at INDEX with
   the given ENDIANNESS and return it as a Scheme number.  When ENDIANNESS
   is the native one the octets are copied directly; otherwise they are
   copied into the matching union and converted by the
   `<type>_from_foreign_endianness' helper.  `c_bv' and `c_index' are not
   declared here; presumably the prologue provides them.  */
#define IEEE754_REF(_type)                                      \
  _type c_result;                                               \
                                                                \
  IEEE754_ACCESSOR_PROLOGUE (_type);                            \
  SCM_VALIDATE_SYMBOL (3, endianness);                          \
                                                                \
  if (scm_is_eq (endianness, scm_i_native_endianness))          \
    memcpy (&c_result, &c_bv[c_index], sizeof (c_result));      \
  else                                                          \
    {                                                           \
      IEEE754_UNION (_type) c_raw;                              \
                                                                \
      memcpy (&c_raw, &c_bv[c_index], sizeof (c_raw));          \
      c_result =                                                \
        IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_raw);       \
    }                                                           \
                                                                \
  return (IEEE754_TO_SCM (_type) (c_result));
1688
/* Body template: read a _TYPE from BV at INDEX by copying its octets
   as-is (native endianness) and return it as a Scheme number.  */
#define IEEE754_NATIVE_REF(_type)                               \
  _type c_result;                                               \
                                                                \
  IEEE754_ACCESSOR_PROLOGUE (_type);                            \
                                                                \
  memcpy (&c_result, &c_bv[c_index], sizeof (c_result));        \
  return (IEEE754_TO_SCM (_type) (c_result));
1696
/* Body template: convert VALUE with scm_to_double, narrow it to _TYPE and
   store it in BV at INDEX with the given ENDIANNESS.  VALUE is checked by
   VALIDATE_REAL first.  When ENDIANNESS is the native one the octets are
   copied directly; otherwise the value goes through the
   `<type>_to_foreign_endianness' helper and the resulting union is copied.
   Returns SCM_UNSPECIFIED.  */
#define IEEE754_SET(_type)                                      \
  _type c_value;                                                \
                                                                \
  IEEE754_ACCESSOR_PROLOGUE (_type);                            \
  VALIDATE_REAL (3, value);                                     \
  SCM_VALIDATE_SYMBOL (4, endianness);                          \
  c_value = IEEE754_FROM_SCM (_type) (value);                   \
                                                                \
  if (scm_is_eq (endianness, scm_i_native_endianness))          \
    memcpy (&c_bv[c_index], &c_value, sizeof (c_value));        \
  else                                                          \
    {                                                           \
      IEEE754_UNION (_type) c_raw;                              \
                                                                \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_raw, c_value);  \
      memcpy (&c_bv[c_index], &c_raw, sizeof (c_raw));          \
    }                                                           \
                                                                \
  return SCM_UNSPECIFIED;
1716
/* Body template: convert VALUE (checked by VALIDATE_REAL) with
   scm_to_double, narrow it to _TYPE and copy its octets as-is (native
   endianness) into BV at INDEX.  Returns SCM_UNSPECIFIED.  */
#define IEEE754_NATIVE_SET(_type)                               \
  _type c_value;                                                \
                                                                \
  IEEE754_ACCESSOR_PROLOGUE (_type);                            \
  VALIDATE_REAL (3, value);                                     \
  c_value = IEEE754_FROM_SCM (_type) (value);                   \
                                                                \
  memcpy (&c_bv[c_index], &c_value, sizeof (c_value));          \
  return SCM_UNSPECIFIED;
1726
1727
1728 /* Single precision. */
1729
/* Scheme procedure `bytevector-ieee-single-ref', taking BV, INDEX and
   ENDIANNESS.  The body is the IEEE754_REF template instantiated for
   `float'.  */
SCM_DEFINE (scm_bytevector_ieee_single_ref,
            "bytevector-ieee-single-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the IEEE-754 single from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_ieee_single_ref
{
  IEEE754_REF (float);
}
#undef FUNC_NAME
1741
/* Scheme procedure `bytevector-ieee-single-native-ref', taking BV and
   INDEX.  The body is the IEEE754_NATIVE_REF template instantiated for
   `float'.  This function is also an entry of the bytevector_ref_fns
   table.  */
SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
            "bytevector-ieee-single-native-ref",
            2, 0, 0,
            (SCM bv, SCM index),
            "Return the IEEE-754 single from @var{bv} at "
            "@var{index} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
{
  IEEE754_NATIVE_REF (float);
}
#undef FUNC_NAME
1753
/* Scheme procedure `bytevector-ieee-single-set!', taking BV, INDEX, VALUE
   and ENDIANNESS.  The body is the IEEE754_SET template instantiated for
   `float'; VALUE is converted to a double first and then narrowed.  */
SCM_DEFINE (scm_bytevector_ieee_single_set_x,
            "bytevector-ieee-single-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store real @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_ieee_single_set_x
{
  IEEE754_SET (float);
}
#undef FUNC_NAME
1765
/* Scheme procedure `bytevector-ieee-single-native-set!', taking BV, INDEX
   and VALUE.  The body is the IEEE754_NATIVE_SET template instantiated for
   `float'.  This function is also an entry of the bytevector_set_fns
   table.  */
SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
            "bytevector-ieee-single-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the real @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
{
  IEEE754_NATIVE_SET (float);
}
#undef FUNC_NAME
1777
1778
1779 /* Double precision. */
1780
/* Scheme procedure `bytevector-ieee-double-ref', taking BV, INDEX and
   ENDIANNESS.  The body is the IEEE754_REF template instantiated for
   `double'.  */
SCM_DEFINE (scm_bytevector_ieee_double_ref,
            "bytevector-ieee-double-ref",
            3, 0, 0,
            (SCM bv, SCM index, SCM endianness),
            "Return the IEEE-754 double from @var{bv} at "
            "@var{index}.")
#define FUNC_NAME s_scm_bytevector_ieee_double_ref
{
  IEEE754_REF (double);
}
#undef FUNC_NAME
1792
/* Scheme procedure `bytevector-ieee-double-native-ref', taking BV and
   INDEX.  The body is the IEEE754_NATIVE_REF template instantiated for
   `double'.  This function is also an entry of the bytevector_ref_fns
   table.  */
SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
            "bytevector-ieee-double-native-ref",
            2, 0, 0,
            (SCM bv, SCM index),
            "Return the IEEE-754 double from @var{bv} at "
            "@var{index} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
{
  IEEE754_NATIVE_REF (double);
}
#undef FUNC_NAME
1804
/* Scheme procedure `bytevector-ieee-double-set!', taking BV, INDEX, VALUE
   and ENDIANNESS.  The body is the IEEE754_SET template instantiated for
   `double'.  */
SCM_DEFINE (scm_bytevector_ieee_double_set_x,
            "bytevector-ieee-double-set!",
            4, 0, 0,
            (SCM bv, SCM index, SCM value, SCM endianness),
            "Store real @var{value} in @var{bv} at @var{index} according to "
            "@var{endianness}.")
#define FUNC_NAME s_scm_bytevector_ieee_double_set_x
{
  IEEE754_SET (double);
}
#undef FUNC_NAME
1816
/* Scheme procedure `bytevector-ieee-double-native-set!', taking BV, INDEX
   and VALUE.  The body is the IEEE754_NATIVE_SET template instantiated for
   `double'.  This function is also an entry of the bytevector_set_fns
   table.  */
SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
            "bytevector-ieee-double-native-set!",
            3, 0, 0,
            (SCM bv, SCM index, SCM value),
            "Store the real @var{value} at index @var{index} "
            "of @var{bv} using the native endianness.")
#define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
{
  IEEE754_NATIVE_SET (double);
}
#undef FUNC_NAME
1828
1829
/* Retire the IEEE-754 templates now that every accessor is defined.
   NOTE(review): IEEE754_ACCESSOR_PROLOGUE and VALIDATE_REAL are not in
   this list and stay defined -- confirm whether that is intentional.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1839
1840 \f
1841 /* Operations on strings. */
1842
1843
/* Define `utf<WIDTH>_strlen', which measures a zero-terminated string of
   WIDTH-bit code units and returns its length in bytes, terminator
   excluded.  */
#define UTF_STRLEN_FUNCTION(_utf_width)                                 \
static inline size_t                                                    \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str)      \
{                                                                       \
  const uint ## _utf_width ## _t *end = str;                            \
                                                                        \
  /* Walk up to the first zero code unit.  */                           \
  while (*end != 0)                                                     \
    end++;                                                              \
                                                                        \
  return ((size_t) (end - str)) * ((_utf_width) / 8);                   \
}

/* Only the 8-bit variant is instantiated.  */
UTF_STRLEN_FUNCTION (8)


/* Length in bytes of STR, a zero-terminated UTF-(UTF_WIDTH) string; simply
   dispatches to the `utf<WIDTH>_strlen' function generated above.  */
#define UTF_STRLEN(_utf_width, _str)            \
  utf ## _utf_width ## _strlen (_str)
1867
1868 /* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1869 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1870 encoding name). */
1871 static inline void
1872 utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1873 {
1874 strcpy (name, "UTF-");
1875 strcat (name, ((utf_width == 8)
1876 ? "8"
1877 : ((utf_width == 16)
1878 ? "16"
1879 : ((utf_width == 32)
1880 ? "32"
1881 : "??"))));
1882 strcat (name,
1883 ((scm_is_eq (endianness, scm_sym_big))
1884 ? "BE"
1885 : ((scm_is_eq (endianness, scm_sym_little))
1886 ? "LE"
1887 : "unknown")));
1888 }
1889
1890 /* Maximum length of a UTF encoding name. */
1891 #define MAX_UTF_ENCODING_NAME_LEN 16
1892
/* Produce the body of a `string->utf' function.  The enclosing function
   must provide the SCM parameters STR and ENDIANNESS and define FUNC_NAME.

   STR is first written out in the locale encoding, then converted with
   mem_iconveh to UTF-(_UTF_WIDTH) in the requested byte order (big endian
   when ENDIANNESS is unbound), and the result is copied into a new
   bytevector.

   The locale copy is made with a retry loop: scm_to_locale_stringbuf is
   handed the real capacity of the buffer (C_BUF_LEN) and, when it reports
   that more room is needed, a buffer of the reported size is used instead.
   Passing the character count as the capacity on every attempt would never
   terminate for strings whose locale encoding is longer than their
   character count.  */
#define STRING_TO_UTF(_utf_width)                                       \
  SCM utf = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str;                                                          \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  char *c_utf = NULL, *c_locale;                                        \
  size_t c_strlen, c_raw_strlen, c_buf_len, c_utf_len = 0;              \
                                                                        \
  SCM_VALIDATE_STRING (1, str);                                         \
  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_strlen = scm_c_string_length (str);                                 \
  c_raw_strlen = c_strlen * ((_utf_width) / 8);                         \
  do                                                                    \
    {                                                                   \
      c_buf_len = c_raw_strlen;                                         \
      c_str = (char *) alloca (c_buf_len + 1);                          \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_buf_len);   \
    }                                                                   \
  while (c_raw_strlen > c_buf_len);                                     \
  c_str[c_raw_strlen] = '\0';                                           \
                                                                        \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_str, c_raw_strlen,                               \
                     c_locale, c_utf_name,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_utf, &c_utf_len);                               \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",        \
                      scm_list_1 (str), err);                           \
  else                                                                  \
    {                                                                   \
      /* C_UTF is null-terminated.  It is malloc(3)-allocated, so we cannot \
         use `scm_c_take_bytevector ()'.  */                            \
      scm_dynwind_begin (0);                                            \
      scm_dynwind_free (c_utf);                                         \
                                                                        \
      utf = make_bytevector (c_utf_len,                                 \
                             SCM_ARRAY_ELEMENT_TYPE_VU8);               \
      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,                     \
              c_utf_len);                                               \
                                                                        \
      scm_dynwind_end ();                                               \
    }                                                                   \
                                                                        \
  return (utf);
1946
1947
1948
1949 SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1950 1, 0, 0,
1951 (SCM str),
1952 "Return a newly allocated bytevector that contains the UTF-8 "
1953 "encoding of @var{str}.")
1954 #define FUNC_NAME s_scm_string_to_utf8
1955 {
1956 SCM utf;
1957 char *c_str;
1958 uint8_t *c_utf;
1959 size_t c_strlen, c_raw_strlen;
1960
1961 SCM_VALIDATE_STRING (1, str);
1962
1963 c_strlen = scm_c_string_length (str);
1964 c_raw_strlen = c_strlen;
1965 do
1966 {
1967 c_str = (char *) alloca (c_raw_strlen + 1);
1968 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1969 }
1970 while (c_raw_strlen > c_strlen);
1971 c_str[c_raw_strlen] = '\0';
1972
1973 c_utf = u8_strconv_from_locale (c_str);
1974 if (SCM_UNLIKELY (c_utf == NULL))
1975 scm_syserror (FUNC_NAME);
1976 else
1977 {
1978 /* C_UTF is null-terminated. It is malloc(3)-allocated, so we cannot
1979 use `scm_c_take_bytevector ()'. */
1980 scm_dynwind_begin (0);
1981 scm_dynwind_free (c_utf);
1982
1983 utf = make_bytevector (UTF_STRLEN (8, c_utf),
1984 SCM_ARRAY_ELEMENT_TYPE_VU8);
1985 memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf,
1986 UTF_STRLEN (8, c_utf));
1987
1988 scm_dynwind_end ();
1989 }
1990
1991 return (utf);
1992 }
1993 #undef FUNC_NAME
1994
/* Scheme procedure `string->utf16', taking STR and an optional ENDIANNESS.
   The body is the STRING_TO_UTF template instantiated for 16-bit code
   units; the template substitutes scm_sym_big when ENDIANNESS is not
   supplied.  */
SCM_DEFINE (scm_string_to_utf16, "string->utf16",
            1, 1, 0,
            (SCM str, SCM endianness),
            "Return a newly allocated bytevector that contains the UTF-16 "
            "encoding of @var{str}.")
#define FUNC_NAME s_scm_string_to_utf16
{
  STRING_TO_UTF (16);
}
#undef FUNC_NAME
2005
/* Scheme procedure `string->utf32', taking STR and an optional ENDIANNESS.
   The body is the STRING_TO_UTF template instantiated for 32-bit code
   units; the template substitutes scm_sym_big when ENDIANNESS is not
   supplied.  */
SCM_DEFINE (scm_string_to_utf32, "string->utf32",
            1, 1, 0,
            (SCM str, SCM endianness),
            "Return a newly allocated bytevector that contains the UTF-32 "
            "encoding of @var{str}.")
#define FUNC_NAME s_scm_string_to_utf32
{
  STRING_TO_UTF (32);
}
#undef FUNC_NAME
2016
2017
/* Produce the body of a function that converts a UTF-encoded bytevector to
   a string.  The enclosing function must provide the SCM parameters UTF
   and ENDIANNESS and define FUNC_NAME.

   The contents of UTF are converted with mem_iconveh from
   UTF-(_UTF_WIDTH), in the requested byte order (big endian when
   ENDIANNESS is unbound), to the locale encoding.  The converted buffer is
   handed to scm_take_locale_stringn.  A conversion failure is reported
   with scm_syserror_msg.

   The unbound test uses scm_is_eq, like the other SCM comparisons in this
   file, rather than `==' on SCM values.  */
#define UTF_TO_STRING(_utf_width)                                       \
  SCM str = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str = NULL, *c_locale;                                        \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  const char *c_utf;                                                    \
  size_t c_strlen = 0, c_utf_len;                                       \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, utf);                                     \
  if (scm_is_eq (endianness, SCM_UNDEFINED))                            \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);                              \
  c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                       \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_locale = (char *) alloca (strlen (locale_charset ()) + 1);          \
  strcpy (c_locale, locale_charset ());                                 \
                                                                        \
  err = mem_iconveh (c_utf, c_utf_len,                                  \
                     c_utf_name, c_locale,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",     \
                      scm_list_1 (utf), err);                           \
  else                                                                  \
    /* C_STR is null-terminated.  */                                    \
    str = scm_take_locale_stringn (c_str, c_strlen);                    \
                                                                        \
  return (str);
2053
2054
2055 SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2056 1, 0, 0,
2057 (SCM utf),
2058 "Return a newly allocate string that contains from the UTF-8-"
2059 "encoded contents of bytevector @var{utf}.")
2060 #define FUNC_NAME s_scm_utf8_to_string
2061 {
2062 SCM str;
2063 int err;
2064 char *c_str = NULL, *c_locale;
2065 const char *c_utf;
2066 size_t c_utf_len, c_strlen = 0;
2067
2068 SCM_VALIDATE_BYTEVECTOR (1, utf);
2069
2070 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2071
2072 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
2073 strcpy (c_locale, locale_charset ());
2074
2075 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2076 err = mem_iconveh (c_utf, c_utf_len,
2077 "UTF-8", c_locale,
2078 iconveh_question_mark, NULL,
2079 &c_str, &c_strlen);
2080 if (SCM_UNLIKELY (err))
2081 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
2082 scm_list_1 (utf), err);
2083 else
2084 /* C_STR is null-terminated. */
2085 str = scm_take_locale_stringn (c_str, c_strlen);
2086
2087 return (str);
2088 }
2089 #undef FUNC_NAME
2090
/* Scheme procedure `utf16->string', taking UTF and an optional ENDIANNESS.
   The body is the UTF_TO_STRING template instantiated for 16-bit code
   units; the template substitutes scm_sym_big when ENDIANNESS is not
   supplied.  */
SCM_DEFINE (scm_utf16_to_string, "utf16->string",
            1, 1, 0,
            (SCM utf, SCM endianness),
            "Return a newly allocate string that contains from the UTF-16-"
            "encoded contents of bytevector @var{utf}.")
#define FUNC_NAME s_scm_utf16_to_string
{
  UTF_TO_STRING (16);
}
#undef FUNC_NAME
2101
/* Scheme procedure `utf32->string', taking UTF and an optional ENDIANNESS.
   The body is the UTF_TO_STRING template instantiated for 32-bit code
   units; the template substitutes scm_sym_big when ENDIANNESS is not
   supplied.  */
SCM_DEFINE (scm_utf32_to_string, "utf32->string",
            1, 1, 0,
            (SCM utf, SCM endianness),
            "Return a newly allocate string that contains from the UTF-32-"
            "encoded contents of bytevector @var{utf}.")
#define FUNC_NAME s_scm_utf32_to_string
{
  UTF_TO_STRING (32);
}
#undef FUNC_NAME
2112
2113
2114 \f
2115 /* Bytevectors as generalized vectors & arrays. */
2116
2117
2118 static SCM
2119 bytevector_ref_c32 (SCM bv, SCM idx)
2120 { /* FIXME add some checks */
2121 const float *contents = (const float*)SCM_BYTEVECTOR_CONTENTS (bv);
2122 size_t i = scm_to_size_t (idx);
2123 return scm_c_make_rectangular (contents[i/8], contents[i/8 + 1]);
2124 }
2125
2126 static SCM
2127 bytevector_ref_c64 (SCM bv, SCM idx)
2128 { /* FIXME add some checks */
2129 const double *contents = (const double*)SCM_BYTEVECTOR_CONTENTS (bv);
2130 size_t i = scm_to_size_t (idx);
2131 return scm_c_make_rectangular (contents[i/16], contents[i/16 + 1]);
2132 }
2133
/* Signature shared by the element readers: (bytevector, index) -> value.  */
typedef SCM (*scm_t_bytevector_ref_fn)(SCM, SCM);

/* Element readers, indexed by array element type; bv_handle_ref picks an
   entry with `h->element_type'.  The first three slots have no reader.
   NOTE(review): nothing visible here stops bv_handle_ref from calling
   through one of the NULL slots -- confirm that a bytevector can never
   carry those element types.  */
const scm_t_bytevector_ref_fn bytevector_ref_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
  {
    NULL, /* SCM */
    NULL, /* CHAR */
    NULL, /* BIT */
    scm_bytevector_u8_ref, /* VU8 */
    scm_bytevector_u8_ref, /* U8 */
    scm_bytevector_s8_ref,
    scm_bytevector_u16_native_ref,
    scm_bytevector_s16_native_ref,
    scm_bytevector_u32_native_ref,
    scm_bytevector_s32_native_ref,
    scm_bytevector_u64_native_ref,
    scm_bytevector_s64_native_ref,
    scm_bytevector_ieee_single_native_ref,
    scm_bytevector_ieee_double_native_ref,
    bytevector_ref_c32,
    bytevector_ref_c64
  };
2155
2156 static SCM
2157 bv_handle_ref (scm_t_array_handle *h, size_t index)
2158 {
2159 SCM byte_index;
2160 scm_t_bytevector_ref_fn ref_fn;
2161
2162 ref_fn = bytevector_ref_fns[h->element_type];
2163 byte_index =
2164 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2165 return ref_fn (h->array, byte_index);
2166 }
2167
2168 static SCM
2169 bytevector_set_c32 (SCM bv, SCM idx, SCM val)
2170 { /* checks are unnecessary here */
2171 float *contents = (float*)SCM_BYTEVECTOR_CONTENTS (bv);
2172 size_t i = scm_to_size_t (idx);
2173 contents[i/8] = scm_c_real_part (val);
2174 contents[i/8 + 1] = scm_c_imag_part (val);
2175 return SCM_UNSPECIFIED;
2176 }
2177
2178 static SCM
2179 bytevector_set_c64 (SCM bv, SCM idx, SCM val)
2180 { /* checks are unnecessary here */
2181 double *contents = (double*)SCM_BYTEVECTOR_CONTENTS (bv);
2182 size_t i = scm_to_size_t (idx);
2183 contents[i/16] = scm_c_real_part (val);
2184 contents[i/16 + 1] = scm_c_imag_part (val);
2185 return SCM_UNSPECIFIED;
2186 }
2187
/* Signature shared by all element writers below: takes a bytevector, an
   index and the value to store, all as Scheme values, and returns a
   Scheme value.  */
2188 typedef SCM (*scm_t_bytevector_set_fn)(SCM, SCM, SCM);
2189
/* Element writers, looked up by `h->element_type' in bv_handle_set_x.
   The table has SCM_ARRAY_ELEMENT_TYPE_LAST + 1 slots; the first three
   (SCM, CHAR, BIT) have no writer and are NULL.  Entries are
   positional, so their order must match the order of the element-type
   enumeration.  The slot labels after U8 were not present originally:
   they are inferred from the accessor names and should be confirmed
   against the enumeration.  */
2190 const scm_t_bytevector_set_fn bytevector_set_fns[SCM_ARRAY_ELEMENT_TYPE_LAST + 1] =
2191 {
2192 NULL, /* SCM */
2193 NULL, /* CHAR */
2194 NULL, /* BIT */
2195 scm_bytevector_u8_set_x, /* VU8 */
2196 scm_bytevector_u8_set_x, /* U8 */
2197 scm_bytevector_s8_set_x, /* S8 (inferred) */
2198 scm_bytevector_u16_native_set_x, /* U16 (inferred) */
2199 scm_bytevector_s16_native_set_x, /* S16 (inferred) */
2200 scm_bytevector_u32_native_set_x, /* U32 (inferred) */
2201 scm_bytevector_s32_native_set_x, /* S32 (inferred) */
2202 scm_bytevector_u64_native_set_x, /* U64 (inferred) */
2203 scm_bytevector_s64_native_set_x, /* S64 (inferred) */
2204 scm_bytevector_ieee_single_native_set_x, /* single-precision float (inferred) */
2205 scm_bytevector_ieee_double_native_set_x, /* double-precision float (inferred) */
2206 bytevector_set_c32, /* complex, pair of floats */
2207 bytevector_set_c64 /* complex, pair of doubles */
2208 };
2209
2210 static void
2211 bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2212 {
2213 SCM byte_index;
2214 scm_t_bytevector_set_fn set_fn;
2215
2216 set_fn = bytevector_set_fns[h->element_type];
2217 byte_index =
2218 scm_from_size_t (index * scm_array_handle_uniform_element_size (h));
2219 set_fn (h->array, byte_index, val);
2220 }
2221
2222 static void
2223 bytevector_get_handle (SCM v, scm_t_array_handle *h)
2224 {
2225 h->array = v;
2226 h->ndims = 1;
2227 h->dims = &h->dim0;
2228 h->dim0.lbnd = 0;
2229 h->dim0.ubnd = SCM_BYTEVECTOR_TYPED_LENGTH (v) - 1;
2230 h->dim0.inc = 1;
2231 h->element_type = SCM_BYTEVECTOR_ELEMENT_TYPE (v);
2232 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2233 }
2234
2235 \f
2236 /* Initialization. */
2237
2238 void
2239 scm_bootstrap_bytevectors (void)
2240 {
2241 /* This must be instantiated here because the generalized-vector API may
2242 want to access bytevectors even though `(rnrs bytevector)' hasn't been
2243 loaded. */
2244 scm_null_bytevector =
2245 scm_gc_protect_object
2246 (make_bytevector_from_buffer (0, NULL, SCM_ARRAY_ELEMENT_TYPE_VU8));
2247
2248 #ifdef WORDS_BIGENDIAN
2249 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
2250 #else
2251 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
2252 #endif
2253
2254 scm_c_register_extension ("libguile", "scm_init_bytevectors",
2255 (scm_t_extension_init_func) scm_init_bytevectors,
2256 NULL);
2257
2258 {
2259 scm_t_array_implementation impl;
2260
2261 impl.tag = scm_tc7_bytevector;
2262 impl.mask = 0x7f;
2263 impl.vref = bv_handle_ref;
2264 impl.vset = bv_handle_set_x;
2265 impl.get_handle = bytevector_get_handle;
2266 scm_i_register_array_implementation (&impl);
2267 scm_i_register_vector_constructor
2268 (scm_i_array_element_types[SCM_ARRAY_ELEMENT_TYPE_VU8],
2269 scm_make_bytevector);
2270 }
2271 }
2272
/* Initialization function registered under the name
   "scm_init_bytevectors" as an extension of "libguile" by
   scm_bootstrap_bytevectors.  It pulls in the contents of
   "libguile/bytevectors.x" and then sets the two endianness
   variables.  */
2273 void
2274 scm_init_bytevectors (void)
2275 {
/* NOTE(review): presumably generated initialization code for the
   definitions in this file -- confirm against the build.  */
2276 #include "libguile/bytevectors.x"
2277
/* scm_sym_big and scm_sym_little are defined outside this chunk;
   presumably they are set up by the included file, so these
   assignments must stay after the #include -- confirm.  */
2278 scm_endianness_big = scm_sym_big;
2279 scm_endianness_little = scm_sym_little;
2280 }