generic vector ops to own file
[bpt/guile.git] / libguile / bytevectors.c
CommitLineData
1ee2c72e
LC
1/* Copyright (C) 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
1ee2c72e 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
1ee2c72e
LC
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
1ee2c72e
LC
17 */
18
19
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
23
24#include <alloca.h>
25
26#include <gmp.h>
27
28#include "libguile/_scm.h"
cfb4702f 29#include "libguile/extensions.h"
1ee2c72e
LC
30#include "libguile/bytevectors.h"
31#include "libguile/strings.h"
32#include "libguile/validate.h"
33#include "libguile/ieee-754.h"
2fa901a5 34#include "libguile/arrays.h"
2a610be5 35#include "libguile/array-handle.h"
782a82ee 36#include "libguile/srfi-4.h"
1ee2c72e
LC
37
38#include <byteswap.h>
39#include <striconveh.h>
40#include <uniconv.h>
41
42#ifdef HAVE_LIMITS_H
43# include <limits.h>
44#else
45/* Assuming 32-bit longs. */
46# define ULONG_MAX 4294967295UL
47#endif
48
49#include <string.h>
50
51
52\f
53/* Utilities. */
54
55/* Convenience macros. These are used by the various templates (macros) that
56 are parameterized by integer signedness. */
57#define INT8_T_signed scm_t_int8
58#define INT8_T_unsigned scm_t_uint8
59#define INT16_T_signed scm_t_int16
60#define INT16_T_unsigned scm_t_uint16
61#define INT32_T_signed scm_t_int32
62#define INT32_T_unsigned scm_t_uint32
63#define is_signed_int8(_x) (((_x) >= -128L) && ((_x) <= 127L))
64#define is_unsigned_int8(_x) ((_x) <= 255UL)
65#define is_signed_int16(_x) (((_x) >= -32768L) && ((_x) <= 32767L))
66#define is_unsigned_int16(_x) ((_x) <= 65535UL)
67#define is_signed_int32(_x) (((_x) >= -2147483648L) && ((_x) <= 2147483647L))
68#define is_unsigned_int32(_x) ((_x) <= 4294967295UL)
69#define SIGNEDNESS_signed 1
70#define SIGNEDNESS_unsigned 0
71
72#define INT_TYPE(_size, _sign) INT ## _size ## _T_ ## _sign
73#define INT_SWAP(_size) bswap_ ## _size
74#define INT_VALID_P(_size, _sign) is_ ## _sign ## _int ## _size
75#define SIGNEDNESS(_sign) SIGNEDNESS_ ## _sign
76
77
/* Common prologue of the fixed-size integer accessors.  Expects `bv' and
   `index' (both SCM) and FUNC_NAME to be in scope.  Declares C_LEN,
   C_INDEX and C_BV, validates BV, converts INDEX, and signals an
   out-of-range error unless the (_LEN / 8)-octet access starting at
   C_INDEX lies entirely within BV.  */
#define INTEGER_ACCESSOR_PROLOGUE(_len, _sign)                          \
  size_t c_len, c_index;                                                \
  _sign char *c_bv;                                                     \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_uint (index);                                        \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (_sign char *) SCM_BYTEVECTOR_CONTENTS (bv);                   \
                                                                        \
  /* Subtract the access width from C_LEN instead of adding it to      \
     C_INDEX, so that a very large index cannot wrap around and slip   \
     past the check.  */                                                \
  if (SCM_UNLIKELY (c_len < ((_len) >> 3UL)                             \
                    || c_index > c_len - ((_len) >> 3UL)))              \
    scm_out_of_range (FUNC_NAME, index);
90
91/* Template for fixed-size integer access (only 8, 16 or 32-bit). */
caa92f5e
AW
92#define INTEGER_REF(_len, _sign) \
93 SCM result; \
94 \
95 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
96 SCM_VALIDATE_SYMBOL (3, endianness); \
97 \
98 { \
99 INT_TYPE (_len, _sign) c_result; \
100 \
101 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
102 if (!scm_is_eq (endianness, scm_i_native_endianness)) \
103 c_result = INT_SWAP (_len) (c_result); \
104 \
105 result = SCM_I_MAKINUM (c_result); \
106 } \
107 \
1ee2c72e
LC
108 return result;
109
110/* Template for fixed-size integer access using the native endianness. */
111#define INTEGER_NATIVE_REF(_len, _sign) \
112 SCM result; \
113 \
114 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
115 \
116 { \
117 INT_TYPE (_len, _sign) c_result; \
118 \
119 memcpy (&c_result, &c_bv[c_index], (_len) / 8); \
120 result = SCM_I_MAKINUM (c_result); \
121 } \
122 \
123 return result;
124
125/* Template for fixed-size integer modification (only 8, 16 or 32-bit). */
126#define INTEGER_SET(_len, _sign) \
127 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
128 SCM_VALIDATE_SYMBOL (3, endianness); \
129 \
130 { \
131 _sign long c_value; \
132 INT_TYPE (_len, _sign) c_value_short; \
133 \
134 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
135 scm_wrong_type_arg (FUNC_NAME, 3, value); \
136 \
137 c_value = SCM_I_INUM (value); \
138 if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value))) \
139 scm_out_of_range (FUNC_NAME, value); \
140 \
141 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
caa92f5e 142 if (!scm_is_eq (endianness, scm_i_native_endianness)) \
1ee2c72e
LC
143 c_value_short = INT_SWAP (_len) (c_value_short); \
144 \
145 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
146 } \
147 \
148 return SCM_UNSPECIFIED;
149
150/* Template for fixed-size integer modification using the native
151 endianness. */
152#define INTEGER_NATIVE_SET(_len, _sign) \
153 INTEGER_ACCESSOR_PROLOGUE (_len, _sign); \
154 \
155 { \
156 _sign long c_value; \
157 INT_TYPE (_len, _sign) c_value_short; \
158 \
159 if (SCM_UNLIKELY (!SCM_I_INUMP (value))) \
160 scm_wrong_type_arg (FUNC_NAME, 3, value); \
161 \
162 c_value = SCM_I_INUM (value); \
163 if (SCM_UNLIKELY (!INT_VALID_P (_len, _sign) (c_value))) \
164 scm_out_of_range (FUNC_NAME, value); \
165 \
166 c_value_short = (INT_TYPE (_len, _sign)) c_value; \
167 \
168 memcpy (&c_bv[c_index], &c_value_short, (_len) / 8); \
169 } \
170 \
171 return SCM_UNSPECIFIED;
172
173
174\f
175/* Bytevector type. */
176
cfb4702f 177scm_t_bits scm_tc16_bytevector;
1ee2c72e
LC
178
179#define SCM_BYTEVECTOR_SET_LENGTH(_bv, _len) \
180 SCM_SET_SMOB_DATA ((_bv), (scm_t_bits) (_len))
181#define SCM_BYTEVECTOR_SET_CONTENTS(_bv, _buf) \
182 SCM_SET_SMOB_DATA_2 ((_bv), (scm_t_bits) (_buf))
183
184/* The empty bytevector. */
185SCM scm_null_bytevector = SCM_UNSPECIFIED;
186
187
188static inline SCM
2d34e924 189make_bytevector_from_buffer (size_t len, signed char *contents)
1ee2c72e
LC
190{
191 /* Assuming LEN > SCM_BYTEVECTOR_INLINE_THRESHOLD. */
192 SCM_RETURN_NEWSMOB2 (scm_tc16_bytevector, len, contents);
193}
194
195static inline SCM
2d34e924 196make_bytevector (size_t len)
1ee2c72e
LC
197{
198 SCM bv;
199
200 if (SCM_UNLIKELY (len == 0))
201 bv = scm_null_bytevector;
202 else
203 {
204 signed char *contents = NULL;
205
206 if (!SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len))
207 contents = (signed char *) scm_gc_malloc (len, SCM_GC_BYTEVECTOR);
208
209 bv = make_bytevector_from_buffer (len, contents);
210 }
211
212 return bv;
213}
214
215/* Return a new bytevector of size LEN octets. */
216SCM
2d34e924 217scm_c_make_bytevector (size_t len)
1ee2c72e
LC
218{
219 return (make_bytevector (len));
220}
221
222/* Return a bytevector of size LEN made up of CONTENTS. The area pointed to
223 by CONTENTS must have been allocated using `scm_gc_malloc ()'. */
224SCM
2d34e924 225scm_c_take_bytevector (signed char *contents, size_t len)
1ee2c72e
LC
226{
227 SCM bv;
228
229 if (SCM_UNLIKELY (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (len)))
230 {
231 /* Copy CONTENTS into an "in-line" buffer, then free CONTENTS. */
232 signed char *c_bv;
233
234 bv = make_bytevector (len);
235 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
236 memcpy (c_bv, contents, len);
237 scm_gc_free (contents, len, SCM_GC_BYTEVECTOR);
238 }
239 else
240 bv = make_bytevector_from_buffer (len, contents);
241
242 return bv;
243}
244
245/* Shrink BV to C_NEW_LEN (which is assumed to be smaller than its current
246 size) and return BV. */
247SCM
2d34e924 248scm_i_shrink_bytevector (SCM bv, size_t c_new_len)
1ee2c72e
LC
249{
250 if (!SCM_BYTEVECTOR_INLINE_P (bv))
251 {
2d34e924 252 size_t c_len;
1ee2c72e
LC
253 signed char *c_bv, *c_new_bv;
254
255 c_len = SCM_BYTEVECTOR_LENGTH (bv);
256 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
257
258 SCM_BYTEVECTOR_SET_LENGTH (bv, c_new_len);
259
260 if (SCM_BYTEVECTOR_INLINEABLE_SIZE_P (c_new_len))
261 {
262 /* Copy to the in-line buffer and free the current buffer. */
263 c_new_bv = SCM_BYTEVECTOR_CONTENTS (bv);
264 memcpy (c_new_bv, c_bv, c_new_len);
265 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
266 }
267 else
268 {
269 /* Resize the existing buffer. */
270 c_new_bv = scm_gc_realloc (c_bv, c_len, c_new_len,
271 SCM_GC_BYTEVECTOR);
272 SCM_BYTEVECTOR_SET_CONTENTS (bv, c_new_bv);
273 }
274 }
275
276 return bv;
277}
278
404bb5f8
LC
279int
280scm_is_bytevector (SCM obj)
281{
282 return SCM_SMOB_PREDICATE (scm_tc16_bytevector, obj);
283}
284
285size_t
286scm_c_bytevector_length (SCM bv)
287#define FUNC_NAME "scm_c_bytevector_length"
288{
289 SCM_VALIDATE_BYTEVECTOR (1, bv);
290
291 return SCM_BYTEVECTOR_LENGTH (bv);
292}
293#undef FUNC_NAME
294
295scm_t_uint8
296scm_c_bytevector_ref (SCM bv, size_t index)
297#define FUNC_NAME "scm_c_bytevector_ref"
298{
299 size_t c_len;
300 const scm_t_uint8 *c_bv;
301
302 SCM_VALIDATE_BYTEVECTOR (1, bv);
303
304 c_len = SCM_BYTEVECTOR_LENGTH (bv);
305 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
306
307 if (SCM_UNLIKELY (index >= c_len))
308 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
309
310 return c_bv[index];
311}
312#undef FUNC_NAME
313
314void
315scm_c_bytevector_set_x (SCM bv, size_t index, scm_t_uint8 value)
316#define FUNC_NAME "scm_c_bytevector_set_x"
317{
318 size_t c_len;
319 scm_t_uint8 *c_bv;
320
321 SCM_VALIDATE_BYTEVECTOR (1, bv);
322
323 c_len = SCM_BYTEVECTOR_LENGTH (bv);
324 c_bv = (scm_t_uint8 *) SCM_BYTEVECTOR_CONTENTS (bv);
325
326 if (SCM_UNLIKELY (index >= c_len))
327 scm_out_of_range (FUNC_NAME, scm_from_size_t (index));
328
329 c_bv[index] = value;
330}
331#undef FUNC_NAME
332
438974d0
LC
333/* This procedure is used by `scm_c_generalized_vector_set_x ()'. */
334void
335scm_i_bytevector_generalized_set_x (SCM bv, size_t index, SCM value)
336#define FUNC_NAME "scm_i_bytevector_generalized_set_x"
337{
338 scm_c_bytevector_set_x (bv, index, scm_to_uint8 (value));
339}
340#undef FUNC_NAME
341
cfb4702f
LC
342static int
343print_bytevector (SCM bv, SCM port, scm_print_state *pstate)
1ee2c72e
LC
344{
345 unsigned c_len, i;
346 unsigned char *c_bv;
347
348 c_len = SCM_BYTEVECTOR_LENGTH (bv);
349 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
350
351 scm_puts ("#vu8(", port);
352 for (i = 0; i < c_len; i++)
353 {
354 if (i > 0)
355 scm_putc (' ', port);
356
357 scm_uintprint (c_bv[i], 10, port);
358 }
359
360 scm_putc (')', port);
361
362 /* Make GCC think we use it. */
363 scm_remember_upto_here ((SCM) pstate);
364
365 return 1;
366}
367
cfb4702f
LC
368static SCM
369bytevector_equal_p (SCM bv1, SCM bv2)
55bf8cb7
LC
370{
371 return scm_bytevector_eq_p (bv1, bv2);
372}
373
cfb4702f
LC
374static size_t
375free_bytevector (SCM bv)
1ee2c72e
LC
376{
377
378 if (!SCM_BYTEVECTOR_INLINE_P (bv))
379 {
380 unsigned c_len;
381 signed char *c_bv;
382
383 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
384 c_len = SCM_BYTEVECTOR_LENGTH (bv);
385
386 scm_gc_free (c_bv, c_len, SCM_GC_BYTEVECTOR);
387 }
388
389 return 0;
390}
391
392
393\f
394/* General operations. */
395
396SCM_SYMBOL (scm_sym_big, "big");
397SCM_SYMBOL (scm_sym_little, "little");
398
399SCM scm_endianness_big, scm_endianness_little;
400
401/* Host endianness (a symbol). */
caa92f5e 402SCM scm_i_native_endianness = SCM_UNSPECIFIED;
1ee2c72e
LC
403
404/* Byte-swapping. */
405#ifndef bswap_24
406# define bswap_24(_x) \
407 ((((_x) & 0xff0000) >> 16) | \
408 (((_x) & 0x00ff00)) | \
409 (((_x) & 0x0000ff) << 16))
410#endif
411
412
413SCM_DEFINE (scm_native_endianness, "native-endianness", 0, 0, 0,
414 (void),
415 "Return a symbol denoting the machine's native endianness.")
416#define FUNC_NAME s_scm_native_endianness
417{
caa92f5e 418 return scm_i_native_endianness;
1ee2c72e
LC
419}
420#undef FUNC_NAME
421
422SCM_DEFINE (scm_bytevector_p, "bytevector?", 1, 0, 0,
423 (SCM obj),
424 "Return true if @var{obj} is a bytevector.")
425#define FUNC_NAME s_scm_bytevector_p
426{
404bb5f8 427 return scm_from_bool (scm_is_bytevector (obj));
1ee2c72e
LC
428}
429#undef FUNC_NAME
430
431SCM_DEFINE (scm_make_bytevector, "make-bytevector", 1, 1, 0,
432 (SCM len, SCM fill),
433 "Return a newly allocated bytevector of @var{len} bytes, "
434 "optionally filled with @var{fill}.")
435#define FUNC_NAME s_scm_make_bytevector
436{
437 SCM bv;
438 unsigned c_len;
439 signed char c_fill = '\0';
440
441 SCM_VALIDATE_UINT_COPY (1, len, c_len);
442 if (fill != SCM_UNDEFINED)
443 {
444 int value;
445
446 value = scm_to_int (fill);
447 if (SCM_UNLIKELY ((value < -128) || (value > 255)))
448 scm_out_of_range (FUNC_NAME, fill);
449 c_fill = (signed char) value;
450 }
451
452 bv = make_bytevector (c_len);
453 if (fill != SCM_UNDEFINED)
454 {
455 unsigned i;
456 signed char *contents;
457
458 contents = SCM_BYTEVECTOR_CONTENTS (bv);
459 for (i = 0; i < c_len; i++)
460 contents[i] = c_fill;
461 }
462
463 return bv;
464}
465#undef FUNC_NAME
466
467SCM_DEFINE (scm_bytevector_length, "bytevector-length", 1, 0, 0,
468 (SCM bv),
469 "Return the length (in bytes) of @var{bv}.")
470#define FUNC_NAME s_scm_bytevector_length
471{
404bb5f8 472 return scm_from_uint (scm_c_bytevector_length (bv));
1ee2c72e
LC
473}
474#undef FUNC_NAME
475
476SCM_DEFINE (scm_bytevector_eq_p, "bytevector=?", 2, 0, 0,
477 (SCM bv1, SCM bv2),
478 "Return is @var{bv1} equals to @var{bv2}---i.e., if they "
479 "have the same length and contents.")
480#define FUNC_NAME s_scm_bytevector_eq_p
481{
482 SCM result = SCM_BOOL_F;
483 unsigned c_len1, c_len2;
484
485 SCM_VALIDATE_BYTEVECTOR (1, bv1);
486 SCM_VALIDATE_BYTEVECTOR (2, bv2);
487
488 c_len1 = SCM_BYTEVECTOR_LENGTH (bv1);
489 c_len2 = SCM_BYTEVECTOR_LENGTH (bv2);
490
491 if (c_len1 == c_len2)
492 {
493 signed char *c_bv1, *c_bv2;
494
495 c_bv1 = SCM_BYTEVECTOR_CONTENTS (bv1);
496 c_bv2 = SCM_BYTEVECTOR_CONTENTS (bv2);
497
498 result = scm_from_bool (!memcmp (c_bv1, c_bv2, c_len1));
499 }
500
501 return result;
502}
503#undef FUNC_NAME
504
505SCM_DEFINE (scm_bytevector_fill_x, "bytevector-fill!", 2, 0, 0,
506 (SCM bv, SCM fill),
507 "Fill bytevector @var{bv} with @var{fill}, a byte.")
508#define FUNC_NAME s_scm_bytevector_fill_x
509{
510 unsigned c_len, i;
511 signed char *c_bv, c_fill;
512
513 SCM_VALIDATE_BYTEVECTOR (1, bv);
514 c_fill = scm_to_int8 (fill);
515
516 c_len = SCM_BYTEVECTOR_LENGTH (bv);
517 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
518
519 for (i = 0; i < c_len; i++)
520 c_bv[i] = c_fill;
521
522 return SCM_UNSPECIFIED;
523}
524#undef FUNC_NAME
525
526SCM_DEFINE (scm_bytevector_copy_x, "bytevector-copy!", 5, 0, 0,
527 (SCM source, SCM source_start, SCM target, SCM target_start,
528 SCM len),
529 "Copy @var{len} bytes from @var{source} into @var{target}, "
530 "starting reading from @var{source_start} (a positive index "
531 "within @var{source}) and start writing at "
532 "@var{target_start}.")
533#define FUNC_NAME s_scm_bytevector_copy_x
534{
535 unsigned c_len, c_source_len, c_target_len;
536 unsigned c_source_start, c_target_start;
537 signed char *c_source, *c_target;
538
539 SCM_VALIDATE_BYTEVECTOR (1, source);
540 SCM_VALIDATE_BYTEVECTOR (3, target);
541
542 c_len = scm_to_uint (len);
543 c_source_start = scm_to_uint (source_start);
544 c_target_start = scm_to_uint (target_start);
545
546 c_source = SCM_BYTEVECTOR_CONTENTS (source);
547 c_target = SCM_BYTEVECTOR_CONTENTS (target);
548 c_source_len = SCM_BYTEVECTOR_LENGTH (source);
549 c_target_len = SCM_BYTEVECTOR_LENGTH (target);
550
551 if (SCM_UNLIKELY (c_source_start + c_len > c_source_len))
552 scm_out_of_range (FUNC_NAME, source_start);
553 if (SCM_UNLIKELY (c_target_start + c_len > c_target_len))
554 scm_out_of_range (FUNC_NAME, target_start);
555
556 memcpy (c_target + c_target_start,
557 c_source + c_source_start,
558 c_len);
559
560 return SCM_UNSPECIFIED;
561}
562#undef FUNC_NAME
563
564SCM_DEFINE (scm_bytevector_copy, "bytevector-copy", 1, 0, 0,
565 (SCM bv),
566 "Return a newly allocated copy of @var{bv}.")
567#define FUNC_NAME s_scm_bytevector_copy
568{
569 SCM copy;
570 unsigned c_len;
571 signed char *c_bv, *c_copy;
572
573 SCM_VALIDATE_BYTEVECTOR (1, bv);
574
575 c_len = SCM_BYTEVECTOR_LENGTH (bv);
576 c_bv = SCM_BYTEVECTOR_CONTENTS (bv);
577
578 copy = make_bytevector (c_len);
579 c_copy = SCM_BYTEVECTOR_CONTENTS (copy);
580 memcpy (c_copy, c_bv, c_len);
581
582 return copy;
583}
584#undef FUNC_NAME
585
782a82ee
AW
586SCM_DEFINE (scm_uniform_array_to_bytevector, "uniform-array->bytevector",
587 1, 0, 0, (SCM array),
588 "Return a newly allocated bytevector whose contents\n"
589 "will be copied from the uniform array @var{array}.")
590#define FUNC_NAME s_scm_uniform_array_to_bytevector
591{
592 SCM contents, ret;
593 size_t len;
594 scm_t_array_handle h;
595 const void *base;
596 size_t sz;
597
598 contents = scm_array_contents (array, SCM_BOOL_T);
599 if (scm_is_false (contents))
600 scm_wrong_type_arg_msg (FUNC_NAME, 0, array, "uniform contiguous array");
601
602 scm_array_get_handle (contents, &h);
603
604 base = scm_array_handle_uniform_elements (&h);
605 len = h.dims->inc * (h.dims->ubnd - h.dims->lbnd + 1);
606 sz = scm_array_handle_uniform_element_size (&h);
607
608 ret = make_bytevector (len * sz);
609 memcpy (SCM_BYTEVECTOR_CONTENTS (ret), base, len * sz);
610
611 scm_array_handle_release (&h);
612
613 return ret;
614}
615#undef FUNC_NAME
616
1ee2c72e
LC
617\f
618/* Operations on bytes and octets. */
619
620SCM_DEFINE (scm_bytevector_u8_ref, "bytevector-u8-ref", 2, 0, 0,
621 (SCM bv, SCM index),
622 "Return the octet located at @var{index} in @var{bv}.")
623#define FUNC_NAME s_scm_bytevector_u8_ref
624{
625 INTEGER_NATIVE_REF (8, unsigned);
626}
627#undef FUNC_NAME
628
629SCM_DEFINE (scm_bytevector_s8_ref, "bytevector-s8-ref", 2, 0, 0,
630 (SCM bv, SCM index),
631 "Return the byte located at @var{index} in @var{bv}.")
632#define FUNC_NAME s_scm_bytevector_s8_ref
633{
634 INTEGER_NATIVE_REF (8, signed);
635}
636#undef FUNC_NAME
637
638SCM_DEFINE (scm_bytevector_u8_set_x, "bytevector-u8-set!", 3, 0, 0,
639 (SCM bv, SCM index, SCM value),
640 "Return the octet located at @var{index} in @var{bv}.")
641#define FUNC_NAME s_scm_bytevector_u8_set_x
642{
643 INTEGER_NATIVE_SET (8, unsigned);
644}
645#undef FUNC_NAME
646
647SCM_DEFINE (scm_bytevector_s8_set_x, "bytevector-s8-set!", 3, 0, 0,
648 (SCM bv, SCM index, SCM value),
649 "Return the octet located at @var{index} in @var{bv}.")
cabf1b31 650#define FUNC_NAME s_scm_bytevector_s8_set_x
1ee2c72e
LC
651{
652 INTEGER_NATIVE_SET (8, signed);
653}
654#undef FUNC_NAME
655
656#undef OCTET_ACCESSOR_PROLOGUE
657
658
659SCM_DEFINE (scm_bytevector_to_u8_list, "bytevector->u8-list", 1, 0, 0,
660 (SCM bv),
661 "Return a newly allocated list of octets containing the "
662 "contents of @var{bv}.")
663#define FUNC_NAME s_scm_bytevector_to_u8_list
664{
665 SCM lst, pair;
666 unsigned c_len, i;
667 unsigned char *c_bv;
668
669 SCM_VALIDATE_BYTEVECTOR (1, bv);
670
671 c_len = SCM_BYTEVECTOR_LENGTH (bv);
672 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
673
674 lst = scm_make_list (scm_from_uint (c_len), SCM_UNSPECIFIED);
675 for (i = 0, pair = lst;
676 i < c_len;
677 i++, pair = SCM_CDR (pair))
678 {
679 SCM_SETCAR (pair, SCM_I_MAKINUM (c_bv[i]));
680 }
681
682 return lst;
683}
684#undef FUNC_NAME
685
686SCM_DEFINE (scm_u8_list_to_bytevector, "u8-list->bytevector", 1, 0, 0,
687 (SCM lst),
688 "Turn @var{lst}, a list of octets, into a bytevector.")
689#define FUNC_NAME s_scm_u8_list_to_bytevector
690{
691 SCM bv, item;
692 long c_len, i;
693 unsigned char *c_bv;
694
695 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len);
696
697 bv = make_bytevector (c_len);
698 c_bv = (unsigned char *) SCM_BYTEVECTOR_CONTENTS (bv);
699
700 for (i = 0; i < c_len; lst = SCM_CDR (lst), i++)
701 {
702 item = SCM_CAR (lst);
703
704 if (SCM_LIKELY (SCM_I_INUMP (item)))
705 {
706 long c_item;
707
708 c_item = SCM_I_INUM (item);
709 if (SCM_LIKELY ((c_item >= 0) && (c_item < 256)))
710 c_bv[i] = (unsigned char) c_item;
711 else
712 goto type_error;
713 }
714 else
715 goto type_error;
716 }
717
718 return bv;
719
720 type_error:
721 scm_wrong_type_arg (FUNC_NAME, 1, item);
722
723 return SCM_BOOL_F;
724}
725#undef FUNC_NAME
726
727/* Compute the two's complement of VALUE (a positive integer) on SIZE octets
728 using (2^(SIZE * 8) - VALUE). */
729static inline void
730twos_complement (mpz_t value, size_t size)
731{
732 unsigned long bit_count;
733
734 /* We expect BIT_COUNT to fit in a unsigned long thanks to the range
735 checking on SIZE performed earlier. */
736 bit_count = (unsigned long) size << 3UL;
737
738 if (SCM_LIKELY (bit_count < sizeof (unsigned long)))
739 mpz_ui_sub (value, 1UL << bit_count, value);
740 else
741 {
742 mpz_t max;
743
744 mpz_init (max);
745 mpz_ui_pow_ui (max, 2, bit_count);
746 mpz_sub (value, max, value);
747 mpz_clear (max);
748 }
749}
750
751static inline SCM
752bytevector_large_ref (const char *c_bv, size_t c_size, int signed_p,
753 SCM endianness)
754{
755 SCM result;
756 mpz_t c_mpz;
757 int c_endianness, negative_p = 0;
758
759 if (signed_p)
760 {
761 if (scm_is_eq (endianness, scm_sym_big))
762 negative_p = c_bv[0] & 0x80;
763 else
764 negative_p = c_bv[c_size - 1] & 0x80;
765 }
766
767 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
768
769 mpz_init (c_mpz);
770 mpz_import (c_mpz, 1 /* 1 word */, 1 /* word order doesn't matter */,
771 c_size /* word is C_SIZE-byte long */,
772 c_endianness,
773 0 /* nails */, c_bv);
774
775 if (signed_p && negative_p)
776 {
777 twos_complement (c_mpz, c_size);
778 mpz_neg (c_mpz, c_mpz);
779 }
780
781 result = scm_from_mpz (c_mpz);
782 mpz_clear (c_mpz); /* FIXME: Needed? */
783
784 return result;
785}
786
787static inline int
788bytevector_large_set (char *c_bv, size_t c_size, int signed_p,
789 SCM value, SCM endianness)
790{
791 mpz_t c_mpz;
792 int c_endianness, c_sign, err = 0;
793
794 c_endianness = scm_is_eq (endianness, scm_sym_big) ? 1 : -1;
795
796 mpz_init (c_mpz);
797 scm_to_mpz (value, c_mpz);
798
799 c_sign = mpz_sgn (c_mpz);
800 if (c_sign < 0)
801 {
802 if (SCM_LIKELY (signed_p))
803 {
804 mpz_neg (c_mpz, c_mpz);
805 twos_complement (c_mpz, c_size);
806 }
807 else
808 {
809 err = -1;
810 goto finish;
811 }
812 }
813
814 if (c_sign == 0)
815 /* Zero. */
816 memset (c_bv, 0, c_size);
817 else
818 {
819 size_t word_count, value_size;
820
821 value_size = (mpz_sizeinbase (c_mpz, 2) + (8 * c_size)) / (8 * c_size);
822 if (SCM_UNLIKELY (value_size > c_size))
823 {
824 err = -2;
825 goto finish;
826 }
827
828
829 mpz_export (c_bv, &word_count, 1 /* word order doesn't matter */,
830 c_size, c_endianness,
831 0 /* nails */, c_mpz);
832 if (SCM_UNLIKELY (word_count != 1))
833 /* Shouldn't happen since we already checked with VALUE_SIZE. */
834 abort ();
835 }
836
837 finish:
838 mpz_clear (c_mpz);
839
840 return err;
841}
842
/* Common prologue of the arbitrary-size integer accessors.  Expects `bv',
   `index' and `size' (all SCM) and FUNC_NAME to be in scope.  Declares
   C_LEN, C_INDEX, C_SIZE and C_BV, and signals an out-of-range error
   unless C_SIZE is non-zero and the C_SIZE octets starting at C_INDEX lie
   entirely within BV.  */
#define GENERIC_INTEGER_ACCESSOR_PROLOGUE(_sign)                        \
  unsigned long c_len, c_index, c_size;                                 \
  char *c_bv;                                                           \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  c_index = scm_to_ulong (index);                                       \
  c_size = scm_to_ulong (size);                                         \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                         \
                                                                        \
  /* C_SIZE must have its 3 higher bits set to zero so that            \
     multiplying it by 8 yields a number that fits in an               \
     unsigned long.  */                                                 \
  if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L))))    \
    scm_out_of_range (FUNC_NAME, size);                                 \
  /* Written as a subtraction because `c_index + c_size' could wrap    \
     around and wrongly pass the test.  */                              \
  if (SCM_UNLIKELY (c_len < c_size                                      \
                    || c_index > c_len - c_size))                       \
    scm_out_of_range (FUNC_NAME, index);
861
862
863/* Template of an integer reference function. */
864#define GENERIC_INTEGER_REF(_sign) \
865 SCM result; \
866 \
867 if (c_size < 3) \
868 { \
869 int swap; \
870 _sign int value; \
871 \
caa92f5e 872 swap = !scm_is_eq (endianness, scm_i_native_endianness); \
1ee2c72e
LC
873 switch (c_size) \
874 { \
875 case 1: \
876 { \
877 _sign char c_value8; \
878 memcpy (&c_value8, c_bv, 1); \
879 value = c_value8; \
880 } \
881 break; \
882 case 2: \
883 { \
884 INT_TYPE (16, _sign) c_value16; \
885 memcpy (&c_value16, c_bv, 2); \
886 if (swap) \
887 value = (INT_TYPE (16, _sign)) bswap_16 (c_value16); \
888 else \
889 value = c_value16; \
890 } \
891 break; \
892 default: \
893 abort (); \
894 } \
895 \
896 result = SCM_I_MAKINUM ((_sign int) value); \
897 } \
898 else \
899 result = bytevector_large_ref ((char *) c_bv, \
900 c_size, SIGNEDNESS (_sign), \
901 endianness); \
902 \
903 return result;
904
905static inline SCM
906bytevector_signed_ref (const char *c_bv, size_t c_size, SCM endianness)
907{
908 GENERIC_INTEGER_REF (signed);
909}
910
911static inline SCM
912bytevector_unsigned_ref (const char *c_bv, size_t c_size, SCM endianness)
913{
914 GENERIC_INTEGER_REF (unsigned);
915}
916
917
918/* Template of an integer assignment function. */
/* Template of an integer assignment function.  Expects C_BV, C_SIZE,
   `value', `endianness' and FUNC_NAME to be in scope.  One- and two-octet
   integers are handled in-line and must be fixnums within the range of
   the target type; larger sizes are delegated to
   `bytevector_large_set ()'.  Any failure is reported as an out-of-range
   error on VALUE.  */
#define GENERIC_INTEGER_SET(_sign)                                      \
  if (c_size < 3)                                                       \
    {                                                                   \
      /* A `long', as in INTEGER_SET: narrowing the fixnum to `int'    \
         before the range check would let out-of-range values whose    \
         low bits happen to be in range go through.  */                 \
      _sign long c_value;                                               \
                                                                        \
      if (SCM_UNLIKELY (!SCM_I_INUMP (value)))                          \
        goto range_error;                                               \
                                                                        \
      c_value = SCM_I_INUM (value);                                     \
      switch (c_size)                                                   \
        {                                                               \
        case 1:                                                         \
          if (SCM_LIKELY (INT_VALID_P (8, _sign) (c_value)))            \
            {                                                           \
              _sign char c_value8;                                      \
              c_value8 = (_sign char) c_value;                          \
              memcpy (c_bv, &c_value8, 1);                              \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        case 2:                                                         \
          if (SCM_LIKELY (INT_VALID_P (16, _sign) (c_value)))           \
            {                                                           \
              int swap;                                                 \
              INT_TYPE (16, _sign) c_value16;                           \
                                                                        \
              swap = !scm_is_eq (endianness, scm_i_native_endianness);  \
                                                                        \
              if (swap)                                                 \
                c_value16 = (INT_TYPE (16, _sign))                      \
                  bswap_16 ((scm_t_uint16) c_value);                    \
              else                                                      \
                c_value16 = (INT_TYPE (16, _sign)) c_value;             \
                                                                        \
              memcpy (c_bv, &c_value16, 2);                             \
            }                                                           \
          else                                                          \
            goto range_error;                                           \
          break;                                                        \
                                                                        \
        default:                                                        \
          abort ();                                                     \
        }                                                               \
    }                                                                   \
  else                                                                  \
    {                                                                   \
      int err;                                                          \
                                                                        \
      err = bytevector_large_set (c_bv, c_size,                         \
                                  SIGNEDNESS (_sign),                   \
                                  value, endianness);                   \
      if (err)                                                          \
        goto range_error;                                               \
    }                                                                   \
                                                                        \
  return;                                                               \
                                                                        \
 range_error:                                                           \
  scm_out_of_range (FUNC_NAME, value);                                  \
  return;
980
981static inline void
982bytevector_signed_set (char *c_bv, size_t c_size,
983 SCM value, SCM endianness,
984 const char *func_name)
985#define FUNC_NAME func_name
986{
987 GENERIC_INTEGER_SET (signed);
988}
989#undef FUNC_NAME
990
991static inline void
992bytevector_unsigned_set (char *c_bv, size_t c_size,
993 SCM value, SCM endianness,
994 const char *func_name)
995#define FUNC_NAME func_name
996{
997 GENERIC_INTEGER_SET (unsigned);
998}
999#undef FUNC_NAME
1000
1001#undef GENERIC_INTEGER_SET
1002#undef GENERIC_INTEGER_REF
1003
1004
1005SCM_DEFINE (scm_bytevector_uint_ref, "bytevector-uint-ref", 4, 0, 0,
1006 (SCM bv, SCM index, SCM endianness, SCM size),
1007 "Return the @var{size}-octet long unsigned integer at index "
1008 "@var{index} in @var{bv}.")
1009#define FUNC_NAME s_scm_bytevector_uint_ref
1010{
1011 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1012
1013 return (bytevector_unsigned_ref (&c_bv[c_index], c_size, endianness));
1014}
1015#undef FUNC_NAME
1016
1017SCM_DEFINE (scm_bytevector_sint_ref, "bytevector-sint-ref", 4, 0, 0,
1018 (SCM bv, SCM index, SCM endianness, SCM size),
1019 "Return the @var{size}-octet long unsigned integer at index "
1020 "@var{index} in @var{bv}.")
1021#define FUNC_NAME s_scm_bytevector_sint_ref
1022{
1023 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1024
1025 return (bytevector_signed_ref (&c_bv[c_index], c_size, endianness));
1026}
1027#undef FUNC_NAME
1028
1029SCM_DEFINE (scm_bytevector_uint_set_x, "bytevector-uint-set!", 5, 0, 0,
1030 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1031 "Set the @var{size}-octet long unsigned integer at @var{index} "
1032 "to @var{value}.")
1033#define FUNC_NAME s_scm_bytevector_uint_set_x
1034{
1035 GENERIC_INTEGER_ACCESSOR_PROLOGUE (unsigned);
1036
1037 bytevector_unsigned_set (&c_bv[c_index], c_size, value, endianness,
1038 FUNC_NAME);
1039
1040 return SCM_UNSPECIFIED;
1041}
1042#undef FUNC_NAME
1043
1044SCM_DEFINE (scm_bytevector_sint_set_x, "bytevector-sint-set!", 5, 0, 0,
1045 (SCM bv, SCM index, SCM value, SCM endianness, SCM size),
1046 "Set the @var{size}-octet long signed integer at @var{index} "
1047 "to @var{value}.")
1048#define FUNC_NAME s_scm_bytevector_sint_set_x
1049{
1050 GENERIC_INTEGER_ACCESSOR_PROLOGUE (signed);
1051
1052 bytevector_signed_set (&c_bv[c_index], c_size, value, endianness,
1053 FUNC_NAME);
1054
1055 return SCM_UNSPECIFIED;
1056}
1057#undef FUNC_NAME
1058
1059
1060\f
1061/* Operations on integers of arbitrary size. */
1062
/* Template of a function that converts a bytevector into a list of
   integers.  Expects `bv', `endianness' and `size' (all SCM) and
   FUNC_NAME to be in scope.  Returns a list holding the successive
   SIZE-octet integers of BV, decoded with
   `bytevector_<_sign>_ref ()'; trailing octets that do not make up a
   whole integer are ignored.  An out-of-range error is signalled when
   SIZE is zero or, for a non-empty BV, larger than the length of BV.  */
#define INTEGERS_TO_LIST(_sign)                                         \
  SCM lst, pair;                                                        \
  size_t i, c_len, c_size;                                              \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, bv);                                      \
  SCM_VALIDATE_SYMBOL (2, endianness);                                  \
  c_size = scm_to_uint (size);                                          \
                                                                        \
  /* A zero SIZE is meaningless and would lead to a division by zero   \
     when computing the length of the list below.  */                   \
  if (SCM_UNLIKELY (c_size == 0))                                       \
    scm_out_of_range (FUNC_NAME, size);                                 \
                                                                        \
  c_len = SCM_BYTEVECTOR_LENGTH (bv);                                   \
  if (SCM_UNLIKELY (c_len == 0))                                        \
    lst = SCM_EOL;                                                      \
  else if (SCM_UNLIKELY (c_len < c_size))                               \
    scm_out_of_range (FUNC_NAME, size);                                 \
  else                                                                  \
    {                                                                   \
      const char *c_bv;                                                 \
                                                                        \
      c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv);                     \
                                                                        \
      lst = scm_make_list (scm_from_size_t (c_len / c_size),            \
                           SCM_UNSPECIFIED);                            \
      for (i = 0, pair = lst;                                           \
           i <= c_len - c_size;                                         \
           i += c_size, c_bv += c_size, pair = SCM_CDR (pair))          \
        {                                                               \
          SCM_SETCAR (pair,                                             \
                      bytevector_ ## _sign ## _ref (c_bv, c_size,       \
                                                    endianness));       \
        }                                                               \
    }                                                                   \
                                                                        \
  return lst;
1095
1096SCM_DEFINE (scm_bytevector_to_sint_list, "bytevector->sint-list",
1097 3, 0, 0,
1098 (SCM bv, SCM endianness, SCM size),
1099 "Return a list of signed integers of @var{size} octets "
1100 "representing the contents of @var{bv}.")
1101#define FUNC_NAME s_scm_bytevector_to_sint_list
1102{
1103 INTEGERS_TO_LIST (signed);
1104}
1105#undef FUNC_NAME
1106
1107SCM_DEFINE (scm_bytevector_to_uint_list, "bytevector->uint-list",
1108 3, 0, 0,
1109 (SCM bv, SCM endianness, SCM size),
1110 "Return a list of unsigned integers of @var{size} octets "
1111 "representing the contents of @var{bv}.")
1112#define FUNC_NAME s_scm_bytevector_to_uint_list
1113{
1114 INTEGERS_TO_LIST (unsigned);
1115}
1116#undef FUNC_NAME
1117
/* The list-conversion template is no longer needed past this point.  */
#undef INTEGERS_TO_LIST
1119
1120
1121#define INTEGER_LIST_TO_BYTEVECTOR(_sign) \
1122 SCM bv; \
1123 long c_len; \
1124 size_t c_size; \
1125 char *c_bv, *c_bv_ptr; \
1126 \
1127 SCM_VALIDATE_LIST_COPYLEN (1, lst, c_len); \
1128 SCM_VALIDATE_SYMBOL (2, endianness); \
1129 c_size = scm_to_uint (size); \
1130 \
1131 if (SCM_UNLIKELY ((c_size == 0) || (c_size >= (ULONG_MAX >> 3L)))) \
1132 scm_out_of_range (FUNC_NAME, size); \
1133 \
1134 bv = make_bytevector (c_len * c_size); \
1135 c_bv = (char *) SCM_BYTEVECTOR_CONTENTS (bv); \
1136 \
1137 for (c_bv_ptr = c_bv; \
1138 !scm_is_null (lst); \
1139 lst = SCM_CDR (lst), c_bv_ptr += c_size) \
1140 { \
1141 bytevector_ ## _sign ## _set (c_bv_ptr, c_size, \
1142 SCM_CAR (lst), endianness, \
1143 FUNC_NAME); \
1144 } \
1145 \
1146 return bv;
1147
1148
1149SCM_DEFINE (scm_uint_list_to_bytevector, "uint-list->bytevector",
1150 3, 0, 0,
1151 (SCM lst, SCM endianness, SCM size),
1152 "Return a bytevector containing the unsigned integers "
1153 "listed in @var{lst} and encoded on @var{size} octets "
1154 "according to @var{endianness}.")
1155#define FUNC_NAME s_scm_uint_list_to_bytevector
1156{
1157 INTEGER_LIST_TO_BYTEVECTOR (unsigned);
1158}
1159#undef FUNC_NAME
1160
1161SCM_DEFINE (scm_sint_list_to_bytevector, "sint-list->bytevector",
1162 3, 0, 0,
1163 (SCM lst, SCM endianness, SCM size),
1164 "Return a bytevector containing the signed integers "
1165 "listed in @var{lst} and encoded on @var{size} octets "
1166 "according to @var{endianness}.")
1167#define FUNC_NAME s_scm_sint_list_to_bytevector
1168{
1169 INTEGER_LIST_TO_BYTEVECTOR (signed);
1170}
1171#undef FUNC_NAME
1172
1173#undef INTEGER_LIST_TO_BYTEVECTOR
1174
1175
1176\f
1177/* Operations on 16-bit integers. */
1178
/* Scheme procedure `bytevector-u16-ref' (three required arguments).  The
   body is the INTEGER_REF template instantiated for unsigned 16-bit
   integers.  */
1179SCM_DEFINE (scm_bytevector_u16_ref, "bytevector-u16-ref",
1180 3, 0, 0,
1181 (SCM bv, SCM index, SCM endianness),
1182 "Return the unsigned 16-bit integer from @var{bv} at "
1183 "@var{index}.")
1184#define FUNC_NAME s_scm_bytevector_u16_ref
1185{
1186 INTEGER_REF (16, unsigned);
1187}
1188#undef FUNC_NAME
1189
/* Scheme procedure `bytevector-s16-ref' (three required arguments).  The
   body is the INTEGER_REF template instantiated for signed 16-bit
   integers.  */
1190SCM_DEFINE (scm_bytevector_s16_ref, "bytevector-s16-ref",
1191 3, 0, 0,
1192 (SCM bv, SCM index, SCM endianness),
1193 "Return the signed 16-bit integer from @var{bv} at "
1194 "@var{index}.")
1195#define FUNC_NAME s_scm_bytevector_s16_ref
1196{
1197 INTEGER_REF (16, signed);
1198}
1199#undef FUNC_NAME
1200
/* Scheme procedure `bytevector-u16-native-ref' (two required arguments).
   The body is the INTEGER_NATIVE_REF template instantiated for unsigned
   16-bit integers.  */
1201SCM_DEFINE (scm_bytevector_u16_native_ref, "bytevector-u16-native-ref",
1202 2, 0, 0,
1203 (SCM bv, SCM index),
1204 "Return the unsigned 16-bit integer from @var{bv} at "
1205 "@var{index} using the native endianness.")
1206#define FUNC_NAME s_scm_bytevector_u16_native_ref
1207{
1208 INTEGER_NATIVE_REF (16, unsigned);
1209}
1210#undef FUNC_NAME
1211
1212SCM_DEFINE (scm_bytevector_s16_native_ref, "bytevector-s16-native-ref",
1213 2, 0, 0,
1214 (SCM bv, SCM index),
1215 "Return the unsigned 16-bit integer from @var{bv} at "
1216 "@var{index} using the native endianness.")
1217#define FUNC_NAME s_scm_bytevector_s16_native_ref
1218{
1219 INTEGER_NATIVE_REF (16, signed);
1220}
1221#undef FUNC_NAME
1222
/* Scheme procedure `bytevector-u16-set!' (four required arguments).  The
   body is the INTEGER_SET template instantiated for unsigned 16-bit
   integers.  */
1223SCM_DEFINE (scm_bytevector_u16_set_x, "bytevector-u16-set!",
1224 4, 0, 0,
1225 (SCM bv, SCM index, SCM value, SCM endianness),
1226 "Store @var{value} in @var{bv} at @var{index} according to "
1227 "@var{endianness}.")
1228#define FUNC_NAME s_scm_bytevector_u16_set_x
1229{
1230 INTEGER_SET (16, unsigned);
1231}
1232#undef FUNC_NAME
1233
/* Scheme procedure `bytevector-s16-set!' (four required arguments).  The
   body is the INTEGER_SET template instantiated for signed 16-bit
   integers.  */
1234SCM_DEFINE (scm_bytevector_s16_set_x, "bytevector-s16-set!",
1235 4, 0, 0,
1236 (SCM bv, SCM index, SCM value, SCM endianness),
1237 "Store @var{value} in @var{bv} at @var{index} according to "
1238 "@var{endianness}.")
1239#define FUNC_NAME s_scm_bytevector_s16_set_x
1240{
1241 INTEGER_SET (16, signed);
1242}
1243#undef FUNC_NAME
1244
/* Scheme procedure `bytevector-u16-native-set!' (three required
   arguments).  The body is the INTEGER_NATIVE_SET template instantiated
   for unsigned 16-bit integers.  */
1245SCM_DEFINE (scm_bytevector_u16_native_set_x, "bytevector-u16-native-set!",
1246 3, 0, 0,
1247 (SCM bv, SCM index, SCM value),
1248 "Store the unsigned integer @var{value} at index @var{index} "
1249 "of @var{bv} using the native endianness.")
1250#define FUNC_NAME s_scm_bytevector_u16_native_set_x
1251{
1252 INTEGER_NATIVE_SET (16, unsigned);
1253}
1254#undef FUNC_NAME
1255
/* Scheme procedure `bytevector-s16-native-set!' (three required
   arguments).  The body is the INTEGER_NATIVE_SET template instantiated
   for signed 16-bit integers.  */
1256SCM_DEFINE (scm_bytevector_s16_native_set_x, "bytevector-s16-native-set!",
1257 3, 0, 0,
1258 (SCM bv, SCM index, SCM value),
1259 "Store the signed integer @var{value} at index @var{index} "
1260 "of @var{bv} using the native endianness.")
1261#define FUNC_NAME s_scm_bytevector_s16_native_set_x
1262{
1263 INTEGER_NATIVE_SET (16, signed);
1264}
1265#undef FUNC_NAME
1266
1267
1268\f
1269/* Operations on 32-bit integers. */
1270
1271/* Unfortunately, on 32-bit machines `SCM' is not large enough to hold
1272 arbitrary 32-bit integers. Thus we fall back to using the
1273 `large_{ref,set}' variants on 32-bit machines. */
1274
/* Body of an integer getter that goes through `bytevector_large_ref'.
   _LEN is the integer width in bits (it is divided by 8 to get the octet
   count) and _SIGN is `signed' or `unsigned'.  ENDIANNESS, the third
   argument of the enclosing procedure, must be a symbol.  */
#define LARGE_INTEGER_REF(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  return bytevector_large_ref ((char *) c_bv + c_index, _len / 8,       \
                               SIGNEDNESS (_sign), endianness);
1281
/* Body of an integer setter that goes through `bytevector_large_set'.
   _LEN is the integer width in bits and _SIGN is `signed' or `unsigned'.
   ENDIANNESS, the fourth argument of the enclosing procedure, must be a
   symbol.  A non-zero result from `bytevector_large_set' is reported as
   an out-of-range error on VALUE.  */
#define LARGE_INTEGER_SET(_len, _sign)                                  \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
                                                                        \
  if (SCM_UNLIKELY (bytevector_large_set ((char *) c_bv + c_index,      \
                                          _len / 8,                     \
                                          SIGNEDNESS (_sign),           \
                                          value, endianness)))          \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1293
/* Same as LARGE_INTEGER_REF, except that the byte order is always
   `scm_i_native_endianness' rather than a caller-supplied symbol.  */
#define LARGE_INTEGER_NATIVE_REF(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
  return bytevector_large_ref ((char *) c_bv + c_index, _len / 8,       \
                               SIGNEDNESS (_sign),                      \
                               scm_i_native_endianness);
1ee2c72e
LC
1298
/* Same as LARGE_INTEGER_SET, except that the byte order is always
   `scm_i_native_endianness' rather than a caller-supplied symbol.  */
#define LARGE_INTEGER_NATIVE_SET(_len, _sign)                           \
  INTEGER_ACCESSOR_PROLOGUE (_len, _sign);                              \
                                                                        \
  if (SCM_UNLIKELY (bytevector_large_set ((char *) c_bv + c_index,      \
                                          _len / 8,                     \
                                          SIGNEDNESS (_sign), value,    \
                                          scm_i_native_endianness)))    \
    scm_out_of_range (FUNC_NAME, value);                                \
                                                                        \
  return SCM_UNSPECIFIED;
1310
1311
/* Scheme procedure `bytevector-u32-ref' (three required arguments).  When
   pointers are wider than 4 bytes the INTEGER_REF template is used;
   otherwise the LARGE_INTEGER_REF fallback is used.  */
1312SCM_DEFINE (scm_bytevector_u32_ref, "bytevector-u32-ref",
1313 3, 0, 0,
1314 (SCM bv, SCM index, SCM endianness),
1315 "Return the unsigned 32-bit integer from @var{bv} at "
1316 "@var{index}.")
1317#define FUNC_NAME s_scm_bytevector_u32_ref
1318{
1319#if SIZEOF_VOID_P > 4
1320 INTEGER_REF (32, unsigned);
1321#else
1322 LARGE_INTEGER_REF (32, unsigned);
1323#endif
1324}
1325#undef FUNC_NAME
1326
/* Scheme procedure `bytevector-s32-ref' (three required arguments).  When
   pointers are wider than 4 bytes the INTEGER_REF template is used;
   otherwise the LARGE_INTEGER_REF fallback is used.  */
1327SCM_DEFINE (scm_bytevector_s32_ref, "bytevector-s32-ref",
1328 3, 0, 0,
1329 (SCM bv, SCM index, SCM endianness),
1330 "Return the signed 32-bit integer from @var{bv} at "
1331 "@var{index}.")
1332#define FUNC_NAME s_scm_bytevector_s32_ref
1333{
1334#if SIZEOF_VOID_P > 4
1335 INTEGER_REF (32, signed);
1336#else
1337 LARGE_INTEGER_REF (32, signed);
1338#endif
1339}
1340#undef FUNC_NAME
1341
/* Scheme procedure `bytevector-u32-native-ref' (two required arguments).
   When pointers are wider than 4 bytes the INTEGER_NATIVE_REF template is
   used; otherwise the LARGE_INTEGER_NATIVE_REF fallback is used.  */
1342SCM_DEFINE (scm_bytevector_u32_native_ref, "bytevector-u32-native-ref",
1343 2, 0, 0,
1344 (SCM bv, SCM index),
1345 "Return the unsigned 32-bit integer from @var{bv} at "
1346 "@var{index} using the native endianness.")
1347#define FUNC_NAME s_scm_bytevector_u32_native_ref
1348{
1349#if SIZEOF_VOID_P > 4
1350 INTEGER_NATIVE_REF (32, unsigned);
1351#else
1352 LARGE_INTEGER_NATIVE_REF (32, unsigned);
1353#endif
1354}
1355#undef FUNC_NAME
1356
1357SCM_DEFINE (scm_bytevector_s32_native_ref, "bytevector-s32-native-ref",
1358 2, 0, 0,
1359 (SCM bv, SCM index),
1360 "Return the unsigned 32-bit integer from @var{bv} at "
1361 "@var{index} using the native endianness.")
1362#define FUNC_NAME s_scm_bytevector_s32_native_ref
1363{
1364#if SIZEOF_VOID_P > 4
1365 INTEGER_NATIVE_REF (32, signed);
1366#else
1367 LARGE_INTEGER_NATIVE_REF (32, signed);
1368#endif
1369}
1370#undef FUNC_NAME
1371
/* Scheme procedure `bytevector-u32-set!' (four required arguments).  When
   pointers are wider than 4 bytes the INTEGER_SET template is used;
   otherwise the LARGE_INTEGER_SET fallback is used.  */
1372SCM_DEFINE (scm_bytevector_u32_set_x, "bytevector-u32-set!",
1373 4, 0, 0,
1374 (SCM bv, SCM index, SCM value, SCM endianness),
1375 "Store @var{value} in @var{bv} at @var{index} according to "
1376 "@var{endianness}.")
1377#define FUNC_NAME s_scm_bytevector_u32_set_x
1378{
1379#if SIZEOF_VOID_P > 4
1380 INTEGER_SET (32, unsigned);
1381#else
1382 LARGE_INTEGER_SET (32, unsigned);
1383#endif
1384}
1385#undef FUNC_NAME
1386
/* Scheme procedure `bytevector-s32-set!' (four required arguments).  When
   pointers are wider than 4 bytes the INTEGER_SET template is used;
   otherwise the LARGE_INTEGER_SET fallback is used.  */
1387SCM_DEFINE (scm_bytevector_s32_set_x, "bytevector-s32-set!",
1388 4, 0, 0,
1389 (SCM bv, SCM index, SCM value, SCM endianness),
1390 "Store @var{value} in @var{bv} at @var{index} according to "
1391 "@var{endianness}.")
1392#define FUNC_NAME s_scm_bytevector_s32_set_x
1393{
1394#if SIZEOF_VOID_P > 4
1395 INTEGER_SET (32, signed);
1396#else
1397 LARGE_INTEGER_SET (32, signed);
1398#endif
1399}
1400#undef FUNC_NAME
1401
/* Scheme procedure `bytevector-u32-native-set!' (three required
   arguments).  When pointers are wider than 4 bytes the
   INTEGER_NATIVE_SET template is used; otherwise the
   LARGE_INTEGER_NATIVE_SET fallback is used.  */
1402SCM_DEFINE (scm_bytevector_u32_native_set_x, "bytevector-u32-native-set!",
1403 3, 0, 0,
1404 (SCM bv, SCM index, SCM value),
1405 "Store the unsigned integer @var{value} at index @var{index} "
1406 "of @var{bv} using the native endianness.")
1407#define FUNC_NAME s_scm_bytevector_u32_native_set_x
1408{
1409#if SIZEOF_VOID_P > 4
1410 INTEGER_NATIVE_SET (32, unsigned);
1411#else
1412 LARGE_INTEGER_NATIVE_SET (32, unsigned);
1413#endif
1414}
1415#undef FUNC_NAME
1416
/* Scheme procedure `bytevector-s32-native-set!' (three required
   arguments).  When pointers are wider than 4 bytes the
   INTEGER_NATIVE_SET template is used; otherwise the
   LARGE_INTEGER_NATIVE_SET fallback is used.  */
1417SCM_DEFINE (scm_bytevector_s32_native_set_x, "bytevector-s32-native-set!",
1418 3, 0, 0,
1419 (SCM bv, SCM index, SCM value),
1420 "Store the signed integer @var{value} at index @var{index} "
1421 "of @var{bv} using the native endianness.")
1422#define FUNC_NAME s_scm_bytevector_s32_native_set_x
1423{
1424#if SIZEOF_VOID_P > 4
1425 INTEGER_NATIVE_SET (32, signed);
1426#else
1427 LARGE_INTEGER_NATIVE_SET (32, signed);
1428#endif
1429}
1430#undef FUNC_NAME
1431
1432
1433\f
1434/* Operations on 64-bit integers. */
1435
1436/* For 64-bit integers, we use only the `large_{ref,set}' variant. */
1437
/* Scheme procedure `bytevector-u64-ref' (three required arguments).  The
   body is the LARGE_INTEGER_REF template instantiated for unsigned
   64-bit integers.  */
1438SCM_DEFINE (scm_bytevector_u64_ref, "bytevector-u64-ref",
1439 3, 0, 0,
1440 (SCM bv, SCM index, SCM endianness),
1441 "Return the unsigned 64-bit integer from @var{bv} at "
1442 "@var{index}.")
1443#define FUNC_NAME s_scm_bytevector_u64_ref
1444{
1445 LARGE_INTEGER_REF (64, unsigned);
1446}
1447#undef FUNC_NAME
1448
/* Scheme procedure `bytevector-s64-ref' (three required arguments).  The
   body is the LARGE_INTEGER_REF template instantiated for signed 64-bit
   integers.  */
1449SCM_DEFINE (scm_bytevector_s64_ref, "bytevector-s64-ref",
1450 3, 0, 0,
1451 (SCM bv, SCM index, SCM endianness),
1452 "Return the signed 64-bit integer from @var{bv} at "
1453 "@var{index}.")
1454#define FUNC_NAME s_scm_bytevector_s64_ref
1455{
1456 LARGE_INTEGER_REF (64, signed);
1457}
1458#undef FUNC_NAME
1459
/* Scheme procedure `bytevector-u64-native-ref' (two required arguments).
   The body is the LARGE_INTEGER_NATIVE_REF template instantiated for
   unsigned 64-bit integers.  */
1460SCM_DEFINE (scm_bytevector_u64_native_ref, "bytevector-u64-native-ref",
1461 2, 0, 0,
1462 (SCM bv, SCM index),
1463 "Return the unsigned 64-bit integer from @var{bv} at "
1464 "@var{index} using the native endianness.")
1465#define FUNC_NAME s_scm_bytevector_u64_native_ref
1466{
1467 LARGE_INTEGER_NATIVE_REF (64, unsigned);
1468}
1469#undef FUNC_NAME
1470
1471SCM_DEFINE (scm_bytevector_s64_native_ref, "bytevector-s64-native-ref",
1472 2, 0, 0,
1473 (SCM bv, SCM index),
1474 "Return the unsigned 64-bit integer from @var{bv} at "
1475 "@var{index} using the native endianness.")
1476#define FUNC_NAME s_scm_bytevector_s64_native_ref
1477{
1478 LARGE_INTEGER_NATIVE_REF (64, signed);
1479}
1480#undef FUNC_NAME
1481
/* Scheme procedure `bytevector-u64-set!' (four required arguments).  The
   body is the LARGE_INTEGER_SET template instantiated for unsigned
   64-bit integers.  */
1482SCM_DEFINE (scm_bytevector_u64_set_x, "bytevector-u64-set!",
1483 4, 0, 0,
1484 (SCM bv, SCM index, SCM value, SCM endianness),
1485 "Store @var{value} in @var{bv} at @var{index} according to "
1486 "@var{endianness}.")
1487#define FUNC_NAME s_scm_bytevector_u64_set_x
1488{
1489 LARGE_INTEGER_SET (64, unsigned);
1490}
1491#undef FUNC_NAME
1492
/* Scheme procedure `bytevector-s64-set!' (four required arguments).  The
   body is the LARGE_INTEGER_SET template instantiated for signed 64-bit
   integers.  */
1493SCM_DEFINE (scm_bytevector_s64_set_x, "bytevector-s64-set!",
1494 4, 0, 0,
1495 (SCM bv, SCM index, SCM value, SCM endianness),
1496 "Store @var{value} in @var{bv} at @var{index} according to "
1497 "@var{endianness}.")
1498#define FUNC_NAME s_scm_bytevector_s64_set_x
1499{
1500 LARGE_INTEGER_SET (64, signed);
1501}
1502#undef FUNC_NAME
1503
/* Scheme procedure `bytevector-u64-native-set!' (three required
   arguments).  The body is the LARGE_INTEGER_NATIVE_SET template
   instantiated for unsigned 64-bit integers.  */
1504SCM_DEFINE (scm_bytevector_u64_native_set_x, "bytevector-u64-native-set!",
1505 3, 0, 0,
1506 (SCM bv, SCM index, SCM value),
1507 "Store the unsigned integer @var{value} at index @var{index} "
1508 "of @var{bv} using the native endianness.")
1509#define FUNC_NAME s_scm_bytevector_u64_native_set_x
1510{
1511 LARGE_INTEGER_NATIVE_SET (64, unsigned);
1512}
1513#undef FUNC_NAME
1514
/* Scheme procedure `bytevector-s64-native-set!' (three required
   arguments).  The body is the LARGE_INTEGER_NATIVE_SET template
   instantiated for signed 64-bit integers.  */
1515SCM_DEFINE (scm_bytevector_s64_native_set_x, "bytevector-s64-native-set!",
1516 3, 0, 0,
1517 (SCM bv, SCM index, SCM value),
1518 "Store the signed integer @var{value} at index @var{index} "
1519 "of @var{bv} using the native endianness.")
1520#define FUNC_NAME s_scm_bytevector_s64_native_set_x
1521{
1522 LARGE_INTEGER_NATIVE_SET (64, signed);
1523}
1524#undef FUNC_NAME
1525
1526
1527\f
1528/* Operations on IEEE-754 numbers. */
1529
1530/* There are two possible word endians, visible in glibc's <ieee754.h>.
1531 However, in R6RS, when the endianness is `little', little endian is
1532 assumed for both the byte order and the word order. This is clear from
1533 Section 2.1 of R6RS-lib (in response to
1534 http://www.r6rs.org/formal-comments/comment-187.txt). */
1535
1536
1537/* Convert to/from a floating-point number with different endianness. This
1538 method is probably not the most efficient but it should be portable. */
1539
1540static inline void
1541float_to_foreign_endianness (union scm_ieee754_float *target,
1542 float source)
1543{
1544 union scm_ieee754_float src;
1545
1546 src.f = source;
1547
1548#ifdef WORDS_BIGENDIAN
1549 /* Assuming little endian for both byte and word order. */
1550 target->little_endian.negative = src.big_endian.negative;
1551 target->little_endian.exponent = src.big_endian.exponent;
1552 target->little_endian.mantissa = src.big_endian.mantissa;
1553#else
1554 target->big_endian.negative = src.little_endian.negative;
1555 target->big_endian.exponent = src.little_endian.exponent;
1556 target->big_endian.mantissa = src.little_endian.mantissa;
1557#endif
1558}
1559
1560static inline float
1561float_from_foreign_endianness (const union scm_ieee754_float *source)
1562{
1563 union scm_ieee754_float result;
1564
1565#ifdef WORDS_BIGENDIAN
1566 /* Assuming little endian for both byte and word order. */
1567 result.big_endian.negative = source->little_endian.negative;
1568 result.big_endian.exponent = source->little_endian.exponent;
1569 result.big_endian.mantissa = source->little_endian.mantissa;
1570#else
1571 result.little_endian.negative = source->big_endian.negative;
1572 result.little_endian.exponent = source->big_endian.exponent;
1573 result.little_endian.mantissa = source->big_endian.mantissa;
1574#endif
1575
1576 return (result.f);
1577}
1578
1579static inline void
1580double_to_foreign_endianness (union scm_ieee754_double *target,
1581 double source)
1582{
1583 union scm_ieee754_double src;
1584
1585 src.d = source;
1586
1587#ifdef WORDS_BIGENDIAN
1588 /* Assuming little endian for both byte and word order. */
1589 target->little_little_endian.negative = src.big_endian.negative;
1590 target->little_little_endian.exponent = src.big_endian.exponent;
1591 target->little_little_endian.mantissa0 = src.big_endian.mantissa0;
1592 target->little_little_endian.mantissa1 = src.big_endian.mantissa1;
1593#else
1594 target->big_endian.negative = src.little_little_endian.negative;
1595 target->big_endian.exponent = src.little_little_endian.exponent;
1596 target->big_endian.mantissa0 = src.little_little_endian.mantissa0;
1597 target->big_endian.mantissa1 = src.little_little_endian.mantissa1;
1598#endif
1599}
1600
1601static inline double
1602double_from_foreign_endianness (const union scm_ieee754_double *source)
1603{
1604 union scm_ieee754_double result;
1605
1606#ifdef WORDS_BIGENDIAN
1607 /* Assuming little endian for both byte and word order. */
1608 result.big_endian.negative = source->little_little_endian.negative;
1609 result.big_endian.exponent = source->little_little_endian.exponent;
1610 result.big_endian.mantissa0 = source->little_little_endian.mantissa0;
1611 result.big_endian.mantissa1 = source->little_little_endian.mantissa1;
1612#else
1613 result.little_little_endian.negative = source->big_endian.negative;
1614 result.little_little_endian.exponent = source->big_endian.exponent;
1615 result.little_little_endian.mantissa0 = source->big_endian.mantissa0;
1616 result.little_little_endian.mantissa1 = source->big_endian.mantissa1;
1617#endif
1618
1619 return (result.d);
1620}
1621
1622/* Template macros to abstract over doubles and floats.
1623 XXX: Guile can only convert to/from doubles. */
/* IEEE754_UNION names the bit-field union for _C_TYPE (`float' or
   `double').  IEEE754_TO_SCM and IEEE754_FROM_SCM ignore _C_TYPE and
   always go through the `double' conversions.  */
1624#define IEEE754_UNION(_c_type) union scm_ieee754_ ## _c_type
1625#define IEEE754_TO_SCM(_c_type) scm_from_double
1626#define IEEE754_FROM_SCM(_c_type) scm_to_double
/* These two select the `_c_type_{from,to}_foreign_endianness' helper.  */
1627#define IEEE754_FROM_FOREIGN_ENDIANNESS(_c_type) \
1628 _c_type ## _from_foreign_endianness
1629#define IEEE754_TO_FOREIGN_ENDIANNESS(_c_type) \
1630 _c_type ## _to_foreign_endianness
1631
1632
1633/* Template getters and setters. */
1634
/* Reuse the integer accessor prologue for floating-point accessors.  The
   size of _TYPE in bytes is shifted left by 3 to give the width in bits,
   which is the unit the integer templates pass as their first argument.  */
1635#define IEEE754_ACCESSOR_PROLOGUE(_type) \
1636 INTEGER_ACCESSOR_PROLOGUE (sizeof (_type) << 3UL, signed);
1637
/* Body of a `bytevector-ieee-{single,double}-ref' procedure; _TYPE is
   `float' or `double'.  ENDIANNESS (argument 3) must be a symbol.  When
   it is not the native endianness, the raw bytes are copied into the
   bit-field union and converted field by field; otherwise they are
   copied straight into the result.  */
#define IEEE754_REF(_type)                                              \
  _type c_result;                                                       \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_SYMBOL (3, endianness);                                  \
                                                                        \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                 \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      memcpy (&c_foreign, &c_bv[c_index], sizeof (c_foreign));          \
      c_result =                                                        \
        IEEE754_FROM_FOREIGN_ENDIANNESS (_type) (&c_foreign);           \
    }                                                                   \
  else                                                                  \
    memcpy (&c_result, &c_bv[c_index], sizeof (c_result));              \
                                                                        \
  return IEEE754_TO_SCM (_type) (c_result);
1656
/* Body of a native-endianness IEEE-754 getter: after the accessor
   prologue, the bytes at C_INDEX are copied unchanged into a _TYPE value
   which is then converted with IEEE754_TO_SCM.  */
1657#define IEEE754_NATIVE_REF(_type) \
1658 _type c_result; \
1659 \
1660 IEEE754_ACCESSOR_PROLOGUE (_type); \
1661 \
1662 memcpy (&c_result, &c_bv[c_index], sizeof (c_result)); \
1663 return (IEEE754_TO_SCM (_type) (c_result));
1664
/* Body of a `bytevector-ieee-{single,double}-set!' procedure; _TYPE is
   `float' or `double'.  VALUE (argument 3) must be a real and ENDIANNESS
   (argument 4) a symbol.  VALUE is obtained as a `double' and assigned to
   a _TYPE variable.  When ENDIANNESS is not the native endianness the
   value goes through the bit-field union before being copied into the
   bytevector; otherwise it is copied as is.  */
#define IEEE754_SET(_type)                                              \
  _type c_value;                                                        \
                                                                        \
  IEEE754_ACCESSOR_PROLOGUE (_type);                                    \
  SCM_VALIDATE_REAL (3, value);                                         \
  SCM_VALIDATE_SYMBOL (4, endianness);                                  \
  c_value = IEEE754_FROM_SCM (_type) (value);                           \
                                                                        \
  if (!scm_is_eq (endianness, scm_i_native_endianness))                 \
    {                                                                   \
      IEEE754_UNION (_type) c_foreign;                                  \
                                                                        \
      IEEE754_TO_FOREIGN_ENDIANNESS (_type) (&c_foreign, c_value);      \
      memcpy (&c_bv[c_index], &c_foreign, sizeof (c_foreign));          \
    }                                                                   \
  else                                                                  \
    memcpy (&c_bv[c_index], &c_value, sizeof (c_value));                \
                                                                        \
  return SCM_UNSPECIFIED;
1684
/* Body of a native-endianness IEEE-754 setter: VALUE (argument 3) must
   be a real; it is converted with IEEE754_FROM_SCM, assigned to a _TYPE
   variable and copied unchanged into the bytevector at C_INDEX.  */
1685#define IEEE754_NATIVE_SET(_type) \
1686 _type c_value; \
1687 \
1688 IEEE754_ACCESSOR_PROLOGUE (_type); \
1689 SCM_VALIDATE_REAL (3, value); \
1690 c_value = IEEE754_FROM_SCM (_type) (value); \
1691 \
1692 memcpy (&c_bv[c_index], &c_value, sizeof (c_value)); \
1693 return SCM_UNSPECIFIED;
1694
1695
1696/* Single precision. */
1697
/* Scheme procedure `bytevector-ieee-single-ref' (three required
   arguments).  The body is the IEEE754_REF template instantiated for
   `float'.  */
1698SCM_DEFINE (scm_bytevector_ieee_single_ref,
1699 "bytevector-ieee-single-ref",
1700 3, 0, 0,
1701 (SCM bv, SCM index, SCM endianness),
1702 "Return the IEEE-754 single from @var{bv} at "
1703 "@var{index}.")
1704#define FUNC_NAME s_scm_bytevector_ieee_single_ref
1705{
1706 IEEE754_REF (float);
1707}
1708#undef FUNC_NAME
1709
/* Scheme procedure `bytevector-ieee-single-native-ref' (two required
   arguments).  The body is the IEEE754_NATIVE_REF template instantiated
   for `float'.  */
1710SCM_DEFINE (scm_bytevector_ieee_single_native_ref,
1711 "bytevector-ieee-single-native-ref",
1712 2, 0, 0,
1713 (SCM bv, SCM index),
1714 "Return the IEEE-754 single from @var{bv} at "
1715 "@var{index} using the native endianness.")
1716#define FUNC_NAME s_scm_bytevector_ieee_single_native_ref
1717{
1718 IEEE754_NATIVE_REF (float);
1719}
1720#undef FUNC_NAME
1721
/* Scheme procedure `bytevector-ieee-single-set!' (four required
   arguments).  The body is the IEEE754_SET template instantiated for
   `float'.  */
1722SCM_DEFINE (scm_bytevector_ieee_single_set_x,
1723 "bytevector-ieee-single-set!",
1724 4, 0, 0,
1725 (SCM bv, SCM index, SCM value, SCM endianness),
1726 "Store real @var{value} in @var{bv} at @var{index} according to "
1727 "@var{endianness}.")
1728#define FUNC_NAME s_scm_bytevector_ieee_single_set_x
1729{
1730 IEEE754_SET (float);
1731}
1732#undef FUNC_NAME
1733
/* Scheme procedure `bytevector-ieee-single-native-set!' (three required
   arguments).  The body is the IEEE754_NATIVE_SET template instantiated
   for `float'.  */
1734SCM_DEFINE (scm_bytevector_ieee_single_native_set_x,
1735 "bytevector-ieee-single-native-set!",
1736 3, 0, 0,
1737 (SCM bv, SCM index, SCM value),
1738 "Store the real @var{value} at index @var{index} "
1739 "of @var{bv} using the native endianness.")
1740#define FUNC_NAME s_scm_bytevector_ieee_single_native_set_x
1741{
1742 IEEE754_NATIVE_SET (float);
1743}
1744#undef FUNC_NAME
1745
1746
1747/* Double precision. */
1748
/* Scheme procedure `bytevector-ieee-double-ref' (three required
   arguments).  The body is the IEEE754_REF template instantiated for
   `double'.  */
1749SCM_DEFINE (scm_bytevector_ieee_double_ref,
1750 "bytevector-ieee-double-ref",
1751 3, 0, 0,
1752 (SCM bv, SCM index, SCM endianness),
1753 "Return the IEEE-754 double from @var{bv} at "
1754 "@var{index}.")
1755#define FUNC_NAME s_scm_bytevector_ieee_double_ref
1756{
1757 IEEE754_REF (double);
1758}
1759#undef FUNC_NAME
1760
/* Scheme procedure `bytevector-ieee-double-native-ref' (two required
   arguments).  The body is the IEEE754_NATIVE_REF template instantiated
   for `double'.  */
1761SCM_DEFINE (scm_bytevector_ieee_double_native_ref,
1762 "bytevector-ieee-double-native-ref",
1763 2, 0, 0,
1764 (SCM bv, SCM index),
1765 "Return the IEEE-754 double from @var{bv} at "
1766 "@var{index} using the native endianness.")
1767#define FUNC_NAME s_scm_bytevector_ieee_double_native_ref
1768{
1769 IEEE754_NATIVE_REF (double);
1770}
1771#undef FUNC_NAME
1772
/* Scheme procedure `bytevector-ieee-double-set!' (four required
   arguments).  The body is the IEEE754_SET template instantiated for
   `double'.  */
1773SCM_DEFINE (scm_bytevector_ieee_double_set_x,
1774 "bytevector-ieee-double-set!",
1775 4, 0, 0,
1776 (SCM bv, SCM index, SCM value, SCM endianness),
1777 "Store real @var{value} in @var{bv} at @var{index} according to "
1778 "@var{endianness}.")
1779#define FUNC_NAME s_scm_bytevector_ieee_double_set_x
1780{
1781 IEEE754_SET (double);
1782}
1783#undef FUNC_NAME
1784
/* Scheme procedure `bytevector-ieee-double-native-set!' (three required
   arguments).  The body is the IEEE754_NATIVE_SET template instantiated
   for `double'.  */
1785SCM_DEFINE (scm_bytevector_ieee_double_native_set_x,
1786 "bytevector-ieee-double-native-set!",
1787 3, 0, 0,
1788 (SCM bv, SCM index, SCM value),
1789 "Store the real @var{value} at index @var{index} "
1790 "of @var{bv} using the native endianness.")
1791#define FUNC_NAME s_scm_bytevector_ieee_double_native_set_x
1792{
1793 IEEE754_NATIVE_SET (double);
1794}
1795#undef FUNC_NAME
1796
1797
/* The IEEE-754 template macros are only meant for the procedures above;
   undefine all of them, including the accessor prologue.  */
#undef IEEE754_UNION
#undef IEEE754_TO_SCM
#undef IEEE754_FROM_SCM
#undef IEEE754_FROM_FOREIGN_ENDIANNESS
#undef IEEE754_TO_FOREIGN_ENDIANNESS
#undef IEEE754_ACCESSOR_PROLOGUE
#undef IEEE754_REF
#undef IEEE754_NATIVE_REF
#undef IEEE754_SET
#undef IEEE754_NATIVE_SET
1807
1808\f
1809/* Operations on strings. */
1810
1811
/* Define `utfN_strlen', where N is _UTF_WIDTH.  The generated function
   walks STR up to (not including) its first zero code unit and returns
   the number of code units seen multiplied by the code-unit size in
   bytes, i.e. a length in bytes.  */
#define UTF_STRLEN_FUNCTION(_utf_width)                                 \
static inline size_t                                                    \
utf ## _utf_width ## _strlen (const uint ## _utf_width ## _t *str)      \
{                                                                       \
  const uint ## _utf_width ## _t *end = str;                            \
                                                                        \
  while (*end != 0)                                                     \
    end++;                                                              \
                                                                        \
  return ((size_t) (end - str) * ((_utf_width) / 8));                   \
}

/* Only the 8-bit variant is instantiated.  */
UTF_STRLEN_FUNCTION (8)


/* Return the length (in bytes) of STR, a UTF-(UTF_WIDTH) encoded string.  */
#define UTF_STRLEN(_utf_width, _str)                                    \
  utf ## _utf_width ## _strlen (_str)
1835
1836/* Return the "portable" name of the UTF encoding of size UTF_WIDTH and
1837 ENDIANNESS (Gnulib's `iconv_open' module guarantees the portability of the
1838 encoding name). */
1839static inline void
1840utf_encoding_name (char *name, size_t utf_width, SCM endianness)
1841{
1842 strcpy (name, "UTF-");
1843 strcat (name, ((utf_width == 8)
1844 ? "8"
1845 : ((utf_width == 16)
1846 ? "16"
1847 : ((utf_width == 32)
1848 ? "32"
1849 : "??"))));
1850 strcat (name,
1851 ((scm_is_eq (endianness, scm_sym_big))
1852 ? "BE"
1853 : ((scm_is_eq (endianness, scm_sym_little))
1854 ? "LE"
1855 : "unknown")));
1856}
1857
1858/* Maximum length of a UTF encoding name. */
1859#define MAX_UTF_ENCODING_NAME_LEN 16
1860
/* Produce the body of a `string->utf' function.  The expansion expects
   `str' and the optional `endianness' to be in scope, as well as
   FUNC_NAME.  ENDIANNESS defaults to `scm_sym_big' when unbound.

   STR is first copied out in the locale encoding, then converted with
   `mem_iconveh' to the UTF encoding named by `utf_encoding_name'.  The
   converted buffer is handed to `scm_c_take_bytevector'.  On conversion
   failure a system error is raised using `errno', which is where
   `mem_iconveh' reports the cause (its return value is only -1).
   NOTE(review): `errno' is assumed to be declared through
   "libguile/_scm.h"; confirm.  */
#define STRING_TO_UTF(_utf_width)                                       \
  SCM utf = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str;                                                          \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  char *c_utf = NULL, *c_locale;                                        \
  const char *c_charset;                                                \
  size_t c_buf_len, c_raw_strlen, c_utf_len = 0;                        \
                                                                        \
  SCM_VALIDATE_STRING (1, str);                                         \
  if (SCM_UNBNDP (endianness))                                          \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  /* `scm_to_locale_stringbuf' returns the number of bytes needed, */   \
  /* which may exceed the buffer size.  Retry with a buffer of     */   \
  /* that size until the whole string fits.                        */   \
  c_buf_len = scm_c_string_length (str);                                \
  for (;;)                                                              \
    {                                                                   \
      c_str = (char *) alloca (c_buf_len + 1);                          \
      c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_buf_len);   \
      if (c_raw_strlen <= c_buf_len)                                    \
        break;                                                          \
      c_buf_len = c_raw_strlen;                                         \
    }                                                                   \
  c_str[c_raw_strlen] = '\0';                                           \
                                                                        \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_charset = locale_charset ();                                        \
  c_locale = (char *) alloca (strlen (c_charset) + 1);                  \
  strcpy (c_locale, c_charset);                                         \
                                                                        \
  err = mem_iconveh (c_str, c_raw_strlen,                               \
                     c_locale, c_utf_name,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_utf, &c_utf_len);                               \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert string: ~A",        \
                      scm_list_1 (str), errno);                         \
  else                                                                  \
    utf = scm_c_take_bytevector ((signed char *) c_utf,                 \
                                 c_utf_len);                            \
                                                                        \
  return (utf);
1904
1905
1906
1907SCM_DEFINE (scm_string_to_utf8, "string->utf8",
1908 1, 0, 0,
1909 (SCM str),
1910 "Return a newly allocated bytevector that contains the UTF-8 "
1911 "encoding of @var{str}.")
1912#define FUNC_NAME s_scm_string_to_utf8
1913{
1914 SCM utf;
1915 char *c_str;
1916 uint8_t *c_utf;
1917 size_t c_strlen, c_raw_strlen;
1918
1919 SCM_VALIDATE_STRING (1, str);
1920
1921 c_strlen = scm_c_string_length (str);
1922 c_raw_strlen = c_strlen;
1923 do
1924 {
1925 c_str = (char *) alloca (c_raw_strlen + 1);
1926 c_raw_strlen = scm_to_locale_stringbuf (str, c_str, c_strlen);
1927 }
1928 while (c_raw_strlen > c_strlen);
1929 c_str[c_raw_strlen] = '\0';
1930
1931 c_utf = u8_strconv_from_locale (c_str);
1932 if (SCM_UNLIKELY (c_utf == NULL))
1933 scm_syserror (FUNC_NAME);
1934 else
1935 /* C_UTF is null-terminated. */
1936 utf = scm_c_take_bytevector ((signed char *) c_utf,
1937 UTF_STRLEN (8, c_utf));
1938
1939 return (utf);
1940}
1941#undef FUNC_NAME
1942
/* Scheme procedure `string->utf16' (one required and one optional
   argument).  The body is the STRING_TO_UTF template instantiated for a
   16-bit encoding.  */
1943SCM_DEFINE (scm_string_to_utf16, "string->utf16",
1944 1, 1, 0,
1945 (SCM str, SCM endianness),
1946 "Return a newly allocated bytevector that contains the UTF-16 "
1947 "encoding of @var{str}.")
1948#define FUNC_NAME s_scm_string_to_utf16
1949{
1950 STRING_TO_UTF (16);
1951}
1952#undef FUNC_NAME
1953
/* Scheme procedure `string->utf32' (one required and one optional
   argument).  The body is the STRING_TO_UTF template instantiated for a
   32-bit encoding.  */
1954SCM_DEFINE (scm_string_to_utf32, "string->utf32",
1955 1, 1, 0,
1956 (SCM str, SCM endianness),
1957 "Return a newly allocated bytevector that contains the UTF-32 "
1958 "encoding of @var{str}.")
1959#define FUNC_NAME s_scm_string_to_utf32
1960{
1961 STRING_TO_UTF (32);
1962}
1963#undef FUNC_NAME
1964
1965
/* Produce the body of a function that converts a UTF-encoded bytevector
   to a string.  The expansion expects `utf' and the optional
   `endianness' to be in scope, as well as FUNC_NAME.  ENDIANNESS
   defaults to `scm_sym_big' when unbound.

   The bytevector contents are converted with `mem_iconveh' from the UTF
   encoding named by `utf_encoding_name' to the locale charset, and the
   converted buffer is handed to `scm_take_locale_stringn'.  On
   conversion failure a system error is raised using `errno', which is
   where `mem_iconveh' reports the cause (its return value is only -1).
   NOTE(review): `errno' is assumed to be declared through
   "libguile/_scm.h"; confirm.  */
#define UTF_TO_STRING(_utf_width)                                       \
  SCM str = SCM_BOOL_F;                                                 \
  int err;                                                              \
  char *c_str = NULL, *c_locale;                                        \
  char c_utf_name[MAX_UTF_ENCODING_NAME_LEN];                           \
  const char *c_utf;                                                    \
  const char *c_charset;                                                \
  size_t c_strlen = 0, c_utf_len;                                       \
                                                                        \
  SCM_VALIDATE_BYTEVECTOR (1, utf);                                     \
  if (SCM_UNBNDP (endianness))                                          \
    endianness = scm_sym_big;                                           \
  else                                                                  \
    SCM_VALIDATE_SYMBOL (2, endianness);                                \
                                                                        \
  c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);                              \
  c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);                       \
  utf_encoding_name (c_utf_name, (_utf_width), endianness);             \
                                                                        \
  c_charset = locale_charset ();                                        \
  c_locale = (char *) alloca (strlen (c_charset) + 1);                  \
  strcpy (c_locale, c_charset);                                         \
                                                                        \
  err = mem_iconveh (c_utf, c_utf_len,                                  \
                     c_utf_name, c_locale,                              \
                     iconveh_question_mark, NULL,                       \
                     &c_str, &c_strlen);                                \
  if (SCM_UNLIKELY (err))                                               \
    scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",     \
                      scm_list_1 (utf), errno);                         \
  else                                                                  \
    str = scm_take_locale_stringn (c_str, c_strlen);                    \
                                                                        \
  return (str);
2001
2002
2003SCM_DEFINE (scm_utf8_to_string, "utf8->string",
2004 1, 0, 0,
2005 (SCM utf),
2006 "Return a newly allocate string that contains from the UTF-8-"
2007 "encoded contents of bytevector @var{utf}.")
2008#define FUNC_NAME s_scm_utf8_to_string
2009{
2010 SCM str;
2011 int err;
2012 char *c_str = NULL, *c_locale;
2013 const char *c_utf;
2014 size_t c_utf_len, c_strlen = 0;
2015
2016 SCM_VALIDATE_BYTEVECTOR (1, utf);
2017
2018 c_utf_len = SCM_BYTEVECTOR_LENGTH (utf);
2019
2020 c_locale = (char *) alloca (strlen (locale_charset ()) + 1);
2021 strcpy (c_locale, locale_charset ());
2022
2023 c_utf = (char *) SCM_BYTEVECTOR_CONTENTS (utf);
2024 err = mem_iconveh (c_utf, c_utf_len,
2025 "UTF-8", c_locale,
2026 iconveh_question_mark, NULL,
2027 &c_str, &c_strlen);
2028 if (SCM_UNLIKELY (err))
2029 scm_syserror_msg (FUNC_NAME, "failed to convert to string: ~A",
2030 scm_list_1 (utf), err);
2031 else
2032 /* C_STR is null-terminated. */
2033 str = scm_take_locale_stringn (c_str, c_strlen);
2034
2035 return (str);
2036}
2037#undef FUNC_NAME
2038
2039SCM_DEFINE (scm_utf16_to_string, "utf16->string",
2040 1, 1, 0,
2041 (SCM utf, SCM endianness),
2042 "Return a newly allocate string that contains from the UTF-16-"
2043 "encoded contents of bytevector @var{utf}.")
2044#define FUNC_NAME s_scm_utf16_to_string
2045{
2046 UTF_TO_STRING (16);
2047}
2048#undef FUNC_NAME
2049
2050SCM_DEFINE (scm_utf32_to_string, "utf32->string",
2051 1, 1, 0,
2052 (SCM utf, SCM endianness),
2053 "Return a newly allocate string that contains from the UTF-32-"
2054 "encoded contents of bytevector @var{utf}.")
2055#define FUNC_NAME s_scm_utf32_to_string
2056{
2057 UTF_TO_STRING (32);
2058}
2059#undef FUNC_NAME
2060
2061
2062\f
2a610be5
AW
2063/* Bytevectors as generalized vectors & arrays. */
2064
2065static SCM
2066bv_handle_ref (scm_t_array_handle *h, size_t index)
2067{
2068 return SCM_I_MAKINUM (scm_c_bytevector_ref (h->array, index));
2069}
2070
2071static void
2072bv_handle_set_x (scm_t_array_handle *h, size_t index, SCM val)
2073{
2074 scm_c_bytevector_set_x (h->array, index, scm_to_uint8 (val));
2075}
2076
2077static void
2078bytevector_get_handle (SCM v, scm_t_array_handle *h)
2079{
2080 h->array = v;
2081 h->ndims = 1;
2082 h->dims = &h->dim0;
2083 h->dim0.lbnd = 0;
2084 h->dim0.ubnd = SCM_BYTEVECTOR_LENGTH (v) - 1;
2085 h->dim0.inc = 1;
2086 h->element_type = SCM_ARRAY_ELEMENT_TYPE_VU8;
2087 h->elements = h->writable_elements = SCM_BYTEVECTOR_CONTENTS (v);
2088}
2089
2090\f
1ee2c72e
LC
2091/* Initialization. */
2092
cfb4702f
LC
2093void
2094scm_bootstrap_bytevectors (void)
2095{
2096 /* The SMOB type must be instantiated here because the
2097 generalized-vector API may want to access bytevectors even though
2098 `(rnrs bytevector)' hasn't been loaded. */
2099 scm_tc16_bytevector = scm_make_smob_type ("bytevector", 0);
2100 scm_set_smob_free (scm_tc16_bytevector, free_bytevector);
2101 scm_set_smob_print (scm_tc16_bytevector, print_bytevector);
2102 scm_set_smob_equalp (scm_tc16_bytevector, bytevector_equal_p);
2103
2104 scm_null_bytevector =
2105 scm_gc_protect_object (make_bytevector_from_buffer (0, NULL));
2106
caa92f5e
AW
2107#ifdef WORDS_BIGENDIAN
2108 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("big"));
2109#else
2110 scm_i_native_endianness = scm_permanent_object (scm_from_locale_symbol ("little"));
2111#endif
2112
cfb4702f
LC
2113 scm_c_register_extension ("libguile", "scm_init_bytevectors",
2114 (scm_t_extension_init_func) scm_init_bytevectors,
2115 NULL);
2a610be5
AW
2116
2117 {
2118 scm_t_array_implementation impl;
2119 impl.tag = scm_tc16_bytevector;
2120 impl.mask = 0xffff;
2121 impl.vref = bv_handle_ref;
2122 impl.vset = bv_handle_set_x;
2123 impl.get_handle = bytevector_get_handle;
2124 scm_i_register_array_implementation (&impl);
2125 }
cfb4702f
LC
2126}
2127
1ee2c72e
LC
2128void
2129scm_init_bytevectors (void)
2130{
2131#include "libguile/bytevectors.x"
2132
1ee2c72e
LC
2133 scm_endianness_big = scm_sym_big;
2134 scm_endianness_little = scm_sym_little;
1ee2c72e 2135}