Bump version number for 1.9.9.
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include <string.h>
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <uninorm.h>
29 #include <unistr.h>
30 #include <uniconv.h>
31
32 #include "striconveh.h"
33
34 #include "libguile/_scm.h"
35 #include "libguile/chars.h"
36 #include "libguile/root.h"
37 #include "libguile/strings.h"
38 #include "libguile/error.h"
39 #include "libguile/generalized-vectors.h"
40 #include "libguile/deprecation.h"
41 #include "libguile/validate.h"
42 #include "libguile/private-options.h"
43
44 \f
45
46 /* {Strings}
47 */
48
49
50 /* Stringbufs
51 *
52 * XXX - keeping an accurate refcount during GC seems to be quite
53 * tricky, so we just keep score of whether a stringbuf might be
54 * shared, not whether it definitely is.
55 *
56 * The scheme I (mvo) tried to keep an accurate reference count would
57 * recount all strings that point to a stringbuf during the mark-phase
58 * of the GC. This was done since one cannot access the stringbuf of
59 * a string when that string is freed (in order to decrease the
60 * reference count). The memory of the stringbuf might have been
61 * reused already for something completely different.
62 *
63 * This recounted worked for a small number of threads beating on
64 * cow-strings, but it failed randomly with more than 10 threads, say.
65 * I couldn't figure out what went wrong, so I used the conservative
66 * approach implemented below.
67 *
68 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
69 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
70 * strings.
71 */
72
73 /* The size in words of the stringbuf header (type tag + size). */
74 #define STRINGBUF_HEADER_SIZE 2U
75
76 #define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
77
78 #define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
79 #define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
80
81 #define STRINGBUF_TAG scm_tc7_stringbuf
82 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
83 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
84
85 #define STRINGBUF_CHARS(buf) ((unsigned char *) \
86 SCM_CELL_OBJECT_LOC (buf, \
87 STRINGBUF_HEADER_SIZE))
88 #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
89
90 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CHARS (buf))
91
92 #define SET_STRINGBUF_SHARED(buf) \
93 do \
94 { \
95 /* Don't modify BUF if it's already marked as shared since it might be \
96 a read-only, statically allocated stringbuf. */ \
97 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
98 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
99 } \
100 while (0)
101
102 #ifdef SCM_STRING_LENGTH_HISTOGRAM
103 static size_t lenhist[1001];
104 #endif
105
106 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
107 characters. */
108 static SCM
109 make_stringbuf (size_t len)
110 {
111 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
112 scm_i_symbol_chars, all stringbufs are null-terminated. Once
113 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
114 has been changed for scm_i_symbol_chars, this null-termination
115 can be dropped.
116 */
117
118 SCM buf;
119
120 #ifdef SCM_STRING_LENGTH_HISTOGRAM
121 if (len < 1000)
122 lenhist[len]++;
123 else
124 lenhist[1000]++;
125 #endif
126
127 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
128 "string"));
129
130 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
131 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
132
133 STRINGBUF_CHARS (buf)[len] = 0;
134
135 return buf;
136 }
137
138 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
139 characters. */
140 static SCM
141 make_wide_stringbuf (size_t len)
142 {
143 SCM buf;
144 size_t raw_len;
145
146 #ifdef SCM_STRING_LENGTH_HISTOGRAM
147 if (len < 1000)
148 lenhist[len]++;
149 else
150 lenhist[1000]++;
151 #endif
152
153 raw_len = (len + 1) * sizeof (scm_t_wchar);
154 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
155 "string"));
156
157 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
158 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
159
160 STRINGBUF_WIDE_CHARS (buf)[len] = 0;
161
162 return buf;
163 }
164
165 /* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
166 characters from BUF. */
167 static SCM
168 wide_stringbuf (SCM buf)
169 {
170 SCM new_buf;
171
172 if (STRINGBUF_WIDE (buf))
173 new_buf = buf;
174 else
175 {
176 size_t i, len;
177 scm_t_wchar *mem;
178
179 len = STRINGBUF_LENGTH (buf);
180
181 new_buf = make_wide_stringbuf (len);
182
183 mem = STRINGBUF_WIDE_CHARS (new_buf);
184 for (i = 0; i < len; i++)
185 mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
186 mem[len] = 0;
187 }
188
189 return new_buf;
190 }
191
192 /* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
193 characters from BUF, if possible. */
194 static SCM
195 narrow_stringbuf (SCM buf)
196 {
197 SCM new_buf;
198
199 if (!STRINGBUF_WIDE (buf))
200 new_buf = buf;
201 else
202 {
203 size_t i, len;
204 scm_t_wchar *wmem;
205 unsigned char *mem;
206
207 len = STRINGBUF_LENGTH (buf);
208 wmem = STRINGBUF_WIDE_CHARS (buf);
209
210 for (i = 0; i < len; i++)
211 if (wmem[i] > 0xFF)
212 /* BUF cannot be narrowed. */
213 return buf;
214
215 new_buf = make_stringbuf (len);
216
217 mem = STRINGBUF_CHARS (new_buf);
218 for (i = 0; i < len; i++)
219 mem[i] = (unsigned char) wmem[i];
220 mem[len] = 0;
221 }
222
223 return new_buf;
224 }
225
226 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
227
228 \f
229 /* Copy-on-write strings.
230 */
231
232 #define STRING_TAG scm_tc7_string
233
234 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
235 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
236 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
237
238 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
239 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
240
241 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
242
243 /* Read-only strings.
244 */
245
246 #define RO_STRING_TAG scm_tc7_ro_string
247 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
248
249 /* Mutation-sharing substrings
250 */
251
252 #define SH_STRING_TAG (scm_tc7_string + 0x100)
253
254 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
255 /* START and LENGTH as for STRINGs. */
256
257 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
258
259 SCM scm_nullstr;
260
261 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
262 characters. CHARSP, if not NULL, will be set to location of the
263 char array. */
264 SCM
265 scm_i_make_string (size_t len, char **charsp)
266 {
267 SCM buf = make_stringbuf (len);
268 SCM res;
269 if (charsp)
270 *charsp = (char *) STRINGBUF_CHARS (buf);
271 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
272 (scm_t_bits)0, (scm_t_bits) len);
273 return res;
274 }
275
276 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
277 characters. CHARSP, if not NULL, will be set to location of the
278 character array. */
279 SCM
280 scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
281 {
282 SCM buf = make_wide_stringbuf (len);
283 SCM res;
284 if (charsp)
285 *charsp = STRINGBUF_WIDE_CHARS (buf);
286 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
287 (scm_t_bits) 0, (scm_t_bits) len);
288 return res;
289 }
290
291 static void
292 validate_substring_args (SCM str, size_t start, size_t end)
293 {
294 if (!IS_STRING (str))
295 scm_wrong_type_arg_msg (NULL, 0, str, "string");
296 if (start > STRING_LENGTH (str))
297 scm_out_of_range (NULL, scm_from_size_t (start));
298 if (end > STRING_LENGTH (str) || end < start)
299 scm_out_of_range (NULL, scm_from_size_t (end));
300 }
301
302 static inline void
303 get_str_buf_start (SCM *str, SCM *buf, size_t *start)
304 {
305 *start = STRING_START (*str);
306 if (IS_SH_STRING (*str))
307 {
308 *str = SH_STRING_STRING (*str);
309 *start += STRING_START (*str);
310 }
311 *buf = STRING_STRINGBUF (*str);
312 }
313
314 SCM
315 scm_i_substring (SCM str, size_t start, size_t end)
316 {
317 SCM buf;
318 size_t str_start;
319 get_str_buf_start (&str, &buf, &str_start);
320 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
321 SET_STRINGBUF_SHARED (buf);
322 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
323 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
324 (scm_t_bits)str_start + start,
325 (scm_t_bits) end - start);
326 }
327
328 SCM
329 scm_i_substring_read_only (SCM str, size_t start, size_t end)
330 {
331 SCM buf;
332 size_t str_start;
333 get_str_buf_start (&str, &buf, &str_start);
334 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
335 SET_STRINGBUF_SHARED (buf);
336 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
337 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
338 (scm_t_bits)str_start + start,
339 (scm_t_bits) end - start);
340 }
341
342 SCM
343 scm_i_substring_copy (SCM str, size_t start, size_t end)
344 {
345 size_t len = end - start;
346 SCM buf, my_buf;
347 size_t str_start;
348 get_str_buf_start (&str, &buf, &str_start);
349 if (scm_i_is_narrow_string (str))
350 {
351 my_buf = make_stringbuf (len);
352 memcpy (STRINGBUF_CHARS (my_buf),
353 STRINGBUF_CHARS (buf) + str_start + start, len);
354 }
355 else
356 {
357 my_buf = make_wide_stringbuf (len);
358 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
359 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
360 + start), len);
361 /* Even though this string is wide, the substring may be narrow.
362 Consider adding code to narrow the string. */
363 }
364 scm_remember_upto_here_1 (buf);
365 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
366 (scm_t_bits) 0, (scm_t_bits) len);
367 }
368
369 SCM
370 scm_i_substring_shared (SCM str, size_t start, size_t end)
371 {
372 if (start == 0 && end == STRING_LENGTH (str))
373 return str;
374 else
375 {
376 size_t len = end - start;
377 if (IS_SH_STRING (str))
378 {
379 start += STRING_START (str);
380 str = SH_STRING_STRING (str);
381 }
382 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
383 (scm_t_bits)start, (scm_t_bits) len);
384 }
385 }
386
387 SCM
388 scm_c_substring (SCM str, size_t start, size_t end)
389 {
390 validate_substring_args (str, start, end);
391 return scm_i_substring (str, start, end);
392 }
393
394 SCM
395 scm_c_substring_read_only (SCM str, size_t start, size_t end)
396 {
397 validate_substring_args (str, start, end);
398 return scm_i_substring_read_only (str, start, end);
399 }
400
401 SCM
402 scm_c_substring_copy (SCM str, size_t start, size_t end)
403 {
404 validate_substring_args (str, start, end);
405 return scm_i_substring_copy (str, start, end);
406 }
407
408 SCM
409 scm_c_substring_shared (SCM str, size_t start, size_t end)
410 {
411 validate_substring_args (str, start, end);
412 return scm_i_substring_shared (str, start, end);
413 }
414
415 \f
416 /* Internal accessors
417 */
418
419 /* Returns the number of characters in STR. This may be different
420 than the memory size of the string storage. */
421 size_t
422 scm_i_string_length (SCM str)
423 {
424 return STRING_LENGTH (str);
425 }
426
427 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
428 encoding. False if it is 'wide', having a 32-bit UCS-4
429 encoding. */
430 int
431 scm_i_is_narrow_string (SCM str)
432 {
433 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
434 }
435
436 /* Try to coerce a string to be narrow. It if is narrow already, do
437 nothing. If it is wide, shrink it to narrow if none of its
438 characters are above 0xFF. Return true if the string is narrow or
439 was made to be narrow. */
440 int
441 scm_i_try_narrow_string (SCM str)
442 {
443 SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
444
445 return scm_i_is_narrow_string (str);
446 }
447
448 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
449 STR. */
450 const char *
451 scm_i_string_chars (SCM str)
452 {
453 SCM buf;
454 size_t start;
455 get_str_buf_start (&str, &buf, &start);
456 if (scm_i_is_narrow_string (str))
457 return (const char *) STRINGBUF_CHARS (buf) + start;
458 else
459 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
460 scm_list_1 (str));
461 return NULL;
462 }
463
464 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
465 STR. */
466 const scm_t_wchar *
467 scm_i_string_wide_chars (SCM str)
468 {
469 SCM buf;
470 size_t start;
471
472 get_str_buf_start (&str, &buf, &start);
473 if (!scm_i_is_narrow_string (str))
474 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
475 else
476 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
477 scm_list_1 (str));
478 }
479
480 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
481 a new string buffer, so that it can be modified without modifying
482 other strings. Also, lock the string mutex. Later, one must call
483 scm_i_string_stop_writing to unlock the mutex. */
484 SCM
485 scm_i_string_start_writing (SCM orig_str)
486 {
487 SCM buf, str = orig_str;
488 size_t start;
489
490 get_str_buf_start (&str, &buf, &start);
491 if (IS_RO_STRING (str))
492 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
493
494 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
495 if (STRINGBUF_SHARED (buf))
496 {
497 /* Clone the stringbuf. */
498 size_t len = STRING_LENGTH (str);
499 SCM new_buf;
500
501 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
502
503 if (scm_i_is_narrow_string (str))
504 {
505 new_buf = make_stringbuf (len);
506 memcpy (STRINGBUF_CHARS (new_buf),
507 STRINGBUF_CHARS (buf) + STRING_START (str), len);
508
509 }
510 else
511 {
512 new_buf = make_wide_stringbuf (len);
513 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
514 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
515 + STRING_START (str)), len);
516 }
517
518 SET_STRING_STRINGBUF (str, new_buf);
519 start -= STRING_START (str);
520
521 /* FIXME: The following operations are not atomic, so other threads
522 looking at STR may see an inconsistent state. Nevertheless it can't
523 hurt much since (i) accessing STR while it is being mutated can't
524 yield a crash, and (ii) concurrent accesses to STR should be
525 protected by a mutex at the application level. The latter may not
526 apply when STR != ORIG_STR, though. */
527 SET_STRING_START (str, 0);
528 SET_STRING_STRINGBUF (str, new_buf);
529
530 buf = new_buf;
531
532 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
533 }
534 return orig_str;
535 }
536
537 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
538 char *
539 scm_i_string_writable_chars (SCM str)
540 {
541 SCM buf;
542 size_t start;
543
544 get_str_buf_start (&str, &buf, &start);
545 if (scm_i_is_narrow_string (str))
546 return (char *) STRINGBUF_CHARS (buf) + start;
547 else
548 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
549 scm_list_1 (str));
550 return NULL;
551 }
552
553 /* Return a pointer to the UCS-4 codepoints of a string. */
554 static scm_t_wchar *
555 scm_i_string_writable_wide_chars (SCM str)
556 {
557 SCM buf;
558 size_t start;
559
560 get_str_buf_start (&str, &buf, &start);
561 if (!scm_i_is_narrow_string (str))
562 return STRINGBUF_WIDE_CHARS (buf) + start;
563 else
564 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
565 scm_list_1 (str));
566 }
567
568 /* Unlock the string mutex that was locked when
569 scm_i_string_start_writing was called. */
570 void
571 scm_i_string_stop_writing (void)
572 {
573 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
574 }
575
576 /* Return the Xth character of STR as a UCS-4 codepoint. */
577 scm_t_wchar
578 scm_i_string_ref (SCM str, size_t x)
579 {
580 if (scm_i_is_narrow_string (str))
581 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
582 else
583 return scm_i_string_wide_chars (str)[x];
584 }
585
586 /* Returns index+1 of the first char in STR that matches C, or
587 0 if the char is not found. */
588 int
589 scm_i_string_contains_char (SCM str, char ch)
590 {
591 size_t i;
592 size_t len = scm_i_string_length (str);
593
594 i = 0;
595 if (scm_i_is_narrow_string (str))
596 {
597 while (i < len)
598 {
599 if (scm_i_string_chars (str)[i] == ch)
600 return i+1;
601 i++;
602 }
603 }
604 else
605 {
606 while (i < len)
607 {
608 if (scm_i_string_wide_chars (str)[i]
609 == (unsigned char) ch)
610 return i+1;
611 i++;
612 }
613 }
614 return 0;
615 }
616
617 int
618 scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
619 {
620 if (scm_i_is_narrow_string (sstr))
621 {
622 const char *a = scm_i_string_chars (sstr) + start_x;
623 const char *b = cstr;
624 return strncmp (a, b, strlen(b));
625 }
626 else
627 {
628 size_t i;
629 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
630 const char *b = cstr;
631 for (i = 0; i < strlen (b); i++)
632 {
633 if (a[i] != (unsigned char) b[i])
634 return 1;
635 }
636 }
637 return 0;
638 }
639
640 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
641 void
642 scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
643 {
644 if (chr > 0xFF && scm_i_is_narrow_string (str))
645 SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
646
647 if (scm_i_is_narrow_string (str))
648 {
649 char *dst = scm_i_string_writable_chars (str);
650 dst[p] = chr;
651 }
652 else
653 {
654 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
655 dst[p] = chr;
656 }
657 }
658
659 \f
660 /* Symbols.
661
662 Basic symbol creation and accessing is done here, the rest is in
663 symbols.[hc]. This has been done to keep stringbufs and the
664 internals of strings and string-like objects confined to this file.
665 */
666
667 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
668
669 SCM
670 scm_i_make_symbol (SCM name, scm_t_bits flags,
671 unsigned long hash, SCM props)
672 {
673 SCM buf;
674 size_t start = STRING_START (name);
675 size_t length = STRING_LENGTH (name);
676
677 if (IS_SH_STRING (name))
678 {
679 name = SH_STRING_STRING (name);
680 start += STRING_START (name);
681 }
682 buf = SYMBOL_STRINGBUF (name);
683
684 if (start == 0 && length == STRINGBUF_LENGTH (buf))
685 {
686 /* reuse buf. */
687 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
688 SET_STRINGBUF_SHARED (buf);
689 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
690 }
691 else
692 {
693 /* make new buf. */
694 if (scm_i_is_narrow_string (name))
695 {
696 SCM new_buf = make_stringbuf (length);
697 memcpy (STRINGBUF_CHARS (new_buf),
698 STRINGBUF_CHARS (buf) + start, length);
699 buf = new_buf;
700 }
701 else
702 {
703 SCM new_buf = make_wide_stringbuf (length);
704 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
705 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
706 length);
707 buf = new_buf;
708 }
709 }
710 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
711 (scm_t_bits) hash, SCM_UNPACK (props));
712 }
713
714 SCM
715 scm_i_c_make_symbol (const char *name, size_t len,
716 scm_t_bits flags, unsigned long hash, SCM props)
717 {
718 SCM buf = make_stringbuf (len);
719 memcpy (STRINGBUF_CHARS (buf), name, len);
720
721 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
722 (scm_t_bits) hash, SCM_UNPACK (props));
723 }
724
725 /* Returns the number of characters in SYM. This may be different
726 from the memory size of SYM. */
727 size_t
728 scm_i_symbol_length (SCM sym)
729 {
730 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
731 }
732
733 size_t
734 scm_c_symbol_length (SCM sym)
735 #define FUNC_NAME "scm_c_symbol_length"
736 {
737 SCM_VALIDATE_SYMBOL (1, sym);
738
739 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
740 }
741 #undef FUNC_NAME
742
743 /* True if the name of SYM is stored as a Latin-1 encoded string.
744 False if it is stored as a 32-bit UCS-4-encoded string. */
745 int
746 scm_i_is_narrow_symbol (SCM sym)
747 {
748 SCM buf;
749
750 buf = SYMBOL_STRINGBUF (sym);
751 return !STRINGBUF_WIDE (buf);
752 }
753
754 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
755 contains the name of SYM. */
756 const char *
757 scm_i_symbol_chars (SCM sym)
758 {
759 SCM buf;
760
761 buf = SYMBOL_STRINGBUF (sym);
762 if (!STRINGBUF_WIDE (buf))
763 return (const char *) STRINGBUF_CHARS (buf);
764 else
765 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
766 scm_list_1 (sym));
767 }
768
769 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
770 symbol's name. */
771 const scm_t_wchar *
772 scm_i_symbol_wide_chars (SCM sym)
773 {
774 SCM buf;
775
776 buf = SYMBOL_STRINGBUF (sym);
777 if (STRINGBUF_WIDE (buf))
778 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
779 else
780 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
781 scm_list_1 (sym));
782 }
783
784 SCM
785 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
786 {
787 SCM buf = SYMBOL_STRINGBUF (sym);
788 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
789 SET_STRINGBUF_SHARED (buf);
790 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
791 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
792 (scm_t_bits)start, (scm_t_bits) end - start);
793 }
794
795 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
796 scm_t_wchar
797 scm_i_symbol_ref (SCM sym, size_t x)
798 {
799 if (scm_i_is_narrow_symbol (sym))
800 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
801 else
802 return scm_i_symbol_wide_chars (sym)[x];
803 }
804
805 /* Debugging
806 */
807
808 SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
809 "Returns an association list containing debugging information\n"
810 "for @var{str}. The association list has the following entries."
811 "@table @code\n"
812 "@item string\n"
813 "The string itself.\n"
814 "@item start\n"
815 "The start index of the string into its stringbuf\n"
816 "@item length\n"
817 "The length of the string\n"
818 "@item shared\n"
819 "If this string is a substring, it returns its parent string.\n"
820 "Otherwise, it returns @code{#f}\n"
821 "@item read-only\n"
822 "@code{#t} if the string is read-only\n"
823 "@item stringbuf-chars\n"
824 "A new string containing this string's stringbuf's characters\n"
825 "@item stringbuf-length\n"
826 "The number of characters in this stringbuf\n"
827 "@item stringbuf-shared\n"
828 "@code{#t} if this stringbuf is shared\n"
829 "@item stringbuf-wide\n"
830 "@code{#t} if this stringbuf's characters are stored in a\n"
831 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
832 "buffer\n"
833 "@end table")
834 #define FUNC_NAME s_scm_sys_string_dump
835 {
836 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
837 SCM buf;
838 SCM_VALIDATE_STRING (1, str);
839
840 /* String info */
841 e1 = scm_cons (scm_from_locale_symbol ("string"),
842 str);
843 e2 = scm_cons (scm_from_locale_symbol ("start"),
844 scm_from_size_t (STRING_START (str)));
845 e3 = scm_cons (scm_from_locale_symbol ("length"),
846 scm_from_size_t (STRING_LENGTH (str)));
847
848 if (IS_SH_STRING (str))
849 {
850 e4 = scm_cons (scm_from_locale_symbol ("shared"),
851 SH_STRING_STRING (str));
852 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
853 }
854 else
855 {
856 e4 = scm_cons (scm_from_locale_symbol ("shared"),
857 SCM_BOOL_F);
858 buf = STRING_STRINGBUF (str);
859 }
860
861 if (IS_RO_STRING (str))
862 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
863 SCM_BOOL_T);
864 else
865 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
866 SCM_BOOL_F);
867
868 /* Stringbuf info */
869 if (!STRINGBUF_WIDE (buf))
870 {
871 size_t len = STRINGBUF_LENGTH (buf);
872 char *cbuf;
873 SCM sbc = scm_i_make_string (len, &cbuf);
874 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
875 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
876 sbc);
877 }
878 else
879 {
880 size_t len = STRINGBUF_LENGTH (buf);
881 scm_t_wchar *cbuf;
882 SCM sbc = scm_i_make_wide_string (len, &cbuf);
883 u32_cpy ((scm_t_uint32 *) cbuf,
884 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
885 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
886 sbc);
887 }
888 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
889 scm_from_size_t (STRINGBUF_LENGTH (buf)));
890 if (STRINGBUF_SHARED (buf))
891 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
892 SCM_BOOL_T);
893 else
894 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
895 SCM_BOOL_F);
896 if (STRINGBUF_WIDE (buf))
897 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
898 SCM_BOOL_T);
899 else
900 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
901 SCM_BOOL_F);
902
903 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
904 }
905 #undef FUNC_NAME
906
907 SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
908 "Returns an association list containing debugging information\n"
909 "for @var{sym}. The association list has the following entries."
910 "@table @code\n"
911 "@item symbol\n"
912 "The symbol itself\n"
913 "@item hash\n"
914 "Its hash value\n"
915 "@item interned\n"
916 "@code{#t} if it is an interned symbol\n"
917 "@item stringbuf-chars\n"
918 "A new string containing this symbols's stringbuf's characters\n"
919 "@item stringbuf-length\n"
920 "The number of characters in this stringbuf\n"
921 "@item stringbuf-shared\n"
922 "@code{#t} if this stringbuf is shared\n"
923 "@item stringbuf-wide\n"
924 "@code{#t} if this stringbuf's characters are stored in a\n"
925 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
926 "buffer\n"
927 "@end table")
928 #define FUNC_NAME s_scm_sys_symbol_dump
929 {
930 SCM e1, e2, e3, e4, e5, e6, e7;
931 SCM buf;
932 SCM_VALIDATE_SYMBOL (1, sym);
933 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
934 sym);
935 e2 = scm_cons (scm_from_locale_symbol ("hash"),
936 scm_from_ulong (scm_i_symbol_hash (sym)));
937 e3 = scm_cons (scm_from_locale_symbol ("interned"),
938 scm_symbol_interned_p (sym));
939 buf = SYMBOL_STRINGBUF (sym);
940
941 /* Stringbuf info */
942 if (!STRINGBUF_WIDE (buf))
943 {
944 size_t len = STRINGBUF_LENGTH (buf);
945 char *cbuf;
946 SCM sbc = scm_i_make_string (len, &cbuf);
947 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
948 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
949 sbc);
950 }
951 else
952 {
953 size_t len = STRINGBUF_LENGTH (buf);
954 scm_t_wchar *cbuf;
955 SCM sbc = scm_i_make_wide_string (len, &cbuf);
956 u32_cpy ((scm_t_uint32 *) cbuf,
957 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
958 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
959 sbc);
960 }
961 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
962 scm_from_size_t (STRINGBUF_LENGTH (buf)));
963 if (STRINGBUF_SHARED (buf))
964 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
965 SCM_BOOL_T);
966 else
967 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
968 SCM_BOOL_F);
969 if (STRINGBUF_WIDE (buf))
970 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
971 SCM_BOOL_T);
972 else
973 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
974 SCM_BOOL_F);
975 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
976
977 }
978 #undef FUNC_NAME
979
980 #ifdef SCM_STRING_LENGTH_HISTOGRAM
981
982 SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
983 #define FUNC_NAME s_scm_sys_stringbuf_hist
984 {
985 int i;
986 for (i = 0; i < 1000; i++)
987 if (lenhist[i])
988 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
989 fprintf (stderr, ">999: %u\n", lenhist[1000]);
990 return SCM_UNSPECIFIED;
991 }
992 #undef FUNC_NAME
993
994 #endif
995
996 \f
997
998 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
999 (SCM obj),
1000 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
1001 #define FUNC_NAME s_scm_string_p
1002 {
1003 return scm_from_bool (IS_STRING (obj));
1004 }
1005 #undef FUNC_NAME
1006
1007
1008 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1009
1010 SCM_DEFINE (scm_string, "string", 0, 0, 1,
1011 (SCM chrs),
1012 "@deffnx {Scheme Procedure} list->string chrs\n"
1013 "Return a newly allocated string composed of the arguments,\n"
1014 "@var{chrs}.")
1015 #define FUNC_NAME s_scm_string
1016 {
1017 SCM result = SCM_BOOL_F;
1018 SCM rest;
1019 size_t len;
1020 size_t p = 0;
1021 long i;
1022 int wide = 0;
1023
1024 /* Verify that this is a list of chars. */
1025 i = scm_ilength (chrs);
1026 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
1027
1028 len = (size_t) i;
1029 rest = chrs;
1030
1031 while (len > 0 && scm_is_pair (rest))
1032 {
1033 SCM elt = SCM_CAR (rest);
1034 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
1035 if (SCM_CHAR (elt) > 0xFF)
1036 wide = 1;
1037 rest = SCM_CDR (rest);
1038 len--;
1039 scm_remember_upto_here_1 (elt);
1040 }
1041
1042 /* Construct a string containing this list of chars. */
1043 len = (size_t) i;
1044 rest = chrs;
1045
1046 if (wide == 0)
1047 {
1048 char *buf;
1049
1050 result = scm_i_make_string (len, NULL);
1051 result = scm_i_string_start_writing (result);
1052 buf = scm_i_string_writable_chars (result);
1053 while (len > 0 && scm_is_pair (rest))
1054 {
1055 SCM elt = SCM_CAR (rest);
1056 buf[p] = (unsigned char) SCM_CHAR (elt);
1057 p++;
1058 rest = SCM_CDR (rest);
1059 len--;
1060 scm_remember_upto_here_1 (elt);
1061 }
1062 }
1063 else
1064 {
1065 scm_t_wchar *buf;
1066
1067 result = scm_i_make_wide_string (len, NULL);
1068 result = scm_i_string_start_writing (result);
1069 buf = scm_i_string_writable_wide_chars (result);
1070 while (len > 0 && scm_is_pair (rest))
1071 {
1072 SCM elt = SCM_CAR (rest);
1073 buf[p] = SCM_CHAR (elt);
1074 p++;
1075 rest = SCM_CDR (rest);
1076 len--;
1077 scm_remember_upto_here_1 (elt);
1078 }
1079 }
1080 scm_i_string_stop_writing ();
1081
1082 if (len > 0)
1083 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
1084 if (!scm_is_null (rest))
1085 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1086
1087 return result;
1088 }
1089 #undef FUNC_NAME
1090
1091 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
1092 (SCM k, SCM chr),
1093 "Return a newly allocated string of\n"
1094 "length @var{k}. If @var{chr} is given, then all elements of\n"
1095 "the string are initialized to @var{chr}, otherwise the contents\n"
1096 "of the @var{string} are unspecified.")
1097 #define FUNC_NAME s_scm_make_string
1098 {
1099 return scm_c_make_string (scm_to_size_t (k), chr);
1100 }
1101 #undef FUNC_NAME
1102
1103 SCM
1104 scm_c_make_string (size_t len, SCM chr)
1105 #define FUNC_NAME NULL
1106 {
1107 size_t p;
1108 SCM res = scm_i_make_string (len, NULL);
1109
1110 if (!SCM_UNBNDP (chr))
1111 {
1112 SCM_VALIDATE_CHAR (0, chr);
1113 res = scm_i_string_start_writing (res);
1114 for (p = 0; p < len; p++)
1115 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1116 scm_i_string_stop_writing ();
1117 }
1118
1119 return res;
1120 }
1121 #undef FUNC_NAME
1122
1123 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
1124 (SCM string),
1125 "Return the number of characters in @var{string}.")
1126 #define FUNC_NAME s_scm_string_length
1127 {
1128 SCM_VALIDATE_STRING (1, string);
1129 return scm_from_size_t (STRING_LENGTH (string));
1130 }
1131 #undef FUNC_NAME
1132
1133 SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
1134 (SCM string),
1135 "Return the bytes used to represent a character in @var{string}."
1136 "This will return 1 or 4.")
1137 #define FUNC_NAME s_scm_string_bytes_per_char
1138 {
1139 SCM_VALIDATE_STRING (1, string);
1140 if (!scm_i_is_narrow_string (string))
1141 return scm_from_int (4);
1142
1143 return scm_from_int (1);
1144 }
1145 #undef FUNC_NAME
1146
1147 size_t
1148 scm_c_string_length (SCM string)
1149 {
1150 if (!IS_STRING (string))
1151 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1152 return STRING_LENGTH (string);
1153 }
1154
1155 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
1156 (SCM str, SCM k),
1157 "Return character @var{k} of @var{str} using zero-origin\n"
1158 "indexing. @var{k} must be a valid index of @var{str}.")
1159 #define FUNC_NAME s_scm_string_ref
1160 {
1161 size_t len;
1162 unsigned long idx;
1163
1164 SCM_VALIDATE_STRING (1, str);
1165
1166 len = scm_i_string_length (str);
1167 if (SCM_LIKELY (len > 0))
1168 idx = scm_to_unsigned_integer (k, 0, len - 1);
1169 else
1170 scm_out_of_range (NULL, k);
1171
1172 if (scm_i_is_narrow_string (str))
1173 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1174 else
1175 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
1176 }
1177 #undef FUNC_NAME
1178
1179 SCM
1180 scm_c_string_ref (SCM str, size_t p)
1181 {
1182 if (p >= scm_i_string_length (str))
1183 scm_out_of_range (NULL, scm_from_size_t (p));
1184 if (scm_i_is_narrow_string (str))
1185 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1186 else
1187 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1188
1189 }
1190
1191 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
1192 (SCM str, SCM k, SCM chr),
1193 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1194 "an unspecified value. @var{k} must be a valid index of\n"
1195 "@var{str}.")
1196 #define FUNC_NAME s_scm_string_set_x
1197 {
1198 size_t len;
1199 unsigned long idx;
1200
1201 SCM_VALIDATE_STRING (1, str);
1202
1203 len = scm_i_string_length (str);
1204 if (SCM_LIKELY (len > 0))
1205 idx = scm_to_unsigned_integer (k, 0, len - 1);
1206 else
1207 scm_out_of_range (NULL, k);
1208
1209 SCM_VALIDATE_CHAR (3, chr);
1210 str = scm_i_string_start_writing (str);
1211 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1212 scm_i_string_stop_writing ();
1213
1214 return SCM_UNSPECIFIED;
1215 }
1216 #undef FUNC_NAME
1217
1218 void
1219 scm_c_string_set_x (SCM str, size_t p, SCM chr)
1220 {
1221 if (p >= scm_i_string_length (str))
1222 scm_out_of_range (NULL, scm_from_size_t (p));
1223 str = scm_i_string_start_writing (str);
1224 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1225 scm_i_string_stop_writing ();
1226 }
1227
1228 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
1229 (SCM str, SCM start, SCM end),
1230 "Return a newly allocated string formed from the characters\n"
1231 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1232 "ending with index @var{end} (exclusive).\n"
1233 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1234 "exact integers satisfying:\n\n"
1235 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1236 #define FUNC_NAME s_scm_substring
1237 {
1238 size_t len, from, to;
1239
1240 SCM_VALIDATE_STRING (1, str);
1241 len = scm_i_string_length (str);
1242 from = scm_to_unsigned_integer (start, 0, len);
1243 if (SCM_UNBNDP (end))
1244 to = len;
1245 else
1246 to = scm_to_unsigned_integer (end, from, len);
1247 return scm_i_substring (str, from, to);
1248 }
1249 #undef FUNC_NAME
1250
1251 SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1252 (SCM str, SCM start, SCM end),
1253 "Return a newly allocated string formed from the characters\n"
1254 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1255 "ending with index @var{end} (exclusive).\n"
1256 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1257 "exact integers satisfying:\n"
1258 "\n"
1259 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1260 "\n"
1261 "The returned string is read-only.\n")
1262 #define FUNC_NAME s_scm_substring_read_only
1263 {
1264 size_t len, from, to;
1265
1266 SCM_VALIDATE_STRING (1, str);
1267 len = scm_i_string_length (str);
1268 from = scm_to_unsigned_integer (start, 0, len);
1269 if (SCM_UNBNDP (end))
1270 to = len;
1271 else
1272 to = scm_to_unsigned_integer (end, from, len);
1273 return scm_i_substring_read_only (str, from, to);
1274 }
1275 #undef FUNC_NAME
1276
1277 SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1278 (SCM str, SCM start, SCM end),
1279 "Return a newly allocated string formed from the characters\n"
1280 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1281 "ending with index @var{end} (exclusive).\n"
1282 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1283 "exact integers satisfying:\n\n"
1284 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1285 #define FUNC_NAME s_scm_substring_copy
1286 {
1287 /* For the Scheme version, START is mandatory, but for the C
1288 version, it is optional. See scm_string_copy in srfi-13.c for a
1289 rationale.
1290 */
1291
1292 size_t from, to;
1293
1294 SCM_VALIDATE_STRING (1, str);
1295 scm_i_get_substring_spec (scm_i_string_length (str),
1296 start, &from, end, &to);
1297 return scm_i_substring_copy (str, from, to);
1298 }
1299 #undef FUNC_NAME
1300
1301 SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1302 (SCM str, SCM start, SCM end),
1303 "Return string that indirectly refers to the characters\n"
1304 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1305 "ending with index @var{end} (exclusive).\n"
1306 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1307 "exact integers satisfying:\n\n"
1308 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1309 #define FUNC_NAME s_scm_substring_shared
1310 {
1311 size_t len, from, to;
1312
1313 SCM_VALIDATE_STRING (1, str);
1314 len = scm_i_string_length (str);
1315 from = scm_to_unsigned_integer (start, 0, len);
1316 if (SCM_UNBNDP (end))
1317 to = len;
1318 else
1319 to = scm_to_unsigned_integer (end, from, len);
1320 return scm_i_substring_shared (str, from, to);
1321 }
1322 #undef FUNC_NAME
1323
1324 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
1325 (SCM args),
1326 "Return a newly allocated string whose characters form the\n"
1327 "concatenation of the given strings, @var{args}.")
1328 #define FUNC_NAME s_scm_string_append
1329 {
1330 SCM res;
1331 size_t len = 0;
1332 int wide = 0;
1333 SCM l, s;
1334 size_t i;
1335 union
1336 {
1337 char *narrow;
1338 scm_t_wchar *wide;
1339 } data;
1340
1341 SCM_VALIDATE_REST_ARGUMENT (args);
1342 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
1343 {
1344 s = SCM_CAR (l);
1345 SCM_VALIDATE_STRING (SCM_ARGn, s);
1346 len += scm_i_string_length (s);
1347 if (!scm_i_is_narrow_string (s))
1348 wide = 1;
1349 }
1350 data.narrow = NULL;
1351 if (!wide)
1352 res = scm_i_make_string (len, &data.narrow);
1353 else
1354 res = scm_i_make_wide_string (len, &data.wide);
1355
1356 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
1357 {
1358 size_t len;
1359 s = SCM_CAR (l);
1360 SCM_VALIDATE_STRING (SCM_ARGn, s);
1361 len = scm_i_string_length (s);
1362 if (!wide)
1363 {
1364 memcpy (data.narrow, scm_i_string_chars (s), len);
1365 data.narrow += len;
1366 }
1367 else
1368 {
1369 if (scm_i_is_narrow_string (s))
1370 {
1371 for (i = 0; i < scm_i_string_length (s); i++)
1372 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
1373 }
1374 else
1375 u32_cpy ((scm_t_uint32 *) data.wide,
1376 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
1377 data.wide += len;
1378 }
1379 scm_remember_upto_here_1 (s);
1380 }
1381 return res;
1382 }
1383 #undef FUNC_NAME
1384
1385 int
1386 scm_is_string (SCM obj)
1387 {
1388 return IS_STRING (obj);
1389 }
1390
1391 \f
1392 /* Conversion to/from other encodings. */
1393
1394 SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
1395 static void
1396 scm_encoding_error (const char *subr, int err, const char *message,
1397 const char *from, const char *to, SCM string_or_bv)
1398 {
1399 /* Raise an exception that conveys all the information needed to debug the
1400 problem. Only perform locale conversions that are safe; in particular,
1401 don't try to display STRING_OR_BV when it's a string since converting it to
1402 the output locale may fail. */
1403 scm_throw (scm_encoding_error_key,
1404 scm_list_n (scm_from_locale_string (subr),
1405 scm_from_locale_string (message),
1406 scm_from_int (err),
1407 scm_from_locale_string (from),
1408 scm_from_locale_string (to),
1409 string_or_bv,
1410 SCM_UNDEFINED));
1411 }
1412
1413 SCM
1414 scm_from_stringn (const char *str, size_t len, const char *encoding,
1415 scm_t_string_failed_conversion_handler handler)
1416 {
1417 size_t u32len, i;
1418 scm_t_wchar *u32;
1419 int wide = 0;
1420 SCM res;
1421
1422 if (len == 0)
1423 return scm_nullstr;
1424
1425 if (encoding == NULL)
1426 {
1427 /* If encoding is null, use Latin-1. */
1428 char *buf;
1429 res = scm_i_make_string (len, &buf);
1430 memcpy (buf, str, len);
1431 return res;
1432 }
1433
1434 u32len = 0;
1435 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1436 (enum iconv_ilseq_handler)
1437 handler,
1438 str, len,
1439 NULL,
1440 NULL, &u32len);
1441
1442 if (SCM_UNLIKELY (u32 == NULL))
1443 {
1444 /* Raise an error and pass the raw C string as a bytevector to the `throw'
1445 handler. */
1446 SCM bv;
1447 signed char *buf;
1448
1449 buf = scm_gc_malloc_pointerless (len, "bytevector");
1450 memcpy (buf, str, len);
1451 bv = scm_c_take_bytevector (buf, len);
1452
1453 scm_encoding_error (__func__, errno,
1454 "input locale conversion error",
1455 encoding, "UTF-32", bv);
1456 }
1457
1458 i = 0;
1459 while (i < u32len)
1460 if (u32[i++] > 0xFF)
1461 {
1462 wide = 1;
1463 break;
1464 }
1465
1466 if (!wide)
1467 {
1468 char *dst;
1469 res = scm_i_make_string (u32len, &dst);
1470 for (i = 0; i < u32len; i ++)
1471 dst[i] = (unsigned char) u32[i];
1472 dst[u32len] = '\0';
1473 }
1474 else
1475 {
1476 scm_t_wchar *wdst;
1477 res = scm_i_make_wide_string (u32len, &wdst);
1478 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1479 wdst[u32len] = 0;
1480 }
1481
1482 free (u32);
1483 return res;
1484 }
1485
1486 SCM
1487 scm_from_locale_stringn (const char *str, size_t len)
1488 {
1489 const char *enc;
1490 scm_t_string_failed_conversion_handler hndl;
1491 SCM inport;
1492 scm_t_port *pt;
1493
1494 if (len == (size_t) -1)
1495 len = strlen (str);
1496 if (len == 0)
1497 return scm_nullstr;
1498
1499 inport = scm_current_input_port ();
1500 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1501 {
1502 pt = SCM_PTAB_ENTRY (inport);
1503 enc = pt->encoding;
1504 hndl = pt->ilseq_handler;
1505 }
1506 else
1507 {
1508 enc = NULL;
1509 hndl = SCM_FAILED_CONVERSION_ERROR;
1510 }
1511
1512 return scm_from_stringn (str, len, enc, hndl);
1513 }
1514
1515 SCM
1516 scm_from_locale_string (const char *str)
1517 {
1518 if (str == NULL)
1519 return scm_nullstr;
1520
1521 return scm_from_locale_stringn (str, -1);
1522 }
1523
1524 SCM
1525 scm_i_from_utf8_string (const scm_t_uint8 *str)
1526 {
1527 return scm_from_stringn ((const char *) str,
1528 strlen ((char *) str), "UTF-8",
1529 SCM_FAILED_CONVERSION_ERROR);
1530 }
1531
1532 /* Create a new scheme string from the C string STR. The memory of
1533 STR may be used directly as storage for the new string. */
1534 /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1535 would be to register a finalizer to eventually free(3) STR, which isn't
1536 worth it. Should we just deprecate the `scm_take_' functions? */
1537 SCM
1538 scm_take_locale_stringn (char *str, size_t len)
1539 {
1540 SCM res;
1541
1542 res = scm_from_locale_stringn (str, len);
1543 free (str);
1544
1545 return res;
1546 }
1547
1548 SCM
1549 scm_take_locale_string (char *str)
1550 {
1551 return scm_take_locale_stringn (str, -1);
1552 }
1553
1554 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1555 and \UXXXXXX. */
1556 static void
1557 unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1558 {
1559 char *before, *after;
1560 size_t i, j;
1561
1562 before = *bufp;
1563 after = *bufp;
1564 i = 0;
1565 j = 0;
1566 while (i < *lenp)
1567 {
1568 if ((i <= *lenp - 6)
1569 && before[i] == '\\'
1570 && before[i + 1] == 'u'
1571 && before[i + 2] == '0' && before[i + 3] == '0')
1572 {
1573 /* Convert \u00NN to \xNN */
1574 after[j] = '\\';
1575 after[j + 1] = 'x';
1576 after[j + 2] = tolower ((int) before[i + 4]);
1577 after[j + 3] = tolower ((int) before[i + 5]);
1578 i += 6;
1579 j += 4;
1580 }
1581 else if ((i <= *lenp - 10)
1582 && before[i] == '\\'
1583 && before[i + 1] == 'U'
1584 && before[i + 2] == '0' && before[i + 3] == '0')
1585 {
1586 /* Convert \U00NNNNNN to \UNNNNNN */
1587 after[j] = '\\';
1588 after[j + 1] = 'U';
1589 after[j + 2] = tolower ((int) before[i + 4]);
1590 after[j + 3] = tolower ((int) before[i + 5]);
1591 after[j + 4] = tolower ((int) before[i + 6]);
1592 after[j + 5] = tolower ((int) before[i + 7]);
1593 after[j + 6] = tolower ((int) before[i + 8]);
1594 after[j + 7] = tolower ((int) before[i + 9]);
1595 i += 10;
1596 j += 8;
1597 }
1598 else
1599 {
1600 after[j] = before[i];
1601 i++;
1602 j++;
1603 }
1604 }
1605 *lenp = j;
1606 after = scm_realloc (after, j);
1607 }
1608
1609 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXXXX; */
1610 static void
1611 unistring_escapes_to_r6rs_escapes (char **bufp, size_t *lenp)
1612 {
1613 char *before, *after;
1614 size_t i, j;
1615 /* The worst case is if the input string contains all 4-digit hex escapes.
1616 "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
1617 size_t max_out_len = (*lenp * 7) / 6 + 1;
1618 size_t nzeros, ndigits;
1619
1620 before = *bufp;
1621 after = alloca (max_out_len);
1622 i = 0;
1623 j = 0;
1624 while (i < *lenp)
1625 {
1626 if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
1627 || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
1628 {
1629 if (before[i + 1] == 'u')
1630 ndigits = 4;
1631 else if (before[i + 1] == 'U')
1632 ndigits = 8;
1633 else
1634 abort ();
1635
1636 /* Add the R6RS hex escape initial sequence. */
1637 after[j] = '\\';
1638 after[j + 1] = 'x';
1639
1640 /* Move string positions to the start of the hex numbers. */
1641 i += 2;
1642 j += 2;
1643
1644 /* Find the number of initial zeros in this hex number. */
1645 nzeros = 0;
1646 while (before[i + nzeros] == '0' && nzeros < ndigits)
1647 nzeros++;
1648
1649 /* Copy the number, skipping initial zeros, and then move the string
1650 positions. */
1651 if (nzeros == ndigits)
1652 {
1653 after[j] = '0';
1654 i += ndigits;
1655 j += 1;
1656 }
1657 else
1658 {
1659 int pos;
1660 for (pos = 0; pos < ndigits - nzeros; pos++)
1661 after[j + pos] = tolower ((int) before[i + nzeros + pos]);
1662 i += ndigits;
1663 j += (ndigits - nzeros);
1664 }
1665
1666 /* Add terminating semicolon. */
1667 after[j] = ';';
1668 j++;
1669 }
1670 else
1671 {
1672 after[j] = before[i];
1673 i++;
1674 j++;
1675 }
1676 }
1677 *lenp = j;
1678 before = scm_realloc (before, j);
1679 memcpy (before, after, j);
1680 }
1681
1682
1683 char *
1684 scm_to_locale_stringn (SCM str, size_t *lenp)
1685 {
1686 SCM outport;
1687 scm_t_port *pt;
1688 const char *enc;
1689
1690 outport = scm_current_output_port ();
1691 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1692 {
1693 pt = SCM_PTAB_ENTRY (outport);
1694 enc = pt->encoding;
1695 }
1696 else
1697 enc = NULL;
1698
1699 return scm_to_stringn (str, lenp,
1700 enc,
1701 scm_i_get_conversion_strategy (SCM_BOOL_F));
1702 }
1703
1704 /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
1705 according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
1706 the returned buffer. If the conversion to ENCODING fails, apply the strategy
1707 defined by HANDLER. */
1708 char *
1709 scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
1710 scm_t_string_failed_conversion_handler handler)
1711 {
1712 char *buf;
1713 size_t ilen, len, i;
1714 int ret;
1715 const char *enc;
1716
1717 if (!scm_is_string (str))
1718 scm_wrong_type_arg_msg (NULL, 0, str, "string");
1719 ilen = scm_i_string_length (str);
1720
1721 if (ilen == 0)
1722 {
1723 buf = scm_malloc (1);
1724 buf[0] = '\0';
1725 if (lenp)
1726 *lenp = 0;
1727 return buf;
1728 }
1729
1730 if (lenp == NULL)
1731 for (i = 0; i < ilen; i++)
1732 if (scm_i_string_ref (str, i) == '\0')
1733 scm_misc_error (NULL,
1734 "string contains #\\nul character: ~S",
1735 scm_list_1 (str));
1736
1737 if (scm_i_is_narrow_string (str) && (encoding == NULL))
1738 {
1739 /* If using native Latin-1 encoding, just copy the string
1740 contents. */
1741 if (lenp)
1742 {
1743 buf = scm_malloc (ilen);
1744 memcpy (buf, scm_i_string_chars (str), ilen);
1745 *lenp = ilen;
1746 return buf;
1747 }
1748 else
1749 {
1750 buf = scm_malloc (ilen + 1);
1751 memcpy (buf, scm_i_string_chars (str), ilen);
1752 buf[ilen] = '\0';
1753 return buf;
1754 }
1755 }
1756
1757
1758 buf = NULL;
1759 len = 0;
1760 enc = encoding;
1761 if (enc == NULL)
1762 enc = "ISO-8859-1";
1763 if (scm_i_is_narrow_string (str))
1764 {
1765 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1766 "ISO-8859-1", enc,
1767 (enum iconv_ilseq_handler) handler, NULL,
1768 &buf, &len);
1769
1770 if (ret != 0)
1771 scm_encoding_error (__func__, errno,
1772 "cannot convert to output locale",
1773 "ISO-8859-1", enc, str);
1774 }
1775 else
1776 {
1777 buf = u32_conv_to_encoding (enc,
1778 (enum iconv_ilseq_handler) handler,
1779 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1780 ilen,
1781 NULL,
1782 NULL, &len);
1783 if (buf == NULL)
1784 scm_encoding_error (__func__, errno,
1785 "cannot convert to output locale",
1786 "UTF-32", enc, str);
1787 }
1788 if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1789 {
1790 if (SCM_R6RS_ESCAPES_P)
1791 unistring_escapes_to_r6rs_escapes (&buf, &len);
1792 else
1793 unistring_escapes_to_guile_escapes (&buf, &len);
1794 }
1795 if (lenp)
1796 *lenp = len;
1797 else
1798 {
1799 buf = scm_realloc (buf, len + 1);
1800 buf[len] = '\0';
1801 }
1802
1803 scm_remember_upto_here_1 (str);
1804 return buf;
1805 }
1806
1807 char *
1808 scm_to_locale_string (SCM str)
1809 {
1810 return scm_to_locale_stringn (str, NULL);
1811 }
1812
1813 scm_t_uint8 *
1814 scm_i_to_utf8_string (SCM str)
1815 {
1816 char *u8str;
1817 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1818 return (scm_t_uint8 *) u8str;
1819 }
1820
1821 size_t
1822 scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1823 {
1824 size_t len;
1825 char *result = NULL;
1826 if (!scm_is_string (str))
1827 scm_wrong_type_arg_msg (NULL, 0, str, "string");
1828 result = scm_to_locale_stringn (str, &len);
1829
1830 memcpy (buf, result, (len > max_len) ? max_len : len);
1831 free (result);
1832
1833 scm_remember_upto_here_1 (str);
1834 return len;
1835 }
1836
1837 \f
1838 /* Unicode string normalization. */
1839
1840 /* This function is a partial clone of SCM_STRING_TO_U32_BUF from
1841 libguile/i18n.c. It would be useful to have this factored out into a more
1842 convenient location, but its use of alloca makes that tricky to do. */
1843
1844 static SCM
1845 normalize_str (SCM string, uninorm_t form)
1846 {
1847 SCM ret;
1848 scm_t_uint32 *w_str;
1849 scm_t_wchar *cbuf;
1850 size_t rlen, len = scm_i_string_length (string);
1851
1852 if (scm_i_is_narrow_string (string))
1853 {
1854 size_t i;
1855 const char *buf = scm_i_string_chars (string);
1856
1857 w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
1858
1859 for (i = 0; i < len; i ++)
1860 w_str[i] = (unsigned char) buf[i];
1861 w_str[len] = 0;
1862 }
1863 else
1864 w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
1865
1866 w_str = u32_normalize (form, w_str, len, NULL, &rlen);
1867
1868 ret = scm_i_make_wide_string (rlen, &cbuf);
1869 u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
1870 free (w_str);
1871
1872 scm_i_try_narrow_string (ret);
1873
1874 return ret;
1875 }
1876
1877 SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
1878 (SCM string),
1879 "Returns the NFC normalized form of @var{string}.")
1880 #define FUNC_NAME s_scm_string_normalize_nfc
1881 {
1882 SCM_VALIDATE_STRING (1, string);
1883 return normalize_str (string, UNINORM_NFC);
1884 }
1885 #undef FUNC_NAME
1886
1887 SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
1888 (SCM string),
1889 "Returns the NFD normalized form of @var{string}.")
1890 #define FUNC_NAME s_scm_string_normalize_nfd
1891 {
1892 SCM_VALIDATE_STRING (1, string);
1893 return normalize_str (string, UNINORM_NFD);
1894 }
1895 #undef FUNC_NAME
1896
1897 SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
1898 (SCM string),
1899 "Returns the NFKC normalized form of @var{string}.")
1900 #define FUNC_NAME s_scm_string_normalize_nfkc
1901 {
1902 SCM_VALIDATE_STRING (1, string);
1903 return normalize_str (string, UNINORM_NFKC);
1904 }
1905 #undef FUNC_NAME
1906
1907 SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
1908 (SCM string),
1909 "Returns the NFKD normalized form of @var{string}.")
1910 #define FUNC_NAME s_scm_string_normalize_nfkd
1911 {
1912 SCM_VALIDATE_STRING (1, string);
1913 return normalize_str (string, UNINORM_NFKD);
1914 }
1915 #undef FUNC_NAME
1916
1917 /* converts C scm_array of strings to SCM scm_list of strings. */
1918 /* If argc < 0, a null terminated scm_array is assumed. */
1919 SCM
1920 scm_makfromstrs (int argc, char **argv)
1921 {
1922 int i = argc;
1923 SCM lst = SCM_EOL;
1924 if (0 > i)
1925 for (i = 0; argv[i]; i++);
1926 while (i--)
1927 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1928 return lst;
1929 }
1930
1931 /* Return a newly allocated array of char pointers to each of the strings
1932 in args, with a terminating NULL pointer. */
1933
1934 char **
1935 scm_i_allocate_string_pointers (SCM list)
1936 #define FUNC_NAME "scm_i_allocate_string_pointers"
1937 {
1938 char **result;
1939 int len = scm_ilength (list);
1940 int i;
1941
1942 if (len < 0)
1943 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1944
1945 result = scm_gc_malloc ((len + 1) * sizeof (char *),
1946 "string pointers");
1947 result[len] = NULL;
1948
1949 /* The list might be have been modified in another thread, so
1950 we check LIST before each access.
1951 */
1952 for (i = 0; i < len && scm_is_pair (list); i++)
1953 {
1954 SCM str;
1955 size_t len;
1956
1957 str = SCM_CAR (list);
1958 len = scm_c_string_length (str);
1959
1960 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
1961 memcpy (result[i], scm_i_string_chars (str), len);
1962 result[i][len] = '\0';
1963
1964 list = SCM_CDR (list);
1965 }
1966
1967 return result;
1968 }
1969 #undef FUNC_NAME
1970
1971 void
1972 scm_i_get_substring_spec (size_t len,
1973 SCM start, size_t *cstart,
1974 SCM end, size_t *cend)
1975 {
1976 if (SCM_UNBNDP (start))
1977 *cstart = 0;
1978 else
1979 *cstart = scm_to_unsigned_integer (start, 0, len);
1980
1981 if (SCM_UNBNDP (end))
1982 *cend = len;
1983 else
1984 *cend = scm_to_unsigned_integer (end, *cstart, len);
1985 }
1986
1987 #if SCM_ENABLE_DEPRECATED
1988
1989 /* When these definitions are removed, it becomes reasonable to use
1990 read-only strings for string literals. For that, change the reader
1991 to create string literals with scm_c_substring_read_only instead of
1992 with scm_c_substring_copy.
1993 */
1994
1995 int
1996 scm_i_deprecated_stringp (SCM str)
1997 {
1998 scm_c_issue_deprecation_warning
1999 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
2000
2001 return scm_is_string (str);
2002 }
2003
2004 char *
2005 scm_i_deprecated_string_chars (SCM str)
2006 {
2007 char *chars;
2008
2009 scm_c_issue_deprecation_warning
2010 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
2011
2012 /* We don't accept shared substrings here since they are not
2013 null-terminated.
2014 */
2015 if (IS_SH_STRING (str))
2016 scm_misc_error (NULL,
2017 "SCM_STRING_CHARS does not work with shared substrings",
2018 SCM_EOL);
2019
2020 /* We explicitly test for read-only strings to produce a better
2021 error message.
2022 */
2023
2024 if (IS_RO_STRING (str))
2025 scm_misc_error (NULL,
2026 "SCM_STRING_CHARS does not work with read-only strings",
2027 SCM_EOL);
2028
2029 /* The following is still wrong, of course...
2030 */
2031 str = scm_i_string_start_writing (str);
2032 chars = scm_i_string_writable_chars (str);
2033 scm_i_string_stop_writing ();
2034 return chars;
2035 }
2036
2037 size_t
2038 scm_i_deprecated_string_length (SCM str)
2039 {
2040 scm_c_issue_deprecation_warning
2041 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
2042 return scm_c_string_length (str);
2043 }
2044
2045 #endif
2046
2047 static SCM
2048 string_handle_ref (scm_t_array_handle *h, size_t index)
2049 {
2050 return scm_c_string_ref (h->array, index);
2051 }
2052
2053 static void
2054 string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
2055 {
2056 scm_c_string_set_x (h->array, index, val);
2057 }
2058
2059 static void
2060 string_get_handle (SCM v, scm_t_array_handle *h)
2061 {
2062 h->array = v;
2063 h->ndims = 1;
2064 h->dims = &h->dim0;
2065 h->dim0.lbnd = 0;
2066 h->dim0.ubnd = scm_c_string_length (v) - 1;
2067 h->dim0.inc = 1;
2068 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
2069 h->elements = h->writable_elements = NULL;
2070 }
2071
2072 SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f,
2073 string_handle_ref, string_handle_set,
2074 string_get_handle)
2075 SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
2076
2077 void
2078 scm_init_strings ()
2079 {
2080 scm_nullstr = scm_i_make_string (0, NULL);
2081
2082 #include "libguile/strings.x"
2083 }
2084
2085
2086 /*
2087 Local Variables:
2088 c-file-style: "gnu"
2089 End:
2090 */