Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
17 */
18
19
20 \f
21 #ifdef HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include <string.h>
26 #include <stdio.h>
27 #include <ctype.h>
28 #include <uninorm.h>
29 #include <unistr.h>
30 #include <uniconv.h>
31
32 #include "striconveh.h"
33
34 #include "libguile/_scm.h"
35 #include "libguile/chars.h"
36 #include "libguile/root.h"
37 #include "libguile/strings.h"
38 #include "libguile/error.h"
39 #include "libguile/generalized-vectors.h"
40 #include "libguile/deprecation.h"
41 #include "libguile/validate.h"
42 #include "libguile/private-options.h"
43
44 \f
45
46 /* {Strings}
47 */
48
49
50 /* Stringbufs
51 *
52 * XXX - keeping an accurate refcount during GC seems to be quite
53 * tricky, so we just keep score of whether a stringbuf might be
54 * shared, not whether it definitely is.
55 *
56 * The scheme I (mvo) tried to keep an accurate reference count would
57 * recount all strings that point to a stringbuf during the mark-phase
58 * of the GC. This was done since one cannot access the stringbuf of
59 * a string when that string is freed (in order to decrease the
60 * reference count). The memory of the stringbuf might have been
61 * reused already for something completely different.
62 *
63 * This recounted worked for a small number of threads beating on
64 * cow-strings, but it failed randomly with more than 10 threads, say.
65 * I couldn't figure out what went wrong, so I used the conservative
66 * approach implemented below.
67 *
68 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
69 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
70 * strings.
71 */
72
73 /* The size in words of the stringbuf header (type tag + size). */
74 #define STRINGBUF_HEADER_SIZE 2U
75
76 #define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
77
78 #define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
79 #define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
80
81 #define STRINGBUF_TAG scm_tc7_stringbuf
82 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
83 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
84
85 #define STRINGBUF_CONTENTS(buf) ((void *) \
86 SCM_CELL_OBJECT_LOC (buf, \
87 STRINGBUF_HEADER_SIZE))
88 #define STRINGBUF_CHARS(buf) ((unsigned char *) STRINGBUF_CONTENTS (buf))
89 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf))
90
91 #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
92
93 #define SET_STRINGBUF_SHARED(buf) \
94 do \
95 { \
96 /* Don't modify BUF if it's already marked as shared since it might be \
97 a read-only, statically allocated stringbuf. */ \
98 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
99 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
100 } \
101 while (0)
102
103 #ifdef SCM_STRING_LENGTH_HISTOGRAM
104 static size_t lenhist[1001];
105 #endif
106
107 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
108 characters. */
109 static SCM
110 make_stringbuf (size_t len)
111 {
112 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
113 scm_i_symbol_chars, all stringbufs are null-terminated. Once
114 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
115 has been changed for scm_i_symbol_chars, this null-termination
116 can be dropped.
117 */
118
119 SCM buf;
120
121 #ifdef SCM_STRING_LENGTH_HISTOGRAM
122 if (len < 1000)
123 lenhist[len]++;
124 else
125 lenhist[1000]++;
126 #endif
127
128 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
129 "string"));
130
131 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
132 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
133
134 STRINGBUF_CHARS (buf)[len] = 0;
135
136 return buf;
137 }
138
139 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
140 characters. */
141 static SCM
142 make_wide_stringbuf (size_t len)
143 {
144 SCM buf;
145 size_t raw_len;
146
147 #ifdef SCM_STRING_LENGTH_HISTOGRAM
148 if (len < 1000)
149 lenhist[len]++;
150 else
151 lenhist[1000]++;
152 #endif
153
154 raw_len = (len + 1) * sizeof (scm_t_wchar);
155 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
156 "string"));
157
158 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
159 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
160
161 STRINGBUF_WIDE_CHARS (buf)[len] = 0;
162
163 return buf;
164 }
165
166 /* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
167 characters from BUF. */
168 static SCM
169 wide_stringbuf (SCM buf)
170 {
171 SCM new_buf;
172
173 if (STRINGBUF_WIDE (buf))
174 new_buf = buf;
175 else
176 {
177 size_t i, len;
178 scm_t_wchar *mem;
179
180 len = STRINGBUF_LENGTH (buf);
181
182 new_buf = make_wide_stringbuf (len);
183
184 mem = STRINGBUF_WIDE_CHARS (new_buf);
185 for (i = 0; i < len; i++)
186 mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
187 mem[len] = 0;
188 }
189
190 return new_buf;
191 }
192
193 /* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
194 characters from BUF, if possible. */
195 static SCM
196 narrow_stringbuf (SCM buf)
197 {
198 SCM new_buf;
199
200 if (!STRINGBUF_WIDE (buf))
201 new_buf = buf;
202 else
203 {
204 size_t i, len;
205 scm_t_wchar *wmem;
206 unsigned char *mem;
207
208 len = STRINGBUF_LENGTH (buf);
209 wmem = STRINGBUF_WIDE_CHARS (buf);
210
211 for (i = 0; i < len; i++)
212 if (wmem[i] > 0xFF)
213 /* BUF cannot be narrowed. */
214 return buf;
215
216 new_buf = make_stringbuf (len);
217
218 mem = STRINGBUF_CHARS (new_buf);
219 for (i = 0; i < len; i++)
220 mem[i] = (unsigned char) wmem[i];
221 mem[len] = 0;
222 }
223
224 return new_buf;
225 }
226
227 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
228
229 \f
230 /* Copy-on-write strings.
231 */
232
233 #define STRING_TAG scm_tc7_string
234
235 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
236 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
237 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
238
239 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
240 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
241
242 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
243
244 /* Read-only strings.
245 */
246
247 #define RO_STRING_TAG scm_tc7_ro_string
248 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
249
250 /* Mutation-sharing substrings
251 */
252
253 #define SH_STRING_TAG (scm_tc7_string + 0x100)
254
255 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
256 /* START and LENGTH as for STRINGs. */
257
258 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
259
260 SCM scm_nullstr;
261
262 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
263 characters. CHARSP, if not NULL, will be set to location of the
264 char array. */
265 SCM
266 scm_i_make_string (size_t len, char **charsp)
267 {
268 SCM buf = make_stringbuf (len);
269 SCM res;
270 if (charsp)
271 *charsp = (char *) STRINGBUF_CHARS (buf);
272 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
273 (scm_t_bits)0, (scm_t_bits) len);
274 return res;
275 }
276
277 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
278 characters. CHARSP, if not NULL, will be set to location of the
279 character array. */
280 SCM
281 scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
282 {
283 SCM buf = make_wide_stringbuf (len);
284 SCM res;
285 if (charsp)
286 *charsp = STRINGBUF_WIDE_CHARS (buf);
287 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
288 (scm_t_bits) 0, (scm_t_bits) len);
289 return res;
290 }
291
292 static void
293 validate_substring_args (SCM str, size_t start, size_t end)
294 {
295 if (!IS_STRING (str))
296 scm_wrong_type_arg_msg (NULL, 0, str, "string");
297 if (start > STRING_LENGTH (str))
298 scm_out_of_range (NULL, scm_from_size_t (start));
299 if (end > STRING_LENGTH (str) || end < start)
300 scm_out_of_range (NULL, scm_from_size_t (end));
301 }
302
303 static inline void
304 get_str_buf_start (SCM *str, SCM *buf, size_t *start)
305 {
306 *start = STRING_START (*str);
307 if (IS_SH_STRING (*str))
308 {
309 *str = SH_STRING_STRING (*str);
310 *start += STRING_START (*str);
311 }
312 *buf = STRING_STRINGBUF (*str);
313 }
314
315 SCM
316 scm_i_substring (SCM str, size_t start, size_t end)
317 {
318 SCM buf;
319 size_t str_start;
320 get_str_buf_start (&str, &buf, &str_start);
321 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
322 SET_STRINGBUF_SHARED (buf);
323 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
324 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
325 (scm_t_bits)str_start + start,
326 (scm_t_bits) end - start);
327 }
328
329 SCM
330 scm_i_substring_read_only (SCM str, size_t start, size_t end)
331 {
332 SCM buf;
333 size_t str_start;
334 get_str_buf_start (&str, &buf, &str_start);
335 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
336 SET_STRINGBUF_SHARED (buf);
337 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
338 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
339 (scm_t_bits)str_start + start,
340 (scm_t_bits) end - start);
341 }
342
343 SCM
344 scm_i_substring_copy (SCM str, size_t start, size_t end)
345 {
346 size_t len = end - start;
347 SCM buf, my_buf;
348 size_t str_start;
349 get_str_buf_start (&str, &buf, &str_start);
350 if (scm_i_is_narrow_string (str))
351 {
352 my_buf = make_stringbuf (len);
353 memcpy (STRINGBUF_CHARS (my_buf),
354 STRINGBUF_CHARS (buf) + str_start + start, len);
355 }
356 else
357 {
358 my_buf = make_wide_stringbuf (len);
359 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
360 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
361 + start), len);
362 /* Even though this string is wide, the substring may be narrow.
363 Consider adding code to narrow the string. */
364 }
365 scm_remember_upto_here_1 (buf);
366 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
367 (scm_t_bits) 0, (scm_t_bits) len);
368 }
369
370 SCM
371 scm_i_substring_shared (SCM str, size_t start, size_t end)
372 {
373 if (start == 0 && end == STRING_LENGTH (str))
374 return str;
375 else
376 {
377 size_t len = end - start;
378 if (IS_SH_STRING (str))
379 {
380 start += STRING_START (str);
381 str = SH_STRING_STRING (str);
382 }
383 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
384 (scm_t_bits)start, (scm_t_bits) len);
385 }
386 }
387
388 SCM
389 scm_c_substring (SCM str, size_t start, size_t end)
390 {
391 validate_substring_args (str, start, end);
392 return scm_i_substring (str, start, end);
393 }
394
395 SCM
396 scm_c_substring_read_only (SCM str, size_t start, size_t end)
397 {
398 validate_substring_args (str, start, end);
399 return scm_i_substring_read_only (str, start, end);
400 }
401
402 SCM
403 scm_c_substring_copy (SCM str, size_t start, size_t end)
404 {
405 validate_substring_args (str, start, end);
406 return scm_i_substring_copy (str, start, end);
407 }
408
409 SCM
410 scm_c_substring_shared (SCM str, size_t start, size_t end)
411 {
412 validate_substring_args (str, start, end);
413 return scm_i_substring_shared (str, start, end);
414 }
415
416 \f
417 /* Internal accessors
418 */
419
420 /* Returns the number of characters in STR. This may be different
421 than the memory size of the string storage. */
422 size_t
423 scm_i_string_length (SCM str)
424 {
425 return STRING_LENGTH (str);
426 }
427
428 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
429 encoding. False if it is 'wide', having a 32-bit UCS-4
430 encoding. */
431 int
432 scm_i_is_narrow_string (SCM str)
433 {
434 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
435 }
436
437 /* Try to coerce a string to be narrow. It if is narrow already, do
438 nothing. If it is wide, shrink it to narrow if none of its
439 characters are above 0xFF. Return true if the string is narrow or
440 was made to be narrow. */
441 int
442 scm_i_try_narrow_string (SCM str)
443 {
444 SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
445
446 return scm_i_is_narrow_string (str);
447 }
448
449 /* Return a pointer to the raw data of the string, which can be either Latin-1
450 or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'. */
451 const void *
452 scm_i_string_data (SCM str)
453 {
454 SCM buf;
455 size_t start;
456 const char *data;
457
458 get_str_buf_start (&str, &buf, &start);
459
460 data = STRINGBUF_CONTENTS (buf);
461 data += start * (scm_i_is_narrow_string (str) ? 1 : 4);
462
463 return data;
464 }
465
466 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
467 STR. */
468 const char *
469 scm_i_string_chars (SCM str)
470 {
471 SCM buf;
472 size_t start;
473 get_str_buf_start (&str, &buf, &start);
474 if (scm_i_is_narrow_string (str))
475 return (const char *) STRINGBUF_CHARS (buf) + start;
476 else
477 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
478 scm_list_1 (str));
479 return NULL;
480 }
481
482 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
483 STR. */
484 const scm_t_wchar *
485 scm_i_string_wide_chars (SCM str)
486 {
487 SCM buf;
488 size_t start;
489
490 get_str_buf_start (&str, &buf, &start);
491 if (!scm_i_is_narrow_string (str))
492 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
493 else
494 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
495 scm_list_1 (str));
496 }
497
498 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
499 a new string buffer, so that it can be modified without modifying
500 other strings. Also, lock the string mutex. Later, one must call
501 scm_i_string_stop_writing to unlock the mutex. */
502 SCM
503 scm_i_string_start_writing (SCM orig_str)
504 {
505 SCM buf, str = orig_str;
506 size_t start;
507
508 get_str_buf_start (&str, &buf, &start);
509 if (IS_RO_STRING (str))
510 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
511
512 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
513 if (STRINGBUF_SHARED (buf))
514 {
515 /* Clone the stringbuf. */
516 size_t len = STRING_LENGTH (str);
517 SCM new_buf;
518
519 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
520
521 if (scm_i_is_narrow_string (str))
522 {
523 new_buf = make_stringbuf (len);
524 memcpy (STRINGBUF_CHARS (new_buf),
525 STRINGBUF_CHARS (buf) + STRING_START (str), len);
526
527 }
528 else
529 {
530 new_buf = make_wide_stringbuf (len);
531 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
532 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
533 + STRING_START (str)), len);
534 }
535
536 SET_STRING_STRINGBUF (str, new_buf);
537 start -= STRING_START (str);
538
539 /* FIXME: The following operations are not atomic, so other threads
540 looking at STR may see an inconsistent state. Nevertheless it can't
541 hurt much since (i) accessing STR while it is being mutated can't
542 yield a crash, and (ii) concurrent accesses to STR should be
543 protected by a mutex at the application level. The latter may not
544 apply when STR != ORIG_STR, though. */
545 SET_STRING_START (str, 0);
546 SET_STRING_STRINGBUF (str, new_buf);
547
548 buf = new_buf;
549
550 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
551 }
552 return orig_str;
553 }
554
555 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
556 char *
557 scm_i_string_writable_chars (SCM str)
558 {
559 SCM buf;
560 size_t start;
561
562 get_str_buf_start (&str, &buf, &start);
563 if (scm_i_is_narrow_string (str))
564 return (char *) STRINGBUF_CHARS (buf) + start;
565 else
566 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
567 scm_list_1 (str));
568 return NULL;
569 }
570
571 /* Return a pointer to the UCS-4 codepoints of a string. */
572 static scm_t_wchar *
573 scm_i_string_writable_wide_chars (SCM str)
574 {
575 SCM buf;
576 size_t start;
577
578 get_str_buf_start (&str, &buf, &start);
579 if (!scm_i_is_narrow_string (str))
580 return STRINGBUF_WIDE_CHARS (buf) + start;
581 else
582 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
583 scm_list_1 (str));
584 }
585
586 /* Unlock the string mutex that was locked when
587 scm_i_string_start_writing was called. */
588 void
589 scm_i_string_stop_writing (void)
590 {
591 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
592 }
593
594 /* Return the Xth character of STR as a UCS-4 codepoint. */
595 scm_t_wchar
596 scm_i_string_ref (SCM str, size_t x)
597 {
598 if (scm_i_is_narrow_string (str))
599 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
600 else
601 return scm_i_string_wide_chars (str)[x];
602 }
603
604 /* Returns index+1 of the first char in STR that matches C, or
605 0 if the char is not found. */
606 int
607 scm_i_string_contains_char (SCM str, char ch)
608 {
609 size_t i;
610 size_t len = scm_i_string_length (str);
611
612 i = 0;
613 if (scm_i_is_narrow_string (str))
614 {
615 while (i < len)
616 {
617 if (scm_i_string_chars (str)[i] == ch)
618 return i+1;
619 i++;
620 }
621 }
622 else
623 {
624 while (i < len)
625 {
626 if (scm_i_string_wide_chars (str)[i]
627 == (unsigned char) ch)
628 return i+1;
629 i++;
630 }
631 }
632 return 0;
633 }
634
635 int
636 scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
637 {
638 if (scm_i_is_narrow_string (sstr))
639 {
640 const char *a = scm_i_string_chars (sstr) + start_x;
641 const char *b = cstr;
642 return strncmp (a, b, strlen(b));
643 }
644 else
645 {
646 size_t i;
647 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
648 const char *b = cstr;
649 for (i = 0; i < strlen (b); i++)
650 {
651 if (a[i] != (unsigned char) b[i])
652 return 1;
653 }
654 }
655 return 0;
656 }
657
658 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
659 void
660 scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
661 {
662 if (chr > 0xFF && scm_i_is_narrow_string (str))
663 SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
664
665 if (scm_i_is_narrow_string (str))
666 {
667 char *dst = scm_i_string_writable_chars (str);
668 dst[p] = chr;
669 }
670 else
671 {
672 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
673 dst[p] = chr;
674 }
675 }
676
677 \f
678 /* Symbols.
679
680 Basic symbol creation and accessing is done here, the rest is in
681 symbols.[hc]. This has been done to keep stringbufs and the
682 internals of strings and string-like objects confined to this file.
683 */
684
685 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
686
687 SCM
688 scm_i_make_symbol (SCM name, scm_t_bits flags,
689 unsigned long hash, SCM props)
690 {
691 SCM buf;
692 size_t start = STRING_START (name);
693 size_t length = STRING_LENGTH (name);
694
695 if (IS_SH_STRING (name))
696 {
697 name = SH_STRING_STRING (name);
698 start += STRING_START (name);
699 }
700 buf = SYMBOL_STRINGBUF (name);
701
702 if (start == 0 && length == STRINGBUF_LENGTH (buf))
703 {
704 /* reuse buf. */
705 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
706 SET_STRINGBUF_SHARED (buf);
707 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
708 }
709 else
710 {
711 /* make new buf. */
712 if (scm_i_is_narrow_string (name))
713 {
714 SCM new_buf = make_stringbuf (length);
715 memcpy (STRINGBUF_CHARS (new_buf),
716 STRINGBUF_CHARS (buf) + start, length);
717 buf = new_buf;
718 }
719 else
720 {
721 SCM new_buf = make_wide_stringbuf (length);
722 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
723 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
724 length);
725 buf = new_buf;
726 }
727 }
728 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
729 (scm_t_bits) hash, SCM_UNPACK (props));
730 }
731
732 SCM
733 scm_i_c_make_symbol (const char *name, size_t len,
734 scm_t_bits flags, unsigned long hash, SCM props)
735 {
736 SCM buf = make_stringbuf (len);
737 memcpy (STRINGBUF_CHARS (buf), name, len);
738
739 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
740 (scm_t_bits) hash, SCM_UNPACK (props));
741 }
742
743 /* Returns the number of characters in SYM. This may be different
744 from the memory size of SYM. */
745 size_t
746 scm_i_symbol_length (SCM sym)
747 {
748 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
749 }
750
751 size_t
752 scm_c_symbol_length (SCM sym)
753 #define FUNC_NAME "scm_c_symbol_length"
754 {
755 SCM_VALIDATE_SYMBOL (1, sym);
756
757 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
758 }
759 #undef FUNC_NAME
760
761 /* True if the name of SYM is stored as a Latin-1 encoded string.
762 False if it is stored as a 32-bit UCS-4-encoded string. */
763 int
764 scm_i_is_narrow_symbol (SCM sym)
765 {
766 SCM buf;
767
768 buf = SYMBOL_STRINGBUF (sym);
769 return !STRINGBUF_WIDE (buf);
770 }
771
772 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
773 contains the name of SYM. */
774 const char *
775 scm_i_symbol_chars (SCM sym)
776 {
777 SCM buf;
778
779 buf = SYMBOL_STRINGBUF (sym);
780 if (!STRINGBUF_WIDE (buf))
781 return (const char *) STRINGBUF_CHARS (buf);
782 else
783 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
784 scm_list_1 (sym));
785 }
786
787 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
788 symbol's name. */
789 const scm_t_wchar *
790 scm_i_symbol_wide_chars (SCM sym)
791 {
792 SCM buf;
793
794 buf = SYMBOL_STRINGBUF (sym);
795 if (STRINGBUF_WIDE (buf))
796 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
797 else
798 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
799 scm_list_1 (sym));
800 }
801
802 SCM
803 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
804 {
805 SCM buf = SYMBOL_STRINGBUF (sym);
806 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
807 SET_STRINGBUF_SHARED (buf);
808 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
809 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
810 (scm_t_bits)start, (scm_t_bits) end - start);
811 }
812
813 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
814 scm_t_wchar
815 scm_i_symbol_ref (SCM sym, size_t x)
816 {
817 if (scm_i_is_narrow_symbol (sym))
818 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
819 else
820 return scm_i_symbol_wide_chars (sym)[x];
821 }
822
823 /* Debugging
824 */
825
826 SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
827 "Returns an association list containing debugging information\n"
828 "for @var{str}. The association list has the following entries."
829 "@table @code\n"
830 "@item string\n"
831 "The string itself.\n"
832 "@item start\n"
833 "The start index of the string into its stringbuf\n"
834 "@item length\n"
835 "The length of the string\n"
836 "@item shared\n"
837 "If this string is a substring, it returns its parent string.\n"
838 "Otherwise, it returns @code{#f}\n"
839 "@item read-only\n"
840 "@code{#t} if the string is read-only\n"
841 "@item stringbuf-chars\n"
842 "A new string containing this string's stringbuf's characters\n"
843 "@item stringbuf-length\n"
844 "The number of characters in this stringbuf\n"
845 "@item stringbuf-shared\n"
846 "@code{#t} if this stringbuf is shared\n"
847 "@item stringbuf-wide\n"
848 "@code{#t} if this stringbuf's characters are stored in a\n"
849 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
850 "buffer\n"
851 "@end table")
852 #define FUNC_NAME s_scm_sys_string_dump
853 {
854 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
855 SCM buf;
856 SCM_VALIDATE_STRING (1, str);
857
858 /* String info */
859 e1 = scm_cons (scm_from_locale_symbol ("string"),
860 str);
861 e2 = scm_cons (scm_from_locale_symbol ("start"),
862 scm_from_size_t (STRING_START (str)));
863 e3 = scm_cons (scm_from_locale_symbol ("length"),
864 scm_from_size_t (STRING_LENGTH (str)));
865
866 if (IS_SH_STRING (str))
867 {
868 e4 = scm_cons (scm_from_locale_symbol ("shared"),
869 SH_STRING_STRING (str));
870 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
871 }
872 else
873 {
874 e4 = scm_cons (scm_from_locale_symbol ("shared"),
875 SCM_BOOL_F);
876 buf = STRING_STRINGBUF (str);
877 }
878
879 if (IS_RO_STRING (str))
880 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
881 SCM_BOOL_T);
882 else
883 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
884 SCM_BOOL_F);
885
886 /* Stringbuf info */
887 if (!STRINGBUF_WIDE (buf))
888 {
889 size_t len = STRINGBUF_LENGTH (buf);
890 char *cbuf;
891 SCM sbc = scm_i_make_string (len, &cbuf);
892 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
893 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
894 sbc);
895 }
896 else
897 {
898 size_t len = STRINGBUF_LENGTH (buf);
899 scm_t_wchar *cbuf;
900 SCM sbc = scm_i_make_wide_string (len, &cbuf);
901 u32_cpy ((scm_t_uint32 *) cbuf,
902 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
903 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
904 sbc);
905 }
906 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
907 scm_from_size_t (STRINGBUF_LENGTH (buf)));
908 if (STRINGBUF_SHARED (buf))
909 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
910 SCM_BOOL_T);
911 else
912 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
913 SCM_BOOL_F);
914 if (STRINGBUF_WIDE (buf))
915 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
916 SCM_BOOL_T);
917 else
918 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
919 SCM_BOOL_F);
920
921 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
922 }
923 #undef FUNC_NAME
924
925 SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
926 "Returns an association list containing debugging information\n"
927 "for @var{sym}. The association list has the following entries."
928 "@table @code\n"
929 "@item symbol\n"
930 "The symbol itself\n"
931 "@item hash\n"
932 "Its hash value\n"
933 "@item interned\n"
934 "@code{#t} if it is an interned symbol\n"
935 "@item stringbuf-chars\n"
936 "A new string containing this symbols's stringbuf's characters\n"
937 "@item stringbuf-length\n"
938 "The number of characters in this stringbuf\n"
939 "@item stringbuf-shared\n"
940 "@code{#t} if this stringbuf is shared\n"
941 "@item stringbuf-wide\n"
942 "@code{#t} if this stringbuf's characters are stored in a\n"
943 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
944 "buffer\n"
945 "@end table")
946 #define FUNC_NAME s_scm_sys_symbol_dump
947 {
948 SCM e1, e2, e3, e4, e5, e6, e7;
949 SCM buf;
950 SCM_VALIDATE_SYMBOL (1, sym);
951 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
952 sym);
953 e2 = scm_cons (scm_from_locale_symbol ("hash"),
954 scm_from_ulong (scm_i_symbol_hash (sym)));
955 e3 = scm_cons (scm_from_locale_symbol ("interned"),
956 scm_symbol_interned_p (sym));
957 buf = SYMBOL_STRINGBUF (sym);
958
959 /* Stringbuf info */
960 if (!STRINGBUF_WIDE (buf))
961 {
962 size_t len = STRINGBUF_LENGTH (buf);
963 char *cbuf;
964 SCM sbc = scm_i_make_string (len, &cbuf);
965 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
966 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
967 sbc);
968 }
969 else
970 {
971 size_t len = STRINGBUF_LENGTH (buf);
972 scm_t_wchar *cbuf;
973 SCM sbc = scm_i_make_wide_string (len, &cbuf);
974 u32_cpy ((scm_t_uint32 *) cbuf,
975 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
976 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
977 sbc);
978 }
979 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
980 scm_from_size_t (STRINGBUF_LENGTH (buf)));
981 if (STRINGBUF_SHARED (buf))
982 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
983 SCM_BOOL_T);
984 else
985 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
986 SCM_BOOL_F);
987 if (STRINGBUF_WIDE (buf))
988 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
989 SCM_BOOL_T);
990 else
991 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
992 SCM_BOOL_F);
993 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
994
995 }
996 #undef FUNC_NAME
997
998 #ifdef SCM_STRING_LENGTH_HISTOGRAM
999
1000 SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
1001 #define FUNC_NAME s_scm_sys_stringbuf_hist
1002 {
1003 int i;
1004 for (i = 0; i < 1000; i++)
1005 if (lenhist[i])
1006 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
1007 fprintf (stderr, ">999: %u\n", lenhist[1000]);
1008 return SCM_UNSPECIFIED;
1009 }
1010 #undef FUNC_NAME
1011
1012 #endif
1013
1014 \f
1015
1016 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
1017 (SCM obj),
1018 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
1019 #define FUNC_NAME s_scm_string_p
1020 {
1021 return scm_from_bool (IS_STRING (obj));
1022 }
1023 #undef FUNC_NAME
1024
1025
1026 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1027
1028 SCM_DEFINE (scm_string, "string", 0, 0, 1,
1029 (SCM chrs),
1030 "@deffnx {Scheme Procedure} list->string chrs\n"
1031 "Return a newly allocated string composed of the arguments,\n"
1032 "@var{chrs}.")
1033 #define FUNC_NAME s_scm_string
1034 {
1035 SCM result = SCM_BOOL_F;
1036 SCM rest;
1037 size_t len;
1038 size_t p = 0;
1039 long i;
1040 int wide = 0;
1041
1042 /* Verify that this is a list of chars. */
1043 i = scm_ilength (chrs);
1044 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
1045
1046 len = (size_t) i;
1047 rest = chrs;
1048
1049 while (len > 0 && scm_is_pair (rest))
1050 {
1051 SCM elt = SCM_CAR (rest);
1052 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
1053 if (SCM_CHAR (elt) > 0xFF)
1054 wide = 1;
1055 rest = SCM_CDR (rest);
1056 len--;
1057 scm_remember_upto_here_1 (elt);
1058 }
1059
1060 /* Construct a string containing this list of chars. */
1061 len = (size_t) i;
1062 rest = chrs;
1063
1064 if (wide == 0)
1065 {
1066 char *buf;
1067
1068 result = scm_i_make_string (len, NULL);
1069 result = scm_i_string_start_writing (result);
1070 buf = scm_i_string_writable_chars (result);
1071 while (len > 0 && scm_is_pair (rest))
1072 {
1073 SCM elt = SCM_CAR (rest);
1074 buf[p] = (unsigned char) SCM_CHAR (elt);
1075 p++;
1076 rest = SCM_CDR (rest);
1077 len--;
1078 scm_remember_upto_here_1 (elt);
1079 }
1080 }
1081 else
1082 {
1083 scm_t_wchar *buf;
1084
1085 result = scm_i_make_wide_string (len, NULL);
1086 result = scm_i_string_start_writing (result);
1087 buf = scm_i_string_writable_wide_chars (result);
1088 while (len > 0 && scm_is_pair (rest))
1089 {
1090 SCM elt = SCM_CAR (rest);
1091 buf[p] = SCM_CHAR (elt);
1092 p++;
1093 rest = SCM_CDR (rest);
1094 len--;
1095 scm_remember_upto_here_1 (elt);
1096 }
1097 }
1098 scm_i_string_stop_writing ();
1099
1100 if (len > 0)
1101 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
1102 if (!scm_is_null (rest))
1103 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1104
1105 return result;
1106 }
1107 #undef FUNC_NAME
1108
1109 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
1110 (SCM k, SCM chr),
1111 "Return a newly allocated string of\n"
1112 "length @var{k}. If @var{chr} is given, then all elements of\n"
1113 "the string are initialized to @var{chr}, otherwise the contents\n"
1114 "of the @var{string} are unspecified.")
1115 #define FUNC_NAME s_scm_make_string
1116 {
1117 return scm_c_make_string (scm_to_size_t (k), chr);
1118 }
1119 #undef FUNC_NAME
1120
1121 SCM
1122 scm_c_make_string (size_t len, SCM chr)
1123 #define FUNC_NAME NULL
1124 {
1125 size_t p;
1126 SCM res = scm_i_make_string (len, NULL);
1127
1128 if (!SCM_UNBNDP (chr))
1129 {
1130 SCM_VALIDATE_CHAR (0, chr);
1131 res = scm_i_string_start_writing (res);
1132 for (p = 0; p < len; p++)
1133 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1134 scm_i_string_stop_writing ();
1135 }
1136
1137 return res;
1138 }
1139 #undef FUNC_NAME
1140
1141 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
1142 (SCM string),
1143 "Return the number of characters in @var{string}.")
1144 #define FUNC_NAME s_scm_string_length
1145 {
1146 SCM_VALIDATE_STRING (1, string);
1147 return scm_from_size_t (STRING_LENGTH (string));
1148 }
1149 #undef FUNC_NAME
1150
1151 SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
1152 (SCM string),
1153 "Return the bytes used to represent a character in @var{string}."
1154 "This will return 1 or 4.")
1155 #define FUNC_NAME s_scm_string_bytes_per_char
1156 {
1157 SCM_VALIDATE_STRING (1, string);
1158 if (!scm_i_is_narrow_string (string))
1159 return scm_from_int (4);
1160
1161 return scm_from_int (1);
1162 }
1163 #undef FUNC_NAME
1164
1165 size_t
1166 scm_c_string_length (SCM string)
1167 {
1168 if (!IS_STRING (string))
1169 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1170 return STRING_LENGTH (string);
1171 }
1172
1173 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
1174 (SCM str, SCM k),
1175 "Return character @var{k} of @var{str} using zero-origin\n"
1176 "indexing. @var{k} must be a valid index of @var{str}.")
1177 #define FUNC_NAME s_scm_string_ref
1178 {
1179 size_t len;
1180 unsigned long idx;
1181
1182 SCM_VALIDATE_STRING (1, str);
1183
1184 len = scm_i_string_length (str);
1185 if (SCM_LIKELY (len > 0))
1186 idx = scm_to_unsigned_integer (k, 0, len - 1);
1187 else
1188 scm_out_of_range (NULL, k);
1189
1190 if (scm_i_is_narrow_string (str))
1191 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1192 else
1193 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
1194 }
1195 #undef FUNC_NAME
1196
1197 SCM
1198 scm_c_string_ref (SCM str, size_t p)
1199 {
1200 if (p >= scm_i_string_length (str))
1201 scm_out_of_range (NULL, scm_from_size_t (p));
1202 if (scm_i_is_narrow_string (str))
1203 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1204 else
1205 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1206
1207 }
1208
1209 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
1210 (SCM str, SCM k, SCM chr),
1211 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1212 "an unspecified value. @var{k} must be a valid index of\n"
1213 "@var{str}.")
1214 #define FUNC_NAME s_scm_string_set_x
1215 {
1216 size_t len;
1217 unsigned long idx;
1218
1219 SCM_VALIDATE_STRING (1, str);
1220
1221 len = scm_i_string_length (str);
1222 if (SCM_LIKELY (len > 0))
1223 idx = scm_to_unsigned_integer (k, 0, len - 1);
1224 else
1225 scm_out_of_range (NULL, k);
1226
1227 SCM_VALIDATE_CHAR (3, chr);
1228 str = scm_i_string_start_writing (str);
1229 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1230 scm_i_string_stop_writing ();
1231
1232 return SCM_UNSPECIFIED;
1233 }
1234 #undef FUNC_NAME
1235
1236 void
1237 scm_c_string_set_x (SCM str, size_t p, SCM chr)
1238 {
1239 if (p >= scm_i_string_length (str))
1240 scm_out_of_range (NULL, scm_from_size_t (p));
1241 str = scm_i_string_start_writing (str);
1242 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1243 scm_i_string_stop_writing ();
1244 }
1245
1246 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
1247 (SCM str, SCM start, SCM end),
1248 "Return a newly allocated string formed from the characters\n"
1249 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1250 "ending with index @var{end} (exclusive).\n"
1251 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1252 "exact integers satisfying:\n\n"
1253 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1254 #define FUNC_NAME s_scm_substring
1255 {
1256 size_t len, from, to;
1257
1258 SCM_VALIDATE_STRING (1, str);
1259 len = scm_i_string_length (str);
1260 from = scm_to_unsigned_integer (start, 0, len);
1261 if (SCM_UNBNDP (end))
1262 to = len;
1263 else
1264 to = scm_to_unsigned_integer (end, from, len);
1265 return scm_i_substring (str, from, to);
1266 }
1267 #undef FUNC_NAME
1268
1269 SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1270 (SCM str, SCM start, SCM end),
1271 "Return a newly allocated string formed from the characters\n"
1272 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1273 "ending with index @var{end} (exclusive).\n"
1274 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1275 "exact integers satisfying:\n"
1276 "\n"
1277 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1278 "\n"
1279 "The returned string is read-only.\n")
1280 #define FUNC_NAME s_scm_substring_read_only
1281 {
1282 size_t len, from, to;
1283
1284 SCM_VALIDATE_STRING (1, str);
1285 len = scm_i_string_length (str);
1286 from = scm_to_unsigned_integer (start, 0, len);
1287 if (SCM_UNBNDP (end))
1288 to = len;
1289 else
1290 to = scm_to_unsigned_integer (end, from, len);
1291 return scm_i_substring_read_only (str, from, to);
1292 }
1293 #undef FUNC_NAME
1294
1295 SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1296 (SCM str, SCM start, SCM end),
1297 "Return a newly allocated string formed from the characters\n"
1298 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1299 "ending with index @var{end} (exclusive).\n"
1300 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1301 "exact integers satisfying:\n\n"
1302 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1303 #define FUNC_NAME s_scm_substring_copy
1304 {
1305 /* For the Scheme version, START is mandatory, but for the C
1306 version, it is optional. See scm_string_copy in srfi-13.c for a
1307 rationale.
1308 */
1309
1310 size_t from, to;
1311
1312 SCM_VALIDATE_STRING (1, str);
1313 scm_i_get_substring_spec (scm_i_string_length (str),
1314 start, &from, end, &to);
1315 return scm_i_substring_copy (str, from, to);
1316 }
1317 #undef FUNC_NAME
1318
1319 SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1320 (SCM str, SCM start, SCM end),
1321 "Return string that indirectly refers to the characters\n"
1322 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1323 "ending with index @var{end} (exclusive).\n"
1324 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1325 "exact integers satisfying:\n\n"
1326 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1327 #define FUNC_NAME s_scm_substring_shared
1328 {
1329 size_t len, from, to;
1330
1331 SCM_VALIDATE_STRING (1, str);
1332 len = scm_i_string_length (str);
1333 from = scm_to_unsigned_integer (start, 0, len);
1334 if (SCM_UNBNDP (end))
1335 to = len;
1336 else
1337 to = scm_to_unsigned_integer (end, from, len);
1338 return scm_i_substring_shared (str, from, to);
1339 }
1340 #undef FUNC_NAME
1341
1342 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
1343 (SCM args),
1344 "Return a newly allocated string whose characters form the\n"
1345 "concatenation of the given strings, @var{args}.")
1346 #define FUNC_NAME s_scm_string_append
1347 {
1348 SCM res;
1349 size_t len = 0;
1350 int wide = 0;
1351 SCM l, s;
1352 size_t i;
1353 union
1354 {
1355 char *narrow;
1356 scm_t_wchar *wide;
1357 } data;
1358
1359 SCM_VALIDATE_REST_ARGUMENT (args);
1360 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
1361 {
1362 s = SCM_CAR (l);
1363 SCM_VALIDATE_STRING (SCM_ARGn, s);
1364 len += scm_i_string_length (s);
1365 if (!scm_i_is_narrow_string (s))
1366 wide = 1;
1367 }
1368 data.narrow = NULL;
1369 if (!wide)
1370 res = scm_i_make_string (len, &data.narrow);
1371 else
1372 res = scm_i_make_wide_string (len, &data.wide);
1373
1374 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
1375 {
1376 size_t len;
1377 s = SCM_CAR (l);
1378 SCM_VALIDATE_STRING (SCM_ARGn, s);
1379 len = scm_i_string_length (s);
1380 if (!wide)
1381 {
1382 memcpy (data.narrow, scm_i_string_chars (s), len);
1383 data.narrow += len;
1384 }
1385 else
1386 {
1387 if (scm_i_is_narrow_string (s))
1388 {
1389 for (i = 0; i < scm_i_string_length (s); i++)
1390 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
1391 }
1392 else
1393 u32_cpy ((scm_t_uint32 *) data.wide,
1394 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
1395 data.wide += len;
1396 }
1397 scm_remember_upto_here_1 (s);
1398 }
1399 return res;
1400 }
1401 #undef FUNC_NAME
1402
1403 int
1404 scm_is_string (SCM obj)
1405 {
1406 return IS_STRING (obj);
1407 }
1408
1409 \f
1410 /* Conversion to/from other encodings. */
1411
1412 SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
1413 void
1414 scm_encoding_error (const char *subr, int err, const char *message,
1415 const char *from, const char *to, SCM string_or_bv)
1416 {
1417 /* Raise an exception that conveys all the information needed to debug the
1418 problem. Only perform locale conversions that are safe; in particular,
1419 don't try to display STRING_OR_BV when it's a string since converting it to
1420 the output locale may fail. */
1421 scm_throw (scm_encoding_error_key,
1422 scm_list_n (scm_from_locale_string (subr),
1423 scm_from_locale_string (message),
1424 scm_from_int (err),
1425 scm_from_locale_string (from),
1426 scm_from_locale_string (to),
1427 string_or_bv,
1428 SCM_UNDEFINED));
1429 }
1430
1431 SCM
1432 scm_from_stringn (const char *str, size_t len, const char *encoding,
1433 scm_t_string_failed_conversion_handler handler)
1434 {
1435 size_t u32len, i;
1436 scm_t_wchar *u32;
1437 int wide = 0;
1438 SCM res;
1439
1440 if (len == 0)
1441 return scm_nullstr;
1442
1443 if (encoding == NULL)
1444 {
1445 /* If encoding is null, use Latin-1. */
1446 char *buf;
1447 res = scm_i_make_string (len, &buf);
1448 memcpy (buf, str, len);
1449 return res;
1450 }
1451
1452 u32len = 0;
1453 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1454 (enum iconv_ilseq_handler)
1455 handler,
1456 str, len,
1457 NULL,
1458 NULL, &u32len);
1459
1460 if (SCM_UNLIKELY (u32 == NULL))
1461 {
1462 /* Raise an error and pass the raw C string as a bytevector to the `throw'
1463 handler. */
1464 SCM bv;
1465 signed char *buf;
1466
1467 buf = scm_gc_malloc_pointerless (len, "bytevector");
1468 memcpy (buf, str, len);
1469 bv = scm_c_take_bytevector (buf, len);
1470
1471 scm_encoding_error (__func__, errno,
1472 "input locale conversion error",
1473 encoding, "UTF-32", bv);
1474 }
1475
1476 i = 0;
1477 while (i < u32len)
1478 if (u32[i++] > 0xFF)
1479 {
1480 wide = 1;
1481 break;
1482 }
1483
1484 if (!wide)
1485 {
1486 char *dst;
1487 res = scm_i_make_string (u32len, &dst);
1488 for (i = 0; i < u32len; i ++)
1489 dst[i] = (unsigned char) u32[i];
1490 dst[u32len] = '\0';
1491 }
1492 else
1493 {
1494 scm_t_wchar *wdst;
1495 res = scm_i_make_wide_string (u32len, &wdst);
1496 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1497 wdst[u32len] = 0;
1498 }
1499
1500 free (u32);
1501 return res;
1502 }
1503
1504 SCM
1505 scm_from_latin1_stringn (const char *str, size_t len)
1506 {
1507 return scm_from_stringn (str, len, NULL, SCM_FAILED_CONVERSION_ERROR);
1508 }
1509
1510 SCM
1511 scm_from_locale_stringn (const char *str, size_t len)
1512 {
1513 const char *enc;
1514 scm_t_string_failed_conversion_handler hndl;
1515 SCM inport;
1516 scm_t_port *pt;
1517
1518 if (len == (size_t) -1)
1519 len = strlen (str);
1520 if (len == 0)
1521 return scm_nullstr;
1522
1523 inport = scm_current_input_port ();
1524 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1525 {
1526 pt = SCM_PTAB_ENTRY (inport);
1527 enc = pt->encoding;
1528 hndl = pt->ilseq_handler;
1529 }
1530 else
1531 {
1532 enc = NULL;
1533 hndl = SCM_FAILED_CONVERSION_ERROR;
1534 }
1535
1536 return scm_from_stringn (str, len, enc, hndl);
1537 }
1538
1539 SCM
1540 scm_from_locale_string (const char *str)
1541 {
1542 if (str == NULL)
1543 return scm_nullstr;
1544
1545 return scm_from_locale_stringn (str, -1);
1546 }
1547
1548 SCM
1549 scm_i_from_utf8_string (const scm_t_uint8 *str)
1550 {
1551 return scm_from_stringn ((const char *) str,
1552 strlen ((char *) str), "UTF-8",
1553 SCM_FAILED_CONVERSION_ERROR);
1554 }
1555
1556 /* Create a new scheme string from the C string STR. The memory of
1557 STR may be used directly as storage for the new string. */
1558 /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1559 would be to register a finalizer to eventually free(3) STR, which isn't
1560 worth it. Should we just deprecate the `scm_take_' functions? */
1561 SCM
1562 scm_take_locale_stringn (char *str, size_t len)
1563 {
1564 SCM res;
1565
1566 res = scm_from_locale_stringn (str, len);
1567 free (str);
1568
1569 return res;
1570 }
1571
1572 SCM
1573 scm_take_locale_string (char *str)
1574 {
1575 return scm_take_locale_stringn (str, -1);
1576 }
1577
1578 /* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
1579 *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
1580 Set *LENP to the size of the resulting string. */
1581 void
1582 scm_i_unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
1583 {
1584 char *before, *after;
1585 size_t i, j;
1586
1587 before = buf;
1588 after = buf;
1589 i = 0;
1590 j = 0;
1591 while (i < *lenp)
1592 {
1593 if ((i <= *lenp - 6)
1594 && before[i] == '\\'
1595 && before[i + 1] == 'u'
1596 && before[i + 2] == '0' && before[i + 3] == '0')
1597 {
1598 /* Convert \u00NN to \xNN */
1599 after[j] = '\\';
1600 after[j + 1] = 'x';
1601 after[j + 2] = tolower ((int) before[i + 4]);
1602 after[j + 3] = tolower ((int) before[i + 5]);
1603 i += 6;
1604 j += 4;
1605 }
1606 else if ((i <= *lenp - 10)
1607 && before[i] == '\\'
1608 && before[i + 1] == 'U'
1609 && before[i + 2] == '0' && before[i + 3] == '0')
1610 {
1611 /* Convert \U00NNNNNN to \UNNNNNN */
1612 after[j] = '\\';
1613 after[j + 1] = 'U';
1614 after[j + 2] = tolower ((int) before[i + 4]);
1615 after[j + 3] = tolower ((int) before[i + 5]);
1616 after[j + 4] = tolower ((int) before[i + 6]);
1617 after[j + 5] = tolower ((int) before[i + 7]);
1618 after[j + 6] = tolower ((int) before[i + 8]);
1619 after[j + 7] = tolower ((int) before[i + 9]);
1620 i += 10;
1621 j += 8;
1622 }
1623 else
1624 {
1625 after[j] = before[i];
1626 i++;
1627 j++;
1628 }
1629 }
1630 *lenp = j;
1631 }
1632
1633 /* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
1634 *LENP-byte locale-encoded string, to `\xXXXX;'. Set *LEN to the size
1635 of the resulting string. BUF must be large enough to handle the
1636 worst case when `\uXXXX' escapes (6 characters) are replaced by
1637 `\xXXXX;' (7 characters). */
1638 void
1639 scm_i_unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
1640 {
1641 char *before, *after;
1642 size_t i, j;
1643 /* The worst case is if the input string contains all 4-digit hex escapes.
1644 "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
1645 size_t max_out_len = (*lenp * 7) / 6 + 1;
1646 size_t nzeros, ndigits;
1647
1648 before = buf;
1649 after = alloca (max_out_len);
1650 i = 0;
1651 j = 0;
1652 while (i < *lenp)
1653 {
1654 if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
1655 || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
1656 {
1657 if (before[i + 1] == 'u')
1658 ndigits = 4;
1659 else if (before[i + 1] == 'U')
1660 ndigits = 8;
1661 else
1662 abort ();
1663
1664 /* Add the R6RS hex escape initial sequence. */
1665 after[j] = '\\';
1666 after[j + 1] = 'x';
1667
1668 /* Move string positions to the start of the hex numbers. */
1669 i += 2;
1670 j += 2;
1671
1672 /* Find the number of initial zeros in this hex number. */
1673 nzeros = 0;
1674 while (before[i + nzeros] == '0' && nzeros < ndigits)
1675 nzeros++;
1676
1677 /* Copy the number, skipping initial zeros, and then move the string
1678 positions. */
1679 if (nzeros == ndigits)
1680 {
1681 after[j] = '0';
1682 i += ndigits;
1683 j += 1;
1684 }
1685 else
1686 {
1687 int pos;
1688 for (pos = 0; pos < ndigits - nzeros; pos++)
1689 after[j + pos] = tolower ((int) before[i + nzeros + pos]);
1690 i += ndigits;
1691 j += (ndigits - nzeros);
1692 }
1693
1694 /* Add terminating semicolon. */
1695 after[j] = ';';
1696 j++;
1697 }
1698 else
1699 {
1700 after[j] = before[i];
1701 i++;
1702 j++;
1703 }
1704 }
1705 *lenp = j;
1706 memcpy (before, after, j);
1707 }
1708
1709 char *
1710 scm_to_latin1_stringn (SCM str, size_t *lenp)
1711 {
1712 return scm_to_stringn (str, lenp, NULL, SCM_FAILED_CONVERSION_ERROR);
1713 }
1714
1715 char *
1716 scm_to_locale_stringn (SCM str, size_t *lenp)
1717 {
1718 SCM outport;
1719 scm_t_port *pt;
1720 const char *enc;
1721
1722 outport = scm_current_output_port ();
1723 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1724 {
1725 pt = SCM_PTAB_ENTRY (outport);
1726 enc = pt->encoding;
1727 }
1728 else
1729 enc = NULL;
1730
1731 return scm_to_stringn (str, lenp,
1732 enc,
1733 scm_i_get_conversion_strategy (SCM_BOOL_F));
1734 }
1735
1736 /* Return a malloc(3)-allocated buffer containing the contents of STR encoded
1737 according to ENCODING. If LENP is non-NULL, set it to the size in bytes of
1738 the returned buffer. If the conversion to ENCODING fails, apply the strategy
1739 defined by HANDLER. */
1740 char *
1741 scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
1742 scm_t_string_failed_conversion_handler handler)
1743 {
1744 char *buf;
1745 size_t ilen, len, i;
1746 int ret;
1747 const char *enc;
1748
1749 if (!scm_is_string (str))
1750 scm_wrong_type_arg_msg (NULL, 0, str, "string");
1751 ilen = scm_i_string_length (str);
1752
1753 if (ilen == 0)
1754 {
1755 buf = scm_malloc (1);
1756 buf[0] = '\0';
1757 if (lenp)
1758 *lenp = 0;
1759 return buf;
1760 }
1761
1762 if (lenp == NULL)
1763 for (i = 0; i < ilen; i++)
1764 if (scm_i_string_ref (str, i) == '\0')
1765 scm_misc_error (NULL,
1766 "string contains #\\nul character: ~S",
1767 scm_list_1 (str));
1768
1769 if (scm_i_is_narrow_string (str) && (encoding == NULL))
1770 {
1771 /* If using native Latin-1 encoding, just copy the string
1772 contents. */
1773 if (lenp)
1774 {
1775 buf = scm_malloc (ilen);
1776 memcpy (buf, scm_i_string_chars (str), ilen);
1777 *lenp = ilen;
1778 return buf;
1779 }
1780 else
1781 {
1782 buf = scm_malloc (ilen + 1);
1783 memcpy (buf, scm_i_string_chars (str), ilen);
1784 buf[ilen] = '\0';
1785 return buf;
1786 }
1787 }
1788
1789
1790 buf = NULL;
1791 len = 0;
1792 enc = encoding;
1793 if (enc == NULL)
1794 enc = "ISO-8859-1";
1795 if (scm_i_is_narrow_string (str))
1796 {
1797 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1798 "ISO-8859-1", enc,
1799 (enum iconv_ilseq_handler) handler, NULL,
1800 &buf, &len);
1801
1802 if (ret != 0)
1803 scm_encoding_error (__func__, errno,
1804 "cannot convert to output locale",
1805 "ISO-8859-1", enc, str);
1806 }
1807 else
1808 {
1809 buf = u32_conv_to_encoding (enc,
1810 (enum iconv_ilseq_handler) handler,
1811 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1812 ilen,
1813 NULL,
1814 NULL, &len);
1815 if (buf == NULL)
1816 scm_encoding_error (__func__, errno,
1817 "cannot convert to output locale",
1818 "UTF-32", enc, str);
1819 }
1820 if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1821 {
1822 if (SCM_R6RS_ESCAPES_P)
1823 {
1824 /* The worst case is if the input string contains all 4-digit
1825 hex escapes. "\uXXXX" (six characters) becomes "\xXXXX;"
1826 (seven characters). Make BUF large enough to hold
1827 that. */
1828 buf = scm_realloc (buf, (len * 7) / 6 + 1);
1829 scm_i_unistring_escapes_to_r6rs_escapes (buf, &len);
1830 }
1831 else
1832 scm_i_unistring_escapes_to_guile_escapes (buf, &len);
1833
1834 buf = scm_realloc (buf, len);
1835 }
1836 if (lenp)
1837 *lenp = len;
1838 else
1839 {
1840 buf = scm_realloc (buf, len + 1);
1841 buf[len] = '\0';
1842 }
1843
1844 scm_remember_upto_here_1 (str);
1845 return buf;
1846 }
1847
1848 char *
1849 scm_to_locale_string (SCM str)
1850 {
1851 return scm_to_locale_stringn (str, NULL);
1852 }
1853
1854 scm_t_uint8 *
1855 scm_i_to_utf8_string (SCM str)
1856 {
1857 char *u8str;
1858 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1859 return (scm_t_uint8 *) u8str;
1860 }
1861
1862 size_t
1863 scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1864 {
1865 size_t len;
1866 char *result = NULL;
1867 if (!scm_is_string (str))
1868 scm_wrong_type_arg_msg (NULL, 0, str, "string");
1869 result = scm_to_locale_stringn (str, &len);
1870
1871 memcpy (buf, result, (len > max_len) ? max_len : len);
1872 free (result);
1873
1874 scm_remember_upto_here_1 (str);
1875 return len;
1876 }
1877
1878 \f
1879 /* Unicode string normalization. */
1880
1881 /* This function is a partial clone of SCM_STRING_TO_U32_BUF from
1882 libguile/i18n.c. It would be useful to have this factored out into a more
1883 convenient location, but its use of alloca makes that tricky to do. */
1884
1885 static SCM
1886 normalize_str (SCM string, uninorm_t form)
1887 {
1888 SCM ret;
1889 scm_t_uint32 *w_str;
1890 scm_t_wchar *cbuf;
1891 size_t rlen, len = scm_i_string_length (string);
1892
1893 if (scm_i_is_narrow_string (string))
1894 {
1895 size_t i;
1896 const char *buf = scm_i_string_chars (string);
1897
1898 w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
1899
1900 for (i = 0; i < len; i ++)
1901 w_str[i] = (unsigned char) buf[i];
1902 w_str[len] = 0;
1903 }
1904 else
1905 w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
1906
1907 w_str = u32_normalize (form, w_str, len, NULL, &rlen);
1908
1909 ret = scm_i_make_wide_string (rlen, &cbuf);
1910 u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
1911 free (w_str);
1912
1913 scm_i_try_narrow_string (ret);
1914
1915 return ret;
1916 }
1917
1918 SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
1919 (SCM string),
1920 "Returns the NFC normalized form of @var{string}.")
1921 #define FUNC_NAME s_scm_string_normalize_nfc
1922 {
1923 SCM_VALIDATE_STRING (1, string);
1924 return normalize_str (string, UNINORM_NFC);
1925 }
1926 #undef FUNC_NAME
1927
1928 SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
1929 (SCM string),
1930 "Returns the NFD normalized form of @var{string}.")
1931 #define FUNC_NAME s_scm_string_normalize_nfd
1932 {
1933 SCM_VALIDATE_STRING (1, string);
1934 return normalize_str (string, UNINORM_NFD);
1935 }
1936 #undef FUNC_NAME
1937
1938 SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
1939 (SCM string),
1940 "Returns the NFKC normalized form of @var{string}.")
1941 #define FUNC_NAME s_scm_string_normalize_nfkc
1942 {
1943 SCM_VALIDATE_STRING (1, string);
1944 return normalize_str (string, UNINORM_NFKC);
1945 }
1946 #undef FUNC_NAME
1947
1948 SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
1949 (SCM string),
1950 "Returns the NFKD normalized form of @var{string}.")
1951 #define FUNC_NAME s_scm_string_normalize_nfkd
1952 {
1953 SCM_VALIDATE_STRING (1, string);
1954 return normalize_str (string, UNINORM_NFKD);
1955 }
1956 #undef FUNC_NAME
1957
1958 /* converts C scm_array of strings to SCM scm_list of strings. */
1959 /* If argc < 0, a null terminated scm_array is assumed. */
1960 SCM
1961 scm_makfromstrs (int argc, char **argv)
1962 {
1963 int i = argc;
1964 SCM lst = SCM_EOL;
1965 if (0 > i)
1966 for (i = 0; argv[i]; i++);
1967 while (i--)
1968 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1969 return lst;
1970 }
1971
1972 /* Return a newly allocated array of char pointers to each of the strings
1973 in args, with a terminating NULL pointer. */
1974
1975 char **
1976 scm_i_allocate_string_pointers (SCM list)
1977 #define FUNC_NAME "scm_i_allocate_string_pointers"
1978 {
1979 char **result;
1980 int len = scm_ilength (list);
1981 int i;
1982
1983 if (len < 0)
1984 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1985
1986 result = scm_gc_malloc ((len + 1) * sizeof (char *),
1987 "string pointers");
1988 result[len] = NULL;
1989
1990 /* The list might be have been modified in another thread, so
1991 we check LIST before each access.
1992 */
1993 for (i = 0; i < len && scm_is_pair (list); i++)
1994 {
1995 SCM str;
1996 size_t len;
1997
1998 str = SCM_CAR (list);
1999 len = scm_c_string_length (str);
2000
2001 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
2002 memcpy (result[i], scm_i_string_chars (str), len);
2003 result[i][len] = '\0';
2004
2005 list = SCM_CDR (list);
2006 }
2007
2008 return result;
2009 }
2010 #undef FUNC_NAME
2011
2012 void
2013 scm_i_get_substring_spec (size_t len,
2014 SCM start, size_t *cstart,
2015 SCM end, size_t *cend)
2016 {
2017 if (SCM_UNBNDP (start))
2018 *cstart = 0;
2019 else
2020 *cstart = scm_to_unsigned_integer (start, 0, len);
2021
2022 if (SCM_UNBNDP (end))
2023 *cend = len;
2024 else
2025 *cend = scm_to_unsigned_integer (end, *cstart, len);
2026 }
2027
2028 #if SCM_ENABLE_DEPRECATED
2029
2030 /* When these definitions are removed, it becomes reasonable to use
2031 read-only strings for string literals. For that, change the reader
2032 to create string literals with scm_c_substring_read_only instead of
2033 with scm_c_substring_copy.
2034 */
2035
2036 int
2037 scm_i_deprecated_stringp (SCM str)
2038 {
2039 scm_c_issue_deprecation_warning
2040 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
2041
2042 return scm_is_string (str);
2043 }
2044
2045 char *
2046 scm_i_deprecated_string_chars (SCM str)
2047 {
2048 char *chars;
2049
2050 scm_c_issue_deprecation_warning
2051 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
2052
2053 /* We don't accept shared substrings here since they are not
2054 null-terminated.
2055 */
2056 if (IS_SH_STRING (str))
2057 scm_misc_error (NULL,
2058 "SCM_STRING_CHARS does not work with shared substrings",
2059 SCM_EOL);
2060
2061 /* We explicitly test for read-only strings to produce a better
2062 error message.
2063 */
2064
2065 if (IS_RO_STRING (str))
2066 scm_misc_error (NULL,
2067 "SCM_STRING_CHARS does not work with read-only strings",
2068 SCM_EOL);
2069
2070 /* The following is still wrong, of course...
2071 */
2072 str = scm_i_string_start_writing (str);
2073 chars = scm_i_string_writable_chars (str);
2074 scm_i_string_stop_writing ();
2075 return chars;
2076 }
2077
2078 size_t
2079 scm_i_deprecated_string_length (SCM str)
2080 {
2081 scm_c_issue_deprecation_warning
2082 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
2083 return scm_c_string_length (str);
2084 }
2085
2086 #endif
2087
2088 static SCM
2089 string_handle_ref (scm_t_array_handle *h, size_t index)
2090 {
2091 return scm_c_string_ref (h->array, index);
2092 }
2093
2094 static void
2095 string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
2096 {
2097 scm_c_string_set_x (h->array, index, val);
2098 }
2099
2100 static void
2101 string_get_handle (SCM v, scm_t_array_handle *h)
2102 {
2103 h->array = v;
2104 h->ndims = 1;
2105 h->dims = &h->dim0;
2106 h->dim0.lbnd = 0;
2107 h->dim0.ubnd = scm_c_string_length (v) - 1;
2108 h->dim0.inc = 1;
2109 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
2110 h->elements = h->writable_elements = NULL;
2111 }
2112
2113 SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f,
2114 string_handle_ref, string_handle_set,
2115 string_get_handle)
2116 SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
2117
2118 void
2119 scm_init_strings ()
2120 {
2121 scm_nullstr = scm_i_make_string (0, NULL);
2122
2123 #include "libguile/strings.x"
2124 }
2125
2126
2127 /*
2128 Local Variables:
2129 c-file-style: "gnu"
2130 End:
2131 */