1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "libguile/_scm.h"
32 #include "libguile/chars.h"
33 #include "libguile/root.h"
34 #include "libguile/strings.h"
35 #include "libguile/deprecation.h"
36 #include "libguile/validate.h"
37 #include "libguile/dynwind.h"
47 * XXX - keeping an accurate refcount during GC seems to be quite
48 * tricky, so we just keep score of whether a stringbuf might be
49 * shared, not whether it definitely is.
51 * The scheme I (mvo) tried to keep an accurate reference count would
52 * recount all strings that point to a stringbuf during the mark-phase
53 * of the GC. This was done since one cannot access the stringbuf of
54 * a string when that string is freed (in order to decrease the
55 * reference count). The memory of the stringbuf might have been
56 * reused already for something completely different.
58 * This recounted worked for a small number of threads beating on
59 * cow-strings, but it failed randomly with more than 10 threads, say.
60 * I couldn't figure out what went wrong, so I used the conservative
61 * approach implemented below.
63 * A stringbuf needs to know its length, but only so that it can be
64 * reported when the stringbuf is freed.
66 * There are 3 storage strategies for stringbufs: inline, outline, and
69 * Inline strings are small 8-bit strings stored within the double
70 * cell itself. Outline strings are larger 8-bit strings with GC
71 * allocated storage. Wide strings are 32-bit strings with allocated
74 * There was little value in making wide string inlineable, since
75 * there is only room for three inlined 32-bit characters. Thus wide
76 * stringbufs are never inlined.
79 #define STRINGBUF_F_SHARED 0x100
80 #define STRINGBUF_F_INLINE 0x200
81 #define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
82 encoding. Otherwise, strings
85 #define STRINGBUF_TAG scm_tc7_stringbuf
86 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
87 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
88 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
90 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
91 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
92 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
93 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
95 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
96 ? STRINGBUF_INLINE_CHARS (buf) \
97 : STRINGBUF_OUTLINE_CHARS (buf))
99 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
100 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
101 ? STRINGBUF_INLINE_LENGTH (buf) \
102 : STRINGBUF_OUTLINE_LENGTH (buf))
104 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
106 #define SET_STRINGBUF_SHARED(buf) \
107 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
109 #if SCM_STRING_LENGTH_HISTOGRAM
110 static size_t lenhist
[1001];
113 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
116 make_stringbuf (size_t len
)
118 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
119 scm_i_symbol_chars, all stringbufs are null-terminated. Once
120 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
121 has been changed for scm_i_symbol_chars, this null-termination
125 #if SCM_STRING_LENGTH_HISTOGRAM
132 if (len
<= STRINGBUF_MAX_INLINE_LEN
-1)
134 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_INLINE
| (len
<< 16),
139 char *mem
= scm_gc_malloc (len
+1, "string");
141 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) mem
,
142 (scm_t_bits
) len
, (scm_t_bits
) 0);
146 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
149 make_wide_stringbuf (size_t len
)
152 #if SCM_STRING_LENGTH_HISTOGRAM
159 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
161 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_WIDE
, (scm_t_bits
) mem
,
162 (scm_t_bits
) len
, (scm_t_bits
) 0);
165 /* Return a new stringbuf whose underlying storage consists of the LEN+1
166 octets pointed to by STR (the last octet is zero). */
168 scm_i_take_stringbufn (char *str
, size_t len
)
170 scm_gc_register_collectable_memory (str
, len
+ 1, "stringbuf");
172 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) str
,
173 (scm_t_bits
) len
, (scm_t_bits
) 0);
177 scm_i_stringbuf_mark (SCM buf
)
183 scm_i_stringbuf_free (SCM buf
)
185 if (!STRINGBUF_INLINE (buf
))
187 if (!STRINGBUF_WIDE (buf
))
188 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
),
189 STRINGBUF_OUTLINE_LENGTH (buf
) + 1, "string");
191 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
),
192 sizeof (scm_t_wchar
) * (STRINGBUF_OUTLINE_LENGTH (buf
)
198 /* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
199 one containing 32-bit UCS-4-encoded characters. */
201 widen_stringbuf (SCM buf
)
206 if (STRINGBUF_WIDE (buf
))
209 if (STRINGBUF_INLINE (buf
))
211 len
= STRINGBUF_INLINE_LENGTH (buf
);
213 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
214 for (i
= 0; i
< len
; i
++)
216 (scm_t_wchar
) (unsigned char) STRINGBUF_INLINE_CHARS (buf
)[i
];
219 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) ^ STRINGBUF_F_INLINE
);
220 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
221 SCM_SET_CELL_WORD_1 (buf
, mem
);
222 SCM_SET_CELL_WORD_2 (buf
, len
);
226 len
= STRINGBUF_OUTLINE_LENGTH (buf
);
228 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
229 for (i
= 0; i
< len
; i
++)
231 (scm_t_wchar
) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf
)[i
];
234 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
), len
+ 1, "string");
236 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
237 SCM_SET_CELL_WORD_1 (buf
, mem
);
238 SCM_SET_CELL_WORD_2 (buf
, len
);
242 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
244 /* Copy-on-write strings.
247 #define STRING_TAG scm_tc7_string
249 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
250 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
251 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
253 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
254 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
256 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
258 /* Read-only strings.
261 #define RO_STRING_TAG (scm_tc7_string + 0x200)
262 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
264 /* Mutation-sharing substrings
267 #define SH_STRING_TAG (scm_tc7_string + 0x100)
269 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
270 /* START and LENGTH as for STRINGs. */
272 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
274 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
275 characters. CHARSP, if not NULL, will be set to location of the
278 scm_i_make_string (size_t len
, char **charsp
)
280 SCM buf
= make_stringbuf (len
);
283 *charsp
= STRINGBUF_CHARS (buf
);
284 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
285 (scm_t_bits
)0, (scm_t_bits
) len
);
289 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
290 characters. CHARSP, if not NULL, will be set to location of the
293 scm_i_make_wide_string (size_t len
, scm_t_wchar
**charsp
)
295 SCM buf
= make_wide_stringbuf (len
);
298 *charsp
= STRINGBUF_WIDE_CHARS (buf
);
299 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK (buf
),
300 (scm_t_bits
) 0, (scm_t_bits
) len
);
305 validate_substring_args (SCM str
, size_t start
, size_t end
)
307 if (!IS_STRING (str
))
308 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
309 if (start
> STRING_LENGTH (str
))
310 scm_out_of_range (NULL
, scm_from_size_t (start
));
311 if (end
> STRING_LENGTH (str
) || end
< start
)
312 scm_out_of_range (NULL
, scm_from_size_t (end
));
316 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
318 *start
= STRING_START (*str
);
319 if (IS_SH_STRING (*str
))
321 *str
= SH_STRING_STRING (*str
);
322 *start
+= STRING_START (*str
);
324 *buf
= STRING_STRINGBUF (*str
);
328 scm_i_substring (SCM str
, size_t start
, size_t end
)
332 get_str_buf_start (&str
, &buf
, &str_start
);
333 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
334 SET_STRINGBUF_SHARED (buf
);
335 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
336 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
337 (scm_t_bits
)str_start
+ start
,
338 (scm_t_bits
) end
- start
);
342 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
346 get_str_buf_start (&str
, &buf
, &str_start
);
347 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
348 SET_STRINGBUF_SHARED (buf
);
349 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
350 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
351 (scm_t_bits
)str_start
+ start
,
352 (scm_t_bits
) end
- start
);
356 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
358 size_t len
= end
- start
;
361 get_str_buf_start (&str
, &buf
, &str_start
);
362 if (scm_i_is_narrow_string (str
))
364 my_buf
= make_stringbuf (len
);
365 memcpy (STRINGBUF_CHARS (my_buf
),
366 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
370 my_buf
= make_wide_stringbuf (len
);
371 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (my_buf
),
372 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
) + str_start
374 /* Even though this string is wide, the substring may be narrow.
375 Consider adding code to narrow the string. */
377 scm_remember_upto_here_1 (buf
);
378 return scm_double_cell (STRING_TAG
, SCM_UNPACK (my_buf
),
379 (scm_t_bits
) 0, (scm_t_bits
) len
);
383 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
385 if (start
== 0 && end
== STRING_LENGTH (str
))
389 size_t len
= end
- start
;
390 if (IS_SH_STRING (str
))
392 start
+= STRING_START (str
);
393 str
= SH_STRING_STRING (str
);
395 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
396 (scm_t_bits
)start
, (scm_t_bits
) len
);
401 scm_c_substring (SCM str
, size_t start
, size_t end
)
403 validate_substring_args (str
, start
, end
);
404 return scm_i_substring (str
, start
, end
);
408 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
410 validate_substring_args (str
, start
, end
);
411 return scm_i_substring_read_only (str
, start
, end
);
415 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
417 validate_substring_args (str
, start
, end
);
418 return scm_i_substring_copy (str
, start
, end
);
422 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
424 validate_substring_args (str
, start
, end
);
425 return scm_i_substring_shared (str
, start
, end
);
429 scm_i_string_mark (SCM str
)
431 if (IS_SH_STRING (str
))
432 return SH_STRING_STRING (str
);
434 return STRING_STRINGBUF (str
);
438 scm_i_string_free (SCM str
)
442 /* Internal accessors
445 /* Returns the number of characters in STR. This may be different
446 than the memory size of the string storage. */
448 scm_i_string_length (SCM str
)
450 return STRING_LENGTH (str
);
453 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
454 encoding. False if it is 'wide', having a 32-bit UCS-4
457 scm_i_is_narrow_string (SCM str
)
459 return !STRINGBUF_WIDE (STRING_STRINGBUF (str
));
462 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
465 scm_i_string_chars (SCM str
)
469 get_str_buf_start (&str
, &buf
, &start
);
470 if (scm_i_is_narrow_string (str
))
471 return STRINGBUF_CHARS (buf
) + start
;
473 scm_misc_error (NULL
, "Invalid read access of chars of wide string: ~s",
478 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
481 scm_i_string_wide_chars (SCM str
)
486 get_str_buf_start (&str
, &buf
, &start
);
487 if (!scm_i_is_narrow_string (str
))
488 return STRINGBUF_WIDE_CHARS (buf
) + start
;
490 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
494 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
495 a new string buffer, so that it can be modified without modifying
496 other strings. Also, lock the string mutex. Later, one must call
497 scm_i_string_stop_writing to unlock the mutex. */
499 scm_i_string_start_writing (SCM orig_str
)
501 SCM buf
, str
= orig_str
;
504 get_str_buf_start (&str
, &buf
, &start
);
505 if (IS_RO_STRING (str
))
506 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
508 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
509 if (STRINGBUF_SHARED (buf
))
511 /* Clone the stringbuf. */
512 size_t len
= STRING_LENGTH (str
);
515 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
517 if (scm_i_is_narrow_string (str
))
519 new_buf
= make_stringbuf (len
);
520 memcpy (STRINGBUF_CHARS (new_buf
),
521 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
526 new_buf
= make_wide_stringbuf (len
);
527 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
528 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
)
529 + STRING_START (str
)), len
);
531 scm_i_thread_put_to_sleep ();
532 SET_STRING_STRINGBUF (str
, new_buf
);
533 start
-= STRING_START (str
);
534 SET_STRING_START (str
, 0);
535 scm_i_thread_wake_up ();
539 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
544 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
546 scm_i_string_writable_chars (SCM str
)
551 get_str_buf_start (&str
, &buf
, &start
);
552 if (scm_i_is_narrow_string (str
))
553 return STRINGBUF_CHARS (buf
) + start
;
555 scm_misc_error (NULL
, "Invalid write access of chars of wide string: ~s",
560 /* Return a pointer to the UCS-4 codepoints of a string. */
562 scm_i_string_writable_wide_chars (SCM str
)
567 get_str_buf_start (&str
, &buf
, &start
);
568 if (!scm_i_is_narrow_string (str
))
569 return STRINGBUF_WIDE_CHARS (buf
) + start
;
571 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
575 /* Unlock the string mutex that was locked when
576 scm_i_string_start_writing was called. */
578 scm_i_string_stop_writing (void)
580 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
583 /* Return the Xth character of STR as a UCS-4 codepoint. */
585 scm_i_string_ref (SCM str
, size_t x
)
587 if (scm_i_is_narrow_string (str
))
588 return (scm_t_wchar
) (unsigned char) (scm_i_string_chars (str
)[x
]);
590 return scm_i_string_wide_chars (str
)[x
];
593 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
595 scm_i_string_set_x (SCM str
, size_t p
, scm_t_wchar chr
)
597 if (chr
> 0xFF && scm_i_is_narrow_string (str
))
598 widen_stringbuf (STRING_STRINGBUF (str
));
600 if (scm_i_is_narrow_string (str
))
602 char *dst
= scm_i_string_writable_chars (str
);
603 dst
[p
] = (char) (unsigned char) chr
;
607 scm_t_wchar
*dst
= scm_i_string_writable_wide_chars (str
);
614 Basic symbol creation and accessing is done here, the rest is in
615 symbols.[hc]. This has been done to keep stringbufs and the
616 internals of strings and string-like objects confined to this file.
619 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
622 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
623 unsigned long hash
, SCM props
)
626 size_t start
= STRING_START (name
);
627 size_t length
= STRING_LENGTH (name
);
629 if (IS_SH_STRING (name
))
631 name
= SH_STRING_STRING (name
);
632 start
+= STRING_START (name
);
634 buf
= SYMBOL_STRINGBUF (name
);
636 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
639 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
640 SET_STRINGBUF_SHARED (buf
);
641 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
646 if (scm_i_is_narrow_string (name
))
648 SCM new_buf
= make_stringbuf (length
);
649 memcpy (STRINGBUF_CHARS (new_buf
),
650 STRINGBUF_CHARS (buf
) + start
, length
);
655 SCM new_buf
= make_wide_stringbuf (length
);
656 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
657 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
) + start
,
662 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
663 (scm_t_bits
) hash
, SCM_UNPACK (props
));
667 scm_i_c_make_symbol (const char *name
, size_t len
,
668 scm_t_bits flags
, unsigned long hash
, SCM props
)
670 SCM buf
= make_stringbuf (len
);
671 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
673 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
674 (scm_t_bits
) hash
, SCM_UNPACK (props
));
677 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
678 underlying storage. */
680 scm_i_c_take_symbol (char *name
, size_t len
,
681 scm_t_bits flags
, unsigned long hash
, SCM props
)
683 SCM buf
= scm_i_take_stringbufn (name
, len
);
685 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
686 (scm_t_bits
) hash
, SCM_UNPACK (props
));
689 /* Returns the number of characters in SYM. This may be different
690 from the memory size of SYM. */
692 scm_i_symbol_length (SCM sym
)
694 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
698 scm_c_symbol_length (SCM sym
)
699 #define FUNC_NAME "scm_c_symbol_length"
701 SCM_VALIDATE_SYMBOL (1, sym
);
703 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
707 /* True if the name of SYM is stored as a Latin-1 encoded string.
708 False if it is stored as a 32-bit UCS-4-encoded string. */
710 scm_i_is_narrow_symbol (SCM sym
)
714 buf
= SYMBOL_STRINGBUF (sym
);
715 return !STRINGBUF_WIDE (buf
);
718 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
719 contains the name of SYM. */
721 scm_i_symbol_chars (SCM sym
)
725 buf
= SYMBOL_STRINGBUF (sym
);
726 if (!STRINGBUF_WIDE (buf
))
727 return STRINGBUF_CHARS (buf
);
729 scm_misc_error (NULL
, "Invalid access of chars of a wide symbol ~S",
733 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
736 scm_i_symbol_wide_chars (SCM sym
)
740 buf
= SYMBOL_STRINGBUF (sym
);
741 if (STRINGBUF_WIDE (buf
))
742 return STRINGBUF_WIDE_CHARS (buf
);
744 scm_misc_error (NULL
, "Invalid access of chars of a narrow symbol ~S",
749 scm_i_symbol_mark (SCM sym
)
751 scm_gc_mark (SYMBOL_STRINGBUF (sym
));
752 return SCM_CELL_OBJECT_3 (sym
);
756 scm_i_symbol_free (SCM sym
)
761 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
763 SCM buf
= SYMBOL_STRINGBUF (sym
);
764 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
765 SET_STRINGBUF_SHARED (buf
);
766 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
767 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
768 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
771 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
773 scm_i_symbol_ref (SCM sym
, size_t x
)
775 if (scm_i_is_narrow_symbol (sym
))
776 return (scm_t_wchar
) (unsigned char) (scm_i_symbol_chars (sym
)[x
]);
778 return scm_i_symbol_wide_chars (sym
)[x
];
784 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0, (SCM str
),
785 "Returns an association list containing debugging information\n"
786 "for @var{str}. The association list has the following entries."
789 "The string itself.\n"
791 "The start index of the string into its stringbuf\n"
793 "The length of the string\n"
795 "If this string is a substring, it returns its parent string.\n"
796 "Otherwise, it returns @code{#f}\n"
798 "@code{#t} if the string is read-only\n"
799 "@item stringbuf-chars\n"
800 "A new string containing this string's stringbuf's characters\n"
801 "@item stringbuf-length\n"
802 "The number of characters in this stringbuf\n"
803 "@item stringbuf-shared\n"
804 "@code{#t} if this stringbuf is shared\n"
805 "@item stringbuf-inline\n"
806 "@code{#t} if this stringbuf's characters are stored in the\n"
807 "cell itself, or @code{#f} if they were allocated in memory\n"
808 "@item stringbuf-wide\n"
809 "@code{#t} if this stringbuf's characters are stored in a\n"
810 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
813 #define FUNC_NAME s_scm_sys_string_dump
815 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
;
817 SCM_VALIDATE_STRING (1, str
);
820 e1
= scm_cons (scm_from_locale_symbol ("string"),
822 e2
= scm_cons (scm_from_locale_symbol ("start"),
823 scm_from_size_t (STRING_START (str
)));
824 e3
= scm_cons (scm_from_locale_symbol ("length"),
825 scm_from_size_t (STRING_LENGTH (str
)));
827 if (IS_SH_STRING (str
))
829 e4
= scm_cons (scm_from_locale_symbol ("shared"),
830 SH_STRING_STRING (str
));
831 buf
= STRING_STRINGBUF (SH_STRING_STRING (str
));
835 e4
= scm_cons (scm_from_locale_symbol ("shared"),
837 buf
= STRING_STRINGBUF (str
);
840 if (IS_RO_STRING (str
))
841 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
844 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
848 if (!STRINGBUF_WIDE (buf
))
850 size_t len
= STRINGBUF_LENGTH (buf
);
852 SCM sbc
= scm_i_make_string (len
, &cbuf
);
853 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
854 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
859 size_t len
= STRINGBUF_LENGTH (buf
);
861 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
862 u32_cpy ((scm_t_uint32
*) cbuf
,
863 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
864 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
867 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
868 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
869 if (STRINGBUF_SHARED (buf
))
870 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
873 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
875 if (STRINGBUF_INLINE (buf
))
876 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
879 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
881 if (STRINGBUF_WIDE (buf
))
882 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
885 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
888 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, SCM_UNDEFINED
);
892 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0, (SCM sym
),
893 "Returns an association list containing debugging information\n"
894 "for @var{sym}. The association list has the following entries."
897 "The symbol itself\n"
901 "@code{#t} if it is an interned symbol\n"
902 "@item stringbuf-chars\n"
903 "A new string containing this symbols's stringbuf's characters\n"
904 "@item stringbuf-length\n"
905 "The number of characters in this stringbuf\n"
906 "@item stringbuf-shared\n"
907 "@code{#t} if this stringbuf is shared\n"
908 "@item stringbuf-inline\n"
909 "@code{#t} if this stringbuf's characters are stored in the\n"
910 "cell itself, or @code{#f} if they were allocated in memory\n"
911 "@item stringbuf-wide\n"
912 "@code{#t} if this stringbuf's characters are stored in a\n"
913 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
916 #define FUNC_NAME s_scm_sys_symbol_dump
918 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
;
920 SCM_VALIDATE_SYMBOL (1, sym
);
921 e1
= scm_cons (scm_from_locale_symbol ("symbol"),
923 e2
= scm_cons (scm_from_locale_symbol ("hash"),
924 scm_from_ulong (scm_i_symbol_hash (sym
)));
925 e3
= scm_cons (scm_from_locale_symbol ("interned"),
926 scm_symbol_interned_p (sym
));
927 buf
= SYMBOL_STRINGBUF (sym
);
930 if (!STRINGBUF_WIDE (buf
))
932 size_t len
= STRINGBUF_LENGTH (buf
);
934 SCM sbc
= scm_i_make_string (len
, &cbuf
);
935 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
936 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
941 size_t len
= STRINGBUF_LENGTH (buf
);
943 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
944 u32_cpy ((scm_t_uint32
*) cbuf
,
945 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
946 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
949 e5
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
950 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
951 if (STRINGBUF_SHARED (buf
))
952 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
955 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
957 if (STRINGBUF_INLINE (buf
))
958 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
961 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
963 if (STRINGBUF_WIDE (buf
))
964 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
967 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
969 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, SCM_UNDEFINED
);
974 #if SCM_STRING_LENGTH_HISTOGRAM
976 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0, (void), "")
977 #define FUNC_NAME s_scm_sys_stringbuf_hist
980 for (i
= 0; i
< 1000; i
++)
982 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
983 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
984 return SCM_UNSPECIFIED
;
992 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
994 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
995 #define FUNC_NAME s_scm_string_p
997 return scm_from_bool (IS_STRING (obj
));
1002 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
1004 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
1006 "@deffnx {Scheme Procedure} list->string chrs\n"
1007 "Return a newly allocated string composed of the arguments,\n"
1009 #define FUNC_NAME s_scm_string
1017 /* Verify that this is a list of chars. */
1018 i
= scm_ilength (chrs
);
1019 SCM_ASSERT (i
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
1024 while (len
> 0 && scm_is_pair (rest
))
1026 SCM elt
= SCM_CAR (rest
);
1027 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
1028 rest
= SCM_CDR (rest
);
1030 scm_remember_upto_here_1 (elt
);
1033 /* Construct a string containing this list of chars. */
1037 result
= scm_i_make_string (len
, NULL
);
1038 result
= scm_i_string_start_writing (result
);
1039 while (len
> 0 && scm_is_pair (rest
))
1041 SCM elt
= SCM_CAR (rest
);
1042 scm_i_string_set_x (result
, p
, SCM_CHAR (elt
));
1044 rest
= SCM_CDR (rest
);
1046 scm_remember_upto_here_1 (elt
);
1048 scm_i_string_stop_writing ();
1051 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
1052 if (!scm_is_null (rest
))
1053 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
1059 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
1061 "Return a newly allocated string of\n"
1062 "length @var{k}. If @var{chr} is given, then all elements of\n"
1063 "the string are initialized to @var{chr}, otherwise the contents\n"
1064 "of the @var{string} are unspecified.")
1065 #define FUNC_NAME s_scm_make_string
1067 return scm_c_make_string (scm_to_size_t (k
), chr
);
1072 scm_c_make_string (size_t len
, SCM chr
)
1073 #define FUNC_NAME NULL
1076 SCM res
= scm_i_make_string (len
, NULL
);
1078 if (!SCM_UNBNDP (chr
))
1080 SCM_VALIDATE_CHAR (0, chr
);
1081 res
= scm_i_string_start_writing (res
);
1082 for (p
= 0; p
< len
; p
++)
1083 scm_i_string_set_x (res
, p
, SCM_CHAR (chr
));
1084 scm_i_string_stop_writing ();
1091 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
1093 "Return the number of characters in @var{string}.")
1094 #define FUNC_NAME s_scm_string_length
1096 SCM_VALIDATE_STRING (1, string
);
1097 return scm_from_size_t (STRING_LENGTH (string
));
1101 SCM_DEFINE (scm_string_width
, "string-width", 1, 0, 0,
1103 "Return the bytes used to represent a character in @var{string}."
1104 "This will return 1 or 4.")
1105 #define FUNC_NAME s_scm_string_width
1107 SCM_VALIDATE_STRING (1, string
);
1108 if (!scm_i_is_narrow_string (string
))
1109 return scm_from_int (4);
1111 return scm_from_int (1);
1116 scm_c_string_length (SCM string
)
1118 if (!IS_STRING (string
))
1119 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
1120 return STRING_LENGTH (string
);
1123 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
1125 "Return character @var{k} of @var{str} using zero-origin\n"
1126 "indexing. @var{k} must be a valid index of @var{str}.")
1127 #define FUNC_NAME s_scm_string_ref
1132 SCM_VALIDATE_STRING (1, str
);
1134 len
= scm_i_string_length (str
);
1135 if (SCM_LIKELY (len
> 0))
1136 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1138 scm_out_of_range (NULL
, k
);
1140 if (scm_i_is_narrow_string (str
))
1141 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
1143 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[idx
]);
1148 scm_c_string_ref (SCM str
, size_t p
)
1150 if (p
>= scm_i_string_length (str
))
1151 scm_out_of_range (NULL
, scm_from_size_t (p
));
1152 if (scm_i_is_narrow_string (str
))
1153 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
1155 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[p
]);
1159 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
1160 (SCM str
, SCM k
, SCM chr
),
1161 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1162 "an unspecified value. @var{k} must be a valid index of\n"
1164 #define FUNC_NAME s_scm_string_set_x
1169 SCM_VALIDATE_STRING (1, str
);
1171 len
= scm_i_string_length (str
);
1172 if (SCM_LIKELY (len
> 0))
1173 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1175 scm_out_of_range (NULL
, k
);
1177 SCM_VALIDATE_CHAR (3, chr
);
1178 str
= scm_i_string_start_writing (str
);
1179 scm_i_string_set_x (str
, idx
, SCM_CHAR (chr
));
1180 scm_i_string_stop_writing ();
1182 return SCM_UNSPECIFIED
;
1187 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
1189 if (p
>= scm_i_string_length (str
))
1190 scm_out_of_range (NULL
, scm_from_size_t (p
));
1191 str
= scm_i_string_start_writing (str
);
1192 scm_i_string_set_x (str
, p
, SCM_CHAR (chr
));
1193 scm_i_string_stop_writing ();
1196 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
1197 (SCM str
, SCM start
, SCM end
),
1198 "Return a newly allocated string formed from the characters\n"
1199 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1200 "ending with index @var{end} (exclusive).\n"
1201 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1202 "exact integers satisfying:\n\n"
1203 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1204 #define FUNC_NAME s_scm_substring
1206 size_t len
, from
, to
;
1208 SCM_VALIDATE_STRING (1, str
);
1209 len
= scm_i_string_length (str
);
1210 from
= scm_to_unsigned_integer (start
, 0, len
);
1211 if (SCM_UNBNDP (end
))
1214 to
= scm_to_unsigned_integer (end
, from
, len
);
1215 return scm_i_substring (str
, from
, to
);
1219 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
1220 (SCM str
, SCM start
, SCM end
),
1221 "Return a newly allocated string formed from the characters\n"
1222 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1223 "ending with index @var{end} (exclusive).\n"
1224 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1225 "exact integers satisfying:\n"
1227 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1229 "The returned string is read-only.\n")
1230 #define FUNC_NAME s_scm_substring_read_only
1232 size_t len
, from
, to
;
1234 SCM_VALIDATE_STRING (1, str
);
1235 len
= scm_i_string_length (str
);
1236 from
= scm_to_unsigned_integer (start
, 0, len
);
1237 if (SCM_UNBNDP (end
))
1240 to
= scm_to_unsigned_integer (end
, from
, len
);
1241 return scm_i_substring_read_only (str
, from
, to
);
1245 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
1246 (SCM str
, SCM start
, SCM end
),
1247 "Return a newly allocated string formed from the characters\n"
1248 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1249 "ending with index @var{end} (exclusive).\n"
1250 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1251 "exact integers satisfying:\n\n"
1252 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1253 #define FUNC_NAME s_scm_substring_copy
1255 /* For the Scheme version, START is mandatory, but for the C
1256 version, it is optional. See scm_string_copy in srfi-13.c for a
1262 SCM_VALIDATE_STRING (1, str
);
1263 scm_i_get_substring_spec (scm_i_string_length (str
),
1264 start
, &from
, end
, &to
);
1265 return scm_i_substring_copy (str
, from
, to
);
1269 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
1270 (SCM str
, SCM start
, SCM end
),
1271 "Return string that indirectly refers to the characters\n"
1272 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1273 "ending with index @var{end} (exclusive).\n"
1274 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1275 "exact integers satisfying:\n\n"
1276 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1277 #define FUNC_NAME s_scm_substring_shared
1279 size_t len
, from
, to
;
1281 SCM_VALIDATE_STRING (1, str
);
1282 len
= scm_i_string_length (str
);
1283 from
= scm_to_unsigned_integer (start
, 0, len
);
1284 if (SCM_UNBNDP (end
))
1287 to
= scm_to_unsigned_integer (end
, from
, len
);
1288 return scm_i_substring_shared (str
, from
, to
);
1292 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
1294 "Return a newly allocated string whose characters form the\n"
1295 "concatenation of the given strings, @var{args}.")
1296 #define FUNC_NAME s_scm_string_append
1309 SCM_VALIDATE_REST_ARGUMENT (args
);
1310 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1313 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1314 len
+= scm_i_string_length (s
);
1315 if (!scm_i_is_narrow_string (s
))
1320 res
= scm_i_make_string (len
, &data
.narrow
);
1322 res
= scm_i_make_wide_string (len
, &data
.wide
);
1324 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1328 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1329 len
= scm_i_string_length (s
);
1332 memcpy (data
.narrow
, scm_i_string_chars (s
), len
);
1337 if (scm_i_is_narrow_string (s
))
1339 for (i
= 0; i
< scm_i_string_length (s
); i
++)
1340 data
.wide
[i
] = (unsigned char) scm_i_string_chars (s
)[i
];
1343 u32_cpy ((scm_t_uint32
*) data
.wide
,
1344 (scm_t_uint32
*) scm_i_string_wide_chars (s
), len
);
1347 scm_remember_upto_here_1 (s
);
1354 scm_is_string (SCM obj
)
1356 return IS_STRING (obj
);
1360 scm_from_locale_stringn (const char *str
, size_t len
)
1365 if (len
== (size_t) -1)
1370 res
= scm_i_make_string (len
, &dst
);
1371 memcpy (dst
, str
, len
);
1376 scm_from_locale_string (const char *str
)
1381 return scm_from_locale_stringn (str
, -1);
1384 /* Create a new scheme string from the C string STR. The memory of
1385 STR may be used directly as storage for the new string. */
1387 scm_take_locale_stringn (char *str
, size_t len
)
1391 if (len
== (size_t) -1)
1395 /* Ensure STR is null terminated. A realloc for 1 extra byte should
1396 often be satisfied from the alignment padding after the block, with
1397 no actual data movement. */
1398 str
= scm_realloc (str
, len
+ 1);
1402 buf
= scm_i_take_stringbufn (str
, len
);
1403 res
= scm_double_cell (STRING_TAG
,
1404 SCM_UNPACK (buf
), (scm_t_bits
) 0, (scm_t_bits
) len
);
1409 scm_take_locale_string (char *str
)
1411 return scm_take_locale_stringn (str
, -1);
1414 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1417 unistring_escapes_to_guile_escapes (char **bufp
, size_t *lenp
)
1419 char *before
, *after
;
1428 if ((i
<= *lenp
- 6)
1429 && before
[i
] == '\\'
1430 && before
[i
+ 1] == 'u'
1431 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1433 /* Convert \u00NN to \xNN */
1436 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1437 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1441 else if ((i
<= *lenp
- 10)
1442 && before
[i
] == '\\'
1443 && before
[i
+ 1] == 'U'
1444 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1446 /* Convert \U00NNNNNN to \UNNNNNN */
1449 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1450 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1451 after
[j
+ 4] = tolower ((int) before
[i
+ 6]);
1452 after
[j
+ 5] = tolower ((int) before
[i
+ 7]);
1453 after
[j
+ 6] = tolower ((int) before
[i
+ 8]);
1454 after
[j
+ 7] = tolower ((int) before
[i
+ 9]);
1460 after
[j
] = before
[i
];
1466 after
= scm_realloc (after
, j
);
1470 scm_to_locale_stringn (SCM str
, size_t * lenp
)
1474 /* In the future, enc will hold the port's encoding. */
1477 return scm_to_stringn (str
, lenp
, enc
,
1478 SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
);
1481 /* Low-level scheme to C string conversion function. */
1483 scm_to_stringn (SCM str
, size_t * lenp
, const char *encoding
,
1484 scm_t_string_failed_conversion_handler handler
)
1486 static const char iso
[11] = "ISO-8859-1";
1488 size_t ilen
, len
, i
;
1490 if (!scm_is_string (str
))
1491 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1492 ilen
= scm_i_string_length (str
);
1496 buf
= scm_malloc (1);
1504 for (i
= 0; i
< ilen
; i
++)
1505 if (scm_i_string_ref (str
, i
) == '\0')
1506 scm_misc_error (NULL
,
1507 "string contains #\\nul character: ~S",
1510 if (scm_i_is_narrow_string (str
))
1514 buf
= scm_malloc (ilen
);
1515 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1521 buf
= scm_malloc (ilen
+ 1);
1522 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1531 buf
= u32_conv_to_encoding (iso
,
1532 (enum iconv_ilseq_handler
) handler
,
1533 (scm_t_uint32
*) scm_i_string_wide_chars (str
),
1534 ilen
, NULL
, NULL
, &len
);
1536 scm_misc_error (NULL
, "cannot convert to output locale ~s: \"~s\"",
1537 scm_list_2 (scm_from_locale_string (iso
), str
));
1539 if (handler
== SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
)
1540 unistring_escapes_to_guile_escapes (&buf
, &len
);
1546 buf
= scm_realloc (buf
, len
+ 1);
1550 scm_remember_upto_here_1 (str
);
1555 scm_to_locale_string (SCM str
)
1557 return scm_to_locale_stringn (str
, NULL
);
1561 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
1564 char *result
= NULL
;
1565 if (!scm_is_string (str
))
1566 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1567 result
= scm_to_locale_stringn (str
, &len
);
1569 memcpy (buf
, result
, (len
> max_len
) ? max_len
: len
);
1572 scm_remember_upto_here_1 (str
);
1576 /* converts C scm_array of strings to SCM scm_list of strings. */
1577 /* If argc < 0, a null terminated scm_array is assumed. */
1579 scm_makfromstrs (int argc
, char **argv
)
1584 for (i
= 0; argv
[i
]; i
++);
1586 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
1590 /* Return a newly allocated array of char pointers to each of the strings
1591 in args, with a terminating NULL pointer. */
1594 scm_i_allocate_string_pointers (SCM list
)
1597 int len
= scm_ilength (list
);
1601 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
1603 scm_dynwind_begin (0);
1605 result
= (char **) scm_malloc ((len
+ 1) * sizeof (char *));
1607 scm_dynwind_unwind_handler (free
, result
, 0);
1609 /* The list might be have been modified in another thread, so
1610 we check LIST before each access.
1612 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
1614 result
[i
] = scm_to_locale_string (SCM_CAR (list
));
1615 list
= SCM_CDR (list
);
1623 scm_i_free_string_pointers (char **pointers
)
1627 for (i
= 0; pointers
[i
]; i
++)
1633 scm_i_get_substring_spec (size_t len
,
1634 SCM start
, size_t *cstart
,
1635 SCM end
, size_t *cend
)
1637 if (SCM_UNBNDP (start
))
1640 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
1642 if (SCM_UNBNDP (end
))
1645 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1648 #if SCM_ENABLE_DEPRECATED
1650 /* When these definitions are removed, it becomes reasonable to use
1651 read-only strings for string literals. For that, change the reader
1652 to create string literals with scm_c_substring_read_only instead of
1653 with scm_c_substring_copy.
1657 scm_i_deprecated_stringp (SCM str
)
1659 scm_c_issue_deprecation_warning
1660 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1662 return scm_is_string (str
);
1666 scm_i_deprecated_string_chars (SCM str
)
1670 scm_c_issue_deprecation_warning
1671 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1673 /* We don't accept shared substrings here since they are not
1676 if (IS_SH_STRING (str
))
1677 scm_misc_error (NULL
,
1678 "SCM_STRING_CHARS does not work with shared substrings.",
1681 /* We explicitly test for read-only strings to produce a better
1685 if (IS_RO_STRING (str
))
1686 scm_misc_error (NULL
,
1687 "SCM_STRING_CHARS does not work with read-only strings.",
1690 /* The following is still wrong, of course...
1692 str
= scm_i_string_start_writing (str
);
1693 chars
= scm_i_string_writable_chars (str
);
1694 scm_i_string_stop_writing ();
1699 scm_i_deprecated_string_length (SCM str
)
1701 scm_c_issue_deprecation_warning
1702 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1703 return scm_c_string_length (str
);
1711 scm_nullstr
= scm_i_make_string (0, NULL
);
1713 #include "libguile/strings.x"