1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
30 #include "libguile/_scm.h"
31 #include "libguile/chars.h"
32 #include "libguile/root.h"
33 #include "libguile/strings.h"
34 #include "libguile/deprecation.h"
35 #include "libguile/validate.h"
36 #include "libguile/dynwind.h"
46 * XXX - keeping an accurate refcount during GC seems to be quite
47 * tricky, so we just keep score of whether a stringbuf might be
48 * shared, not whether it definitely is.
50 * The scheme I (mvo) tried to keep an accurate reference count would
51 * recount all strings that point to a stringbuf during the mark-phase
52 * of the GC. This was done since one cannot access the stringbuf of
53 * a string when that string is freed (in order to decrease the
54 * reference count). The memory of the stringbuf might have been
55 * reused already for something completely different.
57 * This recounted worked for a small number of threads beating on
58 * cow-strings, but it failed randomly with more than 10 threads, say.
59 * I couldn't figure out what went wrong, so I used the conservative
60 * approach implemented below.
62 * A stringbuf needs to know its length, but only so that it can be
63 * reported when the stringbuf is freed.
65 * There are 3 storage strategies for stringbufs: inline, outline, and
68 * Inline strings are small 8-bit strings stored within the double
69 * cell itself. Outline strings are larger 8-bit strings with GC
70 * allocated storage. Wide strings are 32-bit strings with allocated
73 * There was little value in making wide string inlineable, since
74 * there is only room for three inlined 32-bit characters. Thus wide
75 * stringbufs are never inlined.
78 #define STRINGBUF_F_SHARED 0x100
79 #define STRINGBUF_F_INLINE 0x200
80 #define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
81 encoding. Otherwise, strings
84 #define STRINGBUF_TAG scm_tc7_stringbuf
85 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
86 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
87 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
89 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
90 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
91 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
92 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
94 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
95 ? STRINGBUF_INLINE_CHARS (buf) \
96 : STRINGBUF_OUTLINE_CHARS (buf))
98 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
99 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
100 ? STRINGBUF_INLINE_LENGTH (buf) \
101 : STRINGBUF_OUTLINE_LENGTH (buf))
103 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
105 #define SET_STRINGBUF_SHARED(buf) \
106 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
108 #if SCM_STRING_LENGTH_HISTOGRAM
109 static size_t lenhist
[1001];
112 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
115 make_stringbuf (size_t len
)
117 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
118 scm_i_symbol_chars, all stringbufs are null-terminated. Once
119 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
120 has been changed for scm_i_symbol_chars, this null-termination
124 #if SCM_STRING_LENGTH_HISTOGRAM
131 if (len
<= STRINGBUF_MAX_INLINE_LEN
-1)
133 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_INLINE
| (len
<< 16),
138 char *mem
= scm_gc_malloc (len
+1, "string");
140 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) mem
,
141 (scm_t_bits
) len
, (scm_t_bits
) 0);
145 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
148 make_wide_stringbuf (size_t len
)
151 #if SCM_STRING_LENGTH_HISTOGRAM
158 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
160 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_WIDE
, (scm_t_bits
) mem
,
161 (scm_t_bits
) len
, (scm_t_bits
) 0);
164 /* Return a new stringbuf whose underlying storage consists of the LEN+1
165 octets pointed to by STR (the last octet is zero). */
167 scm_i_take_stringbufn (char *str
, size_t len
)
169 scm_gc_register_collectable_memory (str
, len
+ 1, "stringbuf");
171 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) str
,
172 (scm_t_bits
) len
, (scm_t_bits
) 0);
176 scm_i_stringbuf_mark (SCM buf
)
182 scm_i_stringbuf_free (SCM buf
)
184 if (!STRINGBUF_INLINE (buf
))
186 if (!STRINGBUF_WIDE (buf
))
187 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
),
188 STRINGBUF_OUTLINE_LENGTH (buf
) + 1, "string");
190 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
),
191 sizeof (scm_t_wchar
) * (STRINGBUF_OUTLINE_LENGTH (buf
)
197 /* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
198 one containing 32-bit UCS-4-encoded characters. */
200 widen_stringbuf (SCM buf
)
205 if (STRINGBUF_WIDE (buf
))
208 if (STRINGBUF_INLINE (buf
))
210 len
= STRINGBUF_INLINE_LENGTH (buf
);
212 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
213 for (i
= 0; i
< len
; i
++)
215 (scm_t_wchar
) (unsigned char) STRINGBUF_INLINE_CHARS (buf
)[i
];
218 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) ^ STRINGBUF_F_INLINE
);
219 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
220 SCM_SET_CELL_WORD_1 (buf
, mem
);
221 SCM_SET_CELL_WORD_2 (buf
, len
);
225 len
= STRINGBUF_OUTLINE_LENGTH (buf
);
227 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
228 for (i
= 0; i
< len
; i
++)
230 (scm_t_wchar
) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf
)[i
];
233 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
), len
+ 1, "string");
235 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
236 SCM_SET_CELL_WORD_1 (buf
, mem
);
237 SCM_SET_CELL_WORD_2 (buf
, len
);
241 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
243 /* Copy-on-write strings.
246 #define STRING_TAG scm_tc7_string
248 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
249 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
250 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
252 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
253 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
255 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
257 /* Read-only strings.
260 #define RO_STRING_TAG (scm_tc7_string + 0x200)
261 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
263 /* Mutation-sharing substrings
266 #define SH_STRING_TAG (scm_tc7_string + 0x100)
268 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
269 /* START and LENGTH as for STRINGs. */
271 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
273 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
274 characters. CHARSP, if not NULL, will be set to location of the
277 scm_i_make_string (size_t len
, char **charsp
)
279 SCM buf
= make_stringbuf (len
);
282 *charsp
= STRINGBUF_CHARS (buf
);
283 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
284 (scm_t_bits
)0, (scm_t_bits
) len
);
288 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
289 characters. CHARSP, if not NULL, will be set to location of the
292 scm_i_make_wide_string (size_t len
, scm_t_wchar
**charsp
)
294 SCM buf
= make_wide_stringbuf (len
);
297 *charsp
= STRINGBUF_WIDE_CHARS (buf
);
298 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK (buf
),
299 (scm_t_bits
) 0, (scm_t_bits
) len
);
304 validate_substring_args (SCM str
, size_t start
, size_t end
)
306 if (!IS_STRING (str
))
307 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
308 if (start
> STRING_LENGTH (str
))
309 scm_out_of_range (NULL
, scm_from_size_t (start
));
310 if (end
> STRING_LENGTH (str
) || end
< start
)
311 scm_out_of_range (NULL
, scm_from_size_t (end
));
315 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
317 *start
= STRING_START (*str
);
318 if (IS_SH_STRING (*str
))
320 *str
= SH_STRING_STRING (*str
);
321 *start
+= STRING_START (*str
);
323 *buf
= STRING_STRINGBUF (*str
);
327 scm_i_substring (SCM str
, size_t start
, size_t end
)
331 get_str_buf_start (&str
, &buf
, &str_start
);
332 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
333 SET_STRINGBUF_SHARED (buf
);
334 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
335 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
336 (scm_t_bits
)str_start
+ start
,
337 (scm_t_bits
) end
- start
);
341 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
345 get_str_buf_start (&str
, &buf
, &str_start
);
346 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
347 SET_STRINGBUF_SHARED (buf
);
348 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
349 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
350 (scm_t_bits
)str_start
+ start
,
351 (scm_t_bits
) end
- start
);
355 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
357 size_t len
= end
- start
;
360 get_str_buf_start (&str
, &buf
, &str_start
);
361 if (scm_i_is_narrow_string (str
))
363 my_buf
= make_stringbuf (len
);
364 memcpy (STRINGBUF_CHARS (my_buf
),
365 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
369 my_buf
= make_wide_stringbuf (len
);
370 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (my_buf
),
371 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
) + str_start
373 /* Even though this string is wide, the substring may be narrow.
374 Consider adding code to narrow the string. */
376 scm_remember_upto_here_1 (buf
);
377 return scm_double_cell (STRING_TAG
, SCM_UNPACK (my_buf
),
378 (scm_t_bits
) 0, (scm_t_bits
) len
);
382 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
384 if (start
== 0 && end
== STRING_LENGTH (str
))
388 size_t len
= end
- start
;
389 if (IS_SH_STRING (str
))
391 start
+= STRING_START (str
);
392 str
= SH_STRING_STRING (str
);
394 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
395 (scm_t_bits
)start
, (scm_t_bits
) len
);
400 scm_c_substring (SCM str
, size_t start
, size_t end
)
402 validate_substring_args (str
, start
, end
);
403 return scm_i_substring (str
, start
, end
);
407 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
409 validate_substring_args (str
, start
, end
);
410 return scm_i_substring_read_only (str
, start
, end
);
414 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
416 validate_substring_args (str
, start
, end
);
417 return scm_i_substring_copy (str
, start
, end
);
421 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
423 validate_substring_args (str
, start
, end
);
424 return scm_i_substring_shared (str
, start
, end
);
428 scm_i_string_mark (SCM str
)
430 if (IS_SH_STRING (str
))
431 return SH_STRING_STRING (str
);
433 return STRING_STRINGBUF (str
);
437 scm_i_string_free (SCM str
)
441 /* Internal accessors
444 /* Returns the number of characters in STR. This may be different
445 than the memory size of the string storage. */
447 scm_i_string_length (SCM str
)
449 return STRING_LENGTH (str
);
452 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
453 encoding. False if it is 'wide', having a 32-bit UCS-4
456 scm_i_is_narrow_string (SCM str
)
458 return !STRINGBUF_WIDE (STRING_STRINGBUF (str
));
461 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
464 scm_i_string_chars (SCM str
)
468 get_str_buf_start (&str
, &buf
, &start
);
469 if (scm_i_is_narrow_string (str
))
470 return STRINGBUF_CHARS (buf
) + start
;
472 scm_misc_error (NULL
, "Invalid read access of chars of wide string: ~s",
477 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
480 scm_i_string_wide_chars (SCM str
)
485 get_str_buf_start (&str
, &buf
, &start
);
486 if (!scm_i_is_narrow_string (str
))
487 return STRINGBUF_WIDE_CHARS (buf
) + start
;
489 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
493 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
494 a new string buffer, so that it can be modified without modifying
495 other strings. Also, lock the string mutex. Later, one must call
496 scm_i_string_stop_writing to unlock the mutex. */
498 scm_i_string_start_writing (SCM orig_str
)
500 SCM buf
, str
= orig_str
;
503 get_str_buf_start (&str
, &buf
, &start
);
504 if (IS_RO_STRING (str
))
505 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
507 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
508 if (STRINGBUF_SHARED (buf
))
510 /* Clone the stringbuf. */
511 size_t len
= STRING_LENGTH (str
);
514 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
516 if (scm_i_is_narrow_string (str
))
518 new_buf
= make_stringbuf (len
);
519 memcpy (STRINGBUF_CHARS (new_buf
),
520 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
525 new_buf
= make_wide_stringbuf (len
);
526 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
527 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
)
528 + STRING_START (str
)), len
);
530 scm_i_thread_put_to_sleep ();
531 SET_STRING_STRINGBUF (str
, new_buf
);
532 start
-= STRING_START (str
);
533 SET_STRING_START (str
, 0);
534 scm_i_thread_wake_up ();
538 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
543 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
545 scm_i_string_writable_chars (SCM str
)
550 get_str_buf_start (&str
, &buf
, &start
);
551 if (scm_i_is_narrow_string (str
))
552 return STRINGBUF_CHARS (buf
) + start
;
554 scm_misc_error (NULL
, "Invalid write access of chars of wide string: ~s",
559 /* Return a pointer to the UCS-4 codepoints of a string. */
561 scm_i_string_writable_wide_chars (SCM str
)
566 get_str_buf_start (&str
, &buf
, &start
);
567 if (!scm_i_is_narrow_string (str
))
568 return STRINGBUF_WIDE_CHARS (buf
) + start
;
570 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
574 /* Unlock the string mutex that was locked when
575 scm_i_string_start_writing was called. */
577 scm_i_string_stop_writing (void)
579 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
582 /* Return the Xth character of STR as a UCS-4 codepoint. */
584 scm_i_string_ref (SCM str
, size_t x
)
586 if (scm_i_is_narrow_string (str
))
587 return (scm_t_wchar
) (unsigned char) (scm_i_string_chars (str
)[x
]);
589 return scm_i_string_wide_chars (str
)[x
];
592 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
594 scm_i_string_set_x (SCM str
, size_t p
, scm_t_wchar chr
)
596 if (chr
> 0xFF && scm_i_is_narrow_string (str
))
597 widen_stringbuf (STRING_STRINGBUF (str
));
599 if (scm_i_is_narrow_string (str
))
601 char *dst
= scm_i_string_writable_chars (str
);
602 dst
[p
] = (char) (unsigned char) chr
;
606 scm_t_wchar
*dst
= scm_i_string_writable_wide_chars (str
);
613 Basic symbol creation and accessing is done here, the rest is in
614 symbols.[hc]. This has been done to keep stringbufs and the
615 internals of strings and string-like objects confined to this file.
618 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
621 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
622 unsigned long hash
, SCM props
)
625 size_t start
= STRING_START (name
);
626 size_t length
= STRING_LENGTH (name
);
628 if (IS_SH_STRING (name
))
630 name
= SH_STRING_STRING (name
);
631 start
+= STRING_START (name
);
633 buf
= SYMBOL_STRINGBUF (name
);
635 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
638 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
639 SET_STRINGBUF_SHARED (buf
);
640 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
645 if (scm_i_is_narrow_string (name
))
647 SCM new_buf
= make_stringbuf (length
);
648 memcpy (STRINGBUF_CHARS (new_buf
),
649 STRINGBUF_CHARS (buf
) + start
, length
);
654 SCM new_buf
= make_wide_stringbuf (length
);
655 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
656 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
) + start
,
661 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
662 (scm_t_bits
) hash
, SCM_UNPACK (props
));
666 scm_i_c_make_symbol (const char *name
, size_t len
,
667 scm_t_bits flags
, unsigned long hash
, SCM props
)
669 SCM buf
= make_stringbuf (len
);
670 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
672 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
673 (scm_t_bits
) hash
, SCM_UNPACK (props
));
676 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
677 underlying storage. */
679 scm_i_c_take_symbol (char *name
, size_t len
,
680 scm_t_bits flags
, unsigned long hash
, SCM props
)
682 SCM buf
= scm_i_take_stringbufn (name
, len
);
684 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
685 (scm_t_bits
) hash
, SCM_UNPACK (props
));
688 /* Returns the number of characters in SYM. This may be different
689 from the memory size of SYM. */
691 scm_i_symbol_length (SCM sym
)
693 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
697 scm_c_symbol_length (SCM sym
)
698 #define FUNC_NAME "scm_c_symbol_length"
700 SCM_VALIDATE_SYMBOL (1, sym
);
702 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
706 /* True if the name of SYM is stored as a Latin-1 encoded string.
707 False if it is stored as a 32-bit UCS-4-encoded string. */
709 scm_i_is_narrow_symbol (SCM sym
)
713 buf
= SYMBOL_STRINGBUF (sym
);
714 return !STRINGBUF_WIDE (buf
);
717 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
718 contains the name of SYM. */
720 scm_i_symbol_chars (SCM sym
)
724 buf
= SYMBOL_STRINGBUF (sym
);
725 if (!STRINGBUF_WIDE (buf
))
726 return STRINGBUF_CHARS (buf
);
728 scm_misc_error (NULL
, "Invalid access of chars of a wide symbol ~S",
732 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
735 scm_i_symbol_wide_chars (SCM sym
)
739 buf
= SYMBOL_STRINGBUF (sym
);
740 if (STRINGBUF_WIDE (buf
))
741 return STRINGBUF_WIDE_CHARS (buf
);
743 scm_misc_error (NULL
, "Invalid access of chars of a narrow symbol ~S",
748 scm_i_symbol_mark (SCM sym
)
750 scm_gc_mark (SYMBOL_STRINGBUF (sym
));
751 return SCM_CELL_OBJECT_3 (sym
);
755 scm_i_symbol_free (SCM sym
)
760 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
762 SCM buf
= SYMBOL_STRINGBUF (sym
);
763 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
764 SET_STRINGBUF_SHARED (buf
);
765 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
766 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
767 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
770 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
772 scm_i_symbol_ref (SCM sym
, size_t x
)
774 if (scm_i_is_narrow_symbol (sym
))
775 return (scm_t_wchar
) (unsigned char) (scm_i_symbol_chars (sym
)[x
]);
777 return scm_i_symbol_wide_chars (sym
)[x
];
783 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0, (SCM str
),
784 "Returns an association list containing debugging information\n"
785 "for @var{str}. The association list has the following entries."
788 "The string itself.\n"
790 "The start index of the string into its stringbuf\n"
792 "The length of the string\n"
794 "If this string is a substring, it returns its parent string.\n"
795 "Otherwise, it returns @code{#f}\n"
797 "The string buffer that contains this string's characters\n"
798 "@item stringbuf-chars\n"
799 "A new string containing this string's stringbuf's characters\n"
800 "@item stringbuf-length\n"
801 "The number of characters in this stringbuf\n"
802 "@item stringbuf-shared\n"
803 "@code{#t} if this stringbuf is shared\n"
804 "@item stringbuf-inline\n"
805 "@code{#t} if this stringbuf's characters are stored in the\n"
806 "cell itself, or @code{#f} if they were allocated in memory\n"
807 "@item stringbuf-wide\n"
808 "@code{#t} if this stringbuf's characters are stored in a\n"
809 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
812 #define FUNC_NAME s_scm_sys_string_dump
814 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
;
816 SCM_VALIDATE_STRING (1, str
);
819 e1
= scm_cons (scm_from_locale_symbol ("string"),
821 e2
= scm_cons (scm_from_locale_symbol ("start"),
822 scm_from_size_t (STRING_START (str
)));
823 e3
= scm_cons (scm_from_locale_symbol ("length"),
824 scm_from_size_t (STRING_LENGTH (str
)));
826 if (IS_SH_STRING (str
))
828 e4
= scm_cons (scm_from_locale_symbol ("shared"),
829 SH_STRING_STRING (str
));
830 buf
= STRING_STRINGBUF (SH_STRING_STRING (str
));
834 e4
= scm_cons (scm_from_locale_symbol ("shared"),
836 buf
= STRING_STRINGBUF (str
);
840 e5
= scm_cons (scm_from_locale_symbol ("stringbuf"),
843 if (!STRINGBUF_WIDE (buf
))
845 size_t len
= STRINGBUF_LENGTH (buf
);
847 SCM sbc
= scm_i_make_string (len
, &cbuf
);
848 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
849 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
854 size_t len
= STRINGBUF_LENGTH (buf
);
856 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
857 u32_cpy ((scm_t_uint32
*) cbuf
,
858 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
859 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
862 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
863 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
864 if (STRINGBUF_SHARED (buf
))
865 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
868 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
870 if (STRINGBUF_INLINE (buf
))
871 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
874 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
876 if (STRINGBUF_WIDE (buf
))
877 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
880 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
883 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, SCM_UNDEFINED
);
887 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0, (SCM sym
),
888 "Returns an association list containing debugging information\n"
889 "for @var{sym}. The association list has the following entries."
892 "The symbol itself\n"
896 "The string buffer that contains this symbol's characters\n"
897 "@item stringbuf-chars\n"
898 "A new string containing this symbols's stringbuf's characters\n"
899 "@item stringbuf-length\n"
900 "The number of characters in this stringbuf\n"
901 "@item stringbuf-shared\n"
902 "@code{#t} if this stringbuf is shared\n"
903 "@item stringbuf-inline\n"
904 "@code{#t} if this stringbuf's characters are stored in the\n"
905 "cell itself, or @code{#f} if they were allocated in memory\n"
906 "@item stringbuf-wide\n"
907 "@code{#t} if this stringbuf's characters are stored in a\n"
908 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
911 #define FUNC_NAME s_scm_sys_symbol_dump
913 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
;
915 SCM_VALIDATE_SYMBOL (1, sym
);
916 e1
= scm_cons (scm_from_locale_symbol ("symbol"),
918 e2
= scm_cons (scm_from_locale_symbol ("hash"),
919 scm_from_ulong (scm_i_symbol_hash (sym
)));
921 buf
= SYMBOL_STRINGBUF (sym
);
924 e3
= scm_cons (scm_from_locale_symbol ("stringbuf"),
927 if (!STRINGBUF_WIDE (buf
))
929 size_t len
= STRINGBUF_LENGTH (buf
);
931 SCM sbc
= scm_i_make_string (len
, &cbuf
);
932 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
933 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
938 size_t len
= STRINGBUF_LENGTH (buf
);
940 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
941 u32_cpy ((scm_t_uint32
*) cbuf
,
942 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
943 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
946 e5
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
947 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
948 if (STRINGBUF_SHARED (buf
))
949 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
952 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
954 if (STRINGBUF_INLINE (buf
))
955 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
958 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
960 if (STRINGBUF_WIDE (buf
))
961 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
964 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
966 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, SCM_UNDEFINED
);
971 #if SCM_STRING_LENGTH_HISTOGRAM
973 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0, (void), "")
974 #define FUNC_NAME s_scm_sys_stringbuf_hist
977 for (i
= 0; i
< 1000; i
++)
979 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
980 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
981 return SCM_UNSPECIFIED
;
989 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
991 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
992 #define FUNC_NAME s_scm_string_p
994 return scm_from_bool (IS_STRING (obj
));
999 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
1001 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
1003 "@deffnx {Scheme Procedure} list->string chrs\n"
1004 "Return a newly allocated string composed of the arguments,\n"
1006 #define FUNC_NAME s_scm_string
1014 /* Verify that this is a list of chars. */
1015 i
= scm_ilength (chrs
);
1019 SCM_ASSERT (len
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
1020 while (len
> 0 && scm_is_pair (rest
))
1022 SCM elt
= SCM_CAR (rest
);
1023 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
1024 rest
= SCM_CDR (rest
);
1026 scm_remember_upto_here_1 (elt
);
1029 /* Construct a string containing this list of chars. */
1033 result
= scm_i_make_string (len
, NULL
);
1034 result
= scm_i_string_start_writing (result
);
1035 while (len
> 0 && scm_is_pair (rest
))
1037 SCM elt
= SCM_CAR (rest
);
1038 scm_i_string_set_x (result
, p
, SCM_CHAR (elt
));
1040 rest
= SCM_CDR (rest
);
1042 scm_remember_upto_here_1 (elt
);
1044 scm_i_string_stop_writing ();
1047 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
1048 if (!scm_is_null (rest
))
1049 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
1055 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
1057 "Return a newly allocated string of\n"
1058 "length @var{k}. If @var{chr} is given, then all elements of\n"
1059 "the string are initialized to @var{chr}, otherwise the contents\n"
1060 "of the @var{string} are unspecified.")
1061 #define FUNC_NAME s_scm_make_string
1063 return scm_c_make_string (scm_to_size_t (k
), chr
);
1068 scm_c_make_string (size_t len
, SCM chr
)
1069 #define FUNC_NAME NULL
1072 SCM res
= scm_i_make_string (len
, NULL
);
1074 if (!SCM_UNBNDP (chr
))
1076 SCM_VALIDATE_CHAR (0, chr
);
1077 res
= scm_i_string_start_writing (res
);
1078 for (p
= 0; p
< len
; p
++)
1079 scm_i_string_set_x (res
, p
, SCM_CHAR (chr
));
1080 scm_i_string_stop_writing ();
1087 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
1089 "Return the number of characters in @var{string}.")
1090 #define FUNC_NAME s_scm_string_length
1092 SCM_VALIDATE_STRING (1, string
);
1093 return scm_from_size_t (STRING_LENGTH (string
));
1097 SCM_DEFINE (scm_string_width
, "string-width", 1, 0, 0,
1099 "Return the bytes used to represent a character in @var{string}."
1100 "This will return 1 or 4.")
1101 #define FUNC_NAME s_scm_string_width
1103 SCM_VALIDATE_STRING (1, string
);
1104 if (!scm_i_is_narrow_string (string
))
1105 return scm_from_int (4);
1107 return scm_from_int (1);
1112 scm_c_string_length (SCM string
)
1114 if (!IS_STRING (string
))
1115 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
1116 return STRING_LENGTH (string
);
1119 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
1121 "Return character @var{k} of @var{str} using zero-origin\n"
1122 "indexing. @var{k} must be a valid index of @var{str}.")
1123 #define FUNC_NAME s_scm_string_ref
1128 SCM_VALIDATE_STRING (1, str
);
1130 len
= scm_i_string_length (str
);
1131 if (SCM_LIKELY (len
> 0))
1132 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1134 scm_out_of_range (NULL
, k
);
1136 if (scm_i_is_narrow_string (str
))
1137 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
1139 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[idx
]);
1144 scm_c_string_ref (SCM str
, size_t p
)
1146 if (p
>= scm_i_string_length (str
))
1147 scm_out_of_range (NULL
, scm_from_size_t (p
));
1148 if (scm_i_is_narrow_string (str
))
1149 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
1151 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[p
]);
1155 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
1156 (SCM str
, SCM k
, SCM chr
),
1157 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1158 "an unspecified value. @var{k} must be a valid index of\n"
1160 #define FUNC_NAME s_scm_string_set_x
1165 SCM_VALIDATE_STRING (1, str
);
1167 len
= scm_i_string_length (str
);
1168 if (SCM_LIKELY (len
> 0))
1169 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1171 scm_out_of_range (NULL
, k
);
1173 SCM_VALIDATE_CHAR (3, chr
);
1174 str
= scm_i_string_start_writing (str
);
1175 scm_i_string_set_x (str
, idx
, SCM_CHAR (chr
));
1176 scm_i_string_stop_writing ();
1178 return SCM_UNSPECIFIED
;
1183 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
1185 if (p
>= scm_i_string_length (str
))
1186 scm_out_of_range (NULL
, scm_from_size_t (p
));
1187 str
= scm_i_string_start_writing (str
);
1188 scm_i_string_set_x (str
, p
, SCM_CHAR (chr
));
1189 scm_i_string_stop_writing ();
1192 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
1193 (SCM str
, SCM start
, SCM end
),
1194 "Return a newly allocated string formed from the characters\n"
1195 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1196 "ending with index @var{end} (exclusive).\n"
1197 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1198 "exact integers satisfying:\n\n"
1199 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1200 #define FUNC_NAME s_scm_substring
1202 size_t len
, from
, to
;
1204 SCM_VALIDATE_STRING (1, str
);
1205 len
= scm_i_string_length (str
);
1206 from
= scm_to_unsigned_integer (start
, 0, len
);
1207 if (SCM_UNBNDP (end
))
1210 to
= scm_to_unsigned_integer (end
, from
, len
);
1211 return scm_i_substring (str
, from
, to
);
1215 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
1216 (SCM str
, SCM start
, SCM end
),
1217 "Return a newly allocated string formed from the characters\n"
1218 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1219 "ending with index @var{end} (exclusive).\n"
1220 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1221 "exact integers satisfying:\n"
1223 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1225 "The returned string is read-only.\n")
1226 #define FUNC_NAME s_scm_substring_read_only
1228 size_t len
, from
, to
;
1230 SCM_VALIDATE_STRING (1, str
);
1231 len
= scm_i_string_length (str
);
1232 from
= scm_to_unsigned_integer (start
, 0, len
);
1233 if (SCM_UNBNDP (end
))
1236 to
= scm_to_unsigned_integer (end
, from
, len
);
1237 return scm_i_substring_read_only (str
, from
, to
);
1241 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
1242 (SCM str
, SCM start
, SCM end
),
1243 "Return a newly allocated string formed from the characters\n"
1244 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1245 "ending with index @var{end} (exclusive).\n"
1246 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1247 "exact integers satisfying:\n\n"
1248 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1249 #define FUNC_NAME s_scm_substring_copy
1251 /* For the Scheme version, START is mandatory, but for the C
1252 version, it is optional. See scm_string_copy in srfi-13.c for a
1258 SCM_VALIDATE_STRING (1, str
);
1259 scm_i_get_substring_spec (scm_i_string_length (str
),
1260 start
, &from
, end
, &to
);
1261 return scm_i_substring_copy (str
, from
, to
);
1265 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
1266 (SCM str
, SCM start
, SCM end
),
1267 "Return string that indirectly refers to the characters\n"
1268 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1269 "ending with index @var{end} (exclusive).\n"
1270 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1271 "exact integers satisfying:\n\n"
1272 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1273 #define FUNC_NAME s_scm_substring_shared
1275 size_t len
, from
, to
;
1277 SCM_VALIDATE_STRING (1, str
);
1278 len
= scm_i_string_length (str
);
1279 from
= scm_to_unsigned_integer (start
, 0, len
);
1280 if (SCM_UNBNDP (end
))
1283 to
= scm_to_unsigned_integer (end
, from
, len
);
1284 return scm_i_substring_shared (str
, from
, to
);
1288 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
1290 "Return a newly allocated string whose characters form the\n"
1291 "concatenation of the given strings, @var{args}.")
1292 #define FUNC_NAME s_scm_string_append
1302 SCM_VALIDATE_REST_ARGUMENT (args
);
1303 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1306 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1307 len
+= scm_i_string_length (s
);
1308 if (!scm_i_is_narrow_string (s
))
1312 res
= scm_i_make_string (len
, &data
);
1314 res
= scm_i_make_wide_string (len
, &wdata
);
1316 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1320 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1321 len
= scm_i_string_length (s
);
1324 memcpy (data
, scm_i_string_chars (s
), len
);
1329 if (scm_i_is_narrow_string (s
))
1331 for (i
= 0; i
< scm_i_string_length (s
); i
++)
1332 wdata
[i
] = (unsigned char) scm_i_string_chars (s
)[i
];
1335 u32_cpy ((scm_t_uint32
*) wdata
,
1336 (scm_t_uint32
*) scm_i_string_wide_chars (s
), len
);
1339 scm_remember_upto_here_1 (s
);
1346 scm_is_string (SCM obj
)
1348 return IS_STRING (obj
);
1352 scm_from_locale_stringn (const char *str
, size_t len
)
1357 if (len
== (size_t) -1)
1362 res
= scm_i_make_string (len
, &dst
);
1363 memcpy (dst
, str
, len
);
1368 scm_from_locale_string (const char *str
)
1373 return scm_from_locale_stringn (str
, -1);
1376 /* Create a new scheme string from the C string STR. The memory of
1377 STR may be used directly as storage for the new string. */
1379 scm_take_locale_stringn (char *str
, size_t len
)
1383 if (len
== (size_t) -1)
1387 /* Ensure STR is null terminated. A realloc for 1 extra byte should
1388 often be satisfied from the alignment padding after the block, with
1389 no actual data movement. */
1390 str
= scm_realloc (str
, len
+ 1);
1394 buf
= scm_i_take_stringbufn (str
, len
);
1395 res
= scm_double_cell (STRING_TAG
,
1396 SCM_UNPACK (buf
), (scm_t_bits
) 0, (scm_t_bits
) len
);
1401 scm_take_locale_string (char *str
)
1403 return scm_take_locale_stringn (str
, -1);
1406 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1409 unistring_escapes_to_guile_escapes (char **bufp
, size_t *lenp
)
1411 char *before
, *after
;
1420 if ((i
<= *lenp
- 6)
1421 && before
[i
] == '\\'
1422 && before
[i
+ 1] == 'u'
1423 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1425 /* Convert \u00NN to \xNN */
1428 after
[j
+ 2] = tolower (before
[i
+ 4]);
1429 after
[j
+ 3] = tolower (before
[i
+ 5]);
1433 else if ((i
<= *lenp
- 10)
1434 && before
[i
] == '\\'
1435 && before
[i
+ 1] == 'U'
1436 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1438 /* Convert \U00NNNNNN to \UNNNNNN */
1441 after
[j
+ 2] = tolower (before
[i
+ 4]);
1442 after
[j
+ 3] = tolower (before
[i
+ 5]);
1443 after
[j
+ 4] = tolower (before
[i
+ 6]);
1444 after
[j
+ 5] = tolower (before
[i
+ 7]);
1445 after
[j
+ 6] = tolower (before
[i
+ 8]);
1446 after
[j
+ 7] = tolower (before
[i
+ 9]);
1452 after
[j
] = before
[i
];
1458 after
= scm_realloc (after
, j
);
1462 scm_to_locale_stringn (SCM str
, size_t * lenp
)
1466 /* In the future, enc will hold the port's encoding. */
1469 return scm_to_stringn (str
, lenp
, enc
, iconveh_escape_sequence
);
1472 /* Low-level scheme to C string conversion function. */
1474 scm_to_stringn (SCM str
, size_t * lenp
, const char *encoding
,
1475 enum iconv_ilseq_handler handler
)
1477 static const char iso
[11] = "ISO-8859-1";
1479 size_t ilen
, len
, i
;
1481 if (!scm_is_string (str
))
1482 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1483 ilen
= scm_i_string_length (str
);
1487 buf
= scm_malloc (1);
1495 for (i
= 0; i
< ilen
; i
++)
1496 if (scm_i_string_ref (str
, i
) == '\0')
1497 scm_misc_error (NULL
,
1498 "string contains #\\nul character: ~S",
1501 if (scm_i_is_narrow_string (str
))
1505 buf
= scm_malloc (ilen
);
1506 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1512 buf
= scm_malloc (ilen
+ 1);
1513 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1522 buf
= u32_conv_to_encoding (iso
,
1524 (scm_t_uint32
*) scm_i_string_wide_chars (str
),
1525 ilen
, NULL
, NULL
, &len
);
1527 scm_misc_error (NULL
, "cannot convert to output locale ~s: \"~s\"",
1528 scm_list_2 (scm_from_locale_string (iso
), str
));
1530 if (handler
== iconveh_escape_sequence
)
1531 unistring_escapes_to_guile_escapes (&buf
, &len
);
1537 buf
= scm_realloc (buf
, len
+ 1);
1541 scm_remember_upto_here_1 (str
);
1546 scm_to_locale_string (SCM str
)
1548 return scm_to_locale_stringn (str
, NULL
);
1552 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
1555 char *result
= NULL
;
1556 if (!scm_is_string (str
))
1557 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1558 result
= scm_to_locale_stringn (str
, &len
);
1560 memcpy (buf
, result
, (len
> max_len
) ? max_len
: len
);
1563 scm_remember_upto_here_1 (str
);
1567 /* converts C scm_array of strings to SCM scm_list of strings. */
1568 /* If argc < 0, a null terminated scm_array is assumed. */
1570 scm_makfromstrs (int argc
, char **argv
)
1575 for (i
= 0; argv
[i
]; i
++);
1577 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
1581 /* Return a newly allocated array of char pointers to each of the strings
1582 in args, with a terminating NULL pointer. */
1585 scm_i_allocate_string_pointers (SCM list
)
1588 int len
= scm_ilength (list
);
1592 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
1594 scm_dynwind_begin (0);
1596 result
= (char **) scm_malloc ((len
+ 1) * sizeof (char *));
1598 scm_dynwind_unwind_handler (free
, result
, 0);
1600 /* The list might be have been modified in another thread, so
1601 we check LIST before each access.
1603 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
1605 result
[i
] = scm_to_locale_string (SCM_CAR (list
));
1606 list
= SCM_CDR (list
);
1614 scm_i_free_string_pointers (char **pointers
)
1618 for (i
= 0; pointers
[i
]; i
++)
1624 scm_i_get_substring_spec (size_t len
,
1625 SCM start
, size_t *cstart
,
1626 SCM end
, size_t *cend
)
1628 if (SCM_UNBNDP (start
))
1631 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
1633 if (SCM_UNBNDP (end
))
1636 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1639 #if SCM_ENABLE_DEPRECATED
1641 /* When these definitions are removed, it becomes reasonable to use
1642 read-only strings for string literals. For that, change the reader
1643 to create string literals with scm_c_substring_read_only instead of
1644 with scm_c_substring_copy.
1648 scm_i_deprecated_stringp (SCM str
)
1650 scm_c_issue_deprecation_warning
1651 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1653 return scm_is_string (str
);
1657 scm_i_deprecated_string_chars (SCM str
)
1661 scm_c_issue_deprecation_warning
1662 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1664 /* We don't accept shared substrings here since they are not
1667 if (IS_SH_STRING (str
))
1668 scm_misc_error (NULL
,
1669 "SCM_STRING_CHARS does not work with shared substrings.",
1672 /* We explicitly test for read-only strings to produce a better
1676 if (IS_RO_STRING (str
))
1677 scm_misc_error (NULL
,
1678 "SCM_STRING_CHARS does not work with read-only strings.",
1681 /* The following is still wrong, of course...
1683 str
= scm_i_string_start_writing (str
);
1684 chars
= scm_i_string_writable_chars (str
);
1685 scm_i_string_stop_writing ();
1690 scm_i_deprecated_string_length (SCM str
)
1692 scm_c_issue_deprecation_warning
1693 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1694 return scm_c_string_length (str
);
1702 scm_nullstr
= scm_i_make_string (0, NULL
);
1704 #include "libguile/strings.x"