1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "striconveh.h"
33 #include "libguile/_scm.h"
34 #include "libguile/chars.h"
35 #include "libguile/root.h"
36 #include "libguile/strings.h"
37 #include "libguile/generalized-vectors.h"
38 #include "libguile/deprecation.h"
39 #include "libguile/validate.h"
49 * XXX - keeping an accurate refcount during GC seems to be quite
50 * tricky, so we just keep score of whether a stringbuf might be
51 * shared, not whether it definitely is.
53 * The scheme I (mvo) tried to keep an accurate reference count would
54 * recount all strings that point to a stringbuf during the mark-phase
55 * of the GC. This was done since one cannot access the stringbuf of
56 * a string when that string is freed (in order to decrease the
57 * reference count). The memory of the stringbuf might have been
58 * reused already for something completely different.
60 * This recounted worked for a small number of threads beating on
61 * cow-strings, but it failed randomly with more than 10 threads, say.
62 * I couldn't figure out what went wrong, so I used the conservative
63 * approach implemented below.
65 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
66 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
70 /* The size in words of the stringbuf header (type tag + size). */
71 #define STRINGBUF_HEADER_SIZE 2U
73 #define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
75 #define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
76 #define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
78 #define STRINGBUF_TAG scm_tc7_stringbuf
79 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
80 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
82 #define STRINGBUF_CHARS(buf) ((unsigned char *) \
83 SCM_CELL_OBJECT_LOC (buf, \
84 STRINGBUF_HEADER_SIZE))
85 #define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
87 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CHARS (buf))
89 #define SET_STRINGBUF_SHARED(buf) \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
99 #if SCM_STRING_LENGTH_HISTOGRAM
100 static size_t lenhist
[1001];
103 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
106 make_stringbuf (size_t len
)
108 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
109 scm_i_symbol_chars, all stringbufs are null-terminated. Once
110 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
111 has been changed for scm_i_symbol_chars, this null-termination
117 #if SCM_STRING_LENGTH_HISTOGRAM
124 buf
= PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES
+ len
+ 1,
127 SCM_SET_CELL_TYPE (buf
, STRINGBUF_TAG
);
128 SCM_SET_CELL_WORD_1 (buf
, (scm_t_bits
) len
);
130 STRINGBUF_CHARS (buf
)[len
] = 0;
135 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
138 make_wide_stringbuf (size_t len
)
143 #if SCM_STRING_LENGTH_HISTOGRAM
150 raw_len
= (len
+ 1) * sizeof (scm_t_wchar
);
151 buf
= PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES
+ raw_len
,
154 SCM_SET_CELL_TYPE (buf
, STRINGBUF_TAG
| STRINGBUF_F_WIDE
);
155 SCM_SET_CELL_WORD_1 (buf
, (scm_t_bits
) len
);
157 STRINGBUF_WIDE_CHARS (buf
)[len
] = 0;
162 /* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
163 characters from BUF. */
165 wide_stringbuf (SCM buf
)
169 if (STRINGBUF_WIDE (buf
))
176 len
= STRINGBUF_LENGTH (buf
);
178 new_buf
= make_wide_stringbuf (len
);
180 mem
= STRINGBUF_WIDE_CHARS (new_buf
);
181 for (i
= 0; i
< len
; i
++)
182 mem
[i
] = (scm_t_wchar
) STRINGBUF_CHARS (buf
)[i
];
189 /* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
190 characters from BUF, if possible. */
192 narrow_stringbuf (SCM buf
)
196 if (!STRINGBUF_WIDE (buf
))
204 len
= STRINGBUF_LENGTH (buf
);
205 wmem
= STRINGBUF_WIDE_CHARS (buf
);
207 for (i
= 0; i
< len
; i
++)
209 /* BUF cannot be narrowed. */
212 new_buf
= make_stringbuf (len
);
214 mem
= STRINGBUF_CHARS (new_buf
);
215 for (i
= 0; i
< len
; i
++)
216 mem
[i
] = (unsigned char) wmem
[i
];
223 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
226 /* Copy-on-write strings.
229 #define STRING_TAG scm_tc7_string
231 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
232 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
233 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
235 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
236 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
238 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
240 /* Read-only strings.
243 #define RO_STRING_TAG scm_tc7_ro_string
244 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
246 /* Mutation-sharing substrings
249 #define SH_STRING_TAG (scm_tc7_string + 0x100)
251 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
252 /* START and LENGTH as for STRINGs. */
254 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
256 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
257 characters. CHARSP, if not NULL, will be set to location of the
260 scm_i_make_string (size_t len
, char **charsp
)
262 SCM buf
= make_stringbuf (len
);
265 *charsp
= (char *) STRINGBUF_CHARS (buf
);
266 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
267 (scm_t_bits
)0, (scm_t_bits
) len
);
271 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
272 characters. CHARSP, if not NULL, will be set to location of the
275 scm_i_make_wide_string (size_t len
, scm_t_wchar
**charsp
)
277 SCM buf
= make_wide_stringbuf (len
);
280 *charsp
= STRINGBUF_WIDE_CHARS (buf
);
281 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK (buf
),
282 (scm_t_bits
) 0, (scm_t_bits
) len
);
287 validate_substring_args (SCM str
, size_t start
, size_t end
)
289 if (!IS_STRING (str
))
290 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
291 if (start
> STRING_LENGTH (str
))
292 scm_out_of_range (NULL
, scm_from_size_t (start
));
293 if (end
> STRING_LENGTH (str
) || end
< start
)
294 scm_out_of_range (NULL
, scm_from_size_t (end
));
298 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
300 *start
= STRING_START (*str
);
301 if (IS_SH_STRING (*str
))
303 *str
= SH_STRING_STRING (*str
);
304 *start
+= STRING_START (*str
);
306 *buf
= STRING_STRINGBUF (*str
);
310 scm_i_substring (SCM str
, size_t start
, size_t end
)
314 get_str_buf_start (&str
, &buf
, &str_start
);
315 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
316 SET_STRINGBUF_SHARED (buf
);
317 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
318 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
319 (scm_t_bits
)str_start
+ start
,
320 (scm_t_bits
) end
- start
);
324 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
328 get_str_buf_start (&str
, &buf
, &str_start
);
329 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
330 SET_STRINGBUF_SHARED (buf
);
331 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
332 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
333 (scm_t_bits
)str_start
+ start
,
334 (scm_t_bits
) end
- start
);
338 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
340 size_t len
= end
- start
;
343 get_str_buf_start (&str
, &buf
, &str_start
);
344 if (scm_i_is_narrow_string (str
))
346 my_buf
= make_stringbuf (len
);
347 memcpy (STRINGBUF_CHARS (my_buf
),
348 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
352 my_buf
= make_wide_stringbuf (len
);
353 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (my_buf
),
354 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
) + str_start
356 /* Even though this string is wide, the substring may be narrow.
357 Consider adding code to narrow the string. */
359 scm_remember_upto_here_1 (buf
);
360 return scm_double_cell (STRING_TAG
, SCM_UNPACK (my_buf
),
361 (scm_t_bits
) 0, (scm_t_bits
) len
);
365 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
367 if (start
== 0 && end
== STRING_LENGTH (str
))
371 size_t len
= end
- start
;
372 if (IS_SH_STRING (str
))
374 start
+= STRING_START (str
);
375 str
= SH_STRING_STRING (str
);
377 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
378 (scm_t_bits
)start
, (scm_t_bits
) len
);
383 scm_c_substring (SCM str
, size_t start
, size_t end
)
385 validate_substring_args (str
, start
, end
);
386 return scm_i_substring (str
, start
, end
);
390 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
392 validate_substring_args (str
, start
, end
);
393 return scm_i_substring_read_only (str
, start
, end
);
397 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
399 validate_substring_args (str
, start
, end
);
400 return scm_i_substring_copy (str
, start
, end
);
404 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
406 validate_substring_args (str
, start
, end
);
407 return scm_i_substring_shared (str
, start
, end
);
411 /* Internal accessors
414 /* Returns the number of characters in STR. This may be different
415 than the memory size of the string storage. */
417 scm_i_string_length (SCM str
)
419 return STRING_LENGTH (str
);
422 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
423 encoding. False if it is 'wide', having a 32-bit UCS-4
426 scm_i_is_narrow_string (SCM str
)
428 return !STRINGBUF_WIDE (STRING_STRINGBUF (str
));
431 /* Try to coerce a string to be narrow. It if is narrow already, do
432 nothing. If it is wide, shrink it to narrow if none of its
433 characters are above 0xFF. Return true if the string is narrow or
434 was made to be narrow. */
436 scm_i_try_narrow_string (SCM str
)
438 SET_STRING_STRINGBUF (str
, narrow_stringbuf (STRING_STRINGBUF (str
)));
440 return scm_i_is_narrow_string (str
);
443 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
446 scm_i_string_chars (SCM str
)
450 get_str_buf_start (&str
, &buf
, &start
);
451 if (scm_i_is_narrow_string (str
))
452 return (const char *) STRINGBUF_CHARS (buf
) + start
;
454 scm_misc_error (NULL
, "Invalid read access of chars of wide string: ~s",
459 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
462 scm_i_string_wide_chars (SCM str
)
467 get_str_buf_start (&str
, &buf
, &start
);
468 if (!scm_i_is_narrow_string (str
))
469 return (const scm_t_wchar
*) STRINGBUF_WIDE_CHARS (buf
) + start
;
471 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
475 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
476 a new string buffer, so that it can be modified without modifying
477 other strings. Also, lock the string mutex. Later, one must call
478 scm_i_string_stop_writing to unlock the mutex. */
480 scm_i_string_start_writing (SCM orig_str
)
482 SCM buf
, str
= orig_str
;
485 get_str_buf_start (&str
, &buf
, &start
);
486 if (IS_RO_STRING (str
))
487 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
489 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
490 if (STRINGBUF_SHARED (buf
))
492 /* Clone the stringbuf. */
493 size_t len
= STRING_LENGTH (str
);
496 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
498 if (scm_i_is_narrow_string (str
))
500 new_buf
= make_stringbuf (len
);
501 memcpy (STRINGBUF_CHARS (new_buf
),
502 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
507 new_buf
= make_wide_stringbuf (len
);
508 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
509 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
)
510 + STRING_START (str
)), len
);
513 SET_STRING_STRINGBUF (str
, new_buf
);
514 start
-= STRING_START (str
);
516 /* FIXME: The following operations are not atomic, so other threads
517 looking at STR may see an inconsistent state. Nevertheless it can't
518 hurt much since (i) accessing STR while it is being mutated can't
519 yield a crash, and (ii) concurrent accesses to STR should be
520 protected by a mutex at the application level. The latter may not
521 apply when STR != ORIG_STR, though. */
522 SET_STRING_START (str
, 0);
523 SET_STRING_STRINGBUF (str
, new_buf
);
527 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
532 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
534 scm_i_string_writable_chars (SCM str
)
539 get_str_buf_start (&str
, &buf
, &start
);
540 if (scm_i_is_narrow_string (str
))
541 return (char *) STRINGBUF_CHARS (buf
) + start
;
543 scm_misc_error (NULL
, "Invalid write access of chars of wide string: ~s",
548 /* Return a pointer to the UCS-4 codepoints of a string. */
550 scm_i_string_writable_wide_chars (SCM str
)
555 get_str_buf_start (&str
, &buf
, &start
);
556 if (!scm_i_is_narrow_string (str
))
557 return STRINGBUF_WIDE_CHARS (buf
) + start
;
559 scm_misc_error (NULL
, "Invalid write access of chars of narrow string: ~s",
563 /* Unlock the string mutex that was locked when
564 scm_i_string_start_writing was called. */
566 scm_i_string_stop_writing (void)
568 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
571 /* Return the Xth character of STR as a UCS-4 codepoint. */
573 scm_i_string_ref (SCM str
, size_t x
)
575 if (scm_i_is_narrow_string (str
))
576 return (scm_t_wchar
) (unsigned char) (scm_i_string_chars (str
)[x
]);
578 return scm_i_string_wide_chars (str
)[x
];
581 /* Returns index+1 of the first char in STR that matches C, or
582 0 if the char is not found. */
584 scm_i_string_contains_char (SCM str
, char ch
)
587 size_t len
= scm_i_string_length (str
);
590 if (scm_i_is_narrow_string (str
))
594 if (scm_i_string_chars (str
)[i
] == ch
)
603 if (scm_i_string_wide_chars (str
)[i
]
604 == (unsigned char) ch
)
613 scm_i_string_strcmp (SCM sstr
, size_t start_x
, const char *cstr
)
615 if (scm_i_is_narrow_string (sstr
))
617 const char *a
= scm_i_string_chars (sstr
) + start_x
;
618 const char *b
= cstr
;
619 return strncmp (a
, b
, strlen(b
));
624 const scm_t_wchar
*a
= scm_i_string_wide_chars (sstr
) + start_x
;
625 const char *b
= cstr
;
626 for (i
= 0; i
< strlen (b
); i
++)
628 if (a
[i
] != (unsigned char) b
[i
])
635 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
637 scm_i_string_set_x (SCM str
, size_t p
, scm_t_wchar chr
)
639 if (chr
> 0xFF && scm_i_is_narrow_string (str
))
640 SET_STRING_STRINGBUF (str
, wide_stringbuf (STRING_STRINGBUF (str
)));
642 if (scm_i_is_narrow_string (str
))
644 char *dst
= scm_i_string_writable_chars (str
);
649 scm_t_wchar
*dst
= scm_i_string_writable_wide_chars (str
);
657 Basic symbol creation and accessing is done here, the rest is in
658 symbols.[hc]. This has been done to keep stringbufs and the
659 internals of strings and string-like objects confined to this file.
662 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
665 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
666 unsigned long hash
, SCM props
)
669 size_t start
= STRING_START (name
);
670 size_t length
= STRING_LENGTH (name
);
672 if (IS_SH_STRING (name
))
674 name
= SH_STRING_STRING (name
);
675 start
+= STRING_START (name
);
677 buf
= SYMBOL_STRINGBUF (name
);
679 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
682 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
683 SET_STRINGBUF_SHARED (buf
);
684 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
689 if (scm_i_is_narrow_string (name
))
691 SCM new_buf
= make_stringbuf (length
);
692 memcpy (STRINGBUF_CHARS (new_buf
),
693 STRINGBUF_CHARS (buf
) + start
, length
);
698 SCM new_buf
= make_wide_stringbuf (length
);
699 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
700 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
) + start
,
705 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
706 (scm_t_bits
) hash
, SCM_UNPACK (props
));
710 scm_i_c_make_symbol (const char *name
, size_t len
,
711 scm_t_bits flags
, unsigned long hash
, SCM props
)
713 SCM buf
= make_stringbuf (len
);
714 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
716 return scm_immutable_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
717 (scm_t_bits
) hash
, SCM_UNPACK (props
));
720 /* Returns the number of characters in SYM. This may be different
721 from the memory size of SYM. */
723 scm_i_symbol_length (SCM sym
)
725 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
729 scm_c_symbol_length (SCM sym
)
730 #define FUNC_NAME "scm_c_symbol_length"
732 SCM_VALIDATE_SYMBOL (1, sym
);
734 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
738 /* True if the name of SYM is stored as a Latin-1 encoded string.
739 False if it is stored as a 32-bit UCS-4-encoded string. */
741 scm_i_is_narrow_symbol (SCM sym
)
745 buf
= SYMBOL_STRINGBUF (sym
);
746 return !STRINGBUF_WIDE (buf
);
749 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
750 contains the name of SYM. */
752 scm_i_symbol_chars (SCM sym
)
756 buf
= SYMBOL_STRINGBUF (sym
);
757 if (!STRINGBUF_WIDE (buf
))
758 return (const char *) STRINGBUF_CHARS (buf
);
760 scm_misc_error (NULL
, "Invalid access of chars of a wide symbol ~S",
764 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
767 scm_i_symbol_wide_chars (SCM sym
)
771 buf
= SYMBOL_STRINGBUF (sym
);
772 if (STRINGBUF_WIDE (buf
))
773 return (const scm_t_wchar
*) STRINGBUF_WIDE_CHARS (buf
);
775 scm_misc_error (NULL
, "Invalid access of chars of a narrow symbol ~S",
780 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
782 SCM buf
= SYMBOL_STRINGBUF (sym
);
783 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
784 SET_STRINGBUF_SHARED (buf
);
785 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
786 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
787 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
790 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
792 scm_i_symbol_ref (SCM sym
, size_t x
)
794 if (scm_i_is_narrow_symbol (sym
))
795 return (scm_t_wchar
) (unsigned char) (scm_i_symbol_chars (sym
)[x
]);
797 return scm_i_symbol_wide_chars (sym
)[x
];
803 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0, (SCM str
),
804 "Returns an association list containing debugging information\n"
805 "for @var{str}. The association list has the following entries."
808 "The string itself.\n"
810 "The start index of the string into its stringbuf\n"
812 "The length of the string\n"
814 "If this string is a substring, it returns its parent string.\n"
815 "Otherwise, it returns @code{#f}\n"
817 "@code{#t} if the string is read-only\n"
818 "@item stringbuf-chars\n"
819 "A new string containing this string's stringbuf's characters\n"
820 "@item stringbuf-length\n"
821 "The number of characters in this stringbuf\n"
822 "@item stringbuf-shared\n"
823 "@code{#t} if this stringbuf is shared\n"
824 "@item stringbuf-wide\n"
825 "@code{#t} if this stringbuf's characters are stored in a\n"
826 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
829 #define FUNC_NAME s_scm_sys_string_dump
831 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
;
833 SCM_VALIDATE_STRING (1, str
);
836 e1
= scm_cons (scm_from_locale_symbol ("string"),
838 e2
= scm_cons (scm_from_locale_symbol ("start"),
839 scm_from_size_t (STRING_START (str
)));
840 e3
= scm_cons (scm_from_locale_symbol ("length"),
841 scm_from_size_t (STRING_LENGTH (str
)));
843 if (IS_SH_STRING (str
))
845 e4
= scm_cons (scm_from_locale_symbol ("shared"),
846 SH_STRING_STRING (str
));
847 buf
= STRING_STRINGBUF (SH_STRING_STRING (str
));
851 e4
= scm_cons (scm_from_locale_symbol ("shared"),
853 buf
= STRING_STRINGBUF (str
);
856 if (IS_RO_STRING (str
))
857 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
860 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
864 if (!STRINGBUF_WIDE (buf
))
866 size_t len
= STRINGBUF_LENGTH (buf
);
868 SCM sbc
= scm_i_make_string (len
, &cbuf
);
869 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
870 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
875 size_t len
= STRINGBUF_LENGTH (buf
);
877 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
878 u32_cpy ((scm_t_uint32
*) cbuf
,
879 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
880 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
883 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
884 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
885 if (STRINGBUF_SHARED (buf
))
886 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
889 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
891 if (STRINGBUF_WIDE (buf
))
892 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
895 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
898 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, SCM_UNDEFINED
);
902 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0, (SCM sym
),
903 "Returns an association list containing debugging information\n"
904 "for @var{sym}. The association list has the following entries."
907 "The symbol itself\n"
911 "@code{#t} if it is an interned symbol\n"
912 "@item stringbuf-chars\n"
913 "A new string containing this symbols's stringbuf's characters\n"
914 "@item stringbuf-length\n"
915 "The number of characters in this stringbuf\n"
916 "@item stringbuf-shared\n"
917 "@code{#t} if this stringbuf is shared\n"
918 "@item stringbuf-wide\n"
919 "@code{#t} if this stringbuf's characters are stored in a\n"
920 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
923 #define FUNC_NAME s_scm_sys_symbol_dump
925 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
;
927 SCM_VALIDATE_SYMBOL (1, sym
);
928 e1
= scm_cons (scm_from_locale_symbol ("symbol"),
930 e2
= scm_cons (scm_from_locale_symbol ("hash"),
931 scm_from_ulong (scm_i_symbol_hash (sym
)));
932 e3
= scm_cons (scm_from_locale_symbol ("interned"),
933 scm_symbol_interned_p (sym
));
934 buf
= SYMBOL_STRINGBUF (sym
);
937 if (!STRINGBUF_WIDE (buf
))
939 size_t len
= STRINGBUF_LENGTH (buf
);
941 SCM sbc
= scm_i_make_string (len
, &cbuf
);
942 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
943 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
948 size_t len
= STRINGBUF_LENGTH (buf
);
950 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
951 u32_cpy ((scm_t_uint32
*) cbuf
,
952 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
953 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
956 e5
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
957 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
958 if (STRINGBUF_SHARED (buf
))
959 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
962 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
964 if (STRINGBUF_WIDE (buf
))
965 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
968 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
970 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, SCM_UNDEFINED
);
975 #if SCM_STRING_LENGTH_HISTOGRAM
977 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0, (void), "")
978 #define FUNC_NAME s_scm_sys_stringbuf_hist
981 for (i
= 0; i
< 1000; i
++)
983 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
984 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
985 return SCM_UNSPECIFIED
;
993 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
995 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
996 #define FUNC_NAME s_scm_string_p
998 return scm_from_bool (IS_STRING (obj
));
1003 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
1005 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
1007 "@deffnx {Scheme Procedure} list->string chrs\n"
1008 "Return a newly allocated string composed of the arguments,\n"
1010 #define FUNC_NAME s_scm_string
1012 SCM result
= SCM_BOOL_F
;
1019 /* Verify that this is a list of chars. */
1020 i
= scm_ilength (chrs
);
1021 SCM_ASSERT (i
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
1026 while (len
> 0 && scm_is_pair (rest
))
1028 SCM elt
= SCM_CAR (rest
);
1029 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
1030 if (SCM_CHAR (elt
) > 0xFF)
1032 rest
= SCM_CDR (rest
);
1034 scm_remember_upto_here_1 (elt
);
1037 /* Construct a string containing this list of chars. */
1043 result
= scm_i_make_string (len
, NULL
);
1044 result
= scm_i_string_start_writing (result
);
1045 char *buf
= scm_i_string_writable_chars (result
);
1046 while (len
> 0 && scm_is_pair (rest
))
1048 SCM elt
= SCM_CAR (rest
);
1049 buf
[p
] = (unsigned char) SCM_CHAR (elt
);
1051 rest
= SCM_CDR (rest
);
1053 scm_remember_upto_here_1 (elt
);
1058 result
= scm_i_make_wide_string (len
, NULL
);
1059 result
= scm_i_string_start_writing (result
);
1060 scm_t_wchar
*buf
= scm_i_string_writable_wide_chars (result
);
1061 while (len
> 0 && scm_is_pair (rest
))
1063 SCM elt
= SCM_CAR (rest
);
1064 buf
[p
] = SCM_CHAR (elt
);
1066 rest
= SCM_CDR (rest
);
1068 scm_remember_upto_here_1 (elt
);
1071 scm_i_string_stop_writing ();
1074 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
1075 if (!scm_is_null (rest
))
1076 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
1082 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
1084 "Return a newly allocated string of\n"
1085 "length @var{k}. If @var{chr} is given, then all elements of\n"
1086 "the string are initialized to @var{chr}, otherwise the contents\n"
1087 "of the @var{string} are unspecified.")
1088 #define FUNC_NAME s_scm_make_string
1090 return scm_c_make_string (scm_to_size_t (k
), chr
);
1095 scm_c_make_string (size_t len
, SCM chr
)
1096 #define FUNC_NAME NULL
1099 SCM res
= scm_i_make_string (len
, NULL
);
1101 if (!SCM_UNBNDP (chr
))
1103 SCM_VALIDATE_CHAR (0, chr
);
1104 res
= scm_i_string_start_writing (res
);
1105 for (p
= 0; p
< len
; p
++)
1106 scm_i_string_set_x (res
, p
, SCM_CHAR (chr
));
1107 scm_i_string_stop_writing ();
1114 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
1116 "Return the number of characters in @var{string}.")
1117 #define FUNC_NAME s_scm_string_length
1119 SCM_VALIDATE_STRING (1, string
);
1120 return scm_from_size_t (STRING_LENGTH (string
));
1124 SCM_DEFINE (scm_string_bytes_per_char
, "string-bytes-per-char", 1, 0, 0,
1126 "Return the bytes used to represent a character in @var{string}."
1127 "This will return 1 or 4.")
1128 #define FUNC_NAME s_scm_string_bytes_per_char
1130 SCM_VALIDATE_STRING (1, string
);
1131 if (!scm_i_is_narrow_string (string
))
1132 return scm_from_int (4);
1134 return scm_from_int (1);
1139 scm_c_string_length (SCM string
)
1141 if (!IS_STRING (string
))
1142 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
1143 return STRING_LENGTH (string
);
1146 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
1148 "Return character @var{k} of @var{str} using zero-origin\n"
1149 "indexing. @var{k} must be a valid index of @var{str}.")
1150 #define FUNC_NAME s_scm_string_ref
1155 SCM_VALIDATE_STRING (1, str
);
1157 len
= scm_i_string_length (str
);
1158 if (SCM_LIKELY (len
> 0))
1159 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1161 scm_out_of_range (NULL
, k
);
1163 if (scm_i_is_narrow_string (str
))
1164 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
1166 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[idx
]);
1171 scm_c_string_ref (SCM str
, size_t p
)
1173 if (p
>= scm_i_string_length (str
))
1174 scm_out_of_range (NULL
, scm_from_size_t (p
));
1175 if (scm_i_is_narrow_string (str
))
1176 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
1178 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[p
]);
1182 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
1183 (SCM str
, SCM k
, SCM chr
),
1184 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1185 "an unspecified value. @var{k} must be a valid index of\n"
1187 #define FUNC_NAME s_scm_string_set_x
1192 SCM_VALIDATE_STRING (1, str
);
1194 len
= scm_i_string_length (str
);
1195 if (SCM_LIKELY (len
> 0))
1196 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1198 scm_out_of_range (NULL
, k
);
1200 SCM_VALIDATE_CHAR (3, chr
);
1201 str
= scm_i_string_start_writing (str
);
1202 scm_i_string_set_x (str
, idx
, SCM_CHAR (chr
));
1203 scm_i_string_stop_writing ();
1205 return SCM_UNSPECIFIED
;
1210 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
1212 if (p
>= scm_i_string_length (str
))
1213 scm_out_of_range (NULL
, scm_from_size_t (p
));
1214 str
= scm_i_string_start_writing (str
);
1215 scm_i_string_set_x (str
, p
, SCM_CHAR (chr
));
1216 scm_i_string_stop_writing ();
1219 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
1220 (SCM str
, SCM start
, SCM end
),
1221 "Return a newly allocated string formed from the characters\n"
1222 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1223 "ending with index @var{end} (exclusive).\n"
1224 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1225 "exact integers satisfying:\n\n"
1226 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1227 #define FUNC_NAME s_scm_substring
1229 size_t len
, from
, to
;
1231 SCM_VALIDATE_STRING (1, str
);
1232 len
= scm_i_string_length (str
);
1233 from
= scm_to_unsigned_integer (start
, 0, len
);
1234 if (SCM_UNBNDP (end
))
1237 to
= scm_to_unsigned_integer (end
, from
, len
);
1238 return scm_i_substring (str
, from
, to
);
1242 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
1243 (SCM str
, SCM start
, SCM end
),
1244 "Return a newly allocated string formed from the characters\n"
1245 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1246 "ending with index @var{end} (exclusive).\n"
1247 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1248 "exact integers satisfying:\n"
1250 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1252 "The returned string is read-only.\n")
1253 #define FUNC_NAME s_scm_substring_read_only
1255 size_t len
, from
, to
;
1257 SCM_VALIDATE_STRING (1, str
);
1258 len
= scm_i_string_length (str
);
1259 from
= scm_to_unsigned_integer (start
, 0, len
);
1260 if (SCM_UNBNDP (end
))
1263 to
= scm_to_unsigned_integer (end
, from
, len
);
1264 return scm_i_substring_read_only (str
, from
, to
);
1268 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
1269 (SCM str
, SCM start
, SCM end
),
1270 "Return a newly allocated string formed from the characters\n"
1271 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1272 "ending with index @var{end} (exclusive).\n"
1273 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1274 "exact integers satisfying:\n\n"
1275 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1276 #define FUNC_NAME s_scm_substring_copy
1278 /* For the Scheme version, START is mandatory, but for the C
1279 version, it is optional. See scm_string_copy in srfi-13.c for a
1285 SCM_VALIDATE_STRING (1, str
);
1286 scm_i_get_substring_spec (scm_i_string_length (str
),
1287 start
, &from
, end
, &to
);
1288 return scm_i_substring_copy (str
, from
, to
);
1292 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
1293 (SCM str
, SCM start
, SCM end
),
1294 "Return string that indirectly refers to the characters\n"
1295 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1296 "ending with index @var{end} (exclusive).\n"
1297 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1298 "exact integers satisfying:\n\n"
1299 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1300 #define FUNC_NAME s_scm_substring_shared
1302 size_t len
, from
, to
;
1304 SCM_VALIDATE_STRING (1, str
);
1305 len
= scm_i_string_length (str
);
1306 from
= scm_to_unsigned_integer (start
, 0, len
);
1307 if (SCM_UNBNDP (end
))
1310 to
= scm_to_unsigned_integer (end
, from
, len
);
1311 return scm_i_substring_shared (str
, from
, to
);
1315 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
1317 "Return a newly allocated string whose characters form the\n"
1318 "concatenation of the given strings, @var{args}.")
1319 #define FUNC_NAME s_scm_string_append
1332 SCM_VALIDATE_REST_ARGUMENT (args
);
1333 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1336 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1337 len
+= scm_i_string_length (s
);
1338 if (!scm_i_is_narrow_string (s
))
1343 res
= scm_i_make_string (len
, &data
.narrow
);
1345 res
= scm_i_make_wide_string (len
, &data
.wide
);
1347 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1351 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1352 len
= scm_i_string_length (s
);
1355 memcpy (data
.narrow
, scm_i_string_chars (s
), len
);
1360 if (scm_i_is_narrow_string (s
))
1362 for (i
= 0; i
< scm_i_string_length (s
); i
++)
1363 data
.wide
[i
] = (unsigned char) scm_i_string_chars (s
)[i
];
1366 u32_cpy ((scm_t_uint32
*) data
.wide
,
1367 (scm_t_uint32
*) scm_i_string_wide_chars (s
), len
);
1370 scm_remember_upto_here_1 (s
);
1377 scm_is_string (SCM obj
)
1379 return IS_STRING (obj
);
1383 scm_from_stringn (const char *str
, size_t len
, const char *encoding
,
1384 scm_t_string_failed_conversion_handler handler
)
1394 if (encoding
== NULL
)
1396 /* If encoding is null, use Latin-1. */
1398 res
= scm_i_make_string (len
, &buf
);
1399 memcpy (buf
, str
, len
);
1404 u32
= (scm_t_wchar
*) u32_conv_from_encoding (encoding
,
1405 (enum iconv_ilseq_handler
)
1413 if (errno
== ENOMEM
)
1414 scm_memory_error ("locale string conversion");
1417 /* There are invalid sequences in the input string. */
1420 errstr
= scm_i_make_string (len
, &dst
);
1421 memcpy (dst
, str
, len
);
1422 scm_misc_error (NULL
, "input locale conversion error from ~s: ~s",
1423 scm_list_2 (scm_from_locale_string (encoding
),
1425 scm_remember_upto_here_1 (errstr
);
1431 if (u32
[i
++] > 0xFF)
1440 res
= scm_i_make_string (u32len
, &dst
);
1441 for (i
= 0; i
< u32len
; i
++)
1442 dst
[i
] = (unsigned char) u32
[i
];
1448 res
= scm_i_make_wide_string (u32len
, &wdst
);
1449 u32_cpy ((scm_t_uint32
*) wdst
, (scm_t_uint32
*) u32
, u32len
);
1458 scm_from_locale_stringn (const char *str
, size_t len
)
1461 scm_t_string_failed_conversion_handler hndl
;
1465 if (len
== (size_t) -1)
1470 inport
= scm_current_input_port ();
1471 if (!SCM_UNBNDP (inport
) && SCM_OPINPORTP (inport
))
1473 pt
= SCM_PTAB_ENTRY (inport
);
1475 hndl
= pt
->ilseq_handler
;
1480 hndl
= SCM_FAILED_CONVERSION_ERROR
;
1483 return scm_from_stringn (str
, len
, enc
, hndl
);
1487 scm_from_locale_string (const char *str
)
1492 return scm_from_locale_stringn (str
, -1);
1496 scm_i_from_utf8_string (const scm_t_uint8
*str
)
1498 return scm_from_stringn ((const char *) str
,
1499 strlen ((char *) str
), "UTF-8",
1500 SCM_FAILED_CONVERSION_ERROR
);
1503 /* Create a new scheme string from the C string STR. The memory of
1504 STR may be used directly as storage for the new string. */
1505 /* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1506 would be to register a finalizer to eventually free(3) STR, which isn't
1507 worth it. Should we just deprecate the `scm_take_' functions? */
1509 scm_take_locale_stringn (char *str
, size_t len
)
1513 res
= scm_from_locale_stringn (str
, len
);
1520 scm_take_locale_string (char *str
)
1522 return scm_take_locale_stringn (str
, -1);
1525 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1528 unistring_escapes_to_guile_escapes (char **bufp
, size_t *lenp
)
1530 char *before
, *after
;
1539 if ((i
<= *lenp
- 6)
1540 && before
[i
] == '\\'
1541 && before
[i
+ 1] == 'u'
1542 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1544 /* Convert \u00NN to \xNN */
1547 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1548 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1552 else if ((i
<= *lenp
- 10)
1553 && before
[i
] == '\\'
1554 && before
[i
+ 1] == 'U'
1555 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1557 /* Convert \U00NNNNNN to \UNNNNNN */
1560 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1561 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1562 after
[j
+ 4] = tolower ((int) before
[i
+ 6]);
1563 after
[j
+ 5] = tolower ((int) before
[i
+ 7]);
1564 after
[j
+ 6] = tolower ((int) before
[i
+ 8]);
1565 after
[j
+ 7] = tolower ((int) before
[i
+ 9]);
1571 after
[j
] = before
[i
];
1577 after
= scm_realloc (after
, j
);
1581 scm_to_locale_stringn (SCM str
, size_t *lenp
)
1587 outport
= scm_current_output_port ();
1588 if (!SCM_UNBNDP (outport
) && SCM_OPOUTPORTP (outport
))
1590 pt
= SCM_PTAB_ENTRY (outport
);
1596 return scm_to_stringn (str
, lenp
,
1598 scm_i_get_conversion_strategy (SCM_BOOL_F
));
1601 /* Low-level scheme to C string conversion function. */
1603 scm_to_stringn (SCM str
, size_t *lenp
, const char *encoding
,
1604 scm_t_string_failed_conversion_handler handler
)
1607 size_t ilen
, len
, i
;
1611 if (!scm_is_string (str
))
1612 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1613 ilen
= scm_i_string_length (str
);
1617 buf
= scm_malloc (1);
1625 for (i
= 0; i
< ilen
; i
++)
1626 if (scm_i_string_ref (str
, i
) == '\0')
1627 scm_misc_error (NULL
,
1628 "string contains #\\nul character: ~S",
1631 if (scm_i_is_narrow_string (str
) && (encoding
== NULL
))
1633 /* If using native Latin-1 encoding, just copy the string
1637 buf
= scm_malloc (ilen
);
1638 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1644 buf
= scm_malloc (ilen
+ 1);
1645 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1657 if (scm_i_is_narrow_string (str
))
1659 ret
= mem_iconveh (scm_i_string_chars (str
), ilen
,
1661 (enum iconv_ilseq_handler
) handler
, NULL
,
1664 if (ret
== 0 && handler
== SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
)
1665 unistring_escapes_to_guile_escapes (&buf
, &len
);
1669 scm_misc_error (NULL
, "cannot convert to output locale ~s: \"~s\"",
1670 scm_list_2 (scm_from_locale_string (enc
),
1676 buf
= u32_conv_to_encoding (enc
,
1677 (enum iconv_ilseq_handler
) handler
,
1678 (scm_t_uint32
*) scm_i_string_wide_chars (str
),
1684 scm_misc_error (NULL
, "cannot convert to output locale ~s: \"~s\"",
1685 scm_list_2 (scm_from_locale_string (enc
),
1688 if (handler
== SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
)
1689 unistring_escapes_to_guile_escapes (&buf
, &len
);
1695 buf
= scm_realloc (buf
, len
+ 1);
1699 scm_remember_upto_here_1 (str
);
1704 scm_to_locale_string (SCM str
)
1706 return scm_to_locale_stringn (str
, NULL
);
1710 scm_i_to_utf8_string (SCM str
)
1713 u8str
= scm_to_stringn (str
, NULL
, "UTF-8", SCM_FAILED_CONVERSION_ERROR
);
1714 return (scm_t_uint8
*) u8str
;
1718 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
1721 char *result
= NULL
;
1722 if (!scm_is_string (str
))
1723 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1724 result
= scm_to_locale_stringn (str
, &len
);
1726 memcpy (buf
, result
, (len
> max_len
) ? max_len
: len
);
1729 scm_remember_upto_here_1 (str
);
1733 /* converts C scm_array of strings to SCM scm_list of strings. */
1734 /* If argc < 0, a null terminated scm_array is assumed. */
1736 scm_makfromstrs (int argc
, char **argv
)
1741 for (i
= 0; argv
[i
]; i
++);
1743 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
1747 /* Return a newly allocated array of char pointers to each of the strings
1748 in args, with a terminating NULL pointer. */
1751 scm_i_allocate_string_pointers (SCM list
)
1752 #define FUNC_NAME "scm_i_allocate_string_pointers"
1755 int len
= scm_ilength (list
);
1759 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
1761 result
= scm_gc_malloc ((len
+ 1) * sizeof (char *),
1765 /* The list might be have been modified in another thread, so
1766 we check LIST before each access.
1768 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
1773 str
= SCM_CAR (list
);
1774 len
= scm_c_string_length (str
);
1776 result
[i
] = scm_gc_malloc_pointerless (len
+ 1, "string pointers");
1777 memcpy (result
[i
], scm_i_string_chars (str
), len
);
1778 result
[i
][len
] = '\0';
1780 list
= SCM_CDR (list
);
1788 scm_i_get_substring_spec (size_t len
,
1789 SCM start
, size_t *cstart
,
1790 SCM end
, size_t *cend
)
1792 if (SCM_UNBNDP (start
))
1795 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
1797 if (SCM_UNBNDP (end
))
1800 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1803 #if SCM_ENABLE_DEPRECATED
1805 /* When these definitions are removed, it becomes reasonable to use
1806 read-only strings for string literals. For that, change the reader
1807 to create string literals with scm_c_substring_read_only instead of
1808 with scm_c_substring_copy.
1812 scm_i_deprecated_stringp (SCM str
)
1814 scm_c_issue_deprecation_warning
1815 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1817 return scm_is_string (str
);
1821 scm_i_deprecated_string_chars (SCM str
)
1825 scm_c_issue_deprecation_warning
1826 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1828 /* We don't accept shared substrings here since they are not
1831 if (IS_SH_STRING (str
))
1832 scm_misc_error (NULL
,
1833 "SCM_STRING_CHARS does not work with shared substrings.",
1836 /* We explicitly test for read-only strings to produce a better
1840 if (IS_RO_STRING (str
))
1841 scm_misc_error (NULL
,
1842 "SCM_STRING_CHARS does not work with read-only strings.",
1845 /* The following is still wrong, of course...
1847 str
= scm_i_string_start_writing (str
);
1848 chars
= scm_i_string_writable_chars (str
);
1849 scm_i_string_stop_writing ();
1854 scm_i_deprecated_string_length (SCM str
)
1856 scm_c_issue_deprecation_warning
1857 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1858 return scm_c_string_length (str
);
1864 string_handle_ref (scm_t_array_handle
*h
, size_t index
)
1866 return scm_c_string_ref (h
->array
, index
);
1870 string_handle_set (scm_t_array_handle
*h
, size_t index
, SCM val
)
1872 scm_c_string_set_x (h
->array
, index
, val
);
1876 string_get_handle (SCM v
, scm_t_array_handle
*h
)
1882 h
->dim0
.ubnd
= scm_c_string_length (v
) - 1;
1884 h
->element_type
= SCM_ARRAY_ELEMENT_TYPE_CHAR
;
1885 h
->elements
= h
->writable_elements
= NULL
;
1888 SCM_ARRAY_IMPLEMENTATION (scm_tc7_string
, 0x7f & ~2,
1889 string_handle_ref
, string_handle_set
,
1891 SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR
, scm_make_string
);
1896 scm_nullstr
= scm_i_make_string (0, NULL
);
1898 #include "libguile/strings.x"