Fix leaky handling of `scm_take_locale_{symbol,string} ()'.
[bpt/guile.git] / libguile / strings.c
CommitLineData
2a776823 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
faf2c9d7 25#include <string.h>
3ee86942 26#include <stdio.h>
9c44cd45
MG
27#include <ctype.h>
28#include <unistr.h>
eca29b02 29#include <uniconv.h>
faf2c9d7 30
889975e5
MG
31#include "striconveh.h"
32
a0599745
MD
33#include "libguile/_scm.h"
34#include "libguile/chars.h"
7c33806a 35#include "libguile/root.h"
a0599745 36#include "libguile/strings.h"
f45eccff 37#include "libguile/generalized-vectors.h"
1afff620 38#include "libguile/deprecation.h"
a0599745 39#include "libguile/validate.h"
1afff620 40
0f2d19dd
JB
41\f
42
43/* {Strings}
44 */
45
3ee86942
MV
46
47/* Stringbufs
48 *
49 * XXX - keeping an accurate refcount during GC seems to be quite
50 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 51 * shared, not whether it definitely is.
3ee86942
MV
52 *
53 * The scheme I (mvo) tried to keep an accurate reference count would
54 * recount all strings that point to a stringbuf during the mark-phase
55 * of the GC. This was done since one cannot access the stringbuf of
56 * a string when that string is freed (in order to decrease the
57 * reference count). The memory of the stringbuf might have been
58 * reused already for something completely different.
59 *
60 * This recounted worked for a small number of threads beating on
61 * cow-strings, but it failed randomly with more than 10 threads, say.
62 * I couldn't figure out what went wrong, so I used the conservative
63 * approach implemented below.
64 *
65 * A stringbuf needs to know its length, but only so that it can be
66 * reported when the stringbuf is freed.
67 *
50b1996f
MG
68 * There are 3 storage strategies for stringbufs: inline, outline, and
69 * wide.
70 *
71 * Inline strings are small 8-bit strings stored within the double
72 * cell itself. Outline strings are larger 8-bit strings with GC
73 * allocated storage. Wide strings are 32-bit strings with allocated
74 * storage.
75 *
76 * There was little value in making wide string inlineable, since
77 * there is only room for three inlined 32-bit characters. Thus wide
78 * stringbufs are never inlined.
3ee86942
MV
79 */
80
81#define STRINGBUF_F_SHARED 0x100
82#define STRINGBUF_F_INLINE 0x200
50b1996f
MG
83#define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
84 encoding. Otherwise, strings
85 are Latin-1. */
3ee86942
MV
86
87#define STRINGBUF_TAG scm_tc7_stringbuf
88#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
89#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
9c44cd45 90#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942 91
f59cf998 92#define STRINGBUF_OUTLINE_CHARS(buf) ((unsigned char *) SCM_CELL_WORD_1(buf))
3ee86942 93#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
f59cf998 94#define STRINGBUF_INLINE_CHARS(buf) ((unsigned char *) SCM_CELL_OBJECT_LOC(buf,1))
3ee86942
MV
95#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
96
97#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
98 ? STRINGBUF_INLINE_CHARS (buf) \
99 : STRINGBUF_OUTLINE_CHARS (buf))
50b1996f 100
f59cf998 101#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) SCM_CELL_WORD_1(buf))
3ee86942
MV
102#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
103 ? STRINGBUF_INLINE_LENGTH (buf) \
104 : STRINGBUF_OUTLINE_LENGTH (buf))
105
106#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
107
108#define SET_STRINGBUF_SHARED(buf) \
109 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
110
6ce6923b 111#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
112static size_t lenhist[1001];
113#endif
114
50b1996f
MG
115/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
116 characters. */
3ee86942
MV
117static SCM
118make_stringbuf (size_t len)
0f2d19dd 119{
3ee86942
MV
120 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
121 scm_i_symbol_chars, all stringbufs are null-terminated. Once
122 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
123 has been changed for scm_i_symbol_chars, this null-termination
124 can be dropped.
125 */
126
6ce6923b 127#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
128 if (len < 1000)
129 lenhist[len]++;
130 else
131 lenhist[1000]++;
132#endif
0f2d19dd 133
3ee86942
MV
134 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
135 {
136 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
137 0, 0, 0);
138 }
139 else
140 {
c5018a2b 141 char *mem = scm_gc_malloc_pointerless (len + 1, "string");
3ee86942
MV
142 mem[len] = '\0';
143 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
144 (scm_t_bits) len, (scm_t_bits) 0);
145 }
146}
e53cc817 147
50b1996f
MG
148/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
149 characters. */
9c44cd45
MG
150static SCM
151make_wide_stringbuf (size_t len)
152{
153 scm_t_wchar *mem;
6ce6923b 154#if SCM_STRING_LENGTH_HISTOGRAM
9c44cd45
MG
155 if (len < 1000)
156 lenhist[len]++;
157 else
158 lenhist[1000]++;
159#endif
160
161 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
162 mem[len] = 0;
163 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_WIDE, (scm_t_bits) mem,
164 (scm_t_bits) len, (scm_t_bits) 0);
165}
166
50b1996f
MG
167/* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
168 one containing 32-bit UCS-4-encoded characters. */
9c44cd45
MG
169static void
170widen_stringbuf (SCM buf)
171{
172 size_t i, len;
173 scm_t_wchar *mem;
174
175 if (STRINGBUF_WIDE (buf))
176 return;
177
178 if (STRINGBUF_INLINE (buf))
179 {
180 len = STRINGBUF_INLINE_LENGTH (buf);
181
182 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
183 for (i = 0; i < len; i++)
184 mem[i] =
f59cf998 185 (scm_t_wchar) STRINGBUF_INLINE_CHARS (buf)[i];
9c44cd45
MG
186 mem[len] = 0;
187
188 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_INLINE);
189 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
190 SCM_SET_CELL_WORD_1 (buf, mem);
191 SCM_SET_CELL_WORD_2 (buf, len);
192 }
193 else
194 {
195 len = STRINGBUF_OUTLINE_LENGTH (buf);
196
197 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
198 for (i = 0; i < len; i++)
199 mem[i] =
f59cf998 200 (scm_t_wchar) STRINGBUF_OUTLINE_CHARS (buf)[i];
9c44cd45
MG
201 mem[len] = 0;
202
203 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), len + 1, "string");
204
205 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
206 SCM_SET_CELL_WORD_1 (buf, mem);
207 SCM_SET_CELL_WORD_2 (buf, len);
208 }
3ee86942 209}
bd9e24b3 210
587a3355
MG
211/* Convert a stringbuf of 32-bit UCS-4-encoded characters to one
212 containing 8-bit Latin-1-encoded characters, if possible. */
213static void
214narrow_stringbuf (SCM buf)
215{
216 size_t i, len;
217 scm_t_wchar *wmem;
218 char *mem;
219
220 if (!STRINGBUF_WIDE (buf))
221 return;
222
223 len = STRINGBUF_OUTLINE_LENGTH (buf);
224 i = 0;
225 wmem = STRINGBUF_WIDE_CHARS (buf);
226 while (i < len)
227 if (wmem[i++] > 0xFF)
228 return;
229
230 mem = scm_gc_malloc (sizeof (char) * (len + 1), "string");
231 for (i = 0; i < len; i++)
232 mem[i] = (unsigned char) wmem[i];
233
234 scm_gc_free (wmem, sizeof (scm_t_wchar) * (len + 1), "string");
235
236 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_WIDE);
237 SCM_SET_CELL_WORD_1 (buf, mem);
238 SCM_SET_CELL_WORD_2 (buf, len);
239}
240
9de87eea 241scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 242
3ee86942
MV
243/* Copy-on-write strings.
244 */
bd9e24b3 245
3ee86942 246#define STRING_TAG scm_tc7_string
bd9e24b3 247
3ee86942
MV
248#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
249#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
250#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 251
3ee86942
MV
252#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
253#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
254
255#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
256
ed35de72
MV
257/* Read-only strings.
258 */
259
260#define RO_STRING_TAG (scm_tc7_string + 0x200)
261#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
262
e1b29f6a
MV
263/* Mutation-sharing substrings
264 */
265
266#define SH_STRING_TAG (scm_tc7_string + 0x100)
267
268#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
269/* START and LENGTH as for STRINGs. */
270
271#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
272
50b1996f
MG
273/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
274 characters. CHARSP, if not NULL, will be set to location of the
275 char array. */
3ee86942
MV
276SCM
277scm_i_make_string (size_t len, char **charsp)
278{
279 SCM buf = make_stringbuf (len);
280 SCM res;
281 if (charsp)
f59cf998 282 *charsp = (char *) STRINGBUF_CHARS (buf);
3ee86942
MV
283 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
284 (scm_t_bits)0, (scm_t_bits) len);
285 return res;
0f2d19dd
JB
286}
287
50b1996f
MG
288/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
289 characters. CHARSP, if not NULL, will be set to location of the
290 character array. */
9c44cd45 291SCM
50b1996f 292scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
9c44cd45
MG
293{
294 SCM buf = make_wide_stringbuf (len);
295 SCM res;
296 if (charsp)
297 *charsp = STRINGBUF_WIDE_CHARS (buf);
298 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
299 (scm_t_bits) 0, (scm_t_bits) len);
300 return res;
301}
302
3ee86942
MV
303static void
304validate_substring_args (SCM str, size_t start, size_t end)
305{
306 if (!IS_STRING (str))
307 scm_wrong_type_arg_msg (NULL, 0, str, "string");
308 if (start > STRING_LENGTH (str))
309 scm_out_of_range (NULL, scm_from_size_t (start));
310 if (end > STRING_LENGTH (str) || end < start)
311 scm_out_of_range (NULL, scm_from_size_t (end));
312}
0f2d19dd 313
e1b29f6a
MV
314static inline void
315get_str_buf_start (SCM *str, SCM *buf, size_t *start)
316{
317 *start = STRING_START (*str);
318 if (IS_SH_STRING (*str))
319 {
320 *str = SH_STRING_STRING (*str);
321 *start += STRING_START (*str);
322 }
323 *buf = STRING_STRINGBUF (*str);
324}
325
3ee86942
MV
326SCM
327scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 328{
e1b29f6a
MV
329 SCM buf;
330 size_t str_start;
331 get_str_buf_start (&str, &buf, &str_start);
9de87eea 332 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 333 SET_STRINGBUF_SHARED (buf);
9de87eea 334 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 335 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
336 (scm_t_bits)str_start + start,
337 (scm_t_bits) end - start);
0f2d19dd
JB
338}
339
ed35de72
MV
340SCM
341scm_i_substring_read_only (SCM str, size_t start, size_t end)
342{
45a9f430
LC
343 SCM buf;
344 size_t str_start;
345 get_str_buf_start (&str, &buf, &str_start);
346 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
347 SET_STRINGBUF_SHARED (buf);
348 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
349 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
350 (scm_t_bits)str_start + start,
351 (scm_t_bits) end - start);
ed35de72
MV
352}
353
3ee86942
MV
354SCM
355scm_i_substring_copy (SCM str, size_t start, size_t end)
356{
357 size_t len = end - start;
edea856c 358 SCM buf, my_buf;
e1b29f6a
MV
359 size_t str_start;
360 get_str_buf_start (&str, &buf, &str_start);
9c44cd45
MG
361 if (scm_i_is_narrow_string (str))
362 {
363 my_buf = make_stringbuf (len);
364 memcpy (STRINGBUF_CHARS (my_buf),
365 STRINGBUF_CHARS (buf) + str_start + start, len);
366 }
367 else
368 {
369 my_buf = make_wide_stringbuf (len);
370 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
371 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
372 + start), len);
373 /* Even though this string is wide, the substring may be narrow.
50b1996f 374 Consider adding code to narrow the string. */
9c44cd45 375 }
3ee86942 376 scm_remember_upto_here_1 (buf);
9c44cd45
MG
377 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
378 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 379}
0f2d19dd 380
e1b29f6a
MV
381SCM
382scm_i_substring_shared (SCM str, size_t start, size_t end)
383{
384 if (start == 0 && end == STRING_LENGTH (str))
385 return str;
386 else
387 {
388 size_t len = end - start;
389 if (IS_SH_STRING (str))
390 {
391 start += STRING_START (str);
392 str = SH_STRING_STRING (str);
393 }
394 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
395 (scm_t_bits)start, (scm_t_bits) len);
396 }
397}
398
3ee86942
MV
399SCM
400scm_c_substring (SCM str, size_t start, size_t end)
401{
402 validate_substring_args (str, start, end);
403 return scm_i_substring (str, start, end);
404}
ee149d03 405
ed35de72
MV
406SCM
407scm_c_substring_read_only (SCM str, size_t start, size_t end)
408{
409 validate_substring_args (str, start, end);
410 return scm_i_substring_read_only (str, start, end);
411}
412
0f2d19dd 413SCM
3ee86942 414scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 415{
3ee86942
MV
416 validate_substring_args (str, start, end);
417 return scm_i_substring_copy (str, start, end);
418}
419
3ee86942
MV
420SCM
421scm_c_substring_shared (SCM str, size_t start, size_t end)
422{
423 validate_substring_args (str, start, end);
424 return scm_i_substring_shared (str, start, end);
425}
0f2d19dd 426
d6c74168 427\f
3ee86942
MV
428/* Internal accessors
429 */
430
50b1996f
MG
431/* Returns the number of characters in STR. This may be different
432 than the memory size of the string storage. */
3ee86942
MV
433size_t
434scm_i_string_length (SCM str)
0f2d19dd 435{
3ee86942 436 return STRING_LENGTH (str);
0f2d19dd
JB
437}
438
50b1996f
MG
439/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
440 encoding. False if it is 'wide', having a 32-bit UCS-4
441 encoding. */
9c44cd45
MG
442int
443scm_i_is_narrow_string (SCM str)
444{
445 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
446}
447
587a3355
MG
448/* Try to coerce a string to be narrow. It if is narrow already, do
449 nothing. If it is wide, shrink it to narrow if none of its
450 characters are above 0xFF. Return true if the string is narrow or
451 was made to be narrow. */
452int
453scm_i_try_narrow_string (SCM str)
454{
455 narrow_stringbuf (STRING_STRINGBUF (str));
456
457 return scm_i_is_narrow_string (str);
458}
459
50b1996f
MG
460/* Returns a pointer to the 8-bit Latin-1 encoded character array of
461 STR. */
3ee86942
MV
462const char *
463scm_i_string_chars (SCM str)
464{
465 SCM buf;
e1b29f6a
MV
466 size_t start;
467 get_str_buf_start (&str, &buf, &start);
9c44cd45 468 if (scm_i_is_narrow_string (str))
f59cf998 469 return (const char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
470 else
471 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
472 scm_list_1 (str));
473 return NULL;
3ee86942 474}
b00418df 475
50b1996f
MG
476/* Returns a pointer to the 32-bit UCS-4 encoded character array of
477 STR. */
9c44cd45
MG
478const scm_t_wchar *
479scm_i_string_wide_chars (SCM str)
480{
481 SCM buf;
482 size_t start;
483
484 get_str_buf_start (&str, &buf, &start);
485 if (!scm_i_is_narrow_string (str))
f59cf998 486 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
9c44cd45
MG
487 else
488 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
489 scm_list_1 (str));
490}
491
492/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
493 a new string buffer, so that it can be modified without modifying
50b1996f
MG
494 other strings. Also, lock the string mutex. Later, one must call
495 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
496SCM
497scm_i_string_start_writing (SCM orig_str)
b00418df 498{
ed35de72 499 SCM buf, str = orig_str;
e1b29f6a 500 size_t start;
ed35de72 501
e1b29f6a 502 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
503 if (IS_RO_STRING (str))
504 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
505
9de87eea 506 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
507 if (STRINGBUF_SHARED (buf))
508 {
9c44cd45 509 /* Clone the stringbuf. */
3ee86942
MV
510 size_t len = STRING_LENGTH (str);
511 SCM new_buf;
512
9de87eea 513 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 514
9c44cd45
MG
515 if (scm_i_is_narrow_string (str))
516 {
517 new_buf = make_stringbuf (len);
518 memcpy (STRINGBUF_CHARS (new_buf),
519 STRINGBUF_CHARS (buf) + STRING_START (str), len);
520
521 }
522 else
523 {
524 new_buf = make_wide_stringbuf (len);
525 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
526 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
527 + STRING_START (str)), len);
528 }
3ee86942 529
3ee86942
MV
530 SET_STRING_STRINGBUF (str, new_buf);
531 start -= STRING_START (str);
902578f1
LC
532
533 /* FIXME: The following operations are not atomic, so other threads
534 looking at STR may see an inconsistent state. Nevertheless it can't
535 hurt much since (i) accessing STR while it is being mutated can't
536 yield a crash, and (ii) concurrent accesses to STR should be
537 protected by a mutex at the application level. The latter may not
538 apply when STR != ORIG_STR, though. */
3ee86942 539 SET_STRING_START (str, 0);
902578f1 540 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
541
542 buf = new_buf;
543
9de87eea 544 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 545 }
9c44cd45
MG
546 return orig_str;
547}
3ee86942 548
50b1996f 549/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
550char *
551scm_i_string_writable_chars (SCM str)
552{
553 SCM buf;
554 size_t start;
3ee86942 555
9c44cd45
MG
556 get_str_buf_start (&str, &buf, &start);
557 if (scm_i_is_narrow_string (str))
f59cf998 558 return (char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
559 else
560 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
561 scm_list_1 (str));
562 return NULL;
b00418df
DH
563}
564
50b1996f 565/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
566static scm_t_wchar *
567scm_i_string_writable_wide_chars (SCM str)
568{
569 SCM buf;
570 size_t start;
571
572 get_str_buf_start (&str, &buf, &start);
573 if (!scm_i_is_narrow_string (str))
574 return STRINGBUF_WIDE_CHARS (buf) + start;
575 else
1c7b216f 576 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
9c44cd45 577 scm_list_1 (str));
b00418df
DH
578}
579
50b1996f
MG
580/* Unlock the string mutex that was locked when
581 scm_i_string_start_writing was called. */
3ee86942
MV
582void
583scm_i_string_stop_writing (void)
584{
9de87eea 585 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 586}
b00418df 587
50b1996f 588/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
589scm_t_wchar
590scm_i_string_ref (SCM str, size_t x)
591{
592 if (scm_i_is_narrow_string (str))
593 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
594 else
595 return scm_i_string_wide_chars (str)[x];
596}
597
889975e5
MG
598/* Returns index+1 of the first char in STR that matches C, or
599 0 if the char is not found. */
600int
601scm_i_string_contains_char (SCM str, char ch)
602{
603 size_t i;
604 size_t len = scm_i_string_length (str);
605
606 i = 0;
607 if (scm_i_is_narrow_string (str))
608 {
609 while (i < len)
610 {
611 if (scm_i_string_chars (str)[i] == ch)
612 return i+1;
613 i++;
614 }
615 }
616 else
617 {
618 while (i < len)
619 {
620 if (scm_i_string_wide_chars (str)[i]
621 == (unsigned char) ch)
622 return i+1;
623 i++;
624 }
625 }
626 return 0;
627}
628
3f47e526
MG
629int
630scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
631{
632 if (scm_i_is_narrow_string (sstr))
633 {
634 const char *a = scm_i_string_chars (sstr) + start_x;
635 const char *b = cstr;
636 return strncmp (a, b, strlen(b));
637 }
638 else
639 {
640 size_t i;
641 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
642 const char *b = cstr;
643 for (i = 0; i < strlen (b); i++)
644 {
645 if (a[i] != (unsigned char) b[i])
646 return 1;
647 }
648 }
649 return 0;
650}
651
50b1996f 652/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
653void
654scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
655{
656 if (chr > 0xFF && scm_i_is_narrow_string (str))
657 widen_stringbuf (STRING_STRINGBUF (str));
658
659 if (scm_i_is_narrow_string (str))
660 {
661 char *dst = scm_i_string_writable_chars (str);
587a3355 662 dst[p] = chr;
9c44cd45
MG
663 }
664 else
665 {
666 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
667 dst[p] = chr;
668 }
669}
670
3ee86942 671/* Symbols.
587a3355 672
3ee86942
MV
673 Basic symbol creation and accessing is done here, the rest is in
674 symbols.[hc]. This has been done to keep stringbufs and the
675 internals of strings and string-like objects confined to this file.
676*/
677
678#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
679
680SCM
6869328b
MV
681scm_i_make_symbol (SCM name, scm_t_bits flags,
682 unsigned long hash, SCM props)
3ee86942
MV
683{
684 SCM buf;
685 size_t start = STRING_START (name);
686 size_t length = STRING_LENGTH (name);
687
688 if (IS_SH_STRING (name))
689 {
690 name = SH_STRING_STRING (name);
691 start += STRING_START (name);
692 }
693 buf = SYMBOL_STRINGBUF (name);
694
695 if (start == 0 && length == STRINGBUF_LENGTH (buf))
696 {
697 /* reuse buf. */
9de87eea 698 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 699 SET_STRINGBUF_SHARED (buf);
9de87eea 700 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
701 }
702 else
703 {
704 /* make new buf. */
9c44cd45
MG
705 if (scm_i_is_narrow_string (name))
706 {
707 SCM new_buf = make_stringbuf (length);
708 memcpy (STRINGBUF_CHARS (new_buf),
709 STRINGBUF_CHARS (buf) + start, length);
710 buf = new_buf;
711 }
712 else
713 {
714 SCM new_buf = make_wide_stringbuf (length);
715 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
716 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
717 length);
718 buf = new_buf;
719 }
3ee86942 720 }
6869328b 721 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
722 (scm_t_bits) hash, SCM_UNPACK (props));
723}
724
fd0a5bbc
HWN
725SCM
726scm_i_c_make_symbol (const char *name, size_t len,
727 scm_t_bits flags, unsigned long hash, SCM props)
728{
729 SCM buf = make_stringbuf (len);
730 memcpy (STRINGBUF_CHARS (buf), name, len);
731
a284cc7e
LC
732 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
733 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
734}
735
50b1996f
MG
736/* Returns the number of characters in SYM. This may be different
737 from the memory size of SYM. */
3ee86942
MV
738size_t
739scm_i_symbol_length (SCM sym)
0f2d19dd 740{
3ee86942 741 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
742}
743
071bb6a8
LC
744size_t
745scm_c_symbol_length (SCM sym)
746#define FUNC_NAME "scm_c_symbol_length"
747{
748 SCM_VALIDATE_SYMBOL (1, sym);
749
750 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
751}
752#undef FUNC_NAME
753
50b1996f
MG
754/* True if the name of SYM is stored as a Latin-1 encoded string.
755 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
756int
757scm_i_is_narrow_symbol (SCM sym)
758{
759 SCM buf;
760
761 buf = SYMBOL_STRINGBUF (sym);
762 return !STRINGBUF_WIDE (buf);
763}
764
50b1996f
MG
765/* Returns a pointer to the 8-bit Latin-1 encoded character array that
766 contains the name of SYM. */
3ee86942
MV
767const char *
768scm_i_symbol_chars (SCM sym)
769{
9c44cd45
MG
770 SCM buf;
771
772 buf = SYMBOL_STRINGBUF (sym);
773 if (!STRINGBUF_WIDE (buf))
f59cf998 774 return (const char *) STRINGBUF_CHARS (buf);
9c44cd45
MG
775 else
776 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
777 scm_list_1 (sym));
778}
779
50b1996f
MG
780/* Return a pointer to the 32-bit UCS-4-encoded character array of a
781 symbol's name. */
9c44cd45
MG
782const scm_t_wchar *
783scm_i_symbol_wide_chars (SCM sym)
784{
785 SCM buf;
786
787 buf = SYMBOL_STRINGBUF (sym);
788 if (STRINGBUF_WIDE (buf))
f59cf998 789 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
9c44cd45
MG
790 else
791 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
792 scm_list_1 (sym));
3ee86942 793}
1cc91f1b 794
be54b15d 795SCM
3ee86942 796scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 797{
3ee86942 798 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 799 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 800 SET_STRINGBUF_SHARED (buf);
9de87eea 801 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 802 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
803 (scm_t_bits)start, (scm_t_bits) end - start);
804}
be54b15d 805
50b1996f 806/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
807scm_t_wchar
808scm_i_symbol_ref (SCM sym, size_t x)
809{
810 if (scm_i_is_narrow_symbol (sym))
811 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
812 else
813 return scm_i_symbol_wide_chars (sym)[x];
814}
815
3ee86942
MV
816/* Debugging
817 */
be54b15d 818
6ce6923b
MG
819SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
820 "Returns an association list containing debugging information\n"
821 "for @var{str}. The association list has the following entries."
822 "@table @code\n"
823 "@item string\n"
824 "The string itself.\n"
825 "@item start\n"
826 "The start index of the string into its stringbuf\n"
827 "@item length\n"
828 "The length of the string\n"
829 "@item shared\n"
830 "If this string is a substring, it returns its parent string.\n"
831 "Otherwise, it returns @code{#f}\n"
88ed5759
MG
832 "@item read-only\n"
833 "@code{#t} if the string is read-only\n"
6ce6923b
MG
834 "@item stringbuf-chars\n"
835 "A new string containing this string's stringbuf's characters\n"
836 "@item stringbuf-length\n"
837 "The number of characters in this stringbuf\n"
838 "@item stringbuf-shared\n"
839 "@code{#t} if this stringbuf is shared\n"
840 "@item stringbuf-inline\n"
841 "@code{#t} if this stringbuf's characters are stored in the\n"
842 "cell itself, or @code{#f} if they were allocated in memory\n"
843 "@item stringbuf-wide\n"
844 "@code{#t} if this stringbuf's characters are stored in a\n"
845 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
846 "buffer\n"
847 "@end table")
3ee86942
MV
848#define FUNC_NAME s_scm_sys_string_dump
849{
6ce6923b
MG
850 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9, e10;
851 SCM buf;
3ee86942 852 SCM_VALIDATE_STRING (1, str);
6ce6923b
MG
853
854 /* String info */
855 e1 = scm_cons (scm_from_locale_symbol ("string"),
856 str);
857 e2 = scm_cons (scm_from_locale_symbol ("start"),
858 scm_from_size_t (STRING_START (str)));
859 e3 = scm_cons (scm_from_locale_symbol ("length"),
860 scm_from_size_t (STRING_LENGTH (str)));
861
3ee86942
MV
862 if (IS_SH_STRING (str))
863 {
6ce6923b
MG
864 e4 = scm_cons (scm_from_locale_symbol ("shared"),
865 SH_STRING_STRING (str));
866 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
3ee86942
MV
867 }
868 else
869 {
6ce6923b
MG
870 e4 = scm_cons (scm_from_locale_symbol ("shared"),
871 SCM_BOOL_F);
872 buf = STRING_STRINGBUF (str);
3ee86942 873 }
9c44cd45 874
88ed5759
MG
875 if (IS_RO_STRING (str))
876 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
877 SCM_BOOL_T);
878 else
879 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
880 SCM_BOOL_F);
587a3355 881
6ce6923b 882 /* Stringbuf info */
6ce6923b
MG
883 if (!STRINGBUF_WIDE (buf))
884 {
885 size_t len = STRINGBUF_LENGTH (buf);
886 char *cbuf;
887 SCM sbc = scm_i_make_string (len, &cbuf);
888 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
889 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
890 sbc);
3ee86942 891 }
6ce6923b
MG
892 else
893 {
894 size_t len = STRINGBUF_LENGTH (buf);
895 scm_t_wchar *cbuf;
896 SCM sbc = scm_i_make_wide_string (len, &cbuf);
897 u32_cpy ((scm_t_uint32 *) cbuf,
898 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
899 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
900 sbc);
901 }
902 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
903 scm_from_size_t (STRINGBUF_LENGTH (buf)));
904 if (STRINGBUF_SHARED (buf))
905 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
906 SCM_BOOL_T);
907 else
908 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
909 SCM_BOOL_F);
910 if (STRINGBUF_INLINE (buf))
911 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
912 SCM_BOOL_T);
913 else
914 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
915 SCM_BOOL_F);
916 if (STRINGBUF_WIDE (buf))
917 e10 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
918 SCM_BOOL_T);
919 else
920 e10 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
921 SCM_BOOL_F);
922
923 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, SCM_UNDEFINED);
3ee86942
MV
924}
925#undef FUNC_NAME
926
6ce6923b
MG
927SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
928 "Returns an association list containing debugging information\n"
929 "for @var{sym}. The association list has the following entries."
930 "@table @code\n"
931 "@item symbol\n"
932 "The symbol itself\n"
933 "@item hash\n"
934 "Its hash value\n"
88ed5759
MG
935 "@item interned\n"
936 "@code{#t} if it is an interned symbol\n"
6ce6923b
MG
937 "@item stringbuf-chars\n"
938 "A new string containing this symbols's stringbuf's characters\n"
939 "@item stringbuf-length\n"
940 "The number of characters in this stringbuf\n"
941 "@item stringbuf-shared\n"
942 "@code{#t} if this stringbuf is shared\n"
943 "@item stringbuf-inline\n"
944 "@code{#t} if this stringbuf's characters are stored in the\n"
945 "cell itself, or @code{#f} if they were allocated in memory\n"
946 "@item stringbuf-wide\n"
947 "@code{#t} if this stringbuf's characters are stored in a\n"
948 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
949 "buffer\n"
950 "@end table")
3ee86942
MV
951#define FUNC_NAME s_scm_sys_symbol_dump
952{
6ce6923b
MG
953 SCM e1, e2, e3, e4, e5, e6, e7, e8;
954 SCM buf;
3ee86942 955 SCM_VALIDATE_SYMBOL (1, sym);
6ce6923b
MG
956 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
957 sym);
958 e2 = scm_cons (scm_from_locale_symbol ("hash"),
959 scm_from_ulong (scm_i_symbol_hash (sym)));
88ed5759
MG
960 e3 = scm_cons (scm_from_locale_symbol ("interned"),
961 scm_symbol_interned_p (sym));
6ce6923b
MG
962 buf = SYMBOL_STRINGBUF (sym);
963
964 /* Stringbuf info */
6ce6923b
MG
965 if (!STRINGBUF_WIDE (buf))
966 {
967 size_t len = STRINGBUF_LENGTH (buf);
968 char *cbuf;
969 SCM sbc = scm_i_make_string (len, &cbuf);
970 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
971 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
972 sbc);
973 }
9c44cd45 974 else
6ce6923b
MG
975 {
976 size_t len = STRINGBUF_LENGTH (buf);
977 scm_t_wchar *cbuf;
978 SCM sbc = scm_i_make_wide_string (len, &cbuf);
979 u32_cpy ((scm_t_uint32 *) cbuf,
980 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
981 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
982 sbc);
983 }
984 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
985 scm_from_size_t (STRINGBUF_LENGTH (buf)));
986 if (STRINGBUF_SHARED (buf))
987 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
988 SCM_BOOL_T);
989 else
990 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
991 SCM_BOOL_F);
992 if (STRINGBUF_INLINE (buf))
993 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
994 SCM_BOOL_T);
995 else
996 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
997 SCM_BOOL_F);
998 if (STRINGBUF_WIDE (buf))
999 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
1000 SCM_BOOL_T);
1001 else
1002 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
1003 SCM_BOOL_F);
1004 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, SCM_UNDEFINED);
1005
3ee86942
MV
1006}
1007#undef FUNC_NAME
1008
6ce6923b
MG
1009#if SCM_STRING_LENGTH_HISTOGRAM
1010
9c44cd45 1011SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 1012#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
1013{
1014 int i;
1015 for (i = 0; i < 1000; i++)
1016 if (lenhist[i])
1017 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
1018 fprintf (stderr, ">999: %u\n", lenhist[1000]);
1019 return SCM_UNSPECIFIED;
be54b15d
DH
1020}
1021#undef FUNC_NAME
1022
3ee86942
MV
1023#endif
1024
1025\f
1026
1027SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
1028 (SCM obj),
1029 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
1030#define FUNC_NAME s_scm_string_p
1031{
1032 return scm_from_bool (IS_STRING (obj));
1033}
1034#undef FUNC_NAME
1035
1036
1037SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1038
1039SCM_DEFINE (scm_string, "string", 0, 0, 1,
1040 (SCM chrs),
1041 "@deffnx {Scheme Procedure} list->string chrs\n"
1042 "Return a newly allocated string composed of the arguments,\n"
1043 "@var{chrs}.")
1044#define FUNC_NAME s_scm_string
1045{
9aa27c1a 1046 SCM result = SCM_BOOL_F;
9c44cd45 1047 SCM rest;
3ee86942 1048 size_t len;
9c44cd45
MG
1049 size_t p = 0;
1050 long i;
9aa27c1a 1051 int wide = 0;
3ee86942 1052
9c44cd45
MG
1053 /* Verify that this is a list of chars. */
1054 i = scm_ilength (chrs);
3c7cf7f5 1055 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
3ee86942 1056
9c44cd45
MG
1057 len = (size_t) i;
1058 rest = chrs;
3ee86942 1059
9c44cd45 1060 while (len > 0 && scm_is_pair (rest))
3ee86942 1061 {
9c44cd45 1062 SCM elt = SCM_CAR (rest);
3ee86942 1063 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9aa27c1a
MG
1064 if (SCM_CHAR (elt) > 0xFF)
1065 wide = 1;
9c44cd45
MG
1066 rest = SCM_CDR (rest);
1067 len--;
1068 scm_remember_upto_here_1 (elt);
1069 }
1070
1071 /* Construct a string containing this list of chars. */
1072 len = (size_t) i;
1073 rest = chrs;
1074
9aa27c1a 1075 if (wide == 0)
9c44cd45 1076 {
9aa27c1a
MG
1077 result = scm_i_make_string (len, NULL);
1078 result = scm_i_string_start_writing (result);
1079 char *buf = scm_i_string_writable_chars (result);
1080 while (len > 0 && scm_is_pair (rest))
1081 {
1082 SCM elt = SCM_CAR (rest);
1083 buf[p] = (unsigned char) SCM_CHAR (elt);
1084 p++;
1085 rest = SCM_CDR (rest);
1086 len--;
1087 scm_remember_upto_here_1 (elt);
1088 }
1089 }
1090 else
1091 {
1092 result = scm_i_make_wide_string (len, NULL);
1093 result = scm_i_string_start_writing (result);
1094 scm_t_wchar *buf = scm_i_string_writable_wide_chars (result);
1095 while (len > 0 && scm_is_pair (rest))
1096 {
1097 SCM elt = SCM_CAR (rest);
1098 buf[p] = SCM_CHAR (elt);
1099 p++;
1100 rest = SCM_CDR (rest);
1101 len--;
1102 scm_remember_upto_here_1 (elt);
1103 }
3ee86942 1104 }
9c44cd45
MG
1105 scm_i_string_stop_writing ();
1106
3ee86942
MV
1107 if (len > 0)
1108 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 1109 if (!scm_is_null (rest))
3ee86942
MV
1110 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1111
1112 return result;
1113}
1114#undef FUNC_NAME
be54b15d 1115
3b3b36dd 1116SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 1117 (SCM k, SCM chr),
0d26a824
MG
1118 "Return a newly allocated string of\n"
1119 "length @var{k}. If @var{chr} is given, then all elements of\n"
1120 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 1121 "of the @var{string} are unspecified.")
1bbd0b84 1122#define FUNC_NAME s_scm_make_string
0f2d19dd 1123{
3ee86942
MV
1124 return scm_c_make_string (scm_to_size_t (k), chr);
1125}
1126#undef FUNC_NAME
1127
1128SCM
1129scm_c_make_string (size_t len, SCM chr)
1130#define FUNC_NAME NULL
1131{
9c44cd45
MG
1132 size_t p;
1133 SCM res = scm_i_make_string (len, NULL);
cb0d8be2 1134
e11e83f3
MV
1135 if (!SCM_UNBNDP (chr))
1136 {
3ee86942 1137 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
1138 res = scm_i_string_start_writing (res);
1139 for (p = 0; p < len; p++)
1140 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1141 scm_i_string_stop_writing ();
0f2d19dd 1142 }
e11e83f3
MV
1143
1144 return res;
0f2d19dd 1145}
1bbd0b84 1146#undef FUNC_NAME
0f2d19dd 1147
3b3b36dd 1148SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
1149 (SCM string),
1150 "Return the number of characters in @var{string}.")
1bbd0b84 1151#define FUNC_NAME s_scm_string_length
0f2d19dd 1152{
d1ca2c64 1153 SCM_VALIDATE_STRING (1, string);
3ee86942 1154 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 1155}
1bbd0b84 1156#undef FUNC_NAME
0f2d19dd 1157
f8ba2bb9 1158SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
9c44cd45
MG
1159 (SCM string),
1160 "Return the bytes used to represent a character in @var{string}."
1161 "This will return 1 or 4.")
f8ba2bb9 1162#define FUNC_NAME s_scm_string_bytes_per_char
9c44cd45
MG
1163{
1164 SCM_VALIDATE_STRING (1, string);
1165 if (!scm_i_is_narrow_string (string))
1166 return scm_from_int (4);
1167
1168 return scm_from_int (1);
1169}
1170#undef FUNC_NAME
1171
3ee86942
MV
1172size_t
1173scm_c_string_length (SCM string)
1174{
1175 if (!IS_STRING (string))
1176 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1177 return STRING_LENGTH (string);
1178}
1179
bd9e24b3 1180SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1181 (SCM str, SCM k),
9c44cd45
MG
1182 "Return character @var{k} of @var{str} using zero-origin\n"
1183 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1184#define FUNC_NAME s_scm_string_ref
0f2d19dd 1185{
3ae3166b 1186 size_t len;
a55c2b68 1187 unsigned long idx;
bd9e24b3 1188
d1ca2c64 1189 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1190
1191 len = scm_i_string_length (str);
1192 if (SCM_LIKELY (len > 0))
1193 idx = scm_to_unsigned_integer (k, 0, len - 1);
1194 else
1195 scm_out_of_range (NULL, k);
1196
9c44cd45
MG
1197 if (scm_i_is_narrow_string (str))
1198 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1199 else
1200 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1201}
1bbd0b84 1202#undef FUNC_NAME
0f2d19dd 1203
3ee86942
MV
1204SCM
1205scm_c_string_ref (SCM str, size_t p)
1206{
1207 if (p >= scm_i_string_length (str))
1208 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1209 if (scm_i_is_narrow_string (str))
1210 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1211 else
1212 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1213
3ee86942 1214}
f0942910 1215
3b3b36dd 1216SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1217 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1218 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1219 "an unspecified value. @var{k} must be a valid index of\n"
1220 "@var{str}.")
1bbd0b84 1221#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1222{
3ae3166b 1223 size_t len;
a55c2b68
MV
1224 unsigned long idx;
1225
f0942910 1226 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1227
1228 len = scm_i_string_length (str);
1229 if (SCM_LIKELY (len > 0))
1230 idx = scm_to_unsigned_integer (k, 0, len - 1);
1231 else
1232 scm_out_of_range (NULL, k);
1233
34d19ef6 1234 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1235 str = scm_i_string_start_writing (str);
1236 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1237 scm_i_string_stop_writing ();
1238
0f2d19dd
JB
1239 return SCM_UNSPECIFIED;
1240}
1bbd0b84 1241#undef FUNC_NAME
0f2d19dd 1242
3ee86942
MV
1243void
1244scm_c_string_set_x (SCM str, size_t p, SCM chr)
1245{
1246 if (p >= scm_i_string_length (str))
1247 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1248 str = scm_i_string_start_writing (str);
1249 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1250 scm_i_string_stop_writing ();
3ee86942 1251}
0f2d19dd 1252
3b3b36dd 1253SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1254 (SCM str, SCM start, SCM end),
1255 "Return a newly allocated string formed from the characters\n"
1256 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1257 "ending with index @var{end} (exclusive).\n"
1258 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1259 "exact integers satisfying:\n\n"
1260 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1261#define FUNC_NAME s_scm_substring
0f2d19dd 1262{
3ee86942 1263 size_t len, from, to;
685c0d71 1264
d1ca2c64 1265 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1266 len = scm_i_string_length (str);
1267 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1268 if (SCM_UNBNDP (end))
3ee86942 1269 to = len;
a55c2b68 1270 else
3ee86942
MV
1271 to = scm_to_unsigned_integer (end, from, len);
1272 return scm_i_substring (str, from, to);
0f2d19dd 1273}
1bbd0b84 1274#undef FUNC_NAME
0f2d19dd 1275
ed35de72
MV
1276SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1277 (SCM str, SCM start, SCM end),
1278 "Return a newly allocated string formed from the characters\n"
1279 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1280 "ending with index @var{end} (exclusive).\n"
1281 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1282 "exact integers satisfying:\n"
1283 "\n"
1284 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1285 "\n"
1286 "The returned string is read-only.\n")
1287#define FUNC_NAME s_scm_substring_read_only
1288{
1289 size_t len, from, to;
1290
1291 SCM_VALIDATE_STRING (1, str);
1292 len = scm_i_string_length (str);
1293 from = scm_to_unsigned_integer (start, 0, len);
1294 if (SCM_UNBNDP (end))
1295 to = len;
1296 else
1297 to = scm_to_unsigned_integer (end, from, len);
1298 return scm_i_substring_read_only (str, from, to);
1299}
1300#undef FUNC_NAME
1301
3ee86942
MV
1302SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1303 (SCM str, SCM start, SCM end),
1304 "Return a newly allocated string formed from the characters\n"
1305 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1306 "ending with index @var{end} (exclusive).\n"
1307 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1308 "exact integers satisfying:\n\n"
1309 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1310#define FUNC_NAME s_scm_substring_copy
1311{
e1b29f6a
MV
1312 /* For the Scheme version, START is mandatory, but for the C
1313 version, it is optional. See scm_string_copy in srfi-13.c for a
1314 rationale.
1315 */
1316
1317 size_t from, to;
3ee86942
MV
1318
1319 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1320 scm_i_get_substring_spec (scm_i_string_length (str),
1321 start, &from, end, &to);
3ee86942
MV
1322 return scm_i_substring_copy (str, from, to);
1323}
1324#undef FUNC_NAME
1325
1326SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1327 (SCM str, SCM start, SCM end),
1328 "Return string that indirectly refers to the characters\n"
1329 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1330 "ending with index @var{end} (exclusive).\n"
1331 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1332 "exact integers satisfying:\n\n"
1333 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1334#define FUNC_NAME s_scm_substring_shared
1335{
1336 size_t len, from, to;
1337
1338 SCM_VALIDATE_STRING (1, str);
1339 len = scm_i_string_length (str);
1340 from = scm_to_unsigned_integer (start, 0, len);
1341 if (SCM_UNBNDP (end))
1342 to = len;
1343 else
1344 to = scm_to_unsigned_integer (end, from, len);
1345 return scm_i_substring_shared (str, from, to);
1346}
1347#undef FUNC_NAME
685c0d71 1348
3b3b36dd 1349SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1350 (SCM args),
9c44cd45 1351 "Return a newly allocated string whose characters form the\n"
0d26a824 1352 "concatenation of the given strings, @var{args}.")
1bbd0b84 1353#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1354{
1355 SCM res;
9c44cd45
MG
1356 size_t len = 0;
1357 int wide = 0;
c829a427 1358 SCM l, s;
bd4911ef 1359 size_t i;
9909c395
MG
1360 union
1361 {
1362 char *narrow;
1363 scm_t_wchar *wide;
1364 } data;
af45e3b0
DH
1365
1366 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1367 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1368 {
1369 s = SCM_CAR (l);
1370 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1371 len += scm_i_string_length (s);
1372 if (!scm_i_is_narrow_string (s))
1373 wide = 1;
c829a427 1374 }
9909c395 1375 data.narrow = NULL;
9c44cd45 1376 if (!wide)
9909c395 1377 res = scm_i_make_string (len, &data.narrow);
9c44cd45 1378 else
9909c395 1379 res = scm_i_make_wide_string (len, &data.wide);
9c44cd45
MG
1380
1381 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1382 {
edea856c 1383 size_t len;
c829a427 1384 s = SCM_CAR (l);
3ee86942 1385 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1386 len = scm_i_string_length (s);
9c44cd45
MG
1387 if (!wide)
1388 {
9909c395
MG
1389 memcpy (data.narrow, scm_i_string_chars (s), len);
1390 data.narrow += len;
9c44cd45
MG
1391 }
1392 else
1393 {
1394 if (scm_i_is_narrow_string (s))
1395 {
1396 for (i = 0; i < scm_i_string_length (s); i++)
9909c395 1397 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
9c44cd45
MG
1398 }
1399 else
9909c395 1400 u32_cpy ((scm_t_uint32 *) data.wide,
9c44cd45 1401 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
9909c395 1402 data.wide += len;
9c44cd45 1403 }
c829a427
MV
1404 scm_remember_upto_here_1 (s);
1405 }
0f2d19dd
JB
1406 return res;
1407}
1bbd0b84 1408#undef FUNC_NAME
0f2d19dd 1409
c829a427
MV
1410int
1411scm_is_string (SCM obj)
1412{
3ee86942 1413 return IS_STRING (obj);
c829a427 1414}
24933780 1415
587a3355
MG
1416static SCM
1417scm_from_stringn (const char *str, size_t len, const char *encoding,
1418 scm_t_string_failed_conversion_handler handler)
1419{
1420 size_t u32len, i;
1421 scm_t_wchar *u32;
1422 int wide = 0;
1423 SCM res;
1424
889975e5
MG
1425 if (encoding == NULL)
1426 {
1427 /* If encoding is null, use Latin-1. */
1428 char *buf;
1429 res = scm_i_make_string (len, &buf);
1430 memcpy (buf, str, len);
1431 return res;
1432 }
1433
587a3355
MG
1434 u32len = 0;
1435 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1436 (enum iconv_ilseq_handler)
1437 handler,
1438 str, len,
1439 NULL,
1440 NULL, &u32len);
1441
1442 if (u32 == NULL)
1443 {
1444 if (errno == ENOMEM)
1445 scm_memory_error ("locale string conversion");
1446 else
1447 {
889975e5 1448 /* There are invalid sequences in the input string. */
587a3355
MG
1449 SCM errstr;
1450 char *dst;
587a3355
MG
1451 errstr = scm_i_make_string (len, &dst);
1452 memcpy (dst, str, len);
1453 scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
1454 scm_list_2 (scm_from_locale_string (encoding),
1455 errstr));
1456 scm_remember_upto_here_1 (errstr);
1457 }
1458 }
1459
1460 i = 0;
1461 while (i < u32len)
1462 if (u32[i++] > 0xFF)
1463 {
1464 wide = 1;
1465 break;
1466 }
1467
1468 if (!wide)
1469 {
1470 char *dst;
1471 res = scm_i_make_string (u32len, &dst);
1472 for (i = 0; i < u32len; i ++)
1473 dst[i] = (unsigned char) u32[i];
1474 dst[u32len] = '\0';
1475 }
1476 else
1477 {
1478 scm_t_wchar *wdst;
1479 res = scm_i_make_wide_string (u32len, &wdst);
1480 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1481 wdst[u32len] = 0;
1482 }
1483
1484 free (u32);
1485 return res;
1486}
1487
c829a427
MV
1488SCM
1489scm_from_locale_stringn (const char *str, size_t len)
1490{
889975e5
MG
1491 const char *enc;
1492 scm_t_string_failed_conversion_handler hndl;
1493 SCM inport;
1494 scm_t_port *pt;
4d4528e7 1495
9c44cd45 1496 if (len == (size_t) -1)
c829a427 1497 len = strlen (str);
9c44cd45
MG
1498 if (len == 0)
1499 return scm_nullstr;
1500
889975e5
MG
1501 inport = scm_current_input_port ();
1502 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1503 {
1504 pt = SCM_PTAB_ENTRY (inport);
1505 enc = pt->encoding;
1506 hndl = pt->ilseq_handler;
1507 }
1508 else
1509 {
1510 enc = NULL;
1511 hndl = SCM_FAILED_CONVERSION_ERROR;
1512 }
1513
1514 return scm_from_stringn (str, len, enc, hndl);
c829a427 1515}
4d4528e7 1516
c829a427
MV
1517SCM
1518scm_from_locale_string (const char *str)
4d4528e7 1519{
9c44cd45
MG
1520 if (str == NULL)
1521 return scm_nullstr;
1522
c829a427
MV
1523 return scm_from_locale_stringn (str, -1);
1524}
4d4528e7 1525
587a3355
MG
1526SCM
1527scm_i_from_utf8_string (const scm_t_uint8 *str)
1528{
1529 return scm_from_stringn ((const char *) str,
1530 strlen ((char *) str), "UTF-8",
1531 SCM_FAILED_CONVERSION_ERROR);
1532}
1533
50b1996f
MG
1534/* Create a new scheme string from the C string STR. The memory of
1535 STR may be used directly as storage for the new string. */
13a94556
LC
1536/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1537 would be to register a finalizer to eventually free(3) STR, which isn't
1538 worth it. Should we just deprecate the `scm_take_' functions? */
c829a427
MV
1539SCM
1540scm_take_locale_stringn (char *str, size_t len)
1541{
13a94556 1542 SCM res;
48ddf0d9 1543
13a94556
LC
1544 res = scm_from_locale_stringn (str, len);
1545 free (str);
c829a427 1546
c829a427
MV
1547 return res;
1548}
1549
48ddf0d9
KR
1550SCM
1551scm_take_locale_string (char *str)
1552{
1553 return scm_take_locale_stringn (str, -1);
1554}
1555
9c44cd45
MG
1556/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1557 and \UXXXXXX. */
1558static void
1559unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1560{
1561 char *before, *after;
1562 size_t i, j;
1563
1564 before = *bufp;
1565 after = *bufp;
1566 i = 0;
1567 j = 0;
1568 while (i < *lenp)
1569 {
1570 if ((i <= *lenp - 6)
1571 && before[i] == '\\'
1572 && before[i + 1] == 'u'
1573 && before[i + 2] == '0' && before[i + 3] == '0')
1574 {
1575 /* Convert \u00NN to \xNN */
1576 after[j] = '\\';
1577 after[j + 1] = 'x';
30a6b9ca
MG
1578 after[j + 2] = tolower ((int) before[i + 4]);
1579 after[j + 3] = tolower ((int) before[i + 5]);
9c44cd45
MG
1580 i += 6;
1581 j += 4;
1582 }
1583 else if ((i <= *lenp - 10)
1584 && before[i] == '\\'
1585 && before[i + 1] == 'U'
1586 && before[i + 2] == '0' && before[i + 3] == '0')
1587 {
1588 /* Convert \U00NNNNNN to \UNNNNNN */
1589 after[j] = '\\';
1590 after[j + 1] = 'U';
30a6b9ca
MG
1591 after[j + 2] = tolower ((int) before[i + 4]);
1592 after[j + 3] = tolower ((int) before[i + 5]);
1593 after[j + 4] = tolower ((int) before[i + 6]);
1594 after[j + 5] = tolower ((int) before[i + 7]);
1595 after[j + 6] = tolower ((int) before[i + 8]);
1596 after[j + 7] = tolower ((int) before[i + 9]);
9c44cd45
MG
1597 i += 10;
1598 j += 8;
1599 }
1600 else
1601 {
1602 after[j] = before[i];
1603 i++;
1604 j++;
1605 }
1606 }
1607 *lenp = j;
1608 after = scm_realloc (after, j);
1609}
1610
c829a427 1611char *
9c44cd45 1612scm_to_locale_stringn (SCM str, size_t * lenp)
c829a427 1613{
889975e5
MG
1614 SCM outport;
1615 scm_t_port *pt;
9c44cd45
MG
1616 const char *enc;
1617
889975e5
MG
1618 outport = scm_current_output_port ();
1619 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1620 {
1621 pt = SCM_PTAB_ENTRY (outport);
1622 enc = pt->encoding;
1623 }
1624 else
1625 enc = NULL;
9c44cd45 1626
889975e5
MG
1627 return scm_to_stringn (str, lenp,
1628 enc,
1629 scm_i_get_conversion_strategy (SCM_BOOL_F));
9c44cd45
MG
1630}
1631
1632/* Low-level scheme to C string conversion function. */
1633char *
587a3355 1634scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
eca29b02 1635 scm_t_string_failed_conversion_handler handler)
9c44cd45 1636{
9c44cd45
MG
1637 char *buf;
1638 size_t ilen, len, i;
889975e5
MG
1639 int ret;
1640 const char *enc;
4d4528e7 1641
3ee86942 1642 if (!scm_is_string (str))
c829a427 1643 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1644 ilen = scm_i_string_length (str);
1645
1646 if (ilen == 0)
1647 {
1648 buf = scm_malloc (1);
1649 buf[0] = '\0';
1650 if (lenp)
1651 *lenp = 0;
1652 return buf;
1653 }
587a3355 1654
c829a427 1655 if (lenp == NULL)
9c44cd45
MG
1656 for (i = 0; i < ilen; i++)
1657 if (scm_i_string_ref (str, i) == '\0')
1658 scm_misc_error (NULL,
1659 "string contains #\\nul character: ~S",
1660 scm_list_1 (str));
1661
889975e5 1662 if (scm_i_is_narrow_string (str) && (encoding == NULL))
c829a427 1663 {
889975e5
MG
1664 /* If using native Latin-1 encoding, just copy the string
1665 contents. */
9c44cd45
MG
1666 if (lenp)
1667 {
1668 buf = scm_malloc (ilen);
1669 memcpy (buf, scm_i_string_chars (str), ilen);
1670 *lenp = ilen;
1671 return buf;
1672 }
1673 else
1674 {
1675 buf = scm_malloc (ilen + 1);
1676 memcpy (buf, scm_i_string_chars (str), ilen);
1677 buf[ilen] = '\0';
1678 return buf;
1679 }
c829a427 1680 }
9c44cd45 1681
587a3355 1682
9c44cd45
MG
1683 buf = NULL;
1684 len = 0;
889975e5
MG
1685 enc = encoding;
1686 if (enc == NULL)
1687 enc = "ISO-8859-1";
1688 if (scm_i_is_narrow_string (str))
1689 {
1690 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1691 "ISO-8859-1", enc,
1692 (enum iconv_ilseq_handler) handler, NULL,
1693 &buf, &len);
9c44cd45 1694
889975e5
MG
1695 if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1696 unistring_escapes_to_guile_escapes (&buf, &len);
9c44cd45 1697
889975e5
MG
1698 if (ret != 0)
1699 {
1700 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1701 scm_list_2 (scm_from_locale_string (enc),
1702 str));
1703 }
1704 }
1705 else
1706 {
1707 buf = u32_conv_to_encoding (enc,
1708 (enum iconv_ilseq_handler) handler,
1709 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1710 ilen,
1711 NULL,
1712 NULL, &len);
1713 if (buf == NULL)
1714 {
1715 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1716 scm_list_2 (scm_from_locale_string (enc),
1717 str));
1718 }
1719 }
9c44cd45 1720 if (lenp)
4d4528e7 1721 *lenp = len;
9c44cd45
MG
1722 else
1723 {
1724 buf = scm_realloc (buf, len + 1);
1725 buf[len] = '\0';
1726 }
24933780 1727
c829a427 1728 scm_remember_upto_here_1 (str);
9c44cd45 1729 return buf;
4d4528e7 1730}
af68e5e5 1731
c829a427
MV
1732char *
1733scm_to_locale_string (SCM str)
1734{
1735 return scm_to_locale_stringn (str, NULL);
1736}
af68e5e5 1737
587a3355
MG
1738scm_t_uint8 *
1739scm_i_to_utf8_string (SCM str)
1740{
1741 char *u8str;
1742 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1743 return (scm_t_uint8 *) u8str;
1744}
1745
c829a427
MV
1746size_t
1747scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1748{
1749 size_t len;
9c44cd45 1750 char *result = NULL;
3ee86942 1751 if (!scm_is_string (str))
c829a427 1752 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1753 result = scm_to_locale_stringn (str, &len);
1754
1755 memcpy (buf, result, (len > max_len) ? max_len : len);
1756 free (result);
1757
c829a427
MV
1758 scm_remember_upto_here_1 (str);
1759 return len;
1760}
af68e5e5 1761
3ee86942
MV
1762/* converts C scm_array of strings to SCM scm_list of strings. */
1763/* If argc < 0, a null terminated scm_array is assumed. */
9c44cd45 1764SCM
3ee86942
MV
1765scm_makfromstrs (int argc, char **argv)
1766{
1767 int i = argc;
1768 SCM lst = SCM_EOL;
1769 if (0 > i)
1770 for (i = 0; argv[i]; i++);
1771 while (i--)
1772 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1773 return lst;
1774}
1775
c829a427
MV
1776/* Return a newly allocated array of char pointers to each of the strings
1777 in args, with a terminating NULL pointer. */
1778
1779char **
1780scm_i_allocate_string_pointers (SCM list)
2a776823 1781#define FUNC_NAME "scm_i_allocate_string_pointers"
af68e5e5 1782{
c829a427
MV
1783 char **result;
1784 int len = scm_ilength (list);
1785 int i;
1786
1787 if (len < 0)
1788 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1789
2a776823
LC
1790 result = scm_gc_malloc ((len + 1) * sizeof (char *),
1791 "string pointers");
c829a427 1792 result[len] = NULL;
c829a427
MV
1793
1794 /* The list might be have been modified in another thread, so
1795 we check LIST before each access.
1796 */
d2e53ed6 1797 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427 1798 {
2a776823
LC
1799 SCM str;
1800 size_t len;
1801
1802 str = SCM_CAR (list);
1803 len = scm_c_string_length (str);
1804
1805 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
1806 memcpy (result[i], scm_i_string_chars (str), len);
1807 result[i][len] = '\0';
1808
c829a427
MV
1809 list = SCM_CDR (list);
1810 }
1811
c829a427 1812 return result;
af68e5e5 1813}
2a776823 1814#undef FUNC_NAME
24933780 1815
6f14f578
MV
1816void
1817scm_i_get_substring_spec (size_t len,
1818 SCM start, size_t *cstart,
1819 SCM end, size_t *cend)
1820{
1821 if (SCM_UNBNDP (start))
1822 *cstart = 0;
1823 else
1824 *cstart = scm_to_unsigned_integer (start, 0, len);
1825
1826 if (SCM_UNBNDP (end))
1827 *cend = len;
1828 else
1829 *cend = scm_to_unsigned_integer (end, *cstart, len);
1830}
1831
3ee86942
MV
1832#if SCM_ENABLE_DEPRECATED
1833
556d75db
MV
1834/* When these definitions are removed, it becomes reasonable to use
1835 read-only strings for string literals. For that, change the reader
1836 to create string literals with scm_c_substring_read_only instead of
1837 with scm_c_substring_copy.
1838*/
1839
3ee86942 1840int
fe78c51a 1841scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1842{
1843 scm_c_issue_deprecation_warning
1844 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1845
2616f0e0 1846 return scm_is_string (str);
3ee86942
MV
1847}
1848
1849char *
fe78c51a 1850scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1851{
1852 char *chars;
1853
1854 scm_c_issue_deprecation_warning
1855 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1856
2616f0e0
MV
1857 /* We don't accept shared substrings here since they are not
1858 null-terminated.
1859 */
1860 if (IS_SH_STRING (str))
1861 scm_misc_error (NULL,
1862 "SCM_STRING_CHARS does not work with shared substrings.",
1863 SCM_EOL);
1864
877f06c3 1865 /* We explicitly test for read-only strings to produce a better
556d75db
MV
1866 error message.
1867 */
1868
1869 if (IS_RO_STRING (str))
1870 scm_misc_error (NULL,
1871 "SCM_STRING_CHARS does not work with read-only strings.",
1872 SCM_EOL);
1873
2616f0e0 1874 /* The following is still wrong, of course...
3ee86942 1875 */
9c44cd45 1876 str = scm_i_string_start_writing (str);
3ee86942
MV
1877 chars = scm_i_string_writable_chars (str);
1878 scm_i_string_stop_writing ();
1879 return chars;
1880}
1881
1882size_t
fe78c51a 1883scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1884{
1885 scm_c_issue_deprecation_warning
1886 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1887 return scm_c_string_length (str);
1888}
1889
1890#endif
1891
2a610be5
AW
1892static SCM
1893string_handle_ref (scm_t_array_handle *h, size_t index)
1894{
1895 return scm_c_string_ref (h->array, index);
1896}
1897
1898static void
1899string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
1900{
1901 scm_c_string_set_x (h->array, index, val);
1902}
1903
1904static void
1905string_get_handle (SCM v, scm_t_array_handle *h)
1906{
1907 h->array = v;
1908 h->ndims = 1;
1909 h->dims = &h->dim0;
1910 h->dim0.lbnd = 0;
1911 h->dim0.ubnd = scm_c_string_length (v) - 1;
1912 h->dim0.inc = 1;
1913 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
1914 h->elements = h->writable_elements = NULL;
1915}
1916
1917SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f & ~2,
1918 string_handle_ref, string_handle_set,
1919 string_get_handle);
f45eccff 1920SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string);
2a610be5 1921
0f2d19dd
JB
1922void
1923scm_init_strings ()
0f2d19dd 1924{
3ee86942 1925 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1926
a0599745 1927#include "libguile/strings.x"
0f2d19dd
JB
1928}
1929
89e00824
ML
1930
1931/*
1932 Local Variables:
1933 c-file-style: "gnu"
1934 End:
1935*/