Fix encoding errors with strings returned by string ports
[bpt/guile.git] / libguile / strings.c
CommitLineData
50b1996f 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
faf2c9d7 25#include <string.h>
3ee86942 26#include <stdio.h>
9c44cd45
MG
27#include <ctype.h>
28#include <unistr.h>
eca29b02 29#include <uniconv.h>
faf2c9d7 30
889975e5
MG
31#include "striconveh.h"
32
a0599745
MD
33#include "libguile/_scm.h"
34#include "libguile/chars.h"
7c33806a 35#include "libguile/root.h"
a0599745 36#include "libguile/strings.h"
f45eccff 37#include "libguile/generalized-vectors.h"
1afff620 38#include "libguile/deprecation.h"
a0599745 39#include "libguile/validate.h"
c829a427 40#include "libguile/dynwind.h"
1afff620 41
0f2d19dd
JB
42\f
43
44/* {Strings}
45 */
46
3ee86942
MV
47
48/* Stringbufs
49 *
50 * XXX - keeping an accurate refcount during GC seems to be quite
51 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 52 * shared, not whether it definitely is.
3ee86942
MV
53 *
54 * The scheme I (mvo) tried to keep an accurate reference count would
55 * recount all strings that point to a stringbuf during the mark-phase
56 * of the GC. This was done since one cannot access the stringbuf of
57 * a string when that string is freed (in order to decrease the
58 * reference count). The memory of the stringbuf might have been
59 * reused already for something completely different.
60 *
61 * This recounted worked for a small number of threads beating on
62 * cow-strings, but it failed randomly with more than 10 threads, say.
63 * I couldn't figure out what went wrong, so I used the conservative
64 * approach implemented below.
65 *
66 * A stringbuf needs to know its length, but only so that it can be
67 * reported when the stringbuf is freed.
68 *
50b1996f
MG
69 * There are 3 storage strategies for stringbufs: inline, outline, and
70 * wide.
71 *
72 * Inline strings are small 8-bit strings stored within the double
73 * cell itself. Outline strings are larger 8-bit strings with GC
74 * allocated storage. Wide strings are 32-bit strings with allocated
75 * storage.
76 *
77 * There was little value in making wide string inlineable, since
78 * there is only room for three inlined 32-bit characters. Thus wide
79 * stringbufs are never inlined.
3ee86942
MV
80 */
81
82#define STRINGBUF_F_SHARED 0x100
83#define STRINGBUF_F_INLINE 0x200
50b1996f
MG
84#define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
85 encoding. Otherwise, strings
86 are Latin-1. */
3ee86942
MV
87
88#define STRINGBUF_TAG scm_tc7_stringbuf
89#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
90#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
9c44cd45 91#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942 92
f59cf998 93#define STRINGBUF_OUTLINE_CHARS(buf) ((unsigned char *) SCM_CELL_WORD_1(buf))
3ee86942 94#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
f59cf998 95#define STRINGBUF_INLINE_CHARS(buf) ((unsigned char *) SCM_CELL_OBJECT_LOC(buf,1))
3ee86942
MV
96#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
97
98#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
99 ? STRINGBUF_INLINE_CHARS (buf) \
100 : STRINGBUF_OUTLINE_CHARS (buf))
50b1996f 101
f59cf998 102#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) SCM_CELL_WORD_1(buf))
3ee86942
MV
103#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
104 ? STRINGBUF_INLINE_LENGTH (buf) \
105 : STRINGBUF_OUTLINE_LENGTH (buf))
106
107#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
108
109#define SET_STRINGBUF_SHARED(buf) \
110 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
111
6ce6923b 112#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
113static size_t lenhist[1001];
114#endif
115
50b1996f
MG
116/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
117 characters. */
3ee86942
MV
118static SCM
119make_stringbuf (size_t len)
0f2d19dd 120{
3ee86942
MV
121 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
122 scm_i_symbol_chars, all stringbufs are null-terminated. Once
123 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
124 has been changed for scm_i_symbol_chars, this null-termination
125 can be dropped.
126 */
127
6ce6923b 128#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
129 if (len < 1000)
130 lenhist[len]++;
131 else
132 lenhist[1000]++;
133#endif
0f2d19dd 134
3ee86942
MV
135 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
136 {
137 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
138 0, 0, 0);
139 }
140 else
141 {
142 char *mem = scm_gc_malloc (len+1, "string");
143 mem[len] = '\0';
144 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
145 (scm_t_bits) len, (scm_t_bits) 0);
146 }
147}
e53cc817 148
50b1996f
MG
149/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
150 characters. */
9c44cd45
MG
151static SCM
152make_wide_stringbuf (size_t len)
153{
154 scm_t_wchar *mem;
6ce6923b 155#if SCM_STRING_LENGTH_HISTOGRAM
9c44cd45
MG
156 if (len < 1000)
157 lenhist[len]++;
158 else
159 lenhist[1000]++;
160#endif
161
162 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
163 mem[len] = 0;
164 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_WIDE, (scm_t_bits) mem,
165 (scm_t_bits) len, (scm_t_bits) 0);
166}
167
2b829bbb
KR
168/* Return a new stringbuf whose underlying storage consists of the LEN+1
169 octets pointed to by STR (the last octet is zero). */
7f74cf9a 170SCM
fd0a5bbc
HWN
171scm_i_take_stringbufn (char *str, size_t len)
172{
2b829bbb 173 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
fd0a5bbc
HWN
174
175 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
176 (scm_t_bits) len, (scm_t_bits) 0);
177}
178
3ee86942
MV
179SCM
180scm_i_stringbuf_mark (SCM buf)
181{
182 return SCM_BOOL_F;
183}
1bbd0b84 184
3ee86942
MV
185void
186scm_i_stringbuf_free (SCM buf)
0f2d19dd 187{
3ee86942 188 if (!STRINGBUF_INLINE (buf))
9c44cd45
MG
189 {
190 if (!STRINGBUF_WIDE (buf))
191 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
192 STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
193 else
194 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
195 sizeof (scm_t_wchar) * (STRINGBUF_OUTLINE_LENGTH (buf)
196 + 1), "string");
197 }
198
199}
200
50b1996f
MG
201/* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
202 one containing 32-bit UCS-4-encoded characters. */
9c44cd45
MG
203static void
204widen_stringbuf (SCM buf)
205{
206 size_t i, len;
207 scm_t_wchar *mem;
208
209 if (STRINGBUF_WIDE (buf))
210 return;
211
212 if (STRINGBUF_INLINE (buf))
213 {
214 len = STRINGBUF_INLINE_LENGTH (buf);
215
216 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
217 for (i = 0; i < len; i++)
218 mem[i] =
f59cf998 219 (scm_t_wchar) STRINGBUF_INLINE_CHARS (buf)[i];
9c44cd45
MG
220 mem[len] = 0;
221
222 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_INLINE);
223 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
224 SCM_SET_CELL_WORD_1 (buf, mem);
225 SCM_SET_CELL_WORD_2 (buf, len);
226 }
227 else
228 {
229 len = STRINGBUF_OUTLINE_LENGTH (buf);
230
231 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
232 for (i = 0; i < len; i++)
233 mem[i] =
f59cf998 234 (scm_t_wchar) STRINGBUF_OUTLINE_CHARS (buf)[i];
9c44cd45
MG
235 mem[len] = 0;
236
237 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), len + 1, "string");
238
239 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
240 SCM_SET_CELL_WORD_1 (buf, mem);
241 SCM_SET_CELL_WORD_2 (buf, len);
242 }
3ee86942 243}
bd9e24b3 244
587a3355
MG
245/* Convert a stringbuf of 32-bit UCS-4-encoded characters to one
246 containing 8-bit Latin-1-encoded characters, if possible. */
247static void
248narrow_stringbuf (SCM buf)
249{
250 size_t i, len;
251 scm_t_wchar *wmem;
252 char *mem;
253
254 if (!STRINGBUF_WIDE (buf))
255 return;
256
257 len = STRINGBUF_OUTLINE_LENGTH (buf);
258 i = 0;
259 wmem = STRINGBUF_WIDE_CHARS (buf);
260 while (i < len)
261 if (wmem[i++] > 0xFF)
262 return;
263
264 mem = scm_gc_malloc (sizeof (char) * (len + 1), "string");
265 for (i = 0; i < len; i++)
266 mem[i] = (unsigned char) wmem[i];
267
268 scm_gc_free (wmem, sizeof (scm_t_wchar) * (len + 1), "string");
269
270 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_WIDE);
271 SCM_SET_CELL_WORD_1 (buf, mem);
272 SCM_SET_CELL_WORD_2 (buf, len);
273}
274
9de87eea 275scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 276
3ee86942
MV
277/* Copy-on-write strings.
278 */
bd9e24b3 279
3ee86942 280#define STRING_TAG scm_tc7_string
bd9e24b3 281
3ee86942
MV
282#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
283#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
284#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 285
3ee86942
MV
286#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
287#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
288
289#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
290
ed35de72
MV
291/* Read-only strings.
292 */
293
294#define RO_STRING_TAG (scm_tc7_string + 0x200)
295#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
296
e1b29f6a
MV
297/* Mutation-sharing substrings
298 */
299
300#define SH_STRING_TAG (scm_tc7_string + 0x100)
301
302#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
303/* START and LENGTH as for STRINGs. */
304
305#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
306
50b1996f
MG
307/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
308 characters. CHARSP, if not NULL, will be set to location of the
309 char array. */
3ee86942
MV
310SCM
311scm_i_make_string (size_t len, char **charsp)
312{
313 SCM buf = make_stringbuf (len);
314 SCM res;
315 if (charsp)
f59cf998 316 *charsp = (char *) STRINGBUF_CHARS (buf);
3ee86942
MV
317 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
318 (scm_t_bits)0, (scm_t_bits) len);
319 return res;
0f2d19dd
JB
320}
321
50b1996f
MG
322/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
323 characters. CHARSP, if not NULL, will be set to location of the
324 character array. */
9c44cd45 325SCM
50b1996f 326scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
9c44cd45
MG
327{
328 SCM buf = make_wide_stringbuf (len);
329 SCM res;
330 if (charsp)
331 *charsp = STRINGBUF_WIDE_CHARS (buf);
332 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
333 (scm_t_bits) 0, (scm_t_bits) len);
334 return res;
335}
336
3ee86942
MV
337static void
338validate_substring_args (SCM str, size_t start, size_t end)
339{
340 if (!IS_STRING (str))
341 scm_wrong_type_arg_msg (NULL, 0, str, "string");
342 if (start > STRING_LENGTH (str))
343 scm_out_of_range (NULL, scm_from_size_t (start));
344 if (end > STRING_LENGTH (str) || end < start)
345 scm_out_of_range (NULL, scm_from_size_t (end));
346}
0f2d19dd 347
e1b29f6a
MV
348static inline void
349get_str_buf_start (SCM *str, SCM *buf, size_t *start)
350{
351 *start = STRING_START (*str);
352 if (IS_SH_STRING (*str))
353 {
354 *str = SH_STRING_STRING (*str);
355 *start += STRING_START (*str);
356 }
357 *buf = STRING_STRINGBUF (*str);
358}
359
3ee86942
MV
360SCM
361scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 362{
e1b29f6a
MV
363 SCM buf;
364 size_t str_start;
365 get_str_buf_start (&str, &buf, &str_start);
9de87eea 366 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 367 SET_STRINGBUF_SHARED (buf);
9de87eea 368 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 369 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
370 (scm_t_bits)str_start + start,
371 (scm_t_bits) end - start);
0f2d19dd
JB
372}
373
ed35de72
MV
374SCM
375scm_i_substring_read_only (SCM str, size_t start, size_t end)
376{
45a9f430
LC
377 SCM buf;
378 size_t str_start;
379 get_str_buf_start (&str, &buf, &str_start);
380 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
381 SET_STRINGBUF_SHARED (buf);
382 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
383 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
384 (scm_t_bits)str_start + start,
385 (scm_t_bits) end - start);
ed35de72
MV
386}
387
3ee86942
MV
388SCM
389scm_i_substring_copy (SCM str, size_t start, size_t end)
390{
391 size_t len = end - start;
edea856c 392 SCM buf, my_buf;
e1b29f6a
MV
393 size_t str_start;
394 get_str_buf_start (&str, &buf, &str_start);
9c44cd45
MG
395 if (scm_i_is_narrow_string (str))
396 {
397 my_buf = make_stringbuf (len);
398 memcpy (STRINGBUF_CHARS (my_buf),
399 STRINGBUF_CHARS (buf) + str_start + start, len);
400 }
401 else
402 {
403 my_buf = make_wide_stringbuf (len);
404 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
405 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
406 + start), len);
407 /* Even though this string is wide, the substring may be narrow.
50b1996f 408 Consider adding code to narrow the string. */
9c44cd45 409 }
3ee86942 410 scm_remember_upto_here_1 (buf);
9c44cd45
MG
411 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
412 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 413}
0f2d19dd 414
e1b29f6a
MV
415SCM
416scm_i_substring_shared (SCM str, size_t start, size_t end)
417{
418 if (start == 0 && end == STRING_LENGTH (str))
419 return str;
420 else
421 {
422 size_t len = end - start;
423 if (IS_SH_STRING (str))
424 {
425 start += STRING_START (str);
426 str = SH_STRING_STRING (str);
427 }
428 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
429 (scm_t_bits)start, (scm_t_bits) len);
430 }
431}
432
3ee86942
MV
433SCM
434scm_c_substring (SCM str, size_t start, size_t end)
435{
436 validate_substring_args (str, start, end);
437 return scm_i_substring (str, start, end);
438}
ee149d03 439
ed35de72
MV
440SCM
441scm_c_substring_read_only (SCM str, size_t start, size_t end)
442{
443 validate_substring_args (str, start, end);
444 return scm_i_substring_read_only (str, start, end);
445}
446
0f2d19dd 447SCM
3ee86942 448scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 449{
3ee86942
MV
450 validate_substring_args (str, start, end);
451 return scm_i_substring_copy (str, start, end);
452}
453
3ee86942
MV
454SCM
455scm_c_substring_shared (SCM str, size_t start, size_t end)
456{
457 validate_substring_args (str, start, end);
458 return scm_i_substring_shared (str, start, end);
459}
0f2d19dd 460
ee149d03 461SCM
3ee86942 462scm_i_string_mark (SCM str)
ee149d03 463{
3ee86942
MV
464 if (IS_SH_STRING (str))
465 return SH_STRING_STRING (str);
466 else
467 return STRING_STRINGBUF (str);
ee149d03
JB
468}
469
3ee86942
MV
470void
471scm_i_string_free (SCM str)
472{
473}
36284627 474
3ee86942
MV
475/* Internal accessors
476 */
477
50b1996f
MG
478/* Returns the number of characters in STR. This may be different
479 than the memory size of the string storage. */
3ee86942
MV
480size_t
481scm_i_string_length (SCM str)
0f2d19dd 482{
3ee86942 483 return STRING_LENGTH (str);
0f2d19dd
JB
484}
485
50b1996f
MG
486/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
487 encoding. False if it is 'wide', having a 32-bit UCS-4
488 encoding. */
9c44cd45
MG
489int
490scm_i_is_narrow_string (SCM str)
491{
492 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
493}
494
587a3355
MG
495/* Try to coerce a string to be narrow. It if is narrow already, do
496 nothing. If it is wide, shrink it to narrow if none of its
497 characters are above 0xFF. Return true if the string is narrow or
498 was made to be narrow. */
499int
500scm_i_try_narrow_string (SCM str)
501{
502 narrow_stringbuf (STRING_STRINGBUF (str));
503
504 return scm_i_is_narrow_string (str);
505}
506
50b1996f
MG
507/* Returns a pointer to the 8-bit Latin-1 encoded character array of
508 STR. */
3ee86942
MV
509const char *
510scm_i_string_chars (SCM str)
511{
512 SCM buf;
e1b29f6a
MV
513 size_t start;
514 get_str_buf_start (&str, &buf, &start);
9c44cd45 515 if (scm_i_is_narrow_string (str))
f59cf998 516 return (const char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
517 else
518 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
519 scm_list_1 (str));
520 return NULL;
3ee86942 521}
b00418df 522
50b1996f
MG
523/* Returns a pointer to the 32-bit UCS-4 encoded character array of
524 STR. */
9c44cd45
MG
525const scm_t_wchar *
526scm_i_string_wide_chars (SCM str)
527{
528 SCM buf;
529 size_t start;
530
531 get_str_buf_start (&str, &buf, &start);
532 if (!scm_i_is_narrow_string (str))
f59cf998 533 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
9c44cd45
MG
534 else
535 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
536 scm_list_1 (str));
537}
538
539/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
540 a new string buffer, so that it can be modified without modifying
50b1996f
MG
541 other strings. Also, lock the string mutex. Later, one must call
542 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
543SCM
544scm_i_string_start_writing (SCM orig_str)
b00418df 545{
ed35de72 546 SCM buf, str = orig_str;
e1b29f6a 547 size_t start;
ed35de72 548
e1b29f6a 549 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
550 if (IS_RO_STRING (str))
551 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
552
9de87eea 553 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
554 if (STRINGBUF_SHARED (buf))
555 {
9c44cd45 556 /* Clone the stringbuf. */
3ee86942
MV
557 size_t len = STRING_LENGTH (str);
558 SCM new_buf;
559
9de87eea 560 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 561
9c44cd45
MG
562 if (scm_i_is_narrow_string (str))
563 {
564 new_buf = make_stringbuf (len);
565 memcpy (STRINGBUF_CHARS (new_buf),
566 STRINGBUF_CHARS (buf) + STRING_START (str), len);
567
568 }
569 else
570 {
571 new_buf = make_wide_stringbuf (len);
572 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
573 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
574 + STRING_START (str)), len);
575 }
3ee86942
MV
576 scm_i_thread_put_to_sleep ();
577 SET_STRING_STRINGBUF (str, new_buf);
578 start -= STRING_START (str);
579 SET_STRING_START (str, 0);
580 scm_i_thread_wake_up ();
581
582 buf = new_buf;
583
9de87eea 584 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 585 }
9c44cd45
MG
586 return orig_str;
587}
588
50b1996f 589/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
590char *
591scm_i_string_writable_chars (SCM str)
592{
593 SCM buf;
594 size_t start;
3ee86942 595
9c44cd45
MG
596 get_str_buf_start (&str, &buf, &start);
597 if (scm_i_is_narrow_string (str))
f59cf998 598 return (char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
599 else
600 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
601 scm_list_1 (str));
602 return NULL;
603}
604
50b1996f 605/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
606static scm_t_wchar *
607scm_i_string_writable_wide_chars (SCM str)
608{
609 SCM buf;
610 size_t start;
611
612 get_str_buf_start (&str, &buf, &start);
613 if (!scm_i_is_narrow_string (str))
614 return STRINGBUF_WIDE_CHARS (buf) + start;
615 else
1c7b216f 616 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
9c44cd45 617 scm_list_1 (str));
b00418df
DH
618}
619
50b1996f
MG
620/* Unlock the string mutex that was locked when
621 scm_i_string_start_writing was called. */
3ee86942
MV
622void
623scm_i_string_stop_writing (void)
624{
9de87eea 625 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 626}
b00418df 627
50b1996f 628/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
629scm_t_wchar
630scm_i_string_ref (SCM str, size_t x)
631{
632 if (scm_i_is_narrow_string (str))
633 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
634 else
635 return scm_i_string_wide_chars (str)[x];
636}
637
889975e5
MG
638/* Returns index+1 of the first char in STR that matches C, or
639 0 if the char is not found. */
640int
641scm_i_string_contains_char (SCM str, char ch)
642{
643 size_t i;
644 size_t len = scm_i_string_length (str);
645
646 i = 0;
647 if (scm_i_is_narrow_string (str))
648 {
649 while (i < len)
650 {
651 if (scm_i_string_chars (str)[i] == ch)
652 return i+1;
653 i++;
654 }
655 }
656 else
657 {
658 while (i < len)
659 {
660 if (scm_i_string_wide_chars (str)[i]
661 == (unsigned char) ch)
662 return i+1;
663 i++;
664 }
665 }
666 return 0;
667}
668
3f47e526
MG
669int
670scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
671{
672 if (scm_i_is_narrow_string (sstr))
673 {
674 const char *a = scm_i_string_chars (sstr) + start_x;
675 const char *b = cstr;
676 return strncmp (a, b, strlen(b));
677 }
678 else
679 {
680 size_t i;
681 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
682 const char *b = cstr;
683 for (i = 0; i < strlen (b); i++)
684 {
685 if (a[i] != (unsigned char) b[i])
686 return 1;
687 }
688 }
689 return 0;
690}
691
50b1996f 692/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
693void
694scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
695{
696 if (chr > 0xFF && scm_i_is_narrow_string (str))
697 widen_stringbuf (STRING_STRINGBUF (str));
698
699 if (scm_i_is_narrow_string (str))
700 {
701 char *dst = scm_i_string_writable_chars (str);
587a3355 702 dst[p] = chr;
9c44cd45
MG
703 }
704 else
705 {
706 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
707 dst[p] = chr;
708 }
709}
710
3ee86942 711/* Symbols.
587a3355 712
3ee86942
MV
713 Basic symbol creation and accessing is done here, the rest is in
714 symbols.[hc]. This has been done to keep stringbufs and the
715 internals of strings and string-like objects confined to this file.
716*/
717
718#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
719
720SCM
6869328b
MV
721scm_i_make_symbol (SCM name, scm_t_bits flags,
722 unsigned long hash, SCM props)
3ee86942
MV
723{
724 SCM buf;
725 size_t start = STRING_START (name);
726 size_t length = STRING_LENGTH (name);
727
728 if (IS_SH_STRING (name))
729 {
730 name = SH_STRING_STRING (name);
731 start += STRING_START (name);
732 }
733 buf = SYMBOL_STRINGBUF (name);
734
735 if (start == 0 && length == STRINGBUF_LENGTH (buf))
736 {
737 /* reuse buf. */
9de87eea 738 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 739 SET_STRINGBUF_SHARED (buf);
9de87eea 740 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
741 }
742 else
743 {
744 /* make new buf. */
9c44cd45
MG
745 if (scm_i_is_narrow_string (name))
746 {
747 SCM new_buf = make_stringbuf (length);
748 memcpy (STRINGBUF_CHARS (new_buf),
749 STRINGBUF_CHARS (buf) + start, length);
750 buf = new_buf;
751 }
752 else
753 {
754 SCM new_buf = make_wide_stringbuf (length);
755 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
756 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
757 length);
758 buf = new_buf;
759 }
3ee86942 760 }
6869328b 761 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
762 (scm_t_bits) hash, SCM_UNPACK (props));
763}
764
fd0a5bbc
HWN
765SCM
766scm_i_c_make_symbol (const char *name, size_t len,
767 scm_t_bits flags, unsigned long hash, SCM props)
768{
769 SCM buf = make_stringbuf (len);
770 memcpy (STRINGBUF_CHARS (buf), name, len);
771
772 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
773 (scm_t_bits) hash, SCM_UNPACK (props));
774}
775
776/* Return a new symbol that uses the LEN bytes pointed to by NAME as its
777 underlying storage. */
778SCM
779scm_i_c_take_symbol (char *name, size_t len,
780 scm_t_bits flags, unsigned long hash, SCM props)
781{
782 SCM buf = scm_i_take_stringbufn (name, len);
783
784 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
785 (scm_t_bits) hash, SCM_UNPACK (props));
786}
787
50b1996f
MG
788/* Returns the number of characters in SYM. This may be different
789 from the memory size of SYM. */
3ee86942
MV
790size_t
791scm_i_symbol_length (SCM sym)
0f2d19dd 792{
3ee86942 793 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
794}
795
071bb6a8
LC
796size_t
797scm_c_symbol_length (SCM sym)
798#define FUNC_NAME "scm_c_symbol_length"
799{
800 SCM_VALIDATE_SYMBOL (1, sym);
801
802 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
803}
804#undef FUNC_NAME
805
50b1996f
MG
806/* True if the name of SYM is stored as a Latin-1 encoded string.
807 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
808int
809scm_i_is_narrow_symbol (SCM sym)
810{
811 SCM buf;
812
813 buf = SYMBOL_STRINGBUF (sym);
814 return !STRINGBUF_WIDE (buf);
815}
816
50b1996f
MG
817/* Returns a pointer to the 8-bit Latin-1 encoded character array that
818 contains the name of SYM. */
3ee86942
MV
819const char *
820scm_i_symbol_chars (SCM sym)
821{
9c44cd45
MG
822 SCM buf;
823
824 buf = SYMBOL_STRINGBUF (sym);
825 if (!STRINGBUF_WIDE (buf))
f59cf998 826 return (const char *) STRINGBUF_CHARS (buf);
9c44cd45
MG
827 else
828 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
829 scm_list_1 (sym));
830}
831
50b1996f
MG
832/* Return a pointer to the 32-bit UCS-4-encoded character array of a
833 symbol's name. */
9c44cd45
MG
834const scm_t_wchar *
835scm_i_symbol_wide_chars (SCM sym)
836{
837 SCM buf;
838
839 buf = SYMBOL_STRINGBUF (sym);
840 if (STRINGBUF_WIDE (buf))
f59cf998 841 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
9c44cd45
MG
842 else
843 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
844 scm_list_1 (sym));
3ee86942 845}
1cc91f1b 846
3ee86942
MV
847SCM
848scm_i_symbol_mark (SCM sym)
0f2d19dd 849{
3ee86942
MV
850 scm_gc_mark (SYMBOL_STRINGBUF (sym));
851 return SCM_CELL_OBJECT_3 (sym);
0f2d19dd
JB
852}
853
3ee86942
MV
854void
855scm_i_symbol_free (SCM sym)
856{
857}
0f2d19dd 858
be54b15d 859SCM
3ee86942 860scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 861{
3ee86942 862 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 863 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 864 SET_STRINGBUF_SHARED (buf);
9de87eea 865 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 866 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
867 (scm_t_bits)start, (scm_t_bits) end - start);
868}
be54b15d 869
50b1996f 870/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
871scm_t_wchar
872scm_i_symbol_ref (SCM sym, size_t x)
873{
874 if (scm_i_is_narrow_symbol (sym))
875 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
876 else
877 return scm_i_symbol_wide_chars (sym)[x];
878}
879
3ee86942
MV
880/* Debugging
881 */
be54b15d 882
6ce6923b
MG
883SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
884 "Returns an association list containing debugging information\n"
885 "for @var{str}. The association list has the following entries."
886 "@table @code\n"
887 "@item string\n"
888 "The string itself.\n"
889 "@item start\n"
890 "The start index of the string into its stringbuf\n"
891 "@item length\n"
892 "The length of the string\n"
893 "@item shared\n"
894 "If this string is a substring, it returns its parent string.\n"
895 "Otherwise, it returns @code{#f}\n"
88ed5759
MG
896 "@item read-only\n"
897 "@code{#t} if the string is read-only\n"
6ce6923b
MG
898 "@item stringbuf-chars\n"
899 "A new string containing this string's stringbuf's characters\n"
900 "@item stringbuf-length\n"
901 "The number of characters in this stringbuf\n"
902 "@item stringbuf-shared\n"
903 "@code{#t} if this stringbuf is shared\n"
904 "@item stringbuf-inline\n"
905 "@code{#t} if this stringbuf's characters are stored in the\n"
906 "cell itself, or @code{#f} if they were allocated in memory\n"
907 "@item stringbuf-wide\n"
908 "@code{#t} if this stringbuf's characters are stored in a\n"
909 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
910 "buffer\n"
911 "@end table")
3ee86942
MV
912#define FUNC_NAME s_scm_sys_string_dump
913{
6ce6923b
MG
914 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9, e10;
915 SCM buf;
3ee86942 916 SCM_VALIDATE_STRING (1, str);
6ce6923b
MG
917
918 /* String info */
919 e1 = scm_cons (scm_from_locale_symbol ("string"),
920 str);
921 e2 = scm_cons (scm_from_locale_symbol ("start"),
922 scm_from_size_t (STRING_START (str)));
923 e3 = scm_cons (scm_from_locale_symbol ("length"),
924 scm_from_size_t (STRING_LENGTH (str)));
925
3ee86942
MV
926 if (IS_SH_STRING (str))
927 {
6ce6923b
MG
928 e4 = scm_cons (scm_from_locale_symbol ("shared"),
929 SH_STRING_STRING (str));
930 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
3ee86942
MV
931 }
932 else
933 {
6ce6923b
MG
934 e4 = scm_cons (scm_from_locale_symbol ("shared"),
935 SCM_BOOL_F);
936 buf = STRING_STRINGBUF (str);
937 }
9c44cd45 938
88ed5759
MG
939 if (IS_RO_STRING (str))
940 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
941 SCM_BOOL_T);
942 else
943 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
944 SCM_BOOL_F);
587a3355 945
6ce6923b 946 /* Stringbuf info */
6ce6923b
MG
947 if (!STRINGBUF_WIDE (buf))
948 {
949 size_t len = STRINGBUF_LENGTH (buf);
950 char *cbuf;
951 SCM sbc = scm_i_make_string (len, &cbuf);
952 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
953 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
954 sbc);
3ee86942 955 }
6ce6923b
MG
956 else
957 {
958 size_t len = STRINGBUF_LENGTH (buf);
959 scm_t_wchar *cbuf;
960 SCM sbc = scm_i_make_wide_string (len, &cbuf);
961 u32_cpy ((scm_t_uint32 *) cbuf,
962 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
963 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
964 sbc);
965 }
966 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
967 scm_from_size_t (STRINGBUF_LENGTH (buf)));
968 if (STRINGBUF_SHARED (buf))
969 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
970 SCM_BOOL_T);
971 else
972 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
973 SCM_BOOL_F);
974 if (STRINGBUF_INLINE (buf))
975 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
976 SCM_BOOL_T);
977 else
978 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
979 SCM_BOOL_F);
980 if (STRINGBUF_WIDE (buf))
981 e10 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
982 SCM_BOOL_T);
983 else
984 e10 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
985 SCM_BOOL_F);
986
987 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, SCM_UNDEFINED);
3ee86942
MV
988}
989#undef FUNC_NAME
990
6ce6923b
MG
991SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
992 "Returns an association list containing debugging information\n"
993 "for @var{sym}. The association list has the following entries."
994 "@table @code\n"
995 "@item symbol\n"
996 "The symbol itself\n"
997 "@item hash\n"
998 "Its hash value\n"
88ed5759
MG
999 "@item interned\n"
1000 "@code{#t} if it is an interned symbol\n"
6ce6923b
MG
1001 "@item stringbuf-chars\n"
1002 "A new string containing this symbols's stringbuf's characters\n"
1003 "@item stringbuf-length\n"
1004 "The number of characters in this stringbuf\n"
1005 "@item stringbuf-shared\n"
1006 "@code{#t} if this stringbuf is shared\n"
1007 "@item stringbuf-inline\n"
1008 "@code{#t} if this stringbuf's characters are stored in the\n"
1009 "cell itself, or @code{#f} if they were allocated in memory\n"
1010 "@item stringbuf-wide\n"
1011 "@code{#t} if this stringbuf's characters are stored in a\n"
1012 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
1013 "buffer\n"
1014 "@end table")
3ee86942
MV
1015#define FUNC_NAME s_scm_sys_symbol_dump
1016{
6ce6923b
MG
1017 SCM e1, e2, e3, e4, e5, e6, e7, e8;
1018 SCM buf;
3ee86942 1019 SCM_VALIDATE_SYMBOL (1, sym);
6ce6923b
MG
1020 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
1021 sym);
1022 e2 = scm_cons (scm_from_locale_symbol ("hash"),
1023 scm_from_ulong (scm_i_symbol_hash (sym)));
88ed5759
MG
1024 e3 = scm_cons (scm_from_locale_symbol ("interned"),
1025 scm_symbol_interned_p (sym));
6ce6923b
MG
1026 buf = SYMBOL_STRINGBUF (sym);
1027
1028 /* Stringbuf info */
6ce6923b
MG
1029 if (!STRINGBUF_WIDE (buf))
1030 {
1031 size_t len = STRINGBUF_LENGTH (buf);
1032 char *cbuf;
1033 SCM sbc = scm_i_make_string (len, &cbuf);
1034 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
1035 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
1036 sbc);
1037 }
9c44cd45 1038 else
6ce6923b
MG
1039 {
1040 size_t len = STRINGBUF_LENGTH (buf);
1041 scm_t_wchar *cbuf;
1042 SCM sbc = scm_i_make_wide_string (len, &cbuf);
1043 u32_cpy ((scm_t_uint32 *) cbuf,
1044 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
1045 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
1046 sbc);
1047 }
1048 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
1049 scm_from_size_t (STRINGBUF_LENGTH (buf)));
1050 if (STRINGBUF_SHARED (buf))
1051 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
1052 SCM_BOOL_T);
1053 else
1054 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
1055 SCM_BOOL_F);
1056 if (STRINGBUF_INLINE (buf))
1057 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
1058 SCM_BOOL_T);
1059 else
1060 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
1061 SCM_BOOL_F);
1062 if (STRINGBUF_WIDE (buf))
1063 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
1064 SCM_BOOL_T);
1065 else
1066 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
1067 SCM_BOOL_F);
1068 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, SCM_UNDEFINED);
1069
3ee86942
MV
1070}
1071#undef FUNC_NAME
1072
6ce6923b
MG
1073#if SCM_STRING_LENGTH_HISTOGRAM
1074
9c44cd45 1075SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 1076#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
1077{
1078 int i;
1079 for (i = 0; i < 1000; i++)
1080 if (lenhist[i])
1081 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
1082 fprintf (stderr, ">999: %u\n", lenhist[1000]);
1083 return SCM_UNSPECIFIED;
be54b15d
DH
1084}
1085#undef FUNC_NAME
1086
3ee86942
MV
1087#endif
1088
1089\f
1090
1091SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
1092 (SCM obj),
1093 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
1094#define FUNC_NAME s_scm_string_p
1095{
1096 return scm_from_bool (IS_STRING (obj));
1097}
1098#undef FUNC_NAME
1099
1100
1101SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1102
1103SCM_DEFINE (scm_string, "string", 0, 0, 1,
1104 (SCM chrs),
1105 "@deffnx {Scheme Procedure} list->string chrs\n"
1106 "Return a newly allocated string composed of the arguments,\n"
1107 "@var{chrs}.")
1108#define FUNC_NAME s_scm_string
1109{
9aa27c1a 1110 SCM result = SCM_BOOL_F;
9c44cd45 1111 SCM rest;
3ee86942 1112 size_t len;
9c44cd45
MG
1113 size_t p = 0;
1114 long i;
9aa27c1a 1115 int wide = 0;
3ee86942 1116
9c44cd45
MG
1117 /* Verify that this is a list of chars. */
1118 i = scm_ilength (chrs);
3c7cf7f5
MG
1119 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
1120
9c44cd45
MG
1121 len = (size_t) i;
1122 rest = chrs;
3ee86942 1123
9c44cd45 1124 while (len > 0 && scm_is_pair (rest))
3ee86942 1125 {
9c44cd45 1126 SCM elt = SCM_CAR (rest);
3ee86942 1127 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9aa27c1a
MG
1128 if (SCM_CHAR (elt) > 0xFF)
1129 wide = 1;
9c44cd45
MG
1130 rest = SCM_CDR (rest);
1131 len--;
1132 scm_remember_upto_here_1 (elt);
1133 }
1134
1135 /* Construct a string containing this list of chars. */
1136 len = (size_t) i;
1137 rest = chrs;
1138
9aa27c1a 1139 if (wide == 0)
9c44cd45 1140 {
9aa27c1a
MG
1141 result = scm_i_make_string (len, NULL);
1142 result = scm_i_string_start_writing (result);
1143 char *buf = scm_i_string_writable_chars (result);
1144 while (len > 0 && scm_is_pair (rest))
1145 {
1146 SCM elt = SCM_CAR (rest);
1147 buf[p] = (unsigned char) SCM_CHAR (elt);
1148 p++;
1149 rest = SCM_CDR (rest);
1150 len--;
1151 scm_remember_upto_here_1 (elt);
1152 }
1153 }
1154 else
1155 {
1156 result = scm_i_make_wide_string (len, NULL);
1157 result = scm_i_string_start_writing (result);
1158 scm_t_wchar *buf = scm_i_string_writable_wide_chars (result);
1159 while (len > 0 && scm_is_pair (rest))
1160 {
1161 SCM elt = SCM_CAR (rest);
1162 buf[p] = SCM_CHAR (elt);
1163 p++;
1164 rest = SCM_CDR (rest);
1165 len--;
1166 scm_remember_upto_here_1 (elt);
1167 }
3ee86942 1168 }
9c44cd45
MG
1169 scm_i_string_stop_writing ();
1170
3ee86942
MV
1171 if (len > 0)
1172 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 1173 if (!scm_is_null (rest))
3ee86942
MV
1174 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1175
1176 return result;
1177}
1178#undef FUNC_NAME
be54b15d 1179
3b3b36dd 1180SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 1181 (SCM k, SCM chr),
0d26a824
MG
1182 "Return a newly allocated string of\n"
1183 "length @var{k}. If @var{chr} is given, then all elements of\n"
1184 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 1185 "of the @var{string} are unspecified.")
1bbd0b84 1186#define FUNC_NAME s_scm_make_string
0f2d19dd 1187{
3ee86942
MV
1188 return scm_c_make_string (scm_to_size_t (k), chr);
1189}
1190#undef FUNC_NAME
1191
1192SCM
1193scm_c_make_string (size_t len, SCM chr)
1194#define FUNC_NAME NULL
1195{
9c44cd45
MG
1196 size_t p;
1197 SCM res = scm_i_make_string (len, NULL);
cb0d8be2 1198
e11e83f3
MV
1199 if (!SCM_UNBNDP (chr))
1200 {
3ee86942 1201 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
1202 res = scm_i_string_start_writing (res);
1203 for (p = 0; p < len; p++)
1204 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1205 scm_i_string_stop_writing ();
0f2d19dd 1206 }
e11e83f3
MV
1207
1208 return res;
0f2d19dd 1209}
1bbd0b84 1210#undef FUNC_NAME
0f2d19dd 1211
3b3b36dd 1212SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
1213 (SCM string),
1214 "Return the number of characters in @var{string}.")
1bbd0b84 1215#define FUNC_NAME s_scm_string_length
0f2d19dd 1216{
d1ca2c64 1217 SCM_VALIDATE_STRING (1, string);
3ee86942 1218 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 1219}
1bbd0b84 1220#undef FUNC_NAME
0f2d19dd 1221
f8ba2bb9 1222SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
9c44cd45
MG
1223 (SCM string),
1224 "Return the bytes used to represent a character in @var{string}."
1225 "This will return 1 or 4.")
f8ba2bb9 1226#define FUNC_NAME s_scm_string_bytes_per_char
9c44cd45
MG
1227{
1228 SCM_VALIDATE_STRING (1, string);
1229 if (!scm_i_is_narrow_string (string))
1230 return scm_from_int (4);
1231
1232 return scm_from_int (1);
1233}
1234#undef FUNC_NAME
1235
3ee86942
MV
1236size_t
1237scm_c_string_length (SCM string)
1238{
1239 if (!IS_STRING (string))
1240 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1241 return STRING_LENGTH (string);
1242}
1243
bd9e24b3 1244SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1245 (SCM str, SCM k),
9c44cd45
MG
1246 "Return character @var{k} of @var{str} using zero-origin\n"
1247 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1248#define FUNC_NAME s_scm_string_ref
0f2d19dd 1249{
3ae3166b 1250 size_t len;
a55c2b68 1251 unsigned long idx;
bd9e24b3 1252
d1ca2c64 1253 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1254
1255 len = scm_i_string_length (str);
1256 if (SCM_LIKELY (len > 0))
1257 idx = scm_to_unsigned_integer (k, 0, len - 1);
1258 else
1259 scm_out_of_range (NULL, k);
1260
9c44cd45
MG
1261 if (scm_i_is_narrow_string (str))
1262 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1263 else
1264 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1265}
1bbd0b84 1266#undef FUNC_NAME
0f2d19dd 1267
3ee86942
MV
1268SCM
1269scm_c_string_ref (SCM str, size_t p)
1270{
1271 if (p >= scm_i_string_length (str))
1272 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1273 if (scm_i_is_narrow_string (str))
1274 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1275 else
1276 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1277
3ee86942 1278}
f0942910 1279
3b3b36dd 1280SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1281 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1282 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1283 "an unspecified value. @var{k} must be a valid index of\n"
1284 "@var{str}.")
1bbd0b84 1285#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1286{
3ae3166b 1287 size_t len;
a55c2b68
MV
1288 unsigned long idx;
1289
f0942910 1290 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1291
1292 len = scm_i_string_length (str);
1293 if (SCM_LIKELY (len > 0))
1294 idx = scm_to_unsigned_integer (k, 0, len - 1);
1295 else
1296 scm_out_of_range (NULL, k);
1297
34d19ef6 1298 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1299 str = scm_i_string_start_writing (str);
1300 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1301 scm_i_string_stop_writing ();
1302
0f2d19dd
JB
1303 return SCM_UNSPECIFIED;
1304}
1bbd0b84 1305#undef FUNC_NAME
0f2d19dd 1306
3ee86942
MV
1307void
1308scm_c_string_set_x (SCM str, size_t p, SCM chr)
1309{
1310 if (p >= scm_i_string_length (str))
1311 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1312 str = scm_i_string_start_writing (str);
1313 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1314 scm_i_string_stop_writing ();
3ee86942 1315}
0f2d19dd 1316
3b3b36dd 1317SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1318 (SCM str, SCM start, SCM end),
1319 "Return a newly allocated string formed from the characters\n"
1320 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1321 "ending with index @var{end} (exclusive).\n"
1322 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1323 "exact integers satisfying:\n\n"
1324 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1325#define FUNC_NAME s_scm_substring
0f2d19dd 1326{
3ee86942 1327 size_t len, from, to;
685c0d71 1328
d1ca2c64 1329 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1330 len = scm_i_string_length (str);
1331 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1332 if (SCM_UNBNDP (end))
3ee86942 1333 to = len;
a55c2b68 1334 else
3ee86942
MV
1335 to = scm_to_unsigned_integer (end, from, len);
1336 return scm_i_substring (str, from, to);
0f2d19dd 1337}
1bbd0b84 1338#undef FUNC_NAME
0f2d19dd 1339
ed35de72
MV
1340SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1341 (SCM str, SCM start, SCM end),
1342 "Return a newly allocated string formed from the characters\n"
1343 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1344 "ending with index @var{end} (exclusive).\n"
1345 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1346 "exact integers satisfying:\n"
1347 "\n"
1348 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1349 "\n"
1350 "The returned string is read-only.\n")
1351#define FUNC_NAME s_scm_substring_read_only
1352{
1353 size_t len, from, to;
1354
1355 SCM_VALIDATE_STRING (1, str);
1356 len = scm_i_string_length (str);
1357 from = scm_to_unsigned_integer (start, 0, len);
1358 if (SCM_UNBNDP (end))
1359 to = len;
1360 else
1361 to = scm_to_unsigned_integer (end, from, len);
1362 return scm_i_substring_read_only (str, from, to);
1363}
1364#undef FUNC_NAME
1365
3ee86942
MV
1366SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1367 (SCM str, SCM start, SCM end),
1368 "Return a newly allocated string formed from the characters\n"
1369 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1370 "ending with index @var{end} (exclusive).\n"
1371 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1372 "exact integers satisfying:\n\n"
1373 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1374#define FUNC_NAME s_scm_substring_copy
1375{
e1b29f6a
MV
1376 /* For the Scheme version, START is mandatory, but for the C
1377 version, it is optional. See scm_string_copy in srfi-13.c for a
1378 rationale.
1379 */
1380
1381 size_t from, to;
3ee86942
MV
1382
1383 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1384 scm_i_get_substring_spec (scm_i_string_length (str),
1385 start, &from, end, &to);
3ee86942
MV
1386 return scm_i_substring_copy (str, from, to);
1387}
1388#undef FUNC_NAME
1389
1390SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1391 (SCM str, SCM start, SCM end),
1392 "Return string that indirectly refers to the characters\n"
1393 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1394 "ending with index @var{end} (exclusive).\n"
1395 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1396 "exact integers satisfying:\n\n"
1397 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1398#define FUNC_NAME s_scm_substring_shared
1399{
1400 size_t len, from, to;
1401
1402 SCM_VALIDATE_STRING (1, str);
1403 len = scm_i_string_length (str);
1404 from = scm_to_unsigned_integer (start, 0, len);
1405 if (SCM_UNBNDP (end))
1406 to = len;
1407 else
1408 to = scm_to_unsigned_integer (end, from, len);
1409 return scm_i_substring_shared (str, from, to);
1410}
1411#undef FUNC_NAME
685c0d71 1412
3b3b36dd 1413SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1414 (SCM args),
9c44cd45 1415 "Return a newly allocated string whose characters form the\n"
0d26a824 1416 "concatenation of the given strings, @var{args}.")
1bbd0b84 1417#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1418{
1419 SCM res;
9c44cd45
MG
1420 size_t len = 0;
1421 int wide = 0;
c829a427 1422 SCM l, s;
bd4911ef 1423 size_t i;
9909c395
MG
1424 union
1425 {
1426 char *narrow;
1427 scm_t_wchar *wide;
1428 } data;
af45e3b0
DH
1429
1430 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1431 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1432 {
1433 s = SCM_CAR (l);
1434 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1435 len += scm_i_string_length (s);
1436 if (!scm_i_is_narrow_string (s))
1437 wide = 1;
c829a427 1438 }
9909c395 1439 data.narrow = NULL;
9c44cd45 1440 if (!wide)
9909c395 1441 res = scm_i_make_string (len, &data.narrow);
9c44cd45 1442 else
9909c395 1443 res = scm_i_make_wide_string (len, &data.wide);
9c44cd45
MG
1444
1445 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1446 {
edea856c 1447 size_t len;
c829a427 1448 s = SCM_CAR (l);
3ee86942 1449 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1450 len = scm_i_string_length (s);
9c44cd45
MG
1451 if (!wide)
1452 {
9909c395
MG
1453 memcpy (data.narrow, scm_i_string_chars (s), len);
1454 data.narrow += len;
9c44cd45
MG
1455 }
1456 else
1457 {
1458 if (scm_i_is_narrow_string (s))
1459 {
1460 for (i = 0; i < scm_i_string_length (s); i++)
9909c395 1461 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
9c44cd45
MG
1462 }
1463 else
9909c395 1464 u32_cpy ((scm_t_uint32 *) data.wide,
9c44cd45 1465 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
9909c395 1466 data.wide += len;
9c44cd45 1467 }
c829a427
MV
1468 scm_remember_upto_here_1 (s);
1469 }
0f2d19dd
JB
1470 return res;
1471}
1bbd0b84 1472#undef FUNC_NAME
0f2d19dd 1473
c829a427
MV
1474int
1475scm_is_string (SCM obj)
1476{
3ee86942 1477 return IS_STRING (obj);
c829a427 1478}
24933780 1479
fac32b51
MG
1480SCM
1481scm_i_from_stringn (const char *str, size_t len, const char *encoding,
1482 scm_t_string_failed_conversion_handler handler)
587a3355
MG
1483{
1484 size_t u32len, i;
1485 scm_t_wchar *u32;
1486 int wide = 0;
1487 SCM res;
1488
fac32b51
MG
1489 if (len == 0)
1490 return scm_nullstr;
1491
889975e5
MG
1492 if (encoding == NULL)
1493 {
1494 /* If encoding is null, use Latin-1. */
1495 char *buf;
1496 res = scm_i_make_string (len, &buf);
1497 memcpy (buf, str, len);
1498 return res;
1499 }
1500
587a3355
MG
1501 u32len = 0;
1502 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1503 (enum iconv_ilseq_handler)
1504 handler,
1505 str, len,
1506 NULL,
1507 NULL, &u32len);
1508
1509 if (u32 == NULL)
1510 {
1511 if (errno == ENOMEM)
1512 scm_memory_error ("locale string conversion");
1513 else
1514 {
889975e5 1515 /* There are invalid sequences in the input string. */
587a3355
MG
1516 SCM errstr;
1517 char *dst;
587a3355
MG
1518 errstr = scm_i_make_string (len, &dst);
1519 memcpy (dst, str, len);
1520 scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
1521 scm_list_2 (scm_from_locale_string (encoding),
1522 errstr));
1523 scm_remember_upto_here_1 (errstr);
1524 }
1525 }
1526
1527 i = 0;
1528 while (i < u32len)
1529 if (u32[i++] > 0xFF)
1530 {
1531 wide = 1;
1532 break;
1533 }
1534
1535 if (!wide)
1536 {
1537 char *dst;
1538 res = scm_i_make_string (u32len, &dst);
1539 for (i = 0; i < u32len; i ++)
1540 dst[i] = (unsigned char) u32[i];
1541 dst[u32len] = '\0';
1542 }
1543 else
1544 {
1545 scm_t_wchar *wdst;
1546 res = scm_i_make_wide_string (u32len, &wdst);
1547 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1548 wdst[u32len] = 0;
1549 }
1550
1551 free (u32);
1552 return res;
1553}
1554
c829a427
MV
1555SCM
1556scm_from_locale_stringn (const char *str, size_t len)
1557{
889975e5
MG
1558 const char *enc;
1559 scm_t_string_failed_conversion_handler hndl;
1560 SCM inport;
1561 scm_t_port *pt;
4d4528e7 1562
9c44cd45 1563 if (len == (size_t) -1)
c829a427 1564 len = strlen (str);
9c44cd45
MG
1565 if (len == 0)
1566 return scm_nullstr;
1567
889975e5
MG
1568 inport = scm_current_input_port ();
1569 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1570 {
1571 pt = SCM_PTAB_ENTRY (inport);
1572 enc = pt->encoding;
1573 hndl = pt->ilseq_handler;
1574 }
1575 else
1576 {
1577 enc = NULL;
1578 hndl = SCM_FAILED_CONVERSION_ERROR;
1579 }
1580
fac32b51 1581 return scm_i_from_stringn (str, len, enc, hndl);
c829a427 1582}
4d4528e7 1583
c829a427
MV
1584SCM
1585scm_from_locale_string (const char *str)
4d4528e7 1586{
9c44cd45
MG
1587 if (str == NULL)
1588 return scm_nullstr;
1589
c829a427
MV
1590 return scm_from_locale_stringn (str, -1);
1591}
4d4528e7 1592
587a3355
MG
1593SCM
1594scm_i_from_utf8_string (const scm_t_uint8 *str)
1595{
fac32b51 1596 return scm_i_from_stringn ((const char *) str,
587a3355
MG
1597 strlen ((char *) str), "UTF-8",
1598 SCM_FAILED_CONVERSION_ERROR);
1599}
1600
50b1996f
MG
1601/* Create a new scheme string from the C string STR. The memory of
1602 STR may be used directly as storage for the new string. */
c829a427
MV
1603SCM
1604scm_take_locale_stringn (char *str, size_t len)
1605{
48ddf0d9
KR
1606 SCM buf, res;
1607
9c44cd45 1608 if (len == (size_t) -1)
48ddf0d9 1609 len = strlen (str);
c829a427
MV
1610 else
1611 {
48ddf0d9
KR
1612 /* Ensure STR is null terminated. A realloc for 1 extra byte should
1613 often be satisfied from the alignment padding after the block, with
1614 no actual data movement. */
9c44cd45 1615 str = scm_realloc (str, len + 1);
48ddf0d9 1616 str[len] = '\0';
c829a427 1617 }
c829a427 1618
fd0a5bbc 1619 buf = scm_i_take_stringbufn (str, len);
3ee86942 1620 res = scm_double_cell (STRING_TAG,
9c44cd45 1621 SCM_UNPACK (buf), (scm_t_bits) 0, (scm_t_bits) len);
c829a427
MV
1622 return res;
1623}
1624
48ddf0d9
KR
1625SCM
1626scm_take_locale_string (char *str)
1627{
1628 return scm_take_locale_stringn (str, -1);
1629}
1630
9c44cd45
MG
1631/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1632 and \UXXXXXX. */
1633static void
1634unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1635{
1636 char *before, *after;
1637 size_t i, j;
1638
1639 before = *bufp;
1640 after = *bufp;
1641 i = 0;
1642 j = 0;
1643 while (i < *lenp)
1644 {
1645 if ((i <= *lenp - 6)
1646 && before[i] == '\\'
1647 && before[i + 1] == 'u'
1648 && before[i + 2] == '0' && before[i + 3] == '0')
1649 {
1650 /* Convert \u00NN to \xNN */
1651 after[j] = '\\';
1652 after[j + 1] = 'x';
30a6b9ca
MG
1653 after[j + 2] = tolower ((int) before[i + 4]);
1654 after[j + 3] = tolower ((int) before[i + 5]);
9c44cd45
MG
1655 i += 6;
1656 j += 4;
1657 }
1658 else if ((i <= *lenp - 10)
1659 && before[i] == '\\'
1660 && before[i + 1] == 'U'
1661 && before[i + 2] == '0' && before[i + 3] == '0')
1662 {
1663 /* Convert \U00NNNNNN to \UNNNNNN */
1664 after[j] = '\\';
1665 after[j + 1] = 'U';
30a6b9ca
MG
1666 after[j + 2] = tolower ((int) before[i + 4]);
1667 after[j + 3] = tolower ((int) before[i + 5]);
1668 after[j + 4] = tolower ((int) before[i + 6]);
1669 after[j + 5] = tolower ((int) before[i + 7]);
1670 after[j + 6] = tolower ((int) before[i + 8]);
1671 after[j + 7] = tolower ((int) before[i + 9]);
9c44cd45
MG
1672 i += 10;
1673 j += 8;
1674 }
1675 else
1676 {
1677 after[j] = before[i];
1678 i++;
1679 j++;
1680 }
1681 }
1682 *lenp = j;
1683 after = scm_realloc (after, j);
1684}
1685
c829a427 1686char *
fac32b51 1687scm_to_locale_stringn (SCM str, size_t *lenp)
c829a427 1688{
889975e5
MG
1689 SCM outport;
1690 scm_t_port *pt;
9c44cd45
MG
1691 const char *enc;
1692
889975e5
MG
1693 outport = scm_current_output_port ();
1694 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1695 {
1696 pt = SCM_PTAB_ENTRY (outport);
1697 enc = pt->encoding;
1698 }
1699 else
1700 enc = NULL;
9c44cd45 1701
889975e5
MG
1702 return scm_to_stringn (str, lenp,
1703 enc,
1704 scm_i_get_conversion_strategy (SCM_BOOL_F));
9c44cd45
MG
1705}
1706
1707/* Low-level scheme to C string conversion function. */
1708char *
587a3355 1709scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
eca29b02 1710 scm_t_string_failed_conversion_handler handler)
9c44cd45 1711{
9c44cd45
MG
1712 char *buf;
1713 size_t ilen, len, i;
889975e5
MG
1714 int ret;
1715 const char *enc;
4d4528e7 1716
3ee86942 1717 if (!scm_is_string (str))
c829a427 1718 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1719 ilen = scm_i_string_length (str);
1720
1721 if (ilen == 0)
1722 {
1723 buf = scm_malloc (1);
1724 buf[0] = '\0';
1725 if (lenp)
1726 *lenp = 0;
1727 return buf;
1728 }
587a3355 1729
c829a427 1730 if (lenp == NULL)
9c44cd45
MG
1731 for (i = 0; i < ilen; i++)
1732 if (scm_i_string_ref (str, i) == '\0')
1733 scm_misc_error (NULL,
1734 "string contains #\\nul character: ~S",
1735 scm_list_1 (str));
1736
889975e5 1737 if (scm_i_is_narrow_string (str) && (encoding == NULL))
c829a427 1738 {
889975e5
MG
1739 /* If using native Latin-1 encoding, just copy the string
1740 contents. */
9c44cd45
MG
1741 if (lenp)
1742 {
1743 buf = scm_malloc (ilen);
1744 memcpy (buf, scm_i_string_chars (str), ilen);
1745 *lenp = ilen;
1746 return buf;
1747 }
1748 else
1749 {
1750 buf = scm_malloc (ilen + 1);
1751 memcpy (buf, scm_i_string_chars (str), ilen);
1752 buf[ilen] = '\0';
1753 return buf;
1754 }
c829a427 1755 }
9c44cd45 1756
587a3355 1757
9c44cd45
MG
1758 buf = NULL;
1759 len = 0;
889975e5
MG
1760 enc = encoding;
1761 if (enc == NULL)
1762 enc = "ISO-8859-1";
1763 if (scm_i_is_narrow_string (str))
1764 {
1765 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1766 "ISO-8859-1", enc,
1767 (enum iconv_ilseq_handler) handler, NULL,
1768 &buf, &len);
9c44cd45 1769
889975e5
MG
1770 if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1771 unistring_escapes_to_guile_escapes (&buf, &len);
9c44cd45 1772
889975e5
MG
1773 if (ret != 0)
1774 {
1775 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1776 scm_list_2 (scm_from_locale_string (enc),
1777 str));
1778 }
1779 }
1780 else
1781 {
1782 buf = u32_conv_to_encoding (enc,
1783 (enum iconv_ilseq_handler) handler,
1784 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1785 ilen,
1786 NULL,
1787 NULL, &len);
1788 if (buf == NULL)
1789 {
1790 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1791 scm_list_2 (scm_from_locale_string (enc),
1792 str));
1793 }
1794 }
9c44cd45 1795 if (lenp)
4d4528e7 1796 *lenp = len;
9c44cd45
MG
1797 else
1798 {
1799 buf = scm_realloc (buf, len + 1);
1800 buf[len] = '\0';
1801 }
24933780 1802
c829a427 1803 scm_remember_upto_here_1 (str);
9c44cd45 1804 return buf;
4d4528e7 1805}
af68e5e5 1806
c829a427
MV
1807char *
1808scm_to_locale_string (SCM str)
1809{
1810 return scm_to_locale_stringn (str, NULL);
1811}
af68e5e5 1812
587a3355
MG
1813scm_t_uint8 *
1814scm_i_to_utf8_string (SCM str)
1815{
1816 char *u8str;
1817 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1818 return (scm_t_uint8 *) u8str;
1819}
1820
c829a427
MV
1821size_t
1822scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1823{
1824 size_t len;
9c44cd45 1825 char *result = NULL;
3ee86942 1826 if (!scm_is_string (str))
c829a427 1827 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1828 result = scm_to_locale_stringn (str, &len);
1829
1830 memcpy (buf, result, (len > max_len) ? max_len : len);
1831 free (result);
1832
c829a427
MV
1833 scm_remember_upto_here_1 (str);
1834 return len;
1835}
af68e5e5 1836
3ee86942
MV
1837/* converts C scm_array of strings to SCM scm_list of strings. */
1838/* If argc < 0, a null terminated scm_array is assumed. */
9c44cd45 1839SCM
3ee86942
MV
1840scm_makfromstrs (int argc, char **argv)
1841{
1842 int i = argc;
1843 SCM lst = SCM_EOL;
1844 if (0 > i)
1845 for (i = 0; argv[i]; i++);
1846 while (i--)
1847 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1848 return lst;
1849}
1850
c829a427
MV
1851/* Return a newly allocated array of char pointers to each of the strings
1852 in args, with a terminating NULL pointer. */
1853
1854char **
1855scm_i_allocate_string_pointers (SCM list)
af68e5e5 1856{
c829a427
MV
1857 char **result;
1858 int len = scm_ilength (list);
1859 int i;
1860
1861 if (len < 0)
1862 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1863
661ae7ab 1864 scm_dynwind_begin (0);
c829a427
MV
1865
1866 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
1867 result[len] = NULL;
661ae7ab 1868 scm_dynwind_unwind_handler (free, result, 0);
c829a427
MV
1869
1870 /* The list might be have been modified in another thread, so
1871 we check LIST before each access.
1872 */
d2e53ed6 1873 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427
MV
1874 {
1875 result[i] = scm_to_locale_string (SCM_CAR (list));
1876 list = SCM_CDR (list);
1877 }
1878
661ae7ab 1879 scm_dynwind_end ();
c829a427 1880 return result;
af68e5e5 1881}
e53cc817 1882
c829a427
MV
1883void
1884scm_i_free_string_pointers (char **pointers)
1885{
1886 int i;
1887
1888 for (i = 0; pointers[i]; i++)
1889 free (pointers[i]);
1890 free (pointers);
1891}
24933780 1892
6f14f578
MV
1893void
1894scm_i_get_substring_spec (size_t len,
1895 SCM start, size_t *cstart,
1896 SCM end, size_t *cend)
1897{
1898 if (SCM_UNBNDP (start))
1899 *cstart = 0;
1900 else
1901 *cstart = scm_to_unsigned_integer (start, 0, len);
1902
1903 if (SCM_UNBNDP (end))
1904 *cend = len;
1905 else
1906 *cend = scm_to_unsigned_integer (end, *cstart, len);
1907}
1908
3ee86942
MV
1909#if SCM_ENABLE_DEPRECATED
1910
556d75db
MV
1911/* When these definitions are removed, it becomes reasonable to use
1912 read-only strings for string literals. For that, change the reader
1913 to create string literals with scm_c_substring_read_only instead of
1914 with scm_c_substring_copy.
1915*/
1916
3ee86942 1917int
fe78c51a 1918scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1919{
1920 scm_c_issue_deprecation_warning
1921 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1922
2616f0e0 1923 return scm_is_string (str);
3ee86942
MV
1924}
1925
1926char *
fe78c51a 1927scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1928{
1929 char *chars;
1930
1931 scm_c_issue_deprecation_warning
1932 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1933
2616f0e0
MV
1934 /* We don't accept shared substrings here since they are not
1935 null-terminated.
1936 */
1937 if (IS_SH_STRING (str))
1938 scm_misc_error (NULL,
1939 "SCM_STRING_CHARS does not work with shared substrings.",
1940 SCM_EOL);
1941
877f06c3 1942 /* We explicitly test for read-only strings to produce a better
556d75db
MV
1943 error message.
1944 */
1945
1946 if (IS_RO_STRING (str))
1947 scm_misc_error (NULL,
1948 "SCM_STRING_CHARS does not work with read-only strings.",
1949 SCM_EOL);
1950
2616f0e0 1951 /* The following is still wrong, of course...
3ee86942 1952 */
9c44cd45 1953 str = scm_i_string_start_writing (str);
3ee86942
MV
1954 chars = scm_i_string_writable_chars (str);
1955 scm_i_string_stop_writing ();
1956 return chars;
1957}
1958
1959size_t
fe78c51a 1960scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1961{
1962 scm_c_issue_deprecation_warning
1963 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1964 return scm_c_string_length (str);
1965}
1966
1967#endif
1968
2a610be5
AW
1969static SCM
1970string_handle_ref (scm_t_array_handle *h, size_t index)
1971{
1972 return scm_c_string_ref (h->array, index);
1973}
1974
1975static void
1976string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
1977{
1978 scm_c_string_set_x (h->array, index, val);
1979}
1980
1981static void
1982string_get_handle (SCM v, scm_t_array_handle *h)
1983{
1984 h->array = v;
1985 h->ndims = 1;
1986 h->dims = &h->dim0;
1987 h->dim0.lbnd = 0;
1988 h->dim0.ubnd = scm_c_string_length (v) - 1;
1989 h->dim0.inc = 1;
1990 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
1991 h->elements = h->writable_elements = NULL;
1992}
1993
1994SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f & ~2,
1995 string_handle_ref, string_handle_set,
1996 string_get_handle);
f45eccff 1997SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string);
2a610be5 1998
0f2d19dd
JB
1999void
2000scm_init_strings ()
0f2d19dd 2001{
3ee86942 2002 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 2003
a0599745 2004#include "libguile/strings.x"
0f2d19dd
JB
2005}
2006
89e00824
ML
2007
2008/*
2009 Local Variables:
2010 c-file-style: "gnu"
2011 End:
2012*/