further boot cleanups
[bpt/guile.git] / libguile / strings.c
CommitLineData
2a776823 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
faf2c9d7 25#include <string.h>
3ee86942 26#include <stdio.h>
9c44cd45
MG
27#include <ctype.h>
28#include <unistr.h>
eca29b02 29#include <uniconv.h>
faf2c9d7 30
889975e5
MG
31#include "striconveh.h"
32
a0599745
MD
33#include "libguile/_scm.h"
34#include "libguile/chars.h"
7c33806a 35#include "libguile/root.h"
a0599745 36#include "libguile/strings.h"
f45eccff 37#include "libguile/generalized-vectors.h"
1afff620 38#include "libguile/deprecation.h"
a0599745 39#include "libguile/validate.h"
1afff620 40
0f2d19dd
JB
41\f
42
43/* {Strings}
44 */
45
3ee86942
MV
46
47/* Stringbufs
48 *
49 * XXX - keeping an accurate refcount during GC seems to be quite
50 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 51 * shared, not whether it definitely is.
3ee86942
MV
52 *
53 * The scheme I (mvo) tried to keep an accurate reference count would
54 * recount all strings that point to a stringbuf during the mark-phase
55 * of the GC. This was done since one cannot access the stringbuf of
56 * a string when that string is freed (in order to decrease the
57 * reference count). The memory of the stringbuf might have been
58 * reused already for something completely different.
59 *
60 * This recounted worked for a small number of threads beating on
61 * cow-strings, but it failed randomly with more than 10 threads, say.
62 * I couldn't figure out what went wrong, so I used the conservative
63 * approach implemented below.
50b1996f 64 *
ba54a202
LC
65 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
66 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
67 * strings.
3ee86942
MV
68 */
69
ba54a202
LC
70/* The size in words of the stringbuf header (type tag + size). */
71#define STRINGBUF_HEADER_SIZE 2U
72
73#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
74
35920c00 75#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
5f236208 76#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
3ee86942
MV
77
78#define STRINGBUF_TAG scm_tc7_stringbuf
79#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
9c44cd45 80#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942 81
ba54a202
LC
82#define STRINGBUF_CHARS(buf) ((unsigned char *) \
83 SCM_CELL_OBJECT_LOC (buf, \
84 STRINGBUF_HEADER_SIZE))
85#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
3ee86942 86
ba54a202 87#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CHARS (buf))
3ee86942 88
9b41542f
LC
89#define SET_STRINGBUF_SHARED(buf) \
90 do \
91 { \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
96 } \
97 while (0)
3ee86942 98
56a3dcd4 99#ifdef SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
100static size_t lenhist[1001];
101#endif
102
50b1996f
MG
103/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
104 characters. */
3ee86942
MV
105static SCM
106make_stringbuf (size_t len)
0f2d19dd 107{
3ee86942
MV
108 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
109 scm_i_symbol_chars, all stringbufs are null-terminated. Once
110 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
111 has been changed for scm_i_symbol_chars, this null-termination
112 can be dropped.
113 */
114
ba54a202
LC
115 SCM buf;
116
56a3dcd4 117#ifdef SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
118 if (len < 1000)
119 lenhist[len]++;
120 else
121 lenhist[1000]++;
122#endif
0f2d19dd 123
ba54a202
LC
124 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
125 "string"));
126
127 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
128 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
129
130 STRINGBUF_CHARS (buf)[len] = 0;
131
132 return buf;
3ee86942 133}
e53cc817 134
50b1996f
MG
135/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
136 characters. */
9c44cd45
MG
137static SCM
138make_wide_stringbuf (size_t len)
139{
ba54a202
LC
140 SCM buf;
141 size_t raw_len;
142
56a3dcd4 143#ifdef SCM_STRING_LENGTH_HISTOGRAM
9c44cd45
MG
144 if (len < 1000)
145 lenhist[len]++;
146 else
147 lenhist[1000]++;
148#endif
149
ba54a202
LC
150 raw_len = (len + 1) * sizeof (scm_t_wchar);
151 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
152 "string"));
153
154 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
155 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
156
157 STRINGBUF_WIDE_CHARS (buf)[len] = 0;
158
159 return buf;
9c44cd45
MG
160}
161
ba54a202
LC
162/* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
163 characters from BUF. */
164static SCM
165wide_stringbuf (SCM buf)
9c44cd45 166{
ba54a202 167 SCM new_buf;
9c44cd45
MG
168
169 if (STRINGBUF_WIDE (buf))
ba54a202
LC
170 new_buf = buf;
171 else
9c44cd45 172 {
ba54a202
LC
173 size_t i, len;
174 scm_t_wchar *mem;
9c44cd45 175
ba54a202 176 len = STRINGBUF_LENGTH (buf);
9c44cd45 177
ba54a202 178 new_buf = make_wide_stringbuf (len);
9c44cd45 179
ba54a202 180 mem = STRINGBUF_WIDE_CHARS (new_buf);
9c44cd45 181 for (i = 0; i < len; i++)
ba54a202 182 mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
9c44cd45 183 mem[len] = 0;
9c44cd45 184 }
ba54a202
LC
185
186 return new_buf;
3ee86942 187}
bd9e24b3 188
ba54a202
LC
189/* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
190 characters from BUF, if possible. */
191static SCM
587a3355
MG
192narrow_stringbuf (SCM buf)
193{
ba54a202 194 SCM new_buf;
587a3355
MG
195
196 if (!STRINGBUF_WIDE (buf))
ba54a202
LC
197 new_buf = buf;
198 else
199 {
200 size_t i, len;
201 scm_t_wchar *wmem;
202 unsigned char *mem;
587a3355 203
ba54a202
LC
204 len = STRINGBUF_LENGTH (buf);
205 wmem = STRINGBUF_WIDE_CHARS (buf);
587a3355 206
ba54a202
LC
207 for (i = 0; i < len; i++)
208 if (wmem[i] > 0xFF)
209 /* BUF cannot be narrowed. */
210 return buf;
587a3355 211
ba54a202 212 new_buf = make_stringbuf (len);
587a3355 213
ba54a202
LC
214 mem = STRINGBUF_CHARS (new_buf);
215 for (i = 0; i < len; i++)
216 mem[i] = (unsigned char) wmem[i];
217 mem[len] = 0;
218 }
219
220 return new_buf;
587a3355
MG
221}
222
9de87eea 223scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 224
ba54a202 225\f
3ee86942
MV
226/* Copy-on-write strings.
227 */
bd9e24b3 228
3ee86942 229#define STRING_TAG scm_tc7_string
bd9e24b3 230
3ee86942
MV
231#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
232#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
233#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 234
3ee86942
MV
235#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
236#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
237
238#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
239
ed35de72
MV
240/* Read-only strings.
241 */
242
35920c00 243#define RO_STRING_TAG scm_tc7_ro_string
ed35de72
MV
244#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
245
e1b29f6a
MV
246/* Mutation-sharing substrings
247 */
248
249#define SH_STRING_TAG (scm_tc7_string + 0x100)
250
251#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
252/* START and LENGTH as for STRINGs. */
253
254#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
255
50b1996f
MG
256/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
257 characters. CHARSP, if not NULL, will be set to location of the
258 char array. */
3ee86942
MV
259SCM
260scm_i_make_string (size_t len, char **charsp)
261{
262 SCM buf = make_stringbuf (len);
263 SCM res;
264 if (charsp)
f59cf998 265 *charsp = (char *) STRINGBUF_CHARS (buf);
3ee86942
MV
266 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
267 (scm_t_bits)0, (scm_t_bits) len);
268 return res;
0f2d19dd
JB
269}
270
50b1996f
MG
271/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
272 characters. CHARSP, if not NULL, will be set to location of the
273 character array. */
9c44cd45 274SCM
50b1996f 275scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
9c44cd45
MG
276{
277 SCM buf = make_wide_stringbuf (len);
278 SCM res;
279 if (charsp)
280 *charsp = STRINGBUF_WIDE_CHARS (buf);
281 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
282 (scm_t_bits) 0, (scm_t_bits) len);
283 return res;
284}
285
3ee86942
MV
286static void
287validate_substring_args (SCM str, size_t start, size_t end)
288{
289 if (!IS_STRING (str))
290 scm_wrong_type_arg_msg (NULL, 0, str, "string");
291 if (start > STRING_LENGTH (str))
292 scm_out_of_range (NULL, scm_from_size_t (start));
293 if (end > STRING_LENGTH (str) || end < start)
294 scm_out_of_range (NULL, scm_from_size_t (end));
295}
0f2d19dd 296
e1b29f6a
MV
297static inline void
298get_str_buf_start (SCM *str, SCM *buf, size_t *start)
299{
300 *start = STRING_START (*str);
301 if (IS_SH_STRING (*str))
302 {
303 *str = SH_STRING_STRING (*str);
304 *start += STRING_START (*str);
305 }
306 *buf = STRING_STRINGBUF (*str);
307}
308
3ee86942
MV
309SCM
310scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 311{
e1b29f6a
MV
312 SCM buf;
313 size_t str_start;
314 get_str_buf_start (&str, &buf, &str_start);
9de87eea 315 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 316 SET_STRINGBUF_SHARED (buf);
9de87eea 317 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 318 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
319 (scm_t_bits)str_start + start,
320 (scm_t_bits) end - start);
0f2d19dd
JB
321}
322
ed35de72
MV
323SCM
324scm_i_substring_read_only (SCM str, size_t start, size_t end)
325{
45a9f430
LC
326 SCM buf;
327 size_t str_start;
328 get_str_buf_start (&str, &buf, &str_start);
329 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
330 SET_STRINGBUF_SHARED (buf);
331 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
332 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
333 (scm_t_bits)str_start + start,
334 (scm_t_bits) end - start);
ed35de72
MV
335}
336
3ee86942
MV
337SCM
338scm_i_substring_copy (SCM str, size_t start, size_t end)
339{
340 size_t len = end - start;
edea856c 341 SCM buf, my_buf;
e1b29f6a
MV
342 size_t str_start;
343 get_str_buf_start (&str, &buf, &str_start);
9c44cd45
MG
344 if (scm_i_is_narrow_string (str))
345 {
346 my_buf = make_stringbuf (len);
347 memcpy (STRINGBUF_CHARS (my_buf),
348 STRINGBUF_CHARS (buf) + str_start + start, len);
349 }
350 else
351 {
352 my_buf = make_wide_stringbuf (len);
353 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
354 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
355 + start), len);
356 /* Even though this string is wide, the substring may be narrow.
50b1996f 357 Consider adding code to narrow the string. */
9c44cd45 358 }
3ee86942 359 scm_remember_upto_here_1 (buf);
9c44cd45
MG
360 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
361 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 362}
0f2d19dd 363
e1b29f6a
MV
364SCM
365scm_i_substring_shared (SCM str, size_t start, size_t end)
366{
367 if (start == 0 && end == STRING_LENGTH (str))
368 return str;
369 else
370 {
371 size_t len = end - start;
372 if (IS_SH_STRING (str))
373 {
374 start += STRING_START (str);
375 str = SH_STRING_STRING (str);
376 }
377 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
378 (scm_t_bits)start, (scm_t_bits) len);
379 }
380}
381
3ee86942
MV
382SCM
383scm_c_substring (SCM str, size_t start, size_t end)
384{
385 validate_substring_args (str, start, end);
386 return scm_i_substring (str, start, end);
387}
ee149d03 388
ed35de72
MV
389SCM
390scm_c_substring_read_only (SCM str, size_t start, size_t end)
391{
392 validate_substring_args (str, start, end);
393 return scm_i_substring_read_only (str, start, end);
394}
395
0f2d19dd 396SCM
3ee86942 397scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 398{
3ee86942
MV
399 validate_substring_args (str, start, end);
400 return scm_i_substring_copy (str, start, end);
401}
402
3ee86942
MV
403SCM
404scm_c_substring_shared (SCM str, size_t start, size_t end)
405{
406 validate_substring_args (str, start, end);
407 return scm_i_substring_shared (str, start, end);
408}
0f2d19dd 409
d6c74168 410\f
3ee86942
MV
411/* Internal accessors
412 */
413
50b1996f
MG
414/* Returns the number of characters in STR. This may be different
415 than the memory size of the string storage. */
3ee86942
MV
416size_t
417scm_i_string_length (SCM str)
0f2d19dd 418{
3ee86942 419 return STRING_LENGTH (str);
0f2d19dd
JB
420}
421
50b1996f
MG
422/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
423 encoding. False if it is 'wide', having a 32-bit UCS-4
424 encoding. */
9c44cd45
MG
425int
426scm_i_is_narrow_string (SCM str)
427{
428 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
429}
430
587a3355
MG
431/* Try to coerce a string to be narrow. It if is narrow already, do
432 nothing. If it is wide, shrink it to narrow if none of its
433 characters are above 0xFF. Return true if the string is narrow or
434 was made to be narrow. */
435int
436scm_i_try_narrow_string (SCM str)
437{
ba54a202 438 SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
587a3355
MG
439
440 return scm_i_is_narrow_string (str);
441}
442
50b1996f
MG
443/* Returns a pointer to the 8-bit Latin-1 encoded character array of
444 STR. */
3ee86942
MV
445const char *
446scm_i_string_chars (SCM str)
447{
448 SCM buf;
e1b29f6a
MV
449 size_t start;
450 get_str_buf_start (&str, &buf, &start);
9c44cd45 451 if (scm_i_is_narrow_string (str))
f59cf998 452 return (const char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
453 else
454 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
455 scm_list_1 (str));
456 return NULL;
3ee86942 457}
b00418df 458
50b1996f
MG
459/* Returns a pointer to the 32-bit UCS-4 encoded character array of
460 STR. */
9c44cd45
MG
461const scm_t_wchar *
462scm_i_string_wide_chars (SCM str)
463{
464 SCM buf;
465 size_t start;
466
467 get_str_buf_start (&str, &buf, &start);
468 if (!scm_i_is_narrow_string (str))
f59cf998 469 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
9c44cd45
MG
470 else
471 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
472 scm_list_1 (str));
473}
474
475/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
476 a new string buffer, so that it can be modified without modifying
50b1996f
MG
477 other strings. Also, lock the string mutex. Later, one must call
478 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
479SCM
480scm_i_string_start_writing (SCM orig_str)
b00418df 481{
ed35de72 482 SCM buf, str = orig_str;
e1b29f6a 483 size_t start;
ed35de72 484
e1b29f6a 485 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
486 if (IS_RO_STRING (str))
487 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
488
9de87eea 489 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
490 if (STRINGBUF_SHARED (buf))
491 {
9c44cd45 492 /* Clone the stringbuf. */
3ee86942
MV
493 size_t len = STRING_LENGTH (str);
494 SCM new_buf;
495
9de87eea 496 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 497
9c44cd45
MG
498 if (scm_i_is_narrow_string (str))
499 {
500 new_buf = make_stringbuf (len);
501 memcpy (STRINGBUF_CHARS (new_buf),
502 STRINGBUF_CHARS (buf) + STRING_START (str), len);
503
504 }
505 else
506 {
507 new_buf = make_wide_stringbuf (len);
508 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
509 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
510 + STRING_START (str)), len);
511 }
3ee86942 512
3ee86942
MV
513 SET_STRING_STRINGBUF (str, new_buf);
514 start -= STRING_START (str);
902578f1
LC
515
516 /* FIXME: The following operations are not atomic, so other threads
517 looking at STR may see an inconsistent state. Nevertheless it can't
518 hurt much since (i) accessing STR while it is being mutated can't
519 yield a crash, and (ii) concurrent accesses to STR should be
520 protected by a mutex at the application level. The latter may not
521 apply when STR != ORIG_STR, though. */
3ee86942 522 SET_STRING_START (str, 0);
902578f1 523 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
524
525 buf = new_buf;
526
9de87eea 527 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 528 }
9c44cd45
MG
529 return orig_str;
530}
3ee86942 531
50b1996f 532/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
533char *
534scm_i_string_writable_chars (SCM str)
535{
536 SCM buf;
537 size_t start;
3ee86942 538
9c44cd45
MG
539 get_str_buf_start (&str, &buf, &start);
540 if (scm_i_is_narrow_string (str))
f59cf998 541 return (char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
542 else
543 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
544 scm_list_1 (str));
545 return NULL;
b00418df
DH
546}
547
50b1996f 548/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
549static scm_t_wchar *
550scm_i_string_writable_wide_chars (SCM str)
551{
552 SCM buf;
553 size_t start;
554
555 get_str_buf_start (&str, &buf, &start);
556 if (!scm_i_is_narrow_string (str))
557 return STRINGBUF_WIDE_CHARS (buf) + start;
558 else
1c7b216f 559 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
9c44cd45 560 scm_list_1 (str));
b00418df
DH
561}
562
50b1996f
MG
563/* Unlock the string mutex that was locked when
564 scm_i_string_start_writing was called. */
3ee86942
MV
565void
566scm_i_string_stop_writing (void)
567{
9de87eea 568 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 569}
b00418df 570
50b1996f 571/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
572scm_t_wchar
573scm_i_string_ref (SCM str, size_t x)
574{
575 if (scm_i_is_narrow_string (str))
576 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
577 else
578 return scm_i_string_wide_chars (str)[x];
579}
580
889975e5
MG
581/* Returns index+1 of the first char in STR that matches C, or
582 0 if the char is not found. */
583int
584scm_i_string_contains_char (SCM str, char ch)
585{
586 size_t i;
587 size_t len = scm_i_string_length (str);
588
589 i = 0;
590 if (scm_i_is_narrow_string (str))
591 {
592 while (i < len)
593 {
594 if (scm_i_string_chars (str)[i] == ch)
595 return i+1;
596 i++;
597 }
598 }
599 else
600 {
601 while (i < len)
602 {
603 if (scm_i_string_wide_chars (str)[i]
604 == (unsigned char) ch)
605 return i+1;
606 i++;
607 }
608 }
609 return 0;
610}
611
3f47e526
MG
612int
613scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
614{
615 if (scm_i_is_narrow_string (sstr))
616 {
617 const char *a = scm_i_string_chars (sstr) + start_x;
618 const char *b = cstr;
619 return strncmp (a, b, strlen(b));
620 }
621 else
622 {
623 size_t i;
624 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
625 const char *b = cstr;
626 for (i = 0; i < strlen (b); i++)
627 {
628 if (a[i] != (unsigned char) b[i])
629 return 1;
630 }
631 }
632 return 0;
633}
634
50b1996f 635/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
636void
637scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
638{
639 if (chr > 0xFF && scm_i_is_narrow_string (str))
ba54a202 640 SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
9c44cd45
MG
641
642 if (scm_i_is_narrow_string (str))
643 {
644 char *dst = scm_i_string_writable_chars (str);
587a3355 645 dst[p] = chr;
9c44cd45
MG
646 }
647 else
648 {
649 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
650 dst[p] = chr;
651 }
652}
653
ba54a202 654\f
3ee86942 655/* Symbols.
587a3355 656
3ee86942
MV
657 Basic symbol creation and accessing is done here, the rest is in
658 symbols.[hc]. This has been done to keep stringbufs and the
659 internals of strings and string-like objects confined to this file.
660*/
661
662#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
663
664SCM
6869328b
MV
665scm_i_make_symbol (SCM name, scm_t_bits flags,
666 unsigned long hash, SCM props)
3ee86942
MV
667{
668 SCM buf;
669 size_t start = STRING_START (name);
670 size_t length = STRING_LENGTH (name);
671
672 if (IS_SH_STRING (name))
673 {
674 name = SH_STRING_STRING (name);
675 start += STRING_START (name);
676 }
677 buf = SYMBOL_STRINGBUF (name);
678
679 if (start == 0 && length == STRINGBUF_LENGTH (buf))
680 {
681 /* reuse buf. */
9de87eea 682 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 683 SET_STRINGBUF_SHARED (buf);
9de87eea 684 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
685 }
686 else
687 {
688 /* make new buf. */
9c44cd45
MG
689 if (scm_i_is_narrow_string (name))
690 {
691 SCM new_buf = make_stringbuf (length);
692 memcpy (STRINGBUF_CHARS (new_buf),
693 STRINGBUF_CHARS (buf) + start, length);
694 buf = new_buf;
695 }
696 else
697 {
698 SCM new_buf = make_wide_stringbuf (length);
699 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
700 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
701 length);
702 buf = new_buf;
703 }
3ee86942 704 }
6869328b 705 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
706 (scm_t_bits) hash, SCM_UNPACK (props));
707}
708
fd0a5bbc
HWN
709SCM
710scm_i_c_make_symbol (const char *name, size_t len,
711 scm_t_bits flags, unsigned long hash, SCM props)
712{
713 SCM buf = make_stringbuf (len);
714 memcpy (STRINGBUF_CHARS (buf), name, len);
715
a284cc7e
LC
716 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
717 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
718}
719
50b1996f
MG
720/* Returns the number of characters in SYM. This may be different
721 from the memory size of SYM. */
3ee86942
MV
722size_t
723scm_i_symbol_length (SCM sym)
0f2d19dd 724{
3ee86942 725 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
726}
727
071bb6a8
LC
728size_t
729scm_c_symbol_length (SCM sym)
730#define FUNC_NAME "scm_c_symbol_length"
731{
732 SCM_VALIDATE_SYMBOL (1, sym);
733
734 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
735}
736#undef FUNC_NAME
737
50b1996f
MG
738/* True if the name of SYM is stored as a Latin-1 encoded string.
739 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
740int
741scm_i_is_narrow_symbol (SCM sym)
742{
743 SCM buf;
744
745 buf = SYMBOL_STRINGBUF (sym);
746 return !STRINGBUF_WIDE (buf);
747}
748
50b1996f
MG
749/* Returns a pointer to the 8-bit Latin-1 encoded character array that
750 contains the name of SYM. */
3ee86942
MV
751const char *
752scm_i_symbol_chars (SCM sym)
753{
9c44cd45
MG
754 SCM buf;
755
756 buf = SYMBOL_STRINGBUF (sym);
757 if (!STRINGBUF_WIDE (buf))
f59cf998 758 return (const char *) STRINGBUF_CHARS (buf);
9c44cd45
MG
759 else
760 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
761 scm_list_1 (sym));
762}
763
50b1996f
MG
764/* Return a pointer to the 32-bit UCS-4-encoded character array of a
765 symbol's name. */
9c44cd45
MG
766const scm_t_wchar *
767scm_i_symbol_wide_chars (SCM sym)
768{
769 SCM buf;
770
771 buf = SYMBOL_STRINGBUF (sym);
772 if (STRINGBUF_WIDE (buf))
f59cf998 773 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
9c44cd45
MG
774 else
775 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
776 scm_list_1 (sym));
3ee86942 777}
1cc91f1b 778
be54b15d 779SCM
3ee86942 780scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 781{
3ee86942 782 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 783 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 784 SET_STRINGBUF_SHARED (buf);
9de87eea 785 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 786 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
787 (scm_t_bits)start, (scm_t_bits) end - start);
788}
be54b15d 789
50b1996f 790/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
791scm_t_wchar
792scm_i_symbol_ref (SCM sym, size_t x)
793{
794 if (scm_i_is_narrow_symbol (sym))
795 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
796 else
797 return scm_i_symbol_wide_chars (sym)[x];
798}
799
3ee86942
MV
800/* Debugging
801 */
be54b15d 802
6ce6923b
MG
803SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
804 "Returns an association list containing debugging information\n"
805 "for @var{str}. The association list has the following entries."
806 "@table @code\n"
807 "@item string\n"
808 "The string itself.\n"
809 "@item start\n"
810 "The start index of the string into its stringbuf\n"
811 "@item length\n"
812 "The length of the string\n"
813 "@item shared\n"
814 "If this string is a substring, it returns its parent string.\n"
815 "Otherwise, it returns @code{#f}\n"
88ed5759
MG
816 "@item read-only\n"
817 "@code{#t} if the string is read-only\n"
6ce6923b
MG
818 "@item stringbuf-chars\n"
819 "A new string containing this string's stringbuf's characters\n"
820 "@item stringbuf-length\n"
821 "The number of characters in this stringbuf\n"
822 "@item stringbuf-shared\n"
823 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
824 "@item stringbuf-wide\n"
825 "@code{#t} if this stringbuf's characters are stored in a\n"
826 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
827 "buffer\n"
828 "@end table")
3ee86942
MV
829#define FUNC_NAME s_scm_sys_string_dump
830{
ba54a202 831 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
6ce6923b 832 SCM buf;
3ee86942 833 SCM_VALIDATE_STRING (1, str);
6ce6923b
MG
834
835 /* String info */
836 e1 = scm_cons (scm_from_locale_symbol ("string"),
837 str);
838 e2 = scm_cons (scm_from_locale_symbol ("start"),
839 scm_from_size_t (STRING_START (str)));
840 e3 = scm_cons (scm_from_locale_symbol ("length"),
841 scm_from_size_t (STRING_LENGTH (str)));
842
3ee86942
MV
843 if (IS_SH_STRING (str))
844 {
6ce6923b
MG
845 e4 = scm_cons (scm_from_locale_symbol ("shared"),
846 SH_STRING_STRING (str));
847 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
3ee86942
MV
848 }
849 else
850 {
6ce6923b
MG
851 e4 = scm_cons (scm_from_locale_symbol ("shared"),
852 SCM_BOOL_F);
853 buf = STRING_STRINGBUF (str);
3ee86942 854 }
9c44cd45 855
88ed5759
MG
856 if (IS_RO_STRING (str))
857 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
858 SCM_BOOL_T);
859 else
860 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
861 SCM_BOOL_F);
587a3355 862
6ce6923b 863 /* Stringbuf info */
6ce6923b
MG
864 if (!STRINGBUF_WIDE (buf))
865 {
866 size_t len = STRINGBUF_LENGTH (buf);
867 char *cbuf;
868 SCM sbc = scm_i_make_string (len, &cbuf);
869 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
870 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
871 sbc);
3ee86942 872 }
6ce6923b
MG
873 else
874 {
875 size_t len = STRINGBUF_LENGTH (buf);
876 scm_t_wchar *cbuf;
877 SCM sbc = scm_i_make_wide_string (len, &cbuf);
878 u32_cpy ((scm_t_uint32 *) cbuf,
879 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
880 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
881 sbc);
882 }
883 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
884 scm_from_size_t (STRINGBUF_LENGTH (buf)));
885 if (STRINGBUF_SHARED (buf))
886 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
887 SCM_BOOL_T);
888 else
889 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
890 SCM_BOOL_F);
6ce6923b 891 if (STRINGBUF_WIDE (buf))
ba54a202
LC
892 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
893 SCM_BOOL_T);
6ce6923b 894 else
ba54a202
LC
895 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
896 SCM_BOOL_F);
6ce6923b 897
ba54a202 898 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
3ee86942
MV
899}
900#undef FUNC_NAME
901
6ce6923b
MG
902SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
903 "Returns an association list containing debugging information\n"
904 "for @var{sym}. The association list has the following entries."
905 "@table @code\n"
906 "@item symbol\n"
907 "The symbol itself\n"
908 "@item hash\n"
909 "Its hash value\n"
88ed5759
MG
910 "@item interned\n"
911 "@code{#t} if it is an interned symbol\n"
6ce6923b
MG
912 "@item stringbuf-chars\n"
913 "A new string containing this symbols's stringbuf's characters\n"
914 "@item stringbuf-length\n"
915 "The number of characters in this stringbuf\n"
916 "@item stringbuf-shared\n"
917 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
918 "@item stringbuf-wide\n"
919 "@code{#t} if this stringbuf's characters are stored in a\n"
920 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
921 "buffer\n"
922 "@end table")
3ee86942
MV
923#define FUNC_NAME s_scm_sys_symbol_dump
924{
ba54a202 925 SCM e1, e2, e3, e4, e5, e6, e7;
6ce6923b 926 SCM buf;
3ee86942 927 SCM_VALIDATE_SYMBOL (1, sym);
6ce6923b
MG
928 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
929 sym);
930 e2 = scm_cons (scm_from_locale_symbol ("hash"),
931 scm_from_ulong (scm_i_symbol_hash (sym)));
88ed5759
MG
932 e3 = scm_cons (scm_from_locale_symbol ("interned"),
933 scm_symbol_interned_p (sym));
6ce6923b
MG
934 buf = SYMBOL_STRINGBUF (sym);
935
936 /* Stringbuf info */
6ce6923b
MG
937 if (!STRINGBUF_WIDE (buf))
938 {
939 size_t len = STRINGBUF_LENGTH (buf);
940 char *cbuf;
941 SCM sbc = scm_i_make_string (len, &cbuf);
942 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
943 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
944 sbc);
945 }
9c44cd45 946 else
6ce6923b
MG
947 {
948 size_t len = STRINGBUF_LENGTH (buf);
949 scm_t_wchar *cbuf;
950 SCM sbc = scm_i_make_wide_string (len, &cbuf);
951 u32_cpy ((scm_t_uint32 *) cbuf,
952 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
953 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
954 sbc);
955 }
956 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
957 scm_from_size_t (STRINGBUF_LENGTH (buf)));
958 if (STRINGBUF_SHARED (buf))
959 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
960 SCM_BOOL_T);
961 else
962 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
963 SCM_BOOL_F);
6ce6923b 964 if (STRINGBUF_WIDE (buf))
ba54a202 965 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
6ce6923b
MG
966 SCM_BOOL_T);
967 else
ba54a202 968 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
6ce6923b 969 SCM_BOOL_F);
ba54a202 970 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
6ce6923b 971
3ee86942
MV
972}
973#undef FUNC_NAME
974
56a3dcd4 975#ifdef SCM_STRING_LENGTH_HISTOGRAM
6ce6923b 976
9c44cd45 977SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 978#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
979{
980 int i;
981 for (i = 0; i < 1000; i++)
982 if (lenhist[i])
983 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
984 fprintf (stderr, ">999: %u\n", lenhist[1000]);
985 return SCM_UNSPECIFIED;
be54b15d
DH
986}
987#undef FUNC_NAME
988
3ee86942
MV
989#endif
990
991\f
992
993SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
994 (SCM obj),
995 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
996#define FUNC_NAME s_scm_string_p
997{
998 return scm_from_bool (IS_STRING (obj));
999}
1000#undef FUNC_NAME
1001
1002
1003SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1004
1005SCM_DEFINE (scm_string, "string", 0, 0, 1,
1006 (SCM chrs),
1007 "@deffnx {Scheme Procedure} list->string chrs\n"
1008 "Return a newly allocated string composed of the arguments,\n"
1009 "@var{chrs}.")
1010#define FUNC_NAME s_scm_string
1011{
9aa27c1a 1012 SCM result = SCM_BOOL_F;
9c44cd45 1013 SCM rest;
3ee86942 1014 size_t len;
9c44cd45
MG
1015 size_t p = 0;
1016 long i;
9aa27c1a 1017 int wide = 0;
3ee86942 1018
9c44cd45
MG
1019 /* Verify that this is a list of chars. */
1020 i = scm_ilength (chrs);
3c7cf7f5 1021 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
3ee86942 1022
9c44cd45
MG
1023 len = (size_t) i;
1024 rest = chrs;
3ee86942 1025
9c44cd45 1026 while (len > 0 && scm_is_pair (rest))
3ee86942 1027 {
9c44cd45 1028 SCM elt = SCM_CAR (rest);
3ee86942 1029 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9aa27c1a
MG
1030 if (SCM_CHAR (elt) > 0xFF)
1031 wide = 1;
9c44cd45
MG
1032 rest = SCM_CDR (rest);
1033 len--;
1034 scm_remember_upto_here_1 (elt);
1035 }
1036
1037 /* Construct a string containing this list of chars. */
1038 len = (size_t) i;
1039 rest = chrs;
1040
9aa27c1a 1041 if (wide == 0)
9c44cd45 1042 {
56a3dcd4
LC
1043 char *buf;
1044
9aa27c1a
MG
1045 result = scm_i_make_string (len, NULL);
1046 result = scm_i_string_start_writing (result);
56a3dcd4 1047 buf = scm_i_string_writable_chars (result);
9aa27c1a
MG
1048 while (len > 0 && scm_is_pair (rest))
1049 {
1050 SCM elt = SCM_CAR (rest);
1051 buf[p] = (unsigned char) SCM_CHAR (elt);
1052 p++;
1053 rest = SCM_CDR (rest);
1054 len--;
1055 scm_remember_upto_here_1 (elt);
1056 }
1057 }
1058 else
1059 {
56a3dcd4
LC
1060 scm_t_wchar *buf;
1061
9aa27c1a
MG
1062 result = scm_i_make_wide_string (len, NULL);
1063 result = scm_i_string_start_writing (result);
56a3dcd4 1064 buf = scm_i_string_writable_wide_chars (result);
9aa27c1a
MG
1065 while (len > 0 && scm_is_pair (rest))
1066 {
1067 SCM elt = SCM_CAR (rest);
1068 buf[p] = SCM_CHAR (elt);
1069 p++;
1070 rest = SCM_CDR (rest);
1071 len--;
1072 scm_remember_upto_here_1 (elt);
1073 }
3ee86942 1074 }
9c44cd45
MG
1075 scm_i_string_stop_writing ();
1076
3ee86942
MV
1077 if (len > 0)
1078 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 1079 if (!scm_is_null (rest))
3ee86942
MV
1080 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1081
1082 return result;
1083}
1084#undef FUNC_NAME
be54b15d 1085
3b3b36dd 1086SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 1087 (SCM k, SCM chr),
0d26a824
MG
1088 "Return a newly allocated string of\n"
1089 "length @var{k}. If @var{chr} is given, then all elements of\n"
1090 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 1091 "of the @var{string} are unspecified.")
1bbd0b84 1092#define FUNC_NAME s_scm_make_string
0f2d19dd 1093{
3ee86942
MV
1094 return scm_c_make_string (scm_to_size_t (k), chr);
1095}
1096#undef FUNC_NAME
1097
1098SCM
1099scm_c_make_string (size_t len, SCM chr)
1100#define FUNC_NAME NULL
1101{
9c44cd45
MG
1102 size_t p;
1103 SCM res = scm_i_make_string (len, NULL);
cb0d8be2 1104
e11e83f3
MV
1105 if (!SCM_UNBNDP (chr))
1106 {
3ee86942 1107 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
1108 res = scm_i_string_start_writing (res);
1109 for (p = 0; p < len; p++)
1110 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1111 scm_i_string_stop_writing ();
0f2d19dd 1112 }
e11e83f3
MV
1113
1114 return res;
0f2d19dd 1115}
1bbd0b84 1116#undef FUNC_NAME
0f2d19dd 1117
3b3b36dd 1118SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
1119 (SCM string),
1120 "Return the number of characters in @var{string}.")
1bbd0b84 1121#define FUNC_NAME s_scm_string_length
0f2d19dd 1122{
d1ca2c64 1123 SCM_VALIDATE_STRING (1, string);
3ee86942 1124 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 1125}
1bbd0b84 1126#undef FUNC_NAME
0f2d19dd 1127
f8ba2bb9 1128SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
9c44cd45
MG
1129 (SCM string),
1130 "Return the bytes used to represent a character in @var{string}."
1131 "This will return 1 or 4.")
f8ba2bb9 1132#define FUNC_NAME s_scm_string_bytes_per_char
9c44cd45
MG
1133{
1134 SCM_VALIDATE_STRING (1, string);
1135 if (!scm_i_is_narrow_string (string))
1136 return scm_from_int (4);
1137
1138 return scm_from_int (1);
1139}
1140#undef FUNC_NAME
1141
3ee86942
MV
1142size_t
1143scm_c_string_length (SCM string)
1144{
1145 if (!IS_STRING (string))
1146 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1147 return STRING_LENGTH (string);
1148}
1149
bd9e24b3 1150SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1151 (SCM str, SCM k),
9c44cd45
MG
1152 "Return character @var{k} of @var{str} using zero-origin\n"
1153 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1154#define FUNC_NAME s_scm_string_ref
0f2d19dd 1155{
3ae3166b 1156 size_t len;
a55c2b68 1157 unsigned long idx;
bd9e24b3 1158
d1ca2c64 1159 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1160
1161 len = scm_i_string_length (str);
1162 if (SCM_LIKELY (len > 0))
1163 idx = scm_to_unsigned_integer (k, 0, len - 1);
1164 else
1165 scm_out_of_range (NULL, k);
1166
9c44cd45
MG
1167 if (scm_i_is_narrow_string (str))
1168 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1169 else
1170 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1171}
1bbd0b84 1172#undef FUNC_NAME
0f2d19dd 1173
3ee86942
MV
1174SCM
1175scm_c_string_ref (SCM str, size_t p)
1176{
1177 if (p >= scm_i_string_length (str))
1178 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1179 if (scm_i_is_narrow_string (str))
1180 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1181 else
1182 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1183
3ee86942 1184}
f0942910 1185
3b3b36dd 1186SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1187 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1188 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1189 "an unspecified value. @var{k} must be a valid index of\n"
1190 "@var{str}.")
1bbd0b84 1191#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1192{
3ae3166b 1193 size_t len;
a55c2b68
MV
1194 unsigned long idx;
1195
f0942910 1196 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1197
1198 len = scm_i_string_length (str);
1199 if (SCM_LIKELY (len > 0))
1200 idx = scm_to_unsigned_integer (k, 0, len - 1);
1201 else
1202 scm_out_of_range (NULL, k);
1203
34d19ef6 1204 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1205 str = scm_i_string_start_writing (str);
1206 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1207 scm_i_string_stop_writing ();
1208
0f2d19dd
JB
1209 return SCM_UNSPECIFIED;
1210}
1bbd0b84 1211#undef FUNC_NAME
0f2d19dd 1212
3ee86942
MV
1213void
1214scm_c_string_set_x (SCM str, size_t p, SCM chr)
1215{
1216 if (p >= scm_i_string_length (str))
1217 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1218 str = scm_i_string_start_writing (str);
1219 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1220 scm_i_string_stop_writing ();
3ee86942 1221}
0f2d19dd 1222
3b3b36dd 1223SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1224 (SCM str, SCM start, SCM end),
1225 "Return a newly allocated string formed from the characters\n"
1226 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1227 "ending with index @var{end} (exclusive).\n"
1228 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1229 "exact integers satisfying:\n\n"
1230 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1231#define FUNC_NAME s_scm_substring
0f2d19dd 1232{
3ee86942 1233 size_t len, from, to;
685c0d71 1234
d1ca2c64 1235 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1236 len = scm_i_string_length (str);
1237 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1238 if (SCM_UNBNDP (end))
3ee86942 1239 to = len;
a55c2b68 1240 else
3ee86942
MV
1241 to = scm_to_unsigned_integer (end, from, len);
1242 return scm_i_substring (str, from, to);
0f2d19dd 1243}
1bbd0b84 1244#undef FUNC_NAME
0f2d19dd 1245
ed35de72
MV
1246SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1247 (SCM str, SCM start, SCM end),
1248 "Return a newly allocated string formed from the characters\n"
1249 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1250 "ending with index @var{end} (exclusive).\n"
1251 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1252 "exact integers satisfying:\n"
1253 "\n"
1254 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1255 "\n"
1256 "The returned string is read-only.\n")
1257#define FUNC_NAME s_scm_substring_read_only
1258{
1259 size_t len, from, to;
1260
1261 SCM_VALIDATE_STRING (1, str);
1262 len = scm_i_string_length (str);
1263 from = scm_to_unsigned_integer (start, 0, len);
1264 if (SCM_UNBNDP (end))
1265 to = len;
1266 else
1267 to = scm_to_unsigned_integer (end, from, len);
1268 return scm_i_substring_read_only (str, from, to);
1269}
1270#undef FUNC_NAME
1271
3ee86942
MV
1272SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1273 (SCM str, SCM start, SCM end),
1274 "Return a newly allocated string formed from the characters\n"
1275 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1276 "ending with index @var{end} (exclusive).\n"
1277 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1278 "exact integers satisfying:\n\n"
1279 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1280#define FUNC_NAME s_scm_substring_copy
1281{
e1b29f6a
MV
1282 /* For the Scheme version, START is mandatory, but for the C
1283 version, it is optional. See scm_string_copy in srfi-13.c for a
1284 rationale.
1285 */
1286
1287 size_t from, to;
3ee86942
MV
1288
1289 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1290 scm_i_get_substring_spec (scm_i_string_length (str),
1291 start, &from, end, &to);
3ee86942
MV
1292 return scm_i_substring_copy (str, from, to);
1293}
1294#undef FUNC_NAME
1295
1296SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1297 (SCM str, SCM start, SCM end),
1298 "Return string that indirectly refers to the characters\n"
1299 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1300 "ending with index @var{end} (exclusive).\n"
1301 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1302 "exact integers satisfying:\n\n"
1303 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1304#define FUNC_NAME s_scm_substring_shared
1305{
1306 size_t len, from, to;
1307
1308 SCM_VALIDATE_STRING (1, str);
1309 len = scm_i_string_length (str);
1310 from = scm_to_unsigned_integer (start, 0, len);
1311 if (SCM_UNBNDP (end))
1312 to = len;
1313 else
1314 to = scm_to_unsigned_integer (end, from, len);
1315 return scm_i_substring_shared (str, from, to);
1316}
1317#undef FUNC_NAME
685c0d71 1318
3b3b36dd 1319SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1320 (SCM args),
9c44cd45 1321 "Return a newly allocated string whose characters form the\n"
0d26a824 1322 "concatenation of the given strings, @var{args}.")
1bbd0b84 1323#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1324{
1325 SCM res;
9c44cd45
MG
1326 size_t len = 0;
1327 int wide = 0;
c829a427 1328 SCM l, s;
bd4911ef 1329 size_t i;
9909c395
MG
1330 union
1331 {
1332 char *narrow;
1333 scm_t_wchar *wide;
1334 } data;
af45e3b0
DH
1335
1336 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1337 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1338 {
1339 s = SCM_CAR (l);
1340 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1341 len += scm_i_string_length (s);
1342 if (!scm_i_is_narrow_string (s))
1343 wide = 1;
c829a427 1344 }
9909c395 1345 data.narrow = NULL;
9c44cd45 1346 if (!wide)
9909c395 1347 res = scm_i_make_string (len, &data.narrow);
9c44cd45 1348 else
9909c395 1349 res = scm_i_make_wide_string (len, &data.wide);
9c44cd45
MG
1350
1351 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1352 {
edea856c 1353 size_t len;
c829a427 1354 s = SCM_CAR (l);
3ee86942 1355 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1356 len = scm_i_string_length (s);
9c44cd45
MG
1357 if (!wide)
1358 {
9909c395
MG
1359 memcpy (data.narrow, scm_i_string_chars (s), len);
1360 data.narrow += len;
9c44cd45
MG
1361 }
1362 else
1363 {
1364 if (scm_i_is_narrow_string (s))
1365 {
1366 for (i = 0; i < scm_i_string_length (s); i++)
9909c395 1367 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
9c44cd45
MG
1368 }
1369 else
9909c395 1370 u32_cpy ((scm_t_uint32 *) data.wide,
9c44cd45 1371 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
9909c395 1372 data.wide += len;
9c44cd45 1373 }
c829a427
MV
1374 scm_remember_upto_here_1 (s);
1375 }
0f2d19dd
JB
1376 return res;
1377}
1bbd0b84 1378#undef FUNC_NAME
0f2d19dd 1379
c829a427
MV
1380int
1381scm_is_string (SCM obj)
1382{
3ee86942 1383 return IS_STRING (obj);
c829a427 1384}
24933780 1385
fac32b51 1386SCM
587a3355
MG
1387scm_from_stringn (const char *str, size_t len, const char *encoding,
1388 scm_t_string_failed_conversion_handler handler)
1389{
1390 size_t u32len, i;
1391 scm_t_wchar *u32;
1392 int wide = 0;
1393 SCM res;
1394
fac32b51
MG
1395 if (len == 0)
1396 return scm_nullstr;
1397
889975e5
MG
1398 if (encoding == NULL)
1399 {
1400 /* If encoding is null, use Latin-1. */
1401 char *buf;
1402 res = scm_i_make_string (len, &buf);
1403 memcpy (buf, str, len);
1404 return res;
1405 }
1406
587a3355
MG
1407 u32len = 0;
1408 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1409 (enum iconv_ilseq_handler)
1410 handler,
1411 str, len,
1412 NULL,
1413 NULL, &u32len);
1414
1415 if (u32 == NULL)
1416 {
1417 if (errno == ENOMEM)
1418 scm_memory_error ("locale string conversion");
1419 else
1420 {
889975e5 1421 /* There are invalid sequences in the input string. */
587a3355
MG
1422 SCM errstr;
1423 char *dst;
587a3355
MG
1424 errstr = scm_i_make_string (len, &dst);
1425 memcpy (dst, str, len);
1426 scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
1427 scm_list_2 (scm_from_locale_string (encoding),
1428 errstr));
1429 scm_remember_upto_here_1 (errstr);
1430 }
1431 }
1432
1433 i = 0;
1434 while (i < u32len)
1435 if (u32[i++] > 0xFF)
1436 {
1437 wide = 1;
1438 break;
1439 }
1440
1441 if (!wide)
1442 {
1443 char *dst;
1444 res = scm_i_make_string (u32len, &dst);
1445 for (i = 0; i < u32len; i ++)
1446 dst[i] = (unsigned char) u32[i];
1447 dst[u32len] = '\0';
1448 }
1449 else
1450 {
1451 scm_t_wchar *wdst;
1452 res = scm_i_make_wide_string (u32len, &wdst);
1453 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1454 wdst[u32len] = 0;
1455 }
1456
1457 free (u32);
1458 return res;
1459}
1460
c829a427
MV
1461SCM
1462scm_from_locale_stringn (const char *str, size_t len)
1463{
889975e5
MG
1464 const char *enc;
1465 scm_t_string_failed_conversion_handler hndl;
1466 SCM inport;
1467 scm_t_port *pt;
4d4528e7 1468
9c44cd45 1469 if (len == (size_t) -1)
c829a427 1470 len = strlen (str);
9c44cd45
MG
1471 if (len == 0)
1472 return scm_nullstr;
1473
889975e5
MG
1474 inport = scm_current_input_port ();
1475 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1476 {
1477 pt = SCM_PTAB_ENTRY (inport);
1478 enc = pt->encoding;
1479 hndl = pt->ilseq_handler;
1480 }
1481 else
1482 {
1483 enc = NULL;
1484 hndl = SCM_FAILED_CONVERSION_ERROR;
1485 }
1486
1487 return scm_from_stringn (str, len, enc, hndl);
c829a427 1488}
4d4528e7 1489
c829a427
MV
1490SCM
1491scm_from_locale_string (const char *str)
4d4528e7 1492{
9c44cd45
MG
1493 if (str == NULL)
1494 return scm_nullstr;
1495
c829a427
MV
1496 return scm_from_locale_stringn (str, -1);
1497}
4d4528e7 1498
587a3355
MG
1499SCM
1500scm_i_from_utf8_string (const scm_t_uint8 *str)
1501{
1502 return scm_from_stringn ((const char *) str,
1503 strlen ((char *) str), "UTF-8",
1504 SCM_FAILED_CONVERSION_ERROR);
1505}
1506
50b1996f
MG
1507/* Create a new scheme string from the C string STR. The memory of
1508 STR may be used directly as storage for the new string. */
13a94556
LC
1509/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1510 would be to register a finalizer to eventually free(3) STR, which isn't
1511 worth it. Should we just deprecate the `scm_take_' functions? */
c829a427
MV
1512SCM
1513scm_take_locale_stringn (char *str, size_t len)
1514{
13a94556 1515 SCM res;
48ddf0d9 1516
13a94556
LC
1517 res = scm_from_locale_stringn (str, len);
1518 free (str);
c829a427 1519
c829a427
MV
1520 return res;
1521}
1522
48ddf0d9
KR
1523SCM
1524scm_take_locale_string (char *str)
1525{
1526 return scm_take_locale_stringn (str, -1);
1527}
1528
9c44cd45
MG
1529/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1530 and \UXXXXXX. */
1531static void
1532unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1533{
1534 char *before, *after;
1535 size_t i, j;
1536
1537 before = *bufp;
1538 after = *bufp;
1539 i = 0;
1540 j = 0;
1541 while (i < *lenp)
1542 {
1543 if ((i <= *lenp - 6)
1544 && before[i] == '\\'
1545 && before[i + 1] == 'u'
1546 && before[i + 2] == '0' && before[i + 3] == '0')
1547 {
1548 /* Convert \u00NN to \xNN */
1549 after[j] = '\\';
1550 after[j + 1] = 'x';
30a6b9ca
MG
1551 after[j + 2] = tolower ((int) before[i + 4]);
1552 after[j + 3] = tolower ((int) before[i + 5]);
9c44cd45
MG
1553 i += 6;
1554 j += 4;
1555 }
1556 else if ((i <= *lenp - 10)
1557 && before[i] == '\\'
1558 && before[i + 1] == 'U'
1559 && before[i + 2] == '0' && before[i + 3] == '0')
1560 {
1561 /* Convert \U00NNNNNN to \UNNNNNN */
1562 after[j] = '\\';
1563 after[j + 1] = 'U';
30a6b9ca
MG
1564 after[j + 2] = tolower ((int) before[i + 4]);
1565 after[j + 3] = tolower ((int) before[i + 5]);
1566 after[j + 4] = tolower ((int) before[i + 6]);
1567 after[j + 5] = tolower ((int) before[i + 7]);
1568 after[j + 6] = tolower ((int) before[i + 8]);
1569 after[j + 7] = tolower ((int) before[i + 9]);
9c44cd45
MG
1570 i += 10;
1571 j += 8;
1572 }
1573 else
1574 {
1575 after[j] = before[i];
1576 i++;
1577 j++;
1578 }
1579 }
1580 *lenp = j;
1581 after = scm_realloc (after, j);
1582}
1583
c829a427 1584char *
fac32b51 1585scm_to_locale_stringn (SCM str, size_t *lenp)
c829a427 1586{
889975e5
MG
1587 SCM outport;
1588 scm_t_port *pt;
9c44cd45
MG
1589 const char *enc;
1590
889975e5
MG
1591 outport = scm_current_output_port ();
1592 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1593 {
1594 pt = SCM_PTAB_ENTRY (outport);
1595 enc = pt->encoding;
1596 }
1597 else
1598 enc = NULL;
9c44cd45 1599
889975e5
MG
1600 return scm_to_stringn (str, lenp,
1601 enc,
1602 scm_i_get_conversion_strategy (SCM_BOOL_F));
9c44cd45
MG
1603}
1604
1605/* Low-level scheme to C string conversion function. */
1606char *
587a3355 1607scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
eca29b02 1608 scm_t_string_failed_conversion_handler handler)
9c44cd45 1609{
9c44cd45
MG
1610 char *buf;
1611 size_t ilen, len, i;
889975e5
MG
1612 int ret;
1613 const char *enc;
4d4528e7 1614
3ee86942 1615 if (!scm_is_string (str))
c829a427 1616 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1617 ilen = scm_i_string_length (str);
1618
1619 if (ilen == 0)
1620 {
1621 buf = scm_malloc (1);
1622 buf[0] = '\0';
1623 if (lenp)
1624 *lenp = 0;
1625 return buf;
1626 }
587a3355 1627
c829a427 1628 if (lenp == NULL)
9c44cd45
MG
1629 for (i = 0; i < ilen; i++)
1630 if (scm_i_string_ref (str, i) == '\0')
1631 scm_misc_error (NULL,
1632 "string contains #\\nul character: ~S",
1633 scm_list_1 (str));
1634
889975e5 1635 if (scm_i_is_narrow_string (str) && (encoding == NULL))
c829a427 1636 {
889975e5
MG
1637 /* If using native Latin-1 encoding, just copy the string
1638 contents. */
9c44cd45
MG
1639 if (lenp)
1640 {
1641 buf = scm_malloc (ilen);
1642 memcpy (buf, scm_i_string_chars (str), ilen);
1643 *lenp = ilen;
1644 return buf;
1645 }
1646 else
1647 {
1648 buf = scm_malloc (ilen + 1);
1649 memcpy (buf, scm_i_string_chars (str), ilen);
1650 buf[ilen] = '\0';
1651 return buf;
1652 }
c829a427 1653 }
9c44cd45 1654
587a3355 1655
9c44cd45
MG
1656 buf = NULL;
1657 len = 0;
889975e5
MG
1658 enc = encoding;
1659 if (enc == NULL)
1660 enc = "ISO-8859-1";
1661 if (scm_i_is_narrow_string (str))
1662 {
1663 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1664 "ISO-8859-1", enc,
1665 (enum iconv_ilseq_handler) handler, NULL,
1666 &buf, &len);
9c44cd45 1667
889975e5
MG
1668 if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1669 unistring_escapes_to_guile_escapes (&buf, &len);
9c44cd45 1670
889975e5
MG
1671 if (ret != 0)
1672 {
1673 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1674 scm_list_2 (scm_from_locale_string (enc),
1675 str));
1676 }
1677 }
1678 else
1679 {
1680 buf = u32_conv_to_encoding (enc,
1681 (enum iconv_ilseq_handler) handler,
1682 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1683 ilen,
1684 NULL,
1685 NULL, &len);
1686 if (buf == NULL)
1687 {
1688 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1689 scm_list_2 (scm_from_locale_string (enc),
1690 str));
1691 }
5f5920e0
MG
1692 if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1693 unistring_escapes_to_guile_escapes (&buf, &len);
889975e5 1694 }
9c44cd45 1695 if (lenp)
4d4528e7 1696 *lenp = len;
9c44cd45
MG
1697 else
1698 {
1699 buf = scm_realloc (buf, len + 1);
1700 buf[len] = '\0';
1701 }
24933780 1702
c829a427 1703 scm_remember_upto_here_1 (str);
9c44cd45 1704 return buf;
4d4528e7 1705}
af68e5e5 1706
c829a427
MV
1707char *
1708scm_to_locale_string (SCM str)
1709{
1710 return scm_to_locale_stringn (str, NULL);
1711}
af68e5e5 1712
587a3355
MG
1713scm_t_uint8 *
1714scm_i_to_utf8_string (SCM str)
1715{
1716 char *u8str;
1717 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1718 return (scm_t_uint8 *) u8str;
1719}
1720
c829a427
MV
1721size_t
1722scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1723{
1724 size_t len;
9c44cd45 1725 char *result = NULL;
3ee86942 1726 if (!scm_is_string (str))
c829a427 1727 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1728 result = scm_to_locale_stringn (str, &len);
1729
1730 memcpy (buf, result, (len > max_len) ? max_len : len);
1731 free (result);
1732
c829a427
MV
1733 scm_remember_upto_here_1 (str);
1734 return len;
1735}
af68e5e5 1736
3ee86942
MV
1737/* converts C scm_array of strings to SCM scm_list of strings. */
1738/* If argc < 0, a null terminated scm_array is assumed. */
9c44cd45 1739SCM
3ee86942
MV
1740scm_makfromstrs (int argc, char **argv)
1741{
1742 int i = argc;
1743 SCM lst = SCM_EOL;
1744 if (0 > i)
1745 for (i = 0; argv[i]; i++);
1746 while (i--)
1747 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1748 return lst;
1749}
1750
c829a427
MV
1751/* Return a newly allocated array of char pointers to each of the strings
1752 in args, with a terminating NULL pointer. */
1753
1754char **
1755scm_i_allocate_string_pointers (SCM list)
2a776823 1756#define FUNC_NAME "scm_i_allocate_string_pointers"
af68e5e5 1757{
c829a427
MV
1758 char **result;
1759 int len = scm_ilength (list);
1760 int i;
1761
1762 if (len < 0)
1763 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1764
2a776823
LC
1765 result = scm_gc_malloc ((len + 1) * sizeof (char *),
1766 "string pointers");
c829a427 1767 result[len] = NULL;
c829a427
MV
1768
1769 /* The list might be have been modified in another thread, so
1770 we check LIST before each access.
1771 */
d2e53ed6 1772 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427 1773 {
2a776823
LC
1774 SCM str;
1775 size_t len;
1776
1777 str = SCM_CAR (list);
1778 len = scm_c_string_length (str);
1779
1780 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
1781 memcpy (result[i], scm_i_string_chars (str), len);
1782 result[i][len] = '\0';
1783
c829a427
MV
1784 list = SCM_CDR (list);
1785 }
1786
c829a427 1787 return result;
af68e5e5 1788}
2a776823 1789#undef FUNC_NAME
24933780 1790
6f14f578
MV
1791void
1792scm_i_get_substring_spec (size_t len,
1793 SCM start, size_t *cstart,
1794 SCM end, size_t *cend)
1795{
1796 if (SCM_UNBNDP (start))
1797 *cstart = 0;
1798 else
1799 *cstart = scm_to_unsigned_integer (start, 0, len);
1800
1801 if (SCM_UNBNDP (end))
1802 *cend = len;
1803 else
1804 *cend = scm_to_unsigned_integer (end, *cstart, len);
1805}
1806
3ee86942
MV
1807#if SCM_ENABLE_DEPRECATED
1808
556d75db
MV
1809/* When these definitions are removed, it becomes reasonable to use
1810 read-only strings for string literals. For that, change the reader
1811 to create string literals with scm_c_substring_read_only instead of
1812 with scm_c_substring_copy.
1813*/
1814
3ee86942 1815int
fe78c51a 1816scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1817{
1818 scm_c_issue_deprecation_warning
1819 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1820
2616f0e0 1821 return scm_is_string (str);
3ee86942
MV
1822}
1823
1824char *
fe78c51a 1825scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1826{
1827 char *chars;
1828
1829 scm_c_issue_deprecation_warning
1830 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1831
2616f0e0
MV
1832 /* We don't accept shared substrings here since they are not
1833 null-terminated.
1834 */
1835 if (IS_SH_STRING (str))
c291b588
LC
1836 scm_misc_error (NULL,
1837 "SCM_STRING_CHARS does not work with shared substrings",
2616f0e0
MV
1838 SCM_EOL);
1839
877f06c3 1840 /* We explicitly test for read-only strings to produce a better
556d75db
MV
1841 error message.
1842 */
1843
1844 if (IS_RO_STRING (str))
c291b588
LC
1845 scm_misc_error (NULL,
1846 "SCM_STRING_CHARS does not work with read-only strings",
556d75db 1847 SCM_EOL);
c291b588 1848
2616f0e0 1849 /* The following is still wrong, of course...
3ee86942 1850 */
9c44cd45 1851 str = scm_i_string_start_writing (str);
3ee86942
MV
1852 chars = scm_i_string_writable_chars (str);
1853 scm_i_string_stop_writing ();
1854 return chars;
1855}
1856
1857size_t
fe78c51a 1858scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1859{
1860 scm_c_issue_deprecation_warning
1861 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1862 return scm_c_string_length (str);
1863}
1864
1865#endif
1866
2a610be5
AW
1867static SCM
1868string_handle_ref (scm_t_array_handle *h, size_t index)
1869{
1870 return scm_c_string_ref (h->array, index);
1871}
1872
1873static void
1874string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
1875{
1876 scm_c_string_set_x (h->array, index, val);
1877}
1878
1879static void
1880string_get_handle (SCM v, scm_t_array_handle *h)
1881{
1882 h->array = v;
1883 h->ndims = 1;
1884 h->dims = &h->dim0;
1885 h->dim0.lbnd = 0;
1886 h->dim0.ubnd = scm_c_string_length (v) - 1;
1887 h->dim0.inc = 1;
1888 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
1889 h->elements = h->writable_elements = NULL;
1890}
1891
1892SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f & ~2,
1893 string_handle_ref, string_handle_set,
1894 string_get_handle);
f45eccff 1895SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string);
2a610be5 1896
0f2d19dd
JB
1897void
1898scm_init_strings ()
0f2d19dd 1899{
3ee86942 1900 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1901
a0599745 1902#include "libguile/strings.x"
0f2d19dd
JB
1903}
1904
89e00824
ML
1905
1906/*
1907 Local Variables:
1908 c-file-style: "gnu"
1909 End:
1910*/