Remove now unneeded `putenv.c'.
[bpt/guile.git] / libguile / strings.c
CommitLineData
2a776823 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
faf2c9d7 25#include <string.h>
3ee86942 26#include <stdio.h>
9c44cd45
MG
27#include <ctype.h>
28#include <unistr.h>
eca29b02 29#include <uniconv.h>
faf2c9d7 30
889975e5
MG
31#include "striconveh.h"
32
a0599745
MD
33#include "libguile/_scm.h"
34#include "libguile/chars.h"
7c33806a 35#include "libguile/root.h"
a0599745 36#include "libguile/strings.h"
f45eccff 37#include "libguile/generalized-vectors.h"
1afff620 38#include "libguile/deprecation.h"
a0599745 39#include "libguile/validate.h"
1afff620 40
0f2d19dd
JB
41\f
42
43/* {Strings}
44 */
45
3ee86942
MV
46
47/* Stringbufs
48 *
49 * XXX - keeping an accurate refcount during GC seems to be quite
50 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 51 * shared, not whether it definitely is.
3ee86942
MV
52 *
53 * The scheme I (mvo) tried to keep an accurate reference count would
54 * recount all strings that point to a stringbuf during the mark-phase
55 * of the GC. This was done since one cannot access the stringbuf of
56 * a string when that string is freed (in order to decrease the
57 * reference count). The memory of the stringbuf might have been
58 * reused already for something completely different.
59 *
60 * This recounted worked for a small number of threads beating on
61 * cow-strings, but it failed randomly with more than 10 threads, say.
62 * I couldn't figure out what went wrong, so I used the conservative
63 * approach implemented below.
50b1996f 64 *
ba54a202
LC
65 * There are 2 storage strategies for stringbufs: 8-bit and wide. 8-bit
66 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
67 * strings.
3ee86942
MV
68 */
69
ba54a202
LC
70/* The size in words of the stringbuf header (type tag + size). */
71#define STRINGBUF_HEADER_SIZE 2U
72
73#define STRINGBUF_HEADER_BYTES (STRINGBUF_HEADER_SIZE * sizeof (SCM))
74
35920c00 75#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
5f236208 76#define STRINGBUF_F_WIDE SCM_I_STRINGBUF_F_WIDE
3ee86942
MV
77
78#define STRINGBUF_TAG scm_tc7_stringbuf
79#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
9c44cd45 80#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942 81
ba54a202
LC
82#define STRINGBUF_CHARS(buf) ((unsigned char *) \
83 SCM_CELL_OBJECT_LOC (buf, \
84 STRINGBUF_HEADER_SIZE))
85#define STRINGBUF_LENGTH(buf) (SCM_CELL_WORD_1 (buf))
3ee86942 86
ba54a202 87#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CHARS (buf))
3ee86942 88
9b41542f
LC
89#define SET_STRINGBUF_SHARED(buf) \
90 do \
91 { \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
96 } \
97 while (0)
3ee86942 98
6ce6923b 99#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
100static size_t lenhist[1001];
101#endif
102
50b1996f
MG
103/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
104 characters. */
3ee86942
MV
105static SCM
106make_stringbuf (size_t len)
0f2d19dd 107{
3ee86942
MV
108 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
109 scm_i_symbol_chars, all stringbufs are null-terminated. Once
110 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
111 has been changed for scm_i_symbol_chars, this null-termination
112 can be dropped.
113 */
114
ba54a202
LC
115 SCM buf;
116
6ce6923b 117#if SCM_STRING_LENGTH_HISTOGRAM
3ee86942
MV
118 if (len < 1000)
119 lenhist[len]++;
120 else
121 lenhist[1000]++;
122#endif
0f2d19dd 123
ba54a202
LC
124 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
125 "string"));
126
127 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
128 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
129
130 STRINGBUF_CHARS (buf)[len] = 0;
131
132 return buf;
3ee86942 133}
e53cc817 134
50b1996f
MG
135/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
136 characters. */
9c44cd45
MG
137static SCM
138make_wide_stringbuf (size_t len)
139{
ba54a202
LC
140 SCM buf;
141 size_t raw_len;
142
6ce6923b 143#if SCM_STRING_LENGTH_HISTOGRAM
9c44cd45
MG
144 if (len < 1000)
145 lenhist[len]++;
146 else
147 lenhist[1000]++;
148#endif
149
ba54a202
LC
150 raw_len = (len + 1) * sizeof (scm_t_wchar);
151 buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
152 "string"));
153
154 SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
155 SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
156
157 STRINGBUF_WIDE_CHARS (buf)[len] = 0;
158
159 return buf;
9c44cd45
MG
160}
161
ba54a202
LC
162/* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
163 characters from BUF. */
164static SCM
165wide_stringbuf (SCM buf)
9c44cd45 166{
ba54a202 167 SCM new_buf;
9c44cd45
MG
168
169 if (STRINGBUF_WIDE (buf))
ba54a202
LC
170 new_buf = buf;
171 else
9c44cd45 172 {
ba54a202
LC
173 size_t i, len;
174 scm_t_wchar *mem;
9c44cd45 175
ba54a202 176 len = STRINGBUF_LENGTH (buf);
9c44cd45 177
ba54a202 178 new_buf = make_wide_stringbuf (len);
9c44cd45 179
ba54a202 180 mem = STRINGBUF_WIDE_CHARS (new_buf);
9c44cd45 181 for (i = 0; i < len; i++)
ba54a202 182 mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
9c44cd45 183 mem[len] = 0;
9c44cd45 184 }
ba54a202
LC
185
186 return new_buf;
3ee86942 187}
bd9e24b3 188
ba54a202
LC
189/* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
190 characters from BUF, if possible. */
191static SCM
587a3355
MG
192narrow_stringbuf (SCM buf)
193{
ba54a202 194 SCM new_buf;
587a3355
MG
195
196 if (!STRINGBUF_WIDE (buf))
ba54a202
LC
197 new_buf = buf;
198 else
199 {
200 size_t i, len;
201 scm_t_wchar *wmem;
202 unsigned char *mem;
587a3355 203
ba54a202
LC
204 len = STRINGBUF_LENGTH (buf);
205 wmem = STRINGBUF_WIDE_CHARS (buf);
587a3355 206
ba54a202
LC
207 for (i = 0; i < len; i++)
208 if (wmem[i] > 0xFF)
209 /* BUF cannot be narrowed. */
210 return buf;
587a3355 211
ba54a202 212 new_buf = make_stringbuf (len);
587a3355 213
ba54a202
LC
214 mem = STRINGBUF_CHARS (new_buf);
215 for (i = 0; i < len; i++)
216 mem[i] = (unsigned char) wmem[i];
217 mem[len] = 0;
218 }
219
220 return new_buf;
587a3355
MG
221}
222
9de87eea 223scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 224
ba54a202 225\f
3ee86942
MV
226/* Copy-on-write strings.
227 */
bd9e24b3 228
3ee86942 229#define STRING_TAG scm_tc7_string
bd9e24b3 230
3ee86942
MV
231#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
232#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
233#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 234
3ee86942
MV
235#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
236#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
237
238#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
239
ed35de72
MV
240/* Read-only strings.
241 */
242
35920c00 243#define RO_STRING_TAG scm_tc7_ro_string
ed35de72
MV
244#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
245
e1b29f6a
MV
246/* Mutation-sharing substrings
247 */
248
249#define SH_STRING_TAG (scm_tc7_string + 0x100)
250
251#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
252/* START and LENGTH as for STRINGs. */
253
254#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
255
50b1996f
MG
256/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
257 characters. CHARSP, if not NULL, will be set to location of the
258 char array. */
3ee86942
MV
259SCM
260scm_i_make_string (size_t len, char **charsp)
261{
262 SCM buf = make_stringbuf (len);
263 SCM res;
264 if (charsp)
f59cf998 265 *charsp = (char *) STRINGBUF_CHARS (buf);
3ee86942
MV
266 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
267 (scm_t_bits)0, (scm_t_bits) len);
268 return res;
0f2d19dd
JB
269}
270
50b1996f
MG
271/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
272 characters. CHARSP, if not NULL, will be set to location of the
273 character array. */
9c44cd45 274SCM
50b1996f 275scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
9c44cd45
MG
276{
277 SCM buf = make_wide_stringbuf (len);
278 SCM res;
279 if (charsp)
280 *charsp = STRINGBUF_WIDE_CHARS (buf);
281 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
282 (scm_t_bits) 0, (scm_t_bits) len);
283 return res;
284}
285
3ee86942
MV
286static void
287validate_substring_args (SCM str, size_t start, size_t end)
288{
289 if (!IS_STRING (str))
290 scm_wrong_type_arg_msg (NULL, 0, str, "string");
291 if (start > STRING_LENGTH (str))
292 scm_out_of_range (NULL, scm_from_size_t (start));
293 if (end > STRING_LENGTH (str) || end < start)
294 scm_out_of_range (NULL, scm_from_size_t (end));
295}
0f2d19dd 296
e1b29f6a
MV
297static inline void
298get_str_buf_start (SCM *str, SCM *buf, size_t *start)
299{
300 *start = STRING_START (*str);
301 if (IS_SH_STRING (*str))
302 {
303 *str = SH_STRING_STRING (*str);
304 *start += STRING_START (*str);
305 }
306 *buf = STRING_STRINGBUF (*str);
307}
308
3ee86942
MV
309SCM
310scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 311{
e1b29f6a
MV
312 SCM buf;
313 size_t str_start;
314 get_str_buf_start (&str, &buf, &str_start);
9de87eea 315 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 316 SET_STRINGBUF_SHARED (buf);
9de87eea 317 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 318 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
319 (scm_t_bits)str_start + start,
320 (scm_t_bits) end - start);
0f2d19dd
JB
321}
322
ed35de72
MV
323SCM
324scm_i_substring_read_only (SCM str, size_t start, size_t end)
325{
45a9f430
LC
326 SCM buf;
327 size_t str_start;
328 get_str_buf_start (&str, &buf, &str_start);
329 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
330 SET_STRINGBUF_SHARED (buf);
331 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
332 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
333 (scm_t_bits)str_start + start,
334 (scm_t_bits) end - start);
ed35de72
MV
335}
336
3ee86942
MV
337SCM
338scm_i_substring_copy (SCM str, size_t start, size_t end)
339{
340 size_t len = end - start;
edea856c 341 SCM buf, my_buf;
e1b29f6a
MV
342 size_t str_start;
343 get_str_buf_start (&str, &buf, &str_start);
9c44cd45
MG
344 if (scm_i_is_narrow_string (str))
345 {
346 my_buf = make_stringbuf (len);
347 memcpy (STRINGBUF_CHARS (my_buf),
348 STRINGBUF_CHARS (buf) + str_start + start, len);
349 }
350 else
351 {
352 my_buf = make_wide_stringbuf (len);
353 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
354 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
355 + start), len);
356 /* Even though this string is wide, the substring may be narrow.
50b1996f 357 Consider adding code to narrow the string. */
9c44cd45 358 }
3ee86942 359 scm_remember_upto_here_1 (buf);
9c44cd45
MG
360 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
361 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 362}
0f2d19dd 363
e1b29f6a
MV
364SCM
365scm_i_substring_shared (SCM str, size_t start, size_t end)
366{
367 if (start == 0 && end == STRING_LENGTH (str))
368 return str;
369 else
370 {
371 size_t len = end - start;
372 if (IS_SH_STRING (str))
373 {
374 start += STRING_START (str);
375 str = SH_STRING_STRING (str);
376 }
377 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
378 (scm_t_bits)start, (scm_t_bits) len);
379 }
380}
381
3ee86942
MV
382SCM
383scm_c_substring (SCM str, size_t start, size_t end)
384{
385 validate_substring_args (str, start, end);
386 return scm_i_substring (str, start, end);
387}
ee149d03 388
ed35de72
MV
389SCM
390scm_c_substring_read_only (SCM str, size_t start, size_t end)
391{
392 validate_substring_args (str, start, end);
393 return scm_i_substring_read_only (str, start, end);
394}
395
0f2d19dd 396SCM
3ee86942 397scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 398{
3ee86942
MV
399 validate_substring_args (str, start, end);
400 return scm_i_substring_copy (str, start, end);
401}
402
3ee86942
MV
403SCM
404scm_c_substring_shared (SCM str, size_t start, size_t end)
405{
406 validate_substring_args (str, start, end);
407 return scm_i_substring_shared (str, start, end);
408}
0f2d19dd 409
d6c74168 410\f
3ee86942
MV
411/* Internal accessors
412 */
413
50b1996f
MG
414/* Returns the number of characters in STR. This may be different
415 than the memory size of the string storage. */
3ee86942
MV
416size_t
417scm_i_string_length (SCM str)
0f2d19dd 418{
3ee86942 419 return STRING_LENGTH (str);
0f2d19dd
JB
420}
421
50b1996f
MG
422/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
423 encoding. False if it is 'wide', having a 32-bit UCS-4
424 encoding. */
9c44cd45
MG
425int
426scm_i_is_narrow_string (SCM str)
427{
428 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
429}
430
587a3355
MG
431/* Try to coerce a string to be narrow. It if is narrow already, do
432 nothing. If it is wide, shrink it to narrow if none of its
433 characters are above 0xFF. Return true if the string is narrow or
434 was made to be narrow. */
435int
436scm_i_try_narrow_string (SCM str)
437{
ba54a202 438 SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
587a3355
MG
439
440 return scm_i_is_narrow_string (str);
441}
442
50b1996f
MG
443/* Returns a pointer to the 8-bit Latin-1 encoded character array of
444 STR. */
3ee86942
MV
445const char *
446scm_i_string_chars (SCM str)
447{
448 SCM buf;
e1b29f6a
MV
449 size_t start;
450 get_str_buf_start (&str, &buf, &start);
9c44cd45 451 if (scm_i_is_narrow_string (str))
f59cf998 452 return (const char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
453 else
454 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
455 scm_list_1 (str));
456 return NULL;
3ee86942 457}
b00418df 458
50b1996f
MG
459/* Returns a pointer to the 32-bit UCS-4 encoded character array of
460 STR. */
9c44cd45
MG
461const scm_t_wchar *
462scm_i_string_wide_chars (SCM str)
463{
464 SCM buf;
465 size_t start;
466
467 get_str_buf_start (&str, &buf, &start);
468 if (!scm_i_is_narrow_string (str))
f59cf998 469 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
9c44cd45
MG
470 else
471 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
472 scm_list_1 (str));
473}
474
475/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
476 a new string buffer, so that it can be modified without modifying
50b1996f
MG
477 other strings. Also, lock the string mutex. Later, one must call
478 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
479SCM
480scm_i_string_start_writing (SCM orig_str)
b00418df 481{
ed35de72 482 SCM buf, str = orig_str;
e1b29f6a 483 size_t start;
ed35de72 484
e1b29f6a 485 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
486 if (IS_RO_STRING (str))
487 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
488
9de87eea 489 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
490 if (STRINGBUF_SHARED (buf))
491 {
9c44cd45 492 /* Clone the stringbuf. */
3ee86942
MV
493 size_t len = STRING_LENGTH (str);
494 SCM new_buf;
495
9de87eea 496 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 497
9c44cd45
MG
498 if (scm_i_is_narrow_string (str))
499 {
500 new_buf = make_stringbuf (len);
501 memcpy (STRINGBUF_CHARS (new_buf),
502 STRINGBUF_CHARS (buf) + STRING_START (str), len);
503
504 }
505 else
506 {
507 new_buf = make_wide_stringbuf (len);
508 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
509 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
510 + STRING_START (str)), len);
511 }
3ee86942 512
3ee86942
MV
513 SET_STRING_STRINGBUF (str, new_buf);
514 start -= STRING_START (str);
902578f1
LC
515
516 /* FIXME: The following operations are not atomic, so other threads
517 looking at STR may see an inconsistent state. Nevertheless it can't
518 hurt much since (i) accessing STR while it is being mutated can't
519 yield a crash, and (ii) concurrent accesses to STR should be
520 protected by a mutex at the application level. The latter may not
521 apply when STR != ORIG_STR, though. */
3ee86942 522 SET_STRING_START (str, 0);
902578f1 523 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
524
525 buf = new_buf;
526
9de87eea 527 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 528 }
9c44cd45
MG
529 return orig_str;
530}
3ee86942 531
50b1996f 532/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
533char *
534scm_i_string_writable_chars (SCM str)
535{
536 SCM buf;
537 size_t start;
3ee86942 538
9c44cd45
MG
539 get_str_buf_start (&str, &buf, &start);
540 if (scm_i_is_narrow_string (str))
f59cf998 541 return (char *) STRINGBUF_CHARS (buf) + start;
9c44cd45
MG
542 else
543 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
544 scm_list_1 (str));
545 return NULL;
b00418df
DH
546}
547
50b1996f 548/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
549static scm_t_wchar *
550scm_i_string_writable_wide_chars (SCM str)
551{
552 SCM buf;
553 size_t start;
554
555 get_str_buf_start (&str, &buf, &start);
556 if (!scm_i_is_narrow_string (str))
557 return STRINGBUF_WIDE_CHARS (buf) + start;
558 else
1c7b216f 559 scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
9c44cd45 560 scm_list_1 (str));
b00418df
DH
561}
562
50b1996f
MG
563/* Unlock the string mutex that was locked when
564 scm_i_string_start_writing was called. */
3ee86942
MV
565void
566scm_i_string_stop_writing (void)
567{
9de87eea 568 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 569}
b00418df 570
50b1996f 571/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
572scm_t_wchar
573scm_i_string_ref (SCM str, size_t x)
574{
575 if (scm_i_is_narrow_string (str))
576 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
577 else
578 return scm_i_string_wide_chars (str)[x];
579}
580
889975e5
MG
581/* Returns index+1 of the first char in STR that matches C, or
582 0 if the char is not found. */
583int
584scm_i_string_contains_char (SCM str, char ch)
585{
586 size_t i;
587 size_t len = scm_i_string_length (str);
588
589 i = 0;
590 if (scm_i_is_narrow_string (str))
591 {
592 while (i < len)
593 {
594 if (scm_i_string_chars (str)[i] == ch)
595 return i+1;
596 i++;
597 }
598 }
599 else
600 {
601 while (i < len)
602 {
603 if (scm_i_string_wide_chars (str)[i]
604 == (unsigned char) ch)
605 return i+1;
606 i++;
607 }
608 }
609 return 0;
610}
611
3f47e526
MG
612int
613scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
614{
615 if (scm_i_is_narrow_string (sstr))
616 {
617 const char *a = scm_i_string_chars (sstr) + start_x;
618 const char *b = cstr;
619 return strncmp (a, b, strlen(b));
620 }
621 else
622 {
623 size_t i;
624 const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
625 const char *b = cstr;
626 for (i = 0; i < strlen (b); i++)
627 {
628 if (a[i] != (unsigned char) b[i])
629 return 1;
630 }
631 }
632 return 0;
633}
634
50b1996f 635/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
636void
637scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
638{
639 if (chr > 0xFF && scm_i_is_narrow_string (str))
ba54a202 640 SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
9c44cd45
MG
641
642 if (scm_i_is_narrow_string (str))
643 {
644 char *dst = scm_i_string_writable_chars (str);
587a3355 645 dst[p] = chr;
9c44cd45
MG
646 }
647 else
648 {
649 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
650 dst[p] = chr;
651 }
652}
653
ba54a202 654\f
3ee86942 655/* Symbols.
587a3355 656
3ee86942
MV
657 Basic symbol creation and accessing is done here, the rest is in
658 symbols.[hc]. This has been done to keep stringbufs and the
659 internals of strings and string-like objects confined to this file.
660*/
661
662#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
663
664SCM
6869328b
MV
665scm_i_make_symbol (SCM name, scm_t_bits flags,
666 unsigned long hash, SCM props)
3ee86942
MV
667{
668 SCM buf;
669 size_t start = STRING_START (name);
670 size_t length = STRING_LENGTH (name);
671
672 if (IS_SH_STRING (name))
673 {
674 name = SH_STRING_STRING (name);
675 start += STRING_START (name);
676 }
677 buf = SYMBOL_STRINGBUF (name);
678
679 if (start == 0 && length == STRINGBUF_LENGTH (buf))
680 {
681 /* reuse buf. */
9de87eea 682 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 683 SET_STRINGBUF_SHARED (buf);
9de87eea 684 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
685 }
686 else
687 {
688 /* make new buf. */
9c44cd45
MG
689 if (scm_i_is_narrow_string (name))
690 {
691 SCM new_buf = make_stringbuf (length);
692 memcpy (STRINGBUF_CHARS (new_buf),
693 STRINGBUF_CHARS (buf) + start, length);
694 buf = new_buf;
695 }
696 else
697 {
698 SCM new_buf = make_wide_stringbuf (length);
699 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
700 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
701 length);
702 buf = new_buf;
703 }
3ee86942 704 }
6869328b 705 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
706 (scm_t_bits) hash, SCM_UNPACK (props));
707}
708
fd0a5bbc
HWN
709SCM
710scm_i_c_make_symbol (const char *name, size_t len,
711 scm_t_bits flags, unsigned long hash, SCM props)
712{
713 SCM buf = make_stringbuf (len);
714 memcpy (STRINGBUF_CHARS (buf), name, len);
715
a284cc7e
LC
716 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
717 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
718}
719
50b1996f
MG
720/* Returns the number of characters in SYM. This may be different
721 from the memory size of SYM. */
3ee86942
MV
722size_t
723scm_i_symbol_length (SCM sym)
0f2d19dd 724{
3ee86942 725 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
726}
727
071bb6a8
LC
728size_t
729scm_c_symbol_length (SCM sym)
730#define FUNC_NAME "scm_c_symbol_length"
731{
732 SCM_VALIDATE_SYMBOL (1, sym);
733
734 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
735}
736#undef FUNC_NAME
737
50b1996f
MG
738/* True if the name of SYM is stored as a Latin-1 encoded string.
739 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
740int
741scm_i_is_narrow_symbol (SCM sym)
742{
743 SCM buf;
744
745 buf = SYMBOL_STRINGBUF (sym);
746 return !STRINGBUF_WIDE (buf);
747}
748
50b1996f
MG
749/* Returns a pointer to the 8-bit Latin-1 encoded character array that
750 contains the name of SYM. */
3ee86942
MV
751const char *
752scm_i_symbol_chars (SCM sym)
753{
9c44cd45
MG
754 SCM buf;
755
756 buf = SYMBOL_STRINGBUF (sym);
757 if (!STRINGBUF_WIDE (buf))
f59cf998 758 return (const char *) STRINGBUF_CHARS (buf);
9c44cd45
MG
759 else
760 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
761 scm_list_1 (sym));
762}
763
50b1996f
MG
764/* Return a pointer to the 32-bit UCS-4-encoded character array of a
765 symbol's name. */
9c44cd45
MG
766const scm_t_wchar *
767scm_i_symbol_wide_chars (SCM sym)
768{
769 SCM buf;
770
771 buf = SYMBOL_STRINGBUF (sym);
772 if (STRINGBUF_WIDE (buf))
f59cf998 773 return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
9c44cd45
MG
774 else
775 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
776 scm_list_1 (sym));
3ee86942 777}
1cc91f1b 778
be54b15d 779SCM
3ee86942 780scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 781{
3ee86942 782 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 783 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 784 SET_STRINGBUF_SHARED (buf);
9de87eea 785 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 786 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
787 (scm_t_bits)start, (scm_t_bits) end - start);
788}
be54b15d 789
50b1996f 790/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
791scm_t_wchar
792scm_i_symbol_ref (SCM sym, size_t x)
793{
794 if (scm_i_is_narrow_symbol (sym))
795 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
796 else
797 return scm_i_symbol_wide_chars (sym)[x];
798}
799
3ee86942
MV
800/* Debugging
801 */
be54b15d 802
6ce6923b
MG
803SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
804 "Returns an association list containing debugging information\n"
805 "for @var{str}. The association list has the following entries."
806 "@table @code\n"
807 "@item string\n"
808 "The string itself.\n"
809 "@item start\n"
810 "The start index of the string into its stringbuf\n"
811 "@item length\n"
812 "The length of the string\n"
813 "@item shared\n"
814 "If this string is a substring, it returns its parent string.\n"
815 "Otherwise, it returns @code{#f}\n"
88ed5759
MG
816 "@item read-only\n"
817 "@code{#t} if the string is read-only\n"
6ce6923b
MG
818 "@item stringbuf-chars\n"
819 "A new string containing this string's stringbuf's characters\n"
820 "@item stringbuf-length\n"
821 "The number of characters in this stringbuf\n"
822 "@item stringbuf-shared\n"
823 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
824 "@item stringbuf-wide\n"
825 "@code{#t} if this stringbuf's characters are stored in a\n"
826 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
827 "buffer\n"
828 "@end table")
3ee86942
MV
829#define FUNC_NAME s_scm_sys_string_dump
830{
ba54a202 831 SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
6ce6923b 832 SCM buf;
3ee86942 833 SCM_VALIDATE_STRING (1, str);
6ce6923b
MG
834
835 /* String info */
836 e1 = scm_cons (scm_from_locale_symbol ("string"),
837 str);
838 e2 = scm_cons (scm_from_locale_symbol ("start"),
839 scm_from_size_t (STRING_START (str)));
840 e3 = scm_cons (scm_from_locale_symbol ("length"),
841 scm_from_size_t (STRING_LENGTH (str)));
842
3ee86942
MV
843 if (IS_SH_STRING (str))
844 {
6ce6923b
MG
845 e4 = scm_cons (scm_from_locale_symbol ("shared"),
846 SH_STRING_STRING (str));
847 buf = STRING_STRINGBUF (SH_STRING_STRING (str));
3ee86942
MV
848 }
849 else
850 {
6ce6923b
MG
851 e4 = scm_cons (scm_from_locale_symbol ("shared"),
852 SCM_BOOL_F);
853 buf = STRING_STRINGBUF (str);
3ee86942 854 }
9c44cd45 855
88ed5759
MG
856 if (IS_RO_STRING (str))
857 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
858 SCM_BOOL_T);
859 else
860 e5 = scm_cons (scm_from_locale_symbol ("read-only"),
861 SCM_BOOL_F);
587a3355 862
6ce6923b 863 /* Stringbuf info */
6ce6923b
MG
864 if (!STRINGBUF_WIDE (buf))
865 {
866 size_t len = STRINGBUF_LENGTH (buf);
867 char *cbuf;
868 SCM sbc = scm_i_make_string (len, &cbuf);
869 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
870 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
871 sbc);
3ee86942 872 }
6ce6923b
MG
873 else
874 {
875 size_t len = STRINGBUF_LENGTH (buf);
876 scm_t_wchar *cbuf;
877 SCM sbc = scm_i_make_wide_string (len, &cbuf);
878 u32_cpy ((scm_t_uint32 *) cbuf,
879 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
880 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
881 sbc);
882 }
883 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
884 scm_from_size_t (STRINGBUF_LENGTH (buf)));
885 if (STRINGBUF_SHARED (buf))
886 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
887 SCM_BOOL_T);
888 else
889 e8 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
890 SCM_BOOL_F);
6ce6923b 891 if (STRINGBUF_WIDE (buf))
ba54a202
LC
892 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
893 SCM_BOOL_T);
6ce6923b 894 else
ba54a202
LC
895 e9 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
896 SCM_BOOL_F);
6ce6923b 897
ba54a202 898 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
3ee86942
MV
899}
900#undef FUNC_NAME
901
6ce6923b
MG
902SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
903 "Returns an association list containing debugging information\n"
904 "for @var{sym}. The association list has the following entries."
905 "@table @code\n"
906 "@item symbol\n"
907 "The symbol itself\n"
908 "@item hash\n"
909 "Its hash value\n"
88ed5759
MG
910 "@item interned\n"
911 "@code{#t} if it is an interned symbol\n"
6ce6923b
MG
912 "@item stringbuf-chars\n"
913 "A new string containing this symbols's stringbuf's characters\n"
914 "@item stringbuf-length\n"
915 "The number of characters in this stringbuf\n"
916 "@item stringbuf-shared\n"
917 "@code{#t} if this stringbuf is shared\n"
6ce6923b
MG
918 "@item stringbuf-wide\n"
919 "@code{#t} if this stringbuf's characters are stored in a\n"
920 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
921 "buffer\n"
922 "@end table")
3ee86942
MV
923#define FUNC_NAME s_scm_sys_symbol_dump
924{
ba54a202 925 SCM e1, e2, e3, e4, e5, e6, e7;
6ce6923b 926 SCM buf;
3ee86942 927 SCM_VALIDATE_SYMBOL (1, sym);
6ce6923b
MG
928 e1 = scm_cons (scm_from_locale_symbol ("symbol"),
929 sym);
930 e2 = scm_cons (scm_from_locale_symbol ("hash"),
931 scm_from_ulong (scm_i_symbol_hash (sym)));
88ed5759
MG
932 e3 = scm_cons (scm_from_locale_symbol ("interned"),
933 scm_symbol_interned_p (sym));
6ce6923b
MG
934 buf = SYMBOL_STRINGBUF (sym);
935
936 /* Stringbuf info */
6ce6923b
MG
937 if (!STRINGBUF_WIDE (buf))
938 {
939 size_t len = STRINGBUF_LENGTH (buf);
940 char *cbuf;
941 SCM sbc = scm_i_make_string (len, &cbuf);
942 memcpy (cbuf, STRINGBUF_CHARS (buf), len);
943 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
944 sbc);
945 }
9c44cd45 946 else
6ce6923b
MG
947 {
948 size_t len = STRINGBUF_LENGTH (buf);
949 scm_t_wchar *cbuf;
950 SCM sbc = scm_i_make_wide_string (len, &cbuf);
951 u32_cpy ((scm_t_uint32 *) cbuf,
952 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
953 e4 = scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
954 sbc);
955 }
956 e5 = scm_cons (scm_from_locale_symbol ("stringbuf-length"),
957 scm_from_size_t (STRINGBUF_LENGTH (buf)));
958 if (STRINGBUF_SHARED (buf))
959 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
960 SCM_BOOL_T);
961 else
962 e6 = scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
963 SCM_BOOL_F);
6ce6923b 964 if (STRINGBUF_WIDE (buf))
ba54a202 965 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
6ce6923b
MG
966 SCM_BOOL_T);
967 else
ba54a202 968 e7 = scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
6ce6923b 969 SCM_BOOL_F);
ba54a202 970 return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
6ce6923b 971
3ee86942
MV
972}
973#undef FUNC_NAME
974
6ce6923b
MG
975#if SCM_STRING_LENGTH_HISTOGRAM
976
9c44cd45 977SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 978#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
979{
980 int i;
981 for (i = 0; i < 1000; i++)
982 if (lenhist[i])
983 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
984 fprintf (stderr, ">999: %u\n", lenhist[1000]);
985 return SCM_UNSPECIFIED;
be54b15d
DH
986}
987#undef FUNC_NAME
988
3ee86942
MV
989#endif
990
991\f
992
993SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
994 (SCM obj),
995 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
996#define FUNC_NAME s_scm_string_p
997{
998 return scm_from_bool (IS_STRING (obj));
999}
1000#undef FUNC_NAME
1001
1002
1003SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
1004
1005SCM_DEFINE (scm_string, "string", 0, 0, 1,
1006 (SCM chrs),
1007 "@deffnx {Scheme Procedure} list->string chrs\n"
1008 "Return a newly allocated string composed of the arguments,\n"
1009 "@var{chrs}.")
1010#define FUNC_NAME s_scm_string
1011{
9aa27c1a 1012 SCM result = SCM_BOOL_F;
9c44cd45 1013 SCM rest;
3ee86942 1014 size_t len;
9c44cd45
MG
1015 size_t p = 0;
1016 long i;
9aa27c1a 1017 int wide = 0;
3ee86942 1018
9c44cd45
MG
1019 /* Verify that this is a list of chars. */
1020 i = scm_ilength (chrs);
3c7cf7f5 1021 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
3ee86942 1022
9c44cd45
MG
1023 len = (size_t) i;
1024 rest = chrs;
3ee86942 1025
9c44cd45 1026 while (len > 0 && scm_is_pair (rest))
3ee86942 1027 {
9c44cd45 1028 SCM elt = SCM_CAR (rest);
3ee86942 1029 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9aa27c1a
MG
1030 if (SCM_CHAR (elt) > 0xFF)
1031 wide = 1;
9c44cd45
MG
1032 rest = SCM_CDR (rest);
1033 len--;
1034 scm_remember_upto_here_1 (elt);
1035 }
1036
1037 /* Construct a string containing this list of chars. */
1038 len = (size_t) i;
1039 rest = chrs;
1040
9aa27c1a 1041 if (wide == 0)
9c44cd45 1042 {
9aa27c1a
MG
1043 result = scm_i_make_string (len, NULL);
1044 result = scm_i_string_start_writing (result);
1045 char *buf = scm_i_string_writable_chars (result);
1046 while (len > 0 && scm_is_pair (rest))
1047 {
1048 SCM elt = SCM_CAR (rest);
1049 buf[p] = (unsigned char) SCM_CHAR (elt);
1050 p++;
1051 rest = SCM_CDR (rest);
1052 len--;
1053 scm_remember_upto_here_1 (elt);
1054 }
1055 }
1056 else
1057 {
1058 result = scm_i_make_wide_string (len, NULL);
1059 result = scm_i_string_start_writing (result);
1060 scm_t_wchar *buf = scm_i_string_writable_wide_chars (result);
1061 while (len > 0 && scm_is_pair (rest))
1062 {
1063 SCM elt = SCM_CAR (rest);
1064 buf[p] = SCM_CHAR (elt);
1065 p++;
1066 rest = SCM_CDR (rest);
1067 len--;
1068 scm_remember_upto_here_1 (elt);
1069 }
3ee86942 1070 }
9c44cd45
MG
1071 scm_i_string_stop_writing ();
1072
3ee86942
MV
1073 if (len > 0)
1074 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 1075 if (!scm_is_null (rest))
3ee86942
MV
1076 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
1077
1078 return result;
1079}
1080#undef FUNC_NAME
be54b15d 1081
3b3b36dd 1082SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 1083 (SCM k, SCM chr),
0d26a824
MG
1084 "Return a newly allocated string of\n"
1085 "length @var{k}. If @var{chr} is given, then all elements of\n"
1086 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 1087 "of the @var{string} are unspecified.")
1bbd0b84 1088#define FUNC_NAME s_scm_make_string
0f2d19dd 1089{
3ee86942
MV
1090 return scm_c_make_string (scm_to_size_t (k), chr);
1091}
1092#undef FUNC_NAME
1093
1094SCM
1095scm_c_make_string (size_t len, SCM chr)
1096#define FUNC_NAME NULL
1097{
9c44cd45
MG
1098 size_t p;
1099 SCM res = scm_i_make_string (len, NULL);
cb0d8be2 1100
e11e83f3
MV
1101 if (!SCM_UNBNDP (chr))
1102 {
3ee86942 1103 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
1104 res = scm_i_string_start_writing (res);
1105 for (p = 0; p < len; p++)
1106 scm_i_string_set_x (res, p, SCM_CHAR (chr));
1107 scm_i_string_stop_writing ();
0f2d19dd 1108 }
e11e83f3
MV
1109
1110 return res;
0f2d19dd 1111}
1bbd0b84 1112#undef FUNC_NAME
0f2d19dd 1113
3b3b36dd 1114SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
1115 (SCM string),
1116 "Return the number of characters in @var{string}.")
1bbd0b84 1117#define FUNC_NAME s_scm_string_length
0f2d19dd 1118{
d1ca2c64 1119 SCM_VALIDATE_STRING (1, string);
3ee86942 1120 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 1121}
1bbd0b84 1122#undef FUNC_NAME
0f2d19dd 1123
f8ba2bb9 1124SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
9c44cd45
MG
1125 (SCM string),
1126 "Return the bytes used to represent a character in @var{string}."
1127 "This will return 1 or 4.")
f8ba2bb9 1128#define FUNC_NAME s_scm_string_bytes_per_char
9c44cd45
MG
1129{
1130 SCM_VALIDATE_STRING (1, string);
1131 if (!scm_i_is_narrow_string (string))
1132 return scm_from_int (4);
1133
1134 return scm_from_int (1);
1135}
1136#undef FUNC_NAME
1137
3ee86942
MV
1138size_t
1139scm_c_string_length (SCM string)
1140{
1141 if (!IS_STRING (string))
1142 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1143 return STRING_LENGTH (string);
1144}
1145
bd9e24b3 1146SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1147 (SCM str, SCM k),
9c44cd45
MG
1148 "Return character @var{k} of @var{str} using zero-origin\n"
1149 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1150#define FUNC_NAME s_scm_string_ref
0f2d19dd 1151{
3ae3166b 1152 size_t len;
a55c2b68 1153 unsigned long idx;
bd9e24b3 1154
d1ca2c64 1155 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1156
1157 len = scm_i_string_length (str);
1158 if (SCM_LIKELY (len > 0))
1159 idx = scm_to_unsigned_integer (k, 0, len - 1);
1160 else
1161 scm_out_of_range (NULL, k);
1162
9c44cd45
MG
1163 if (scm_i_is_narrow_string (str))
1164 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1165 else
1166 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1167}
1bbd0b84 1168#undef FUNC_NAME
0f2d19dd 1169
3ee86942
MV
1170SCM
1171scm_c_string_ref (SCM str, size_t p)
1172{
1173 if (p >= scm_i_string_length (str))
1174 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1175 if (scm_i_is_narrow_string (str))
1176 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1177 else
1178 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1179
3ee86942 1180}
f0942910 1181
3b3b36dd 1182SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1183 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1184 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1185 "an unspecified value. @var{k} must be a valid index of\n"
1186 "@var{str}.")
1bbd0b84 1187#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1188{
3ae3166b 1189 size_t len;
a55c2b68
MV
1190 unsigned long idx;
1191
f0942910 1192 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1193
1194 len = scm_i_string_length (str);
1195 if (SCM_LIKELY (len > 0))
1196 idx = scm_to_unsigned_integer (k, 0, len - 1);
1197 else
1198 scm_out_of_range (NULL, k);
1199
34d19ef6 1200 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1201 str = scm_i_string_start_writing (str);
1202 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1203 scm_i_string_stop_writing ();
1204
0f2d19dd
JB
1205 return SCM_UNSPECIFIED;
1206}
1bbd0b84 1207#undef FUNC_NAME
0f2d19dd 1208
3ee86942
MV
1209void
1210scm_c_string_set_x (SCM str, size_t p, SCM chr)
1211{
1212 if (p >= scm_i_string_length (str))
1213 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1214 str = scm_i_string_start_writing (str);
1215 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1216 scm_i_string_stop_writing ();
3ee86942 1217}
0f2d19dd 1218
3b3b36dd 1219SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1220 (SCM str, SCM start, SCM end),
1221 "Return a newly allocated string formed from the characters\n"
1222 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1223 "ending with index @var{end} (exclusive).\n"
1224 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1225 "exact integers satisfying:\n\n"
1226 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1227#define FUNC_NAME s_scm_substring
0f2d19dd 1228{
3ee86942 1229 size_t len, from, to;
685c0d71 1230
d1ca2c64 1231 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1232 len = scm_i_string_length (str);
1233 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1234 if (SCM_UNBNDP (end))
3ee86942 1235 to = len;
a55c2b68 1236 else
3ee86942
MV
1237 to = scm_to_unsigned_integer (end, from, len);
1238 return scm_i_substring (str, from, to);
0f2d19dd 1239}
1bbd0b84 1240#undef FUNC_NAME
0f2d19dd 1241
ed35de72
MV
1242SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1243 (SCM str, SCM start, SCM end),
1244 "Return a newly allocated string formed from the characters\n"
1245 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1246 "ending with index @var{end} (exclusive).\n"
1247 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1248 "exact integers satisfying:\n"
1249 "\n"
1250 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1251 "\n"
1252 "The returned string is read-only.\n")
1253#define FUNC_NAME s_scm_substring_read_only
1254{
1255 size_t len, from, to;
1256
1257 SCM_VALIDATE_STRING (1, str);
1258 len = scm_i_string_length (str);
1259 from = scm_to_unsigned_integer (start, 0, len);
1260 if (SCM_UNBNDP (end))
1261 to = len;
1262 else
1263 to = scm_to_unsigned_integer (end, from, len);
1264 return scm_i_substring_read_only (str, from, to);
1265}
1266#undef FUNC_NAME
1267
3ee86942
MV
1268SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1269 (SCM str, SCM start, SCM end),
1270 "Return a newly allocated string formed from the characters\n"
1271 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1272 "ending with index @var{end} (exclusive).\n"
1273 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1274 "exact integers satisfying:\n\n"
1275 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1276#define FUNC_NAME s_scm_substring_copy
1277{
e1b29f6a
MV
1278 /* For the Scheme version, START is mandatory, but for the C
1279 version, it is optional. See scm_string_copy in srfi-13.c for a
1280 rationale.
1281 */
1282
1283 size_t from, to;
3ee86942
MV
1284
1285 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1286 scm_i_get_substring_spec (scm_i_string_length (str),
1287 start, &from, end, &to);
3ee86942
MV
1288 return scm_i_substring_copy (str, from, to);
1289}
1290#undef FUNC_NAME
1291
1292SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1293 (SCM str, SCM start, SCM end),
1294 "Return string that indirectly refers to the characters\n"
1295 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1296 "ending with index @var{end} (exclusive).\n"
1297 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1298 "exact integers satisfying:\n\n"
1299 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1300#define FUNC_NAME s_scm_substring_shared
1301{
1302 size_t len, from, to;
1303
1304 SCM_VALIDATE_STRING (1, str);
1305 len = scm_i_string_length (str);
1306 from = scm_to_unsigned_integer (start, 0, len);
1307 if (SCM_UNBNDP (end))
1308 to = len;
1309 else
1310 to = scm_to_unsigned_integer (end, from, len);
1311 return scm_i_substring_shared (str, from, to);
1312}
1313#undef FUNC_NAME
685c0d71 1314
3b3b36dd 1315SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1316 (SCM args),
9c44cd45 1317 "Return a newly allocated string whose characters form the\n"
0d26a824 1318 "concatenation of the given strings, @var{args}.")
1bbd0b84 1319#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1320{
1321 SCM res;
9c44cd45
MG
1322 size_t len = 0;
1323 int wide = 0;
c829a427 1324 SCM l, s;
bd4911ef 1325 size_t i;
9909c395
MG
1326 union
1327 {
1328 char *narrow;
1329 scm_t_wchar *wide;
1330 } data;
af45e3b0
DH
1331
1332 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1333 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1334 {
1335 s = SCM_CAR (l);
1336 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1337 len += scm_i_string_length (s);
1338 if (!scm_i_is_narrow_string (s))
1339 wide = 1;
c829a427 1340 }
9909c395 1341 data.narrow = NULL;
9c44cd45 1342 if (!wide)
9909c395 1343 res = scm_i_make_string (len, &data.narrow);
9c44cd45 1344 else
9909c395 1345 res = scm_i_make_wide_string (len, &data.wide);
9c44cd45
MG
1346
1347 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1348 {
edea856c 1349 size_t len;
c829a427 1350 s = SCM_CAR (l);
3ee86942 1351 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1352 len = scm_i_string_length (s);
9c44cd45
MG
1353 if (!wide)
1354 {
9909c395
MG
1355 memcpy (data.narrow, scm_i_string_chars (s), len);
1356 data.narrow += len;
9c44cd45
MG
1357 }
1358 else
1359 {
1360 if (scm_i_is_narrow_string (s))
1361 {
1362 for (i = 0; i < scm_i_string_length (s); i++)
9909c395 1363 data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
9c44cd45
MG
1364 }
1365 else
9909c395 1366 u32_cpy ((scm_t_uint32 *) data.wide,
9c44cd45 1367 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
9909c395 1368 data.wide += len;
9c44cd45 1369 }
c829a427
MV
1370 scm_remember_upto_here_1 (s);
1371 }
0f2d19dd
JB
1372 return res;
1373}
1bbd0b84 1374#undef FUNC_NAME
0f2d19dd 1375
c829a427
MV
1376int
1377scm_is_string (SCM obj)
1378{
3ee86942 1379 return IS_STRING (obj);
c829a427 1380}
24933780 1381
fac32b51 1382SCM
587a3355
MG
1383scm_from_stringn (const char *str, size_t len, const char *encoding,
1384 scm_t_string_failed_conversion_handler handler)
1385{
1386 size_t u32len, i;
1387 scm_t_wchar *u32;
1388 int wide = 0;
1389 SCM res;
1390
fac32b51
MG
1391 if (len == 0)
1392 return scm_nullstr;
1393
889975e5
MG
1394 if (encoding == NULL)
1395 {
1396 /* If encoding is null, use Latin-1. */
1397 char *buf;
1398 res = scm_i_make_string (len, &buf);
1399 memcpy (buf, str, len);
1400 return res;
1401 }
1402
587a3355
MG
1403 u32len = 0;
1404 u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
1405 (enum iconv_ilseq_handler)
1406 handler,
1407 str, len,
1408 NULL,
1409 NULL, &u32len);
1410
1411 if (u32 == NULL)
1412 {
1413 if (errno == ENOMEM)
1414 scm_memory_error ("locale string conversion");
1415 else
1416 {
889975e5 1417 /* There are invalid sequences in the input string. */
587a3355
MG
1418 SCM errstr;
1419 char *dst;
587a3355
MG
1420 errstr = scm_i_make_string (len, &dst);
1421 memcpy (dst, str, len);
1422 scm_misc_error (NULL, "input locale conversion error from ~s: ~s",
1423 scm_list_2 (scm_from_locale_string (encoding),
1424 errstr));
1425 scm_remember_upto_here_1 (errstr);
1426 }
1427 }
1428
1429 i = 0;
1430 while (i < u32len)
1431 if (u32[i++] > 0xFF)
1432 {
1433 wide = 1;
1434 break;
1435 }
1436
1437 if (!wide)
1438 {
1439 char *dst;
1440 res = scm_i_make_string (u32len, &dst);
1441 for (i = 0; i < u32len; i ++)
1442 dst[i] = (unsigned char) u32[i];
1443 dst[u32len] = '\0';
1444 }
1445 else
1446 {
1447 scm_t_wchar *wdst;
1448 res = scm_i_make_wide_string (u32len, &wdst);
1449 u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
1450 wdst[u32len] = 0;
1451 }
1452
1453 free (u32);
1454 return res;
1455}
1456
c829a427
MV
1457SCM
1458scm_from_locale_stringn (const char *str, size_t len)
1459{
889975e5
MG
1460 const char *enc;
1461 scm_t_string_failed_conversion_handler hndl;
1462 SCM inport;
1463 scm_t_port *pt;
4d4528e7 1464
9c44cd45 1465 if (len == (size_t) -1)
c829a427 1466 len = strlen (str);
9c44cd45
MG
1467 if (len == 0)
1468 return scm_nullstr;
1469
889975e5
MG
1470 inport = scm_current_input_port ();
1471 if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
1472 {
1473 pt = SCM_PTAB_ENTRY (inport);
1474 enc = pt->encoding;
1475 hndl = pt->ilseq_handler;
1476 }
1477 else
1478 {
1479 enc = NULL;
1480 hndl = SCM_FAILED_CONVERSION_ERROR;
1481 }
1482
1483 return scm_from_stringn (str, len, enc, hndl);
c829a427 1484}
4d4528e7 1485
c829a427
MV
1486SCM
1487scm_from_locale_string (const char *str)
4d4528e7 1488{
9c44cd45
MG
1489 if (str == NULL)
1490 return scm_nullstr;
1491
c829a427
MV
1492 return scm_from_locale_stringn (str, -1);
1493}
4d4528e7 1494
587a3355
MG
1495SCM
1496scm_i_from_utf8_string (const scm_t_uint8 *str)
1497{
1498 return scm_from_stringn ((const char *) str,
1499 strlen ((char *) str), "UTF-8",
1500 SCM_FAILED_CONVERSION_ERROR);
1501}
1502
50b1996f
MG
1503/* Create a new scheme string from the C string STR. The memory of
1504 STR may be used directly as storage for the new string. */
13a94556
LC
1505/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
1506 would be to register a finalizer to eventually free(3) STR, which isn't
1507 worth it. Should we just deprecate the `scm_take_' functions? */
c829a427
MV
1508SCM
1509scm_take_locale_stringn (char *str, size_t len)
1510{
13a94556 1511 SCM res;
48ddf0d9 1512
13a94556
LC
1513 res = scm_from_locale_stringn (str, len);
1514 free (str);
c829a427 1515
c829a427
MV
1516 return res;
1517}
1518
48ddf0d9
KR
1519SCM
1520scm_take_locale_string (char *str)
1521{
1522 return scm_take_locale_stringn (str, -1);
1523}
1524
9c44cd45
MG
1525/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1526 and \UXXXXXX. */
1527static void
1528unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1529{
1530 char *before, *after;
1531 size_t i, j;
1532
1533 before = *bufp;
1534 after = *bufp;
1535 i = 0;
1536 j = 0;
1537 while (i < *lenp)
1538 {
1539 if ((i <= *lenp - 6)
1540 && before[i] == '\\'
1541 && before[i + 1] == 'u'
1542 && before[i + 2] == '0' && before[i + 3] == '0')
1543 {
1544 /* Convert \u00NN to \xNN */
1545 after[j] = '\\';
1546 after[j + 1] = 'x';
30a6b9ca
MG
1547 after[j + 2] = tolower ((int) before[i + 4]);
1548 after[j + 3] = tolower ((int) before[i + 5]);
9c44cd45
MG
1549 i += 6;
1550 j += 4;
1551 }
1552 else if ((i <= *lenp - 10)
1553 && before[i] == '\\'
1554 && before[i + 1] == 'U'
1555 && before[i + 2] == '0' && before[i + 3] == '0')
1556 {
1557 /* Convert \U00NNNNNN to \UNNNNNN */
1558 after[j] = '\\';
1559 after[j + 1] = 'U';
30a6b9ca
MG
1560 after[j + 2] = tolower ((int) before[i + 4]);
1561 after[j + 3] = tolower ((int) before[i + 5]);
1562 after[j + 4] = tolower ((int) before[i + 6]);
1563 after[j + 5] = tolower ((int) before[i + 7]);
1564 after[j + 6] = tolower ((int) before[i + 8]);
1565 after[j + 7] = tolower ((int) before[i + 9]);
9c44cd45
MG
1566 i += 10;
1567 j += 8;
1568 }
1569 else
1570 {
1571 after[j] = before[i];
1572 i++;
1573 j++;
1574 }
1575 }
1576 *lenp = j;
1577 after = scm_realloc (after, j);
1578}
1579
c829a427 1580char *
fac32b51 1581scm_to_locale_stringn (SCM str, size_t *lenp)
c829a427 1582{
889975e5
MG
1583 SCM outport;
1584 scm_t_port *pt;
9c44cd45
MG
1585 const char *enc;
1586
889975e5
MG
1587 outport = scm_current_output_port ();
1588 if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
1589 {
1590 pt = SCM_PTAB_ENTRY (outport);
1591 enc = pt->encoding;
1592 }
1593 else
1594 enc = NULL;
9c44cd45 1595
889975e5
MG
1596 return scm_to_stringn (str, lenp,
1597 enc,
1598 scm_i_get_conversion_strategy (SCM_BOOL_F));
9c44cd45
MG
1599}
1600
1601/* Low-level scheme to C string conversion function. */
1602char *
587a3355 1603scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
eca29b02 1604 scm_t_string_failed_conversion_handler handler)
9c44cd45 1605{
9c44cd45
MG
1606 char *buf;
1607 size_t ilen, len, i;
889975e5
MG
1608 int ret;
1609 const char *enc;
4d4528e7 1610
3ee86942 1611 if (!scm_is_string (str))
c829a427 1612 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1613 ilen = scm_i_string_length (str);
1614
1615 if (ilen == 0)
1616 {
1617 buf = scm_malloc (1);
1618 buf[0] = '\0';
1619 if (lenp)
1620 *lenp = 0;
1621 return buf;
1622 }
587a3355 1623
c829a427 1624 if (lenp == NULL)
9c44cd45
MG
1625 for (i = 0; i < ilen; i++)
1626 if (scm_i_string_ref (str, i) == '\0')
1627 scm_misc_error (NULL,
1628 "string contains #\\nul character: ~S",
1629 scm_list_1 (str));
1630
889975e5 1631 if (scm_i_is_narrow_string (str) && (encoding == NULL))
c829a427 1632 {
889975e5
MG
1633 /* If using native Latin-1 encoding, just copy the string
1634 contents. */
9c44cd45
MG
1635 if (lenp)
1636 {
1637 buf = scm_malloc (ilen);
1638 memcpy (buf, scm_i_string_chars (str), ilen);
1639 *lenp = ilen;
1640 return buf;
1641 }
1642 else
1643 {
1644 buf = scm_malloc (ilen + 1);
1645 memcpy (buf, scm_i_string_chars (str), ilen);
1646 buf[ilen] = '\0';
1647 return buf;
1648 }
c829a427 1649 }
9c44cd45 1650
587a3355 1651
9c44cd45
MG
1652 buf = NULL;
1653 len = 0;
889975e5
MG
1654 enc = encoding;
1655 if (enc == NULL)
1656 enc = "ISO-8859-1";
1657 if (scm_i_is_narrow_string (str))
1658 {
1659 ret = mem_iconveh (scm_i_string_chars (str), ilen,
1660 "ISO-8859-1", enc,
1661 (enum iconv_ilseq_handler) handler, NULL,
1662 &buf, &len);
9c44cd45 1663
889975e5
MG
1664 if (ret == 0 && handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1665 unistring_escapes_to_guile_escapes (&buf, &len);
9c44cd45 1666
889975e5
MG
1667 if (ret != 0)
1668 {
1669 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1670 scm_list_2 (scm_from_locale_string (enc),
1671 str));
1672 }
1673 }
1674 else
1675 {
1676 buf = u32_conv_to_encoding (enc,
1677 (enum iconv_ilseq_handler) handler,
1678 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1679 ilen,
1680 NULL,
1681 NULL, &len);
1682 if (buf == NULL)
1683 {
1684 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1685 scm_list_2 (scm_from_locale_string (enc),
1686 str));
1687 }
5f5920e0
MG
1688 if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
1689 unistring_escapes_to_guile_escapes (&buf, &len);
889975e5 1690 }
9c44cd45 1691 if (lenp)
4d4528e7 1692 *lenp = len;
9c44cd45
MG
1693 else
1694 {
1695 buf = scm_realloc (buf, len + 1);
1696 buf[len] = '\0';
1697 }
24933780 1698
c829a427 1699 scm_remember_upto_here_1 (str);
9c44cd45 1700 return buf;
4d4528e7 1701}
af68e5e5 1702
c829a427
MV
1703char *
1704scm_to_locale_string (SCM str)
1705{
1706 return scm_to_locale_stringn (str, NULL);
1707}
af68e5e5 1708
587a3355
MG
1709scm_t_uint8 *
1710scm_i_to_utf8_string (SCM str)
1711{
1712 char *u8str;
1713 u8str = scm_to_stringn (str, NULL, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
1714 return (scm_t_uint8 *) u8str;
1715}
1716
c829a427
MV
1717size_t
1718scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1719{
1720 size_t len;
9c44cd45 1721 char *result = NULL;
3ee86942 1722 if (!scm_is_string (str))
c829a427 1723 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1724 result = scm_to_locale_stringn (str, &len);
1725
1726 memcpy (buf, result, (len > max_len) ? max_len : len);
1727 free (result);
1728
c829a427
MV
1729 scm_remember_upto_here_1 (str);
1730 return len;
1731}
af68e5e5 1732
3ee86942
MV
1733/* converts C scm_array of strings to SCM scm_list of strings. */
1734/* If argc < 0, a null terminated scm_array is assumed. */
9c44cd45 1735SCM
3ee86942
MV
1736scm_makfromstrs (int argc, char **argv)
1737{
1738 int i = argc;
1739 SCM lst = SCM_EOL;
1740 if (0 > i)
1741 for (i = 0; argv[i]; i++);
1742 while (i--)
1743 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1744 return lst;
1745}
1746
c829a427
MV
1747/* Return a newly allocated array of char pointers to each of the strings
1748 in args, with a terminating NULL pointer. */
1749
1750char **
1751scm_i_allocate_string_pointers (SCM list)
2a776823 1752#define FUNC_NAME "scm_i_allocate_string_pointers"
af68e5e5 1753{
c829a427
MV
1754 char **result;
1755 int len = scm_ilength (list);
1756 int i;
1757
1758 if (len < 0)
1759 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1760
2a776823
LC
1761 result = scm_gc_malloc ((len + 1) * sizeof (char *),
1762 "string pointers");
c829a427 1763 result[len] = NULL;
c829a427
MV
1764
1765 /* The list might be have been modified in another thread, so
1766 we check LIST before each access.
1767 */
d2e53ed6 1768 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427 1769 {
2a776823
LC
1770 SCM str;
1771 size_t len;
1772
1773 str = SCM_CAR (list);
1774 len = scm_c_string_length (str);
1775
1776 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
1777 memcpy (result[i], scm_i_string_chars (str), len);
1778 result[i][len] = '\0';
1779
c829a427
MV
1780 list = SCM_CDR (list);
1781 }
1782
c829a427 1783 return result;
af68e5e5 1784}
2a776823 1785#undef FUNC_NAME
24933780 1786
6f14f578
MV
1787void
1788scm_i_get_substring_spec (size_t len,
1789 SCM start, size_t *cstart,
1790 SCM end, size_t *cend)
1791{
1792 if (SCM_UNBNDP (start))
1793 *cstart = 0;
1794 else
1795 *cstart = scm_to_unsigned_integer (start, 0, len);
1796
1797 if (SCM_UNBNDP (end))
1798 *cend = len;
1799 else
1800 *cend = scm_to_unsigned_integer (end, *cstart, len);
1801}
1802
3ee86942
MV
1803#if SCM_ENABLE_DEPRECATED
1804
556d75db
MV
1805/* When these definitions are removed, it becomes reasonable to use
1806 read-only strings for string literals. For that, change the reader
1807 to create string literals with scm_c_substring_read_only instead of
1808 with scm_c_substring_copy.
1809*/
1810
3ee86942 1811int
fe78c51a 1812scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1813{
1814 scm_c_issue_deprecation_warning
1815 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1816
2616f0e0 1817 return scm_is_string (str);
3ee86942
MV
1818}
1819
1820char *
fe78c51a 1821scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1822{
1823 char *chars;
1824
1825 scm_c_issue_deprecation_warning
1826 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1827
2616f0e0
MV
1828 /* We don't accept shared substrings here since they are not
1829 null-terminated.
1830 */
1831 if (IS_SH_STRING (str))
1832 scm_misc_error (NULL,
1833 "SCM_STRING_CHARS does not work with shared substrings.",
1834 SCM_EOL);
1835
877f06c3 1836 /* We explicitly test for read-only strings to produce a better
556d75db
MV
1837 error message.
1838 */
1839
1840 if (IS_RO_STRING (str))
1841 scm_misc_error (NULL,
1842 "SCM_STRING_CHARS does not work with read-only strings.",
1843 SCM_EOL);
1844
2616f0e0 1845 /* The following is still wrong, of course...
3ee86942 1846 */
9c44cd45 1847 str = scm_i_string_start_writing (str);
3ee86942
MV
1848 chars = scm_i_string_writable_chars (str);
1849 scm_i_string_stop_writing ();
1850 return chars;
1851}
1852
1853size_t
fe78c51a 1854scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1855{
1856 scm_c_issue_deprecation_warning
1857 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1858 return scm_c_string_length (str);
1859}
1860
1861#endif
1862
2a610be5
AW
1863static SCM
1864string_handle_ref (scm_t_array_handle *h, size_t index)
1865{
1866 return scm_c_string_ref (h->array, index);
1867}
1868
1869static void
1870string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
1871{
1872 scm_c_string_set_x (h->array, index, val);
1873}
1874
1875static void
1876string_get_handle (SCM v, scm_t_array_handle *h)
1877{
1878 h->array = v;
1879 h->ndims = 1;
1880 h->dims = &h->dim0;
1881 h->dim0.lbnd = 0;
1882 h->dim0.ubnd = scm_c_string_length (v) - 1;
1883 h->dim0.inc = 1;
1884 h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
1885 h->elements = h->writable_elements = NULL;
1886}
1887
1888SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f & ~2,
1889 string_handle_ref, string_handle_set,
1890 string_get_handle);
f45eccff 1891SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string);
2a610be5 1892
0f2d19dd
JB
1893void
1894scm_init_strings ()
0f2d19dd 1895{
3ee86942 1896 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1897
a0599745 1898#include "libguile/strings.x"
0f2d19dd
JB
1899}
1900
89e00824
ML
1901
1902/*
1903 Local Variables:
1904 c-file-style: "gnu"
1905 End:
1906*/