More comments for string functions
[bpt/guile.git] / libguile / strings.c
CommitLineData
50b1996f 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e 3 * This library is free software; you can redistribute it and/or
53befeb7
NJ
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
0f2d19dd 7 *
53befeb7
NJ
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
73be1d9e
MV
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
53befeb7
NJ
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301 USA
73be1d9e 17 */
1bbd0b84 18
1bbd0b84 19
0f2d19dd 20\f
dbb605f5
LC
21#ifdef HAVE_CONFIG_H
22# include <config.h>
23#endif
0f2d19dd 24
faf2c9d7 25#include <string.h>
3ee86942 26#include <stdio.h>
9c44cd45
MG
27#include <ctype.h>
28#include <unistr.h>
faf2c9d7 29
a0599745
MD
30#include "libguile/_scm.h"
31#include "libguile/chars.h"
7c33806a 32#include "libguile/root.h"
a0599745 33#include "libguile/strings.h"
1afff620 34#include "libguile/deprecation.h"
a0599745 35#include "libguile/validate.h"
c829a427 36#include "libguile/dynwind.h"
1afff620 37
0f2d19dd
JB
38\f
39
40/* {Strings}
41 */
42
3ee86942
MV
43
44/* Stringbufs
45 *
46 * XXX - keeping an accurate refcount during GC seems to be quite
47 * tricky, so we just keep score of whether a stringbuf might be
50b1996f 48 * shared, not whether it definitely is.
3ee86942
MV
49 *
50 * The scheme I (mvo) tried to keep an accurate reference count would
51 * recount all strings that point to a stringbuf during the mark-phase
52 * of the GC. This was done since one cannot access the stringbuf of
53 * a string when that string is freed (in order to decrease the
54 * reference count). The memory of the stringbuf might have been
55 * reused already for something completely different.
56 *
57 * This recounted worked for a small number of threads beating on
58 * cow-strings, but it failed randomly with more than 10 threads, say.
59 * I couldn't figure out what went wrong, so I used the conservative
60 * approach implemented below.
61 *
62 * A stringbuf needs to know its length, but only so that it can be
63 * reported when the stringbuf is freed.
64 *
50b1996f
MG
65 * There are 3 storage strategies for stringbufs: inline, outline, and
66 * wide.
67 *
68 * Inline strings are small 8-bit strings stored within the double
69 * cell itself. Outline strings are larger 8-bit strings with GC
70 * allocated storage. Wide strings are 32-bit strings with allocated
71 * storage.
72 *
73 * There was little value in making wide string inlineable, since
74 * there is only room for three inlined 32-bit characters. Thus wide
75 * stringbufs are never inlined.
3ee86942
MV
76 */
77
78#define STRINGBUF_F_SHARED 0x100
79#define STRINGBUF_F_INLINE 0x200
50b1996f
MG
80#define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
81 encoding. Otherwise, strings
82 are Latin-1. */
3ee86942
MV
83
84#define STRINGBUF_TAG scm_tc7_stringbuf
85#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
86#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
9c44cd45 87#define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
3ee86942
MV
88
89#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
90#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
91#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
92#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
93
94#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
95 ? STRINGBUF_INLINE_CHARS (buf) \
96 : STRINGBUF_OUTLINE_CHARS (buf))
50b1996f 97
9c44cd45 98#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
3ee86942
MV
99#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
100 ? STRINGBUF_INLINE_LENGTH (buf) \
101 : STRINGBUF_OUTLINE_LENGTH (buf))
102
103#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
104
105#define SET_STRINGBUF_SHARED(buf) \
106 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
107
108#if SCM_DEBUG
109static size_t lenhist[1001];
110#endif
111
50b1996f
MG
112/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
113 characters. */
3ee86942
MV
114static SCM
115make_stringbuf (size_t len)
0f2d19dd 116{
3ee86942
MV
117 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
118 scm_i_symbol_chars, all stringbufs are null-terminated. Once
119 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
120 has been changed for scm_i_symbol_chars, this null-termination
121 can be dropped.
122 */
123
124#if SCM_DEBUG
125 if (len < 1000)
126 lenhist[len]++;
127 else
128 lenhist[1000]++;
129#endif
0f2d19dd 130
3ee86942
MV
131 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
132 {
133 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
134 0, 0, 0);
135 }
136 else
137 {
138 char *mem = scm_gc_malloc (len+1, "string");
139 mem[len] = '\0';
140 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
141 (scm_t_bits) len, (scm_t_bits) 0);
142 }
143}
e53cc817 144
50b1996f
MG
145/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
146 characters. */
9c44cd45
MG
147static SCM
148make_wide_stringbuf (size_t len)
149{
150 scm_t_wchar *mem;
151#if SCM_DEBUG
152 if (len < 1000)
153 lenhist[len]++;
154 else
155 lenhist[1000]++;
156#endif
157
158 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
159 mem[len] = 0;
160 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_WIDE, (scm_t_bits) mem,
161 (scm_t_bits) len, (scm_t_bits) 0);
162}
163
2b829bbb
KR
164/* Return a new stringbuf whose underlying storage consists of the LEN+1
165 octets pointed to by STR (the last octet is zero). */
7f74cf9a 166SCM
fd0a5bbc
HWN
167scm_i_take_stringbufn (char *str, size_t len)
168{
2b829bbb 169 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
fd0a5bbc
HWN
170
171 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
172 (scm_t_bits) len, (scm_t_bits) 0);
173}
174
3ee86942
MV
175SCM
176scm_i_stringbuf_mark (SCM buf)
177{
178 return SCM_BOOL_F;
179}
1bbd0b84 180
3ee86942
MV
181void
182scm_i_stringbuf_free (SCM buf)
0f2d19dd 183{
3ee86942 184 if (!STRINGBUF_INLINE (buf))
9c44cd45
MG
185 {
186 if (!STRINGBUF_WIDE (buf))
187 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
188 STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
189 else
190 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
191 sizeof (scm_t_wchar) * (STRINGBUF_OUTLINE_LENGTH (buf)
192 + 1), "string");
193 }
194
195}
196
50b1996f
MG
197/* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
198 one containing 32-bit UCS-4-encoded characters. */
9c44cd45
MG
199static void
200widen_stringbuf (SCM buf)
201{
202 size_t i, len;
203 scm_t_wchar *mem;
204
205 if (STRINGBUF_WIDE (buf))
206 return;
207
208 if (STRINGBUF_INLINE (buf))
209 {
210 len = STRINGBUF_INLINE_LENGTH (buf);
211
212 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
213 for (i = 0; i < len; i++)
214 mem[i] =
215 (scm_t_wchar) (unsigned char) STRINGBUF_INLINE_CHARS (buf)[i];
216 mem[len] = 0;
217
218 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) ^ STRINGBUF_F_INLINE);
219 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
220 SCM_SET_CELL_WORD_1 (buf, mem);
221 SCM_SET_CELL_WORD_2 (buf, len);
222 }
223 else
224 {
225 len = STRINGBUF_OUTLINE_LENGTH (buf);
226
227 mem = scm_gc_malloc (sizeof (scm_t_wchar) * (len + 1), "string");
228 for (i = 0; i < len; i++)
229 mem[i] =
230 (scm_t_wchar) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf)[i];
231 mem[len] = 0;
232
233 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), len + 1, "string");
234
235 SCM_SET_CELL_WORD_0 (buf, SCM_CELL_WORD_0 (buf) | STRINGBUF_F_WIDE);
236 SCM_SET_CELL_WORD_1 (buf, mem);
237 SCM_SET_CELL_WORD_2 (buf, len);
238 }
3ee86942 239}
bd9e24b3 240
9de87eea 241scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 242
3ee86942
MV
243/* Copy-on-write strings.
244 */
bd9e24b3 245
3ee86942 246#define STRING_TAG scm_tc7_string
bd9e24b3 247
3ee86942
MV
248#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
249#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
250#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 251
3ee86942
MV
252#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
253#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
254
255#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
256
ed35de72
MV
257/* Read-only strings.
258 */
259
260#define RO_STRING_TAG (scm_tc7_string + 0x200)
261#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
262
e1b29f6a
MV
263/* Mutation-sharing substrings
264 */
265
266#define SH_STRING_TAG (scm_tc7_string + 0x100)
267
268#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
269/* START and LENGTH as for STRINGs. */
270
271#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
272
50b1996f
MG
273/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
274 characters. CHARSP, if not NULL, will be set to location of the
275 char array. */
3ee86942
MV
276SCM
277scm_i_make_string (size_t len, char **charsp)
278{
279 SCM buf = make_stringbuf (len);
280 SCM res;
281 if (charsp)
282 *charsp = STRINGBUF_CHARS (buf);
283 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
284 (scm_t_bits)0, (scm_t_bits) len);
285 return res;
0f2d19dd
JB
286}
287
50b1996f
MG
288/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
289 characters. CHARSP, if not NULL, will be set to location of the
290 character array. */
9c44cd45 291SCM
50b1996f 292scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
9c44cd45
MG
293{
294 SCM buf = make_wide_stringbuf (len);
295 SCM res;
296 if (charsp)
297 *charsp = STRINGBUF_WIDE_CHARS (buf);
298 res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
299 (scm_t_bits) 0, (scm_t_bits) len);
300 return res;
301}
302
3ee86942
MV
303static void
304validate_substring_args (SCM str, size_t start, size_t end)
305{
306 if (!IS_STRING (str))
307 scm_wrong_type_arg_msg (NULL, 0, str, "string");
308 if (start > STRING_LENGTH (str))
309 scm_out_of_range (NULL, scm_from_size_t (start));
310 if (end > STRING_LENGTH (str) || end < start)
311 scm_out_of_range (NULL, scm_from_size_t (end));
312}
0f2d19dd 313
e1b29f6a
MV
314static inline void
315get_str_buf_start (SCM *str, SCM *buf, size_t *start)
316{
317 *start = STRING_START (*str);
318 if (IS_SH_STRING (*str))
319 {
320 *str = SH_STRING_STRING (*str);
321 *start += STRING_START (*str);
322 }
323 *buf = STRING_STRINGBUF (*str);
324}
325
3ee86942
MV
326SCM
327scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 328{
e1b29f6a
MV
329 SCM buf;
330 size_t str_start;
331 get_str_buf_start (&str, &buf, &str_start);
9de87eea 332 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 333 SET_STRINGBUF_SHARED (buf);
9de87eea 334 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 335 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
336 (scm_t_bits)str_start + start,
337 (scm_t_bits) end - start);
0f2d19dd
JB
338}
339
ed35de72
MV
340SCM
341scm_i_substring_read_only (SCM str, size_t start, size_t end)
342{
45a9f430
LC
343 SCM buf;
344 size_t str_start;
345 get_str_buf_start (&str, &buf, &str_start);
346 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
347 SET_STRINGBUF_SHARED (buf);
348 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
349 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
350 (scm_t_bits)str_start + start,
351 (scm_t_bits) end - start);
ed35de72
MV
352}
353
3ee86942
MV
354SCM
355scm_i_substring_copy (SCM str, size_t start, size_t end)
356{
357 size_t len = end - start;
edea856c 358 SCM buf, my_buf;
e1b29f6a
MV
359 size_t str_start;
360 get_str_buf_start (&str, &buf, &str_start);
9c44cd45
MG
361 if (scm_i_is_narrow_string (str))
362 {
363 my_buf = make_stringbuf (len);
364 memcpy (STRINGBUF_CHARS (my_buf),
365 STRINGBUF_CHARS (buf) + str_start + start, len);
366 }
367 else
368 {
369 my_buf = make_wide_stringbuf (len);
370 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
371 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
372 + start), len);
373 /* Even though this string is wide, the substring may be narrow.
50b1996f 374 Consider adding code to narrow the string. */
9c44cd45 375 }
3ee86942 376 scm_remember_upto_here_1 (buf);
9c44cd45
MG
377 return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
378 (scm_t_bits) 0, (scm_t_bits) len);
3ee86942 379}
0f2d19dd 380
e1b29f6a
MV
381SCM
382scm_i_substring_shared (SCM str, size_t start, size_t end)
383{
384 if (start == 0 && end == STRING_LENGTH (str))
385 return str;
386 else
387 {
388 size_t len = end - start;
389 if (IS_SH_STRING (str))
390 {
391 start += STRING_START (str);
392 str = SH_STRING_STRING (str);
393 }
394 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
395 (scm_t_bits)start, (scm_t_bits) len);
396 }
397}
398
3ee86942
MV
399SCM
400scm_c_substring (SCM str, size_t start, size_t end)
401{
402 validate_substring_args (str, start, end);
403 return scm_i_substring (str, start, end);
404}
ee149d03 405
ed35de72
MV
406SCM
407scm_c_substring_read_only (SCM str, size_t start, size_t end)
408{
409 validate_substring_args (str, start, end);
410 return scm_i_substring_read_only (str, start, end);
411}
412
0f2d19dd 413SCM
3ee86942 414scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 415{
3ee86942
MV
416 validate_substring_args (str, start, end);
417 return scm_i_substring_copy (str, start, end);
418}
419
3ee86942
MV
420SCM
421scm_c_substring_shared (SCM str, size_t start, size_t end)
422{
423 validate_substring_args (str, start, end);
424 return scm_i_substring_shared (str, start, end);
425}
0f2d19dd 426
ee149d03 427SCM
3ee86942 428scm_i_string_mark (SCM str)
ee149d03 429{
3ee86942
MV
430 if (IS_SH_STRING (str))
431 return SH_STRING_STRING (str);
432 else
433 return STRING_STRINGBUF (str);
ee149d03
JB
434}
435
3ee86942
MV
436void
437scm_i_string_free (SCM str)
438{
439}
36284627 440
3ee86942
MV
441/* Internal accessors
442 */
443
50b1996f
MG
444/* Returns the number of characters in STR. This may be different
445 than the memory size of the string storage. */
3ee86942
MV
446size_t
447scm_i_string_length (SCM str)
0f2d19dd 448{
3ee86942 449 return STRING_LENGTH (str);
0f2d19dd
JB
450}
451
50b1996f
MG
452/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
453 encoding. False if it is 'wide', having a 32-bit UCS-4
454 encoding. */
9c44cd45
MG
455int
456scm_i_is_narrow_string (SCM str)
457{
458 return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
459}
460
50b1996f
MG
461/* Returns a pointer to the 8-bit Latin-1 encoded character array of
462 STR. */
3ee86942
MV
463const char *
464scm_i_string_chars (SCM str)
465{
466 SCM buf;
e1b29f6a
MV
467 size_t start;
468 get_str_buf_start (&str, &buf, &start);
9c44cd45
MG
469 if (scm_i_is_narrow_string (str))
470 return STRINGBUF_CHARS (buf) + start;
471 else
472 scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
473 scm_list_1 (str));
474 return NULL;
3ee86942 475}
b00418df 476
50b1996f
MG
477/* Returns a pointer to the 32-bit UCS-4 encoded character array of
478 STR. */
9c44cd45
MG
479const scm_t_wchar *
480scm_i_string_wide_chars (SCM str)
481{
482 SCM buf;
483 size_t start;
484
485 get_str_buf_start (&str, &buf, &start);
486 if (!scm_i_is_narrow_string (str))
487 return STRINGBUF_WIDE_CHARS (buf) + start;
488 else
489 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
490 scm_list_1 (str));
491}
492
493/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
494 a new string buffer, so that it can be modified without modifying
50b1996f
MG
495 other strings. Also, lock the string mutex. Later, one must call
496 scm_i_string_stop_writing to unlock the mutex. */
9c44cd45
MG
497SCM
498scm_i_string_start_writing (SCM orig_str)
b00418df 499{
ed35de72 500 SCM buf, str = orig_str;
e1b29f6a 501 size_t start;
ed35de72 502
e1b29f6a 503 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
504 if (IS_RO_STRING (str))
505 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
506
9de87eea 507 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
508 if (STRINGBUF_SHARED (buf))
509 {
9c44cd45 510 /* Clone the stringbuf. */
3ee86942
MV
511 size_t len = STRING_LENGTH (str);
512 SCM new_buf;
513
9de87eea 514 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 515
9c44cd45
MG
516 if (scm_i_is_narrow_string (str))
517 {
518 new_buf = make_stringbuf (len);
519 memcpy (STRINGBUF_CHARS (new_buf),
520 STRINGBUF_CHARS (buf) + STRING_START (str), len);
521
522 }
523 else
524 {
525 new_buf = make_wide_stringbuf (len);
526 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
527 (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
528 + STRING_START (str)), len);
529 }
3ee86942
MV
530 scm_i_thread_put_to_sleep ();
531 SET_STRING_STRINGBUF (str, new_buf);
532 start -= STRING_START (str);
533 SET_STRING_START (str, 0);
534 scm_i_thread_wake_up ();
535
536 buf = new_buf;
537
9de87eea 538 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 539 }
9c44cd45
MG
540 return orig_str;
541}
542
50b1996f 543/* Return a pointer to the 8-bit Latin-1 chars of a string. */
9c44cd45
MG
544char *
545scm_i_string_writable_chars (SCM str)
546{
547 SCM buf;
548 size_t start;
3ee86942 549
9c44cd45
MG
550 get_str_buf_start (&str, &buf, &start);
551 if (scm_i_is_narrow_string (str))
552 return STRINGBUF_CHARS (buf) + start;
553 else
554 scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
555 scm_list_1 (str));
556 return NULL;
557}
558
50b1996f 559/* Return a pointer to the UCS-4 codepoints of a string. */
9c44cd45
MG
560static scm_t_wchar *
561scm_i_string_writable_wide_chars (SCM str)
562{
563 SCM buf;
564 size_t start;
565
566 get_str_buf_start (&str, &buf, &start);
567 if (!scm_i_is_narrow_string (str))
568 return STRINGBUF_WIDE_CHARS (buf) + start;
569 else
570 scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
571 scm_list_1 (str));
b00418df
DH
572}
573
50b1996f
MG
574/* Unlock the string mutex that was locked when
575 scm_i_string_start_writing was called. */
3ee86942
MV
576void
577scm_i_string_stop_writing (void)
578{
9de87eea 579 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 580}
b00418df 581
50b1996f 582/* Return the Xth character of STR as a UCS-4 codepoint. */
9c44cd45
MG
583scm_t_wchar
584scm_i_string_ref (SCM str, size_t x)
585{
586 if (scm_i_is_narrow_string (str))
587 return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
588 else
589 return scm_i_string_wide_chars (str)[x];
590}
591
50b1996f 592/* Set the Pth character of STR to UCS-4 codepoint CHR. */
9c44cd45
MG
593void
594scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
595{
596 if (chr > 0xFF && scm_i_is_narrow_string (str))
597 widen_stringbuf (STRING_STRINGBUF (str));
598
599 if (scm_i_is_narrow_string (str))
600 {
601 char *dst = scm_i_string_writable_chars (str);
602 dst[p] = (char) (unsigned char) chr;
603 }
604 else
605 {
606 scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
607 dst[p] = chr;
608 }
609}
610
3ee86942
MV
611/* Symbols.
612
613 Basic symbol creation and accessing is done here, the rest is in
614 symbols.[hc]. This has been done to keep stringbufs and the
615 internals of strings and string-like objects confined to this file.
616*/
617
618#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
619
620SCM
6869328b
MV
621scm_i_make_symbol (SCM name, scm_t_bits flags,
622 unsigned long hash, SCM props)
3ee86942
MV
623{
624 SCM buf;
625 size_t start = STRING_START (name);
626 size_t length = STRING_LENGTH (name);
627
628 if (IS_SH_STRING (name))
629 {
630 name = SH_STRING_STRING (name);
631 start += STRING_START (name);
632 }
633 buf = SYMBOL_STRINGBUF (name);
634
635 if (start == 0 && length == STRINGBUF_LENGTH (buf))
636 {
637 /* reuse buf. */
9de87eea 638 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 639 SET_STRINGBUF_SHARED (buf);
9de87eea 640 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
641 }
642 else
643 {
644 /* make new buf. */
9c44cd45
MG
645 if (scm_i_is_narrow_string (name))
646 {
647 SCM new_buf = make_stringbuf (length);
648 memcpy (STRINGBUF_CHARS (new_buf),
649 STRINGBUF_CHARS (buf) + start, length);
650 buf = new_buf;
651 }
652 else
653 {
654 SCM new_buf = make_wide_stringbuf (length);
655 u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
656 (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
657 length);
658 buf = new_buf;
659 }
3ee86942 660 }
6869328b 661 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
662 (scm_t_bits) hash, SCM_UNPACK (props));
663}
664
fd0a5bbc
HWN
665SCM
666scm_i_c_make_symbol (const char *name, size_t len,
667 scm_t_bits flags, unsigned long hash, SCM props)
668{
669 SCM buf = make_stringbuf (len);
670 memcpy (STRINGBUF_CHARS (buf), name, len);
671
672 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
673 (scm_t_bits) hash, SCM_UNPACK (props));
674}
675
676/* Return a new symbol that uses the LEN bytes pointed to by NAME as its
677 underlying storage. */
678SCM
679scm_i_c_take_symbol (char *name, size_t len,
680 scm_t_bits flags, unsigned long hash, SCM props)
681{
682 SCM buf = scm_i_take_stringbufn (name, len);
683
684 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
685 (scm_t_bits) hash, SCM_UNPACK (props));
686}
687
50b1996f
MG
688/* Returns the number of characters in SYM. This may be different
689 from the memory size of SYM. */
3ee86942
MV
690size_t
691scm_i_symbol_length (SCM sym)
0f2d19dd 692{
3ee86942 693 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
694}
695
071bb6a8
LC
696size_t
697scm_c_symbol_length (SCM sym)
698#define FUNC_NAME "scm_c_symbol_length"
699{
700 SCM_VALIDATE_SYMBOL (1, sym);
701
702 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
703}
704#undef FUNC_NAME
705
50b1996f
MG
706/* True if the name of SYM is stored as a Latin-1 encoded string.
707 False if it is stored as a 32-bit UCS-4-encoded string. */
9c44cd45
MG
708int
709scm_i_is_narrow_symbol (SCM sym)
710{
711 SCM buf;
712
713 buf = SYMBOL_STRINGBUF (sym);
714 return !STRINGBUF_WIDE (buf);
715}
716
50b1996f
MG
717/* Returns a pointer to the 8-bit Latin-1 encoded character array that
718 contains the name of SYM. */
3ee86942
MV
719const char *
720scm_i_symbol_chars (SCM sym)
721{
9c44cd45
MG
722 SCM buf;
723
724 buf = SYMBOL_STRINGBUF (sym);
725 if (!STRINGBUF_WIDE (buf))
726 return STRINGBUF_CHARS (buf);
727 else
728 scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
729 scm_list_1 (sym));
730}
731
50b1996f
MG
732/* Return a pointer to the 32-bit UCS-4-encoded character array of a
733 symbol's name. */
9c44cd45
MG
734const scm_t_wchar *
735scm_i_symbol_wide_chars (SCM sym)
736{
737 SCM buf;
738
739 buf = SYMBOL_STRINGBUF (sym);
740 if (STRINGBUF_WIDE (buf))
741 return STRINGBUF_WIDE_CHARS (buf);
742 else
743 scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
744 scm_list_1 (sym));
3ee86942 745}
1cc91f1b 746
3ee86942
MV
747SCM
748scm_i_symbol_mark (SCM sym)
0f2d19dd 749{
3ee86942
MV
750 scm_gc_mark (SYMBOL_STRINGBUF (sym));
751 return SCM_CELL_OBJECT_3 (sym);
0f2d19dd
JB
752}
753
3ee86942
MV
754void
755scm_i_symbol_free (SCM sym)
756{
757}
0f2d19dd 758
be54b15d 759SCM
3ee86942 760scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 761{
3ee86942 762 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 763 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 764 SET_STRINGBUF_SHARED (buf);
9de87eea 765 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 766 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
767 (scm_t_bits)start, (scm_t_bits) end - start);
768}
be54b15d 769
50b1996f 770/* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
9c44cd45
MG
771scm_t_wchar
772scm_i_symbol_ref (SCM sym, size_t x)
773{
774 if (scm_i_is_narrow_symbol (sym))
775 return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
776 else
777 return scm_i_symbol_wide_chars (sym)[x];
778}
779
3ee86942
MV
780/* Debugging
781 */
be54b15d 782
3ee86942 783#if SCM_DEBUG
be54b15d 784
3ee86942
MV
785SCM scm_sys_string_dump (SCM);
786SCM scm_sys_symbol_dump (SCM);
787SCM scm_sys_stringbuf_hist (void);
be54b15d 788
9c44cd45 789SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str), "")
3ee86942
MV
790#define FUNC_NAME s_scm_sys_string_dump
791{
792 SCM_VALIDATE_STRING (1, str);
793 fprintf (stderr, "%p:\n", str);
794 fprintf (stderr, " start: %u\n", STRING_START (str));
795 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
9c44cd45
MG
796 if (scm_i_is_narrow_string (str))
797 fprintf (stderr, " format: narrow\n");
798 else
799 fprintf (stderr, " format: wide\n");
3ee86942
MV
800 if (IS_SH_STRING (str))
801 {
802 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
803 fprintf (stderr, "\n");
804 scm_sys_string_dump (SH_STRING_STRING (str));
805 }
806 else
807 {
808 SCM buf = STRING_STRINGBUF (str);
809 fprintf (stderr, " buf: %p\n", buf);
9c44cd45
MG
810 if (scm_i_is_narrow_string (str))
811 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
812 else
813 fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
3ee86942 814 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
9c44cd45
MG
815 if (STRINGBUF_SHARED (buf))
816 fprintf (stderr, " shared: true\n");
817 else
818 fprintf (stderr, " shared: false\n");
819 if (STRINGBUF_INLINE (buf))
820 fprintf (stderr, " inline: true\n");
821 else
822 fprintf (stderr, " inline: false\n");
823
3ee86942
MV
824 }
825 return SCM_UNSPECIFIED;
826}
827#undef FUNC_NAME
828
9c44cd45 829SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym), "")
3ee86942
MV
830#define FUNC_NAME s_scm_sys_symbol_dump
831{
832 SCM_VALIDATE_SYMBOL (1, sym);
833 fprintf (stderr, "%p:\n", sym);
834 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
9c44cd45
MG
835 if (scm_i_is_narrow_symbol (sym))
836 fprintf (stderr, " format: narrow\n");
837 else
838 fprintf (stderr, " format: wide\n");
3ee86942
MV
839 {
840 SCM buf = SYMBOL_STRINGBUF (sym);
841 fprintf (stderr, " buf: %p\n", buf);
9c44cd45
MG
842 if (scm_i_is_narrow_symbol (sym))
843 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
844 else
845 fprintf (stderr, " chars: %p\n", STRINGBUF_WIDE_CHARS (buf));
3ee86942 846 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
9c44cd45
MG
847 if (STRINGBUF_SHARED (buf))
848 fprintf (stderr, " shared: true\n");
849 else
850 fprintf (stderr, " shared: false\n");
851
3ee86942
MV
852 }
853 return SCM_UNSPECIFIED;
854}
855#undef FUNC_NAME
856
9c44cd45 857SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
e1b29f6a 858#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
859{
860 int i;
861 for (i = 0; i < 1000; i++)
862 if (lenhist[i])
863 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
864 fprintf (stderr, ">999: %u\n", lenhist[1000]);
865 return SCM_UNSPECIFIED;
be54b15d
DH
866}
867#undef FUNC_NAME
868
3ee86942
MV
869#endif
870
871\f
872
873SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
874 (SCM obj),
875 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
876#define FUNC_NAME s_scm_string_p
877{
878 return scm_from_bool (IS_STRING (obj));
879}
880#undef FUNC_NAME
881
882
883SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
884
885SCM_DEFINE (scm_string, "string", 0, 0, 1,
886 (SCM chrs),
887 "@deffnx {Scheme Procedure} list->string chrs\n"
888 "Return a newly allocated string composed of the arguments,\n"
889 "@var{chrs}.")
890#define FUNC_NAME s_scm_string
891{
892 SCM result;
9c44cd45 893 SCM rest;
3ee86942 894 size_t len;
9c44cd45
MG
895 size_t p = 0;
896 long i;
3ee86942 897
9c44cd45
MG
898 /* Verify that this is a list of chars. */
899 i = scm_ilength (chrs);
900 len = (size_t) i;
901 rest = chrs;
3ee86942 902
9c44cd45
MG
903 SCM_ASSERT (len >= 0, chrs, SCM_ARG1, FUNC_NAME);
904 while (len > 0 && scm_is_pair (rest))
3ee86942 905 {
9c44cd45 906 SCM elt = SCM_CAR (rest);
3ee86942 907 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
9c44cd45
MG
908 rest = SCM_CDR (rest);
909 len--;
910 scm_remember_upto_here_1 (elt);
911 }
912
913 /* Construct a string containing this list of chars. */
914 len = (size_t) i;
915 rest = chrs;
916
917 result = scm_i_make_string (len, NULL);
918 result = scm_i_string_start_writing (result);
919 while (len > 0 && scm_is_pair (rest))
920 {
921 SCM elt = SCM_CAR (rest);
922 scm_i_string_set_x (result, p, SCM_CHAR (elt));
923 p++;
924 rest = SCM_CDR (rest);
3ee86942 925 len--;
9c44cd45 926 scm_remember_upto_here_1 (elt);
3ee86942 927 }
9c44cd45
MG
928 scm_i_string_stop_writing ();
929
3ee86942
MV
930 if (len > 0)
931 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
9c44cd45 932 if (!scm_is_null (rest))
3ee86942
MV
933 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
934
935 return result;
936}
937#undef FUNC_NAME
be54b15d 938
3b3b36dd 939SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 940 (SCM k, SCM chr),
0d26a824
MG
941 "Return a newly allocated string of\n"
942 "length @var{k}. If @var{chr} is given, then all elements of\n"
943 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 944 "of the @var{string} are unspecified.")
1bbd0b84 945#define FUNC_NAME s_scm_make_string
0f2d19dd 946{
3ee86942
MV
947 return scm_c_make_string (scm_to_size_t (k), chr);
948}
949#undef FUNC_NAME
950
951SCM
952scm_c_make_string (size_t len, SCM chr)
953#define FUNC_NAME NULL
954{
9c44cd45
MG
955 size_t p;
956 SCM res = scm_i_make_string (len, NULL);
cb0d8be2 957
e11e83f3
MV
958 if (!SCM_UNBNDP (chr))
959 {
3ee86942 960 SCM_VALIDATE_CHAR (0, chr);
9c44cd45
MG
961 res = scm_i_string_start_writing (res);
962 for (p = 0; p < len; p++)
963 scm_i_string_set_x (res, p, SCM_CHAR (chr));
964 scm_i_string_stop_writing ();
0f2d19dd 965 }
e11e83f3
MV
966
967 return res;
0f2d19dd 968}
1bbd0b84 969#undef FUNC_NAME
0f2d19dd 970
3b3b36dd 971SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
972 (SCM string),
973 "Return the number of characters in @var{string}.")
1bbd0b84 974#define FUNC_NAME s_scm_string_length
0f2d19dd 975{
d1ca2c64 976 SCM_VALIDATE_STRING (1, string);
3ee86942 977 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 978}
1bbd0b84 979#undef FUNC_NAME
0f2d19dd 980
9c44cd45
MG
981SCM_DEFINE (scm_string_width, "string-width", 1, 0, 0,
982 (SCM string),
983 "Return the bytes used to represent a character in @var{string}."
984 "This will return 1 or 4.")
985#define FUNC_NAME s_scm_string_width
986{
987 SCM_VALIDATE_STRING (1, string);
988 if (!scm_i_is_narrow_string (string))
989 return scm_from_int (4);
990
991 return scm_from_int (1);
992}
993#undef FUNC_NAME
994
3ee86942
MV
995size_t
996scm_c_string_length (SCM string)
997{
998 if (!IS_STRING (string))
999 scm_wrong_type_arg_msg (NULL, 0, string, "string");
1000 return STRING_LENGTH (string);
1001}
1002
bd9e24b3 1003SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 1004 (SCM str, SCM k),
9c44cd45
MG
1005 "Return character @var{k} of @var{str} using zero-origin\n"
1006 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 1007#define FUNC_NAME s_scm_string_ref
0f2d19dd 1008{
3ae3166b 1009 size_t len;
a55c2b68 1010 unsigned long idx;
bd9e24b3 1011
d1ca2c64 1012 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1013
1014 len = scm_i_string_length (str);
1015 if (SCM_LIKELY (len > 0))
1016 idx = scm_to_unsigned_integer (k, 0, len - 1);
1017 else
1018 scm_out_of_range (NULL, k);
1019
9c44cd45
MG
1020 if (scm_i_is_narrow_string (str))
1021 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
1022 else
1023 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
0f2d19dd 1024}
1bbd0b84 1025#undef FUNC_NAME
0f2d19dd 1026
3ee86942
MV
1027SCM
1028scm_c_string_ref (SCM str, size_t p)
1029{
1030 if (p >= scm_i_string_length (str))
1031 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1032 if (scm_i_is_narrow_string (str))
1033 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
1034 else
1035 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
1036
3ee86942 1037}
f0942910 1038
3b3b36dd 1039SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 1040 (SCM str, SCM k, SCM chr),
9c44cd45
MG
1041 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1042 "an unspecified value. @var{k} must be a valid index of\n"
1043 "@var{str}.")
1bbd0b84 1044#define FUNC_NAME s_scm_string_set_x
0f2d19dd 1045{
3ae3166b 1046 size_t len;
a55c2b68
MV
1047 unsigned long idx;
1048
f0942910 1049 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
1050
1051 len = scm_i_string_length (str);
1052 if (SCM_LIKELY (len > 0))
1053 idx = scm_to_unsigned_integer (k, 0, len - 1);
1054 else
1055 scm_out_of_range (NULL, k);
1056
34d19ef6 1057 SCM_VALIDATE_CHAR (3, chr);
9c44cd45
MG
1058 str = scm_i_string_start_writing (str);
1059 scm_i_string_set_x (str, idx, SCM_CHAR (chr));
1060 scm_i_string_stop_writing ();
1061
0f2d19dd
JB
1062 return SCM_UNSPECIFIED;
1063}
1bbd0b84 1064#undef FUNC_NAME
0f2d19dd 1065
3ee86942
MV
1066void
1067scm_c_string_set_x (SCM str, size_t p, SCM chr)
1068{
1069 if (p >= scm_i_string_length (str))
1070 scm_out_of_range (NULL, scm_from_size_t (p));
9c44cd45
MG
1071 str = scm_i_string_start_writing (str);
1072 scm_i_string_set_x (str, p, SCM_CHAR (chr));
1073 scm_i_string_stop_writing ();
3ee86942 1074}
0f2d19dd 1075
3b3b36dd 1076SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
1077 (SCM str, SCM start, SCM end),
1078 "Return a newly allocated string formed from the characters\n"
1079 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1080 "ending with index @var{end} (exclusive).\n"
1081 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1082 "exact integers satisfying:\n\n"
1083 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 1084#define FUNC_NAME s_scm_substring
0f2d19dd 1085{
3ee86942 1086 size_t len, from, to;
685c0d71 1087
d1ca2c64 1088 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
1089 len = scm_i_string_length (str);
1090 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 1091 if (SCM_UNBNDP (end))
3ee86942 1092 to = len;
a55c2b68 1093 else
3ee86942
MV
1094 to = scm_to_unsigned_integer (end, from, len);
1095 return scm_i_substring (str, from, to);
0f2d19dd 1096}
1bbd0b84 1097#undef FUNC_NAME
0f2d19dd 1098
ed35de72
MV
1099SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
1100 (SCM str, SCM start, SCM end),
1101 "Return a newly allocated string formed from the characters\n"
1102 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1103 "ending with index @var{end} (exclusive).\n"
1104 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1105 "exact integers satisfying:\n"
1106 "\n"
1107 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1108 "\n"
1109 "The returned string is read-only.\n")
1110#define FUNC_NAME s_scm_substring_read_only
1111{
1112 size_t len, from, to;
1113
1114 SCM_VALIDATE_STRING (1, str);
1115 len = scm_i_string_length (str);
1116 from = scm_to_unsigned_integer (start, 0, len);
1117 if (SCM_UNBNDP (end))
1118 to = len;
1119 else
1120 to = scm_to_unsigned_integer (end, from, len);
1121 return scm_i_substring_read_only (str, from, to);
1122}
1123#undef FUNC_NAME
1124
3ee86942
MV
1125SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
1126 (SCM str, SCM start, SCM end),
1127 "Return a newly allocated string formed from the characters\n"
1128 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1129 "ending with index @var{end} (exclusive).\n"
1130 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1131 "exact integers satisfying:\n\n"
1132 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1133#define FUNC_NAME s_scm_substring_copy
1134{
e1b29f6a
MV
1135 /* For the Scheme version, START is mandatory, but for the C
1136 version, it is optional. See scm_string_copy in srfi-13.c for a
1137 rationale.
1138 */
1139
1140 size_t from, to;
3ee86942
MV
1141
1142 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
1143 scm_i_get_substring_spec (scm_i_string_length (str),
1144 start, &from, end, &to);
3ee86942
MV
1145 return scm_i_substring_copy (str, from, to);
1146}
1147#undef FUNC_NAME
1148
1149SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
1150 (SCM str, SCM start, SCM end),
1151 "Return string that indirectly refers to the characters\n"
1152 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1153 "ending with index @var{end} (exclusive).\n"
1154 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1155 "exact integers satisfying:\n\n"
1156 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1157#define FUNC_NAME s_scm_substring_shared
1158{
1159 size_t len, from, to;
1160
1161 SCM_VALIDATE_STRING (1, str);
1162 len = scm_i_string_length (str);
1163 from = scm_to_unsigned_integer (start, 0, len);
1164 if (SCM_UNBNDP (end))
1165 to = len;
1166 else
1167 to = scm_to_unsigned_integer (end, from, len);
1168 return scm_i_substring_shared (str, from, to);
1169}
1170#undef FUNC_NAME
685c0d71 1171
3b3b36dd 1172SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 1173 (SCM args),
9c44cd45 1174 "Return a newly allocated string whose characters form the\n"
0d26a824 1175 "concatenation of the given strings, @var{args}.")
1bbd0b84 1176#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
1177{
1178 SCM res;
9c44cd45
MG
1179 size_t len = 0;
1180 int wide = 0;
c829a427
MV
1181 SCM l, s;
1182 char *data;
9c44cd45
MG
1183 scm_t_wchar *wdata;
1184 int i;
af45e3b0
DH
1185
1186 SCM_VALIDATE_REST_ARGUMENT (args);
9c44cd45 1187 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
1188 {
1189 s = SCM_CAR (l);
1190 SCM_VALIDATE_STRING (SCM_ARGn, s);
9c44cd45
MG
1191 len += scm_i_string_length (s);
1192 if (!scm_i_is_narrow_string (s))
1193 wide = 1;
c829a427 1194 }
9c44cd45
MG
1195 if (!wide)
1196 res = scm_i_make_string (len, &data);
1197 else
1198 res = scm_i_make_wide_string (len, &wdata);
1199
1200 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 1201 {
edea856c 1202 size_t len;
c829a427 1203 s = SCM_CAR (l);
3ee86942 1204 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 1205 len = scm_i_string_length (s);
9c44cd45
MG
1206 if (!wide)
1207 {
1208 memcpy (data, scm_i_string_chars (s), len);
1209 data += len;
1210 }
1211 else
1212 {
1213 if (scm_i_is_narrow_string (s))
1214 {
1215 for (i = 0; i < scm_i_string_length (s); i++)
1216 wdata[i] = (unsigned char) scm_i_string_chars (s)[i];
1217 }
1218 else
1219 u32_cpy ((scm_t_uint32 *) wdata,
1220 (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
1221 wdata += len;
1222 }
c829a427
MV
1223 scm_remember_upto_here_1 (s);
1224 }
0f2d19dd
JB
1225 return res;
1226}
1bbd0b84 1227#undef FUNC_NAME
0f2d19dd 1228
c829a427
MV
1229int
1230scm_is_string (SCM obj)
1231{
3ee86942 1232 return IS_STRING (obj);
c829a427 1233}
24933780 1234
c829a427
MV
1235SCM
1236scm_from_locale_stringn (const char *str, size_t len)
1237{
1238 SCM res;
1239 char *dst;
4d4528e7 1240
9c44cd45 1241 if (len == (size_t) -1)
c829a427 1242 len = strlen (str);
9c44cd45
MG
1243 if (len == 0)
1244 return scm_nullstr;
1245
3ee86942 1246 res = scm_i_make_string (len, &dst);
c829a427
MV
1247 memcpy (dst, str, len);
1248 return res;
1249}
4d4528e7 1250
c829a427
MV
1251SCM
1252scm_from_locale_string (const char *str)
4d4528e7 1253{
9c44cd45
MG
1254 if (str == NULL)
1255 return scm_nullstr;
1256
c829a427
MV
1257 return scm_from_locale_stringn (str, -1);
1258}
4d4528e7 1259
50b1996f
MG
1260/* Create a new scheme string from the C string STR. The memory of
1261 STR may be used directly as storage for the new string. */
c829a427
MV
1262SCM
1263scm_take_locale_stringn (char *str, size_t len)
1264{
48ddf0d9
KR
1265 SCM buf, res;
1266
9c44cd45 1267 if (len == (size_t) -1)
48ddf0d9 1268 len = strlen (str);
c829a427
MV
1269 else
1270 {
48ddf0d9
KR
1271 /* Ensure STR is null terminated. A realloc for 1 extra byte should
1272 often be satisfied from the alignment padding after the block, with
1273 no actual data movement. */
9c44cd45 1274 str = scm_realloc (str, len + 1);
48ddf0d9 1275 str[len] = '\0';
c829a427 1276 }
c829a427 1277
fd0a5bbc 1278 buf = scm_i_take_stringbufn (str, len);
3ee86942 1279 res = scm_double_cell (STRING_TAG,
9c44cd45 1280 SCM_UNPACK (buf), (scm_t_bits) 0, (scm_t_bits) len);
c829a427
MV
1281 return res;
1282}
1283
48ddf0d9
KR
1284SCM
1285scm_take_locale_string (char *str)
1286{
1287 return scm_take_locale_stringn (str, -1);
1288}
1289
9c44cd45
MG
1290/* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1291 and \UXXXXXX. */
1292static void
1293unistring_escapes_to_guile_escapes (char **bufp, size_t *lenp)
1294{
1295 char *before, *after;
1296 size_t i, j;
1297
1298 before = *bufp;
1299 after = *bufp;
1300 i = 0;
1301 j = 0;
1302 while (i < *lenp)
1303 {
1304 if ((i <= *lenp - 6)
1305 && before[i] == '\\'
1306 && before[i + 1] == 'u'
1307 && before[i + 2] == '0' && before[i + 3] == '0')
1308 {
1309 /* Convert \u00NN to \xNN */
1310 after[j] = '\\';
1311 after[j + 1] = 'x';
1312 after[j + 2] = tolower (before[i + 4]);
1313 after[j + 3] = tolower (before[i + 5]);
1314 i += 6;
1315 j += 4;
1316 }
1317 else if ((i <= *lenp - 10)
1318 && before[i] == '\\'
1319 && before[i + 1] == 'U'
1320 && before[i + 2] == '0' && before[i + 3] == '0')
1321 {
1322 /* Convert \U00NNNNNN to \UNNNNNN */
1323 after[j] = '\\';
1324 after[j + 1] = 'U';
1325 after[j + 2] = tolower (before[i + 4]);
1326 after[j + 3] = tolower (before[i + 5]);
1327 after[j + 4] = tolower (before[i + 6]);
1328 after[j + 5] = tolower (before[i + 7]);
1329 after[j + 6] = tolower (before[i + 8]);
1330 after[j + 7] = tolower (before[i + 9]);
1331 i += 10;
1332 j += 8;
1333 }
1334 else
1335 {
1336 after[j] = before[i];
1337 i++;
1338 j++;
1339 }
1340 }
1341 *lenp = j;
1342 after = scm_realloc (after, j);
1343}
1344
c829a427 1345char *
9c44cd45 1346scm_to_locale_stringn (SCM str, size_t * lenp)
c829a427 1347{
9c44cd45
MG
1348 const char *enc;
1349
1350 /* In the future, enc will hold the port's encoding. */
1351 enc = NULL;
1352
1353 return scm_to_stringn (str, lenp, enc, iconveh_escape_sequence);
1354}
1355
1356/* Low-level scheme to C string conversion function. */
1357char *
1358scm_to_stringn (SCM str, size_t * lenp, const char *encoding,
1359 enum iconv_ilseq_handler handler)
1360{
1361 static const char iso[11] = "ISO-8859-1";
1362 char *buf;
1363 size_t ilen, len, i;
4d4528e7 1364
3ee86942 1365 if (!scm_is_string (str))
c829a427 1366 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1367 ilen = scm_i_string_length (str);
1368
1369 if (ilen == 0)
1370 {
1371 buf = scm_malloc (1);
1372 buf[0] = '\0';
1373 if (lenp)
1374 *lenp = 0;
1375 return buf;
1376 }
1377
c829a427 1378 if (lenp == NULL)
9c44cd45
MG
1379 for (i = 0; i < ilen; i++)
1380 if (scm_i_string_ref (str, i) == '\0')
1381 scm_misc_error (NULL,
1382 "string contains #\\nul character: ~S",
1383 scm_list_1 (str));
1384
1385 if (scm_i_is_narrow_string (str))
c829a427 1386 {
9c44cd45
MG
1387 if (lenp)
1388 {
1389 buf = scm_malloc (ilen);
1390 memcpy (buf, scm_i_string_chars (str), ilen);
1391 *lenp = ilen;
1392 return buf;
1393 }
1394 else
1395 {
1396 buf = scm_malloc (ilen + 1);
1397 memcpy (buf, scm_i_string_chars (str), ilen);
1398 buf[ilen] = '\0';
1399 return buf;
1400 }
c829a427 1401 }
9c44cd45
MG
1402
1403
1404 buf = NULL;
1405 len = 0;
1406 buf = u32_conv_to_encoding (iso,
1407 handler,
1408 (scm_t_uint32 *) scm_i_string_wide_chars (str),
1409 ilen, NULL, NULL, &len);
1410 if (buf == NULL)
1411 scm_misc_error (NULL, "cannot convert to output locale ~s: \"~s\"",
1412 scm_list_2 (scm_from_locale_string (iso), str));
1413
1414 if (handler == iconveh_escape_sequence)
1415 unistring_escapes_to_guile_escapes (&buf, &len);
1416
1417 if (lenp)
4d4528e7 1418 *lenp = len;
9c44cd45
MG
1419 else
1420 {
1421 buf = scm_realloc (buf, len + 1);
1422 buf[len] = '\0';
1423 }
24933780 1424
c829a427 1425 scm_remember_upto_here_1 (str);
9c44cd45 1426 return buf;
4d4528e7 1427}
af68e5e5 1428
c829a427
MV
1429char *
1430scm_to_locale_string (SCM str)
1431{
1432 return scm_to_locale_stringn (str, NULL);
1433}
af68e5e5 1434
c829a427
MV
1435size_t
1436scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
1437{
1438 size_t len;
9c44cd45 1439 char *result = NULL;
3ee86942 1440 if (!scm_is_string (str))
c829a427 1441 scm_wrong_type_arg_msg (NULL, 0, str, "string");
9c44cd45
MG
1442 result = scm_to_locale_stringn (str, &len);
1443
1444 memcpy (buf, result, (len > max_len) ? max_len : len);
1445 free (result);
1446
c829a427
MV
1447 scm_remember_upto_here_1 (str);
1448 return len;
1449}
af68e5e5 1450
3ee86942
MV
1451/* converts C scm_array of strings to SCM scm_list of strings. */
1452/* If argc < 0, a null terminated scm_array is assumed. */
9c44cd45 1453SCM
3ee86942
MV
1454scm_makfromstrs (int argc, char **argv)
1455{
1456 int i = argc;
1457 SCM lst = SCM_EOL;
1458 if (0 > i)
1459 for (i = 0; argv[i]; i++);
1460 while (i--)
1461 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
1462 return lst;
1463}
1464
c829a427
MV
1465/* Return a newly allocated array of char pointers to each of the strings
1466 in args, with a terminating NULL pointer. */
1467
1468char **
1469scm_i_allocate_string_pointers (SCM list)
af68e5e5 1470{
c829a427
MV
1471 char **result;
1472 int len = scm_ilength (list);
1473 int i;
1474
1475 if (len < 0)
1476 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
1477
661ae7ab 1478 scm_dynwind_begin (0);
c829a427
MV
1479
1480 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
1481 result[len] = NULL;
661ae7ab 1482 scm_dynwind_unwind_handler (free, result, 0);
c829a427
MV
1483
1484 /* The list might be have been modified in another thread, so
1485 we check LIST before each access.
1486 */
d2e53ed6 1487 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427
MV
1488 {
1489 result[i] = scm_to_locale_string (SCM_CAR (list));
1490 list = SCM_CDR (list);
1491 }
1492
661ae7ab 1493 scm_dynwind_end ();
c829a427 1494 return result;
af68e5e5 1495}
e53cc817 1496
c829a427
MV
1497void
1498scm_i_free_string_pointers (char **pointers)
1499{
1500 int i;
1501
1502 for (i = 0; pointers[i]; i++)
1503 free (pointers[i]);
1504 free (pointers);
1505}
24933780 1506
6f14f578
MV
1507void
1508scm_i_get_substring_spec (size_t len,
1509 SCM start, size_t *cstart,
1510 SCM end, size_t *cend)
1511{
1512 if (SCM_UNBNDP (start))
1513 *cstart = 0;
1514 else
1515 *cstart = scm_to_unsigned_integer (start, 0, len);
1516
1517 if (SCM_UNBNDP (end))
1518 *cend = len;
1519 else
1520 *cend = scm_to_unsigned_integer (end, *cstart, len);
1521}
1522
3ee86942
MV
1523#if SCM_ENABLE_DEPRECATED
1524
556d75db
MV
1525/* When these definitions are removed, it becomes reasonable to use
1526 read-only strings for string literals. For that, change the reader
1527 to create string literals with scm_c_substring_read_only instead of
1528 with scm_c_substring_copy.
1529*/
1530
3ee86942 1531int
fe78c51a 1532scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1533{
1534 scm_c_issue_deprecation_warning
1535 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1536
2616f0e0 1537 return scm_is_string (str);
3ee86942
MV
1538}
1539
1540char *
fe78c51a 1541scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1542{
1543 char *chars;
1544
1545 scm_c_issue_deprecation_warning
1546 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1547
2616f0e0
MV
1548 /* We don't accept shared substrings here since they are not
1549 null-terminated.
1550 */
1551 if (IS_SH_STRING (str))
1552 scm_misc_error (NULL,
1553 "SCM_STRING_CHARS does not work with shared substrings.",
1554 SCM_EOL);
1555
877f06c3 1556 /* We explicitly test for read-only strings to produce a better
556d75db
MV
1557 error message.
1558 */
1559
1560 if (IS_RO_STRING (str))
1561 scm_misc_error (NULL,
1562 "SCM_STRING_CHARS does not work with read-only strings.",
1563 SCM_EOL);
1564
2616f0e0 1565 /* The following is still wrong, of course...
3ee86942 1566 */
9c44cd45 1567 str = scm_i_string_start_writing (str);
3ee86942
MV
1568 chars = scm_i_string_writable_chars (str);
1569 scm_i_string_stop_writing ();
1570 return chars;
1571}
1572
1573size_t
fe78c51a 1574scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1575{
1576 scm_c_issue_deprecation_warning
1577 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1578 return scm_c_string_length (str);
1579}
1580
1581#endif
1582
0f2d19dd
JB
1583void
1584scm_init_strings ()
0f2d19dd 1585{
3ee86942 1586 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1587
a0599745 1588#include "libguile/strings.x"
0f2d19dd
JB
1589}
1590
89e00824
ML
1591
1592/*
1593 Local Variables:
1594 c-file-style: "gnu"
1595 End:
1596*/