Merge branch 'master' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / strings.c
CommitLineData
7f74cf9a 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e
MV
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 7 *
73be1d9e
MV
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
92205699 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
73be1d9e 16 */
1bbd0b84 17
1bbd0b84 18
0f2d19dd 19\f
dbb605f5
LC
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
0f2d19dd 23
faf2c9d7 24#include <string.h>
3ee86942 25#include <stdio.h>
faf2c9d7 26
a0599745
MD
27#include "libguile/_scm.h"
28#include "libguile/chars.h"
7c33806a 29#include "libguile/root.h"
a0599745 30#include "libguile/strings.h"
1afff620 31#include "libguile/deprecation.h"
a0599745 32#include "libguile/validate.h"
c829a427 33#include "libguile/dynwind.h"
1afff620 34
0f2d19dd
JB
35\f
36
37/* {Strings}
38 */
39
3ee86942
MV
40
41/* Stringbufs
42 *
43 * XXX - keeping an accurate refcount during GC seems to be quite
44 * tricky, so we just keep score of whether a stringbuf might be
45 * shared, not wether it definitely is.
46 *
47 * The scheme I (mvo) tried to keep an accurate reference count would
48 * recount all strings that point to a stringbuf during the mark-phase
49 * of the GC. This was done since one cannot access the stringbuf of
50 * a string when that string is freed (in order to decrease the
51 * reference count). The memory of the stringbuf might have been
52 * reused already for something completely different.
53 *
54 * This recounted worked for a small number of threads beating on
55 * cow-strings, but it failed randomly with more than 10 threads, say.
56 * I couldn't figure out what went wrong, so I used the conservative
57 * approach implemented below.
58 *
59 * A stringbuf needs to know its length, but only so that it can be
60 * reported when the stringbuf is freed.
61 *
62 * Stringbufs (and strings) are not stored very compactly: a stringbuf
63 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
64 * information. As a compensation, the code below is made more
65 * complicated by storing small strings inline in the double cell of a
66 * stringbuf. So we have fixstrings and bigstrings...
67 */
68
69#define STRINGBUF_F_SHARED 0x100
70#define STRINGBUF_F_INLINE 0x200
71
72#define STRINGBUF_TAG scm_tc7_stringbuf
73#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
74#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
75
76#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
77#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
78#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
79#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
80
81#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_CHARS (buf) \
83 : STRINGBUF_OUTLINE_CHARS (buf))
84#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
85 ? STRINGBUF_INLINE_LENGTH (buf) \
86 : STRINGBUF_OUTLINE_LENGTH (buf))
87
88#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
89
90#define SET_STRINGBUF_SHARED(buf) \
91 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
92
93#if SCM_DEBUG
94static size_t lenhist[1001];
95#endif
96
97static SCM
98make_stringbuf (size_t len)
0f2d19dd 99{
3ee86942
MV
100 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
101 scm_i_symbol_chars, all stringbufs are null-terminated. Once
102 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
103 has been changed for scm_i_symbol_chars, this null-termination
104 can be dropped.
105 */
106
107#if SCM_DEBUG
108 if (len < 1000)
109 lenhist[len]++;
110 else
111 lenhist[1000]++;
112#endif
0f2d19dd 113
3ee86942
MV
114 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
115 {
116 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
117 0, 0, 0);
118 }
119 else
120 {
c5018a2b 121 char *mem = scm_gc_malloc_pointerless (len + 1, "string");
3ee86942
MV
122 mem[len] = '\0';
123 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
124 (scm_t_bits) len, (scm_t_bits) 0);
125 }
126}
e53cc817 127
2b829bbb
KR
128/* Return a new stringbuf whose underlying storage consists of the LEN+1
129 octets pointed to by STR (the last octet is zero). */
7f74cf9a 130SCM
fd0a5bbc
HWN
131scm_i_take_stringbufn (char *str, size_t len)
132{
2b829bbb 133 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
fd0a5bbc
HWN
134
135 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
136 (scm_t_bits) len, (scm_t_bits) 0);
137}
138
bd9e24b3 139
9de87eea 140scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 141
3ee86942
MV
142/* Copy-on-write strings.
143 */
bd9e24b3 144
3ee86942 145#define STRING_TAG scm_tc7_string
bd9e24b3 146
3ee86942
MV
147#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
148#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
149#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 150
3ee86942
MV
151#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
152#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
153
154#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
155
ed35de72
MV
156/* Read-only strings.
157 */
158
159#define RO_STRING_TAG (scm_tc7_string + 0x200)
160#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
161
e1b29f6a
MV
162/* Mutation-sharing substrings
163 */
164
165#define SH_STRING_TAG (scm_tc7_string + 0x100)
166
167#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
168/* START and LENGTH as for STRINGs. */
169
170#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
171
3ee86942
MV
172SCM
173scm_i_make_string (size_t len, char **charsp)
174{
175 SCM buf = make_stringbuf (len);
176 SCM res;
177 if (charsp)
178 *charsp = STRINGBUF_CHARS (buf);
179 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
180 (scm_t_bits)0, (scm_t_bits) len);
181 return res;
0f2d19dd
JB
182}
183
3ee86942
MV
184static void
185validate_substring_args (SCM str, size_t start, size_t end)
186{
187 if (!IS_STRING (str))
188 scm_wrong_type_arg_msg (NULL, 0, str, "string");
189 if (start > STRING_LENGTH (str))
190 scm_out_of_range (NULL, scm_from_size_t (start));
191 if (end > STRING_LENGTH (str) || end < start)
192 scm_out_of_range (NULL, scm_from_size_t (end));
193}
0f2d19dd 194
e1b29f6a
MV
195static inline void
196get_str_buf_start (SCM *str, SCM *buf, size_t *start)
197{
198 *start = STRING_START (*str);
199 if (IS_SH_STRING (*str))
200 {
201 *str = SH_STRING_STRING (*str);
202 *start += STRING_START (*str);
203 }
204 *buf = STRING_STRINGBUF (*str);
205}
206
3ee86942
MV
207SCM
208scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 209{
e1b29f6a
MV
210 SCM buf;
211 size_t str_start;
212 get_str_buf_start (&str, &buf, &str_start);
9de87eea 213 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 214 SET_STRINGBUF_SHARED (buf);
9de87eea 215 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 216 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
217 (scm_t_bits)str_start + start,
218 (scm_t_bits) end - start);
0f2d19dd
JB
219}
220
ed35de72
MV
221SCM
222scm_i_substring_read_only (SCM str, size_t start, size_t end)
223{
45a9f430
LC
224 SCM buf;
225 size_t str_start;
226 get_str_buf_start (&str, &buf, &str_start);
227 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
228 SET_STRINGBUF_SHARED (buf);
229 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
230 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
231 (scm_t_bits)str_start + start,
232 (scm_t_bits) end - start);
ed35de72
MV
233}
234
3ee86942
MV
235SCM
236scm_i_substring_copy (SCM str, size_t start, size_t end)
237{
238 size_t len = end - start;
edea856c 239 SCM buf, my_buf;
e1b29f6a
MV
240 size_t str_start;
241 get_str_buf_start (&str, &buf, &str_start);
edea856c 242 my_buf = make_stringbuf (len);
e1b29f6a
MV
243 memcpy (STRINGBUF_CHARS (my_buf),
244 STRINGBUF_CHARS (buf) + str_start + start, len);
3ee86942
MV
245 scm_remember_upto_here_1 (buf);
246 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
247 (scm_t_bits)0, (scm_t_bits) len);
248}
0f2d19dd 249
e1b29f6a
MV
250SCM
251scm_i_substring_shared (SCM str, size_t start, size_t end)
252{
253 if (start == 0 && end == STRING_LENGTH (str))
254 return str;
255 else
256 {
257 size_t len = end - start;
258 if (IS_SH_STRING (str))
259 {
260 start += STRING_START (str);
261 str = SH_STRING_STRING (str);
262 }
263 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
264 (scm_t_bits)start, (scm_t_bits) len);
265 }
266}
267
3ee86942
MV
268SCM
269scm_c_substring (SCM str, size_t start, size_t end)
270{
271 validate_substring_args (str, start, end);
272 return scm_i_substring (str, start, end);
273}
ee149d03 274
ed35de72
MV
275SCM
276scm_c_substring_read_only (SCM str, size_t start, size_t end)
277{
278 validate_substring_args (str, start, end);
279 return scm_i_substring_read_only (str, start, end);
280}
281
0f2d19dd 282SCM
3ee86942 283scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 284{
3ee86942
MV
285 validate_substring_args (str, start, end);
286 return scm_i_substring_copy (str, start, end);
287}
288
3ee86942
MV
289SCM
290scm_c_substring_shared (SCM str, size_t start, size_t end)
291{
292 validate_substring_args (str, start, end);
293 return scm_i_substring_shared (str, start, end);
294}
0f2d19dd 295
d6c74168 296\f
3ee86942
MV
297/* Internal accessors
298 */
299
300size_t
301scm_i_string_length (SCM str)
0f2d19dd 302{
3ee86942 303 return STRING_LENGTH (str);
0f2d19dd
JB
304}
305
3ee86942
MV
306const char *
307scm_i_string_chars (SCM str)
308{
309 SCM buf;
e1b29f6a
MV
310 size_t start;
311 get_str_buf_start (&str, &buf, &start);
3ee86942
MV
312 return STRINGBUF_CHARS (buf) + start;
313}
b00418df 314
3ee86942 315char *
ed35de72 316scm_i_string_writable_chars (SCM orig_str)
b00418df 317{
ed35de72 318 SCM buf, str = orig_str;
e1b29f6a 319 size_t start;
ed35de72 320
e1b29f6a 321 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
322 if (IS_RO_STRING (str))
323 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
324
9de87eea 325 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
326 if (STRINGBUF_SHARED (buf))
327 {
902578f1 328 /* Clone stringbuf. */
3ee86942
MV
329
330 size_t len = STRING_LENGTH (str);
331 SCM new_buf;
332
9de87eea 333 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
334
335 new_buf = make_stringbuf (len);
336 memcpy (STRINGBUF_CHARS (new_buf),
337 STRINGBUF_CHARS (buf) + STRING_START (str), len);
338
3ee86942 339 start -= STRING_START (str);
902578f1
LC
340
341 /* FIXME: The following operations are not atomic, so other threads
342 looking at STR may see an inconsistent state. Nevertheless it can't
343 hurt much since (i) accessing STR while it is being mutated can't
344 yield a crash, and (ii) concurrent accesses to STR should be
345 protected by a mutex at the application level. The latter may not
346 apply when STR != ORIG_STR, though. */
3ee86942 347 SET_STRING_START (str, 0);
902578f1 348 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
349
350 buf = new_buf;
351
9de87eea 352 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
353 }
354
355 return STRINGBUF_CHARS (buf) + start;
b00418df
DH
356}
357
3ee86942
MV
358void
359scm_i_string_stop_writing (void)
360{
9de87eea 361 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 362}
b00418df 363
3ee86942
MV
364/* Symbols.
365
366 Basic symbol creation and accessing is done here, the rest is in
367 symbols.[hc]. This has been done to keep stringbufs and the
368 internals of strings and string-like objects confined to this file.
369*/
370
371#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
372
373SCM
6869328b
MV
374scm_i_make_symbol (SCM name, scm_t_bits flags,
375 unsigned long hash, SCM props)
3ee86942
MV
376{
377 SCM buf;
378 size_t start = STRING_START (name);
379 size_t length = STRING_LENGTH (name);
380
381 if (IS_SH_STRING (name))
382 {
383 name = SH_STRING_STRING (name);
384 start += STRING_START (name);
385 }
386 buf = SYMBOL_STRINGBUF (name);
387
388 if (start == 0 && length == STRINGBUF_LENGTH (buf))
389 {
390 /* reuse buf. */
9de87eea 391 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 392 SET_STRINGBUF_SHARED (buf);
9de87eea 393 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
394 }
395 else
396 {
397 /* make new buf. */
398 SCM new_buf = make_stringbuf (length);
399 memcpy (STRINGBUF_CHARS (new_buf),
400 STRINGBUF_CHARS (buf) + start, length);
401 buf = new_buf;
402 }
6869328b 403 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
404 (scm_t_bits) hash, SCM_UNPACK (props));
405}
406
fd0a5bbc
HWN
407SCM
408scm_i_c_make_symbol (const char *name, size_t len,
409 scm_t_bits flags, unsigned long hash, SCM props)
410{
411 SCM buf = make_stringbuf (len);
412 memcpy (STRINGBUF_CHARS (buf), name, len);
413
a284cc7e
LC
414 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
415 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
416}
417
418/* Return a new symbol that uses the LEN bytes pointed to by NAME as its
419 underlying storage. */
420SCM
421scm_i_c_take_symbol (char *name, size_t len,
422 scm_t_bits flags, unsigned long hash, SCM props)
423{
424 SCM buf = scm_i_take_stringbufn (name, len);
425
426 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
427 (scm_t_bits) hash, SCM_UNPACK (props));
428}
429
3ee86942
MV
430size_t
431scm_i_symbol_length (SCM sym)
0f2d19dd 432{
3ee86942 433 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
434}
435
071bb6a8
LC
436size_t
437scm_c_symbol_length (SCM sym)
438#define FUNC_NAME "scm_c_symbol_length"
439{
440 SCM_VALIDATE_SYMBOL (1, sym);
441
442 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
443}
444#undef FUNC_NAME
445
3ee86942
MV
446const char *
447scm_i_symbol_chars (SCM sym)
448{
449 SCM buf = SYMBOL_STRINGBUF (sym);
450 return STRINGBUF_CHARS (buf);
451}
1cc91f1b 452
be54b15d 453SCM
3ee86942 454scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 455{
3ee86942 456 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 457 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 458 SET_STRINGBUF_SHARED (buf);
9de87eea 459 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 460 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
461 (scm_t_bits)start, (scm_t_bits) end - start);
462}
be54b15d 463
3ee86942
MV
464/* Debugging
465 */
be54b15d 466
3ee86942 467#if SCM_DEBUG
be54b15d 468
3ee86942
MV
469SCM scm_sys_string_dump (SCM);
470SCM scm_sys_symbol_dump (SCM);
471SCM scm_sys_stringbuf_hist (void);
be54b15d 472
3ee86942
MV
473SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
474 (SCM str),
475 "")
476#define FUNC_NAME s_scm_sys_string_dump
477{
478 SCM_VALIDATE_STRING (1, str);
479 fprintf (stderr, "%p:\n", str);
480 fprintf (stderr, " start: %u\n", STRING_START (str));
481 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
482 if (IS_SH_STRING (str))
483 {
484 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
485 fprintf (stderr, "\n");
486 scm_sys_string_dump (SH_STRING_STRING (str));
487 }
488 else
489 {
490 SCM buf = STRING_STRINGBUF (str);
491 fprintf (stderr, " buf: %p\n", buf);
492 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
493 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
494 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
495 }
496 return SCM_UNSPECIFIED;
497}
498#undef FUNC_NAME
499
500SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
501 (SCM sym),
502 "")
503#define FUNC_NAME s_scm_sys_symbol_dump
504{
505 SCM_VALIDATE_SYMBOL (1, sym);
506 fprintf (stderr, "%p:\n", sym);
507 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
508 {
509 SCM buf = SYMBOL_STRINGBUF (sym);
510 fprintf (stderr, " buf: %p\n", buf);
511 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
512 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
513 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
514 }
515 return SCM_UNSPECIFIED;
516}
517#undef FUNC_NAME
518
519SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
520 (void),
521 "")
e1b29f6a 522#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
523{
524 int i;
525 for (i = 0; i < 1000; i++)
526 if (lenhist[i])
527 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
528 fprintf (stderr, ">999: %u\n", lenhist[1000]);
529 return SCM_UNSPECIFIED;
be54b15d
DH
530}
531#undef FUNC_NAME
532
3ee86942
MV
533#endif
534
535\f
536
537SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
538 (SCM obj),
539 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
540#define FUNC_NAME s_scm_string_p
541{
542 return scm_from_bool (IS_STRING (obj));
543}
544#undef FUNC_NAME
545
546
547SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
548
549SCM_DEFINE (scm_string, "string", 0, 0, 1,
550 (SCM chrs),
551 "@deffnx {Scheme Procedure} list->string chrs\n"
552 "Return a newly allocated string composed of the arguments,\n"
553 "@var{chrs}.")
554#define FUNC_NAME s_scm_string
555{
556 SCM result;
557 size_t len;
558 char *data;
559
560 {
561 long i = scm_ilength (chrs);
562
563 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
564 len = i;
565 }
566
567 result = scm_i_make_string (len, &data);
d2e53ed6 568 while (len > 0 && scm_is_pair (chrs))
3ee86942
MV
569 {
570 SCM elt = SCM_CAR (chrs);
571
572 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
573 *data++ = SCM_CHAR (elt);
574 chrs = SCM_CDR (chrs);
575 len--;
576 }
577 if (len > 0)
578 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
d2e53ed6 579 if (!scm_is_null (chrs))
3ee86942
MV
580 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
581
582 return result;
583}
584#undef FUNC_NAME
be54b15d 585
3b3b36dd 586SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 587 (SCM k, SCM chr),
0d26a824
MG
588 "Return a newly allocated string of\n"
589 "length @var{k}. If @var{chr} is given, then all elements of\n"
590 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 591 "of the @var{string} are unspecified.")
1bbd0b84 592#define FUNC_NAME s_scm_make_string
0f2d19dd 593{
3ee86942
MV
594 return scm_c_make_string (scm_to_size_t (k), chr);
595}
596#undef FUNC_NAME
597
598SCM
599scm_c_make_string (size_t len, SCM chr)
600#define FUNC_NAME NULL
601{
602 char *dst;
603 SCM res = scm_i_make_string (len, &dst);
cb0d8be2 604
e11e83f3
MV
605 if (!SCM_UNBNDP (chr))
606 {
3ee86942
MV
607 SCM_VALIDATE_CHAR (0, chr);
608 memset (dst, SCM_CHAR (chr), len);
0f2d19dd 609 }
e11e83f3
MV
610
611 return res;
0f2d19dd 612}
1bbd0b84 613#undef FUNC_NAME
0f2d19dd 614
3b3b36dd 615SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
616 (SCM string),
617 "Return the number of characters in @var{string}.")
1bbd0b84 618#define FUNC_NAME s_scm_string_length
0f2d19dd 619{
d1ca2c64 620 SCM_VALIDATE_STRING (1, string);
3ee86942 621 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 622}
1bbd0b84 623#undef FUNC_NAME
0f2d19dd 624
3ee86942
MV
625size_t
626scm_c_string_length (SCM string)
627{
628 if (!IS_STRING (string))
629 scm_wrong_type_arg_msg (NULL, 0, string, "string");
630 return STRING_LENGTH (string);
631}
632
bd9e24b3 633SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 634 (SCM str, SCM k),
0d26a824
MG
635 "Return character @var{k} of @var{str} using zero-origin\n"
636 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 637#define FUNC_NAME s_scm_string_ref
0f2d19dd 638{
3ae3166b 639 size_t len;
a55c2b68 640 unsigned long idx;
bd9e24b3 641
d1ca2c64 642 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
643
644 len = scm_i_string_length (str);
645 if (SCM_LIKELY (len > 0))
646 idx = scm_to_unsigned_integer (k, 0, len - 1);
647 else
648 scm_out_of_range (NULL, k);
649
3ee86942 650 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
0f2d19dd 651}
1bbd0b84 652#undef FUNC_NAME
0f2d19dd 653
3ee86942
MV
654SCM
655scm_c_string_ref (SCM str, size_t p)
656{
657 if (p >= scm_i_string_length (str))
658 scm_out_of_range (NULL, scm_from_size_t (p));
659 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
660}
f0942910 661
3b3b36dd 662SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 663 (SCM str, SCM k, SCM chr),
0d26a824
MG
664 "Store @var{chr} in element @var{k} of @var{str} and return\n"
665 "an unspecified value. @var{k} must be a valid index of\n"
666 "@var{str}.")
1bbd0b84 667#define FUNC_NAME s_scm_string_set_x
0f2d19dd 668{
3ae3166b 669 size_t len;
a55c2b68
MV
670 unsigned long idx;
671
f0942910 672 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
673
674 len = scm_i_string_length (str);
675 if (SCM_LIKELY (len > 0))
676 idx = scm_to_unsigned_integer (k, 0, len - 1);
677 else
678 scm_out_of_range (NULL, k);
679
34d19ef6 680 SCM_VALIDATE_CHAR (3, chr);
3ee86942
MV
681 {
682 char *dst = scm_i_string_writable_chars (str);
683 dst[idx] = SCM_CHAR (chr);
684 scm_i_string_stop_writing ();
685 }
0f2d19dd
JB
686 return SCM_UNSPECIFIED;
687}
1bbd0b84 688#undef FUNC_NAME
0f2d19dd 689
3ee86942
MV
690void
691scm_c_string_set_x (SCM str, size_t p, SCM chr)
692{
693 if (p >= scm_i_string_length (str))
694 scm_out_of_range (NULL, scm_from_size_t (p));
695 {
696 char *dst = scm_i_string_writable_chars (str);
697 dst[p] = SCM_CHAR (chr);
698 scm_i_string_stop_writing ();
699 }
700}
0f2d19dd 701
3b3b36dd 702SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
703 (SCM str, SCM start, SCM end),
704 "Return a newly allocated string formed from the characters\n"
705 "of @var{str} beginning with index @var{start} (inclusive) and\n"
706 "ending with index @var{end} (exclusive).\n"
707 "@var{str} must be a string, @var{start} and @var{end} must be\n"
708 "exact integers satisfying:\n\n"
709 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 710#define FUNC_NAME s_scm_substring
0f2d19dd 711{
3ee86942 712 size_t len, from, to;
685c0d71 713
d1ca2c64 714 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
715 len = scm_i_string_length (str);
716 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 717 if (SCM_UNBNDP (end))
3ee86942 718 to = len;
a55c2b68 719 else
3ee86942
MV
720 to = scm_to_unsigned_integer (end, from, len);
721 return scm_i_substring (str, from, to);
0f2d19dd 722}
1bbd0b84 723#undef FUNC_NAME
0f2d19dd 724
ed35de72
MV
725SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
726 (SCM str, SCM start, SCM end),
727 "Return a newly allocated string formed from the characters\n"
728 "of @var{str} beginning with index @var{start} (inclusive) and\n"
729 "ending with index @var{end} (exclusive).\n"
730 "@var{str} must be a string, @var{start} and @var{end} must be\n"
731 "exact integers satisfying:\n"
732 "\n"
733 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
734 "\n"
735 "The returned string is read-only.\n")
736#define FUNC_NAME s_scm_substring_read_only
737{
738 size_t len, from, to;
739
740 SCM_VALIDATE_STRING (1, str);
741 len = scm_i_string_length (str);
742 from = scm_to_unsigned_integer (start, 0, len);
743 if (SCM_UNBNDP (end))
744 to = len;
745 else
746 to = scm_to_unsigned_integer (end, from, len);
747 return scm_i_substring_read_only (str, from, to);
748}
749#undef FUNC_NAME
750
3ee86942
MV
751SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
752 (SCM str, SCM start, SCM end),
753 "Return a newly allocated string formed from the characters\n"
754 "of @var{str} beginning with index @var{start} (inclusive) and\n"
755 "ending with index @var{end} (exclusive).\n"
756 "@var{str} must be a string, @var{start} and @var{end} must be\n"
757 "exact integers satisfying:\n\n"
758 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
759#define FUNC_NAME s_scm_substring_copy
760{
e1b29f6a
MV
761 /* For the Scheme version, START is mandatory, but for the C
762 version, it is optional. See scm_string_copy in srfi-13.c for a
763 rationale.
764 */
765
766 size_t from, to;
3ee86942
MV
767
768 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
769 scm_i_get_substring_spec (scm_i_string_length (str),
770 start, &from, end, &to);
3ee86942
MV
771 return scm_i_substring_copy (str, from, to);
772}
773#undef FUNC_NAME
774
775SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
776 (SCM str, SCM start, SCM end),
777 "Return string that indirectly refers to the characters\n"
778 "of @var{str} beginning with index @var{start} (inclusive) and\n"
779 "ending with index @var{end} (exclusive).\n"
780 "@var{str} must be a string, @var{start} and @var{end} must be\n"
781 "exact integers satisfying:\n\n"
782 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
783#define FUNC_NAME s_scm_substring_shared
784{
785 size_t len, from, to;
786
787 SCM_VALIDATE_STRING (1, str);
788 len = scm_i_string_length (str);
789 from = scm_to_unsigned_integer (start, 0, len);
790 if (SCM_UNBNDP (end))
791 to = len;
792 else
793 to = scm_to_unsigned_integer (end, from, len);
794 return scm_i_substring_shared (str, from, to);
795}
796#undef FUNC_NAME
685c0d71 797
3b3b36dd 798SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 799 (SCM args),
0d26a824
MG
800 "Return a newly allocated string whose characters form the\n"
801 "concatenation of the given strings, @var{args}.")
1bbd0b84 802#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
803{
804 SCM res;
1be6b49c 805 size_t i = 0;
c829a427
MV
806 SCM l, s;
807 char *data;
af45e3b0
DH
808
809 SCM_VALIDATE_REST_ARGUMENT (args);
d2e53ed6 810 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
811 {
812 s = SCM_CAR (l);
813 SCM_VALIDATE_STRING (SCM_ARGn, s);
3ee86942 814 i += scm_i_string_length (s);
c829a427 815 }
3ee86942 816 res = scm_i_make_string (i, &data);
d2e53ed6 817 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 818 {
edea856c 819 size_t len;
c829a427 820 s = SCM_CAR (l);
3ee86942 821 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 822 len = scm_i_string_length (s);
3ee86942
MV
823 memcpy (data, scm_i_string_chars (s), len);
824 data += len;
c829a427
MV
825 scm_remember_upto_here_1 (s);
826 }
0f2d19dd
JB
827 return res;
828}
1bbd0b84 829#undef FUNC_NAME
0f2d19dd 830
c829a427
MV
831int
832scm_is_string (SCM obj)
833{
3ee86942 834 return IS_STRING (obj);
c829a427 835}
24933780 836
c829a427
MV
837SCM
838scm_from_locale_stringn (const char *str, size_t len)
839{
840 SCM res;
841 char *dst;
4d4528e7 842
c829a427
MV
843 if (len == (size_t)-1)
844 len = strlen (str);
3ee86942 845 res = scm_i_make_string (len, &dst);
c829a427
MV
846 memcpy (dst, str, len);
847 return res;
848}
4d4528e7 849
c829a427
MV
850SCM
851scm_from_locale_string (const char *str)
4d4528e7 852{
c829a427
MV
853 return scm_from_locale_stringn (str, -1);
854}
4d4528e7 855
c829a427
MV
856SCM
857scm_take_locale_stringn (char *str, size_t len)
858{
48ddf0d9
KR
859 SCM buf, res;
860
c829a427 861 if (len == (size_t)-1)
48ddf0d9 862 len = strlen (str);
c829a427
MV
863 else
864 {
48ddf0d9
KR
865 /* Ensure STR is null terminated. A realloc for 1 extra byte should
866 often be satisfied from the alignment padding after the block, with
867 no actual data movement. */
868 str = scm_realloc (str, len+1);
869 str[len] = '\0';
c829a427 870 }
c829a427 871
fd0a5bbc 872 buf = scm_i_take_stringbufn (str, len);
3ee86942 873 res = scm_double_cell (STRING_TAG,
48ddf0d9
KR
874 SCM_UNPACK (buf),
875 (scm_t_bits) 0, (scm_t_bits) len);
c829a427
MV
876 return res;
877}
878
48ddf0d9
KR
879SCM
880scm_take_locale_string (char *str)
881{
882 return scm_take_locale_stringn (str, -1);
883}
884
c829a427
MV
885char *
886scm_to_locale_stringn (SCM str, size_t *lenp)
887{
888 char *res;
889 size_t len;
4d4528e7 890
3ee86942 891 if (!scm_is_string (str))
c829a427 892 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942 893 len = scm_i_string_length (str);
c829a427 894 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
3ee86942 895 memcpy (res, scm_i_string_chars (str), len);
c829a427
MV
896 if (lenp == NULL)
897 {
898 res[len] = '\0';
899 if (strlen (res) != len)
900 {
901 free (res);
902 scm_misc_error (NULL,
903 "string contains #\\nul character: ~S",
904 scm_list_1 (str));
905 }
906 }
907 else
4d4528e7 908 *lenp = len;
24933780 909
c829a427
MV
910 scm_remember_upto_here_1 (str);
911 return res;
4d4528e7 912}
af68e5e5 913
c829a427
MV
914char *
915scm_to_locale_string (SCM str)
916{
917 return scm_to_locale_stringn (str, NULL);
918}
af68e5e5 919
c829a427
MV
920size_t
921scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
922{
923 size_t len;
924
3ee86942 925 if (!scm_is_string (str))
c829a427 926 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942
MV
927 len = scm_i_string_length (str);
928 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
c829a427
MV
929 scm_remember_upto_here_1 (str);
930 return len;
931}
af68e5e5 932
3ee86942
MV
933/* converts C scm_array of strings to SCM scm_list of strings. */
934/* If argc < 0, a null terminated scm_array is assumed. */
935SCM
936scm_makfromstrs (int argc, char **argv)
937{
938 int i = argc;
939 SCM lst = SCM_EOL;
940 if (0 > i)
941 for (i = 0; argv[i]; i++);
942 while (i--)
943 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
944 return lst;
945}
946
c829a427
MV
947/* Return a newly allocated array of char pointers to each of the strings
948 in args, with a terminating NULL pointer. */
949
950char **
951scm_i_allocate_string_pointers (SCM list)
af68e5e5 952{
c829a427
MV
953 char **result;
954 int len = scm_ilength (list);
955 int i;
956
957 if (len < 0)
958 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
959
661ae7ab 960 scm_dynwind_begin (0);
c829a427
MV
961
962 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
963 result[len] = NULL;
661ae7ab 964 scm_dynwind_unwind_handler (free, result, 0);
c829a427
MV
965
966 /* The list might be have been modified in another thread, so
967 we check LIST before each access.
968 */
d2e53ed6 969 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427
MV
970 {
971 result[i] = scm_to_locale_string (SCM_CAR (list));
972 list = SCM_CDR (list);
973 }
974
661ae7ab 975 scm_dynwind_end ();
c829a427 976 return result;
af68e5e5 977}
e53cc817 978
c829a427
MV
979void
980scm_i_free_string_pointers (char **pointers)
981{
982 int i;
983
984 for (i = 0; pointers[i]; i++)
985 free (pointers[i]);
986 free (pointers);
987}
24933780 988
6f14f578
MV
989void
990scm_i_get_substring_spec (size_t len,
991 SCM start, size_t *cstart,
992 SCM end, size_t *cend)
993{
994 if (SCM_UNBNDP (start))
995 *cstart = 0;
996 else
997 *cstart = scm_to_unsigned_integer (start, 0, len);
998
999 if (SCM_UNBNDP (end))
1000 *cend = len;
1001 else
1002 *cend = scm_to_unsigned_integer (end, *cstart, len);
1003}
1004
3ee86942
MV
1005#if SCM_ENABLE_DEPRECATED
1006
556d75db
MV
1007/* When these definitions are removed, it becomes reasonable to use
1008 read-only strings for string literals. For that, change the reader
1009 to create string literals with scm_c_substring_read_only instead of
1010 with scm_c_substring_copy.
1011*/
1012
3ee86942 1013int
fe78c51a 1014scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1015{
1016 scm_c_issue_deprecation_warning
1017 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1018
2616f0e0 1019 return scm_is_string (str);
3ee86942
MV
1020}
1021
1022char *
fe78c51a 1023scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1024{
1025 char *chars;
1026
1027 scm_c_issue_deprecation_warning
1028 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1029
2616f0e0
MV
1030 /* We don't accept shared substrings here since they are not
1031 null-terminated.
1032 */
1033 if (IS_SH_STRING (str))
1034 scm_misc_error (NULL,
1035 "SCM_STRING_CHARS does not work with shared substrings.",
1036 SCM_EOL);
1037
556d75db
MV
1038 /* We explicitely test for read-only strings to produce a better
1039 error message.
1040 */
1041
1042 if (IS_RO_STRING (str))
1043 scm_misc_error (NULL,
1044 "SCM_STRING_CHARS does not work with read-only strings.",
1045 SCM_EOL);
1046
2616f0e0 1047 /* The following is still wrong, of course...
3ee86942
MV
1048 */
1049 chars = scm_i_string_writable_chars (str);
1050 scm_i_string_stop_writing ();
1051 return chars;
1052}
1053
1054size_t
fe78c51a 1055scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1056{
1057 scm_c_issue_deprecation_warning
1058 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1059 return scm_c_string_length (str);
1060}
1061
1062#endif
1063
0f2d19dd
JB
1064void
1065scm_init_strings ()
0f2d19dd 1066{
3ee86942 1067 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1068
a0599745 1069#include "libguile/strings.x"
0f2d19dd
JB
1070}
1071
89e00824
ML
1072
1073/*
1074 Local Variables:
1075 c-file-style: "gnu"
1076 End:
1077*/