Allow the static allocation of all types of subrs.
[bpt/guile.git] / libguile / strings.c
CommitLineData
35920c00 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e
MV
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 7 *
73be1d9e
MV
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
92205699 15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
73be1d9e 16 */
1bbd0b84 17
1bbd0b84 18
0f2d19dd 19\f
dbb605f5
LC
20#ifdef HAVE_CONFIG_H
21# include <config.h>
22#endif
0f2d19dd 23
faf2c9d7 24#include <string.h>
3ee86942 25#include <stdio.h>
faf2c9d7 26
a0599745
MD
27#include "libguile/_scm.h"
28#include "libguile/chars.h"
7c33806a 29#include "libguile/root.h"
a0599745 30#include "libguile/strings.h"
1afff620 31#include "libguile/deprecation.h"
a0599745 32#include "libguile/validate.h"
1afff620 33
0f2d19dd
JB
34\f
35
36/* {Strings}
37 */
38
3ee86942
MV
39
40/* Stringbufs
41 *
42 * XXX - keeping an accurate refcount during GC seems to be quite
43 * tricky, so we just keep score of whether a stringbuf might be
44 * shared, not wether it definitely is.
45 *
46 * The scheme I (mvo) tried to keep an accurate reference count would
47 * recount all strings that point to a stringbuf during the mark-phase
48 * of the GC. This was done since one cannot access the stringbuf of
49 * a string when that string is freed (in order to decrease the
50 * reference count). The memory of the stringbuf might have been
51 * reused already for something completely different.
52 *
53 * This recounted worked for a small number of threads beating on
54 * cow-strings, but it failed randomly with more than 10 threads, say.
55 * I couldn't figure out what went wrong, so I used the conservative
56 * approach implemented below.
57 *
58 * A stringbuf needs to know its length, but only so that it can be
59 * reported when the stringbuf is freed.
60 *
61 * Stringbufs (and strings) are not stored very compactly: a stringbuf
62 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
63 * information. As a compensation, the code below is made more
64 * complicated by storing small strings inline in the double cell of a
65 * stringbuf. So we have fixstrings and bigstrings...
66 */
67
35920c00
LC
68#define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
69#define STRINGBUF_F_INLINE SCM_I_STRINGBUF_F_INLINE
3ee86942
MV
70
71#define STRINGBUF_TAG scm_tc7_stringbuf
72#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
73#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
74
75#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
76#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
77#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
78#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
79
80#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
81 ? STRINGBUF_INLINE_CHARS (buf) \
82 : STRINGBUF_OUTLINE_CHARS (buf))
83#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
84 ? STRINGBUF_INLINE_LENGTH (buf) \
85 : STRINGBUF_OUTLINE_LENGTH (buf))
86
87#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
88
9b41542f
LC
89#define SET_STRINGBUF_SHARED(buf) \
90 do \
91 { \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
96 } \
97 while (0)
3ee86942
MV
98
99#if SCM_DEBUG
100static size_t lenhist[1001];
101#endif
102
103static SCM
104make_stringbuf (size_t len)
0f2d19dd 105{
3ee86942
MV
106 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
107 scm_i_symbol_chars, all stringbufs are null-terminated. Once
108 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
109 has been changed for scm_i_symbol_chars, this null-termination
110 can be dropped.
111 */
112
113#if SCM_DEBUG
114 if (len < 1000)
115 lenhist[len]++;
116 else
117 lenhist[1000]++;
118#endif
0f2d19dd 119
3ee86942
MV
120 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
121 {
122 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
123 0, 0, 0);
124 }
125 else
126 {
c5018a2b 127 char *mem = scm_gc_malloc_pointerless (len + 1, "string");
3ee86942
MV
128 mem[len] = '\0';
129 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
130 (scm_t_bits) len, (scm_t_bits) 0);
131 }
132}
e53cc817 133
2b829bbb
KR
134/* Return a new stringbuf whose underlying storage consists of the LEN+1
135 octets pointed to by STR (the last octet is zero). */
7f74cf9a 136SCM
fd0a5bbc
HWN
137scm_i_take_stringbufn (char *str, size_t len)
138{
2b829bbb 139 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
fd0a5bbc
HWN
140
141 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
142 (scm_t_bits) len, (scm_t_bits) 0);
143}
144
bd9e24b3 145
9de87eea 146scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
bd9e24b3 147
3ee86942
MV
148/* Copy-on-write strings.
149 */
bd9e24b3 150
3ee86942 151#define STRING_TAG scm_tc7_string
bd9e24b3 152
3ee86942
MV
153#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
154#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
155#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 156
3ee86942
MV
157#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
158#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
159
160#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
161
ed35de72
MV
162/* Read-only strings.
163 */
164
35920c00 165#define RO_STRING_TAG scm_tc7_ro_string
ed35de72
MV
166#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
167
e1b29f6a
MV
168/* Mutation-sharing substrings
169 */
170
171#define SH_STRING_TAG (scm_tc7_string + 0x100)
172
173#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
174/* START and LENGTH as for STRINGs. */
175
176#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
177
3ee86942
MV
178SCM
179scm_i_make_string (size_t len, char **charsp)
180{
181 SCM buf = make_stringbuf (len);
182 SCM res;
183 if (charsp)
184 *charsp = STRINGBUF_CHARS (buf);
185 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
186 (scm_t_bits)0, (scm_t_bits) len);
187 return res;
0f2d19dd
JB
188}
189
3ee86942
MV
190static void
191validate_substring_args (SCM str, size_t start, size_t end)
192{
193 if (!IS_STRING (str))
194 scm_wrong_type_arg_msg (NULL, 0, str, "string");
195 if (start > STRING_LENGTH (str))
196 scm_out_of_range (NULL, scm_from_size_t (start));
197 if (end > STRING_LENGTH (str) || end < start)
198 scm_out_of_range (NULL, scm_from_size_t (end));
199}
0f2d19dd 200
e1b29f6a
MV
201static inline void
202get_str_buf_start (SCM *str, SCM *buf, size_t *start)
203{
204 *start = STRING_START (*str);
205 if (IS_SH_STRING (*str))
206 {
207 *str = SH_STRING_STRING (*str);
208 *start += STRING_START (*str);
209 }
210 *buf = STRING_STRINGBUF (*str);
211}
212
3ee86942
MV
213SCM
214scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 215{
e1b29f6a
MV
216 SCM buf;
217 size_t str_start;
218 get_str_buf_start (&str, &buf, &str_start);
9de87eea 219 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 220 SET_STRINGBUF_SHARED (buf);
9de87eea 221 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 222 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
223 (scm_t_bits)str_start + start,
224 (scm_t_bits) end - start);
0f2d19dd
JB
225}
226
ed35de72
MV
227SCM
228scm_i_substring_read_only (SCM str, size_t start, size_t end)
229{
45a9f430
LC
230 SCM buf;
231 size_t str_start;
232 get_str_buf_start (&str, &buf, &str_start);
233 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
234 SET_STRINGBUF_SHARED (buf);
235 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
236 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
237 (scm_t_bits)str_start + start,
238 (scm_t_bits) end - start);
ed35de72
MV
239}
240
3ee86942
MV
241SCM
242scm_i_substring_copy (SCM str, size_t start, size_t end)
243{
244 size_t len = end - start;
edea856c 245 SCM buf, my_buf;
e1b29f6a
MV
246 size_t str_start;
247 get_str_buf_start (&str, &buf, &str_start);
edea856c 248 my_buf = make_stringbuf (len);
e1b29f6a
MV
249 memcpy (STRINGBUF_CHARS (my_buf),
250 STRINGBUF_CHARS (buf) + str_start + start, len);
3ee86942
MV
251 scm_remember_upto_here_1 (buf);
252 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
253 (scm_t_bits)0, (scm_t_bits) len);
254}
0f2d19dd 255
e1b29f6a
MV
256SCM
257scm_i_substring_shared (SCM str, size_t start, size_t end)
258{
259 if (start == 0 && end == STRING_LENGTH (str))
260 return str;
261 else
262 {
263 size_t len = end - start;
264 if (IS_SH_STRING (str))
265 {
266 start += STRING_START (str);
267 str = SH_STRING_STRING (str);
268 }
269 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
270 (scm_t_bits)start, (scm_t_bits) len);
271 }
272}
273
3ee86942
MV
274SCM
275scm_c_substring (SCM str, size_t start, size_t end)
276{
277 validate_substring_args (str, start, end);
278 return scm_i_substring (str, start, end);
279}
ee149d03 280
ed35de72
MV
281SCM
282scm_c_substring_read_only (SCM str, size_t start, size_t end)
283{
284 validate_substring_args (str, start, end);
285 return scm_i_substring_read_only (str, start, end);
286}
287
0f2d19dd 288SCM
3ee86942 289scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 290{
3ee86942
MV
291 validate_substring_args (str, start, end);
292 return scm_i_substring_copy (str, start, end);
293}
294
3ee86942
MV
295SCM
296scm_c_substring_shared (SCM str, size_t start, size_t end)
297{
298 validate_substring_args (str, start, end);
299 return scm_i_substring_shared (str, start, end);
300}
0f2d19dd 301
d6c74168 302\f
3ee86942
MV
303/* Internal accessors
304 */
305
306size_t
307scm_i_string_length (SCM str)
0f2d19dd 308{
3ee86942 309 return STRING_LENGTH (str);
0f2d19dd
JB
310}
311
3ee86942
MV
312const char *
313scm_i_string_chars (SCM str)
314{
315 SCM buf;
e1b29f6a
MV
316 size_t start;
317 get_str_buf_start (&str, &buf, &start);
3ee86942
MV
318 return STRINGBUF_CHARS (buf) + start;
319}
b00418df 320
3ee86942 321char *
ed35de72 322scm_i_string_writable_chars (SCM orig_str)
b00418df 323{
ed35de72 324 SCM buf, str = orig_str;
e1b29f6a 325 size_t start;
ed35de72 326
e1b29f6a 327 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
328 if (IS_RO_STRING (str))
329 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
330
9de87eea 331 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
332 if (STRINGBUF_SHARED (buf))
333 {
902578f1 334 /* Clone stringbuf. */
3ee86942
MV
335
336 size_t len = STRING_LENGTH (str);
337 SCM new_buf;
338
9de87eea 339 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
340
341 new_buf = make_stringbuf (len);
342 memcpy (STRINGBUF_CHARS (new_buf),
343 STRINGBUF_CHARS (buf) + STRING_START (str), len);
344
3ee86942 345 start -= STRING_START (str);
902578f1
LC
346
347 /* FIXME: The following operations are not atomic, so other threads
348 looking at STR may see an inconsistent state. Nevertheless it can't
349 hurt much since (i) accessing STR while it is being mutated can't
350 yield a crash, and (ii) concurrent accesses to STR should be
351 protected by a mutex at the application level. The latter may not
352 apply when STR != ORIG_STR, though. */
3ee86942 353 SET_STRING_START (str, 0);
902578f1 354 SET_STRING_STRINGBUF (str, new_buf);
3ee86942
MV
355
356 buf = new_buf;
357
9de87eea 358 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
359 }
360
361 return STRINGBUF_CHARS (buf) + start;
b00418df
DH
362}
363
3ee86942
MV
364void
365scm_i_string_stop_writing (void)
366{
9de87eea 367 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942 368}
b00418df 369
3ee86942
MV
370/* Symbols.
371
372 Basic symbol creation and accessing is done here, the rest is in
373 symbols.[hc]. This has been done to keep stringbufs and the
374 internals of strings and string-like objects confined to this file.
375*/
376
377#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
378
379SCM
6869328b
MV
380scm_i_make_symbol (SCM name, scm_t_bits flags,
381 unsigned long hash, SCM props)
3ee86942
MV
382{
383 SCM buf;
384 size_t start = STRING_START (name);
385 size_t length = STRING_LENGTH (name);
386
387 if (IS_SH_STRING (name))
388 {
389 name = SH_STRING_STRING (name);
390 start += STRING_START (name);
391 }
392 buf = SYMBOL_STRINGBUF (name);
393
394 if (start == 0 && length == STRINGBUF_LENGTH (buf))
395 {
396 /* reuse buf. */
9de87eea 397 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 398 SET_STRINGBUF_SHARED (buf);
9de87eea 399 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
400 }
401 else
402 {
403 /* make new buf. */
404 SCM new_buf = make_stringbuf (length);
405 memcpy (STRINGBUF_CHARS (new_buf),
406 STRINGBUF_CHARS (buf) + start, length);
407 buf = new_buf;
408 }
6869328b 409 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
410 (scm_t_bits) hash, SCM_UNPACK (props));
411}
412
fd0a5bbc
HWN
413SCM
414scm_i_c_make_symbol (const char *name, size_t len,
415 scm_t_bits flags, unsigned long hash, SCM props)
416{
417 SCM buf = make_stringbuf (len);
418 memcpy (STRINGBUF_CHARS (buf), name, len);
419
a284cc7e
LC
420 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
421 (scm_t_bits) hash, SCM_UNPACK (props));
fd0a5bbc
HWN
422}
423
424/* Return a new symbol that uses the LEN bytes pointed to by NAME as its
425 underlying storage. */
426SCM
427scm_i_c_take_symbol (char *name, size_t len,
428 scm_t_bits flags, unsigned long hash, SCM props)
429{
430 SCM buf = scm_i_take_stringbufn (name, len);
431
432 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
433 (scm_t_bits) hash, SCM_UNPACK (props));
434}
435
3ee86942
MV
436size_t
437scm_i_symbol_length (SCM sym)
0f2d19dd 438{
3ee86942 439 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
440}
441
071bb6a8
LC
442size_t
443scm_c_symbol_length (SCM sym)
444#define FUNC_NAME "scm_c_symbol_length"
445{
446 SCM_VALIDATE_SYMBOL (1, sym);
447
448 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
449}
450#undef FUNC_NAME
451
3ee86942
MV
452const char *
453scm_i_symbol_chars (SCM sym)
454{
455 SCM buf = SYMBOL_STRINGBUF (sym);
456 return STRINGBUF_CHARS (buf);
457}
1cc91f1b 458
be54b15d 459SCM
3ee86942 460scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 461{
3ee86942 462 SCM buf = SYMBOL_STRINGBUF (sym);
9de87eea 463 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
3ee86942 464 SET_STRINGBUF_SHARED (buf);
9de87eea 465 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
fd2b17b9 466 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
3ee86942
MV
467 (scm_t_bits)start, (scm_t_bits) end - start);
468}
be54b15d 469
3ee86942
MV
470/* Debugging
471 */
be54b15d 472
3ee86942 473#if SCM_DEBUG
be54b15d 474
3ee86942
MV
475SCM scm_sys_string_dump (SCM);
476SCM scm_sys_symbol_dump (SCM);
477SCM scm_sys_stringbuf_hist (void);
be54b15d 478
3ee86942
MV
479SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
480 (SCM str),
481 "")
482#define FUNC_NAME s_scm_sys_string_dump
483{
484 SCM_VALIDATE_STRING (1, str);
485 fprintf (stderr, "%p:\n", str);
486 fprintf (stderr, " start: %u\n", STRING_START (str));
487 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
488 if (IS_SH_STRING (str))
489 {
490 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
491 fprintf (stderr, "\n");
492 scm_sys_string_dump (SH_STRING_STRING (str));
493 }
494 else
495 {
496 SCM buf = STRING_STRINGBUF (str);
497 fprintf (stderr, " buf: %p\n", buf);
498 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
499 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
500 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
501 }
502 return SCM_UNSPECIFIED;
503}
504#undef FUNC_NAME
505
506SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
507 (SCM sym),
508 "")
509#define FUNC_NAME s_scm_sys_symbol_dump
510{
511 SCM_VALIDATE_SYMBOL (1, sym);
512 fprintf (stderr, "%p:\n", sym);
513 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
514 {
515 SCM buf = SYMBOL_STRINGBUF (sym);
516 fprintf (stderr, " buf: %p\n", buf);
517 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
518 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
519 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
520 }
521 return SCM_UNSPECIFIED;
522}
523#undef FUNC_NAME
524
525SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
526 (void),
527 "")
e1b29f6a 528#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
529{
530 int i;
531 for (i = 0; i < 1000; i++)
532 if (lenhist[i])
533 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
534 fprintf (stderr, ">999: %u\n", lenhist[1000]);
535 return SCM_UNSPECIFIED;
be54b15d
DH
536}
537#undef FUNC_NAME
538
3ee86942
MV
539#endif
540
541\f
542
543SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
544 (SCM obj),
545 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
546#define FUNC_NAME s_scm_string_p
547{
548 return scm_from_bool (IS_STRING (obj));
549}
550#undef FUNC_NAME
551
552
553SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
554
555SCM_DEFINE (scm_string, "string", 0, 0, 1,
556 (SCM chrs),
557 "@deffnx {Scheme Procedure} list->string chrs\n"
558 "Return a newly allocated string composed of the arguments,\n"
559 "@var{chrs}.")
560#define FUNC_NAME s_scm_string
561{
562 SCM result;
563 size_t len;
564 char *data;
565
566 {
567 long i = scm_ilength (chrs);
568
569 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
570 len = i;
571 }
572
573 result = scm_i_make_string (len, &data);
d2e53ed6 574 while (len > 0 && scm_is_pair (chrs))
3ee86942
MV
575 {
576 SCM elt = SCM_CAR (chrs);
577
578 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
579 *data++ = SCM_CHAR (elt);
580 chrs = SCM_CDR (chrs);
581 len--;
582 }
583 if (len > 0)
584 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
d2e53ed6 585 if (!scm_is_null (chrs))
3ee86942
MV
586 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
587
588 return result;
589}
590#undef FUNC_NAME
be54b15d 591
3b3b36dd 592SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 593 (SCM k, SCM chr),
0d26a824
MG
594 "Return a newly allocated string of\n"
595 "length @var{k}. If @var{chr} is given, then all elements of\n"
596 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 597 "of the @var{string} are unspecified.")
1bbd0b84 598#define FUNC_NAME s_scm_make_string
0f2d19dd 599{
3ee86942
MV
600 return scm_c_make_string (scm_to_size_t (k), chr);
601}
602#undef FUNC_NAME
603
604SCM
605scm_c_make_string (size_t len, SCM chr)
606#define FUNC_NAME NULL
607{
608 char *dst;
609 SCM res = scm_i_make_string (len, &dst);
cb0d8be2 610
e11e83f3
MV
611 if (!SCM_UNBNDP (chr))
612 {
3ee86942
MV
613 SCM_VALIDATE_CHAR (0, chr);
614 memset (dst, SCM_CHAR (chr), len);
0f2d19dd 615 }
e11e83f3
MV
616
617 return res;
0f2d19dd 618}
1bbd0b84 619#undef FUNC_NAME
0f2d19dd 620
3b3b36dd 621SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
622 (SCM string),
623 "Return the number of characters in @var{string}.")
1bbd0b84 624#define FUNC_NAME s_scm_string_length
0f2d19dd 625{
d1ca2c64 626 SCM_VALIDATE_STRING (1, string);
3ee86942 627 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 628}
1bbd0b84 629#undef FUNC_NAME
0f2d19dd 630
3ee86942
MV
631size_t
632scm_c_string_length (SCM string)
633{
634 if (!IS_STRING (string))
635 scm_wrong_type_arg_msg (NULL, 0, string, "string");
636 return STRING_LENGTH (string);
637}
638
bd9e24b3 639SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 640 (SCM str, SCM k),
0d26a824
MG
641 "Return character @var{k} of @var{str} using zero-origin\n"
642 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 643#define FUNC_NAME s_scm_string_ref
0f2d19dd 644{
3ae3166b 645 size_t len;
a55c2b68 646 unsigned long idx;
bd9e24b3 647
d1ca2c64 648 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
649
650 len = scm_i_string_length (str);
651 if (SCM_LIKELY (len > 0))
652 idx = scm_to_unsigned_integer (k, 0, len - 1);
653 else
654 scm_out_of_range (NULL, k);
655
3ee86942 656 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
0f2d19dd 657}
1bbd0b84 658#undef FUNC_NAME
0f2d19dd 659
3ee86942
MV
660SCM
661scm_c_string_ref (SCM str, size_t p)
662{
663 if (p >= scm_i_string_length (str))
664 scm_out_of_range (NULL, scm_from_size_t (p));
665 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
666}
f0942910 667
3b3b36dd 668SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 669 (SCM str, SCM k, SCM chr),
0d26a824
MG
670 "Store @var{chr} in element @var{k} of @var{str} and return\n"
671 "an unspecified value. @var{k} must be a valid index of\n"
672 "@var{str}.")
1bbd0b84 673#define FUNC_NAME s_scm_string_set_x
0f2d19dd 674{
3ae3166b 675 size_t len;
a55c2b68
MV
676 unsigned long idx;
677
f0942910 678 SCM_VALIDATE_STRING (1, str);
3ae3166b
LC
679
680 len = scm_i_string_length (str);
681 if (SCM_LIKELY (len > 0))
682 idx = scm_to_unsigned_integer (k, 0, len - 1);
683 else
684 scm_out_of_range (NULL, k);
685
34d19ef6 686 SCM_VALIDATE_CHAR (3, chr);
3ee86942
MV
687 {
688 char *dst = scm_i_string_writable_chars (str);
689 dst[idx] = SCM_CHAR (chr);
690 scm_i_string_stop_writing ();
691 }
0f2d19dd
JB
692 return SCM_UNSPECIFIED;
693}
1bbd0b84 694#undef FUNC_NAME
0f2d19dd 695
3ee86942
MV
696void
697scm_c_string_set_x (SCM str, size_t p, SCM chr)
698{
699 if (p >= scm_i_string_length (str))
700 scm_out_of_range (NULL, scm_from_size_t (p));
701 {
702 char *dst = scm_i_string_writable_chars (str);
703 dst[p] = SCM_CHAR (chr);
704 scm_i_string_stop_writing ();
705 }
706}
0f2d19dd 707
3b3b36dd 708SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
709 (SCM str, SCM start, SCM end),
710 "Return a newly allocated string formed from the characters\n"
711 "of @var{str} beginning with index @var{start} (inclusive) and\n"
712 "ending with index @var{end} (exclusive).\n"
713 "@var{str} must be a string, @var{start} and @var{end} must be\n"
714 "exact integers satisfying:\n\n"
715 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 716#define FUNC_NAME s_scm_substring
0f2d19dd 717{
3ee86942 718 size_t len, from, to;
685c0d71 719
d1ca2c64 720 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
721 len = scm_i_string_length (str);
722 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 723 if (SCM_UNBNDP (end))
3ee86942 724 to = len;
a55c2b68 725 else
3ee86942
MV
726 to = scm_to_unsigned_integer (end, from, len);
727 return scm_i_substring (str, from, to);
0f2d19dd 728}
1bbd0b84 729#undef FUNC_NAME
0f2d19dd 730
ed35de72
MV
731SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
732 (SCM str, SCM start, SCM end),
733 "Return a newly allocated string formed from the characters\n"
734 "of @var{str} beginning with index @var{start} (inclusive) and\n"
735 "ending with index @var{end} (exclusive).\n"
736 "@var{str} must be a string, @var{start} and @var{end} must be\n"
737 "exact integers satisfying:\n"
738 "\n"
739 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
740 "\n"
741 "The returned string is read-only.\n")
742#define FUNC_NAME s_scm_substring_read_only
743{
744 size_t len, from, to;
745
746 SCM_VALIDATE_STRING (1, str);
747 len = scm_i_string_length (str);
748 from = scm_to_unsigned_integer (start, 0, len);
749 if (SCM_UNBNDP (end))
750 to = len;
751 else
752 to = scm_to_unsigned_integer (end, from, len);
753 return scm_i_substring_read_only (str, from, to);
754}
755#undef FUNC_NAME
756
3ee86942
MV
757SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
758 (SCM str, SCM start, SCM end),
759 "Return a newly allocated string formed from the characters\n"
760 "of @var{str} beginning with index @var{start} (inclusive) and\n"
761 "ending with index @var{end} (exclusive).\n"
762 "@var{str} must be a string, @var{start} and @var{end} must be\n"
763 "exact integers satisfying:\n\n"
764 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
765#define FUNC_NAME s_scm_substring_copy
766{
e1b29f6a
MV
767 /* For the Scheme version, START is mandatory, but for the C
768 version, it is optional. See scm_string_copy in srfi-13.c for a
769 rationale.
770 */
771
772 size_t from, to;
3ee86942
MV
773
774 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
775 scm_i_get_substring_spec (scm_i_string_length (str),
776 start, &from, end, &to);
3ee86942
MV
777 return scm_i_substring_copy (str, from, to);
778}
779#undef FUNC_NAME
780
781SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
782 (SCM str, SCM start, SCM end),
783 "Return string that indirectly refers to the characters\n"
784 "of @var{str} beginning with index @var{start} (inclusive) and\n"
785 "ending with index @var{end} (exclusive).\n"
786 "@var{str} must be a string, @var{start} and @var{end} must be\n"
787 "exact integers satisfying:\n\n"
788 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
789#define FUNC_NAME s_scm_substring_shared
790{
791 size_t len, from, to;
792
793 SCM_VALIDATE_STRING (1, str);
794 len = scm_i_string_length (str);
795 from = scm_to_unsigned_integer (start, 0, len);
796 if (SCM_UNBNDP (end))
797 to = len;
798 else
799 to = scm_to_unsigned_integer (end, from, len);
800 return scm_i_substring_shared (str, from, to);
801}
802#undef FUNC_NAME
685c0d71 803
3b3b36dd 804SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 805 (SCM args),
0d26a824
MG
806 "Return a newly allocated string whose characters form the\n"
807 "concatenation of the given strings, @var{args}.")
1bbd0b84 808#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
809{
810 SCM res;
1be6b49c 811 size_t i = 0;
c829a427
MV
812 SCM l, s;
813 char *data;
af45e3b0
DH
814
815 SCM_VALIDATE_REST_ARGUMENT (args);
d2e53ed6 816 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
817 {
818 s = SCM_CAR (l);
819 SCM_VALIDATE_STRING (SCM_ARGn, s);
3ee86942 820 i += scm_i_string_length (s);
c829a427 821 }
3ee86942 822 res = scm_i_make_string (i, &data);
d2e53ed6 823 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 824 {
edea856c 825 size_t len;
c829a427 826 s = SCM_CAR (l);
3ee86942 827 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 828 len = scm_i_string_length (s);
3ee86942
MV
829 memcpy (data, scm_i_string_chars (s), len);
830 data += len;
c829a427
MV
831 scm_remember_upto_here_1 (s);
832 }
0f2d19dd
JB
833 return res;
834}
1bbd0b84 835#undef FUNC_NAME
0f2d19dd 836
c829a427
MV
837int
838scm_is_string (SCM obj)
839{
3ee86942 840 return IS_STRING (obj);
c829a427 841}
24933780 842
c829a427
MV
843SCM
844scm_from_locale_stringn (const char *str, size_t len)
845{
846 SCM res;
847 char *dst;
4d4528e7 848
c829a427
MV
849 if (len == (size_t)-1)
850 len = strlen (str);
3ee86942 851 res = scm_i_make_string (len, &dst);
c829a427
MV
852 memcpy (dst, str, len);
853 return res;
854}
4d4528e7 855
c829a427
MV
856SCM
857scm_from_locale_string (const char *str)
4d4528e7 858{
c829a427
MV
859 return scm_from_locale_stringn (str, -1);
860}
4d4528e7 861
c829a427
MV
862SCM
863scm_take_locale_stringn (char *str, size_t len)
864{
48ddf0d9
KR
865 SCM buf, res;
866
c829a427 867 if (len == (size_t)-1)
48ddf0d9 868 len = strlen (str);
c829a427
MV
869 else
870 {
48ddf0d9
KR
871 /* Ensure STR is null terminated. A realloc for 1 extra byte should
872 often be satisfied from the alignment padding after the block, with
873 no actual data movement. */
874 str = scm_realloc (str, len+1);
875 str[len] = '\0';
c829a427 876 }
c829a427 877
fd0a5bbc 878 buf = scm_i_take_stringbufn (str, len);
3ee86942 879 res = scm_double_cell (STRING_TAG,
48ddf0d9
KR
880 SCM_UNPACK (buf),
881 (scm_t_bits) 0, (scm_t_bits) len);
c829a427
MV
882 return res;
883}
884
48ddf0d9
KR
885SCM
886scm_take_locale_string (char *str)
887{
888 return scm_take_locale_stringn (str, -1);
889}
890
c829a427
MV
891char *
892scm_to_locale_stringn (SCM str, size_t *lenp)
893{
894 char *res;
895 size_t len;
4d4528e7 896
3ee86942 897 if (!scm_is_string (str))
c829a427 898 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942 899 len = scm_i_string_length (str);
c829a427 900 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
3ee86942 901 memcpy (res, scm_i_string_chars (str), len);
c829a427
MV
902 if (lenp == NULL)
903 {
904 res[len] = '\0';
905 if (strlen (res) != len)
906 {
907 free (res);
908 scm_misc_error (NULL,
909 "string contains #\\nul character: ~S",
910 scm_list_1 (str));
911 }
912 }
913 else
4d4528e7 914 *lenp = len;
24933780 915
c829a427
MV
916 scm_remember_upto_here_1 (str);
917 return res;
4d4528e7 918}
af68e5e5 919
c829a427
MV
920char *
921scm_to_locale_string (SCM str)
922{
923 return scm_to_locale_stringn (str, NULL);
924}
af68e5e5 925
c829a427
MV
926size_t
927scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
928{
929 size_t len;
930
3ee86942 931 if (!scm_is_string (str))
c829a427 932 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942
MV
933 len = scm_i_string_length (str);
934 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
c829a427
MV
935 scm_remember_upto_here_1 (str);
936 return len;
937}
af68e5e5 938
3ee86942
MV
939/* converts C scm_array of strings to SCM scm_list of strings. */
940/* If argc < 0, a null terminated scm_array is assumed. */
941SCM
942scm_makfromstrs (int argc, char **argv)
943{
944 int i = argc;
945 SCM lst = SCM_EOL;
946 if (0 > i)
947 for (i = 0; argv[i]; i++);
948 while (i--)
949 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
950 return lst;
951}
952
c829a427
MV
953/* Return a newly allocated array of char pointers to each of the strings
954 in args, with a terminating NULL pointer. */
955
956char **
957scm_i_allocate_string_pointers (SCM list)
2a776823 958#define FUNC_NAME "scm_i_allocate_string_pointers"
af68e5e5 959{
c829a427
MV
960 char **result;
961 int len = scm_ilength (list);
962 int i;
963
964 if (len < 0)
965 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
966
2a776823
LC
967 result = scm_gc_malloc ((len + 1) * sizeof (char *),
968 "string pointers");
c829a427 969 result[len] = NULL;
c829a427
MV
970
971 /* The list might be have been modified in another thread, so
972 we check LIST before each access.
973 */
d2e53ed6 974 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427 975 {
2a776823
LC
976 SCM str;
977 size_t len;
978
979 str = SCM_CAR (list);
980 len = scm_c_string_length (str);
981
982 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
983 memcpy (result[i], scm_i_string_chars (str), len);
984 result[i][len] = '\0';
985
c829a427
MV
986 list = SCM_CDR (list);
987 }
988
c829a427 989 return result;
af68e5e5 990}
2a776823 991#undef FUNC_NAME
24933780 992
6f14f578
MV
993void
994scm_i_get_substring_spec (size_t len,
995 SCM start, size_t *cstart,
996 SCM end, size_t *cend)
997{
998 if (SCM_UNBNDP (start))
999 *cstart = 0;
1000 else
1001 *cstart = scm_to_unsigned_integer (start, 0, len);
1002
1003 if (SCM_UNBNDP (end))
1004 *cend = len;
1005 else
1006 *cend = scm_to_unsigned_integer (end, *cstart, len);
1007}
1008
3ee86942
MV
1009#if SCM_ENABLE_DEPRECATED
1010
556d75db
MV
1011/* When these definitions are removed, it becomes reasonable to use
1012 read-only strings for string literals. For that, change the reader
1013 to create string literals with scm_c_substring_read_only instead of
1014 with scm_c_substring_copy.
1015*/
1016
3ee86942 1017int
fe78c51a 1018scm_i_deprecated_stringp (SCM str)
3ee86942
MV
1019{
1020 scm_c_issue_deprecation_warning
1021 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1022
2616f0e0 1023 return scm_is_string (str);
3ee86942
MV
1024}
1025
1026char *
fe78c51a 1027scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
1028{
1029 char *chars;
1030
1031 scm_c_issue_deprecation_warning
1032 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1033
2616f0e0
MV
1034 /* We don't accept shared substrings here since they are not
1035 null-terminated.
1036 */
1037 if (IS_SH_STRING (str))
1038 scm_misc_error (NULL,
1039 "SCM_STRING_CHARS does not work with shared substrings.",
1040 SCM_EOL);
1041
556d75db
MV
1042 /* We explicitely test for read-only strings to produce a better
1043 error message.
1044 */
1045
1046 if (IS_RO_STRING (str))
1047 scm_misc_error (NULL,
1048 "SCM_STRING_CHARS does not work with read-only strings.",
1049 SCM_EOL);
1050
2616f0e0 1051 /* The following is still wrong, of course...
3ee86942
MV
1052 */
1053 chars = scm_i_string_writable_chars (str);
1054 scm_i_string_stop_writing ();
1055 return chars;
1056}
1057
1058size_t
fe78c51a 1059scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1060{
1061 scm_c_issue_deprecation_warning
1062 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1063 return scm_c_string_length (str);
1064}
1065
1066#endif
1067
0f2d19dd
JB
1068void
1069scm_init_strings ()
0f2d19dd 1070{
3ee86942 1071 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1072
a0599745 1073#include "libguile/strings.x"
0f2d19dd
JB
1074}
1075
89e00824
ML
1076
1077/*
1078 Local Variables:
1079 c-file-style: "gnu"
1080 End:
1081*/