(scm_frame_current_module): New.
[bpt/guile.git] / libguile / strings.c
CommitLineData
3ee86942 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e
MV
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 7 *
73be1d9e
MV
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
1bbd0b84 17
1bbd0b84 18
0f2d19dd
JB
19\f
20
faf2c9d7 21#include <string.h>
3ee86942 22#include <stdio.h>
faf2c9d7 23
a0599745
MD
24#include "libguile/_scm.h"
25#include "libguile/chars.h"
7c33806a 26#include "libguile/root.h"
a0599745 27#include "libguile/strings.h"
1afff620 28#include "libguile/deprecation.h"
a0599745 29#include "libguile/validate.h"
c829a427 30#include "libguile/dynwind.h"
1afff620 31
0f2d19dd
JB
32\f
33
34/* {Strings}
35 */
36
3ee86942
MV
37
38/* Stringbufs
39 *
40 * XXX - keeping an accurate refcount during GC seems to be quite
41 * tricky, so we just keep score of whether a stringbuf might be
42 * shared, not wether it definitely is.
43 *
44 * The scheme I (mvo) tried to keep an accurate reference count would
45 * recount all strings that point to a stringbuf during the mark-phase
46 * of the GC. This was done since one cannot access the stringbuf of
47 * a string when that string is freed (in order to decrease the
48 * reference count). The memory of the stringbuf might have been
49 * reused already for something completely different.
50 *
51 * This recounted worked for a small number of threads beating on
52 * cow-strings, but it failed randomly with more than 10 threads, say.
53 * I couldn't figure out what went wrong, so I used the conservative
54 * approach implemented below.
55 *
56 * A stringbuf needs to know its length, but only so that it can be
57 * reported when the stringbuf is freed.
58 *
59 * Stringbufs (and strings) are not stored very compactly: a stringbuf
60 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
61 * information. As a compensation, the code below is made more
62 * complicated by storing small strings inline in the double cell of a
63 * stringbuf. So we have fixstrings and bigstrings...
64 */
65
66#define STRINGBUF_F_SHARED 0x100
67#define STRINGBUF_F_INLINE 0x200
68
69#define STRINGBUF_TAG scm_tc7_stringbuf
70#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
71#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
72
73#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
74#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
75#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
76#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
77
78#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
79 ? STRINGBUF_INLINE_CHARS (buf) \
80 : STRINGBUF_OUTLINE_CHARS (buf))
81#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_LENGTH (buf) \
83 : STRINGBUF_OUTLINE_LENGTH (buf))
84
85#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
86
87#define SET_STRINGBUF_SHARED(buf) \
88 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
89
90#if SCM_DEBUG
91static size_t lenhist[1001];
92#endif
93
94static SCM
95make_stringbuf (size_t len)
0f2d19dd 96{
3ee86942
MV
97 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
98 scm_i_symbol_chars, all stringbufs are null-terminated. Once
99 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
100 has been changed for scm_i_symbol_chars, this null-termination
101 can be dropped.
102 */
103
104#if SCM_DEBUG
105 if (len < 1000)
106 lenhist[len]++;
107 else
108 lenhist[1000]++;
109#endif
0f2d19dd 110
3ee86942
MV
111 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
112 {
113 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
114 0, 0, 0);
115 }
116 else
117 {
118 char *mem = scm_gc_malloc (len+1, "string");
119 mem[len] = '\0';
120 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
121 (scm_t_bits) len, (scm_t_bits) 0);
122 }
123}
e53cc817 124
3ee86942
MV
125SCM
126scm_i_stringbuf_mark (SCM buf)
127{
128 return SCM_BOOL_F;
129}
1bbd0b84 130
3ee86942
MV
131void
132scm_i_stringbuf_free (SCM buf)
0f2d19dd 133{
3ee86942
MV
134 if (!STRINGBUF_INLINE (buf))
135 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
136 STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
137}
bd9e24b3 138
76da80e7 139SCM_MUTEX (stringbuf_write_mutex);
bd9e24b3 140
3ee86942
MV
141/* Copy-on-write strings.
142 */
bd9e24b3 143
3ee86942 144#define STRING_TAG scm_tc7_string
bd9e24b3 145
3ee86942
MV
146#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
147#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
148#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 149
3ee86942
MV
150#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
151#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
152
153#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
154
ed35de72
MV
155/* Read-only strings.
156 */
157
158#define RO_STRING_TAG (scm_tc7_string + 0x200)
159#define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
160
e1b29f6a
MV
161/* Mutation-sharing substrings
162 */
163
164#define SH_STRING_TAG (scm_tc7_string + 0x100)
165
166#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
167/* START and LENGTH as for STRINGs. */
168
169#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
170
3ee86942
MV
171SCM
172scm_i_make_string (size_t len, char **charsp)
173{
174 SCM buf = make_stringbuf (len);
175 SCM res;
176 if (charsp)
177 *charsp = STRINGBUF_CHARS (buf);
178 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
179 (scm_t_bits)0, (scm_t_bits) len);
180 return res;
0f2d19dd
JB
181}
182
3ee86942
MV
183static void
184validate_substring_args (SCM str, size_t start, size_t end)
185{
186 if (!IS_STRING (str))
187 scm_wrong_type_arg_msg (NULL, 0, str, "string");
188 if (start > STRING_LENGTH (str))
189 scm_out_of_range (NULL, scm_from_size_t (start));
190 if (end > STRING_LENGTH (str) || end < start)
191 scm_out_of_range (NULL, scm_from_size_t (end));
192}
0f2d19dd 193
e1b29f6a
MV
194static inline void
195get_str_buf_start (SCM *str, SCM *buf, size_t *start)
196{
197 *start = STRING_START (*str);
198 if (IS_SH_STRING (*str))
199 {
200 *str = SH_STRING_STRING (*str);
201 *start += STRING_START (*str);
202 }
203 *buf = STRING_STRINGBUF (*str);
204}
205
3ee86942
MV
206SCM
207scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 208{
e1b29f6a
MV
209 SCM buf;
210 size_t str_start;
211 get_str_buf_start (&str, &buf, &str_start);
76da80e7 212 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
3ee86942 213 SET_STRINGBUF_SHARED (buf);
76da80e7 214 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
3ee86942 215 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
216 (scm_t_bits)str_start + start,
217 (scm_t_bits) end - start);
0f2d19dd
JB
218}
219
ed35de72
MV
220SCM
221scm_i_substring_read_only (SCM str, size_t start, size_t end)
222{
223 SCM buf;
224 size_t str_start;
225 get_str_buf_start (&str, &buf, &str_start);
76da80e7 226 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
ed35de72 227 SET_STRINGBUF_SHARED (buf);
76da80e7 228 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
ed35de72
MV
229 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
230 (scm_t_bits)str_start + start,
231 (scm_t_bits) end - start);
232}
233
3ee86942
MV
234SCM
235scm_i_substring_copy (SCM str, size_t start, size_t end)
236{
237 size_t len = end - start;
edea856c 238 SCM buf, my_buf;
e1b29f6a
MV
239 size_t str_start;
240 get_str_buf_start (&str, &buf, &str_start);
edea856c 241 my_buf = make_stringbuf (len);
e1b29f6a
MV
242 memcpy (STRINGBUF_CHARS (my_buf),
243 STRINGBUF_CHARS (buf) + str_start + start, len);
3ee86942
MV
244 scm_remember_upto_here_1 (buf);
245 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
246 (scm_t_bits)0, (scm_t_bits) len);
247}
0f2d19dd 248
e1b29f6a
MV
249SCM
250scm_i_substring_shared (SCM str, size_t start, size_t end)
251{
252 if (start == 0 && end == STRING_LENGTH (str))
253 return str;
254 else
255 {
256 size_t len = end - start;
257 if (IS_SH_STRING (str))
258 {
259 start += STRING_START (str);
260 str = SH_STRING_STRING (str);
261 }
262 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
263 (scm_t_bits)start, (scm_t_bits) len);
264 }
265}
266
3ee86942
MV
267SCM
268scm_c_substring (SCM str, size_t start, size_t end)
269{
270 validate_substring_args (str, start, end);
271 return scm_i_substring (str, start, end);
272}
ee149d03 273
ed35de72
MV
274SCM
275scm_c_substring_read_only (SCM str, size_t start, size_t end)
276{
277 validate_substring_args (str, start, end);
278 return scm_i_substring_read_only (str, start, end);
279}
280
0f2d19dd 281SCM
3ee86942 282scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 283{
3ee86942
MV
284 validate_substring_args (str, start, end);
285 return scm_i_substring_copy (str, start, end);
286}
287
3ee86942
MV
288SCM
289scm_c_substring_shared (SCM str, size_t start, size_t end)
290{
291 validate_substring_args (str, start, end);
292 return scm_i_substring_shared (str, start, end);
293}
0f2d19dd 294
ee149d03 295SCM
3ee86942 296scm_i_string_mark (SCM str)
ee149d03 297{
3ee86942
MV
298 if (IS_SH_STRING (str))
299 return SH_STRING_STRING (str);
300 else
301 return STRING_STRINGBUF (str);
ee149d03
JB
302}
303
3ee86942
MV
304void
305scm_i_string_free (SCM str)
306{
307}
36284627 308
3ee86942
MV
309/* Internal accessors
310 */
311
312size_t
313scm_i_string_length (SCM str)
0f2d19dd 314{
3ee86942 315 return STRING_LENGTH (str);
0f2d19dd
JB
316}
317
3ee86942
MV
318const char *
319scm_i_string_chars (SCM str)
320{
321 SCM buf;
e1b29f6a
MV
322 size_t start;
323 get_str_buf_start (&str, &buf, &start);
3ee86942
MV
324 return STRINGBUF_CHARS (buf) + start;
325}
b00418df 326
3ee86942 327char *
ed35de72 328scm_i_string_writable_chars (SCM orig_str)
b00418df 329{
ed35de72 330 SCM buf, str = orig_str;
e1b29f6a 331 size_t start;
ed35de72 332
e1b29f6a 333 get_str_buf_start (&str, &buf, &start);
ed35de72
MV
334 if (IS_RO_STRING (str))
335 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
336
76da80e7 337 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
338 if (STRINGBUF_SHARED (buf))
339 {
340 /* Clone stringbuf. For this, we put all threads to sleep.
341 */
342
343 size_t len = STRING_LENGTH (str);
344 SCM new_buf;
345
76da80e7 346 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
347
348 new_buf = make_stringbuf (len);
349 memcpy (STRINGBUF_CHARS (new_buf),
350 STRINGBUF_CHARS (buf) + STRING_START (str), len);
351
352 scm_i_thread_put_to_sleep ();
353 SET_STRING_STRINGBUF (str, new_buf);
354 start -= STRING_START (str);
355 SET_STRING_START (str, 0);
356 scm_i_thread_wake_up ();
357
358 buf = new_buf;
359
76da80e7 360 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
3ee86942
MV
361 }
362
363 return STRINGBUF_CHARS (buf) + start;
b00418df
DH
364}
365
3ee86942
MV
366void
367scm_i_string_stop_writing (void)
368{
76da80e7 369 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
3ee86942 370}
b00418df 371
3ee86942
MV
372/* Symbols.
373
374 Basic symbol creation and accessing is done here, the rest is in
375 symbols.[hc]. This has been done to keep stringbufs and the
376 internals of strings and string-like objects confined to this file.
377*/
378
379#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
380
381SCM
6869328b
MV
382scm_i_make_symbol (SCM name, scm_t_bits flags,
383 unsigned long hash, SCM props)
3ee86942
MV
384{
385 SCM buf;
386 size_t start = STRING_START (name);
387 size_t length = STRING_LENGTH (name);
388
389 if (IS_SH_STRING (name))
390 {
391 name = SH_STRING_STRING (name);
392 start += STRING_START (name);
393 }
394 buf = SYMBOL_STRINGBUF (name);
395
396 if (start == 0 && length == STRINGBUF_LENGTH (buf))
397 {
398 /* reuse buf. */
76da80e7 399 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
3ee86942 400 SET_STRINGBUF_SHARED (buf);
76da80e7 401 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
402 }
403 else
404 {
405 /* make new buf. */
406 SCM new_buf = make_stringbuf (length);
407 memcpy (STRINGBUF_CHARS (new_buf),
408 STRINGBUF_CHARS (buf) + start, length);
409 buf = new_buf;
410 }
6869328b 411 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
412 (scm_t_bits) hash, SCM_UNPACK (props));
413}
414
415size_t
416scm_i_symbol_length (SCM sym)
0f2d19dd 417{
3ee86942 418 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
419}
420
3ee86942
MV
421const char *
422scm_i_symbol_chars (SCM sym)
423{
424 SCM buf = SYMBOL_STRINGBUF (sym);
425 return STRINGBUF_CHARS (buf);
426}
1cc91f1b 427
3ee86942
MV
428SCM
429scm_i_symbol_mark (SCM sym)
0f2d19dd 430{
3ee86942
MV
431 scm_gc_mark (SYMBOL_STRINGBUF (sym));
432 return SCM_CELL_OBJECT_3 (sym);
0f2d19dd
JB
433}
434
3ee86942
MV
435void
436scm_i_symbol_free (SCM sym)
437{
438}
0f2d19dd 439
be54b15d 440SCM
3ee86942 441scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 442{
3ee86942 443 SCM buf = SYMBOL_STRINGBUF (sym);
76da80e7 444 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
3ee86942 445 SET_STRINGBUF_SHARED (buf);
76da80e7 446 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
3ee86942
MV
447 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
448 (scm_t_bits)start, (scm_t_bits) end - start);
449}
be54b15d 450
3ee86942
MV
451/* Debugging
452 */
be54b15d 453
3ee86942 454#if SCM_DEBUG
be54b15d 455
3ee86942
MV
456SCM scm_sys_string_dump (SCM);
457SCM scm_sys_symbol_dump (SCM);
458SCM scm_sys_stringbuf_hist (void);
be54b15d 459
3ee86942
MV
460SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
461 (SCM str),
462 "")
463#define FUNC_NAME s_scm_sys_string_dump
464{
465 SCM_VALIDATE_STRING (1, str);
466 fprintf (stderr, "%p:\n", str);
467 fprintf (stderr, " start: %u\n", STRING_START (str));
468 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
469 if (IS_SH_STRING (str))
470 {
471 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
472 fprintf (stderr, "\n");
473 scm_sys_string_dump (SH_STRING_STRING (str));
474 }
475 else
476 {
477 SCM buf = STRING_STRINGBUF (str);
478 fprintf (stderr, " buf: %p\n", buf);
479 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
480 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
481 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
482 }
483 return SCM_UNSPECIFIED;
484}
485#undef FUNC_NAME
486
487SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
488 (SCM sym),
489 "")
490#define FUNC_NAME s_scm_sys_symbol_dump
491{
492 SCM_VALIDATE_SYMBOL (1, sym);
493 fprintf (stderr, "%p:\n", sym);
494 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
495 {
496 SCM buf = SYMBOL_STRINGBUF (sym);
497 fprintf (stderr, " buf: %p\n", buf);
498 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
499 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
500 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
501 }
502 return SCM_UNSPECIFIED;
503}
504#undef FUNC_NAME
505
506SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
507 (void),
508 "")
e1b29f6a 509#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
510{
511 int i;
512 for (i = 0; i < 1000; i++)
513 if (lenhist[i])
514 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
515 fprintf (stderr, ">999: %u\n", lenhist[1000]);
516 return SCM_UNSPECIFIED;
be54b15d
DH
517}
518#undef FUNC_NAME
519
3ee86942
MV
520#endif
521
522\f
523
524SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
525 (SCM obj),
526 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
527#define FUNC_NAME s_scm_string_p
528{
529 return scm_from_bool (IS_STRING (obj));
530}
531#undef FUNC_NAME
532
533
534SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
535
536SCM_DEFINE (scm_string, "string", 0, 0, 1,
537 (SCM chrs),
538 "@deffnx {Scheme Procedure} list->string chrs\n"
539 "Return a newly allocated string composed of the arguments,\n"
540 "@var{chrs}.")
541#define FUNC_NAME s_scm_string
542{
543 SCM result;
544 size_t len;
545 char *data;
546
547 {
548 long i = scm_ilength (chrs);
549
550 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
551 len = i;
552 }
553
554 result = scm_i_make_string (len, &data);
d2e53ed6 555 while (len > 0 && scm_is_pair (chrs))
3ee86942
MV
556 {
557 SCM elt = SCM_CAR (chrs);
558
559 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
560 *data++ = SCM_CHAR (elt);
561 chrs = SCM_CDR (chrs);
562 len--;
563 }
564 if (len > 0)
565 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
d2e53ed6 566 if (!scm_is_null (chrs))
3ee86942
MV
567 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
568
569 return result;
570}
571#undef FUNC_NAME
be54b15d 572
3b3b36dd 573SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 574 (SCM k, SCM chr),
0d26a824
MG
575 "Return a newly allocated string of\n"
576 "length @var{k}. If @var{chr} is given, then all elements of\n"
577 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 578 "of the @var{string} are unspecified.")
1bbd0b84 579#define FUNC_NAME s_scm_make_string
0f2d19dd 580{
3ee86942
MV
581 return scm_c_make_string (scm_to_size_t (k), chr);
582}
583#undef FUNC_NAME
584
585SCM
586scm_c_make_string (size_t len, SCM chr)
587#define FUNC_NAME NULL
588{
589 char *dst;
590 SCM res = scm_i_make_string (len, &dst);
cb0d8be2 591
e11e83f3
MV
592 if (!SCM_UNBNDP (chr))
593 {
3ee86942
MV
594 SCM_VALIDATE_CHAR (0, chr);
595 memset (dst, SCM_CHAR (chr), len);
0f2d19dd 596 }
e11e83f3
MV
597
598 return res;
0f2d19dd 599}
1bbd0b84 600#undef FUNC_NAME
0f2d19dd 601
3b3b36dd 602SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
603 (SCM string),
604 "Return the number of characters in @var{string}.")
1bbd0b84 605#define FUNC_NAME s_scm_string_length
0f2d19dd 606{
d1ca2c64 607 SCM_VALIDATE_STRING (1, string);
3ee86942 608 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 609}
1bbd0b84 610#undef FUNC_NAME
0f2d19dd 611
3ee86942
MV
612size_t
613scm_c_string_length (SCM string)
614{
615 if (!IS_STRING (string))
616 scm_wrong_type_arg_msg (NULL, 0, string, "string");
617 return STRING_LENGTH (string);
618}
619
bd9e24b3 620SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 621 (SCM str, SCM k),
0d26a824
MG
622 "Return character @var{k} of @var{str} using zero-origin\n"
623 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 624#define FUNC_NAME s_scm_string_ref
0f2d19dd 625{
a55c2b68 626 unsigned long idx;
bd9e24b3 627
d1ca2c64 628 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
629 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1);
630 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
0f2d19dd 631}
1bbd0b84 632#undef FUNC_NAME
0f2d19dd 633
3ee86942
MV
634SCM
635scm_c_string_ref (SCM str, size_t p)
636{
637 if (p >= scm_i_string_length (str))
638 scm_out_of_range (NULL, scm_from_size_t (p));
639 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
640}
f0942910 641
3b3b36dd 642SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 643 (SCM str, SCM k, SCM chr),
0d26a824
MG
644 "Store @var{chr} in element @var{k} of @var{str} and return\n"
645 "an unspecified value. @var{k} must be a valid index of\n"
646 "@var{str}.")
1bbd0b84 647#define FUNC_NAME s_scm_string_set_x
0f2d19dd 648{
a55c2b68
MV
649 unsigned long idx;
650
f0942910 651 SCM_VALIDATE_STRING (1, str);
3ee86942 652 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1);
34d19ef6 653 SCM_VALIDATE_CHAR (3, chr);
3ee86942
MV
654 {
655 char *dst = scm_i_string_writable_chars (str);
656 dst[idx] = SCM_CHAR (chr);
657 scm_i_string_stop_writing ();
658 }
0f2d19dd
JB
659 return SCM_UNSPECIFIED;
660}
1bbd0b84 661#undef FUNC_NAME
0f2d19dd 662
3ee86942
MV
663void
664scm_c_string_set_x (SCM str, size_t p, SCM chr)
665{
666 if (p >= scm_i_string_length (str))
667 scm_out_of_range (NULL, scm_from_size_t (p));
668 {
669 char *dst = scm_i_string_writable_chars (str);
670 dst[p] = SCM_CHAR (chr);
671 scm_i_string_stop_writing ();
672 }
673}
0f2d19dd 674
3b3b36dd 675SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
676 (SCM str, SCM start, SCM end),
677 "Return a newly allocated string formed from the characters\n"
678 "of @var{str} beginning with index @var{start} (inclusive) and\n"
679 "ending with index @var{end} (exclusive).\n"
680 "@var{str} must be a string, @var{start} and @var{end} must be\n"
681 "exact integers satisfying:\n\n"
682 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 683#define FUNC_NAME s_scm_substring
0f2d19dd 684{
3ee86942 685 size_t len, from, to;
685c0d71 686
d1ca2c64 687 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
688 len = scm_i_string_length (str);
689 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 690 if (SCM_UNBNDP (end))
3ee86942 691 to = len;
a55c2b68 692 else
3ee86942
MV
693 to = scm_to_unsigned_integer (end, from, len);
694 return scm_i_substring (str, from, to);
0f2d19dd 695}
1bbd0b84 696#undef FUNC_NAME
0f2d19dd 697
ed35de72
MV
698SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
699 (SCM str, SCM start, SCM end),
700 "Return a newly allocated string formed from the characters\n"
701 "of @var{str} beginning with index @var{start} (inclusive) and\n"
702 "ending with index @var{end} (exclusive).\n"
703 "@var{str} must be a string, @var{start} and @var{end} must be\n"
704 "exact integers satisfying:\n"
705 "\n"
706 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
707 "\n"
708 "The returned string is read-only.\n")
709#define FUNC_NAME s_scm_substring_read_only
710{
711 size_t len, from, to;
712
713 SCM_VALIDATE_STRING (1, str);
714 len = scm_i_string_length (str);
715 from = scm_to_unsigned_integer (start, 0, len);
716 if (SCM_UNBNDP (end))
717 to = len;
718 else
719 to = scm_to_unsigned_integer (end, from, len);
720 return scm_i_substring_read_only (str, from, to);
721}
722#undef FUNC_NAME
723
3ee86942
MV
724SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
725 (SCM str, SCM start, SCM end),
726 "Return a newly allocated string formed from the characters\n"
727 "of @var{str} beginning with index @var{start} (inclusive) and\n"
728 "ending with index @var{end} (exclusive).\n"
729 "@var{str} must be a string, @var{start} and @var{end} must be\n"
730 "exact integers satisfying:\n\n"
731 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
732#define FUNC_NAME s_scm_substring_copy
733{
e1b29f6a
MV
734 /* For the Scheme version, START is mandatory, but for the C
735 version, it is optional. See scm_string_copy in srfi-13.c for a
736 rationale.
737 */
738
739 size_t from, to;
3ee86942
MV
740
741 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
742 scm_i_get_substring_spec (scm_i_string_length (str),
743 start, &from, end, &to);
3ee86942
MV
744 return scm_i_substring_copy (str, from, to);
745}
746#undef FUNC_NAME
747
748SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
749 (SCM str, SCM start, SCM end),
750 "Return string that indirectly refers to the characters\n"
751 "of @var{str} beginning with index @var{start} (inclusive) and\n"
752 "ending with index @var{end} (exclusive).\n"
753 "@var{str} must be a string, @var{start} and @var{end} must be\n"
754 "exact integers satisfying:\n\n"
755 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
756#define FUNC_NAME s_scm_substring_shared
757{
758 size_t len, from, to;
759
760 SCM_VALIDATE_STRING (1, str);
761 len = scm_i_string_length (str);
762 from = scm_to_unsigned_integer (start, 0, len);
763 if (SCM_UNBNDP (end))
764 to = len;
765 else
766 to = scm_to_unsigned_integer (end, from, len);
767 return scm_i_substring_shared (str, from, to);
768}
769#undef FUNC_NAME
685c0d71 770
3b3b36dd 771SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 772 (SCM args),
0d26a824
MG
773 "Return a newly allocated string whose characters form the\n"
774 "concatenation of the given strings, @var{args}.")
1bbd0b84 775#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
776{
777 SCM res;
1be6b49c 778 size_t i = 0;
c829a427
MV
779 SCM l, s;
780 char *data;
af45e3b0
DH
781
782 SCM_VALIDATE_REST_ARGUMENT (args);
d2e53ed6 783 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427
MV
784 {
785 s = SCM_CAR (l);
786 SCM_VALIDATE_STRING (SCM_ARGn, s);
3ee86942 787 i += scm_i_string_length (s);
c829a427 788 }
3ee86942 789 res = scm_i_make_string (i, &data);
d2e53ed6 790 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
c829a427 791 {
edea856c 792 size_t len;
c829a427 793 s = SCM_CAR (l);
3ee86942 794 SCM_VALIDATE_STRING (SCM_ARGn, s);
edea856c 795 len = scm_i_string_length (s);
3ee86942
MV
796 memcpy (data, scm_i_string_chars (s), len);
797 data += len;
c829a427
MV
798 scm_remember_upto_here_1 (s);
799 }
0f2d19dd
JB
800 return res;
801}
1bbd0b84 802#undef FUNC_NAME
0f2d19dd 803
c829a427
MV
804int
805scm_is_string (SCM obj)
806{
3ee86942 807 return IS_STRING (obj);
c829a427 808}
24933780 809
c829a427
MV
810SCM
811scm_from_locale_stringn (const char *str, size_t len)
812{
813 SCM res;
814 char *dst;
4d4528e7 815
c829a427
MV
816 if (len == (size_t)-1)
817 len = strlen (str);
3ee86942 818 res = scm_i_make_string (len, &dst);
c829a427
MV
819 memcpy (dst, str, len);
820 return res;
821}
4d4528e7 822
c829a427
MV
823SCM
824scm_from_locale_string (const char *str)
4d4528e7 825{
c829a427
MV
826 return scm_from_locale_stringn (str, -1);
827}
4d4528e7 828
c829a427
MV
829SCM
830scm_take_locale_stringn (char *str, size_t len)
831{
832 if (len == (size_t)-1)
833 return scm_take_locale_string (str);
834 else
835 {
836 /* STR might not be zero terminated and we are not allowed to
837 look at str[len], so we have to make a new one...
838 */
839 SCM res = scm_from_locale_stringn (str, len);
840 free (str);
841 return res;
842 }
843}
4d4528e7 844
c829a427
MV
845SCM
846scm_take_locale_string (char *str)
847{
848 size_t len = strlen (str);
3ee86942 849 SCM buf, res;
c829a427 850
3ee86942
MV
851 buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
852 (scm_t_bits) len, (scm_t_bits) 0);
853 res = scm_double_cell (STRING_TAG,
854 SCM_UNPACK (buf),
855 (scm_t_bits) 0, (scm_t_bits) len);
c829a427 856 scm_gc_register_collectable_memory (str, len+1, "string");
c829a427
MV
857 return res;
858}
859
860char *
861scm_to_locale_stringn (SCM str, size_t *lenp)
862{
863 char *res;
864 size_t len;
4d4528e7 865
3ee86942 866 if (!scm_is_string (str))
c829a427 867 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942 868 len = scm_i_string_length (str);
c829a427 869 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
3ee86942 870 memcpy (res, scm_i_string_chars (str), len);
c829a427
MV
871 if (lenp == NULL)
872 {
873 res[len] = '\0';
874 if (strlen (res) != len)
875 {
876 free (res);
877 scm_misc_error (NULL,
878 "string contains #\\nul character: ~S",
879 scm_list_1 (str));
880 }
881 }
882 else
4d4528e7 883 *lenp = len;
24933780 884
c829a427
MV
885 scm_remember_upto_here_1 (str);
886 return res;
4d4528e7 887}
af68e5e5 888
c829a427
MV
889char *
890scm_to_locale_string (SCM str)
891{
892 return scm_to_locale_stringn (str, NULL);
893}
af68e5e5 894
c829a427
MV
895size_t
896scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
897{
898 size_t len;
899
3ee86942 900 if (!scm_is_string (str))
c829a427 901 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942
MV
902 len = scm_i_string_length (str);
903 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
c829a427
MV
904 scm_remember_upto_here_1 (str);
905 return len;
906}
af68e5e5 907
3ee86942
MV
908/* converts C scm_array of strings to SCM scm_list of strings. */
909/* If argc < 0, a null terminated scm_array is assumed. */
910SCM
911scm_makfromstrs (int argc, char **argv)
912{
913 int i = argc;
914 SCM lst = SCM_EOL;
915 if (0 > i)
916 for (i = 0; argv[i]; i++);
917 while (i--)
918 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
919 return lst;
920}
921
c829a427
MV
922/* Return a newly allocated array of char pointers to each of the strings
923 in args, with a terminating NULL pointer. */
924
925char **
926scm_i_allocate_string_pointers (SCM list)
af68e5e5 927{
c829a427
MV
928 char **result;
929 int len = scm_ilength (list);
930 int i;
931
932 if (len < 0)
933 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
934
935 scm_frame_begin (0);
936
937 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
938 result[len] = NULL;
939 scm_frame_unwind_handler (free, result, 0);
940
941 /* The list might be have been modified in another thread, so
942 we check LIST before each access.
943 */
d2e53ed6 944 for (i = 0; i < len && scm_is_pair (list); i++)
c829a427
MV
945 {
946 result[i] = scm_to_locale_string (SCM_CAR (list));
947 list = SCM_CDR (list);
948 }
949
950 scm_frame_end ();
951 return result;
af68e5e5 952}
e53cc817 953
c829a427
MV
954void
955scm_i_free_string_pointers (char **pointers)
956{
957 int i;
958
959 for (i = 0; pointers[i]; i++)
960 free (pointers[i]);
961 free (pointers);
962}
24933780 963
6f14f578
MV
964void
965scm_i_get_substring_spec (size_t len,
966 SCM start, size_t *cstart,
967 SCM end, size_t *cend)
968{
969 if (SCM_UNBNDP (start))
970 *cstart = 0;
971 else
972 *cstart = scm_to_unsigned_integer (start, 0, len);
973
974 if (SCM_UNBNDP (end))
975 *cend = len;
976 else
977 *cend = scm_to_unsigned_integer (end, *cstart, len);
978}
979
3ee86942
MV
980#if SCM_ENABLE_DEPRECATED
981
556d75db
MV
982/* When these definitions are removed, it becomes reasonable to use
983 read-only strings for string literals. For that, change the reader
984 to create string literals with scm_c_substring_read_only instead of
985 with scm_c_substring_copy.
986*/
987
3ee86942 988int
fe78c51a 989scm_i_deprecated_stringp (SCM str)
3ee86942
MV
990{
991 scm_c_issue_deprecation_warning
992 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
993
2616f0e0 994 return scm_is_string (str);
3ee86942
MV
995}
996
997char *
fe78c51a 998scm_i_deprecated_string_chars (SCM str)
3ee86942
MV
999{
1000 char *chars;
1001
1002 scm_c_issue_deprecation_warning
1003 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1004
2616f0e0
MV
1005 /* We don't accept shared substrings here since they are not
1006 null-terminated.
1007 */
1008 if (IS_SH_STRING (str))
1009 scm_misc_error (NULL,
1010 "SCM_STRING_CHARS does not work with shared substrings.",
1011 SCM_EOL);
1012
556d75db
MV
1013 /* We explicitely test for read-only strings to produce a better
1014 error message.
1015 */
1016
1017 if (IS_RO_STRING (str))
1018 scm_misc_error (NULL,
1019 "SCM_STRING_CHARS does not work with read-only strings.",
1020 SCM_EOL);
1021
2616f0e0 1022 /* The following is still wrong, of course...
3ee86942
MV
1023 */
1024 chars = scm_i_string_writable_chars (str);
1025 scm_i_string_stop_writing ();
1026 return chars;
1027}
1028
1029size_t
fe78c51a 1030scm_i_deprecated_string_length (SCM str)
3ee86942
MV
1031{
1032 scm_c_issue_deprecation_warning
1033 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1034 return scm_c_string_length (str);
1035}
1036
1037#endif
1038
0f2d19dd
JB
1039void
1040scm_init_strings ()
0f2d19dd 1041{
3ee86942 1042 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 1043
a0599745 1044#include "libguile/strings.x"
0f2d19dd
JB
1045}
1046
89e00824
ML
1047
1048/*
1049 Local Variables:
1050 c-file-style: "gnu"
1051 End:
1052*/