Merge branch 'master' into boehm-demers-weiser-gc
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18
19 \f
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <string.h>
25 #include <stdio.h>
26
27 #include "libguile/_scm.h"
28 #include "libguile/chars.h"
29 #include "libguile/root.h"
30 #include "libguile/strings.h"
31 #include "libguile/deprecation.h"
32 #include "libguile/validate.h"
33 #include "libguile/dynwind.h"
34
35 \f
36
37 /* {Strings}
38 */
39
40
41 /* Stringbufs
42 *
43 * XXX - keeping an accurate refcount during GC seems to be quite
44 * tricky, so we just keep score of whether a stringbuf might be
45 * shared, not wether it definitely is.
46 *
47 * The scheme I (mvo) tried to keep an accurate reference count would
48 * recount all strings that point to a stringbuf during the mark-phase
49 * of the GC. This was done since one cannot access the stringbuf of
50 * a string when that string is freed (in order to decrease the
51 * reference count). The memory of the stringbuf might have been
52 * reused already for something completely different.
53 *
54 * This recounted worked for a small number of threads beating on
55 * cow-strings, but it failed randomly with more than 10 threads, say.
56 * I couldn't figure out what went wrong, so I used the conservative
57 * approach implemented below.
58 *
59 * A stringbuf needs to know its length, but only so that it can be
60 * reported when the stringbuf is freed.
61 *
62 * Stringbufs (and strings) are not stored very compactly: a stringbuf
63 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
64 * information. As a compensation, the code below is made more
65 * complicated by storing small strings inline in the double cell of a
66 * stringbuf. So we have fixstrings and bigstrings...
67 */
68
69 #define STRINGBUF_F_SHARED 0x100
70 #define STRINGBUF_F_INLINE 0x200
71
72 #define STRINGBUF_TAG scm_tc7_stringbuf
73 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
74 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
75
76 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
77 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
78 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
79 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
80
81 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_CHARS (buf) \
83 : STRINGBUF_OUTLINE_CHARS (buf))
84 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
85 ? STRINGBUF_INLINE_LENGTH (buf) \
86 : STRINGBUF_OUTLINE_LENGTH (buf))
87
88 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
89
90 #define SET_STRINGBUF_SHARED(buf) \
91 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
92
93 #if SCM_DEBUG
94 static size_t lenhist[1001];
95 #endif
96
97 static SCM
98 make_stringbuf (size_t len)
99 {
100 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
101 scm_i_symbol_chars, all stringbufs are null-terminated. Once
102 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
103 has been changed for scm_i_symbol_chars, this null-termination
104 can be dropped.
105 */
106
107 #if SCM_DEBUG
108 if (len < 1000)
109 lenhist[len]++;
110 else
111 lenhist[1000]++;
112 #endif
113
114 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
115 {
116 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
117 0, 0, 0);
118 }
119 else
120 {
121 char *mem = scm_gc_malloc_pointerless (len + 1, "string");
122 mem[len] = '\0';
123 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
124 (scm_t_bits) len, (scm_t_bits) 0);
125 }
126 }
127
128 /* Return a new stringbuf whose underlying storage consists of the LEN+1
129 octets pointed to by STR (the last octet is zero). */
130 SCM
131 scm_i_take_stringbufn (char *str, size_t len)
132 {
133 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
134
135 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
136 (scm_t_bits) len, (scm_t_bits) 0);
137 }
138
139
140 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
141
142 /* Copy-on-write strings.
143 */
144
145 #define STRING_TAG scm_tc7_string
146
147 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
148 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
149 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
150
151 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
152 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
153
154 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
155
156 /* Read-only strings.
157 */
158
159 #define RO_STRING_TAG (scm_tc7_string + 0x200)
160 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
161
162 /* Mutation-sharing substrings
163 */
164
165 #define SH_STRING_TAG (scm_tc7_string + 0x100)
166
167 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
168 /* START and LENGTH as for STRINGs. */
169
170 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
171
172 SCM
173 scm_i_make_string (size_t len, char **charsp)
174 {
175 SCM buf = make_stringbuf (len);
176 SCM res;
177 if (charsp)
178 *charsp = STRINGBUF_CHARS (buf);
179 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
180 (scm_t_bits)0, (scm_t_bits) len);
181 return res;
182 }
183
184 static void
185 validate_substring_args (SCM str, size_t start, size_t end)
186 {
187 if (!IS_STRING (str))
188 scm_wrong_type_arg_msg (NULL, 0, str, "string");
189 if (start > STRING_LENGTH (str))
190 scm_out_of_range (NULL, scm_from_size_t (start));
191 if (end > STRING_LENGTH (str) || end < start)
192 scm_out_of_range (NULL, scm_from_size_t (end));
193 }
194
195 static inline void
196 get_str_buf_start (SCM *str, SCM *buf, size_t *start)
197 {
198 *start = STRING_START (*str);
199 if (IS_SH_STRING (*str))
200 {
201 *str = SH_STRING_STRING (*str);
202 *start += STRING_START (*str);
203 }
204 *buf = STRING_STRINGBUF (*str);
205 }
206
207 SCM
208 scm_i_substring (SCM str, size_t start, size_t end)
209 {
210 SCM buf;
211 size_t str_start;
212 get_str_buf_start (&str, &buf, &str_start);
213 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
214 SET_STRINGBUF_SHARED (buf);
215 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
216 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
217 (scm_t_bits)str_start + start,
218 (scm_t_bits) end - start);
219 }
220
221 SCM
222 scm_i_substring_read_only (SCM str, size_t start, size_t end)
223 {
224 SCM buf;
225 size_t str_start;
226 get_str_buf_start (&str, &buf, &str_start);
227 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
228 SET_STRINGBUF_SHARED (buf);
229 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
230 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
231 (scm_t_bits)str_start + start,
232 (scm_t_bits) end - start);
233 }
234
235 SCM
236 scm_i_substring_copy (SCM str, size_t start, size_t end)
237 {
238 size_t len = end - start;
239 SCM buf, my_buf;
240 size_t str_start;
241 get_str_buf_start (&str, &buf, &str_start);
242 my_buf = make_stringbuf (len);
243 memcpy (STRINGBUF_CHARS (my_buf),
244 STRINGBUF_CHARS (buf) + str_start + start, len);
245 scm_remember_upto_here_1 (buf);
246 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
247 (scm_t_bits)0, (scm_t_bits) len);
248 }
249
250 SCM
251 scm_i_substring_shared (SCM str, size_t start, size_t end)
252 {
253 if (start == 0 && end == STRING_LENGTH (str))
254 return str;
255 else
256 {
257 size_t len = end - start;
258 if (IS_SH_STRING (str))
259 {
260 start += STRING_START (str);
261 str = SH_STRING_STRING (str);
262 }
263 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
264 (scm_t_bits)start, (scm_t_bits) len);
265 }
266 }
267
268 SCM
269 scm_c_substring (SCM str, size_t start, size_t end)
270 {
271 validate_substring_args (str, start, end);
272 return scm_i_substring (str, start, end);
273 }
274
275 SCM
276 scm_c_substring_read_only (SCM str, size_t start, size_t end)
277 {
278 validate_substring_args (str, start, end);
279 return scm_i_substring_read_only (str, start, end);
280 }
281
282 SCM
283 scm_c_substring_copy (SCM str, size_t start, size_t end)
284 {
285 validate_substring_args (str, start, end);
286 return scm_i_substring_copy (str, start, end);
287 }
288
289 SCM
290 scm_c_substring_shared (SCM str, size_t start, size_t end)
291 {
292 validate_substring_args (str, start, end);
293 return scm_i_substring_shared (str, start, end);
294 }
295
296 \f
297 /* Internal accessors
298 */
299
300 size_t
301 scm_i_string_length (SCM str)
302 {
303 return STRING_LENGTH (str);
304 }
305
306 const char *
307 scm_i_string_chars (SCM str)
308 {
309 SCM buf;
310 size_t start;
311 get_str_buf_start (&str, &buf, &start);
312 return STRINGBUF_CHARS (buf) + start;
313 }
314
315 char *
316 scm_i_string_writable_chars (SCM orig_str)
317 {
318 SCM buf, str = orig_str;
319 size_t start;
320
321 get_str_buf_start (&str, &buf, &start);
322 if (IS_RO_STRING (str))
323 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
324
325 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
326 if (STRINGBUF_SHARED (buf))
327 {
328 /* Clone stringbuf. */
329
330 size_t len = STRING_LENGTH (str);
331 SCM new_buf;
332
333 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
334
335 new_buf = make_stringbuf (len);
336 memcpy (STRINGBUF_CHARS (new_buf),
337 STRINGBUF_CHARS (buf) + STRING_START (str), len);
338
339 start -= STRING_START (str);
340
341 /* FIXME: The following operations are not atomic, so other threads
342 looking at STR may see an inconsistent state. Nevertheless it can't
343 hurt much since (i) accessing STR while it is being mutated can't
344 yield a crash, and (ii) concurrent accesses to STR should be
345 protected by a mutex at the application level. The latter may not
346 apply when STR != ORIG_STR, though. */
347 SET_STRING_START (str, 0);
348 SET_STRING_STRINGBUF (str, new_buf);
349
350 buf = new_buf;
351
352 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
353 }
354
355 return STRINGBUF_CHARS (buf) + start;
356 }
357
358 void
359 scm_i_string_stop_writing (void)
360 {
361 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
362 }
363
364 /* Symbols.
365
366 Basic symbol creation and accessing is done here, the rest is in
367 symbols.[hc]. This has been done to keep stringbufs and the
368 internals of strings and string-like objects confined to this file.
369 */
370
371 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
372
373 SCM
374 scm_i_make_symbol (SCM name, scm_t_bits flags,
375 unsigned long hash, SCM props)
376 {
377 SCM buf;
378 size_t start = STRING_START (name);
379 size_t length = STRING_LENGTH (name);
380
381 if (IS_SH_STRING (name))
382 {
383 name = SH_STRING_STRING (name);
384 start += STRING_START (name);
385 }
386 buf = SYMBOL_STRINGBUF (name);
387
388 if (start == 0 && length == STRINGBUF_LENGTH (buf))
389 {
390 /* reuse buf. */
391 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
392 SET_STRINGBUF_SHARED (buf);
393 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
394 }
395 else
396 {
397 /* make new buf. */
398 SCM new_buf = make_stringbuf (length);
399 memcpy (STRINGBUF_CHARS (new_buf),
400 STRINGBUF_CHARS (buf) + start, length);
401 buf = new_buf;
402 }
403 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
404 (scm_t_bits) hash, SCM_UNPACK (props));
405 }
406
407 SCM
408 scm_i_c_make_symbol (const char *name, size_t len,
409 scm_t_bits flags, unsigned long hash, SCM props)
410 {
411 SCM buf = make_stringbuf (len);
412 memcpy (STRINGBUF_CHARS (buf), name, len);
413
414 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
415 (scm_t_bits) hash, SCM_UNPACK (props));
416 }
417
418 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
419 underlying storage. */
420 SCM
421 scm_i_c_take_symbol (char *name, size_t len,
422 scm_t_bits flags, unsigned long hash, SCM props)
423 {
424 SCM buf = scm_i_take_stringbufn (name, len);
425
426 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
427 (scm_t_bits) hash, SCM_UNPACK (props));
428 }
429
430 size_t
431 scm_i_symbol_length (SCM sym)
432 {
433 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
434 }
435
436 size_t
437 scm_c_symbol_length (SCM sym)
438 #define FUNC_NAME "scm_c_symbol_length"
439 {
440 SCM_VALIDATE_SYMBOL (1, sym);
441
442 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
443 }
444 #undef FUNC_NAME
445
446 const char *
447 scm_i_symbol_chars (SCM sym)
448 {
449 SCM buf = SYMBOL_STRINGBUF (sym);
450 return STRINGBUF_CHARS (buf);
451 }
452
453 SCM
454 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
455 {
456 SCM buf = SYMBOL_STRINGBUF (sym);
457 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
458 SET_STRINGBUF_SHARED (buf);
459 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
460 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
461 (scm_t_bits)start, (scm_t_bits) end - start);
462 }
463
464 /* Debugging
465 */
466
467 #if SCM_DEBUG
468
469 SCM scm_sys_string_dump (SCM);
470 SCM scm_sys_symbol_dump (SCM);
471 SCM scm_sys_stringbuf_hist (void);
472
473 SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
474 (SCM str),
475 "")
476 #define FUNC_NAME s_scm_sys_string_dump
477 {
478 SCM_VALIDATE_STRING (1, str);
479 fprintf (stderr, "%p:\n", str);
480 fprintf (stderr, " start: %u\n", STRING_START (str));
481 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
482 if (IS_SH_STRING (str))
483 {
484 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
485 fprintf (stderr, "\n");
486 scm_sys_string_dump (SH_STRING_STRING (str));
487 }
488 else
489 {
490 SCM buf = STRING_STRINGBUF (str);
491 fprintf (stderr, " buf: %p\n", buf);
492 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
493 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
494 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
495 }
496 return SCM_UNSPECIFIED;
497 }
498 #undef FUNC_NAME
499
500 SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
501 (SCM sym),
502 "")
503 #define FUNC_NAME s_scm_sys_symbol_dump
504 {
505 SCM_VALIDATE_SYMBOL (1, sym);
506 fprintf (stderr, "%p:\n", sym);
507 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
508 {
509 SCM buf = SYMBOL_STRINGBUF (sym);
510 fprintf (stderr, " buf: %p\n", buf);
511 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
512 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
513 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
514 }
515 return SCM_UNSPECIFIED;
516 }
517 #undef FUNC_NAME
518
519 SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
520 (void),
521 "")
522 #define FUNC_NAME s_scm_sys_stringbuf_hist
523 {
524 int i;
525 for (i = 0; i < 1000; i++)
526 if (lenhist[i])
527 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
528 fprintf (stderr, ">999: %u\n", lenhist[1000]);
529 return SCM_UNSPECIFIED;
530 }
531 #undef FUNC_NAME
532
533 #endif
534
535 \f
536
537 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
538 (SCM obj),
539 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
540 #define FUNC_NAME s_scm_string_p
541 {
542 return scm_from_bool (IS_STRING (obj));
543 }
544 #undef FUNC_NAME
545
546
547 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
548
549 SCM_DEFINE (scm_string, "string", 0, 0, 1,
550 (SCM chrs),
551 "@deffnx {Scheme Procedure} list->string chrs\n"
552 "Return a newly allocated string composed of the arguments,\n"
553 "@var{chrs}.")
554 #define FUNC_NAME s_scm_string
555 {
556 SCM result;
557 size_t len;
558 char *data;
559
560 {
561 long i = scm_ilength (chrs);
562
563 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
564 len = i;
565 }
566
567 result = scm_i_make_string (len, &data);
568 while (len > 0 && scm_is_pair (chrs))
569 {
570 SCM elt = SCM_CAR (chrs);
571
572 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
573 *data++ = SCM_CHAR (elt);
574 chrs = SCM_CDR (chrs);
575 len--;
576 }
577 if (len > 0)
578 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
579 if (!scm_is_null (chrs))
580 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
581
582 return result;
583 }
584 #undef FUNC_NAME
585
586 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
587 (SCM k, SCM chr),
588 "Return a newly allocated string of\n"
589 "length @var{k}. If @var{chr} is given, then all elements of\n"
590 "the string are initialized to @var{chr}, otherwise the contents\n"
591 "of the @var{string} are unspecified.")
592 #define FUNC_NAME s_scm_make_string
593 {
594 return scm_c_make_string (scm_to_size_t (k), chr);
595 }
596 #undef FUNC_NAME
597
598 SCM
599 scm_c_make_string (size_t len, SCM chr)
600 #define FUNC_NAME NULL
601 {
602 char *dst;
603 SCM res = scm_i_make_string (len, &dst);
604
605 if (!SCM_UNBNDP (chr))
606 {
607 SCM_VALIDATE_CHAR (0, chr);
608 memset (dst, SCM_CHAR (chr), len);
609 }
610
611 return res;
612 }
613 #undef FUNC_NAME
614
615 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
616 (SCM string),
617 "Return the number of characters in @var{string}.")
618 #define FUNC_NAME s_scm_string_length
619 {
620 SCM_VALIDATE_STRING (1, string);
621 return scm_from_size_t (STRING_LENGTH (string));
622 }
623 #undef FUNC_NAME
624
625 size_t
626 scm_c_string_length (SCM string)
627 {
628 if (!IS_STRING (string))
629 scm_wrong_type_arg_msg (NULL, 0, string, "string");
630 return STRING_LENGTH (string);
631 }
632
633 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
634 (SCM str, SCM k),
635 "Return character @var{k} of @var{str} using zero-origin\n"
636 "indexing. @var{k} must be a valid index of @var{str}.")
637 #define FUNC_NAME s_scm_string_ref
638 {
639 size_t len;
640 unsigned long idx;
641
642 SCM_VALIDATE_STRING (1, str);
643
644 len = scm_i_string_length (str);
645 if (SCM_LIKELY (len > 0))
646 idx = scm_to_unsigned_integer (k, 0, len - 1);
647 else
648 scm_out_of_range (NULL, k);
649
650 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
651 }
652 #undef FUNC_NAME
653
654 SCM
655 scm_c_string_ref (SCM str, size_t p)
656 {
657 if (p >= scm_i_string_length (str))
658 scm_out_of_range (NULL, scm_from_size_t (p));
659 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
660 }
661
662 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
663 (SCM str, SCM k, SCM chr),
664 "Store @var{chr} in element @var{k} of @var{str} and return\n"
665 "an unspecified value. @var{k} must be a valid index of\n"
666 "@var{str}.")
667 #define FUNC_NAME s_scm_string_set_x
668 {
669 size_t len;
670 unsigned long idx;
671
672 SCM_VALIDATE_STRING (1, str);
673
674 len = scm_i_string_length (str);
675 if (SCM_LIKELY (len > 0))
676 idx = scm_to_unsigned_integer (k, 0, len - 1);
677 else
678 scm_out_of_range (NULL, k);
679
680 SCM_VALIDATE_CHAR (3, chr);
681 {
682 char *dst = scm_i_string_writable_chars (str);
683 dst[idx] = SCM_CHAR (chr);
684 scm_i_string_stop_writing ();
685 }
686 return SCM_UNSPECIFIED;
687 }
688 #undef FUNC_NAME
689
690 void
691 scm_c_string_set_x (SCM str, size_t p, SCM chr)
692 {
693 if (p >= scm_i_string_length (str))
694 scm_out_of_range (NULL, scm_from_size_t (p));
695 {
696 char *dst = scm_i_string_writable_chars (str);
697 dst[p] = SCM_CHAR (chr);
698 scm_i_string_stop_writing ();
699 }
700 }
701
702 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
703 (SCM str, SCM start, SCM end),
704 "Return a newly allocated string formed from the characters\n"
705 "of @var{str} beginning with index @var{start} (inclusive) and\n"
706 "ending with index @var{end} (exclusive).\n"
707 "@var{str} must be a string, @var{start} and @var{end} must be\n"
708 "exact integers satisfying:\n\n"
709 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
710 #define FUNC_NAME s_scm_substring
711 {
712 size_t len, from, to;
713
714 SCM_VALIDATE_STRING (1, str);
715 len = scm_i_string_length (str);
716 from = scm_to_unsigned_integer (start, 0, len);
717 if (SCM_UNBNDP (end))
718 to = len;
719 else
720 to = scm_to_unsigned_integer (end, from, len);
721 return scm_i_substring (str, from, to);
722 }
723 #undef FUNC_NAME
724
725 SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
726 (SCM str, SCM start, SCM end),
727 "Return a newly allocated string formed from the characters\n"
728 "of @var{str} beginning with index @var{start} (inclusive) and\n"
729 "ending with index @var{end} (exclusive).\n"
730 "@var{str} must be a string, @var{start} and @var{end} must be\n"
731 "exact integers satisfying:\n"
732 "\n"
733 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
734 "\n"
735 "The returned string is read-only.\n")
736 #define FUNC_NAME s_scm_substring_read_only
737 {
738 size_t len, from, to;
739
740 SCM_VALIDATE_STRING (1, str);
741 len = scm_i_string_length (str);
742 from = scm_to_unsigned_integer (start, 0, len);
743 if (SCM_UNBNDP (end))
744 to = len;
745 else
746 to = scm_to_unsigned_integer (end, from, len);
747 return scm_i_substring_read_only (str, from, to);
748 }
749 #undef FUNC_NAME
750
751 SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
752 (SCM str, SCM start, SCM end),
753 "Return a newly allocated string formed from the characters\n"
754 "of @var{str} beginning with index @var{start} (inclusive) and\n"
755 "ending with index @var{end} (exclusive).\n"
756 "@var{str} must be a string, @var{start} and @var{end} must be\n"
757 "exact integers satisfying:\n\n"
758 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
759 #define FUNC_NAME s_scm_substring_copy
760 {
761 /* For the Scheme version, START is mandatory, but for the C
762 version, it is optional. See scm_string_copy in srfi-13.c for a
763 rationale.
764 */
765
766 size_t from, to;
767
768 SCM_VALIDATE_STRING (1, str);
769 scm_i_get_substring_spec (scm_i_string_length (str),
770 start, &from, end, &to);
771 return scm_i_substring_copy (str, from, to);
772 }
773 #undef FUNC_NAME
774
775 SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
776 (SCM str, SCM start, SCM end),
777 "Return string that indirectly refers to the characters\n"
778 "of @var{str} beginning with index @var{start} (inclusive) and\n"
779 "ending with index @var{end} (exclusive).\n"
780 "@var{str} must be a string, @var{start} and @var{end} must be\n"
781 "exact integers satisfying:\n\n"
782 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
783 #define FUNC_NAME s_scm_substring_shared
784 {
785 size_t len, from, to;
786
787 SCM_VALIDATE_STRING (1, str);
788 len = scm_i_string_length (str);
789 from = scm_to_unsigned_integer (start, 0, len);
790 if (SCM_UNBNDP (end))
791 to = len;
792 else
793 to = scm_to_unsigned_integer (end, from, len);
794 return scm_i_substring_shared (str, from, to);
795 }
796 #undef FUNC_NAME
797
798 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
799 (SCM args),
800 "Return a newly allocated string whose characters form the\n"
801 "concatenation of the given strings, @var{args}.")
802 #define FUNC_NAME s_scm_string_append
803 {
804 SCM res;
805 size_t i = 0;
806 SCM l, s;
807 char *data;
808
809 SCM_VALIDATE_REST_ARGUMENT (args);
810 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
811 {
812 s = SCM_CAR (l);
813 SCM_VALIDATE_STRING (SCM_ARGn, s);
814 i += scm_i_string_length (s);
815 }
816 res = scm_i_make_string (i, &data);
817 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
818 {
819 size_t len;
820 s = SCM_CAR (l);
821 SCM_VALIDATE_STRING (SCM_ARGn, s);
822 len = scm_i_string_length (s);
823 memcpy (data, scm_i_string_chars (s), len);
824 data += len;
825 scm_remember_upto_here_1 (s);
826 }
827 return res;
828 }
829 #undef FUNC_NAME
830
831 int
832 scm_is_string (SCM obj)
833 {
834 return IS_STRING (obj);
835 }
836
837 SCM
838 scm_from_locale_stringn (const char *str, size_t len)
839 {
840 SCM res;
841 char *dst;
842
843 if (len == (size_t)-1)
844 len = strlen (str);
845 res = scm_i_make_string (len, &dst);
846 memcpy (dst, str, len);
847 return res;
848 }
849
850 SCM
851 scm_from_locale_string (const char *str)
852 {
853 return scm_from_locale_stringn (str, -1);
854 }
855
856 SCM
857 scm_take_locale_stringn (char *str, size_t len)
858 {
859 SCM buf, res;
860
861 if (len == (size_t)-1)
862 len = strlen (str);
863 else
864 {
865 /* Ensure STR is null terminated. A realloc for 1 extra byte should
866 often be satisfied from the alignment padding after the block, with
867 no actual data movement. */
868 str = scm_realloc (str, len+1);
869 str[len] = '\0';
870 }
871
872 buf = scm_i_take_stringbufn (str, len);
873 res = scm_double_cell (STRING_TAG,
874 SCM_UNPACK (buf),
875 (scm_t_bits) 0, (scm_t_bits) len);
876 return res;
877 }
878
879 SCM
880 scm_take_locale_string (char *str)
881 {
882 return scm_take_locale_stringn (str, -1);
883 }
884
885 char *
886 scm_to_locale_stringn (SCM str, size_t *lenp)
887 {
888 char *res;
889 size_t len;
890
891 if (!scm_is_string (str))
892 scm_wrong_type_arg_msg (NULL, 0, str, "string");
893 len = scm_i_string_length (str);
894 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
895 memcpy (res, scm_i_string_chars (str), len);
896 if (lenp == NULL)
897 {
898 res[len] = '\0';
899 if (strlen (res) != len)
900 {
901 free (res);
902 scm_misc_error (NULL,
903 "string contains #\\nul character: ~S",
904 scm_list_1 (str));
905 }
906 }
907 else
908 *lenp = len;
909
910 scm_remember_upto_here_1 (str);
911 return res;
912 }
913
914 char *
915 scm_to_locale_string (SCM str)
916 {
917 return scm_to_locale_stringn (str, NULL);
918 }
919
920 size_t
921 scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
922 {
923 size_t len;
924
925 if (!scm_is_string (str))
926 scm_wrong_type_arg_msg (NULL, 0, str, "string");
927 len = scm_i_string_length (str);
928 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
929 scm_remember_upto_here_1 (str);
930 return len;
931 }
932
933 /* converts C scm_array of strings to SCM scm_list of strings. */
934 /* If argc < 0, a null terminated scm_array is assumed. */
935 SCM
936 scm_makfromstrs (int argc, char **argv)
937 {
938 int i = argc;
939 SCM lst = SCM_EOL;
940 if (0 > i)
941 for (i = 0; argv[i]; i++);
942 while (i--)
943 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
944 return lst;
945 }
946
947 /* Return a newly allocated array of char pointers to each of the strings
948 in args, with a terminating NULL pointer. */
949
950 char **
951 scm_i_allocate_string_pointers (SCM list)
952 {
953 char **result;
954 int len = scm_ilength (list);
955 int i;
956
957 if (len < 0)
958 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
959
960 scm_dynwind_begin (0);
961
962 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
963 result[len] = NULL;
964 scm_dynwind_unwind_handler (free, result, 0);
965
966 /* The list might be have been modified in another thread, so
967 we check LIST before each access.
968 */
969 for (i = 0; i < len && scm_is_pair (list); i++)
970 {
971 result[i] = scm_to_locale_string (SCM_CAR (list));
972 list = SCM_CDR (list);
973 }
974
975 scm_dynwind_end ();
976 return result;
977 }
978
979 void
980 scm_i_free_string_pointers (char **pointers)
981 {
982 int i;
983
984 for (i = 0; pointers[i]; i++)
985 free (pointers[i]);
986 free (pointers);
987 }
988
989 void
990 scm_i_get_substring_spec (size_t len,
991 SCM start, size_t *cstart,
992 SCM end, size_t *cend)
993 {
994 if (SCM_UNBNDP (start))
995 *cstart = 0;
996 else
997 *cstart = scm_to_unsigned_integer (start, 0, len);
998
999 if (SCM_UNBNDP (end))
1000 *cend = len;
1001 else
1002 *cend = scm_to_unsigned_integer (end, *cstart, len);
1003 }
1004
1005 #if SCM_ENABLE_DEPRECATED
1006
1007 /* When these definitions are removed, it becomes reasonable to use
1008 read-only strings for string literals. For that, change the reader
1009 to create string literals with scm_c_substring_read_only instead of
1010 with scm_c_substring_copy.
1011 */
1012
1013 int
1014 scm_i_deprecated_stringp (SCM str)
1015 {
1016 scm_c_issue_deprecation_warning
1017 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1018
1019 return scm_is_string (str);
1020 }
1021
1022 char *
1023 scm_i_deprecated_string_chars (SCM str)
1024 {
1025 char *chars;
1026
1027 scm_c_issue_deprecation_warning
1028 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1029
1030 /* We don't accept shared substrings here since they are not
1031 null-terminated.
1032 */
1033 if (IS_SH_STRING (str))
1034 scm_misc_error (NULL,
1035 "SCM_STRING_CHARS does not work with shared substrings.",
1036 SCM_EOL);
1037
1038 /* We explicitely test for read-only strings to produce a better
1039 error message.
1040 */
1041
1042 if (IS_RO_STRING (str))
1043 scm_misc_error (NULL,
1044 "SCM_STRING_CHARS does not work with read-only strings.",
1045 SCM_EOL);
1046
1047 /* The following is still wrong, of course...
1048 */
1049 chars = scm_i_string_writable_chars (str);
1050 scm_i_string_stop_writing ();
1051 return chars;
1052 }
1053
1054 size_t
1055 scm_i_deprecated_string_length (SCM str)
1056 {
1057 scm_c_issue_deprecation_warning
1058 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1059 return scm_c_string_length (str);
1060 }
1061
1062 #endif
1063
1064 void
1065 scm_init_strings ()
1066 {
1067 scm_nullstr = scm_i_make_string (0, NULL);
1068
1069 #include "libguile/strings.x"
1070 }
1071
1072
1073 /*
1074 Local Variables:
1075 c-file-style: "gnu"
1076 End:
1077 */