* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
17
18
19 \f
20
21 #include <string.h>
22 #include <stdio.h>
23
24 #include "libguile/_scm.h"
25 #include "libguile/chars.h"
26 #include "libguile/root.h"
27 #include "libguile/strings.h"
28 #include "libguile/deprecation.h"
29 #include "libguile/validate.h"
30 #include "libguile/dynwind.h"
31
32 \f
33
34 /* {Strings}
35 */
36
37
38 /* Stringbufs
39 *
40 * XXX - keeping an accurate refcount during GC seems to be quite
41 * tricky, so we just keep score of whether a stringbuf might be
42 * shared, not wether it definitely is.
43 *
44 * The scheme I (mvo) tried to keep an accurate reference count would
45 * recount all strings that point to a stringbuf during the mark-phase
46 * of the GC. This was done since one cannot access the stringbuf of
47 * a string when that string is freed (in order to decrease the
48 * reference count). The memory of the stringbuf might have been
49 * reused already for something completely different.
50 *
51 * This recounted worked for a small number of threads beating on
52 * cow-strings, but it failed randomly with more than 10 threads, say.
53 * I couldn't figure out what went wrong, so I used the conservative
54 * approach implemented below.
55 *
56 * A stringbuf needs to know its length, but only so that it can be
57 * reported when the stringbuf is freed.
58 *
59 * Stringbufs (and strings) are not stored very compactly: a stringbuf
60 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
61 * information. As a compensation, the code below is made more
62 * complicated by storing small strings inline in the double cell of a
63 * stringbuf. So we have fixstrings and bigstrings...
64 */
65
66 #define STRINGBUF_F_SHARED 0x100
67 #define STRINGBUF_F_INLINE 0x200
68
69 #define STRINGBUF_TAG scm_tc7_stringbuf
70 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
71 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
72
73 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
74 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
75 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
76 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
77
78 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
79 ? STRINGBUF_INLINE_CHARS (buf) \
80 : STRINGBUF_OUTLINE_CHARS (buf))
81 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_LENGTH (buf) \
83 : STRINGBUF_OUTLINE_LENGTH (buf))
84
85 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
86
87 #define SET_STRINGBUF_SHARED(buf) \
88 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
89
90 #if SCM_DEBUG
91 static size_t lenhist[1001];
92 #endif
93
94 static SCM
95 make_stringbuf (size_t len)
96 {
97 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
98 scm_i_symbol_chars, all stringbufs are null-terminated. Once
99 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
100 has been changed for scm_i_symbol_chars, this null-termination
101 can be dropped.
102 */
103
104 #if SCM_DEBUG
105 if (len < 1000)
106 lenhist[len]++;
107 else
108 lenhist[1000]++;
109 #endif
110
111 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
112 {
113 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
114 0, 0, 0);
115 }
116 else
117 {
118 char *mem = scm_gc_malloc (len+1, "string");
119 mem[len] = '\0';
120 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
121 (scm_t_bits) len, (scm_t_bits) 0);
122 }
123 }
124
125 SCM
126 scm_i_stringbuf_mark (SCM buf)
127 {
128 return SCM_BOOL_F;
129 }
130
131 void
132 scm_i_stringbuf_free (SCM buf)
133 {
134 if (!STRINGBUF_INLINE (buf))
135 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
136 STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
137 }
138
139 SCM_MUTEX (stringbuf_write_mutex);
140
141 /* Copy-on-write strings.
142 */
143
144 #define STRING_TAG scm_tc7_string
145
146 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
147 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
148 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
149
150 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
151 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
152
153 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
154
155 SCM
156 scm_i_make_string (size_t len, char **charsp)
157 {
158 SCM buf = make_stringbuf (len);
159 SCM res;
160 if (charsp)
161 *charsp = STRINGBUF_CHARS (buf);
162 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
163 (scm_t_bits)0, (scm_t_bits) len);
164 return res;
165 }
166
167 static void
168 validate_substring_args (SCM str, size_t start, size_t end)
169 {
170 if (!IS_STRING (str))
171 scm_wrong_type_arg_msg (NULL, 0, str, "string");
172 if (start > STRING_LENGTH (str))
173 scm_out_of_range (NULL, scm_from_size_t (start));
174 if (end > STRING_LENGTH (str) || end < start)
175 scm_out_of_range (NULL, scm_from_size_t (end));
176 }
177
178 SCM
179 scm_i_substring (SCM str, size_t start, size_t end)
180 {
181 SCM buf = STRING_STRINGBUF (str);
182 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
183 SET_STRINGBUF_SHARED (buf);
184 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
185 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
186 (scm_t_bits)start, (scm_t_bits) end - start);
187 }
188
189 SCM
190 scm_i_substring_copy (SCM str, size_t start, size_t end)
191 {
192 size_t len = end - start;
193 SCM buf = STRING_STRINGBUF (str);
194 SCM my_buf = make_stringbuf (len);
195 memcpy (STRINGBUF_CHARS (my_buf), STRINGBUF_CHARS (buf) + start, len);
196 scm_remember_upto_here_1 (buf);
197 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
198 (scm_t_bits)0, (scm_t_bits) len);
199 }
200
201 SCM
202 scm_c_substring (SCM str, size_t start, size_t end)
203 {
204 validate_substring_args (str, start, end);
205 return scm_i_substring (str, start, end);
206 }
207
208 SCM
209 scm_c_substring_copy (SCM str, size_t start, size_t end)
210 {
211 validate_substring_args (str, start, end);
212 return scm_i_substring_copy (str, start, end);
213 }
214
215 /* Mutation-sharing substrings
216 */
217
218 #define SH_STRING_TAG (scm_tc7_string + 0x100)
219
220 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
221 /* START and LENGTH as for STRINGs. */
222
223 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
224
225 SCM
226 scm_i_substring_shared (SCM str, size_t start, size_t end)
227 {
228 if (start == 0 && end == STRING_LENGTH (str))
229 return str;
230 else
231 {
232 SCM res = scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
233 (scm_t_bits)start, (scm_t_bits) end - start);
234 return res;
235 }
236 }
237
238 SCM
239 scm_c_substring_shared (SCM str, size_t start, size_t end)
240 {
241 validate_substring_args (str, start, end);
242 return scm_i_substring_shared (str, start, end);
243 }
244
245 SCM
246 scm_i_string_mark (SCM str)
247 {
248 if (IS_SH_STRING (str))
249 return SH_STRING_STRING (str);
250 else
251 return STRING_STRINGBUF (str);
252 }
253
254 void
255 scm_i_string_free (SCM str)
256 {
257 }
258
259 /* Internal accessors
260 */
261
262 size_t
263 scm_i_string_length (SCM str)
264 {
265 return STRING_LENGTH (str);
266 }
267
268 const char *
269 scm_i_string_chars (SCM str)
270 {
271 SCM buf;
272 size_t start = STRING_START(str);
273 if (IS_SH_STRING (str))
274 {
275 str = SH_STRING_STRING (str);
276 start += STRING_START (str);
277 }
278 buf = STRING_STRINGBUF (str);
279 return STRINGBUF_CHARS (buf) + start;
280 }
281
282 char *
283 scm_i_string_writable_chars (SCM str)
284 {
285 SCM buf;
286 size_t start = STRING_START(str);
287 if (IS_SH_STRING (str))
288 {
289 str = SH_STRING_STRING (str);
290 start += STRING_START (str);
291 }
292 buf = STRING_STRINGBUF (str);
293 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
294 if (STRINGBUF_SHARED (buf))
295 {
296 /* Clone stringbuf. For this, we put all threads to sleep.
297 */
298
299 size_t len = STRING_LENGTH (str);
300 SCM new_buf;
301
302 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
303
304 new_buf = make_stringbuf (len);
305 memcpy (STRINGBUF_CHARS (new_buf),
306 STRINGBUF_CHARS (buf) + STRING_START (str), len);
307
308 scm_i_thread_put_to_sleep ();
309 SET_STRING_STRINGBUF (str, new_buf);
310 start -= STRING_START (str);
311 SET_STRING_START (str, 0);
312 scm_i_thread_wake_up ();
313
314 buf = new_buf;
315
316 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
317 }
318
319 return STRINGBUF_CHARS (buf) + start;
320 }
321
322 void
323 scm_i_string_stop_writing (void)
324 {
325 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
326 }
327
328 /* Symbols.
329
330 Basic symbol creation and accessing is done here, the rest is in
331 symbols.[hc]. This has been done to keep stringbufs and the
332 internals of strings and string-like objects confined to this file.
333 */
334
335 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
336
337 SCM
338 scm_i_make_symbol (SCM name, unsigned long hash, SCM props)
339 {
340 SCM buf;
341 size_t start = STRING_START (name);
342 size_t length = STRING_LENGTH (name);
343
344 if (IS_SH_STRING (name))
345 {
346 name = SH_STRING_STRING (name);
347 start += STRING_START (name);
348 }
349 buf = SYMBOL_STRINGBUF (name);
350
351 if (start == 0 && length == STRINGBUF_LENGTH (buf))
352 {
353 /* reuse buf. */
354 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
355 SET_STRINGBUF_SHARED (buf);
356 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
357 }
358 else
359 {
360 /* make new buf. */
361 SCM new_buf = make_stringbuf (length);
362 memcpy (STRINGBUF_CHARS (new_buf),
363 STRINGBUF_CHARS (buf) + start, length);
364 buf = new_buf;
365 }
366 return scm_double_cell (scm_tc7_symbol, SCM_UNPACK (buf),
367 (scm_t_bits) hash, SCM_UNPACK (props));
368 }
369
370 size_t
371 scm_i_symbol_length (SCM sym)
372 {
373 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
374 }
375
376 const char *
377 scm_i_symbol_chars (SCM sym)
378 {
379 SCM buf = SYMBOL_STRINGBUF (sym);
380 return STRINGBUF_CHARS (buf);
381 }
382
383 SCM
384 scm_i_symbol_mark (SCM sym)
385 {
386 scm_gc_mark (SYMBOL_STRINGBUF (sym));
387 return SCM_CELL_OBJECT_3 (sym);
388 }
389
390 void
391 scm_i_symbol_free (SCM sym)
392 {
393 }
394
395 SCM
396 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
397 {
398 SCM buf = SYMBOL_STRINGBUF (sym);
399 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
400 SET_STRINGBUF_SHARED (buf);
401 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
402 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
403 (scm_t_bits)start, (scm_t_bits) end - start);
404 }
405
406 /* Debugging
407 */
408
409 #if SCM_DEBUG
410
411 SCM scm_sys_string_dump (SCM);
412 SCM scm_sys_symbol_dump (SCM);
413 SCM scm_sys_stringbuf_hist (void);
414
415 SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
416 (SCM str),
417 "")
418 #define FUNC_NAME s_scm_sys_string_dump
419 {
420 SCM_VALIDATE_STRING (1, str);
421 fprintf (stderr, "%p:\n", str);
422 fprintf (stderr, " start: %u\n", STRING_START (str));
423 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
424 if (IS_SH_STRING (str))
425 {
426 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
427 fprintf (stderr, "\n");
428 scm_sys_string_dump (SH_STRING_STRING (str));
429 }
430 else
431 {
432 SCM buf = STRING_STRINGBUF (str);
433 fprintf (stderr, " buf: %p\n", buf);
434 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
435 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
436 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
437 }
438 return SCM_UNSPECIFIED;
439 }
440 #undef FUNC_NAME
441
442 SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
443 (SCM sym),
444 "")
445 #define FUNC_NAME s_scm_sys_symbol_dump
446 {
447 SCM_VALIDATE_SYMBOL (1, sym);
448 fprintf (stderr, "%p:\n", sym);
449 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
450 {
451 SCM buf = SYMBOL_STRINGBUF (sym);
452 fprintf (stderr, " buf: %p\n", buf);
453 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
454 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
455 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
456 }
457 return SCM_UNSPECIFIED;
458 }
459 #undef FUNC_NAME
460
461 SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
462 (void),
463 "")
464 #define FUNC_NAME s_scm_sys_string_dump
465 {
466 int i;
467 for (i = 0; i < 1000; i++)
468 if (lenhist[i])
469 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
470 fprintf (stderr, ">999: %u\n", lenhist[1000]);
471 return SCM_UNSPECIFIED;
472 }
473 #undef FUNC_NAME
474
475 #endif
476
477 \f
478
479 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
480 (SCM obj),
481 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
482 #define FUNC_NAME s_scm_string_p
483 {
484 return scm_from_bool (IS_STRING (obj));
485 }
486 #undef FUNC_NAME
487
488
489 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
490
491 SCM_DEFINE (scm_string, "string", 0, 0, 1,
492 (SCM chrs),
493 "@deffnx {Scheme Procedure} list->string chrs\n"
494 "Return a newly allocated string composed of the arguments,\n"
495 "@var{chrs}.")
496 #define FUNC_NAME s_scm_string
497 {
498 SCM result;
499 size_t len;
500 char *data;
501
502 {
503 long i = scm_ilength (chrs);
504
505 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
506 len = i;
507 }
508
509 result = scm_i_make_string (len, &data);
510 while (len > 0 && SCM_CONSP (chrs))
511 {
512 SCM elt = SCM_CAR (chrs);
513
514 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
515 *data++ = SCM_CHAR (elt);
516 chrs = SCM_CDR (chrs);
517 len--;
518 }
519 if (len > 0)
520 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
521 if (!SCM_NULLP (chrs))
522 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
523
524 return result;
525 }
526 #undef FUNC_NAME
527
528 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
529 (SCM k, SCM chr),
530 "Return a newly allocated string of\n"
531 "length @var{k}. If @var{chr} is given, then all elements of\n"
532 "the string are initialized to @var{chr}, otherwise the contents\n"
533 "of the @var{string} are unspecified.")
534 #define FUNC_NAME s_scm_make_string
535 {
536 return scm_c_make_string (scm_to_size_t (k), chr);
537 }
538 #undef FUNC_NAME
539
540 SCM
541 scm_c_make_string (size_t len, SCM chr)
542 #define FUNC_NAME NULL
543 {
544 char *dst;
545 SCM res = scm_i_make_string (len, &dst);
546
547 if (!SCM_UNBNDP (chr))
548 {
549 SCM_VALIDATE_CHAR (0, chr);
550 memset (dst, SCM_CHAR (chr), len);
551 }
552
553 return res;
554 }
555 #undef FUNC_NAME
556
557 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
558 (SCM string),
559 "Return the number of characters in @var{string}.")
560 #define FUNC_NAME s_scm_string_length
561 {
562 SCM_VALIDATE_STRING (1, string);
563 return scm_from_size_t (STRING_LENGTH (string));
564 }
565 #undef FUNC_NAME
566
567 size_t
568 scm_c_string_length (SCM string)
569 {
570 if (!IS_STRING (string))
571 scm_wrong_type_arg_msg (NULL, 0, string, "string");
572 return STRING_LENGTH (string);
573 }
574
575 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
576 (SCM str, SCM k),
577 "Return character @var{k} of @var{str} using zero-origin\n"
578 "indexing. @var{k} must be a valid index of @var{str}.")
579 #define FUNC_NAME s_scm_string_ref
580 {
581 unsigned long idx;
582
583 SCM_VALIDATE_STRING (1, str);
584 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1);
585 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
586 }
587 #undef FUNC_NAME
588
589 SCM
590 scm_c_string_ref (SCM str, size_t p)
591 {
592 if (p >= scm_i_string_length (str))
593 scm_out_of_range (NULL, scm_from_size_t (p));
594 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
595 }
596
597 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
598 (SCM str, SCM k, SCM chr),
599 "Store @var{chr} in element @var{k} of @var{str} and return\n"
600 "an unspecified value. @var{k} must be a valid index of\n"
601 "@var{str}.")
602 #define FUNC_NAME s_scm_string_set_x
603 {
604 unsigned long idx;
605
606 SCM_VALIDATE_STRING (1, str);
607 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1);
608 SCM_VALIDATE_CHAR (3, chr);
609 {
610 char *dst = scm_i_string_writable_chars (str);
611 dst[idx] = SCM_CHAR (chr);
612 scm_i_string_stop_writing ();
613 }
614 return SCM_UNSPECIFIED;
615 }
616 #undef FUNC_NAME
617
618 void
619 scm_c_string_set_x (SCM str, size_t p, SCM chr)
620 {
621 if (p >= scm_i_string_length (str))
622 scm_out_of_range (NULL, scm_from_size_t (p));
623 {
624 char *dst = scm_i_string_writable_chars (str);
625 dst[p] = SCM_CHAR (chr);
626 scm_i_string_stop_writing ();
627 }
628 }
629
630 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
631 (SCM str, SCM start, SCM end),
632 "Return a newly allocated string formed from the characters\n"
633 "of @var{str} beginning with index @var{start} (inclusive) and\n"
634 "ending with index @var{end} (exclusive).\n"
635 "@var{str} must be a string, @var{start} and @var{end} must be\n"
636 "exact integers satisfying:\n\n"
637 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
638 #define FUNC_NAME s_scm_substring
639 {
640 size_t len, from, to;
641
642 SCM_VALIDATE_STRING (1, str);
643 len = scm_i_string_length (str);
644 from = scm_to_unsigned_integer (start, 0, len);
645 if (SCM_UNBNDP (end))
646 to = len;
647 else
648 to = scm_to_unsigned_integer (end, from, len);
649 return scm_i_substring (str, from, to);
650 }
651 #undef FUNC_NAME
652
653 SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
654 (SCM str, SCM start, SCM end),
655 "Return a newly allocated string formed from the characters\n"
656 "of @var{str} beginning with index @var{start} (inclusive) and\n"
657 "ending with index @var{end} (exclusive).\n"
658 "@var{str} must be a string, @var{start} and @var{end} must be\n"
659 "exact integers satisfying:\n\n"
660 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
661 #define FUNC_NAME s_scm_substring_copy
662 {
663 size_t len, from, to;
664
665 SCM_VALIDATE_STRING (1, str);
666 len = scm_i_string_length (str);
667 from = scm_to_unsigned_integer (start, 0, len);
668 if (SCM_UNBNDP (end))
669 to = len;
670 else
671 to = scm_to_unsigned_integer (end, from, len);
672 return scm_i_substring_copy (str, from, to);
673 }
674 #undef FUNC_NAME
675
676 SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
677 (SCM str, SCM start, SCM end),
678 "Return string that indirectly refers to the characters\n"
679 "of @var{str} beginning with index @var{start} (inclusive) and\n"
680 "ending with index @var{end} (exclusive).\n"
681 "@var{str} must be a string, @var{start} and @var{end} must be\n"
682 "exact integers satisfying:\n\n"
683 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
684 #define FUNC_NAME s_scm_substring_shared
685 {
686 size_t len, from, to;
687
688 SCM_VALIDATE_STRING (1, str);
689 len = scm_i_string_length (str);
690 from = scm_to_unsigned_integer (start, 0, len);
691 if (SCM_UNBNDP (end))
692 to = len;
693 else
694 to = scm_to_unsigned_integer (end, from, len);
695 return scm_i_substring_shared (str, from, to);
696 }
697 #undef FUNC_NAME
698
699 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
700 (SCM args),
701 "Return a newly allocated string whose characters form the\n"
702 "concatenation of the given strings, @var{args}.")
703 #define FUNC_NAME s_scm_string_append
704 {
705 SCM res;
706 size_t i = 0;
707 SCM l, s;
708 char *data;
709
710 SCM_VALIDATE_REST_ARGUMENT (args);
711 for (l = args; !SCM_NULLP (l); l = SCM_CDR (l))
712 {
713 s = SCM_CAR (l);
714 SCM_VALIDATE_STRING (SCM_ARGn, s);
715 i += scm_i_string_length (s);
716 }
717 res = scm_i_make_string (i, &data);
718 for (l = args; !SCM_NULLP (l); l = SCM_CDR (l))
719 {
720 s = SCM_CAR (l);
721 SCM_VALIDATE_STRING (SCM_ARGn, s);
722 size_t len = scm_i_string_length (s);
723 memcpy (data, scm_i_string_chars (s), len);
724 data += len;
725 scm_remember_upto_here_1 (s);
726 }
727 return res;
728 }
729 #undef FUNC_NAME
730
731 int
732 scm_is_string (SCM obj)
733 {
734 return IS_STRING (obj);
735 }
736
737 SCM
738 scm_from_locale_stringn (const char *str, size_t len)
739 {
740 SCM res;
741 char *dst;
742
743 if (len == (size_t)-1)
744 len = strlen (str);
745 res = scm_i_make_string (len, &dst);
746 memcpy (dst, str, len);
747 return res;
748 }
749
750 SCM
751 scm_from_locale_string (const char *str)
752 {
753 return scm_from_locale_stringn (str, -1);
754 }
755
756 SCM
757 scm_take_locale_stringn (char *str, size_t len)
758 {
759 if (len == (size_t)-1)
760 return scm_take_locale_string (str);
761 else
762 {
763 /* STR might not be zero terminated and we are not allowed to
764 look at str[len], so we have to make a new one...
765 */
766 SCM res = scm_from_locale_stringn (str, len);
767 free (str);
768 return res;
769 }
770 }
771
772 SCM
773 scm_take_locale_string (char *str)
774 {
775 size_t len = strlen (str);
776 SCM buf, res;
777
778 buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
779 (scm_t_bits) len, (scm_t_bits) 0);
780 res = scm_double_cell (STRING_TAG,
781 SCM_UNPACK (buf),
782 (scm_t_bits) 0, (scm_t_bits) len);
783 scm_gc_register_collectable_memory (str, len+1, "string");
784 return res;
785 }
786
787 char *
788 scm_to_locale_stringn (SCM str, size_t *lenp)
789 {
790 char *res;
791 size_t len;
792
793 if (!scm_is_string (str))
794 scm_wrong_type_arg_msg (NULL, 0, str, "string");
795 len = scm_i_string_length (str);
796 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
797 memcpy (res, scm_i_string_chars (str), len);
798 if (lenp == NULL)
799 {
800 res[len] = '\0';
801 if (strlen (res) != len)
802 {
803 free (res);
804 scm_misc_error (NULL,
805 "string contains #\\nul character: ~S",
806 scm_list_1 (str));
807 }
808 }
809 else
810 *lenp = len;
811
812 scm_remember_upto_here_1 (str);
813 return res;
814 }
815
816 char *
817 scm_to_locale_string (SCM str)
818 {
819 return scm_to_locale_stringn (str, NULL);
820 }
821
822 size_t
823 scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
824 {
825 size_t len;
826
827 if (!scm_is_string (str))
828 scm_wrong_type_arg_msg (NULL, 0, str, "string");
829 len = scm_i_string_length (str);
830 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
831 scm_remember_upto_here_1 (str);
832 return len;
833 }
834
835 /* converts C scm_array of strings to SCM scm_list of strings. */
836 /* If argc < 0, a null terminated scm_array is assumed. */
837 SCM
838 scm_makfromstrs (int argc, char **argv)
839 {
840 int i = argc;
841 SCM lst = SCM_EOL;
842 if (0 > i)
843 for (i = 0; argv[i]; i++);
844 while (i--)
845 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
846 return lst;
847 }
848
849 /* Return a newly allocated array of char pointers to each of the strings
850 in args, with a terminating NULL pointer. */
851
852 char **
853 scm_i_allocate_string_pointers (SCM list)
854 {
855 char **result;
856 int len = scm_ilength (list);
857 int i;
858
859 if (len < 0)
860 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
861
862 scm_frame_begin (0);
863
864 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
865 result[len] = NULL;
866 scm_frame_unwind_handler (free, result, 0);
867
868 /* The list might be have been modified in another thread, so
869 we check LIST before each access.
870 */
871 for (i = 0; i < len && SCM_CONSP (list); i++)
872 {
873 result[i] = scm_to_locale_string (SCM_CAR (list));
874 list = SCM_CDR (list);
875 }
876
877 scm_frame_end ();
878 return result;
879 }
880
881 void
882 scm_i_free_string_pointers (char **pointers)
883 {
884 int i;
885
886 for (i = 0; pointers[i]; i++)
887 free (pointers[i]);
888 free (pointers);
889 }
890
891 void
892 scm_i_get_substring_spec (size_t len,
893 SCM start, size_t *cstart,
894 SCM end, size_t *cend)
895 {
896 if (SCM_UNBNDP (start))
897 *cstart = 0;
898 else
899 *cstart = scm_to_unsigned_integer (start, 0, len);
900
901 if (SCM_UNBNDP (end))
902 *cend = len;
903 else
904 *cend = scm_to_unsigned_integer (end, *cstart, len);
905 }
906
907 #if SCM_ENABLE_DEPRECATED
908
909 int
910 SCM_STRINGP (SCM str)
911 {
912 scm_c_issue_deprecation_warning
913 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
914
915 /* We don't accept shared substrings here since they are not
916 null-terminated.
917 */
918
919 return IS_STRING (str) && !IS_SH_STRING (str);
920 }
921
922 char *
923 SCM_STRING_CHARS (SCM str)
924 {
925 char *chars;
926
927 scm_c_issue_deprecation_warning
928 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
929
930 /* The following is wrong, of course...
931 */
932 chars = scm_i_string_writable_chars (str);
933 scm_i_string_stop_writing ();
934 return chars;
935 }
936
937 size_t
938 SCM_STRING_LENGTH (SCM str)
939 {
940 scm_c_issue_deprecation_warning
941 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
942 return scm_c_string_length (str);
943 }
944
945 #endif
946
947 void
948 scm_init_strings ()
949 {
950 scm_nullstr = scm_i_make_string (0, NULL);
951
952 #include "libguile/strings.x"
953 }
954
955
956 /*
957 Local Variables:
958 c-file-style: "gnu"
959 End:
960 */