1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libguile/_scm.h"
28 #include "libguile/chars.h"
29 #include "libguile/root.h"
30 #include "libguile/strings.h"
31 #include "libguile/deprecation.h"
32 #include "libguile/validate.h"
33 #include "libguile/dynwind.h"
43 * XXX - keeping an accurate refcount during GC seems to be quite
44 * tricky, so we just keep score of whether a stringbuf might be
45 * shared, not wether it definitely is.
47 * The scheme I (mvo) tried to keep an accurate reference count would
48 * recount all strings that point to a stringbuf during the mark-phase
49 * of the GC. This was done since one cannot access the stringbuf of
50 * a string when that string is freed (in order to decrease the
51 * reference count). The memory of the stringbuf might have been
52 * reused already for something completely different.
54 * This recounted worked for a small number of threads beating on
55 * cow-strings, but it failed randomly with more than 10 threads, say.
56 * I couldn't figure out what went wrong, so I used the conservative
57 * approach implemented below.
59 * A stringbuf needs to know its length, but only so that it can be
60 * reported when the stringbuf is freed.
62 * Stringbufs (and strings) are not stored very compactly: a stringbuf
63 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
64 * information. As a compensation, the code below is made more
65 * complicated by storing small strings inline in the double cell of a
66 * stringbuf. So we have fixstrings and bigstrings...
69 #define STRINGBUF_F_SHARED 0x100
70 #define STRINGBUF_F_INLINE 0x200
72 #define STRINGBUF_TAG scm_tc7_stringbuf
73 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
74 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
76 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
77 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
78 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
79 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
81 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_CHARS (buf) \
83 : STRINGBUF_OUTLINE_CHARS (buf))
84 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
85 ? STRINGBUF_INLINE_LENGTH (buf) \
86 : STRINGBUF_OUTLINE_LENGTH (buf))
88 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
90 #define SET_STRINGBUF_SHARED(buf) \
91 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
94 static size_t lenhist
[1001];
98 make_stringbuf (size_t len
)
100 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
101 scm_i_symbol_chars, all stringbufs are null-terminated. Once
102 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
103 has been changed for scm_i_symbol_chars, this null-termination
114 if (len
<= STRINGBUF_MAX_INLINE_LEN
-1)
116 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_INLINE
| (len
<< 16),
121 char *mem
= scm_gc_malloc_pointerless (len
+ 1, "string");
123 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) mem
,
124 (scm_t_bits
) len
, (scm_t_bits
) 0);
128 /* Return a new stringbuf whose underlying storage consists of the LEN+1
129 octets pointed to by STR (the last octet is zero). */
131 scm_i_take_stringbufn (char *str
, size_t len
)
133 scm_gc_register_collectable_memory (str
, len
+ 1, "stringbuf");
135 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) str
,
136 (scm_t_bits
) len
, (scm_t_bits
) 0);
140 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
142 /* Copy-on-write strings.
145 #define STRING_TAG scm_tc7_string
147 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
148 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
149 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
151 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
152 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
154 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
156 /* Read-only strings.
159 #define RO_STRING_TAG (scm_tc7_string + 0x200)
160 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
162 /* Mutation-sharing substrings
165 #define SH_STRING_TAG (scm_tc7_string + 0x100)
167 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
168 /* START and LENGTH as for STRINGs. */
170 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
173 scm_i_make_string (size_t len
, char **charsp
)
175 SCM buf
= make_stringbuf (len
);
178 *charsp
= STRINGBUF_CHARS (buf
);
179 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
180 (scm_t_bits
)0, (scm_t_bits
) len
);
185 validate_substring_args (SCM str
, size_t start
, size_t end
)
187 if (!IS_STRING (str
))
188 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
189 if (start
> STRING_LENGTH (str
))
190 scm_out_of_range (NULL
, scm_from_size_t (start
));
191 if (end
> STRING_LENGTH (str
) || end
< start
)
192 scm_out_of_range (NULL
, scm_from_size_t (end
));
196 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
198 *start
= STRING_START (*str
);
199 if (IS_SH_STRING (*str
))
201 *str
= SH_STRING_STRING (*str
);
202 *start
+= STRING_START (*str
);
204 *buf
= STRING_STRINGBUF (*str
);
208 scm_i_substring (SCM str
, size_t start
, size_t end
)
212 get_str_buf_start (&str
, &buf
, &str_start
);
213 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
214 SET_STRINGBUF_SHARED (buf
);
215 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
216 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
217 (scm_t_bits
)str_start
+ start
,
218 (scm_t_bits
) end
- start
);
222 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
226 get_str_buf_start (&str
, &buf
, &str_start
);
227 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
228 SET_STRINGBUF_SHARED (buf
);
229 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
230 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
231 (scm_t_bits
)str_start
+ start
,
232 (scm_t_bits
) end
- start
);
236 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
238 size_t len
= end
- start
;
241 get_str_buf_start (&str
, &buf
, &str_start
);
242 my_buf
= make_stringbuf (len
);
243 memcpy (STRINGBUF_CHARS (my_buf
),
244 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
245 scm_remember_upto_here_1 (buf
);
246 return scm_double_cell (STRING_TAG
, SCM_UNPACK(my_buf
),
247 (scm_t_bits
)0, (scm_t_bits
) len
);
251 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
253 if (start
== 0 && end
== STRING_LENGTH (str
))
257 size_t len
= end
- start
;
258 if (IS_SH_STRING (str
))
260 start
+= STRING_START (str
);
261 str
= SH_STRING_STRING (str
);
263 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
264 (scm_t_bits
)start
, (scm_t_bits
) len
);
269 scm_c_substring (SCM str
, size_t start
, size_t end
)
271 validate_substring_args (str
, start
, end
);
272 return scm_i_substring (str
, start
, end
);
276 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
278 validate_substring_args (str
, start
, end
);
279 return scm_i_substring_read_only (str
, start
, end
);
283 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
285 validate_substring_args (str
, start
, end
);
286 return scm_i_substring_copy (str
, start
, end
);
290 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
292 validate_substring_args (str
, start
, end
);
293 return scm_i_substring_shared (str
, start
, end
);
297 /* Internal accessors
301 scm_i_string_length (SCM str
)
303 return STRING_LENGTH (str
);
307 scm_i_string_chars (SCM str
)
311 get_str_buf_start (&str
, &buf
, &start
);
312 return STRINGBUF_CHARS (buf
) + start
;
316 scm_i_string_writable_chars (SCM orig_str
)
318 SCM buf
, str
= orig_str
;
321 get_str_buf_start (&str
, &buf
, &start
);
322 if (IS_RO_STRING (str
))
323 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
325 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
326 if (STRINGBUF_SHARED (buf
))
328 /* Clone stringbuf. */
330 size_t len
= STRING_LENGTH (str
);
333 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
335 new_buf
= make_stringbuf (len
);
336 memcpy (STRINGBUF_CHARS (new_buf
),
337 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
339 start
-= STRING_START (str
);
341 /* FIXME: The following operations are not atomic, so other threads
342 looking at STR may see an inconsistent state. Nevertheless it can't
343 hurt much since (i) accessing STR while it is being mutated can't
344 yield a crash, and (ii) concurrent accesses to STR should be
345 protected by a mutex at the application level. The latter may not
346 apply when STR != ORIG_STR, though. */
347 SET_STRING_START (str
, 0);
348 SET_STRING_STRINGBUF (str
, new_buf
);
352 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
355 return STRINGBUF_CHARS (buf
) + start
;
359 scm_i_string_stop_writing (void)
361 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
366 Basic symbol creation and accessing is done here, the rest is in
367 symbols.[hc]. This has been done to keep stringbufs and the
368 internals of strings and string-like objects confined to this file.
371 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
374 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
375 unsigned long hash
, SCM props
)
378 size_t start
= STRING_START (name
);
379 size_t length
= STRING_LENGTH (name
);
381 if (IS_SH_STRING (name
))
383 name
= SH_STRING_STRING (name
);
384 start
+= STRING_START (name
);
386 buf
= SYMBOL_STRINGBUF (name
);
388 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
391 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
392 SET_STRINGBUF_SHARED (buf
);
393 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
398 SCM new_buf
= make_stringbuf (length
);
399 memcpy (STRINGBUF_CHARS (new_buf
),
400 STRINGBUF_CHARS (buf
) + start
, length
);
403 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
404 (scm_t_bits
) hash
, SCM_UNPACK (props
));
408 scm_i_c_make_symbol (const char *name
, size_t len
,
409 scm_t_bits flags
, unsigned long hash
, SCM props
)
411 SCM buf
= make_stringbuf (len
);
412 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
414 return scm_immutable_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
415 (scm_t_bits
) hash
, SCM_UNPACK (props
));
418 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
419 underlying storage. */
421 scm_i_c_take_symbol (char *name
, size_t len
,
422 scm_t_bits flags
, unsigned long hash
, SCM props
)
424 SCM buf
= scm_i_take_stringbufn (name
, len
);
426 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
427 (scm_t_bits
) hash
, SCM_UNPACK (props
));
431 scm_i_symbol_length (SCM sym
)
433 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
437 scm_c_symbol_length (SCM sym
)
438 #define FUNC_NAME "scm_c_symbol_length"
440 SCM_VALIDATE_SYMBOL (1, sym
);
442 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
447 scm_i_symbol_chars (SCM sym
)
449 SCM buf
= SYMBOL_STRINGBUF (sym
);
450 return STRINGBUF_CHARS (buf
);
454 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
456 SCM buf
= SYMBOL_STRINGBUF (sym
);
457 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
458 SET_STRINGBUF_SHARED (buf
);
459 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
460 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
461 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
469 SCM
scm_sys_string_dump (SCM
);
470 SCM
scm_sys_symbol_dump (SCM
);
471 SCM
scm_sys_stringbuf_hist (void);
473 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0,
476 #define FUNC_NAME s_scm_sys_string_dump
478 SCM_VALIDATE_STRING (1, str
);
479 fprintf (stderr
, "%p:\n", str
);
480 fprintf (stderr
, " start: %u\n", STRING_START (str
));
481 fprintf (stderr
, " len: %u\n", STRING_LENGTH (str
));
482 if (IS_SH_STRING (str
))
484 fprintf (stderr
, " string: %p\n", SH_STRING_STRING (str
));
485 fprintf (stderr
, "\n");
486 scm_sys_string_dump (SH_STRING_STRING (str
));
490 SCM buf
= STRING_STRINGBUF (str
);
491 fprintf (stderr
, " buf: %p\n", buf
);
492 fprintf (stderr
, " chars: %p\n", STRINGBUF_CHARS (buf
));
493 fprintf (stderr
, " length: %u\n", STRINGBUF_LENGTH (buf
));
494 fprintf (stderr
, " flags: %x\n", (SCM_CELL_WORD_0 (buf
) & 0x300));
496 return SCM_UNSPECIFIED
;
500 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0,
503 #define FUNC_NAME s_scm_sys_symbol_dump
505 SCM_VALIDATE_SYMBOL (1, sym
);
506 fprintf (stderr
, "%p:\n", sym
);
507 fprintf (stderr
, " hash: %lu\n", scm_i_symbol_hash (sym
));
509 SCM buf
= SYMBOL_STRINGBUF (sym
);
510 fprintf (stderr
, " buf: %p\n", buf
);
511 fprintf (stderr
, " chars: %p\n", STRINGBUF_CHARS (buf
));
512 fprintf (stderr
, " length: %u\n", STRINGBUF_LENGTH (buf
));
513 fprintf (stderr
, " shared: %u\n", STRINGBUF_SHARED (buf
));
515 return SCM_UNSPECIFIED
;
519 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0,
522 #define FUNC_NAME s_scm_sys_stringbuf_hist
525 for (i
= 0; i
< 1000; i
++)
527 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
528 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
529 return SCM_UNSPECIFIED
;
537 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
539 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
540 #define FUNC_NAME s_scm_string_p
542 return scm_from_bool (IS_STRING (obj
));
547 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
549 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
551 "@deffnx {Scheme Procedure} list->string chrs\n"
552 "Return a newly allocated string composed of the arguments,\n"
554 #define FUNC_NAME s_scm_string
561 long i
= scm_ilength (chrs
);
563 SCM_ASSERT (i
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
567 result
= scm_i_make_string (len
, &data
);
568 while (len
> 0 && scm_is_pair (chrs
))
570 SCM elt
= SCM_CAR (chrs
);
572 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
573 *data
++ = SCM_CHAR (elt
);
574 chrs
= SCM_CDR (chrs
);
578 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
579 if (!scm_is_null (chrs
))
580 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
586 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
588 "Return a newly allocated string of\n"
589 "length @var{k}. If @var{chr} is given, then all elements of\n"
590 "the string are initialized to @var{chr}, otherwise the contents\n"
591 "of the @var{string} are unspecified.")
592 #define FUNC_NAME s_scm_make_string
594 return scm_c_make_string (scm_to_size_t (k
), chr
);
599 scm_c_make_string (size_t len
, SCM chr
)
600 #define FUNC_NAME NULL
603 SCM res
= scm_i_make_string (len
, &dst
);
605 if (!SCM_UNBNDP (chr
))
607 SCM_VALIDATE_CHAR (0, chr
);
608 memset (dst
, SCM_CHAR (chr
), len
);
615 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
617 "Return the number of characters in @var{string}.")
618 #define FUNC_NAME s_scm_string_length
620 SCM_VALIDATE_STRING (1, string
);
621 return scm_from_size_t (STRING_LENGTH (string
));
626 scm_c_string_length (SCM string
)
628 if (!IS_STRING (string
))
629 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
630 return STRING_LENGTH (string
);
633 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
635 "Return character @var{k} of @var{str} using zero-origin\n"
636 "indexing. @var{k} must be a valid index of @var{str}.")
637 #define FUNC_NAME s_scm_string_ref
642 SCM_VALIDATE_STRING (1, str
);
644 len
= scm_i_string_length (str
);
645 if (SCM_LIKELY (len
> 0))
646 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
648 scm_out_of_range (NULL
, k
);
650 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
655 scm_c_string_ref (SCM str
, size_t p
)
657 if (p
>= scm_i_string_length (str
))
658 scm_out_of_range (NULL
, scm_from_size_t (p
));
659 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
662 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
663 (SCM str
, SCM k
, SCM chr
),
664 "Store @var{chr} in element @var{k} of @var{str} and return\n"
665 "an unspecified value. @var{k} must be a valid index of\n"
667 #define FUNC_NAME s_scm_string_set_x
672 SCM_VALIDATE_STRING (1, str
);
674 len
= scm_i_string_length (str
);
675 if (SCM_LIKELY (len
> 0))
676 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
678 scm_out_of_range (NULL
, k
);
680 SCM_VALIDATE_CHAR (3, chr
);
682 char *dst
= scm_i_string_writable_chars (str
);
683 dst
[idx
] = SCM_CHAR (chr
);
684 scm_i_string_stop_writing ();
686 return SCM_UNSPECIFIED
;
691 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
693 if (p
>= scm_i_string_length (str
))
694 scm_out_of_range (NULL
, scm_from_size_t (p
));
696 char *dst
= scm_i_string_writable_chars (str
);
697 dst
[p
] = SCM_CHAR (chr
);
698 scm_i_string_stop_writing ();
702 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
703 (SCM str
, SCM start
, SCM end
),
704 "Return a newly allocated string formed from the characters\n"
705 "of @var{str} beginning with index @var{start} (inclusive) and\n"
706 "ending with index @var{end} (exclusive).\n"
707 "@var{str} must be a string, @var{start} and @var{end} must be\n"
708 "exact integers satisfying:\n\n"
709 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
710 #define FUNC_NAME s_scm_substring
712 size_t len
, from
, to
;
714 SCM_VALIDATE_STRING (1, str
);
715 len
= scm_i_string_length (str
);
716 from
= scm_to_unsigned_integer (start
, 0, len
);
717 if (SCM_UNBNDP (end
))
720 to
= scm_to_unsigned_integer (end
, from
, len
);
721 return scm_i_substring (str
, from
, to
);
725 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
726 (SCM str
, SCM start
, SCM end
),
727 "Return a newly allocated string formed from the characters\n"
728 "of @var{str} beginning with index @var{start} (inclusive) and\n"
729 "ending with index @var{end} (exclusive).\n"
730 "@var{str} must be a string, @var{start} and @var{end} must be\n"
731 "exact integers satisfying:\n"
733 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
735 "The returned string is read-only.\n")
736 #define FUNC_NAME s_scm_substring_read_only
738 size_t len
, from
, to
;
740 SCM_VALIDATE_STRING (1, str
);
741 len
= scm_i_string_length (str
);
742 from
= scm_to_unsigned_integer (start
, 0, len
);
743 if (SCM_UNBNDP (end
))
746 to
= scm_to_unsigned_integer (end
, from
, len
);
747 return scm_i_substring_read_only (str
, from
, to
);
751 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
752 (SCM str
, SCM start
, SCM end
),
753 "Return a newly allocated string formed from the characters\n"
754 "of @var{str} beginning with index @var{start} (inclusive) and\n"
755 "ending with index @var{end} (exclusive).\n"
756 "@var{str} must be a string, @var{start} and @var{end} must be\n"
757 "exact integers satisfying:\n\n"
758 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
759 #define FUNC_NAME s_scm_substring_copy
761 /* For the Scheme version, START is mandatory, but for the C
762 version, it is optional. See scm_string_copy in srfi-13.c for a
768 SCM_VALIDATE_STRING (1, str
);
769 scm_i_get_substring_spec (scm_i_string_length (str
),
770 start
, &from
, end
, &to
);
771 return scm_i_substring_copy (str
, from
, to
);
775 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
776 (SCM str
, SCM start
, SCM end
),
777 "Return string that indirectly refers to the characters\n"
778 "of @var{str} beginning with index @var{start} (inclusive) and\n"
779 "ending with index @var{end} (exclusive).\n"
780 "@var{str} must be a string, @var{start} and @var{end} must be\n"
781 "exact integers satisfying:\n\n"
782 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
783 #define FUNC_NAME s_scm_substring_shared
785 size_t len
, from
, to
;
787 SCM_VALIDATE_STRING (1, str
);
788 len
= scm_i_string_length (str
);
789 from
= scm_to_unsigned_integer (start
, 0, len
);
790 if (SCM_UNBNDP (end
))
793 to
= scm_to_unsigned_integer (end
, from
, len
);
794 return scm_i_substring_shared (str
, from
, to
);
798 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
800 "Return a newly allocated string whose characters form the\n"
801 "concatenation of the given strings, @var{args}.")
802 #define FUNC_NAME s_scm_string_append
809 SCM_VALIDATE_REST_ARGUMENT (args
);
810 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
813 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
814 i
+= scm_i_string_length (s
);
816 res
= scm_i_make_string (i
, &data
);
817 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
821 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
822 len
= scm_i_string_length (s
);
823 memcpy (data
, scm_i_string_chars (s
), len
);
825 scm_remember_upto_here_1 (s
);
832 scm_is_string (SCM obj
)
834 return IS_STRING (obj
);
838 scm_from_locale_stringn (const char *str
, size_t len
)
843 if (len
== (size_t)-1)
845 res
= scm_i_make_string (len
, &dst
);
846 memcpy (dst
, str
, len
);
851 scm_from_locale_string (const char *str
)
853 return scm_from_locale_stringn (str
, -1);
857 scm_take_locale_stringn (char *str
, size_t len
)
861 if (len
== (size_t)-1)
865 /* Ensure STR is null terminated. A realloc for 1 extra byte should
866 often be satisfied from the alignment padding after the block, with
867 no actual data movement. */
868 str
= scm_realloc (str
, len
+1);
872 buf
= scm_i_take_stringbufn (str
, len
);
873 res
= scm_double_cell (STRING_TAG
,
875 (scm_t_bits
) 0, (scm_t_bits
) len
);
880 scm_take_locale_string (char *str
)
882 return scm_take_locale_stringn (str
, -1);
886 scm_to_locale_stringn (SCM str
, size_t *lenp
)
891 if (!scm_is_string (str
))
892 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
893 len
= scm_i_string_length (str
);
894 res
= scm_malloc (len
+ ((lenp
==NULL
)? 1 : 0));
895 memcpy (res
, scm_i_string_chars (str
), len
);
899 if (strlen (res
) != len
)
902 scm_misc_error (NULL
,
903 "string contains #\\nul character: ~S",
910 scm_remember_upto_here_1 (str
);
915 scm_to_locale_string (SCM str
)
917 return scm_to_locale_stringn (str
, NULL
);
921 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
925 if (!scm_is_string (str
))
926 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
927 len
= scm_i_string_length (str
);
928 memcpy (buf
, scm_i_string_chars (str
), (len
> max_len
)? max_len
: len
);
929 scm_remember_upto_here_1 (str
);
933 /* converts C scm_array of strings to SCM scm_list of strings. */
934 /* If argc < 0, a null terminated scm_array is assumed. */
936 scm_makfromstrs (int argc
, char **argv
)
941 for (i
= 0; argv
[i
]; i
++);
943 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
947 /* Return a newly allocated array of char pointers to each of the strings
948 in args, with a terminating NULL pointer. */
951 scm_i_allocate_string_pointers (SCM list
)
954 int len
= scm_ilength (list
);
958 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
960 scm_dynwind_begin (0);
962 result
= (char **) scm_malloc ((len
+ 1) * sizeof (char *));
964 scm_dynwind_unwind_handler (free
, result
, 0);
966 /* The list might be have been modified in another thread, so
967 we check LIST before each access.
969 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
971 result
[i
] = scm_to_locale_string (SCM_CAR (list
));
972 list
= SCM_CDR (list
);
980 scm_i_free_string_pointers (char **pointers
)
984 for (i
= 0; pointers
[i
]; i
++)
990 scm_i_get_substring_spec (size_t len
,
991 SCM start
, size_t *cstart
,
992 SCM end
, size_t *cend
)
994 if (SCM_UNBNDP (start
))
997 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
999 if (SCM_UNBNDP (end
))
1002 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1005 #if SCM_ENABLE_DEPRECATED
1007 /* When these definitions are removed, it becomes reasonable to use
1008 read-only strings for string literals. For that, change the reader
1009 to create string literals with scm_c_substring_read_only instead of
1010 with scm_c_substring_copy.
1014 scm_i_deprecated_stringp (SCM str
)
1016 scm_c_issue_deprecation_warning
1017 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1019 return scm_is_string (str
);
1023 scm_i_deprecated_string_chars (SCM str
)
1027 scm_c_issue_deprecation_warning
1028 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1030 /* We don't accept shared substrings here since they are not
1033 if (IS_SH_STRING (str
))
1034 scm_misc_error (NULL
,
1035 "SCM_STRING_CHARS does not work with shared substrings.",
1038 /* We explicitely test for read-only strings to produce a better
1042 if (IS_RO_STRING (str
))
1043 scm_misc_error (NULL
,
1044 "SCM_STRING_CHARS does not work with read-only strings.",
1047 /* The following is still wrong, of course...
1049 chars
= scm_i_string_writable_chars (str
);
1050 scm_i_string_stop_writing ();
1055 scm_i_deprecated_string_length (SCM str
)
1057 scm_c_issue_deprecation_warning
1058 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1059 return scm_c_string_length (str
);
1067 scm_nullstr
= scm_i_make_string (0, NULL
);
1069 #include "libguile/strings.x"