1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libguile/_scm.h"
28 #include "libguile/chars.h"
29 #include "libguile/root.h"
30 #include "libguile/strings.h"
31 #include "libguile/deprecation.h"
32 #include "libguile/validate.h"
42 * XXX - keeping an accurate refcount during GC seems to be quite
43 * tricky, so we just keep score of whether a stringbuf might be
44 * shared, not wether it definitely is.
46 * The scheme I (mvo) tried to keep an accurate reference count would
47 * recount all strings that point to a stringbuf during the mark-phase
48 * of the GC. This was done since one cannot access the stringbuf of
49 * a string when that string is freed (in order to decrease the
50 * reference count). The memory of the stringbuf might have been
51 * reused already for something completely different.
53 * This recounted worked for a small number of threads beating on
54 * cow-strings, but it failed randomly with more than 10 threads, say.
55 * I couldn't figure out what went wrong, so I used the conservative
56 * approach implemented below.
58 * A stringbuf needs to know its length, but only so that it can be
59 * reported when the stringbuf is freed.
61 * Stringbufs (and strings) are not stored very compactly: a stringbuf
62 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
63 * information. As a compensation, the code below is made more
64 * complicated by storing small strings inline in the double cell of a
65 * stringbuf. So we have fixstrings and bigstrings...
68 #define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
69 #define STRINGBUF_F_INLINE SCM_I_STRINGBUF_F_INLINE
71 #define STRINGBUF_TAG scm_tc7_stringbuf
72 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
73 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
75 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
76 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
77 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
78 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
80 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
81 ? STRINGBUF_INLINE_CHARS (buf) \
82 : STRINGBUF_OUTLINE_CHARS (buf))
83 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
84 ? STRINGBUF_INLINE_LENGTH (buf) \
85 : STRINGBUF_OUTLINE_LENGTH (buf))
87 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
89 #define SET_STRINGBUF_SHARED(buf) \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
100 static size_t lenhist
[1001];
104 make_stringbuf (size_t len
)
106 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
107 scm_i_symbol_chars, all stringbufs are null-terminated. Once
108 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
109 has been changed for scm_i_symbol_chars, this null-termination
120 if (len
<= STRINGBUF_MAX_INLINE_LEN
-1)
122 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_INLINE
| (len
<< 16),
127 char *mem
= scm_gc_malloc_pointerless (len
+ 1, "string");
129 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) mem
,
130 (scm_t_bits
) len
, (scm_t_bits
) 0);
134 /* Return a new stringbuf whose underlying storage consists of the LEN+1
135 octets pointed to by STR (the last octet is zero). */
137 scm_i_take_stringbufn (char *str
, size_t len
)
139 scm_gc_register_collectable_memory (str
, len
+ 1, "stringbuf");
141 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) str
,
142 (scm_t_bits
) len
, (scm_t_bits
) 0);
146 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
148 /* Copy-on-write strings.
151 #define STRING_TAG scm_tc7_string
153 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
154 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
155 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
157 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
158 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
160 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
162 /* Read-only strings.
165 #define RO_STRING_TAG scm_tc7_ro_string
166 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
168 /* Mutation-sharing substrings
171 #define SH_STRING_TAG (scm_tc7_string + 0x100)
173 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
174 /* START and LENGTH as for STRINGs. */
176 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
179 scm_i_make_string (size_t len
, char **charsp
)
181 SCM buf
= make_stringbuf (len
);
184 *charsp
= STRINGBUF_CHARS (buf
);
185 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
186 (scm_t_bits
)0, (scm_t_bits
) len
);
191 validate_substring_args (SCM str
, size_t start
, size_t end
)
193 if (!IS_STRING (str
))
194 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
195 if (start
> STRING_LENGTH (str
))
196 scm_out_of_range (NULL
, scm_from_size_t (start
));
197 if (end
> STRING_LENGTH (str
) || end
< start
)
198 scm_out_of_range (NULL
, scm_from_size_t (end
));
202 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
204 *start
= STRING_START (*str
);
205 if (IS_SH_STRING (*str
))
207 *str
= SH_STRING_STRING (*str
);
208 *start
+= STRING_START (*str
);
210 *buf
= STRING_STRINGBUF (*str
);
214 scm_i_substring (SCM str
, size_t start
, size_t end
)
218 get_str_buf_start (&str
, &buf
, &str_start
);
219 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
220 SET_STRINGBUF_SHARED (buf
);
221 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
222 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
223 (scm_t_bits
)str_start
+ start
,
224 (scm_t_bits
) end
- start
);
228 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
232 get_str_buf_start (&str
, &buf
, &str_start
);
233 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
234 SET_STRINGBUF_SHARED (buf
);
235 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
236 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
237 (scm_t_bits
)str_start
+ start
,
238 (scm_t_bits
) end
- start
);
242 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
244 size_t len
= end
- start
;
247 get_str_buf_start (&str
, &buf
, &str_start
);
248 my_buf
= make_stringbuf (len
);
249 memcpy (STRINGBUF_CHARS (my_buf
),
250 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
251 scm_remember_upto_here_1 (buf
);
252 return scm_double_cell (STRING_TAG
, SCM_UNPACK(my_buf
),
253 (scm_t_bits
)0, (scm_t_bits
) len
);
257 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
259 if (start
== 0 && end
== STRING_LENGTH (str
))
263 size_t len
= end
- start
;
264 if (IS_SH_STRING (str
))
266 start
+= STRING_START (str
);
267 str
= SH_STRING_STRING (str
);
269 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
270 (scm_t_bits
)start
, (scm_t_bits
) len
);
275 scm_c_substring (SCM str
, size_t start
, size_t end
)
277 validate_substring_args (str
, start
, end
);
278 return scm_i_substring (str
, start
, end
);
282 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
284 validate_substring_args (str
, start
, end
);
285 return scm_i_substring_read_only (str
, start
, end
);
289 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
291 validate_substring_args (str
, start
, end
);
292 return scm_i_substring_copy (str
, start
, end
);
296 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
298 validate_substring_args (str
, start
, end
);
299 return scm_i_substring_shared (str
, start
, end
);
303 /* Internal accessors
307 scm_i_string_length (SCM str
)
309 return STRING_LENGTH (str
);
313 scm_i_string_chars (SCM str
)
317 get_str_buf_start (&str
, &buf
, &start
);
318 return STRINGBUF_CHARS (buf
) + start
;
322 scm_i_string_writable_chars (SCM orig_str
)
324 SCM buf
, str
= orig_str
;
327 get_str_buf_start (&str
, &buf
, &start
);
328 if (IS_RO_STRING (str
))
329 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
331 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
332 if (STRINGBUF_SHARED (buf
))
334 /* Clone stringbuf. */
336 size_t len
= STRING_LENGTH (str
);
339 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
341 new_buf
= make_stringbuf (len
);
342 memcpy (STRINGBUF_CHARS (new_buf
),
343 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
345 start
-= STRING_START (str
);
347 /* FIXME: The following operations are not atomic, so other threads
348 looking at STR may see an inconsistent state. Nevertheless it can't
349 hurt much since (i) accessing STR while it is being mutated can't
350 yield a crash, and (ii) concurrent accesses to STR should be
351 protected by a mutex at the application level. The latter may not
352 apply when STR != ORIG_STR, though. */
353 SET_STRING_START (str
, 0);
354 SET_STRING_STRINGBUF (str
, new_buf
);
358 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
361 return STRINGBUF_CHARS (buf
) + start
;
365 scm_i_string_stop_writing (void)
367 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
372 Basic symbol creation and accessing is done here, the rest is in
373 symbols.[hc]. This has been done to keep stringbufs and the
374 internals of strings and string-like objects confined to this file.
377 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
380 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
381 unsigned long hash
, SCM props
)
384 size_t start
= STRING_START (name
);
385 size_t length
= STRING_LENGTH (name
);
387 if (IS_SH_STRING (name
))
389 name
= SH_STRING_STRING (name
);
390 start
+= STRING_START (name
);
392 buf
= SYMBOL_STRINGBUF (name
);
394 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
397 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
398 SET_STRINGBUF_SHARED (buf
);
399 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
404 SCM new_buf
= make_stringbuf (length
);
405 memcpy (STRINGBUF_CHARS (new_buf
),
406 STRINGBUF_CHARS (buf
) + start
, length
);
409 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
410 (scm_t_bits
) hash
, SCM_UNPACK (props
));
414 scm_i_c_make_symbol (const char *name
, size_t len
,
415 scm_t_bits flags
, unsigned long hash
, SCM props
)
417 SCM buf
= make_stringbuf (len
);
418 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
420 return scm_immutable_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
421 (scm_t_bits
) hash
, SCM_UNPACK (props
));
424 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
425 underlying storage. */
427 scm_i_c_take_symbol (char *name
, size_t len
,
428 scm_t_bits flags
, unsigned long hash
, SCM props
)
430 SCM buf
= scm_i_take_stringbufn (name
, len
);
432 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
433 (scm_t_bits
) hash
, SCM_UNPACK (props
));
437 scm_i_symbol_length (SCM sym
)
439 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
443 scm_c_symbol_length (SCM sym
)
444 #define FUNC_NAME "scm_c_symbol_length"
446 SCM_VALIDATE_SYMBOL (1, sym
);
448 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
453 scm_i_symbol_chars (SCM sym
)
455 SCM buf
= SYMBOL_STRINGBUF (sym
);
456 return STRINGBUF_CHARS (buf
);
460 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
462 SCM buf
= SYMBOL_STRINGBUF (sym
);
463 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
464 SET_STRINGBUF_SHARED (buf
);
465 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
466 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
467 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
475 SCM
scm_sys_string_dump (SCM
);
476 SCM
scm_sys_symbol_dump (SCM
);
477 SCM
scm_sys_stringbuf_hist (void);
479 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0,
482 #define FUNC_NAME s_scm_sys_string_dump
484 SCM_VALIDATE_STRING (1, str
);
485 fprintf (stderr
, "%p:\n", str
);
486 fprintf (stderr
, " start: %u\n", STRING_START (str
));
487 fprintf (stderr
, " len: %u\n", STRING_LENGTH (str
));
488 if (IS_SH_STRING (str
))
490 fprintf (stderr
, " string: %p\n", SH_STRING_STRING (str
));
491 fprintf (stderr
, "\n");
492 scm_sys_string_dump (SH_STRING_STRING (str
));
496 SCM buf
= STRING_STRINGBUF (str
);
497 fprintf (stderr
, " buf: %p\n", buf
);
498 fprintf (stderr
, " chars: %p\n", STRINGBUF_CHARS (buf
));
499 fprintf (stderr
, " length: %u\n", STRINGBUF_LENGTH (buf
));
500 fprintf (stderr
, " flags: %x\n", (SCM_CELL_WORD_0 (buf
) & 0x300));
502 return SCM_UNSPECIFIED
;
506 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0,
509 #define FUNC_NAME s_scm_sys_symbol_dump
511 SCM_VALIDATE_SYMBOL (1, sym
);
512 fprintf (stderr
, "%p:\n", sym
);
513 fprintf (stderr
, " hash: %lu\n", scm_i_symbol_hash (sym
));
515 SCM buf
= SYMBOL_STRINGBUF (sym
);
516 fprintf (stderr
, " buf: %p\n", buf
);
517 fprintf (stderr
, " chars: %p\n", STRINGBUF_CHARS (buf
));
518 fprintf (stderr
, " length: %u\n", STRINGBUF_LENGTH (buf
));
519 fprintf (stderr
, " shared: %u\n", STRINGBUF_SHARED (buf
));
521 return SCM_UNSPECIFIED
;
525 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0,
528 #define FUNC_NAME s_scm_sys_stringbuf_hist
531 for (i
= 0; i
< 1000; i
++)
533 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
534 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
535 return SCM_UNSPECIFIED
;
543 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
545 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
546 #define FUNC_NAME s_scm_string_p
548 return scm_from_bool (IS_STRING (obj
));
553 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
555 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
557 "@deffnx {Scheme Procedure} list->string chrs\n"
558 "Return a newly allocated string composed of the arguments,\n"
560 #define FUNC_NAME s_scm_string
567 long i
= scm_ilength (chrs
);
569 SCM_ASSERT (i
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
573 result
= scm_i_make_string (len
, &data
);
574 while (len
> 0 && scm_is_pair (chrs
))
576 SCM elt
= SCM_CAR (chrs
);
578 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
579 *data
++ = SCM_CHAR (elt
);
580 chrs
= SCM_CDR (chrs
);
584 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
585 if (!scm_is_null (chrs
))
586 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
592 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
594 "Return a newly allocated string of\n"
595 "length @var{k}. If @var{chr} is given, then all elements of\n"
596 "the string are initialized to @var{chr}, otherwise the contents\n"
597 "of the @var{string} are unspecified.")
598 #define FUNC_NAME s_scm_make_string
600 return scm_c_make_string (scm_to_size_t (k
), chr
);
605 scm_c_make_string (size_t len
, SCM chr
)
606 #define FUNC_NAME NULL
609 SCM res
= scm_i_make_string (len
, &dst
);
611 if (!SCM_UNBNDP (chr
))
613 SCM_VALIDATE_CHAR (0, chr
);
614 memset (dst
, SCM_CHAR (chr
), len
);
621 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
623 "Return the number of characters in @var{string}.")
624 #define FUNC_NAME s_scm_string_length
626 SCM_VALIDATE_STRING (1, string
);
627 return scm_from_size_t (STRING_LENGTH (string
));
632 scm_c_string_length (SCM string
)
634 if (!IS_STRING (string
))
635 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
636 return STRING_LENGTH (string
);
639 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
641 "Return character @var{k} of @var{str} using zero-origin\n"
642 "indexing. @var{k} must be a valid index of @var{str}.")
643 #define FUNC_NAME s_scm_string_ref
648 SCM_VALIDATE_STRING (1, str
);
650 len
= scm_i_string_length (str
);
651 if (SCM_LIKELY (len
> 0))
652 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
654 scm_out_of_range (NULL
, k
);
656 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
661 scm_c_string_ref (SCM str
, size_t p
)
663 if (p
>= scm_i_string_length (str
))
664 scm_out_of_range (NULL
, scm_from_size_t (p
));
665 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
668 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
669 (SCM str
, SCM k
, SCM chr
),
670 "Store @var{chr} in element @var{k} of @var{str} and return\n"
671 "an unspecified value. @var{k} must be a valid index of\n"
673 #define FUNC_NAME s_scm_string_set_x
678 SCM_VALIDATE_STRING (1, str
);
680 len
= scm_i_string_length (str
);
681 if (SCM_LIKELY (len
> 0))
682 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
684 scm_out_of_range (NULL
, k
);
686 SCM_VALIDATE_CHAR (3, chr
);
688 char *dst
= scm_i_string_writable_chars (str
);
689 dst
[idx
] = SCM_CHAR (chr
);
690 scm_i_string_stop_writing ();
692 return SCM_UNSPECIFIED
;
697 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
699 if (p
>= scm_i_string_length (str
))
700 scm_out_of_range (NULL
, scm_from_size_t (p
));
702 char *dst
= scm_i_string_writable_chars (str
);
703 dst
[p
] = SCM_CHAR (chr
);
704 scm_i_string_stop_writing ();
708 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
709 (SCM str
, SCM start
, SCM end
),
710 "Return a newly allocated string formed from the characters\n"
711 "of @var{str} beginning with index @var{start} (inclusive) and\n"
712 "ending with index @var{end} (exclusive).\n"
713 "@var{str} must be a string, @var{start} and @var{end} must be\n"
714 "exact integers satisfying:\n\n"
715 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
716 #define FUNC_NAME s_scm_substring
718 size_t len
, from
, to
;
720 SCM_VALIDATE_STRING (1, str
);
721 len
= scm_i_string_length (str
);
722 from
= scm_to_unsigned_integer (start
, 0, len
);
723 if (SCM_UNBNDP (end
))
726 to
= scm_to_unsigned_integer (end
, from
, len
);
727 return scm_i_substring (str
, from
, to
);
731 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
732 (SCM str
, SCM start
, SCM end
),
733 "Return a newly allocated string formed from the characters\n"
734 "of @var{str} beginning with index @var{start} (inclusive) and\n"
735 "ending with index @var{end} (exclusive).\n"
736 "@var{str} must be a string, @var{start} and @var{end} must be\n"
737 "exact integers satisfying:\n"
739 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
741 "The returned string is read-only.\n")
742 #define FUNC_NAME s_scm_substring_read_only
744 size_t len
, from
, to
;
746 SCM_VALIDATE_STRING (1, str
);
747 len
= scm_i_string_length (str
);
748 from
= scm_to_unsigned_integer (start
, 0, len
);
749 if (SCM_UNBNDP (end
))
752 to
= scm_to_unsigned_integer (end
, from
, len
);
753 return scm_i_substring_read_only (str
, from
, to
);
757 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
758 (SCM str
, SCM start
, SCM end
),
759 "Return a newly allocated string formed from the characters\n"
760 "of @var{str} beginning with index @var{start} (inclusive) and\n"
761 "ending with index @var{end} (exclusive).\n"
762 "@var{str} must be a string, @var{start} and @var{end} must be\n"
763 "exact integers satisfying:\n\n"
764 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
765 #define FUNC_NAME s_scm_substring_copy
767 /* For the Scheme version, START is mandatory, but for the C
768 version, it is optional. See scm_string_copy in srfi-13.c for a
774 SCM_VALIDATE_STRING (1, str
);
775 scm_i_get_substring_spec (scm_i_string_length (str
),
776 start
, &from
, end
, &to
);
777 return scm_i_substring_copy (str
, from
, to
);
781 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
782 (SCM str
, SCM start
, SCM end
),
783 "Return string that indirectly refers to the characters\n"
784 "of @var{str} beginning with index @var{start} (inclusive) and\n"
785 "ending with index @var{end} (exclusive).\n"
786 "@var{str} must be a string, @var{start} and @var{end} must be\n"
787 "exact integers satisfying:\n\n"
788 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
789 #define FUNC_NAME s_scm_substring_shared
791 size_t len
, from
, to
;
793 SCM_VALIDATE_STRING (1, str
);
794 len
= scm_i_string_length (str
);
795 from
= scm_to_unsigned_integer (start
, 0, len
);
796 if (SCM_UNBNDP (end
))
799 to
= scm_to_unsigned_integer (end
, from
, len
);
800 return scm_i_substring_shared (str
, from
, to
);
804 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
806 "Return a newly allocated string whose characters form the\n"
807 "concatenation of the given strings, @var{args}.")
808 #define FUNC_NAME s_scm_string_append
815 SCM_VALIDATE_REST_ARGUMENT (args
);
816 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
819 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
820 i
+= scm_i_string_length (s
);
822 res
= scm_i_make_string (i
, &data
);
823 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
827 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
828 len
= scm_i_string_length (s
);
829 memcpy (data
, scm_i_string_chars (s
), len
);
831 scm_remember_upto_here_1 (s
);
838 scm_is_string (SCM obj
)
840 return IS_STRING (obj
);
844 scm_from_locale_stringn (const char *str
, size_t len
)
849 if (len
== (size_t)-1)
851 res
= scm_i_make_string (len
, &dst
);
852 memcpy (dst
, str
, len
);
857 scm_from_locale_string (const char *str
)
859 return scm_from_locale_stringn (str
, -1);
863 scm_take_locale_stringn (char *str
, size_t len
)
867 if (len
== (size_t)-1)
871 /* Ensure STR is null terminated. A realloc for 1 extra byte should
872 often be satisfied from the alignment padding after the block, with
873 no actual data movement. */
874 str
= scm_realloc (str
, len
+1);
878 buf
= scm_i_take_stringbufn (str
, len
);
879 res
= scm_double_cell (STRING_TAG
,
881 (scm_t_bits
) 0, (scm_t_bits
) len
);
886 scm_take_locale_string (char *str
)
888 return scm_take_locale_stringn (str
, -1);
892 scm_to_locale_stringn (SCM str
, size_t *lenp
)
897 if (!scm_is_string (str
))
898 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
899 len
= scm_i_string_length (str
);
900 res
= scm_malloc (len
+ ((lenp
==NULL
)? 1 : 0));
901 memcpy (res
, scm_i_string_chars (str
), len
);
905 if (strlen (res
) != len
)
908 scm_misc_error (NULL
,
909 "string contains #\\nul character: ~S",
916 scm_remember_upto_here_1 (str
);
921 scm_to_locale_string (SCM str
)
923 return scm_to_locale_stringn (str
, NULL
);
927 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
931 if (!scm_is_string (str
))
932 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
933 len
= scm_i_string_length (str
);
934 memcpy (buf
, scm_i_string_chars (str
), (len
> max_len
)? max_len
: len
);
935 scm_remember_upto_here_1 (str
);
939 /* converts C scm_array of strings to SCM scm_list of strings. */
940 /* If argc < 0, a null terminated scm_array is assumed. */
942 scm_makfromstrs (int argc
, char **argv
)
947 for (i
= 0; argv
[i
]; i
++);
949 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
953 /* Return a newly allocated array of char pointers to each of the strings
954 in args, with a terminating NULL pointer. */
957 scm_i_allocate_string_pointers (SCM list
)
958 #define FUNC_NAME "scm_i_allocate_string_pointers"
961 int len
= scm_ilength (list
);
965 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
967 result
= scm_gc_malloc ((len
+ 1) * sizeof (char *),
971 /* The list might be have been modified in another thread, so
972 we check LIST before each access.
974 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
979 str
= SCM_CAR (list
);
980 len
= scm_c_string_length (str
);
982 result
[i
] = scm_gc_malloc_pointerless (len
+ 1, "string pointers");
983 memcpy (result
[i
], scm_i_string_chars (str
), len
);
984 result
[i
][len
] = '\0';
986 list
= SCM_CDR (list
);
994 scm_i_get_substring_spec (size_t len
,
995 SCM start
, size_t *cstart
,
996 SCM end
, size_t *cend
)
998 if (SCM_UNBNDP (start
))
1001 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
1003 if (SCM_UNBNDP (end
))
1006 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1009 #if SCM_ENABLE_DEPRECATED
1011 /* When these definitions are removed, it becomes reasonable to use
1012 read-only strings for string literals. For that, change the reader
1013 to create string literals with scm_c_substring_read_only instead of
1014 with scm_c_substring_copy.
1018 scm_i_deprecated_stringp (SCM str
)
1020 scm_c_issue_deprecation_warning
1021 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1023 return scm_is_string (str
);
1027 scm_i_deprecated_string_chars (SCM str
)
1031 scm_c_issue_deprecation_warning
1032 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1034 /* We don't accept shared substrings here since they are not
1037 if (IS_SH_STRING (str
))
1038 scm_misc_error (NULL
,
1039 "SCM_STRING_CHARS does not work with shared substrings.",
1042 /* We explicitely test for read-only strings to produce a better
1046 if (IS_RO_STRING (str
))
1047 scm_misc_error (NULL
,
1048 "SCM_STRING_CHARS does not work with read-only strings.",
1051 /* The following is still wrong, of course...
1053 chars
= scm_i_string_writable_chars (str
);
1054 scm_i_string_stop_writing ();
1059 scm_i_deprecated_string_length (SCM str
)
1061 scm_c_issue_deprecation_warning
1062 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1063 return scm_c_string_length (str
);
1071 scm_nullstr
= scm_i_make_string (0, NULL
);
1073 #include "libguile/strings.x"