Allow the static allocation of all types of subrs.
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
2 *
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
7 *
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
16 */
17
18
19 \f
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #include <string.h>
25 #include <stdio.h>
26
27 #include "libguile/_scm.h"
28 #include "libguile/chars.h"
29 #include "libguile/root.h"
30 #include "libguile/strings.h"
31 #include "libguile/deprecation.h"
32 #include "libguile/validate.h"
33
34 \f
35
36 /* {Strings}
37 */
38
39
40 /* Stringbufs
41 *
42 * XXX - keeping an accurate refcount during GC seems to be quite
43 * tricky, so we just keep score of whether a stringbuf might be
44 * shared, not wether it definitely is.
45 *
46 * The scheme I (mvo) tried to keep an accurate reference count would
47 * recount all strings that point to a stringbuf during the mark-phase
48 * of the GC. This was done since one cannot access the stringbuf of
49 * a string when that string is freed (in order to decrease the
50 * reference count). The memory of the stringbuf might have been
51 * reused already for something completely different.
52 *
53 * This recounted worked for a small number of threads beating on
54 * cow-strings, but it failed randomly with more than 10 threads, say.
55 * I couldn't figure out what went wrong, so I used the conservative
56 * approach implemented below.
57 *
58 * A stringbuf needs to know its length, but only so that it can be
59 * reported when the stringbuf is freed.
60 *
61 * Stringbufs (and strings) are not stored very compactly: a stringbuf
62 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
63 * information. As a compensation, the code below is made more
64 * complicated by storing small strings inline in the double cell of a
65 * stringbuf. So we have fixstrings and bigstrings...
66 */
67
68 #define STRINGBUF_F_SHARED SCM_I_STRINGBUF_F_SHARED
69 #define STRINGBUF_F_INLINE SCM_I_STRINGBUF_F_INLINE
70
71 #define STRINGBUF_TAG scm_tc7_stringbuf
72 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
73 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
74
75 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
76 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
77 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
78 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
79
80 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
81 ? STRINGBUF_INLINE_CHARS (buf) \
82 : STRINGBUF_OUTLINE_CHARS (buf))
83 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
84 ? STRINGBUF_INLINE_LENGTH (buf) \
85 : STRINGBUF_OUTLINE_LENGTH (buf))
86
87 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
88
89 #define SET_STRINGBUF_SHARED(buf) \
90 do \
91 { \
92 /* Don't modify BUF if it's already marked as shared since it might be \
93 a read-only, statically allocated stringbuf. */ \
94 if (SCM_LIKELY (!STRINGBUF_SHARED (buf))) \
95 SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
96 } \
97 while (0)
98
99 #if SCM_DEBUG
100 static size_t lenhist[1001];
101 #endif
102
103 static SCM
104 make_stringbuf (size_t len)
105 {
106 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
107 scm_i_symbol_chars, all stringbufs are null-terminated. Once
108 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
109 has been changed for scm_i_symbol_chars, this null-termination
110 can be dropped.
111 */
112
113 #if SCM_DEBUG
114 if (len < 1000)
115 lenhist[len]++;
116 else
117 lenhist[1000]++;
118 #endif
119
120 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
121 {
122 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
123 0, 0, 0);
124 }
125 else
126 {
127 char *mem = scm_gc_malloc_pointerless (len + 1, "string");
128 mem[len] = '\0';
129 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
130 (scm_t_bits) len, (scm_t_bits) 0);
131 }
132 }
133
134 /* Return a new stringbuf whose underlying storage consists of the LEN+1
135 octets pointed to by STR (the last octet is zero). */
136 SCM
137 scm_i_take_stringbufn (char *str, size_t len)
138 {
139 scm_gc_register_collectable_memory (str, len + 1, "stringbuf");
140
141 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
142 (scm_t_bits) len, (scm_t_bits) 0);
143 }
144
145
146 scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
147
148 /* Copy-on-write strings.
149 */
150
151 #define STRING_TAG scm_tc7_string
152
153 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
154 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
155 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
156
157 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
158 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
159
160 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
161
162 /* Read-only strings.
163 */
164
165 #define RO_STRING_TAG scm_tc7_ro_string
166 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
167
168 /* Mutation-sharing substrings
169 */
170
171 #define SH_STRING_TAG (scm_tc7_string + 0x100)
172
173 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
174 /* START and LENGTH as for STRINGs. */
175
176 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
177
178 SCM
179 scm_i_make_string (size_t len, char **charsp)
180 {
181 SCM buf = make_stringbuf (len);
182 SCM res;
183 if (charsp)
184 *charsp = STRINGBUF_CHARS (buf);
185 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
186 (scm_t_bits)0, (scm_t_bits) len);
187 return res;
188 }
189
190 static void
191 validate_substring_args (SCM str, size_t start, size_t end)
192 {
193 if (!IS_STRING (str))
194 scm_wrong_type_arg_msg (NULL, 0, str, "string");
195 if (start > STRING_LENGTH (str))
196 scm_out_of_range (NULL, scm_from_size_t (start));
197 if (end > STRING_LENGTH (str) || end < start)
198 scm_out_of_range (NULL, scm_from_size_t (end));
199 }
200
201 static inline void
202 get_str_buf_start (SCM *str, SCM *buf, size_t *start)
203 {
204 *start = STRING_START (*str);
205 if (IS_SH_STRING (*str))
206 {
207 *str = SH_STRING_STRING (*str);
208 *start += STRING_START (*str);
209 }
210 *buf = STRING_STRINGBUF (*str);
211 }
212
213 SCM
214 scm_i_substring (SCM str, size_t start, size_t end)
215 {
216 SCM buf;
217 size_t str_start;
218 get_str_buf_start (&str, &buf, &str_start);
219 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
220 SET_STRINGBUF_SHARED (buf);
221 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
222 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
223 (scm_t_bits)str_start + start,
224 (scm_t_bits) end - start);
225 }
226
227 SCM
228 scm_i_substring_read_only (SCM str, size_t start, size_t end)
229 {
230 SCM buf;
231 size_t str_start;
232 get_str_buf_start (&str, &buf, &str_start);
233 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
234 SET_STRINGBUF_SHARED (buf);
235 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
236 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
237 (scm_t_bits)str_start + start,
238 (scm_t_bits) end - start);
239 }
240
241 SCM
242 scm_i_substring_copy (SCM str, size_t start, size_t end)
243 {
244 size_t len = end - start;
245 SCM buf, my_buf;
246 size_t str_start;
247 get_str_buf_start (&str, &buf, &str_start);
248 my_buf = make_stringbuf (len);
249 memcpy (STRINGBUF_CHARS (my_buf),
250 STRINGBUF_CHARS (buf) + str_start + start, len);
251 scm_remember_upto_here_1 (buf);
252 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
253 (scm_t_bits)0, (scm_t_bits) len);
254 }
255
256 SCM
257 scm_i_substring_shared (SCM str, size_t start, size_t end)
258 {
259 if (start == 0 && end == STRING_LENGTH (str))
260 return str;
261 else
262 {
263 size_t len = end - start;
264 if (IS_SH_STRING (str))
265 {
266 start += STRING_START (str);
267 str = SH_STRING_STRING (str);
268 }
269 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
270 (scm_t_bits)start, (scm_t_bits) len);
271 }
272 }
273
274 SCM
275 scm_c_substring (SCM str, size_t start, size_t end)
276 {
277 validate_substring_args (str, start, end);
278 return scm_i_substring (str, start, end);
279 }
280
281 SCM
282 scm_c_substring_read_only (SCM str, size_t start, size_t end)
283 {
284 validate_substring_args (str, start, end);
285 return scm_i_substring_read_only (str, start, end);
286 }
287
288 SCM
289 scm_c_substring_copy (SCM str, size_t start, size_t end)
290 {
291 validate_substring_args (str, start, end);
292 return scm_i_substring_copy (str, start, end);
293 }
294
295 SCM
296 scm_c_substring_shared (SCM str, size_t start, size_t end)
297 {
298 validate_substring_args (str, start, end);
299 return scm_i_substring_shared (str, start, end);
300 }
301
302 \f
303 /* Internal accessors
304 */
305
306 size_t
307 scm_i_string_length (SCM str)
308 {
309 return STRING_LENGTH (str);
310 }
311
312 const char *
313 scm_i_string_chars (SCM str)
314 {
315 SCM buf;
316 size_t start;
317 get_str_buf_start (&str, &buf, &start);
318 return STRINGBUF_CHARS (buf) + start;
319 }
320
321 char *
322 scm_i_string_writable_chars (SCM orig_str)
323 {
324 SCM buf, str = orig_str;
325 size_t start;
326
327 get_str_buf_start (&str, &buf, &start);
328 if (IS_RO_STRING (str))
329 scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
330
331 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
332 if (STRINGBUF_SHARED (buf))
333 {
334 /* Clone stringbuf. */
335
336 size_t len = STRING_LENGTH (str);
337 SCM new_buf;
338
339 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
340
341 new_buf = make_stringbuf (len);
342 memcpy (STRINGBUF_CHARS (new_buf),
343 STRINGBUF_CHARS (buf) + STRING_START (str), len);
344
345 start -= STRING_START (str);
346
347 /* FIXME: The following operations are not atomic, so other threads
348 looking at STR may see an inconsistent state. Nevertheless it can't
349 hurt much since (i) accessing STR while it is being mutated can't
350 yield a crash, and (ii) concurrent accesses to STR should be
351 protected by a mutex at the application level. The latter may not
352 apply when STR != ORIG_STR, though. */
353 SET_STRING_START (str, 0);
354 SET_STRING_STRINGBUF (str, new_buf);
355
356 buf = new_buf;
357
358 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
359 }
360
361 return STRINGBUF_CHARS (buf) + start;
362 }
363
364 void
365 scm_i_string_stop_writing (void)
366 {
367 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
368 }
369
370 /* Symbols.
371
372 Basic symbol creation and accessing is done here, the rest is in
373 symbols.[hc]. This has been done to keep stringbufs and the
374 internals of strings and string-like objects confined to this file.
375 */
376
377 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
378
379 SCM
380 scm_i_make_symbol (SCM name, scm_t_bits flags,
381 unsigned long hash, SCM props)
382 {
383 SCM buf;
384 size_t start = STRING_START (name);
385 size_t length = STRING_LENGTH (name);
386
387 if (IS_SH_STRING (name))
388 {
389 name = SH_STRING_STRING (name);
390 start += STRING_START (name);
391 }
392 buf = SYMBOL_STRINGBUF (name);
393
394 if (start == 0 && length == STRINGBUF_LENGTH (buf))
395 {
396 /* reuse buf. */
397 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
398 SET_STRINGBUF_SHARED (buf);
399 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
400 }
401 else
402 {
403 /* make new buf. */
404 SCM new_buf = make_stringbuf (length);
405 memcpy (STRINGBUF_CHARS (new_buf),
406 STRINGBUF_CHARS (buf) + start, length);
407 buf = new_buf;
408 }
409 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
410 (scm_t_bits) hash, SCM_UNPACK (props));
411 }
412
413 SCM
414 scm_i_c_make_symbol (const char *name, size_t len,
415 scm_t_bits flags, unsigned long hash, SCM props)
416 {
417 SCM buf = make_stringbuf (len);
418 memcpy (STRINGBUF_CHARS (buf), name, len);
419
420 return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
421 (scm_t_bits) hash, SCM_UNPACK (props));
422 }
423
424 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
425 underlying storage. */
426 SCM
427 scm_i_c_take_symbol (char *name, size_t len,
428 scm_t_bits flags, unsigned long hash, SCM props)
429 {
430 SCM buf = scm_i_take_stringbufn (name, len);
431
432 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
433 (scm_t_bits) hash, SCM_UNPACK (props));
434 }
435
436 size_t
437 scm_i_symbol_length (SCM sym)
438 {
439 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
440 }
441
442 size_t
443 scm_c_symbol_length (SCM sym)
444 #define FUNC_NAME "scm_c_symbol_length"
445 {
446 SCM_VALIDATE_SYMBOL (1, sym);
447
448 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
449 }
450 #undef FUNC_NAME
451
452 const char *
453 scm_i_symbol_chars (SCM sym)
454 {
455 SCM buf = SYMBOL_STRINGBUF (sym);
456 return STRINGBUF_CHARS (buf);
457 }
458
459 SCM
460 scm_i_symbol_substring (SCM sym, size_t start, size_t end)
461 {
462 SCM buf = SYMBOL_STRINGBUF (sym);
463 scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
464 SET_STRINGBUF_SHARED (buf);
465 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
466 return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
467 (scm_t_bits)start, (scm_t_bits) end - start);
468 }
469
470 /* Debugging
471 */
472
473 #if SCM_DEBUG
474
475 SCM scm_sys_string_dump (SCM);
476 SCM scm_sys_symbol_dump (SCM);
477 SCM scm_sys_stringbuf_hist (void);
478
479 SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
480 (SCM str),
481 "")
482 #define FUNC_NAME s_scm_sys_string_dump
483 {
484 SCM_VALIDATE_STRING (1, str);
485 fprintf (stderr, "%p:\n", str);
486 fprintf (stderr, " start: %u\n", STRING_START (str));
487 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
488 if (IS_SH_STRING (str))
489 {
490 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
491 fprintf (stderr, "\n");
492 scm_sys_string_dump (SH_STRING_STRING (str));
493 }
494 else
495 {
496 SCM buf = STRING_STRINGBUF (str);
497 fprintf (stderr, " buf: %p\n", buf);
498 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
499 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
500 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
501 }
502 return SCM_UNSPECIFIED;
503 }
504 #undef FUNC_NAME
505
506 SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
507 (SCM sym),
508 "")
509 #define FUNC_NAME s_scm_sys_symbol_dump
510 {
511 SCM_VALIDATE_SYMBOL (1, sym);
512 fprintf (stderr, "%p:\n", sym);
513 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
514 {
515 SCM buf = SYMBOL_STRINGBUF (sym);
516 fprintf (stderr, " buf: %p\n", buf);
517 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
518 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
519 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
520 }
521 return SCM_UNSPECIFIED;
522 }
523 #undef FUNC_NAME
524
525 SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
526 (void),
527 "")
528 #define FUNC_NAME s_scm_sys_stringbuf_hist
529 {
530 int i;
531 for (i = 0; i < 1000; i++)
532 if (lenhist[i])
533 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
534 fprintf (stderr, ">999: %u\n", lenhist[1000]);
535 return SCM_UNSPECIFIED;
536 }
537 #undef FUNC_NAME
538
539 #endif
540
541 \f
542
543 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
544 (SCM obj),
545 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
546 #define FUNC_NAME s_scm_string_p
547 {
548 return scm_from_bool (IS_STRING (obj));
549 }
550 #undef FUNC_NAME
551
552
553 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
554
555 SCM_DEFINE (scm_string, "string", 0, 0, 1,
556 (SCM chrs),
557 "@deffnx {Scheme Procedure} list->string chrs\n"
558 "Return a newly allocated string composed of the arguments,\n"
559 "@var{chrs}.")
560 #define FUNC_NAME s_scm_string
561 {
562 SCM result;
563 size_t len;
564 char *data;
565
566 {
567 long i = scm_ilength (chrs);
568
569 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
570 len = i;
571 }
572
573 result = scm_i_make_string (len, &data);
574 while (len > 0 && scm_is_pair (chrs))
575 {
576 SCM elt = SCM_CAR (chrs);
577
578 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
579 *data++ = SCM_CHAR (elt);
580 chrs = SCM_CDR (chrs);
581 len--;
582 }
583 if (len > 0)
584 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
585 if (!scm_is_null (chrs))
586 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
587
588 return result;
589 }
590 #undef FUNC_NAME
591
592 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
593 (SCM k, SCM chr),
594 "Return a newly allocated string of\n"
595 "length @var{k}. If @var{chr} is given, then all elements of\n"
596 "the string are initialized to @var{chr}, otherwise the contents\n"
597 "of the @var{string} are unspecified.")
598 #define FUNC_NAME s_scm_make_string
599 {
600 return scm_c_make_string (scm_to_size_t (k), chr);
601 }
602 #undef FUNC_NAME
603
604 SCM
605 scm_c_make_string (size_t len, SCM chr)
606 #define FUNC_NAME NULL
607 {
608 char *dst;
609 SCM res = scm_i_make_string (len, &dst);
610
611 if (!SCM_UNBNDP (chr))
612 {
613 SCM_VALIDATE_CHAR (0, chr);
614 memset (dst, SCM_CHAR (chr), len);
615 }
616
617 return res;
618 }
619 #undef FUNC_NAME
620
621 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
622 (SCM string),
623 "Return the number of characters in @var{string}.")
624 #define FUNC_NAME s_scm_string_length
625 {
626 SCM_VALIDATE_STRING (1, string);
627 return scm_from_size_t (STRING_LENGTH (string));
628 }
629 #undef FUNC_NAME
630
631 size_t
632 scm_c_string_length (SCM string)
633 {
634 if (!IS_STRING (string))
635 scm_wrong_type_arg_msg (NULL, 0, string, "string");
636 return STRING_LENGTH (string);
637 }
638
639 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
640 (SCM str, SCM k),
641 "Return character @var{k} of @var{str} using zero-origin\n"
642 "indexing. @var{k} must be a valid index of @var{str}.")
643 #define FUNC_NAME s_scm_string_ref
644 {
645 size_t len;
646 unsigned long idx;
647
648 SCM_VALIDATE_STRING (1, str);
649
650 len = scm_i_string_length (str);
651 if (SCM_LIKELY (len > 0))
652 idx = scm_to_unsigned_integer (k, 0, len - 1);
653 else
654 scm_out_of_range (NULL, k);
655
656 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
657 }
658 #undef FUNC_NAME
659
660 SCM
661 scm_c_string_ref (SCM str, size_t p)
662 {
663 if (p >= scm_i_string_length (str))
664 scm_out_of_range (NULL, scm_from_size_t (p));
665 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
666 }
667
668 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
669 (SCM str, SCM k, SCM chr),
670 "Store @var{chr} in element @var{k} of @var{str} and return\n"
671 "an unspecified value. @var{k} must be a valid index of\n"
672 "@var{str}.")
673 #define FUNC_NAME s_scm_string_set_x
674 {
675 size_t len;
676 unsigned long idx;
677
678 SCM_VALIDATE_STRING (1, str);
679
680 len = scm_i_string_length (str);
681 if (SCM_LIKELY (len > 0))
682 idx = scm_to_unsigned_integer (k, 0, len - 1);
683 else
684 scm_out_of_range (NULL, k);
685
686 SCM_VALIDATE_CHAR (3, chr);
687 {
688 char *dst = scm_i_string_writable_chars (str);
689 dst[idx] = SCM_CHAR (chr);
690 scm_i_string_stop_writing ();
691 }
692 return SCM_UNSPECIFIED;
693 }
694 #undef FUNC_NAME
695
696 void
697 scm_c_string_set_x (SCM str, size_t p, SCM chr)
698 {
699 if (p >= scm_i_string_length (str))
700 scm_out_of_range (NULL, scm_from_size_t (p));
701 {
702 char *dst = scm_i_string_writable_chars (str);
703 dst[p] = SCM_CHAR (chr);
704 scm_i_string_stop_writing ();
705 }
706 }
707
708 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
709 (SCM str, SCM start, SCM end),
710 "Return a newly allocated string formed from the characters\n"
711 "of @var{str} beginning with index @var{start} (inclusive) and\n"
712 "ending with index @var{end} (exclusive).\n"
713 "@var{str} must be a string, @var{start} and @var{end} must be\n"
714 "exact integers satisfying:\n\n"
715 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
716 #define FUNC_NAME s_scm_substring
717 {
718 size_t len, from, to;
719
720 SCM_VALIDATE_STRING (1, str);
721 len = scm_i_string_length (str);
722 from = scm_to_unsigned_integer (start, 0, len);
723 if (SCM_UNBNDP (end))
724 to = len;
725 else
726 to = scm_to_unsigned_integer (end, from, len);
727 return scm_i_substring (str, from, to);
728 }
729 #undef FUNC_NAME
730
731 SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
732 (SCM str, SCM start, SCM end),
733 "Return a newly allocated string formed from the characters\n"
734 "of @var{str} beginning with index @var{start} (inclusive) and\n"
735 "ending with index @var{end} (exclusive).\n"
736 "@var{str} must be a string, @var{start} and @var{end} must be\n"
737 "exact integers satisfying:\n"
738 "\n"
739 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
740 "\n"
741 "The returned string is read-only.\n")
742 #define FUNC_NAME s_scm_substring_read_only
743 {
744 size_t len, from, to;
745
746 SCM_VALIDATE_STRING (1, str);
747 len = scm_i_string_length (str);
748 from = scm_to_unsigned_integer (start, 0, len);
749 if (SCM_UNBNDP (end))
750 to = len;
751 else
752 to = scm_to_unsigned_integer (end, from, len);
753 return scm_i_substring_read_only (str, from, to);
754 }
755 #undef FUNC_NAME
756
757 SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
758 (SCM str, SCM start, SCM end),
759 "Return a newly allocated string formed from the characters\n"
760 "of @var{str} beginning with index @var{start} (inclusive) and\n"
761 "ending with index @var{end} (exclusive).\n"
762 "@var{str} must be a string, @var{start} and @var{end} must be\n"
763 "exact integers satisfying:\n\n"
764 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
765 #define FUNC_NAME s_scm_substring_copy
766 {
767 /* For the Scheme version, START is mandatory, but for the C
768 version, it is optional. See scm_string_copy in srfi-13.c for a
769 rationale.
770 */
771
772 size_t from, to;
773
774 SCM_VALIDATE_STRING (1, str);
775 scm_i_get_substring_spec (scm_i_string_length (str),
776 start, &from, end, &to);
777 return scm_i_substring_copy (str, from, to);
778 }
779 #undef FUNC_NAME
780
781 SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
782 (SCM str, SCM start, SCM end),
783 "Return string that indirectly refers to the characters\n"
784 "of @var{str} beginning with index @var{start} (inclusive) and\n"
785 "ending with index @var{end} (exclusive).\n"
786 "@var{str} must be a string, @var{start} and @var{end} must be\n"
787 "exact integers satisfying:\n\n"
788 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
789 #define FUNC_NAME s_scm_substring_shared
790 {
791 size_t len, from, to;
792
793 SCM_VALIDATE_STRING (1, str);
794 len = scm_i_string_length (str);
795 from = scm_to_unsigned_integer (start, 0, len);
796 if (SCM_UNBNDP (end))
797 to = len;
798 else
799 to = scm_to_unsigned_integer (end, from, len);
800 return scm_i_substring_shared (str, from, to);
801 }
802 #undef FUNC_NAME
803
804 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
805 (SCM args),
806 "Return a newly allocated string whose characters form the\n"
807 "concatenation of the given strings, @var{args}.")
808 #define FUNC_NAME s_scm_string_append
809 {
810 SCM res;
811 size_t i = 0;
812 SCM l, s;
813 char *data;
814
815 SCM_VALIDATE_REST_ARGUMENT (args);
816 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
817 {
818 s = SCM_CAR (l);
819 SCM_VALIDATE_STRING (SCM_ARGn, s);
820 i += scm_i_string_length (s);
821 }
822 res = scm_i_make_string (i, &data);
823 for (l = args; !scm_is_null (l); l = SCM_CDR (l))
824 {
825 size_t len;
826 s = SCM_CAR (l);
827 SCM_VALIDATE_STRING (SCM_ARGn, s);
828 len = scm_i_string_length (s);
829 memcpy (data, scm_i_string_chars (s), len);
830 data += len;
831 scm_remember_upto_here_1 (s);
832 }
833 return res;
834 }
835 #undef FUNC_NAME
836
837 int
838 scm_is_string (SCM obj)
839 {
840 return IS_STRING (obj);
841 }
842
843 SCM
844 scm_from_locale_stringn (const char *str, size_t len)
845 {
846 SCM res;
847 char *dst;
848
849 if (len == (size_t)-1)
850 len = strlen (str);
851 res = scm_i_make_string (len, &dst);
852 memcpy (dst, str, len);
853 return res;
854 }
855
856 SCM
857 scm_from_locale_string (const char *str)
858 {
859 return scm_from_locale_stringn (str, -1);
860 }
861
862 SCM
863 scm_take_locale_stringn (char *str, size_t len)
864 {
865 SCM buf, res;
866
867 if (len == (size_t)-1)
868 len = strlen (str);
869 else
870 {
871 /* Ensure STR is null terminated. A realloc for 1 extra byte should
872 often be satisfied from the alignment padding after the block, with
873 no actual data movement. */
874 str = scm_realloc (str, len+1);
875 str[len] = '\0';
876 }
877
878 buf = scm_i_take_stringbufn (str, len);
879 res = scm_double_cell (STRING_TAG,
880 SCM_UNPACK (buf),
881 (scm_t_bits) 0, (scm_t_bits) len);
882 return res;
883 }
884
885 SCM
886 scm_take_locale_string (char *str)
887 {
888 return scm_take_locale_stringn (str, -1);
889 }
890
891 char *
892 scm_to_locale_stringn (SCM str, size_t *lenp)
893 {
894 char *res;
895 size_t len;
896
897 if (!scm_is_string (str))
898 scm_wrong_type_arg_msg (NULL, 0, str, "string");
899 len = scm_i_string_length (str);
900 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
901 memcpy (res, scm_i_string_chars (str), len);
902 if (lenp == NULL)
903 {
904 res[len] = '\0';
905 if (strlen (res) != len)
906 {
907 free (res);
908 scm_misc_error (NULL,
909 "string contains #\\nul character: ~S",
910 scm_list_1 (str));
911 }
912 }
913 else
914 *lenp = len;
915
916 scm_remember_upto_here_1 (str);
917 return res;
918 }
919
920 char *
921 scm_to_locale_string (SCM str)
922 {
923 return scm_to_locale_stringn (str, NULL);
924 }
925
926 size_t
927 scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
928 {
929 size_t len;
930
931 if (!scm_is_string (str))
932 scm_wrong_type_arg_msg (NULL, 0, str, "string");
933 len = scm_i_string_length (str);
934 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
935 scm_remember_upto_here_1 (str);
936 return len;
937 }
938
939 /* converts C scm_array of strings to SCM scm_list of strings. */
940 /* If argc < 0, a null terminated scm_array is assumed. */
941 SCM
942 scm_makfromstrs (int argc, char **argv)
943 {
944 int i = argc;
945 SCM lst = SCM_EOL;
946 if (0 > i)
947 for (i = 0; argv[i]; i++);
948 while (i--)
949 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
950 return lst;
951 }
952
953 /* Return a newly allocated array of char pointers to each of the strings
954 in args, with a terminating NULL pointer. */
955
956 char **
957 scm_i_allocate_string_pointers (SCM list)
958 #define FUNC_NAME "scm_i_allocate_string_pointers"
959 {
960 char **result;
961 int len = scm_ilength (list);
962 int i;
963
964 if (len < 0)
965 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
966
967 result = scm_gc_malloc ((len + 1) * sizeof (char *),
968 "string pointers");
969 result[len] = NULL;
970
971 /* The list might be have been modified in another thread, so
972 we check LIST before each access.
973 */
974 for (i = 0; i < len && scm_is_pair (list); i++)
975 {
976 SCM str;
977 size_t len;
978
979 str = SCM_CAR (list);
980 len = scm_c_string_length (str);
981
982 result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
983 memcpy (result[i], scm_i_string_chars (str), len);
984 result[i][len] = '\0';
985
986 list = SCM_CDR (list);
987 }
988
989 return result;
990 }
991 #undef FUNC_NAME
992
993 void
994 scm_i_get_substring_spec (size_t len,
995 SCM start, size_t *cstart,
996 SCM end, size_t *cend)
997 {
998 if (SCM_UNBNDP (start))
999 *cstart = 0;
1000 else
1001 *cstart = scm_to_unsigned_integer (start, 0, len);
1002
1003 if (SCM_UNBNDP (end))
1004 *cend = len;
1005 else
1006 *cend = scm_to_unsigned_integer (end, *cstart, len);
1007 }
1008
1009 #if SCM_ENABLE_DEPRECATED
1010
1011 /* When these definitions are removed, it becomes reasonable to use
1012 read-only strings for string literals. For that, change the reader
1013 to create string literals with scm_c_substring_read_only instead of
1014 with scm_c_substring_copy.
1015 */
1016
1017 int
1018 scm_i_deprecated_stringp (SCM str)
1019 {
1020 scm_c_issue_deprecation_warning
1021 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1022
1023 return scm_is_string (str);
1024 }
1025
1026 char *
1027 scm_i_deprecated_string_chars (SCM str)
1028 {
1029 char *chars;
1030
1031 scm_c_issue_deprecation_warning
1032 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1033
1034 /* We don't accept shared substrings here since they are not
1035 null-terminated.
1036 */
1037 if (IS_SH_STRING (str))
1038 scm_misc_error (NULL,
1039 "SCM_STRING_CHARS does not work with shared substrings.",
1040 SCM_EOL);
1041
1042 /* We explicitely test for read-only strings to produce a better
1043 error message.
1044 */
1045
1046 if (IS_RO_STRING (str))
1047 scm_misc_error (NULL,
1048 "SCM_STRING_CHARS does not work with read-only strings.",
1049 SCM_EOL);
1050
1051 /* The following is still wrong, of course...
1052 */
1053 chars = scm_i_string_writable_chars (str);
1054 scm_i_string_stop_writing ();
1055 return chars;
1056 }
1057
1058 size_t
1059 scm_i_deprecated_string_length (SCM str)
1060 {
1061 scm_c_issue_deprecation_warning
1062 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1063 return scm_c_string_length (str);
1064 }
1065
1066 #endif
1067
1068 void
1069 scm_init_strings ()
1070 {
1071 scm_nullstr = scm_i_make_string (0, NULL);
1072
1073 #include "libguile/strings.x"
1074 }
1075
1076
1077 /*
1078 Local Variables:
1079 c-file-style: "gnu"
1080 End:
1081 */