*** empty log message ***
[bpt/guile.git] / libguile / strings.c
CommitLineData
3ee86942 1/* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc.
0f2d19dd 2 *
73be1d9e
MV
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public
5 * License as published by the Free Software Foundation; either
6 * version 2.1 of the License, or (at your option) any later version.
0f2d19dd 7 *
73be1d9e
MV
8 * This library is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
0f2d19dd 12 *
73be1d9e
MV
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
16 */
1bbd0b84 17
1bbd0b84 18
0f2d19dd
JB
19\f
20
faf2c9d7 21#include <string.h>
3ee86942 22#include <stdio.h>
faf2c9d7 23
a0599745
MD
24#include "libguile/_scm.h"
25#include "libguile/chars.h"
7c33806a 26#include "libguile/root.h"
a0599745 27#include "libguile/strings.h"
1afff620 28#include "libguile/deprecation.h"
a0599745 29#include "libguile/validate.h"
c829a427 30#include "libguile/dynwind.h"
1afff620 31
0f2d19dd
JB
32\f
33
34/* {Strings}
35 */
36
3ee86942
MV
37
38/* Stringbufs
39 *
40 * XXX - keeping an accurate refcount during GC seems to be quite
41 * tricky, so we just keep score of whether a stringbuf might be
42 * shared, not wether it definitely is.
43 *
44 * The scheme I (mvo) tried to keep an accurate reference count would
45 * recount all strings that point to a stringbuf during the mark-phase
46 * of the GC. This was done since one cannot access the stringbuf of
47 * a string when that string is freed (in order to decrease the
48 * reference count). The memory of the stringbuf might have been
49 * reused already for something completely different.
50 *
51 * This recounted worked for a small number of threads beating on
52 * cow-strings, but it failed randomly with more than 10 threads, say.
53 * I couldn't figure out what went wrong, so I used the conservative
54 * approach implemented below.
55 *
56 * A stringbuf needs to know its length, but only so that it can be
57 * reported when the stringbuf is freed.
58 *
59 * Stringbufs (and strings) are not stored very compactly: a stringbuf
60 * has room for about 2*sizeof(scm_t_bits)-1 bytes additional
61 * information. As a compensation, the code below is made more
62 * complicated by storing small strings inline in the double cell of a
63 * stringbuf. So we have fixstrings and bigstrings...
64 */
65
66#define STRINGBUF_F_SHARED 0x100
67#define STRINGBUF_F_INLINE 0x200
68
69#define STRINGBUF_TAG scm_tc7_stringbuf
70#define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
71#define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
72
73#define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
74#define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
75#define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
76#define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
77
78#define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
79 ? STRINGBUF_INLINE_CHARS (buf) \
80 : STRINGBUF_OUTLINE_CHARS (buf))
81#define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
82 ? STRINGBUF_INLINE_LENGTH (buf) \
83 : STRINGBUF_OUTLINE_LENGTH (buf))
84
85#define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
86
87#define SET_STRINGBUF_SHARED(buf) \
88 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
89
90#if SCM_DEBUG
91static size_t lenhist[1001];
92#endif
93
94static SCM
95make_stringbuf (size_t len)
0f2d19dd 96{
3ee86942
MV
97 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
98 scm_i_symbol_chars, all stringbufs are null-terminated. Once
99 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
100 has been changed for scm_i_symbol_chars, this null-termination
101 can be dropped.
102 */
103
104#if SCM_DEBUG
105 if (len < 1000)
106 lenhist[len]++;
107 else
108 lenhist[1000]++;
109#endif
0f2d19dd 110
3ee86942
MV
111 if (len <= STRINGBUF_MAX_INLINE_LEN-1)
112 {
113 return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16),
114 0, 0, 0);
115 }
116 else
117 {
118 char *mem = scm_gc_malloc (len+1, "string");
119 mem[len] = '\0';
120 return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem,
121 (scm_t_bits) len, (scm_t_bits) 0);
122 }
123}
e53cc817 124
3ee86942
MV
125SCM
126scm_i_stringbuf_mark (SCM buf)
127{
128 return SCM_BOOL_F;
129}
1bbd0b84 130
3ee86942
MV
131void
132scm_i_stringbuf_free (SCM buf)
0f2d19dd 133{
3ee86942
MV
134 if (!STRINGBUF_INLINE (buf))
135 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf),
136 STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string");
137}
bd9e24b3 138
3ee86942 139SCM_MUTEX (stringbuf_write_mutex);
bd9e24b3 140
3ee86942
MV
141/* Copy-on-write strings.
142 */
bd9e24b3 143
3ee86942 144#define STRING_TAG scm_tc7_string
bd9e24b3 145
3ee86942
MV
146#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
147#define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
148#define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
bd9e24b3 149
3ee86942
MV
150#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
151#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
152
153#define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
154
e1b29f6a
MV
155/* Mutation-sharing substrings
156 */
157
158#define SH_STRING_TAG (scm_tc7_string + 0x100)
159
160#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
161/* START and LENGTH as for STRINGs. */
162
163#define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
164
3ee86942
MV
165SCM
166scm_i_make_string (size_t len, char **charsp)
167{
168 SCM buf = make_stringbuf (len);
169 SCM res;
170 if (charsp)
171 *charsp = STRINGBUF_CHARS (buf);
172 res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
173 (scm_t_bits)0, (scm_t_bits) len);
174 return res;
0f2d19dd
JB
175}
176
3ee86942
MV
177static void
178validate_substring_args (SCM str, size_t start, size_t end)
179{
180 if (!IS_STRING (str))
181 scm_wrong_type_arg_msg (NULL, 0, str, "string");
182 if (start > STRING_LENGTH (str))
183 scm_out_of_range (NULL, scm_from_size_t (start));
184 if (end > STRING_LENGTH (str) || end < start)
185 scm_out_of_range (NULL, scm_from_size_t (end));
186}
0f2d19dd 187
e1b29f6a
MV
188static inline void
189get_str_buf_start (SCM *str, SCM *buf, size_t *start)
190{
191 *start = STRING_START (*str);
192 if (IS_SH_STRING (*str))
193 {
194 *str = SH_STRING_STRING (*str);
195 *start += STRING_START (*str);
196 }
197 *buf = STRING_STRINGBUF (*str);
198}
199
3ee86942
MV
200SCM
201scm_i_substring (SCM str, size_t start, size_t end)
0f2d19dd 202{
e1b29f6a
MV
203 SCM buf;
204 size_t str_start;
205 get_str_buf_start (&str, &buf, &str_start);
3ee86942
MV
206 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
207 SET_STRINGBUF_SHARED (buf);
208 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
209 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
e1b29f6a
MV
210 (scm_t_bits)str_start + start,
211 (scm_t_bits) end - start);
0f2d19dd
JB
212}
213
3ee86942
MV
214SCM
215scm_i_substring_copy (SCM str, size_t start, size_t end)
216{
217 size_t len = end - start;
e1b29f6a
MV
218 SCM buf;
219 size_t str_start;
220 get_str_buf_start (&str, &buf, &str_start);
3ee86942 221 SCM my_buf = make_stringbuf (len);
e1b29f6a
MV
222 memcpy (STRINGBUF_CHARS (my_buf),
223 STRINGBUF_CHARS (buf) + str_start + start, len);
3ee86942
MV
224 scm_remember_upto_here_1 (buf);
225 return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf),
226 (scm_t_bits)0, (scm_t_bits) len);
227}
0f2d19dd 228
e1b29f6a
MV
229SCM
230scm_i_substring_shared (SCM str, size_t start, size_t end)
231{
232 if (start == 0 && end == STRING_LENGTH (str))
233 return str;
234 else
235 {
236 size_t len = end - start;
237 if (IS_SH_STRING (str))
238 {
239 start += STRING_START (str);
240 str = SH_STRING_STRING (str);
241 }
242 return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
243 (scm_t_bits)start, (scm_t_bits) len);
244 }
245}
246
3ee86942
MV
247SCM
248scm_c_substring (SCM str, size_t start, size_t end)
249{
250 validate_substring_args (str, start, end);
251 return scm_i_substring (str, start, end);
252}
ee149d03 253
0f2d19dd 254SCM
3ee86942 255scm_c_substring_copy (SCM str, size_t start, size_t end)
0f2d19dd 256{
3ee86942
MV
257 validate_substring_args (str, start, end);
258 return scm_i_substring_copy (str, start, end);
259}
260
3ee86942
MV
261SCM
262scm_c_substring_shared (SCM str, size_t start, size_t end)
263{
264 validate_substring_args (str, start, end);
265 return scm_i_substring_shared (str, start, end);
266}
0f2d19dd 267
ee149d03 268SCM
3ee86942 269scm_i_string_mark (SCM str)
ee149d03 270{
3ee86942
MV
271 if (IS_SH_STRING (str))
272 return SH_STRING_STRING (str);
273 else
274 return STRING_STRINGBUF (str);
ee149d03
JB
275}
276
3ee86942
MV
277void
278scm_i_string_free (SCM str)
279{
280}
36284627 281
3ee86942
MV
282/* Internal accessors
283 */
284
285size_t
286scm_i_string_length (SCM str)
0f2d19dd 287{
3ee86942 288 return STRING_LENGTH (str);
0f2d19dd
JB
289}
290
3ee86942
MV
291const char *
292scm_i_string_chars (SCM str)
293{
294 SCM buf;
e1b29f6a
MV
295 size_t start;
296 get_str_buf_start (&str, &buf, &start);
3ee86942
MV
297 return STRINGBUF_CHARS (buf) + start;
298}
b00418df 299
3ee86942
MV
300char *
301scm_i_string_writable_chars (SCM str)
b00418df 302{
3ee86942 303 SCM buf;
e1b29f6a
MV
304 size_t start;
305 get_str_buf_start (&str, &buf, &start);
3ee86942
MV
306 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
307 if (STRINGBUF_SHARED (buf))
308 {
309 /* Clone stringbuf. For this, we put all threads to sleep.
310 */
311
312 size_t len = STRING_LENGTH (str);
313 SCM new_buf;
314
315 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
316
317 new_buf = make_stringbuf (len);
318 memcpy (STRINGBUF_CHARS (new_buf),
319 STRINGBUF_CHARS (buf) + STRING_START (str), len);
320
321 scm_i_thread_put_to_sleep ();
322 SET_STRING_STRINGBUF (str, new_buf);
323 start -= STRING_START (str);
324 SET_STRING_START (str, 0);
325 scm_i_thread_wake_up ();
326
327 buf = new_buf;
328
329 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
330 }
331
332 return STRINGBUF_CHARS (buf) + start;
b00418df
DH
333}
334
3ee86942
MV
335void
336scm_i_string_stop_writing (void)
337{
338 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
339}
b00418df 340
3ee86942
MV
341/* Symbols.
342
343 Basic symbol creation and accessing is done here, the rest is in
344 symbols.[hc]. This has been done to keep stringbufs and the
345 internals of strings and string-like objects confined to this file.
346*/
347
348#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
349
350SCM
6869328b
MV
351scm_i_make_symbol (SCM name, scm_t_bits flags,
352 unsigned long hash, SCM props)
3ee86942
MV
353{
354 SCM buf;
355 size_t start = STRING_START (name);
356 size_t length = STRING_LENGTH (name);
357
358 if (IS_SH_STRING (name))
359 {
360 name = SH_STRING_STRING (name);
361 start += STRING_START (name);
362 }
363 buf = SYMBOL_STRINGBUF (name);
364
365 if (start == 0 && length == STRINGBUF_LENGTH (buf))
366 {
367 /* reuse buf. */
368 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
369 SET_STRINGBUF_SHARED (buf);
370 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
371 }
372 else
373 {
374 /* make new buf. */
375 SCM new_buf = make_stringbuf (length);
376 memcpy (STRINGBUF_CHARS (new_buf),
377 STRINGBUF_CHARS (buf) + start, length);
378 buf = new_buf;
379 }
6869328b 380 return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
3ee86942
MV
381 (scm_t_bits) hash, SCM_UNPACK (props));
382}
383
384size_t
385scm_i_symbol_length (SCM sym)
0f2d19dd 386{
3ee86942 387 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
0f2d19dd
JB
388}
389
3ee86942
MV
390const char *
391scm_i_symbol_chars (SCM sym)
392{
393 SCM buf = SYMBOL_STRINGBUF (sym);
394 return STRINGBUF_CHARS (buf);
395}
1cc91f1b 396
3ee86942
MV
397SCM
398scm_i_symbol_mark (SCM sym)
0f2d19dd 399{
3ee86942
MV
400 scm_gc_mark (SYMBOL_STRINGBUF (sym));
401 return SCM_CELL_OBJECT_3 (sym);
0f2d19dd
JB
402}
403
3ee86942
MV
404void
405scm_i_symbol_free (SCM sym)
406{
407}
0f2d19dd 408
be54b15d 409SCM
3ee86942 410scm_i_symbol_substring (SCM sym, size_t start, size_t end)
be54b15d 411{
3ee86942
MV
412 SCM buf = SYMBOL_STRINGBUF (sym);
413 scm_i_plugin_mutex_lock (&stringbuf_write_mutex);
414 SET_STRINGBUF_SHARED (buf);
415 scm_i_plugin_mutex_unlock (&stringbuf_write_mutex);
416 return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
417 (scm_t_bits)start, (scm_t_bits) end - start);
418}
be54b15d 419
3ee86942
MV
420/* Debugging
421 */
be54b15d 422
3ee86942 423#if SCM_DEBUG
be54b15d 424
3ee86942
MV
425SCM scm_sys_string_dump (SCM);
426SCM scm_sys_symbol_dump (SCM);
427SCM scm_sys_stringbuf_hist (void);
be54b15d 428
3ee86942
MV
429SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0,
430 (SCM str),
431 "")
432#define FUNC_NAME s_scm_sys_string_dump
433{
434 SCM_VALIDATE_STRING (1, str);
435 fprintf (stderr, "%p:\n", str);
436 fprintf (stderr, " start: %u\n", STRING_START (str));
437 fprintf (stderr, " len: %u\n", STRING_LENGTH (str));
438 if (IS_SH_STRING (str))
439 {
440 fprintf (stderr, " string: %p\n", SH_STRING_STRING (str));
441 fprintf (stderr, "\n");
442 scm_sys_string_dump (SH_STRING_STRING (str));
443 }
444 else
445 {
446 SCM buf = STRING_STRINGBUF (str);
447 fprintf (stderr, " buf: %p\n", buf);
448 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
449 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
450 fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300));
451 }
452 return SCM_UNSPECIFIED;
453}
454#undef FUNC_NAME
455
456SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0,
457 (SCM sym),
458 "")
459#define FUNC_NAME s_scm_sys_symbol_dump
460{
461 SCM_VALIDATE_SYMBOL (1, sym);
462 fprintf (stderr, "%p:\n", sym);
463 fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym));
464 {
465 SCM buf = SYMBOL_STRINGBUF (sym);
466 fprintf (stderr, " buf: %p\n", buf);
467 fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf));
468 fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf));
469 fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf));
470 }
471 return SCM_UNSPECIFIED;
472}
473#undef FUNC_NAME
474
475SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0,
476 (void),
477 "")
e1b29f6a 478#define FUNC_NAME s_scm_sys_stringbuf_hist
3ee86942
MV
479{
480 int i;
481 for (i = 0; i < 1000; i++)
482 if (lenhist[i])
483 fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
484 fprintf (stderr, ">999: %u\n", lenhist[1000]);
485 return SCM_UNSPECIFIED;
be54b15d
DH
486}
487#undef FUNC_NAME
488
3ee86942
MV
489#endif
490
491\f
492
493SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
494 (SCM obj),
495 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
496#define FUNC_NAME s_scm_string_p
497{
498 return scm_from_bool (IS_STRING (obj));
499}
500#undef FUNC_NAME
501
502
503SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
504
505SCM_DEFINE (scm_string, "string", 0, 0, 1,
506 (SCM chrs),
507 "@deffnx {Scheme Procedure} list->string chrs\n"
508 "Return a newly allocated string composed of the arguments,\n"
509 "@var{chrs}.")
510#define FUNC_NAME s_scm_string
511{
512 SCM result;
513 size_t len;
514 char *data;
515
516 {
517 long i = scm_ilength (chrs);
518
519 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
520 len = i;
521 }
522
523 result = scm_i_make_string (len, &data);
524 while (len > 0 && SCM_CONSP (chrs))
525 {
526 SCM elt = SCM_CAR (chrs);
527
528 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
529 *data++ = SCM_CHAR (elt);
530 chrs = SCM_CDR (chrs);
531 len--;
532 }
533 if (len > 0)
534 scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
535 if (!SCM_NULLP (chrs))
536 scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
537
538 return result;
539}
540#undef FUNC_NAME
be54b15d 541
3b3b36dd 542SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
6fa73e72 543 (SCM k, SCM chr),
0d26a824
MG
544 "Return a newly allocated string of\n"
545 "length @var{k}. If @var{chr} is given, then all elements of\n"
546 "the string are initialized to @var{chr}, otherwise the contents\n"
9401323e 547 "of the @var{string} are unspecified.")
1bbd0b84 548#define FUNC_NAME s_scm_make_string
0f2d19dd 549{
3ee86942
MV
550 return scm_c_make_string (scm_to_size_t (k), chr);
551}
552#undef FUNC_NAME
553
554SCM
555scm_c_make_string (size_t len, SCM chr)
556#define FUNC_NAME NULL
557{
558 char *dst;
559 SCM res = scm_i_make_string (len, &dst);
cb0d8be2 560
e11e83f3
MV
561 if (!SCM_UNBNDP (chr))
562 {
3ee86942
MV
563 SCM_VALIDATE_CHAR (0, chr);
564 memset (dst, SCM_CHAR (chr), len);
0f2d19dd 565 }
e11e83f3
MV
566
567 return res;
0f2d19dd 568}
1bbd0b84 569#undef FUNC_NAME
0f2d19dd 570
3b3b36dd 571SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
0d26a824
MG
572 (SCM string),
573 "Return the number of characters in @var{string}.")
1bbd0b84 574#define FUNC_NAME s_scm_string_length
0f2d19dd 575{
d1ca2c64 576 SCM_VALIDATE_STRING (1, string);
3ee86942 577 return scm_from_size_t (STRING_LENGTH (string));
0f2d19dd 578}
1bbd0b84 579#undef FUNC_NAME
0f2d19dd 580
3ee86942
MV
581size_t
582scm_c_string_length (SCM string)
583{
584 if (!IS_STRING (string))
585 scm_wrong_type_arg_msg (NULL, 0, string, "string");
586 return STRING_LENGTH (string);
587}
588
bd9e24b3 589SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
6fa73e72 590 (SCM str, SCM k),
0d26a824
MG
591 "Return character @var{k} of @var{str} using zero-origin\n"
592 "indexing. @var{k} must be a valid index of @var{str}.")
1bbd0b84 593#define FUNC_NAME s_scm_string_ref
0f2d19dd 594{
a55c2b68 595 unsigned long idx;
bd9e24b3 596
d1ca2c64 597 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
598 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1);
599 return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
0f2d19dd 600}
1bbd0b84 601#undef FUNC_NAME
0f2d19dd 602
3ee86942
MV
603SCM
604scm_c_string_ref (SCM str, size_t p)
605{
606 if (p >= scm_i_string_length (str))
607 scm_out_of_range (NULL, scm_from_size_t (p));
608 return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
609}
f0942910 610
3b3b36dd 611SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
6fa73e72 612 (SCM str, SCM k, SCM chr),
0d26a824
MG
613 "Store @var{chr} in element @var{k} of @var{str} and return\n"
614 "an unspecified value. @var{k} must be a valid index of\n"
615 "@var{str}.")
1bbd0b84 616#define FUNC_NAME s_scm_string_set_x
0f2d19dd 617{
a55c2b68
MV
618 unsigned long idx;
619
f0942910 620 SCM_VALIDATE_STRING (1, str);
3ee86942 621 idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1);
34d19ef6 622 SCM_VALIDATE_CHAR (3, chr);
3ee86942
MV
623 {
624 char *dst = scm_i_string_writable_chars (str);
625 dst[idx] = SCM_CHAR (chr);
626 scm_i_string_stop_writing ();
627 }
0f2d19dd
JB
628 return SCM_UNSPECIFIED;
629}
1bbd0b84 630#undef FUNC_NAME
0f2d19dd 631
3ee86942
MV
632void
633scm_c_string_set_x (SCM str, size_t p, SCM chr)
634{
635 if (p >= scm_i_string_length (str))
636 scm_out_of_range (NULL, scm_from_size_t (p));
637 {
638 char *dst = scm_i_string_writable_chars (str);
639 dst[p] = SCM_CHAR (chr);
640 scm_i_string_stop_writing ();
641 }
642}
0f2d19dd 643
3b3b36dd 644SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
0d26a824
MG
645 (SCM str, SCM start, SCM end),
646 "Return a newly allocated string formed from the characters\n"
647 "of @var{str} beginning with index @var{start} (inclusive) and\n"
648 "ending with index @var{end} (exclusive).\n"
649 "@var{str} must be a string, @var{start} and @var{end} must be\n"
650 "exact integers satisfying:\n\n"
651 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1bbd0b84 652#define FUNC_NAME s_scm_substring
0f2d19dd 653{
3ee86942 654 size_t len, from, to;
685c0d71 655
d1ca2c64 656 SCM_VALIDATE_STRING (1, str);
3ee86942
MV
657 len = scm_i_string_length (str);
658 from = scm_to_unsigned_integer (start, 0, len);
a55c2b68 659 if (SCM_UNBNDP (end))
3ee86942 660 to = len;
a55c2b68 661 else
3ee86942
MV
662 to = scm_to_unsigned_integer (end, from, len);
663 return scm_i_substring (str, from, to);
0f2d19dd 664}
1bbd0b84 665#undef FUNC_NAME
0f2d19dd 666
3ee86942
MV
667SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
668 (SCM str, SCM start, SCM end),
669 "Return a newly allocated string formed from the characters\n"
670 "of @var{str} beginning with index @var{start} (inclusive) and\n"
671 "ending with index @var{end} (exclusive).\n"
672 "@var{str} must be a string, @var{start} and @var{end} must be\n"
673 "exact integers satisfying:\n\n"
674 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
675#define FUNC_NAME s_scm_substring_copy
676{
e1b29f6a
MV
677 /* For the Scheme version, START is mandatory, but for the C
678 version, it is optional. See scm_string_copy in srfi-13.c for a
679 rationale.
680 */
681
682 size_t from, to;
3ee86942
MV
683
684 SCM_VALIDATE_STRING (1, str);
e1b29f6a
MV
685 scm_i_get_substring_spec (scm_i_string_length (str),
686 start, &from, end, &to);
3ee86942
MV
687 return scm_i_substring_copy (str, from, to);
688}
689#undef FUNC_NAME
690
691SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
692 (SCM str, SCM start, SCM end),
693 "Return string that indirectly refers to the characters\n"
694 "of @var{str} beginning with index @var{start} (inclusive) and\n"
695 "ending with index @var{end} (exclusive).\n"
696 "@var{str} must be a string, @var{start} and @var{end} must be\n"
697 "exact integers satisfying:\n\n"
698 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
699#define FUNC_NAME s_scm_substring_shared
700{
701 size_t len, from, to;
702
703 SCM_VALIDATE_STRING (1, str);
704 len = scm_i_string_length (str);
705 from = scm_to_unsigned_integer (start, 0, len);
706 if (SCM_UNBNDP (end))
707 to = len;
708 else
709 to = scm_to_unsigned_integer (end, from, len);
710 return scm_i_substring_shared (str, from, to);
711}
712#undef FUNC_NAME
685c0d71 713
3b3b36dd 714SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
6fa73e72 715 (SCM args),
0d26a824
MG
716 "Return a newly allocated string whose characters form the\n"
717 "concatenation of the given strings, @var{args}.")
1bbd0b84 718#define FUNC_NAME s_scm_string_append
0f2d19dd
JB
719{
720 SCM res;
1be6b49c 721 size_t i = 0;
c829a427
MV
722 SCM l, s;
723 char *data;
af45e3b0
DH
724
725 SCM_VALIDATE_REST_ARGUMENT (args);
c829a427
MV
726 for (l = args; !SCM_NULLP (l); l = SCM_CDR (l))
727 {
728 s = SCM_CAR (l);
729 SCM_VALIDATE_STRING (SCM_ARGn, s);
3ee86942 730 i += scm_i_string_length (s);
c829a427 731 }
3ee86942 732 res = scm_i_make_string (i, &data);
c829a427
MV
733 for (l = args; !SCM_NULLP (l); l = SCM_CDR (l))
734 {
735 s = SCM_CAR (l);
3ee86942
MV
736 SCM_VALIDATE_STRING (SCM_ARGn, s);
737 size_t len = scm_i_string_length (s);
738 memcpy (data, scm_i_string_chars (s), len);
739 data += len;
c829a427
MV
740 scm_remember_upto_here_1 (s);
741 }
0f2d19dd
JB
742 return res;
743}
1bbd0b84 744#undef FUNC_NAME
0f2d19dd 745
c829a427
MV
746int
747scm_is_string (SCM obj)
748{
3ee86942 749 return IS_STRING (obj);
c829a427 750}
24933780 751
c829a427
MV
752SCM
753scm_from_locale_stringn (const char *str, size_t len)
754{
755 SCM res;
756 char *dst;
4d4528e7 757
c829a427
MV
758 if (len == (size_t)-1)
759 len = strlen (str);
3ee86942 760 res = scm_i_make_string (len, &dst);
c829a427
MV
761 memcpy (dst, str, len);
762 return res;
763}
4d4528e7 764
c829a427
MV
765SCM
766scm_from_locale_string (const char *str)
4d4528e7 767{
c829a427
MV
768 return scm_from_locale_stringn (str, -1);
769}
4d4528e7 770
c829a427
MV
771SCM
772scm_take_locale_stringn (char *str, size_t len)
773{
774 if (len == (size_t)-1)
775 return scm_take_locale_string (str);
776 else
777 {
778 /* STR might not be zero terminated and we are not allowed to
779 look at str[len], so we have to make a new one...
780 */
781 SCM res = scm_from_locale_stringn (str, len);
782 free (str);
783 return res;
784 }
785}
4d4528e7 786
c829a427
MV
787SCM
788scm_take_locale_string (char *str)
789{
790 size_t len = strlen (str);
3ee86942 791 SCM buf, res;
c829a427 792
3ee86942
MV
793 buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str,
794 (scm_t_bits) len, (scm_t_bits) 0);
795 res = scm_double_cell (STRING_TAG,
796 SCM_UNPACK (buf),
797 (scm_t_bits) 0, (scm_t_bits) len);
c829a427 798 scm_gc_register_collectable_memory (str, len+1, "string");
c829a427
MV
799 return res;
800}
801
802char *
803scm_to_locale_stringn (SCM str, size_t *lenp)
804{
805 char *res;
806 size_t len;
4d4528e7 807
3ee86942 808 if (!scm_is_string (str))
c829a427 809 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942 810 len = scm_i_string_length (str);
c829a427 811 res = scm_malloc (len + ((lenp==NULL)? 1 : 0));
3ee86942 812 memcpy (res, scm_i_string_chars (str), len);
c829a427
MV
813 if (lenp == NULL)
814 {
815 res[len] = '\0';
816 if (strlen (res) != len)
817 {
818 free (res);
819 scm_misc_error (NULL,
820 "string contains #\\nul character: ~S",
821 scm_list_1 (str));
822 }
823 }
824 else
4d4528e7 825 *lenp = len;
24933780 826
c829a427
MV
827 scm_remember_upto_here_1 (str);
828 return res;
4d4528e7 829}
af68e5e5 830
c829a427
MV
831char *
832scm_to_locale_string (SCM str)
833{
834 return scm_to_locale_stringn (str, NULL);
835}
af68e5e5 836
c829a427
MV
837size_t
838scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
839{
840 size_t len;
841
3ee86942 842 if (!scm_is_string (str))
c829a427 843 scm_wrong_type_arg_msg (NULL, 0, str, "string");
3ee86942
MV
844 len = scm_i_string_length (str);
845 memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len);
c829a427
MV
846 scm_remember_upto_here_1 (str);
847 return len;
848}
af68e5e5 849
3ee86942
MV
850/* converts C scm_array of strings to SCM scm_list of strings. */
851/* If argc < 0, a null terminated scm_array is assumed. */
852SCM
853scm_makfromstrs (int argc, char **argv)
854{
855 int i = argc;
856 SCM lst = SCM_EOL;
857 if (0 > i)
858 for (i = 0; argv[i]; i++);
859 while (i--)
860 lst = scm_cons (scm_from_locale_string (argv[i]), lst);
861 return lst;
862}
863
c829a427
MV
864/* Return a newly allocated array of char pointers to each of the strings
865 in args, with a terminating NULL pointer. */
866
867char **
868scm_i_allocate_string_pointers (SCM list)
af68e5e5 869{
c829a427
MV
870 char **result;
871 int len = scm_ilength (list);
872 int i;
873
874 if (len < 0)
875 scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
876
877 scm_frame_begin (0);
878
879 result = (char **) scm_malloc ((len + 1) * sizeof (char *));
880 result[len] = NULL;
881 scm_frame_unwind_handler (free, result, 0);
882
883 /* The list might be have been modified in another thread, so
884 we check LIST before each access.
885 */
886 for (i = 0; i < len && SCM_CONSP (list); i++)
887 {
888 result[i] = scm_to_locale_string (SCM_CAR (list));
889 list = SCM_CDR (list);
890 }
891
892 scm_frame_end ();
893 return result;
af68e5e5 894}
e53cc817 895
c829a427
MV
896void
897scm_i_free_string_pointers (char **pointers)
898{
899 int i;
900
901 for (i = 0; pointers[i]; i++)
902 free (pointers[i]);
903 free (pointers);
904}
24933780 905
6f14f578
MV
906void
907scm_i_get_substring_spec (size_t len,
908 SCM start, size_t *cstart,
909 SCM end, size_t *cend)
910{
911 if (SCM_UNBNDP (start))
912 *cstart = 0;
913 else
914 *cstart = scm_to_unsigned_integer (start, 0, len);
915
916 if (SCM_UNBNDP (end))
917 *cend = len;
918 else
919 *cend = scm_to_unsigned_integer (end, *cstart, len);
920}
921
3ee86942
MV
922#if SCM_ENABLE_DEPRECATED
923
924int
925SCM_STRINGP (SCM str)
926{
927 scm_c_issue_deprecation_warning
928 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
929
2616f0e0 930 return scm_is_string (str);
3ee86942
MV
931}
932
933char *
934SCM_STRING_CHARS (SCM str)
935{
936 char *chars;
937
938 scm_c_issue_deprecation_warning
939 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
940
2616f0e0
MV
941 /* We don't accept shared substrings here since they are not
942 null-terminated.
943 */
944 if (IS_SH_STRING (str))
945 scm_misc_error (NULL,
946 "SCM_STRING_CHARS does not work with shared substrings.",
947 SCM_EOL);
948
949 /* The following is still wrong, of course...
3ee86942
MV
950 */
951 chars = scm_i_string_writable_chars (str);
952 scm_i_string_stop_writing ();
953 return chars;
954}
955
956size_t
957SCM_STRING_LENGTH (SCM str)
958{
959 scm_c_issue_deprecation_warning
960 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
961 return scm_c_string_length (str);
962}
963
964#endif
965
0f2d19dd
JB
966void
967scm_init_strings ()
0f2d19dd 968{
3ee86942 969 scm_nullstr = scm_i_make_string (0, NULL);
7c33806a 970
a0599745 971#include "libguile/strings.x"
0f2d19dd
JB
972}
973
89e00824
ML
974
975/*
976 Local Variables:
977 c-file-style: "gnu"
978 End:
979*/