Commit | Line | Data |
---|---|---|
3ee86942 | 1 | /* Copyright (C) 1995,1996,1998,2000,2001, 2004 Free Software Foundation, Inc. |
0f2d19dd | 2 | * |
73be1d9e MV |
3 | * This library is free software; you can redistribute it and/or |
4 | * modify it under the terms of the GNU Lesser General Public | |
5 | * License as published by the Free Software Foundation; either | |
6 | * version 2.1 of the License, or (at your option) any later version. | |
0f2d19dd | 7 | * |
73be1d9e MV |
8 | * This library is distributed in the hope that it will be useful, |
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
11 | * Lesser General Public License for more details. | |
0f2d19dd | 12 | * |
73be1d9e MV |
13 | * You should have received a copy of the GNU Lesser General Public |
14 | * License along with this library; if not, write to the Free Software | |
15 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
16 | */ | |
1bbd0b84 | 17 | |
1bbd0b84 | 18 | |
0f2d19dd JB |
19 | \f |
20 | ||
faf2c9d7 | 21 | #include <string.h> |
3ee86942 | 22 | #include <stdio.h> |
faf2c9d7 | 23 | |
a0599745 MD |
24 | #include "libguile/_scm.h" |
25 | #include "libguile/chars.h" | |
7c33806a | 26 | #include "libguile/root.h" |
a0599745 | 27 | #include "libguile/strings.h" |
1afff620 | 28 | #include "libguile/deprecation.h" |
a0599745 | 29 | #include "libguile/validate.h" |
c829a427 | 30 | #include "libguile/dynwind.h" |
1afff620 | 31 | |
0f2d19dd JB |
32 | \f |
33 | ||
34 | /* {Strings} | |
35 | */ | |
36 | ||
3ee86942 MV |
37 | |
38 | /* Stringbufs | |
39 | * | |
40 | * XXX - keeping an accurate refcount during GC seems to be quite | |
41 | * tricky, so we just keep score of whether a stringbuf might be | |
42 | * shared, not wether it definitely is. | |
43 | * | |
44 | * The scheme I (mvo) tried to keep an accurate reference count would | |
45 | * recount all strings that point to a stringbuf during the mark-phase | |
46 | * of the GC. This was done since one cannot access the stringbuf of | |
47 | * a string when that string is freed (in order to decrease the | |
48 | * reference count). The memory of the stringbuf might have been | |
49 | * reused already for something completely different. | |
50 | * | |
51 | * This recounted worked for a small number of threads beating on | |
52 | * cow-strings, but it failed randomly with more than 10 threads, say. | |
53 | * I couldn't figure out what went wrong, so I used the conservative | |
54 | * approach implemented below. | |
55 | * | |
56 | * A stringbuf needs to know its length, but only so that it can be | |
57 | * reported when the stringbuf is freed. | |
58 | * | |
59 | * Stringbufs (and strings) are not stored very compactly: a stringbuf | |
60 | * has room for about 2*sizeof(scm_t_bits)-1 bytes additional | |
61 | * information. As a compensation, the code below is made more | |
62 | * complicated by storing small strings inline in the double cell of a | |
63 | * stringbuf. So we have fixstrings and bigstrings... | |
64 | */ | |
65 | ||
66 | #define STRINGBUF_F_SHARED 0x100 | |
67 | #define STRINGBUF_F_INLINE 0x200 | |
68 | ||
69 | #define STRINGBUF_TAG scm_tc7_stringbuf | |
70 | #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED) | |
71 | #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE) | |
72 | ||
73 | #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf)) | |
74 | #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf)) | |
75 | #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1)) | |
76 | #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16) | |
77 | ||
78 | #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \ | |
79 | ? STRINGBUF_INLINE_CHARS (buf) \ | |
80 | : STRINGBUF_OUTLINE_CHARS (buf)) | |
81 | #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \ | |
82 | ? STRINGBUF_INLINE_LENGTH (buf) \ | |
83 | : STRINGBUF_OUTLINE_LENGTH (buf)) | |
84 | ||
85 | #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits)) | |
86 | ||
87 | #define SET_STRINGBUF_SHARED(buf) \ | |
88 | (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED)) | |
89 | ||
90 | #if SCM_DEBUG | |
91 | static size_t lenhist[1001]; | |
92 | #endif | |
93 | ||
94 | static SCM | |
95 | make_stringbuf (size_t len) | |
0f2d19dd | 96 | { |
3ee86942 MV |
97 | /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and |
98 | scm_i_symbol_chars, all stringbufs are null-terminated. Once | |
99 | SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code | |
100 | has been changed for scm_i_symbol_chars, this null-termination | |
101 | can be dropped. | |
102 | */ | |
103 | ||
104 | #if SCM_DEBUG | |
105 | if (len < 1000) | |
106 | lenhist[len]++; | |
107 | else | |
108 | lenhist[1000]++; | |
109 | #endif | |
0f2d19dd | 110 | |
3ee86942 MV |
111 | if (len <= STRINGBUF_MAX_INLINE_LEN-1) |
112 | { | |
113 | return scm_double_cell (STRINGBUF_TAG | STRINGBUF_F_INLINE | (len << 16), | |
114 | 0, 0, 0); | |
115 | } | |
116 | else | |
117 | { | |
118 | char *mem = scm_gc_malloc (len+1, "string"); | |
119 | mem[len] = '\0'; | |
120 | return scm_double_cell (STRINGBUF_TAG, (scm_t_bits) mem, | |
121 | (scm_t_bits) len, (scm_t_bits) 0); | |
122 | } | |
123 | } | |
e53cc817 | 124 | |
3ee86942 MV |
125 | SCM |
126 | scm_i_stringbuf_mark (SCM buf) | |
127 | { | |
128 | return SCM_BOOL_F; | |
129 | } | |
1bbd0b84 | 130 | |
3ee86942 MV |
131 | void |
132 | scm_i_stringbuf_free (SCM buf) | |
0f2d19dd | 133 | { |
3ee86942 MV |
134 | if (!STRINGBUF_INLINE (buf)) |
135 | scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf), | |
136 | STRINGBUF_OUTLINE_LENGTH (buf) + 1, "string"); | |
137 | } | |
bd9e24b3 | 138 | |
3ee86942 | 139 | SCM_MUTEX (stringbuf_write_mutex); |
bd9e24b3 | 140 | |
3ee86942 MV |
141 | /* Copy-on-write strings. |
142 | */ | |
bd9e24b3 | 143 | |
3ee86942 | 144 | #define STRING_TAG scm_tc7_string |
bd9e24b3 | 145 | |
3ee86942 MV |
146 | #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str)) |
147 | #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str)) | |
148 | #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str)) | |
bd9e24b3 | 149 | |
3ee86942 MV |
150 | #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf)) |
151 | #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start)) | |
152 | ||
153 | #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG) | |
154 | ||
e1b29f6a MV |
155 | /* Mutation-sharing substrings |
156 | */ | |
157 | ||
158 | #define SH_STRING_TAG (scm_tc7_string + 0x100) | |
159 | ||
160 | #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh)) | |
161 | /* START and LENGTH as for STRINGs. */ | |
162 | ||
163 | #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG) | |
164 | ||
3ee86942 MV |
165 | SCM |
166 | scm_i_make_string (size_t len, char **charsp) | |
167 | { | |
168 | SCM buf = make_stringbuf (len); | |
169 | SCM res; | |
170 | if (charsp) | |
171 | *charsp = STRINGBUF_CHARS (buf); | |
172 | res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf), | |
173 | (scm_t_bits)0, (scm_t_bits) len); | |
174 | return res; | |
0f2d19dd JB |
175 | } |
176 | ||
3ee86942 MV |
177 | static void |
178 | validate_substring_args (SCM str, size_t start, size_t end) | |
179 | { | |
180 | if (!IS_STRING (str)) | |
181 | scm_wrong_type_arg_msg (NULL, 0, str, "string"); | |
182 | if (start > STRING_LENGTH (str)) | |
183 | scm_out_of_range (NULL, scm_from_size_t (start)); | |
184 | if (end > STRING_LENGTH (str) || end < start) | |
185 | scm_out_of_range (NULL, scm_from_size_t (end)); | |
186 | } | |
0f2d19dd | 187 | |
e1b29f6a MV |
188 | static inline void |
189 | get_str_buf_start (SCM *str, SCM *buf, size_t *start) | |
190 | { | |
191 | *start = STRING_START (*str); | |
192 | if (IS_SH_STRING (*str)) | |
193 | { | |
194 | *str = SH_STRING_STRING (*str); | |
195 | *start += STRING_START (*str); | |
196 | } | |
197 | *buf = STRING_STRINGBUF (*str); | |
198 | } | |
199 | ||
3ee86942 MV |
200 | SCM |
201 | scm_i_substring (SCM str, size_t start, size_t end) | |
0f2d19dd | 202 | { |
e1b29f6a MV |
203 | SCM buf; |
204 | size_t str_start; | |
205 | get_str_buf_start (&str, &buf, &str_start); | |
3ee86942 MV |
206 | scm_i_plugin_mutex_lock (&stringbuf_write_mutex); |
207 | SET_STRINGBUF_SHARED (buf); | |
208 | scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); | |
209 | return scm_double_cell (STRING_TAG, SCM_UNPACK(buf), | |
e1b29f6a MV |
210 | (scm_t_bits)str_start + start, |
211 | (scm_t_bits) end - start); | |
0f2d19dd JB |
212 | } |
213 | ||
3ee86942 MV |
214 | SCM |
215 | scm_i_substring_copy (SCM str, size_t start, size_t end) | |
216 | { | |
217 | size_t len = end - start; | |
e1b29f6a MV |
218 | SCM buf; |
219 | size_t str_start; | |
220 | get_str_buf_start (&str, &buf, &str_start); | |
3ee86942 | 221 | SCM my_buf = make_stringbuf (len); |
e1b29f6a MV |
222 | memcpy (STRINGBUF_CHARS (my_buf), |
223 | STRINGBUF_CHARS (buf) + str_start + start, len); | |
3ee86942 MV |
224 | scm_remember_upto_here_1 (buf); |
225 | return scm_double_cell (STRING_TAG, SCM_UNPACK(my_buf), | |
226 | (scm_t_bits)0, (scm_t_bits) len); | |
227 | } | |
0f2d19dd | 228 | |
e1b29f6a MV |
229 | SCM |
230 | scm_i_substring_shared (SCM str, size_t start, size_t end) | |
231 | { | |
232 | if (start == 0 && end == STRING_LENGTH (str)) | |
233 | return str; | |
234 | else | |
235 | { | |
236 | size_t len = end - start; | |
237 | if (IS_SH_STRING (str)) | |
238 | { | |
239 | start += STRING_START (str); | |
240 | str = SH_STRING_STRING (str); | |
241 | } | |
242 | return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str), | |
243 | (scm_t_bits)start, (scm_t_bits) len); | |
244 | } | |
245 | } | |
246 | ||
3ee86942 MV |
247 | SCM |
248 | scm_c_substring (SCM str, size_t start, size_t end) | |
249 | { | |
250 | validate_substring_args (str, start, end); | |
251 | return scm_i_substring (str, start, end); | |
252 | } | |
ee149d03 | 253 | |
0f2d19dd | 254 | SCM |
3ee86942 | 255 | scm_c_substring_copy (SCM str, size_t start, size_t end) |
0f2d19dd | 256 | { |
3ee86942 MV |
257 | validate_substring_args (str, start, end); |
258 | return scm_i_substring_copy (str, start, end); | |
259 | } | |
260 | ||
3ee86942 MV |
261 | SCM |
262 | scm_c_substring_shared (SCM str, size_t start, size_t end) | |
263 | { | |
264 | validate_substring_args (str, start, end); | |
265 | return scm_i_substring_shared (str, start, end); | |
266 | } | |
0f2d19dd | 267 | |
ee149d03 | 268 | SCM |
3ee86942 | 269 | scm_i_string_mark (SCM str) |
ee149d03 | 270 | { |
3ee86942 MV |
271 | if (IS_SH_STRING (str)) |
272 | return SH_STRING_STRING (str); | |
273 | else | |
274 | return STRING_STRINGBUF (str); | |
ee149d03 JB |
275 | } |
276 | ||
3ee86942 MV |
277 | void |
278 | scm_i_string_free (SCM str) | |
279 | { | |
280 | } | |
36284627 | 281 | |
3ee86942 MV |
282 | /* Internal accessors |
283 | */ | |
284 | ||
285 | size_t | |
286 | scm_i_string_length (SCM str) | |
0f2d19dd | 287 | { |
3ee86942 | 288 | return STRING_LENGTH (str); |
0f2d19dd JB |
289 | } |
290 | ||
3ee86942 MV |
291 | const char * |
292 | scm_i_string_chars (SCM str) | |
293 | { | |
294 | SCM buf; | |
e1b29f6a MV |
295 | size_t start; |
296 | get_str_buf_start (&str, &buf, &start); | |
3ee86942 MV |
297 | return STRINGBUF_CHARS (buf) + start; |
298 | } | |
b00418df | 299 | |
3ee86942 MV |
300 | char * |
301 | scm_i_string_writable_chars (SCM str) | |
b00418df | 302 | { |
3ee86942 | 303 | SCM buf; |
e1b29f6a MV |
304 | size_t start; |
305 | get_str_buf_start (&str, &buf, &start); | |
3ee86942 MV |
306 | scm_i_plugin_mutex_lock (&stringbuf_write_mutex); |
307 | if (STRINGBUF_SHARED (buf)) | |
308 | { | |
309 | /* Clone stringbuf. For this, we put all threads to sleep. | |
310 | */ | |
311 | ||
312 | size_t len = STRING_LENGTH (str); | |
313 | SCM new_buf; | |
314 | ||
315 | scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); | |
316 | ||
317 | new_buf = make_stringbuf (len); | |
318 | memcpy (STRINGBUF_CHARS (new_buf), | |
319 | STRINGBUF_CHARS (buf) + STRING_START (str), len); | |
320 | ||
321 | scm_i_thread_put_to_sleep (); | |
322 | SET_STRING_STRINGBUF (str, new_buf); | |
323 | start -= STRING_START (str); | |
324 | SET_STRING_START (str, 0); | |
325 | scm_i_thread_wake_up (); | |
326 | ||
327 | buf = new_buf; | |
328 | ||
329 | scm_i_plugin_mutex_lock (&stringbuf_write_mutex); | |
330 | } | |
331 | ||
332 | return STRINGBUF_CHARS (buf) + start; | |
b00418df DH |
333 | } |
334 | ||
3ee86942 MV |
335 | void |
336 | scm_i_string_stop_writing (void) | |
337 | { | |
338 | scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); | |
339 | } | |
b00418df | 340 | |
3ee86942 MV |
341 | /* Symbols. |
342 | ||
343 | Basic symbol creation and accessing is done here, the rest is in | |
344 | symbols.[hc]. This has been done to keep stringbufs and the | |
345 | internals of strings and string-like objects confined to this file. | |
346 | */ | |
347 | ||
348 | #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1 | |
349 | ||
350 | SCM | |
6869328b MV |
351 | scm_i_make_symbol (SCM name, scm_t_bits flags, |
352 | unsigned long hash, SCM props) | |
3ee86942 MV |
353 | { |
354 | SCM buf; | |
355 | size_t start = STRING_START (name); | |
356 | size_t length = STRING_LENGTH (name); | |
357 | ||
358 | if (IS_SH_STRING (name)) | |
359 | { | |
360 | name = SH_STRING_STRING (name); | |
361 | start += STRING_START (name); | |
362 | } | |
363 | buf = SYMBOL_STRINGBUF (name); | |
364 | ||
365 | if (start == 0 && length == STRINGBUF_LENGTH (buf)) | |
366 | { | |
367 | /* reuse buf. */ | |
368 | scm_i_plugin_mutex_lock (&stringbuf_write_mutex); | |
369 | SET_STRINGBUF_SHARED (buf); | |
370 | scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); | |
371 | } | |
372 | else | |
373 | { | |
374 | /* make new buf. */ | |
375 | SCM new_buf = make_stringbuf (length); | |
376 | memcpy (STRINGBUF_CHARS (new_buf), | |
377 | STRINGBUF_CHARS (buf) + start, length); | |
378 | buf = new_buf; | |
379 | } | |
6869328b | 380 | return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf), |
3ee86942 MV |
381 | (scm_t_bits) hash, SCM_UNPACK (props)); |
382 | } | |
383 | ||
384 | size_t | |
385 | scm_i_symbol_length (SCM sym) | |
0f2d19dd | 386 | { |
3ee86942 | 387 | return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym)); |
0f2d19dd JB |
388 | } |
389 | ||
3ee86942 MV |
390 | const char * |
391 | scm_i_symbol_chars (SCM sym) | |
392 | { | |
393 | SCM buf = SYMBOL_STRINGBUF (sym); | |
394 | return STRINGBUF_CHARS (buf); | |
395 | } | |
1cc91f1b | 396 | |
3ee86942 MV |
397 | SCM |
398 | scm_i_symbol_mark (SCM sym) | |
0f2d19dd | 399 | { |
3ee86942 MV |
400 | scm_gc_mark (SYMBOL_STRINGBUF (sym)); |
401 | return SCM_CELL_OBJECT_3 (sym); | |
0f2d19dd JB |
402 | } |
403 | ||
3ee86942 MV |
404 | void |
405 | scm_i_symbol_free (SCM sym) | |
406 | { | |
407 | } | |
0f2d19dd | 408 | |
be54b15d | 409 | SCM |
3ee86942 | 410 | scm_i_symbol_substring (SCM sym, size_t start, size_t end) |
be54b15d | 411 | { |
3ee86942 MV |
412 | SCM buf = SYMBOL_STRINGBUF (sym); |
413 | scm_i_plugin_mutex_lock (&stringbuf_write_mutex); | |
414 | SET_STRINGBUF_SHARED (buf); | |
415 | scm_i_plugin_mutex_unlock (&stringbuf_write_mutex); | |
416 | return scm_double_cell (STRING_TAG, SCM_UNPACK(buf), | |
417 | (scm_t_bits)start, (scm_t_bits) end - start); | |
418 | } | |
be54b15d | 419 | |
3ee86942 MV |
420 | /* Debugging |
421 | */ | |
be54b15d | 422 | |
3ee86942 | 423 | #if SCM_DEBUG |
be54b15d | 424 | |
3ee86942 MV |
425 | SCM scm_sys_string_dump (SCM); |
426 | SCM scm_sys_symbol_dump (SCM); | |
427 | SCM scm_sys_stringbuf_hist (void); | |
be54b15d | 428 | |
3ee86942 MV |
429 | SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, |
430 | (SCM str), | |
431 | "") | |
432 | #define FUNC_NAME s_scm_sys_string_dump | |
433 | { | |
434 | SCM_VALIDATE_STRING (1, str); | |
435 | fprintf (stderr, "%p:\n", str); | |
436 | fprintf (stderr, " start: %u\n", STRING_START (str)); | |
437 | fprintf (stderr, " len: %u\n", STRING_LENGTH (str)); | |
438 | if (IS_SH_STRING (str)) | |
439 | { | |
440 | fprintf (stderr, " string: %p\n", SH_STRING_STRING (str)); | |
441 | fprintf (stderr, "\n"); | |
442 | scm_sys_string_dump (SH_STRING_STRING (str)); | |
443 | } | |
444 | else | |
445 | { | |
446 | SCM buf = STRING_STRINGBUF (str); | |
447 | fprintf (stderr, " buf: %p\n", buf); | |
448 | fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf)); | |
449 | fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf)); | |
450 | fprintf (stderr, " flags: %x\n", (SCM_CELL_WORD_0 (buf) & 0x300)); | |
451 | } | |
452 | return SCM_UNSPECIFIED; | |
453 | } | |
454 | #undef FUNC_NAME | |
455 | ||
456 | SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, | |
457 | (SCM sym), | |
458 | "") | |
459 | #define FUNC_NAME s_scm_sys_symbol_dump | |
460 | { | |
461 | SCM_VALIDATE_SYMBOL (1, sym); | |
462 | fprintf (stderr, "%p:\n", sym); | |
463 | fprintf (stderr, " hash: %lu\n", scm_i_symbol_hash (sym)); | |
464 | { | |
465 | SCM buf = SYMBOL_STRINGBUF (sym); | |
466 | fprintf (stderr, " buf: %p\n", buf); | |
467 | fprintf (stderr, " chars: %p\n", STRINGBUF_CHARS (buf)); | |
468 | fprintf (stderr, " length: %u\n", STRINGBUF_LENGTH (buf)); | |
469 | fprintf (stderr, " shared: %u\n", STRINGBUF_SHARED (buf)); | |
470 | } | |
471 | return SCM_UNSPECIFIED; | |
472 | } | |
473 | #undef FUNC_NAME | |
474 | ||
475 | SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, | |
476 | (void), | |
477 | "") | |
e1b29f6a | 478 | #define FUNC_NAME s_scm_sys_stringbuf_hist |
3ee86942 MV |
479 | { |
480 | int i; | |
481 | for (i = 0; i < 1000; i++) | |
482 | if (lenhist[i]) | |
483 | fprintf (stderr, " %3d: %u\n", i, lenhist[i]); | |
484 | fprintf (stderr, ">999: %u\n", lenhist[1000]); | |
485 | return SCM_UNSPECIFIED; | |
be54b15d DH |
486 | } |
487 | #undef FUNC_NAME | |
488 | ||
3ee86942 MV |
489 | #endif |
490 | ||
491 | \f | |
492 | ||
493 | SCM_DEFINE (scm_string_p, "string?", 1, 0, 0, | |
494 | (SCM obj), | |
495 | "Return @code{#t} if @var{obj} is a string, else @code{#f}.") | |
496 | #define FUNC_NAME s_scm_string_p | |
497 | { | |
498 | return scm_from_bool (IS_STRING (obj)); | |
499 | } | |
500 | #undef FUNC_NAME | |
501 | ||
502 | ||
503 | SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string); | |
504 | ||
505 | SCM_DEFINE (scm_string, "string", 0, 0, 1, | |
506 | (SCM chrs), | |
507 | "@deffnx {Scheme Procedure} list->string chrs\n" | |
508 | "Return a newly allocated string composed of the arguments,\n" | |
509 | "@var{chrs}.") | |
510 | #define FUNC_NAME s_scm_string | |
511 | { | |
512 | SCM result; | |
513 | size_t len; | |
514 | char *data; | |
515 | ||
516 | { | |
517 | long i = scm_ilength (chrs); | |
518 | ||
519 | SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME); | |
520 | len = i; | |
521 | } | |
522 | ||
523 | result = scm_i_make_string (len, &data); | |
524 | while (len > 0 && SCM_CONSP (chrs)) | |
525 | { | |
526 | SCM elt = SCM_CAR (chrs); | |
527 | ||
528 | SCM_VALIDATE_CHAR (SCM_ARGn, elt); | |
529 | *data++ = SCM_CHAR (elt); | |
530 | chrs = SCM_CDR (chrs); | |
531 | len--; | |
532 | } | |
533 | if (len > 0) | |
534 | scm_misc_error (NULL, "list changed while constructing string", SCM_EOL); | |
535 | if (!SCM_NULLP (chrs)) | |
536 | scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list"); | |
537 | ||
538 | return result; | |
539 | } | |
540 | #undef FUNC_NAME | |
be54b15d | 541 | |
3b3b36dd | 542 | SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0, |
6fa73e72 | 543 | (SCM k, SCM chr), |
0d26a824 MG |
544 | "Return a newly allocated string of\n" |
545 | "length @var{k}. If @var{chr} is given, then all elements of\n" | |
546 | "the string are initialized to @var{chr}, otherwise the contents\n" | |
9401323e | 547 | "of the @var{string} are unspecified.") |
1bbd0b84 | 548 | #define FUNC_NAME s_scm_make_string |
0f2d19dd | 549 | { |
3ee86942 MV |
550 | return scm_c_make_string (scm_to_size_t (k), chr); |
551 | } | |
552 | #undef FUNC_NAME | |
553 | ||
554 | SCM | |
555 | scm_c_make_string (size_t len, SCM chr) | |
556 | #define FUNC_NAME NULL | |
557 | { | |
558 | char *dst; | |
559 | SCM res = scm_i_make_string (len, &dst); | |
cb0d8be2 | 560 | |
e11e83f3 MV |
561 | if (!SCM_UNBNDP (chr)) |
562 | { | |
3ee86942 MV |
563 | SCM_VALIDATE_CHAR (0, chr); |
564 | memset (dst, SCM_CHAR (chr), len); | |
0f2d19dd | 565 | } |
e11e83f3 MV |
566 | |
567 | return res; | |
0f2d19dd | 568 | } |
1bbd0b84 | 569 | #undef FUNC_NAME |
0f2d19dd | 570 | |
3b3b36dd | 571 | SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0, |
0d26a824 MG |
572 | (SCM string), |
573 | "Return the number of characters in @var{string}.") | |
1bbd0b84 | 574 | #define FUNC_NAME s_scm_string_length |
0f2d19dd | 575 | { |
d1ca2c64 | 576 | SCM_VALIDATE_STRING (1, string); |
3ee86942 | 577 | return scm_from_size_t (STRING_LENGTH (string)); |
0f2d19dd | 578 | } |
1bbd0b84 | 579 | #undef FUNC_NAME |
0f2d19dd | 580 | |
3ee86942 MV |
581 | size_t |
582 | scm_c_string_length (SCM string) | |
583 | { | |
584 | if (!IS_STRING (string)) | |
585 | scm_wrong_type_arg_msg (NULL, 0, string, "string"); | |
586 | return STRING_LENGTH (string); | |
587 | } | |
588 | ||
bd9e24b3 | 589 | SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0, |
6fa73e72 | 590 | (SCM str, SCM k), |
0d26a824 MG |
591 | "Return character @var{k} of @var{str} using zero-origin\n" |
592 | "indexing. @var{k} must be a valid index of @var{str}.") | |
1bbd0b84 | 593 | #define FUNC_NAME s_scm_string_ref |
0f2d19dd | 594 | { |
a55c2b68 | 595 | unsigned long idx; |
bd9e24b3 | 596 | |
d1ca2c64 | 597 | SCM_VALIDATE_STRING (1, str); |
3ee86942 MV |
598 | idx = scm_to_unsigned_integer (k, 0, scm_i_string_length (str)-1); |
599 | return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]); | |
0f2d19dd | 600 | } |
1bbd0b84 | 601 | #undef FUNC_NAME |
0f2d19dd | 602 | |
3ee86942 MV |
603 | SCM |
604 | scm_c_string_ref (SCM str, size_t p) | |
605 | { | |
606 | if (p >= scm_i_string_length (str)) | |
607 | scm_out_of_range (NULL, scm_from_size_t (p)); | |
608 | return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]); | |
609 | } | |
f0942910 | 610 | |
3b3b36dd | 611 | SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0, |
6fa73e72 | 612 | (SCM str, SCM k, SCM chr), |
0d26a824 MG |
613 | "Store @var{chr} in element @var{k} of @var{str} and return\n" |
614 | "an unspecified value. @var{k} must be a valid index of\n" | |
615 | "@var{str}.") | |
1bbd0b84 | 616 | #define FUNC_NAME s_scm_string_set_x |
0f2d19dd | 617 | { |
a55c2b68 MV |
618 | unsigned long idx; |
619 | ||
f0942910 | 620 | SCM_VALIDATE_STRING (1, str); |
3ee86942 | 621 | idx = scm_to_unsigned_integer (k, 0, scm_i_string_length(str)-1); |
34d19ef6 | 622 | SCM_VALIDATE_CHAR (3, chr); |
3ee86942 MV |
623 | { |
624 | char *dst = scm_i_string_writable_chars (str); | |
625 | dst[idx] = SCM_CHAR (chr); | |
626 | scm_i_string_stop_writing (); | |
627 | } | |
0f2d19dd JB |
628 | return SCM_UNSPECIFIED; |
629 | } | |
1bbd0b84 | 630 | #undef FUNC_NAME |
0f2d19dd | 631 | |
3ee86942 MV |
632 | void |
633 | scm_c_string_set_x (SCM str, size_t p, SCM chr) | |
634 | { | |
635 | if (p >= scm_i_string_length (str)) | |
636 | scm_out_of_range (NULL, scm_from_size_t (p)); | |
637 | { | |
638 | char *dst = scm_i_string_writable_chars (str); | |
639 | dst[p] = SCM_CHAR (chr); | |
640 | scm_i_string_stop_writing (); | |
641 | } | |
642 | } | |
0f2d19dd | 643 | |
3b3b36dd | 644 | SCM_DEFINE (scm_substring, "substring", 2, 1, 0, |
0d26a824 MG |
645 | (SCM str, SCM start, SCM end), |
646 | "Return a newly allocated string formed from the characters\n" | |
647 | "of @var{str} beginning with index @var{start} (inclusive) and\n" | |
648 | "ending with index @var{end} (exclusive).\n" | |
649 | "@var{str} must be a string, @var{start} and @var{end} must be\n" | |
650 | "exact integers satisfying:\n\n" | |
651 | "0 <= @var{start} <= @var{end} <= (string-length @var{str}).") | |
1bbd0b84 | 652 | #define FUNC_NAME s_scm_substring |
0f2d19dd | 653 | { |
3ee86942 | 654 | size_t len, from, to; |
685c0d71 | 655 | |
d1ca2c64 | 656 | SCM_VALIDATE_STRING (1, str); |
3ee86942 MV |
657 | len = scm_i_string_length (str); |
658 | from = scm_to_unsigned_integer (start, 0, len); | |
a55c2b68 | 659 | if (SCM_UNBNDP (end)) |
3ee86942 | 660 | to = len; |
a55c2b68 | 661 | else |
3ee86942 MV |
662 | to = scm_to_unsigned_integer (end, from, len); |
663 | return scm_i_substring (str, from, to); | |
0f2d19dd | 664 | } |
1bbd0b84 | 665 | #undef FUNC_NAME |
0f2d19dd | 666 | |
3ee86942 MV |
667 | SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0, |
668 | (SCM str, SCM start, SCM end), | |
669 | "Return a newly allocated string formed from the characters\n" | |
670 | "of @var{str} beginning with index @var{start} (inclusive) and\n" | |
671 | "ending with index @var{end} (exclusive).\n" | |
672 | "@var{str} must be a string, @var{start} and @var{end} must be\n" | |
673 | "exact integers satisfying:\n\n" | |
674 | "0 <= @var{start} <= @var{end} <= (string-length @var{str}).") | |
675 | #define FUNC_NAME s_scm_substring_copy | |
676 | { | |
e1b29f6a MV |
677 | /* For the Scheme version, START is mandatory, but for the C |
678 | version, it is optional. See scm_string_copy in srfi-13.c for a | |
679 | rationale. | |
680 | */ | |
681 | ||
682 | size_t from, to; | |
3ee86942 MV |
683 | |
684 | SCM_VALIDATE_STRING (1, str); | |
e1b29f6a MV |
685 | scm_i_get_substring_spec (scm_i_string_length (str), |
686 | start, &from, end, &to); | |
3ee86942 MV |
687 | return scm_i_substring_copy (str, from, to); |
688 | } | |
689 | #undef FUNC_NAME | |
690 | ||
691 | SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0, | |
692 | (SCM str, SCM start, SCM end), | |
693 | "Return string that indirectly refers to the characters\n" | |
694 | "of @var{str} beginning with index @var{start} (inclusive) and\n" | |
695 | "ending with index @var{end} (exclusive).\n" | |
696 | "@var{str} must be a string, @var{start} and @var{end} must be\n" | |
697 | "exact integers satisfying:\n\n" | |
698 | "0 <= @var{start} <= @var{end} <= (string-length @var{str}).") | |
699 | #define FUNC_NAME s_scm_substring_shared | |
700 | { | |
701 | size_t len, from, to; | |
702 | ||
703 | SCM_VALIDATE_STRING (1, str); | |
704 | len = scm_i_string_length (str); | |
705 | from = scm_to_unsigned_integer (start, 0, len); | |
706 | if (SCM_UNBNDP (end)) | |
707 | to = len; | |
708 | else | |
709 | to = scm_to_unsigned_integer (end, from, len); | |
710 | return scm_i_substring_shared (str, from, to); | |
711 | } | |
712 | #undef FUNC_NAME | |
685c0d71 | 713 | |
3b3b36dd | 714 | SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, |
6fa73e72 | 715 | (SCM args), |
0d26a824 MG |
716 | "Return a newly allocated string whose characters form the\n" |
717 | "concatenation of the given strings, @var{args}.") | |
1bbd0b84 | 718 | #define FUNC_NAME s_scm_string_append |
0f2d19dd JB |
719 | { |
720 | SCM res; | |
1be6b49c | 721 | size_t i = 0; |
c829a427 MV |
722 | SCM l, s; |
723 | char *data; | |
af45e3b0 DH |
724 | |
725 | SCM_VALIDATE_REST_ARGUMENT (args); | |
c829a427 MV |
726 | for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) |
727 | { | |
728 | s = SCM_CAR (l); | |
729 | SCM_VALIDATE_STRING (SCM_ARGn, s); | |
3ee86942 | 730 | i += scm_i_string_length (s); |
c829a427 | 731 | } |
3ee86942 | 732 | res = scm_i_make_string (i, &data); |
c829a427 MV |
733 | for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) |
734 | { | |
735 | s = SCM_CAR (l); | |
3ee86942 MV |
736 | SCM_VALIDATE_STRING (SCM_ARGn, s); |
737 | size_t len = scm_i_string_length (s); | |
738 | memcpy (data, scm_i_string_chars (s), len); | |
739 | data += len; | |
c829a427 MV |
740 | scm_remember_upto_here_1 (s); |
741 | } | |
0f2d19dd JB |
742 | return res; |
743 | } | |
1bbd0b84 | 744 | #undef FUNC_NAME |
0f2d19dd | 745 | |
c829a427 MV |
746 | int |
747 | scm_is_string (SCM obj) | |
748 | { | |
3ee86942 | 749 | return IS_STRING (obj); |
c829a427 | 750 | } |
24933780 | 751 | |
c829a427 MV |
752 | SCM |
753 | scm_from_locale_stringn (const char *str, size_t len) | |
754 | { | |
755 | SCM res; | |
756 | char *dst; | |
4d4528e7 | 757 | |
c829a427 MV |
758 | if (len == (size_t)-1) |
759 | len = strlen (str); | |
3ee86942 | 760 | res = scm_i_make_string (len, &dst); |
c829a427 MV |
761 | memcpy (dst, str, len); |
762 | return res; | |
763 | } | |
4d4528e7 | 764 | |
c829a427 MV |
765 | SCM |
766 | scm_from_locale_string (const char *str) | |
4d4528e7 | 767 | { |
c829a427 MV |
768 | return scm_from_locale_stringn (str, -1); |
769 | } | |
4d4528e7 | 770 | |
c829a427 MV |
771 | SCM |
772 | scm_take_locale_stringn (char *str, size_t len) | |
773 | { | |
774 | if (len == (size_t)-1) | |
775 | return scm_take_locale_string (str); | |
776 | else | |
777 | { | |
778 | /* STR might not be zero terminated and we are not allowed to | |
779 | look at str[len], so we have to make a new one... | |
780 | */ | |
781 | SCM res = scm_from_locale_stringn (str, len); | |
782 | free (str); | |
783 | return res; | |
784 | } | |
785 | } | |
4d4528e7 | 786 | |
c829a427 MV |
787 | SCM |
788 | scm_take_locale_string (char *str) | |
789 | { | |
790 | size_t len = strlen (str); | |
3ee86942 | 791 | SCM buf, res; |
c829a427 | 792 | |
3ee86942 MV |
793 | buf = scm_double_cell (STRINGBUF_TAG, (scm_t_bits) str, |
794 | (scm_t_bits) len, (scm_t_bits) 0); | |
795 | res = scm_double_cell (STRING_TAG, | |
796 | SCM_UNPACK (buf), | |
797 | (scm_t_bits) 0, (scm_t_bits) len); | |
c829a427 | 798 | scm_gc_register_collectable_memory (str, len+1, "string"); |
c829a427 MV |
799 | return res; |
800 | } | |
801 | ||
802 | char * | |
803 | scm_to_locale_stringn (SCM str, size_t *lenp) | |
804 | { | |
805 | char *res; | |
806 | size_t len; | |
4d4528e7 | 807 | |
3ee86942 | 808 | if (!scm_is_string (str)) |
c829a427 | 809 | scm_wrong_type_arg_msg (NULL, 0, str, "string"); |
3ee86942 | 810 | len = scm_i_string_length (str); |
c829a427 | 811 | res = scm_malloc (len + ((lenp==NULL)? 1 : 0)); |
3ee86942 | 812 | memcpy (res, scm_i_string_chars (str), len); |
c829a427 MV |
813 | if (lenp == NULL) |
814 | { | |
815 | res[len] = '\0'; | |
816 | if (strlen (res) != len) | |
817 | { | |
818 | free (res); | |
819 | scm_misc_error (NULL, | |
820 | "string contains #\\nul character: ~S", | |
821 | scm_list_1 (str)); | |
822 | } | |
823 | } | |
824 | else | |
4d4528e7 | 825 | *lenp = len; |
24933780 | 826 | |
c829a427 MV |
827 | scm_remember_upto_here_1 (str); |
828 | return res; | |
4d4528e7 | 829 | } |
af68e5e5 | 830 | |
c829a427 MV |
831 | char * |
832 | scm_to_locale_string (SCM str) | |
833 | { | |
834 | return scm_to_locale_stringn (str, NULL); | |
835 | } | |
af68e5e5 | 836 | |
c829a427 MV |
837 | size_t |
838 | scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len) | |
839 | { | |
840 | size_t len; | |
841 | ||
3ee86942 | 842 | if (!scm_is_string (str)) |
c829a427 | 843 | scm_wrong_type_arg_msg (NULL, 0, str, "string"); |
3ee86942 MV |
844 | len = scm_i_string_length (str); |
845 | memcpy (buf, scm_i_string_chars (str), (len > max_len)? max_len : len); | |
c829a427 MV |
846 | scm_remember_upto_here_1 (str); |
847 | return len; | |
848 | } | |
af68e5e5 | 849 | |
3ee86942 MV |
850 | /* converts C scm_array of strings to SCM scm_list of strings. */ |
851 | /* If argc < 0, a null terminated scm_array is assumed. */ | |
852 | SCM | |
853 | scm_makfromstrs (int argc, char **argv) | |
854 | { | |
855 | int i = argc; | |
856 | SCM lst = SCM_EOL; | |
857 | if (0 > i) | |
858 | for (i = 0; argv[i]; i++); | |
859 | while (i--) | |
860 | lst = scm_cons (scm_from_locale_string (argv[i]), lst); | |
861 | return lst; | |
862 | } | |
863 | ||
c829a427 MV |
864 | /* Return a newly allocated array of char pointers to each of the strings |
865 | in args, with a terminating NULL pointer. */ | |
866 | ||
867 | char ** | |
868 | scm_i_allocate_string_pointers (SCM list) | |
af68e5e5 | 869 | { |
c829a427 MV |
870 | char **result; |
871 | int len = scm_ilength (list); | |
872 | int i; | |
873 | ||
874 | if (len < 0) | |
875 | scm_wrong_type_arg_msg (NULL, 0, list, "proper list"); | |
876 | ||
877 | scm_frame_begin (0); | |
878 | ||
879 | result = (char **) scm_malloc ((len + 1) * sizeof (char *)); | |
880 | result[len] = NULL; | |
881 | scm_frame_unwind_handler (free, result, 0); | |
882 | ||
883 | /* The list might be have been modified in another thread, so | |
884 | we check LIST before each access. | |
885 | */ | |
886 | for (i = 0; i < len && SCM_CONSP (list); i++) | |
887 | { | |
888 | result[i] = scm_to_locale_string (SCM_CAR (list)); | |
889 | list = SCM_CDR (list); | |
890 | } | |
891 | ||
892 | scm_frame_end (); | |
893 | return result; | |
af68e5e5 | 894 | } |
e53cc817 | 895 | |
c829a427 MV |
896 | void |
897 | scm_i_free_string_pointers (char **pointers) | |
898 | { | |
899 | int i; | |
900 | ||
901 | for (i = 0; pointers[i]; i++) | |
902 | free (pointers[i]); | |
903 | free (pointers); | |
904 | } | |
24933780 | 905 | |
6f14f578 MV |
906 | void |
907 | scm_i_get_substring_spec (size_t len, | |
908 | SCM start, size_t *cstart, | |
909 | SCM end, size_t *cend) | |
910 | { | |
911 | if (SCM_UNBNDP (start)) | |
912 | *cstart = 0; | |
913 | else | |
914 | *cstart = scm_to_unsigned_integer (start, 0, len); | |
915 | ||
916 | if (SCM_UNBNDP (end)) | |
917 | *cend = len; | |
918 | else | |
919 | *cend = scm_to_unsigned_integer (end, *cstart, len); | |
920 | } | |
921 | ||
3ee86942 MV |
922 | #if SCM_ENABLE_DEPRECATED |
923 | ||
924 | int | |
925 | SCM_STRINGP (SCM str) | |
926 | { | |
927 | scm_c_issue_deprecation_warning | |
928 | ("SCM_STRINGP is deprecated. Use scm_is_string instead."); | |
929 | ||
2616f0e0 | 930 | return scm_is_string (str); |
3ee86942 MV |
931 | } |
932 | ||
933 | char * | |
934 | SCM_STRING_CHARS (SCM str) | |
935 | { | |
936 | char *chars; | |
937 | ||
938 | scm_c_issue_deprecation_warning | |
939 | ("SCM_STRING_CHARS is deprecated. See the manual for alternatives."); | |
940 | ||
2616f0e0 MV |
941 | /* We don't accept shared substrings here since they are not |
942 | null-terminated. | |
943 | */ | |
944 | if (IS_SH_STRING (str)) | |
945 | scm_misc_error (NULL, | |
946 | "SCM_STRING_CHARS does not work with shared substrings.", | |
947 | SCM_EOL); | |
948 | ||
949 | /* The following is still wrong, of course... | |
3ee86942 MV |
950 | */ |
951 | chars = scm_i_string_writable_chars (str); | |
952 | scm_i_string_stop_writing (); | |
953 | return chars; | |
954 | } | |
955 | ||
956 | size_t | |
957 | SCM_STRING_LENGTH (SCM str) | |
958 | { | |
959 | scm_c_issue_deprecation_warning | |
960 | ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead."); | |
961 | return scm_c_string_length (str); | |
962 | } | |
963 | ||
964 | #endif | |
965 | ||
0f2d19dd JB |
966 | void |
967 | scm_init_strings () | |
0f2d19dd | 968 | { |
3ee86942 | 969 | scm_nullstr = scm_i_make_string (0, NULL); |
7c33806a | 970 | |
a0599745 | 971 | #include "libguile/strings.x" |
0f2d19dd JB |
972 | } |
973 | ||
89e00824 ML |
974 | |
975 | /* | |
976 | Local Variables: | |
977 | c-file-style: "gnu" | |
978 | End: | |
979 | */ |