2002-01-28 Stefan Jahn <stefan@lkcc.org>
[bpt/guile.git] / libguile / strings.c
1 /* Copyright (C) 1995,1996,1998,2000,2001 Free Software Foundation, Inc.
2 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2, or (at your option)
6 * any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this software; see the file COPYING. If not, write to
15 * the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
16 * Boston, MA 02111-1307 USA
17 *
18 * As a special exception, the Free Software Foundation gives permission
19 * for additional uses of the text contained in its release of GUILE.
20 *
21 * The exception is that, if you link the GUILE library with other files
22 * to produce an executable, this does not by itself cause the
23 * resulting executable to be covered by the GNU General Public License.
24 * Your use of that executable is in no way restricted on account of
25 * linking the GUILE library code into it.
26 *
27 * This exception does not however invalidate any other reasons why
28 * the executable file might be covered by the GNU General Public License.
29 *
30 * This exception applies only to the code released by the
31 * Free Software Foundation under the name GUILE. If you copy
32 * code from other Free Software Foundation releases into a copy of
33 * GUILE, as the General Public License permits, the exception does
34 * not apply to the code that you add in this way. To avoid misleading
35 * anyone as to the status of such modified files, you must delete
36 * this exception notice from them.
37 *
38 * If you write modifications of your own for GUILE, it is your choice
39 * whether to permit this exception to apply to your modifications.
40 * If you do not wish that, delete this exception notice. */
41
42
43 \f
44
45 #include <string.h>
46
47 #include "libguile/_scm.h"
48 #include "libguile/chars.h"
49 #include "libguile/root.h"
50 #include "libguile/strings.h"
51 #include "libguile/deprecation.h"
52 #include "libguile/validate.h"
53
54 \f
55
56 /* {Strings}
57 */
58
59 SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
60 (SCM obj),
61 "Return @code{#t} iff @var{obj} is a string, else @code{#f}.")
62 #define FUNC_NAME s_scm_string_p
63 {
64 return SCM_BOOL (SCM_STRINGP (obj));
65 }
66 #undef FUNC_NAME
67
68
69 SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
70
71 SCM_DEFINE (scm_string, "string", 0, 0, 1,
72 (SCM chrs),
73 "@deffnx {Scheme Procedure} list->string chrs\n"
74 "Return a newly allocated string composed of the arguments,\n"
75 "@var{chrs}.")
76 #define FUNC_NAME s_scm_string
77 {
78 SCM result;
79
80 {
81 long i = scm_ilength (chrs);
82
83 SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
84 result = scm_allocate_string (i);
85 }
86
87 {
88 unsigned char *data = SCM_STRING_UCHARS (result);
89
90 while (!SCM_NULLP (chrs))
91 {
92 SCM elt = SCM_CAR (chrs);
93
94 SCM_VALIDATE_CHAR (SCM_ARGn, elt);
95 *data++ = SCM_CHAR (elt);
96 chrs = SCM_CDR (chrs);
97 }
98 }
99 return result;
100 }
101 #undef FUNC_NAME
102
103
104 /* converts C scm_array of strings to SCM scm_list of strings. */
105 /* If argc < 0, a null terminated scm_array is assumed. */
106 SCM
107 scm_makfromstrs (int argc, char **argv)
108 {
109 int i = argc;
110 SCM lst = SCM_EOL;
111 if (0 > i)
112 for (i = 0; argv[i]; i++);
113 while (i--)
114 lst = scm_cons (scm_mem2string (argv[i], strlen (argv[i])), lst);
115 return lst;
116 }
117
118
119 /* This function must only be applied to memory obtained via malloc,
120 since the GC is going to apply `free' to it when the string is
121 dropped.
122
123 Also, s[len] must be `\0', since we promise that strings are
124 null-terminated. Perhaps we could handle non-null-terminated
125 strings by claiming they're shared substrings of a string we just
126 made up. */
127 SCM
128 scm_take_str (char *s, size_t len)
129 #define FUNC_NAME "scm_take_str"
130 {
131 SCM answer;
132
133 SCM_ASSERT_RANGE (2, scm_ulong2num (len), len <= SCM_STRING_MAX_LENGTH);
134
135 answer = scm_alloc_cell (SCM_MAKE_STRING_TAG (len), (scm_t_bits) s);
136 scm_done_malloc (len + 1);
137
138 return answer;
139 }
140 #undef FUNC_NAME
141
142
143 /* `s' must be a malloc'd string. See scm_take_str. */
144 SCM
145 scm_take0str (char *s)
146 {
147 return scm_take_str (s, strlen (s));
148 }
149
150
151 SCM
152 scm_mem2string (const char *src, size_t len)
153 {
154 SCM s = scm_allocate_string (len);
155 char *dst = SCM_STRING_CHARS (s);
156
157 while (len--)
158 *dst++ = *src++;
159 return s;
160 }
161
162
163 SCM
164 scm_str2string (const char *src)
165 {
166 return scm_mem2string (src, strlen (src));
167 }
168
169
170 SCM
171 scm_makfrom0str (const char *src)
172 {
173 if (!src) return SCM_BOOL_F;
174 return scm_mem2string (src, strlen (src));
175 }
176
177
178 SCM
179 scm_makfrom0str_opt (const char *src)
180 {
181 return scm_makfrom0str (src);
182 }
183
184
185 SCM
186 scm_allocate_string (size_t len)
187 #define FUNC_NAME "scm_allocate_string"
188 {
189 char *mem;
190 SCM s;
191
192 SCM_ASSERT_RANGE (1, scm_long2num (len), len <= SCM_STRING_MAX_LENGTH);
193
194 mem = (char *) scm_must_malloc (len + 1, FUNC_NAME);
195 mem[len] = 0;
196
197 s = scm_alloc_cell (SCM_MAKE_STRING_TAG (len), (scm_t_bits) mem);
198
199 return s;
200 }
201 #undef FUNC_NAME
202
203
204 SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
205 (SCM k, SCM chr),
206 "Return a newly allocated string of\n"
207 "length @var{k}. If @var{chr} is given, then all elements of\n"
208 "the string are initialized to @var{chr}, otherwise the contents\n"
209 "of the @var{string} are unspecified.")
210 #define FUNC_NAME s_scm_make_string
211 {
212 if (SCM_INUMP (k))
213 {
214 long int i = SCM_INUM (k);
215 SCM res;
216
217 SCM_ASSERT_RANGE (1, k, i >= 0);
218
219 res = scm_allocate_string (i);
220 if (!SCM_UNBNDP (chr))
221 {
222 unsigned char *dst;
223
224 SCM_VALIDATE_CHAR (2, chr);
225
226 dst = SCM_STRING_UCHARS (res);
227 memset (dst, SCM_CHAR (chr), i);
228 }
229
230 return res;
231 }
232 else if (SCM_BIGP (k))
233 SCM_OUT_OF_RANGE (1, k);
234 else
235 SCM_WRONG_TYPE_ARG (1, k);
236 }
237 #undef FUNC_NAME
238
239
240 SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
241 (SCM string),
242 "Return the number of characters in @var{string}.")
243 #define FUNC_NAME s_scm_string_length
244 {
245 SCM_VALIDATE_STRING (1, string);
246 return SCM_MAKINUM (SCM_STRING_LENGTH (string));
247 }
248 #undef FUNC_NAME
249
250 SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
251 (SCM str, SCM k),
252 "Return character @var{k} of @var{str} using zero-origin\n"
253 "indexing. @var{k} must be a valid index of @var{str}.")
254 #define FUNC_NAME s_scm_string_ref
255 {
256 long idx;
257
258 SCM_VALIDATE_STRING (1, str);
259 SCM_VALIDATE_INUM_COPY (2, k, idx);
260 SCM_ASSERT_RANGE (2, k, idx >= 0 && idx < SCM_STRING_LENGTH (str));
261 return SCM_MAKE_CHAR (SCM_STRING_UCHARS (str)[idx]);
262 }
263 #undef FUNC_NAME
264
265
266 SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
267 (SCM str, SCM k, SCM chr),
268 "Store @var{chr} in element @var{k} of @var{str} and return\n"
269 "an unspecified value. @var{k} must be a valid index of\n"
270 "@var{str}.")
271 #define FUNC_NAME s_scm_string_set_x
272 {
273 SCM_VALIDATE_STRING (1, str);
274 SCM_VALIDATE_INUM_RANGE (2,k,0,SCM_STRING_LENGTH(str));
275 SCM_VALIDATE_CHAR (3,chr);
276 SCM_STRING_UCHARS (str)[SCM_INUM (k)] = SCM_CHAR (chr);
277 return SCM_UNSPECIFIED;
278 }
279 #undef FUNC_NAME
280
281
282 SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
283 (SCM str, SCM start, SCM end),
284 "Return a newly allocated string formed from the characters\n"
285 "of @var{str} beginning with index @var{start} (inclusive) and\n"
286 "ending with index @var{end} (exclusive).\n"
287 "@var{str} must be a string, @var{start} and @var{end} must be\n"
288 "exact integers satisfying:\n\n"
289 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
290 #define FUNC_NAME s_scm_substring
291 {
292 long int from;
293 long int to;
294 SCM substr;
295
296 SCM_VALIDATE_STRING (1, str);
297 SCM_VALIDATE_INUM (2, start);
298 SCM_VALIDATE_INUM_DEF (3, end, SCM_STRING_LENGTH (str));
299
300 from = SCM_INUM (start);
301 SCM_ASSERT_RANGE (2, start, 0 <= from && from <= SCM_STRING_LENGTH (str));
302 to = SCM_INUM (end);
303 SCM_ASSERT_RANGE (3, end, from <= to && to <= SCM_STRING_LENGTH (str));
304
305 substr = scm_mem2string (&SCM_STRING_CHARS (str)[from], to - from);
306 scm_remember_upto_here_1 (str);
307 return substr;
308 }
309 #undef FUNC_NAME
310
311
312 SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
313 (SCM args),
314 "Return a newly allocated string whose characters form the\n"
315 "concatenation of the given strings, @var{args}.")
316 #define FUNC_NAME s_scm_string_append
317 {
318 SCM res;
319 size_t i = 0;
320 register SCM l, s;
321 register unsigned char *data;
322
323 SCM_VALIDATE_REST_ARGUMENT (args);
324 for (l = args; !SCM_NULLP (l); l = SCM_CDR (l)) {
325 s = SCM_CAR (l);
326 SCM_VALIDATE_STRING (SCM_ARGn,s);
327 i += SCM_STRING_LENGTH (s);
328 }
329 res = scm_allocate_string (i);
330 data = SCM_STRING_UCHARS (res);
331 for (l = args; !SCM_NULLP (l);l = SCM_CDR (l)) {
332 s = SCM_CAR (l);
333 for (i = 0;i<SCM_STRING_LENGTH (s);i++) *data++ = SCM_STRING_UCHARS (s)[i];
334 }
335 return res;
336 }
337 #undef FUNC_NAME
338
339
340 /* Converts the given Scheme string OBJ into a C string, containing a copy
341 of OBJ's content with a trailing null byte. If LENP is non-NULL, set
342 *LENP to the string's length.
343
344 When STR is non-NULL it receives the copy and is returned by the function,
345 otherwise new memory is allocated and the caller is responsible for
346 freeing it via free(). If out of memory, NULL is returned.
347
348 Note that Scheme strings may contain arbitrary data, including null
349 characters. This means that null termination is not a reliable way to
350 determine the length of the returned value. However, the function always
351 copies the complete contents of OBJ, and sets *LENP to the length of the
352 scheme string (if LENP is non-null). */
353 #define FUNC_NAME "scm_c_string2str"
354 char *
355 scm_c_string2str (SCM obj, char *str, size_t *lenp)
356 {
357 size_t len;
358
359 SCM_ASSERT (SCM_STRINGP (obj), obj, SCM_ARG1, FUNC_NAME);
360 len = SCM_STRING_LENGTH (obj);
361
362 if (str == NULL)
363 {
364 /* FIXME: Should we use exported wrappers for malloc (and free), which
365 * allow windows DLLs to call the correct freeing function? */
366 str = (char *) malloc ((len + 1) * sizeof (char));
367 if (str == NULL)
368 return NULL;
369 }
370
371 memcpy (str, SCM_STRING_CHARS (obj), len);
372 scm_remember_upto_here_1 (obj);
373 str[len] = '\0';
374
375 if (lenp != NULL)
376 *lenp = len;
377
378 return str;
379 }
380 #undef FUNC_NAME
381
382
383 /* Copy LEN characters at START from the Scheme string OBJ to memory
384 at STR. START is an index into OBJ; zero means the beginning of
385 the string. STR has already been allocated by the caller.
386
387 If START + LEN is off the end of OBJ, silently truncate the source
388 region to fit the string. If truncation occurs, the corresponding
389 area of STR is left unchanged. */
390 #define FUNC_NAME "scm_c_substring2str"
391 char *
392 scm_c_substring2str (SCM obj, char *str, size_t start, size_t len)
393 {
394 size_t src_length, effective_length;
395
396 SCM_ASSERT (SCM_STRINGP (obj), obj, SCM_ARG2, FUNC_NAME);
397 src_length = SCM_STRING_LENGTH (obj);
398 effective_length = (len + start <= src_length) ? len : src_length - start;
399 memcpy (str, SCM_STRING_CHARS (obj) + start, effective_length);
400 scm_remember_upto_here_1 (obj);
401 return str;
402 }
403 #undef FUNC_NAME
404
405
406 void
407 scm_init_strings ()
408 {
409 scm_nullstr = scm_allocate_string (0);
410
411 #ifndef SCM_MAGIC_SNARFER
412 #include "libguile/strings.x"
413 #endif
414 }
415
416
417 /*
418 Local Variables:
419 c-file-style: "gnu"
420 End:
421 */