b4a7063cfdb8ccdcc990661e8b5cc494390b0ac9
[bpt/guile.git] / libguile / strop.c
1 /* classes: src_files */
2
3 /* Copyright (C) 1994,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
4
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
19
20 \f
21 #if HAVE_CONFIG_H
22 # include <config.h>
23 #endif
24
25 #include <errno.h>
26
27 #include "libguile/_scm.h"
28 #include "libguile/chars.h"
29 #include "libguile/strings.h"
30
31 #include "libguile/validate.h"
32 #include "libguile/strop.h"
33 #include "libguile/read.h" /*For SCM_CASE_INSENSITIVE_P*/
34
35 #ifdef HAVE_STRING_H
36 #include <string.h>
37 #endif
38
39 \f
40
41 /*
42 xSCM_DEFINE (scm_i_index, "i-index", 2, 2, 0,
43 (SCM str, SCM chr, SCM frm, SCM to),
44 "@deftypefn {Internal C Function} {static int} scm_i_index (SCM *@var{str},\n"
45 "SCM @var{chr}, int @var{direction}, SCM @var{sub_start}, SCM @var{sub_end}, char *@var{why})
46 "This is a workhorse function that performs either an @code{index} or\n"
47 "@code{rindex} function, depending on the value of @var{direction}."
48 */
49 /* implements index if direction > 0 otherwise rindex. */
50 static long
51 scm_i_index (SCM str, SCM chr, int direction, SCM sub_start,
52 SCM sub_end, const char *why)
53 {
54 unsigned char * p;
55 long x;
56 long lower;
57 long upper;
58 int ch;
59
60 SCM_ASSERT (scm_is_string (str), str, SCM_ARG1, why);
61 SCM_ASSERT (SCM_CHARP (chr), chr, SCM_ARG2, why);
62
63 if (scm_is_false (sub_start))
64 lower = 0;
65 else
66 lower = scm_to_signed_integer (sub_start, 0, scm_i_string_length(str));
67
68 if (scm_is_false (sub_end))
69 upper = scm_i_string_length (str);
70 else
71 upper = scm_to_signed_integer (sub_end, lower, scm_i_string_length(str));
72
73 x = -1;
74
75 if (direction > 0)
76 {
77 p = (unsigned char *) scm_i_string_chars (str) + lower;
78 ch = SCM_CHAR (chr);
79
80 for (x = lower; x < upper; ++x, ++p)
81 if (*p == ch)
82 goto found_it;
83 }
84 else
85 {
86 p = upper - 1 + (unsigned char *)scm_i_string_chars (str);
87 ch = SCM_CHAR (chr);
88 for (x = upper - 1; x >= lower; --x, --p)
89 if (*p == ch)
90 goto found_it;
91 }
92
93 found_it:
94 scm_remember_upto_here_1 (str);
95 return x;
96 }
97
98 SCM_DEFINE (scm_string_index, "string-index", 2, 2, 0,
99 (SCM str, SCM chr, SCM frm, SCM to),
100 "Return the index of the first occurrence of @var{chr} in\n"
101 "@var{str}. The optional integer arguments @var{frm} and\n"
102 "@var{to} limit the search to a portion of the string. This\n"
103 "procedure essentially implements the @code{index} or\n"
104 "@code{strchr} functions from the C library.\n"
105 "\n"
106 "@lisp\n"
107 "(string-index \"weiner\" #\\e)\n"
108 "@result{} 1\n\n"
109 "(string-index \"weiner\" #\\e 2)\n"
110 "@result{} 4\n\n"
111 "(string-index \"weiner\" #\\e 2 4)\n"
112 "@result{} #f\n"
113 "@end lisp")
114 #define FUNC_NAME s_scm_string_index
115 {
116 long pos;
117
118 if (SCM_UNBNDP (frm))
119 frm = SCM_BOOL_F;
120 if (SCM_UNBNDP (to))
121 to = SCM_BOOL_F;
122 pos = scm_i_index (str, chr, 1, frm, to, FUNC_NAME);
123 return (pos < 0
124 ? SCM_BOOL_F
125 : scm_from_long (pos));
126 }
127 #undef FUNC_NAME
128
129 SCM_DEFINE (scm_string_rindex, "string-rindex", 2, 2, 0,
130 (SCM str, SCM chr, SCM frm, SCM to),
131 "Like @code{string-index}, but search from the right of the\n"
132 "string rather than from the left. This procedure essentially\n"
133 "implements the @code{rindex} or @code{strrchr} functions from\n"
134 "the C library.\n"
135 "\n"
136 "@lisp\n"
137 "(string-rindex \"weiner\" #\\e)\n"
138 "@result{} 4\n\n"
139 "(string-rindex \"weiner\" #\\e 2 4)\n"
140 "@result{} #f\n\n"
141 "(string-rindex \"weiner\" #\\e 2 5)\n"
142 "@result{} 4\n"
143 "@end lisp")
144 #define FUNC_NAME s_scm_string_rindex
145 {
146 long pos;
147
148 if (SCM_UNBNDP (frm))
149 frm = SCM_BOOL_F;
150 if (SCM_UNBNDP (to))
151 to = SCM_BOOL_F;
152 pos = scm_i_index (str, chr, -1, frm, to, FUNC_NAME);
153 return (pos < 0
154 ? SCM_BOOL_F
155 : scm_from_long (pos));
156 }
157 #undef FUNC_NAME
158
159 SCM_DEFINE (scm_substring_move_x, "substring-move!", 5, 0, 0,
160 (SCM str1, SCM start1, SCM end1, SCM str2, SCM start2),
161 "Copy the substring of @var{str1} bounded by @var{start1} and @var{end1}\n"
162 "into @var{str2} beginning at position @var{start2}.\n"
163 "@var{str1} and @var{str2} can be the same string.")
164 #define FUNC_NAME s_scm_substring_move_x
165 {
166 unsigned long s1, s2, e, len;
167 const char *src;
168 char *dst;
169
170 SCM_VALIDATE_STRING (1, str1);
171 SCM_VALIDATE_STRING (4, str2);
172 s1 = scm_to_unsigned_integer (start1, 0, scm_i_string_length(str1));
173 e = scm_to_unsigned_integer (end1, s1, scm_i_string_length(str1));
174 len = e - s1;
175 s2 = scm_to_unsigned_integer (start2, 0, scm_i_string_length(str2)-len);
176
177 src = scm_i_string_chars (str2);
178 dst = scm_i_string_writable_chars (str1);
179 SCM_SYSCALL (memmove (dst+s2, src+s1, len));
180 scm_i_string_stop_writing ();
181
182 scm_remember_upto_here_2 (str1, str2);
183 return SCM_UNSPECIFIED;
184 }
185 #undef FUNC_NAME
186
187
188 SCM_DEFINE (scm_substring_fill_x, "substring-fill!", 4, 0, 0,
189 (SCM str, SCM start, SCM end, SCM fill),
190 "Change every character in @var{str} between @var{start} and\n"
191 "@var{end} to @var{fill}.\n"
192 "\n"
193 "@lisp\n"
194 "(define y \"abcdefg\")\n"
195 "(substring-fill! y 1 3 #\\r)\n"
196 "y\n"
197 "@result{} \"arrdefg\"\n"
198 "@end lisp")
199 #define FUNC_NAME s_scm_substring_fill_x
200 {
201 size_t i, e;
202 char c;
203 char *dst;
204
205 SCM_VALIDATE_STRING (1, str);
206 i = scm_to_unsigned_integer (start, 0, scm_i_string_length (str));
207 e = scm_to_unsigned_integer (end, i, scm_i_string_length (str));
208 SCM_VALIDATE_CHAR_COPY (4, fill, c);
209 dst = scm_i_string_writable_chars (str);
210 while (i<e)
211 dst[i++] = c;
212 scm_i_string_stop_writing ();
213 scm_remember_upto_here (str);
214 return SCM_UNSPECIFIED;
215 }
216 #undef FUNC_NAME
217
218
219 SCM_DEFINE (scm_string_null_p, "string-null?", 1, 0, 0,
220 (SCM str),
221 "Return @code{#t} if @var{str}'s length is zero, and\n"
222 "@code{#f} otherwise.\n"
223 "@lisp\n"
224 "(string-null? \"\") @result{} #t\n"
225 "y @result{} \"foo\"\n"
226 "(string-null? y) @result{} #f\n"
227 "@end lisp")
228 #define FUNC_NAME s_scm_string_null_p
229 {
230 SCM_VALIDATE_STRING (1, str);
231 return scm_from_bool (scm_i_string_length (str) == 0);
232 }
233 #undef FUNC_NAME
234
235
236 SCM_DEFINE (scm_string_to_list, "string->list", 1, 0, 0,
237 (SCM str),
238 "Return a newly allocated list of the characters that make up\n"
239 "the given string @var{str}. @code{string->list} and\n"
240 "@code{list->string} are inverses as far as @samp{equal?} is\n"
241 "concerned.")
242 #define FUNC_NAME s_scm_string_to_list
243 {
244 long i;
245 SCM res = SCM_EOL;
246 const unsigned char *src;
247 SCM_VALIDATE_STRING (1, str);
248 src = scm_i_string_chars (str);
249 for (i = scm_i_string_length (str)-1;i >= 0;i--)
250 res = scm_cons (SCM_MAKE_CHAR (src[i]), res);
251 scm_remember_upto_here_1 (src);
252 return res;
253 }
254 #undef FUNC_NAME
255
256
257 /* Helper function for the string copy and string conversion functions.
258 * No argument checking is performed. */
259 static SCM
260 string_copy (SCM str)
261 {
262 const char* chars = scm_i_string_chars (str);
263 size_t length = scm_i_string_length (str);
264 char *dst;
265 SCM new_string = scm_i_make_string (length, &dst);
266 memcpy (dst, chars, length);
267 scm_remember_upto_here_1 (str);
268 return new_string;
269 }
270
271
272 SCM_DEFINE (scm_string_copy, "string-copy", 1, 0, 0,
273 (SCM str),
274 "Return a newly allocated copy of the given @var{string}.")
275 #define FUNC_NAME s_scm_string_copy
276 {
277 SCM_VALIDATE_STRING (1, str);
278
279 return string_copy (str);
280 }
281 #undef FUNC_NAME
282
283
284 SCM_DEFINE (scm_string_fill_x, "string-fill!", 2, 0, 0,
285 (SCM str, SCM chr),
286 "Store @var{char} in every element of the given @var{string} and\n"
287 "return an unspecified value.")
288 #define FUNC_NAME s_scm_string_fill_x
289 {
290 char *dst, c;
291 long k;
292 SCM_VALIDATE_STRING (1, str);
293 SCM_VALIDATE_CHAR_COPY (2, chr, c);
294 dst = scm_i_string_writable_chars (str);
295 for (k = scm_i_string_length (str)-1;k >= 0;k--)
296 dst[k] = c;
297 scm_i_string_stop_writing ();
298 scm_remember_upto_here_1 (str);
299 return SCM_UNSPECIFIED;
300 }
301 #undef FUNC_NAME
302
303
304 /* Helper function for the string uppercase conversion functions.
305 * No argument checking is performed. */
306 static SCM
307 string_upcase_x (SCM v)
308 {
309 size_t k, len;
310 char *dst;
311
312 len = scm_i_string_length (v);
313 dst = scm_i_string_writable_chars (v);
314 for (k = 0; k < len; ++k)
315 dst[k] = scm_c_upcase (dst[k]);
316 scm_i_string_stop_writing ();
317 return v;
318 }
319
320
321 SCM_DEFINE (scm_string_upcase_x, "string-upcase!", 1, 0, 0,
322 (SCM str),
323 "Destructively upcase every character in @var{str} and return\n"
324 "@var{str}.\n"
325 "@lisp\n"
326 "y @result{} \"arrdefg\"\n"
327 "(string-upcase! y) @result{} \"ARRDEFG\"\n"
328 "y @result{} \"ARRDEFG\"\n"
329 "@end lisp")
330 #define FUNC_NAME s_scm_string_upcase_x
331 {
332 SCM_VALIDATE_STRING (1, str);
333
334 return string_upcase_x (str);
335 }
336 #undef FUNC_NAME
337
338
339 SCM_DEFINE (scm_string_upcase, "string-upcase", 1, 0, 0,
340 (SCM str),
341 "Return a freshly allocated string containing the characters of\n"
342 "@var{str} in upper case.")
343 #define FUNC_NAME s_scm_string_upcase
344 {
345 SCM_VALIDATE_STRING (1, str);
346
347 return string_upcase_x (string_copy (str));
348 }
349 #undef FUNC_NAME
350
351
352 /* Helper function for the string lowercase conversion functions.
353 * No argument checking is performed. */
354 static SCM
355 string_downcase_x (SCM v)
356 {
357 size_t k, len;
358 char *dst;
359
360 len = scm_i_string_length (v);
361 dst = scm_i_string_writable_chars (v);
362 for (k = 0; k < len; ++k)
363 dst[k] = scm_c_downcase (dst[k]);
364 scm_i_string_stop_writing ();
365
366 return v;
367 }
368
369
370 SCM_DEFINE (scm_string_downcase_x, "string-downcase!", 1, 0, 0,
371 (SCM str),
372 "Destructively downcase every character in @var{str} and return\n"
373 "@var{str}.\n"
374 "@lisp\n"
375 "y @result{} \"ARRDEFG\"\n"
376 "(string-downcase! y) @result{} \"arrdefg\"\n"
377 "y @result{} \"arrdefg\"\n"
378 "@end lisp")
379 #define FUNC_NAME s_scm_string_downcase_x
380 {
381 SCM_VALIDATE_STRING (1, str);
382
383 return string_downcase_x (str);
384 }
385 #undef FUNC_NAME
386
387
388 SCM_DEFINE (scm_string_downcase, "string-downcase", 1, 0, 0,
389 (SCM str),
390 "Return a freshly allocation string containing the characters in\n"
391 "@var{str} in lower case.")
392 #define FUNC_NAME s_scm_string_downcase
393 {
394 SCM_VALIDATE_STRING (1, str);
395
396 return string_downcase_x (string_copy (str));
397 }
398 #undef FUNC_NAME
399
400
401 /* Helper function for the string capitalization functions.
402 * No argument checking is performed. */
403 static SCM
404 string_capitalize_x (SCM str)
405 {
406 unsigned char *sz;
407 size_t i, len;
408 int in_word=0;
409
410 len = scm_i_string_length (str);
411 sz = scm_i_string_writable_chars (str);
412 for (i = 0; i < len; i++)
413 {
414 if (scm_is_true (scm_char_alphabetic_p (SCM_MAKE_CHAR (sz[i]))))
415 {
416 if (!in_word)
417 {
418 sz[i] = scm_c_upcase (sz[i]);
419 in_word = 1;
420 }
421 else
422 {
423 sz[i] = scm_c_downcase (sz[i]);
424 }
425 }
426 else
427 in_word = 0;
428 }
429 scm_i_string_stop_writing ();
430 return str;
431 }
432
433
434 SCM_DEFINE (scm_string_capitalize_x, "string-capitalize!", 1, 0, 0,
435 (SCM str),
436 "Upcase the first character of every word in @var{str}\n"
437 "destructively and return @var{str}.\n"
438 "\n"
439 "@lisp\n"
440 "y @result{} \"hello world\"\n"
441 "(string-capitalize! y) @result{} \"Hello World\"\n"
442 "y @result{} \"Hello World\"\n"
443 "@end lisp")
444 #define FUNC_NAME s_scm_string_capitalize_x
445 {
446 SCM_VALIDATE_STRING (1, str);
447
448 return string_capitalize_x (str);
449 }
450 #undef FUNC_NAME
451
452
453 SCM_DEFINE (scm_string_capitalize, "string-capitalize", 1, 0, 0,
454 (SCM str),
455 "Return a freshly allocated string with the characters in\n"
456 "@var{str}, where the first character of every word is\n"
457 "capitalized.")
458 #define FUNC_NAME s_scm_string_capitalize
459 {
460 SCM_VALIDATE_STRING (1, str);
461
462 return string_capitalize_x (string_copy (str));
463 }
464 #undef FUNC_NAME
465
466
467 SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
468 (SCM str, SCM chr),
469 "Split the string @var{str} into the a list of the substrings delimited\n"
470 "by appearances of the character @var{chr}. Note that an empty substring\n"
471 "between separator characters will result in an empty string in the\n"
472 "result list.\n"
473 "\n"
474 "@lisp\n"
475 "(string-split \"root:x:0:0:root:/root:/bin/bash\" #\\:)\n"
476 "@result{}\n"
477 "(\"root\" \"x\" \"0\" \"0\" \"root\" \"/root\" \"/bin/bash\")\n"
478 "\n"
479 "(string-split \"::\" #\\:)\n"
480 "@result{}\n"
481 "(\"\" \"\" \"\")\n"
482 "\n"
483 "(string-split \"\" #\\:)\n"
484 "@result{}\n"
485 "(\"\")\n"
486 "@end lisp")
487 #define FUNC_NAME s_scm_string_split
488 {
489 long idx, last_idx;
490 const char * p;
491 int ch;
492 SCM res = SCM_EOL;
493
494 SCM_VALIDATE_STRING (1, str);
495 SCM_VALIDATE_CHAR (2, chr);
496
497 idx = scm_i_string_length (str);
498 p = scm_i_string_chars (str);
499 ch = SCM_CHAR (chr);
500 while (idx >= 0)
501 {
502 last_idx = idx;
503 while (idx > 0 && p[idx - 1] != ch)
504 idx--;
505 if (idx >= 0)
506 {
507 res = scm_cons (scm_c_substring (str, idx, last_idx), res);
508 p = scm_i_string_chars (str);
509 idx--;
510 }
511 }
512 scm_remember_upto_here_1 (str);
513 return res;
514 }
515 #undef FUNC_NAME
516
517
518 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
519 (SCM str),
520 "Return the symbol whose name is @var{str}. @var{str} is\n"
521 "converted to lowercase before the conversion is done, if Guile\n"
522 "is currently reading symbols case-insensitively.")
523 #define FUNC_NAME s_scm_string_ci_to_symbol
524 {
525 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
526 ? scm_string_downcase(str)
527 : str);
528 }
529 #undef FUNC_NAME
530
/* Module initialisation for this file.  The body consists solely of
   the contents of "libguile/strop.x", which is textually included
   inside the function.  NOTE(review): that file is presumably
   generated from the SCM_DEFINE forms above and registers the Scheme
   procedures -- confirm against the build rules.  */
531 void
532 scm_init_strop ()
533 {
534 #include "libguile/strop.x"
535 }
536
537 /*
538 Local Variables:
539 c-file-style: "gnu"
540 End:
541 */