* Deprecated scm_makfromstr and added scm_mem2string as a replacement.
[bpt/guile.git] / libguile / strop.c
1 /* classes: src_files */
2
3 /* Copyright (C) 1994,1996,1997,1999,2000,2001 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this software; see the file COPYING. If not, write to the
17 Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA
19
20 As a special exception, the Free Software Foundation gives permission
21 for additional uses of the text contained in its release of GUILE.
22
23 The exception is that, if you link the GUILE library with other files
24 to produce an executable, this does not by itself cause the
25 resulting executable to be covered by the GNU General Public License.
26 Your use of that executable is in no way restricted on account of
27 linking the GUILE library code into it.
28
29 This exception does not however invalidate any other reasons why
30 the executable file might be covered by the GNU General Public License.
31
32 This exception applies only to the code released by the
33 Free Software Foundation under the name GUILE. If you copy
34 code from other Free Software Foundation releases into a copy of
35 GUILE, as the General Public License permits, the exception does
36 not apply to the code that you add in this way. To avoid misleading
37 anyone as to the status of such modified files, you must delete
38 this exception notice from them.
39
40 If you write modifications of your own for GUILE, it is your choice
41 whether to permit this exception to apply to your modifications.
42 If you do not wish that, delete this exception notice. */
43
44 /* Software engineering face-lift by Greg J. Badros, 11-Dec-1999,
45 gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */
46
47
48 \f
49
50 #include <errno.h>
51
52 #include "libguile/_scm.h"
53 #include "libguile/chars.h"
54 #include "libguile/strings.h"
55
56 #include "libguile/validate.h"
57 #include "libguile/strop.h"
58 #include "libguile/read.h" /*For SCM_CASE_INSENSITIVE_P*/
59
60 #ifdef HAVE_STRING_H
61 #include <string.h>
62 #endif
63
64 \f
65
66 /*
67 xSCM_DEFINE (scm_i_index, "i-index", 2, 2, 0,
68 (SCM str, SCM chr, SCM frm, SCM to),
69 "@deftypefn {Internal C Function} {static int} scm_i_index (SCM *@var{str}, \n"
70 "SCM @var{chr}, int @var{direction}, SCM @var{sub_start}, SCM @var{sub_end}, char *@var{why})
71 "This is a workhorse function that performs either an @code{index} or\n"
72 "@code{rindex} function, depending on the value of @var{direction}."
73 */
74 /* implements index if direction > 0 otherwise rindex. */
75 static long
76 scm_i_index (SCM *str, SCM chr, int direction, SCM sub_start,
77 SCM sub_end, const char *why)
78 {
79 unsigned char * p;
80 long x;
81 long lower;
82 long upper;
83 int ch;
84
85 SCM_ASSERT (SCM_STRINGP (*str), *str, SCM_ARG1, why);
86 SCM_ASSERT (SCM_CHARP (chr), chr, SCM_ARG2, why);
87
88 if (SCM_FALSEP (sub_start))
89 sub_start = SCM_MAKINUM (0);
90
91 SCM_ASSERT (SCM_INUMP (sub_start), sub_start, SCM_ARG3, why);
92 lower = SCM_INUM (sub_start);
93 if (lower < 0 || lower > SCM_STRING_LENGTH (*str))
94 scm_out_of_range (why, sub_start);
95
96 if (SCM_FALSEP (sub_end))
97 sub_end = SCM_MAKINUM (SCM_STRING_LENGTH (*str));
98
99 SCM_ASSERT (SCM_INUMP (sub_end), sub_end, SCM_ARG4, why);
100 upper = SCM_INUM (sub_end);
101 if (upper < SCM_INUM (sub_start) || upper > SCM_STRING_LENGTH (*str))
102 scm_out_of_range (why, sub_end);
103
104 if (direction > 0)
105 {
106 p = SCM_STRING_UCHARS (*str) + lower;
107 ch = SCM_CHAR (chr);
108
109 for (x = SCM_INUM (sub_start); x < upper; ++x, ++p)
110 if (*p == ch)
111 return x;
112 }
113 else
114 {
115 p = upper - 1 + SCM_STRING_UCHARS (*str);
116 ch = SCM_CHAR (chr);
117 for (x = upper - 1; x >= lower; --x, --p)
118 if (*p == ch)
119 return x;
120 }
121
122 return -1;
123 }
124
125 SCM_DEFINE (scm_string_index, "string-index", 2, 2, 0,
126 (SCM str, SCM chr, SCM frm, SCM to),
127 "Return the index of the first occurrence of @var{chr} in\n"
128 "@var{str}. The optional integer arguments @var{frm} and\n"
129 "@var{to} limit the search to a portion of the string. This\n"
130 "procedure essentially implements the @code{index} or\n"
131 "@code{strchr} functions from the C library.\n"
132 "\n"
133 "@lisp\n"
134 "(string-index \"weiner\" #\\e)\n"
135 "@result{} 1\n\n"
136 "(string-index \"weiner\" #\\e 2)\n"
137 "@result{} 4\n\n"
138 "(string-index \"weiner\" #\\e 2 4)\n"
139 "@result{} #f\n"
140 "@end lisp")
141 #define FUNC_NAME s_scm_string_index
142 {
143 long pos;
144
145 if (SCM_UNBNDP (frm))
146 frm = SCM_BOOL_F;
147 if (SCM_UNBNDP (to))
148 to = SCM_BOOL_F;
149 pos = scm_i_index (&str, chr, 1, frm, to, FUNC_NAME);
150 return (pos < 0
151 ? SCM_BOOL_F
152 : SCM_MAKINUM (pos));
153 }
154 #undef FUNC_NAME
155
156 SCM_DEFINE (scm_string_rindex, "string-rindex", 2, 2, 0,
157 (SCM str, SCM chr, SCM frm, SCM to),
158 "Like @code{string-index}, but search from the right of the\n"
159 "string rather than from the left. This procedure essentially\n"
160 "implements the @code{rindex} or @code{strrchr} functions from\n"
161 "the C library.\n"
162 "\n"
163 "@lisp\n"
164 "(string-rindex \"weiner\" #\\e)\n"
165 "@result{} 4\n\n"
166 "(string-rindex \"weiner\" #\\e 2 4)\n"
167 "@result{} #f\n\n"
168 "(string-rindex \"weiner\" #\\e 2 5)\n"
169 "@result{} 4\n"
170 "@end lisp")
171 #define FUNC_NAME s_scm_string_rindex
172 {
173 long pos;
174
175 if (SCM_UNBNDP (frm))
176 frm = SCM_BOOL_F;
177 if (SCM_UNBNDP (to))
178 to = SCM_BOOL_F;
179 pos = scm_i_index (&str, chr, -1, frm, to, FUNC_NAME);
180 return (pos < 0
181 ? SCM_BOOL_F
182 : SCM_MAKINUM (pos));
183 }
184 #undef FUNC_NAME
185
186
187 SCM_REGISTER_PROC(s_substring_move_left_x, "substring-move-left!", 5, 0, 0, scm_substring_move_x);
188 SCM_REGISTER_PROC(s_substring_move_right_x, "substring-move-right!", 5, 0, 0, scm_substring_move_x);
189
190 /*
191 @defun substring-move-left! str1 start1 end1 str2 start2
192 @end defun
193 @deftypefn {C Function} SCM scm_substring_move_left_x (SCM @var{str1}, SCM @var{start1}, SCM @var{end1}, SCM @var{str2}, SCM @var{start2})
194 [@strong{Note:} this is only valid if you've applied the strop patch].
195
196 Moves a substring of @var{str1}, from @var{start1} to @var{end1}
197 (@var{end1} is exclusive), into @var{str2}, starting at
198 @var{start2}. Allows overlapping strings.
199
200 @lisp
201 (define x (make-string 10 #\a))
202 (define y "bcd")
203 (substring-move-left! x 2 5 y 0)
204 y
205 @result{} "aaa"
206
207 x
208 @result{} "aaaaaaaaaa"
209
210 (define y "bcdefg")
211 (substring-move-left! x 2 5 y 0)
212 y
213 @result{} "aaaefg"
214
215 (define y "abcdefg")
216 (substring-move-left! y 2 5 y 3)
217 y
218 @result{} "abccccg"
219 @end lisp
220 */
221
222 /*
223 @defun substring-move-right! str1 start1 end1 str2 start2
224 @end defun
225 @deftypefn {C Function} SCM scm_substring_move_right_x (SCM @var{str1}, SCM @var{start1}, SCM @var{end1}, SCM @var{str2}, SCM @var{start2})
226 [@strong{Note:} this is only valid if you've applied the strop patch, if
227 it hasn't made it into the guile tree].
228
229 Does much the same thing as @code{substring-move-left!}, except it
230 starts moving at the end of the sequence, rather than the beginning.
231 @lisp
232 (define y "abcdefg")
233 (substring-move-right! y 2 5 y 0)
234 y
235 @result{} "ededefg"
236
237 (define y "abcdefg")
238 (substring-move-right! y 2 5 y 3)
239 y
240 @result{} "abccdeg"
241 @end lisp
242 */
243
244 SCM_DEFINE (scm_substring_move_x, "substring-move!", 5, 0, 0,
245 (SCM str1, SCM start1, SCM end1, SCM str2, SCM start2),
246 "@deffnx primitive substring-move-left! str1 start1 end1 str2 start2\n"
247 "@deffnx primitive substring-move-right! str1 start1 end1 str2 start2\n"
248 "Copy the substring of @var{str1} bounded by @var{start1} and @var{end1}\n"
249 "into @var{str2} beginning at position @var{start2}.\n"
250 "@code{substring-move-right!} begins copying from the rightmost character\n"
251 "and moves left, and @code{substring-move-left!} copies from the leftmost\n"
252 "character moving right.\n\n"
253 "It is useful to have two functions that copy in different directions so\n"
254 "that substrings can be copied back and forth within a single string. If\n"
255 "you wish to copy text from the left-hand side of a string to the\n"
256 "right-hand side of the same string, and the source and destination\n"
257 "overlap, you must be careful to copy the rightmost characters of the\n"
258 "text first, to avoid clobbering your data. Hence, when @var{str1} and\n"
259 "@var{str2} are the same string, you should use\n"
260 "@code{substring-move-right!} when moving text from left to right, and\n"
261 "@code{substring-move-left!} otherwise. If @code{str1} and @samp{str2}\n"
262 "are different strings, it does not matter which function you use.")
263 #define FUNC_NAME s_scm_substring_move_x
264 {
265 long s1, s2, e, len;
266
267 SCM_VALIDATE_STRING (1,str1);
268 SCM_VALIDATE_INUM_COPY (2,start1,s1);
269 SCM_VALIDATE_INUM_COPY (3,end1,e);
270 SCM_VALIDATE_STRING (4,str2);
271 SCM_VALIDATE_INUM_COPY (5,start2,s2);
272 len = e - s1;
273 SCM_ASSERT_RANGE (3,end1,len >= 0);
274 SCM_ASSERT_RANGE (2,start1,s1 <= SCM_STRING_LENGTH (str1) && s1 >= 0);
275 SCM_ASSERT_RANGE (5,start2,s2 <= SCM_STRING_LENGTH (str2) && s2 >= 0);
276 SCM_ASSERT_RANGE (3,end1,e <= SCM_STRING_LENGTH (str1) && e >= 0);
277 SCM_ASSERT_RANGE (5,start2,len+s2 <= SCM_STRING_LENGTH (str2));
278
279 SCM_SYSCALL(memmove((void *)(&(SCM_STRING_CHARS(str2)[s2])),
280 (void *)(&(SCM_STRING_CHARS(str1)[s1])),
281 len));
282
283 return scm_return_first(SCM_UNSPECIFIED, str1, str2);
284 }
285 #undef FUNC_NAME
286
287
288 SCM_DEFINE (scm_substring_fill_x, "substring-fill!", 4, 0, 0,
289 (SCM str, SCM start, SCM end, SCM fill),
290 "Change every character in @var{str} between @var{start} and\n"
291 "@var{end} to @var{fill}.\n"
292 "\n"
293 "@lisp\n"
294 "(define y \"abcdefg\")\n"
295 "(substring-fill! y 1 3 #\\r)\n"
296 "y\n"
297 "@result{} \"arrdefg\"\n"
298 "@end lisp")
299 #define FUNC_NAME s_scm_substring_fill_x
300 {
301 long i, e;
302 char c;
303 SCM_VALIDATE_STRING (1,str);
304 SCM_VALIDATE_INUM_COPY (2,start,i);
305 SCM_VALIDATE_INUM_COPY (3,end,e);
306 SCM_VALIDATE_CHAR_COPY (4,fill,c);
307 SCM_ASSERT_RANGE (2,start,i <= SCM_STRING_LENGTH (str) && i >= 0);
308 SCM_ASSERT_RANGE (3,end,e <= SCM_STRING_LENGTH (str) && e >= 0);
309 while (i<e) SCM_STRING_CHARS (str)[i++] = c;
310 return SCM_UNSPECIFIED;
311 }
312 #undef FUNC_NAME
313
314
315 SCM_DEFINE (scm_string_null_p, "string-null?", 1, 0, 0,
316 (SCM str),
317 "Return @code{#t} if @var{str}'s length is nonzero, and\n"
318 "@code{#f} otherwise.\n"
319 "@lisp\n"
320 "(string-null? \"\") @result{} #t\n"
321 "y @result{} \"foo\"\n"
322 "(string-null? y) @result{} #f\n"
323 "@end lisp")
324 #define FUNC_NAME s_scm_string_null_p
325 {
326 SCM_VALIDATE_STRING (1,str);
327 return SCM_BOOL (SCM_STRING_LENGTH (str) == 0);
328 }
329 #undef FUNC_NAME
330
331
332 SCM_DEFINE (scm_string_to_list, "string->list", 1, 0, 0,
333 (SCM str),
334 "Return a newly allocated list of the characters that make up\n"
335 "the given string @var{str}. @code{string->list} and\n"
336 "@code{list->string} are inverses as far as @samp{equal?} is\n"
337 "concerned.")
338 #define FUNC_NAME s_scm_string_to_list
339 {
340 long i;
341 SCM res = SCM_EOL;
342 unsigned char *src;
343 SCM_VALIDATE_STRING (1,str);
344 src = SCM_STRING_UCHARS (str);
345 for (i = SCM_STRING_LENGTH (str)-1;i >= 0;i--) res = scm_cons (SCM_MAKE_CHAR (src[i]), res);
346 return res;
347 }
348 #undef FUNC_NAME
349
350
351 /* Helper function for the string copy and string conversion functions.
352 * No argument checking is performed. */
353 static SCM
354 string_copy (SCM str)
355 {
356 const char* chars = SCM_STRING_CHARS (str);
357 size_t length = SCM_STRING_LENGTH (str);
358 SCM new_string = scm_mem2string (chars, length);
359 scm_remember_upto_here_1 (str);
360 return new_string;
361 }
362
363
364 SCM_DEFINE (scm_string_copy, "string-copy", 1, 0, 0,
365 (SCM str),
366 "Return a newly allocated copy of the given @var{string}.")
367 #define FUNC_NAME s_scm_string_copy
368 {
369 SCM_VALIDATE_STRING (1, str);
370
371 return string_copy (str);
372 }
373 #undef FUNC_NAME
374
375
376 SCM_DEFINE (scm_string_fill_x, "string-fill!", 2, 0, 0,
377 (SCM str, SCM chr),
378 "Store @var{char} in every element of the given @var{string} and\n"
379 "return an unspecified value.")
380 #define FUNC_NAME s_scm_string_fill_x
381 {
382 register char *dst, c;
383 register long k;
384 SCM_VALIDATE_STRING_COPY (1,str,dst);
385 SCM_VALIDATE_CHAR_COPY (2,chr,c);
386 for (k = SCM_STRING_LENGTH (str)-1;k >= 0;k--) dst[k] = c;
387 return SCM_UNSPECIFIED;
388 }
389 #undef FUNC_NAME
390
391
392 /* Helper function for the string uppercase conversion functions.
393 * No argument checking is performed. */
394 static SCM
395 string_upcase_x (SCM v)
396 {
397 unsigned long k;
398
399 for (k = 0; k < SCM_STRING_LENGTH (v); ++k)
400 SCM_STRING_UCHARS (v) [k] = scm_upcase (SCM_STRING_UCHARS (v) [k]);
401
402 return v;
403 }
404
405
406 SCM_DEFINE (scm_string_upcase_x, "string-upcase!", 1, 0, 0,
407 (SCM str),
408 "Destructively upcase every character in @var{str} and return\n"
409 "@var{str}.\n"
410 "@lisp\n"
411 "y @result{} \"arrdefg\"\n"
412 "(string-upcase! y) @result{} \"ARRDEFG\"\n"
413 "y @result{} \"ARRDEFG\"\n"
414 "@end lisp")
415 #define FUNC_NAME s_scm_string_upcase_x
416 {
417 SCM_VALIDATE_STRING (1, str);
418
419 return string_upcase_x (str);
420 }
421 #undef FUNC_NAME
422
423
424 SCM_DEFINE (scm_string_upcase, "string-upcase", 1, 0, 0,
425 (SCM str),
426 "Return a freshly allocated string containing the characters of\n"
427 "@var{str} in upper case.")
428 #define FUNC_NAME s_scm_string_upcase
429 {
430 SCM_VALIDATE_STRING (1, str);
431
432 return string_upcase_x (string_copy (str));
433 }
434 #undef FUNC_NAME
435
436
437 /* Helper function for the string lowercase conversion functions.
438 * No argument checking is performed. */
439 static SCM
440 string_downcase_x (SCM v)
441 {
442 unsigned long k;
443
444 for (k = 0; k < SCM_STRING_LENGTH (v); ++k)
445 SCM_STRING_UCHARS (v) [k] = scm_downcase (SCM_STRING_UCHARS (v) [k]);
446
447 return v;
448 }
449
450
451 SCM_DEFINE (scm_string_downcase_x, "string-downcase!", 1, 0, 0,
452 (SCM str),
453 "Destructively downcase every character in @var{str} and return\n"
454 "@var{str}.\n"
455 "@lisp\n"
456 "y @result{} \"ARRDEFG\"\n"
457 "(string-downcase! y) @result{} \"arrdefg\"\n"
458 "y @result{} \"arrdefg\"\n"
459 "@end lisp")
460 #define FUNC_NAME s_scm_string_downcase_x
461 {
462 SCM_VALIDATE_STRING (1, str);
463
464 return string_downcase_x (str);
465 }
466 #undef FUNC_NAME
467
468
469 SCM_DEFINE (scm_string_downcase, "string-downcase", 1, 0, 0,
470 (SCM str),
471 "Return a freshly allocation string containing the characters in\n"
472 "@var{str} in lower case.")
473 #define FUNC_NAME s_scm_string_downcase
474 {
475 SCM_VALIDATE_STRING (1, str);
476
477 return string_downcase_x (string_copy (str));
478 }
479 #undef FUNC_NAME
480
481
482 /* Helper function for the string capitalization functions.
483 * No argument checking is performed. */
484 static SCM
485 string_capitalize_x (SCM str)
486 {
487 char *sz;
488 long i, len;
489 int in_word=0;
490
491 len = SCM_STRING_LENGTH(str);
492 sz = SCM_STRING_CHARS (str);
493 for(i=0; i<len; i++) {
494 if (!SCM_FALSEP (scm_char_alphabetic_p (SCM_MAKE_CHAR (sz[i])))) {
495 if(!in_word) {
496 sz[i] = scm_upcase(sz[i]);
497 in_word = 1;
498 } else {
499 sz[i] = scm_downcase(sz[i]);
500 }
501 }
502 else in_word = 0;
503 }
504 return str;
505 }
506
507
508 SCM_DEFINE (scm_string_capitalize_x, "string-capitalize!", 1, 0, 0,
509 (SCM str),
510 "Upcase the first character of every word in @var{str}\n"
511 "destructively and return @var{str}.\n"
512 "\n"
513 "@lisp\n"
514 "y @result{} \"hello world\"\n"
515 "(string-capitalize! y) @result{} \"Hello World\"\n"
516 "y @result{} \"Hello World\"\n"
517 "@end lisp")
518 #define FUNC_NAME s_scm_string_capitalize_x
519 {
520 SCM_VALIDATE_STRING (1, str);
521
522 return string_capitalize_x (str);
523 }
524 #undef FUNC_NAME
525
526
527 SCM_DEFINE (scm_string_capitalize, "string-capitalize", 1, 0, 0,
528 (SCM str),
529 "Return a freshly allocated string with the characters in\n"
530 "@var{str}, where the first character of every word is\n"
531 "capitalized.")
532 #define FUNC_NAME s_scm_string_capitalize
533 {
534 SCM_VALIDATE_STRING (1, str);
535
536 return string_capitalize_x (string_copy (str));
537 }
538 #undef FUNC_NAME
539
540
541 SCM_DEFINE (scm_string_split, "string-split", 2, 0, 0,
542 (SCM str, SCM chr),
543 "Split the string @var{str} into the a list of the substrings delimited\n"
544 "by appearances of the character @var{chr}. Note that an empty substring\n"
545 "between separator characters will result in an empty string in the\n"
546 "result list.\n"
547 "\n"
548 "@lisp\n"
549 "(string-split \"root:x:0:0:root:/root:/bin/bash\" #\:)\n"
550 "@result{}\n"
551 "(\"root\" \"x\" \"0\" \"0\" \"root\" \"/root\" \"/bin/bash\")\n"
552 "\n"
553 "(string-split \"::\" #\:)\n"
554 "@result{}\n"
555 "(\"\" \"\" \"\")\n"
556 "\n"
557 "(string-split \"\" #\:)\n"
558 "@result{}\n"
559 "(\"\")\n"
560 "@end lisp")
561 #define FUNC_NAME s_scm_string_split
562 {
563 long idx, last_idx;
564 char * p;
565 int ch;
566 SCM res = SCM_EOL;
567
568 SCM_VALIDATE_STRING (1, str);
569 SCM_VALIDATE_CHAR (2, chr);
570
571 idx = SCM_STRING_LENGTH (str);
572 p = SCM_STRING_CHARS (str);
573 ch = SCM_CHAR (chr);
574 while (idx >= 0)
575 {
576 last_idx = idx;
577 while (idx > 0 && p[idx - 1] != ch)
578 idx--;
579 if (idx >= 0)
580 {
581 res = scm_cons (scm_mem2string (p + idx, last_idx - idx), res);
582 idx--;
583 }
584 }
585 scm_remember_upto_here_1 (str);
586 return res;
587 }
588 #undef FUNC_NAME
589
590
591 SCM_DEFINE (scm_string_ci_to_symbol, "string-ci->symbol", 1, 0, 0,
592 (SCM str),
593 "Return the symbol whose name is @var{str}. @var{str} is\n"
594 "converted to lowercase before the conversion is done, if Guile\n"
595 "is currently reading symbols case--insensitively.")
596 #define FUNC_NAME s_scm_string_ci_to_symbol
597 {
598 return scm_string_to_symbol (SCM_CASE_INSENSITIVE_P
599 ? scm_string_downcase(str)
600 : str);
601 }
602 #undef FUNC_NAME
603
604 void
605 scm_init_strop ()
606 {
607 #ifndef SCM_MAGIC_SNARFER
608 #include "libguile/strop.x"
609 #endif
610 }
611
612 /*
613 Local Variables:
614 c-file-style: "gnu"
615 End:
616 */