1 /* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009 Free Software Foundation, Inc.
3 * This library is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU Lesser General Public License
5 * as published by the Free Software Foundation; either version 3 of
6 * the License, or (at your option) any later version.
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "libguile/_scm.h"
32 #include "libguile/chars.h"
33 #include "libguile/root.h"
34 #include "libguile/strings.h"
35 #include "libguile/deprecation.h"
36 #include "libguile/validate.h"
46 * XXX - keeping an accurate refcount during GC seems to be quite
47 * tricky, so we just keep score of whether a stringbuf might be
48 * shared, not whether it definitely is.
50 * The scheme I (mvo) tried to keep an accurate reference count would
51 * recount all strings that point to a stringbuf during the mark-phase
52 * of the GC. This was done since one cannot access the stringbuf of
53 * a string when that string is freed (in order to decrease the
54 * reference count). The memory of the stringbuf might have been
55 * reused already for something completely different.
57 * This recounted worked for a small number of threads beating on
58 * cow-strings, but it failed randomly with more than 10 threads, say.
59 * I couldn't figure out what went wrong, so I used the conservative
60 * approach implemented below.
62 * A stringbuf needs to know its length, but only so that it can be
63 * reported when the stringbuf is freed.
65 * There are 3 storage strategies for stringbufs: inline, outline, and
68 * Inline strings are small 8-bit strings stored within the double
69 * cell itself. Outline strings are larger 8-bit strings with GC
70 * allocated storage. Wide strings are 32-bit strings with allocated
73 * There was little value in making wide string inlineable, since
74 * there is only room for three inlined 32-bit characters. Thus wide
75 * stringbufs are never inlined.
78 #define STRINGBUF_F_SHARED 0x100
79 #define STRINGBUF_F_INLINE 0x200
80 #define STRINGBUF_F_WIDE 0x400 /* If true, strings have UCS-4
81 encoding. Otherwise, strings
84 #define STRINGBUF_TAG scm_tc7_stringbuf
85 #define STRINGBUF_SHARED(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
86 #define STRINGBUF_INLINE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_INLINE)
87 #define STRINGBUF_WIDE(buf) (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
89 #define STRINGBUF_OUTLINE_CHARS(buf) ((char *)SCM_CELL_WORD_1(buf))
90 #define STRINGBUF_OUTLINE_LENGTH(buf) (SCM_CELL_WORD_2(buf))
91 #define STRINGBUF_INLINE_CHARS(buf) ((char *)SCM_CELL_OBJECT_LOC(buf,1))
92 #define STRINGBUF_INLINE_LENGTH(buf) (((size_t)SCM_CELL_WORD_0(buf))>>16)
94 #define STRINGBUF_CHARS(buf) (STRINGBUF_INLINE (buf) \
95 ? STRINGBUF_INLINE_CHARS (buf) \
96 : STRINGBUF_OUTLINE_CHARS (buf))
98 #define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *)SCM_CELL_WORD_1(buf))
99 #define STRINGBUF_LENGTH(buf) (STRINGBUF_INLINE (buf) \
100 ? STRINGBUF_INLINE_LENGTH (buf) \
101 : STRINGBUF_OUTLINE_LENGTH (buf))
103 #define STRINGBUF_MAX_INLINE_LEN (3*sizeof(scm_t_bits))
105 #define SET_STRINGBUF_SHARED(buf) \
106 (SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED))
108 #if SCM_STRING_LENGTH_HISTOGRAM
109 static size_t lenhist
[1001];
112 /* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
115 make_stringbuf (size_t len
)
117 /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
118 scm_i_symbol_chars, all stringbufs are null-terminated. Once
119 SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
120 has been changed for scm_i_symbol_chars, this null-termination
124 #if SCM_STRING_LENGTH_HISTOGRAM
131 if (len
<= STRINGBUF_MAX_INLINE_LEN
-1)
133 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_INLINE
| (len
<< 16),
138 char *mem
= scm_gc_malloc_pointerless (len
+ 1, "string");
140 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) mem
,
141 (scm_t_bits
) len
, (scm_t_bits
) 0);
145 /* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
148 make_wide_stringbuf (size_t len
)
151 #if SCM_STRING_LENGTH_HISTOGRAM
158 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
160 return scm_double_cell (STRINGBUF_TAG
| STRINGBUF_F_WIDE
, (scm_t_bits
) mem
,
161 (scm_t_bits
) len
, (scm_t_bits
) 0);
164 /* Return a new stringbuf whose underlying storage consists of the LEN+1
165 octets pointed to by STR (the last octet is zero). */
167 scm_i_take_stringbufn (char *str
, size_t len
)
169 scm_gc_register_collectable_memory (str
, len
+ 1, "stringbuf");
171 return scm_double_cell (STRINGBUF_TAG
, (scm_t_bits
) str
,
172 (scm_t_bits
) len
, (scm_t_bits
) 0);
175 /* Convert a stringbuf containing 8-bit Latin-1-encoded characters to
176 one containing 32-bit UCS-4-encoded characters. */
178 widen_stringbuf (SCM buf
)
183 if (STRINGBUF_WIDE (buf
))
186 if (STRINGBUF_INLINE (buf
))
188 len
= STRINGBUF_INLINE_LENGTH (buf
);
190 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
191 for (i
= 0; i
< len
; i
++)
193 (scm_t_wchar
) (unsigned char) STRINGBUF_INLINE_CHARS (buf
)[i
];
196 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) ^ STRINGBUF_F_INLINE
);
197 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
198 SCM_SET_CELL_WORD_1 (buf
, mem
);
199 SCM_SET_CELL_WORD_2 (buf
, len
);
203 len
= STRINGBUF_OUTLINE_LENGTH (buf
);
205 mem
= scm_gc_malloc (sizeof (scm_t_wchar
) * (len
+ 1), "string");
206 for (i
= 0; i
< len
; i
++)
208 (scm_t_wchar
) (unsigned char) STRINGBUF_OUTLINE_CHARS (buf
)[i
];
211 scm_gc_free (STRINGBUF_OUTLINE_CHARS (buf
), len
+ 1, "string");
213 SCM_SET_CELL_WORD_0 (buf
, SCM_CELL_WORD_0 (buf
) | STRINGBUF_F_WIDE
);
214 SCM_SET_CELL_WORD_1 (buf
, mem
);
215 SCM_SET_CELL_WORD_2 (buf
, len
);
219 scm_i_pthread_mutex_t stringbuf_write_mutex
= SCM_I_PTHREAD_MUTEX_INITIALIZER
;
221 /* Copy-on-write strings.
224 #define STRING_TAG scm_tc7_string
226 #define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
227 #define STRING_START(str) ((size_t)SCM_CELL_WORD_2(str))
228 #define STRING_LENGTH(str) ((size_t)SCM_CELL_WORD_3(str))
230 #define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
231 #define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
233 #define IS_STRING(str) (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
235 /* Read-only strings.
238 #define RO_STRING_TAG (scm_tc7_string + 0x200)
239 #define IS_RO_STRING(str) (SCM_CELL_TYPE(str)==RO_STRING_TAG)
241 /* Mutation-sharing substrings
244 #define SH_STRING_TAG (scm_tc7_string + 0x100)
246 #define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
247 /* START and LENGTH as for STRINGs. */
249 #define IS_SH_STRING(str) (SCM_CELL_TYPE(str)==SH_STRING_TAG)
251 /* Create a scheme string with space for LEN 8-bit Latin-1-encoded
252 characters. CHARSP, if not NULL, will be set to location of the
255 scm_i_make_string (size_t len
, char **charsp
)
257 SCM buf
= make_stringbuf (len
);
260 *charsp
= STRINGBUF_CHARS (buf
);
261 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
262 (scm_t_bits
)0, (scm_t_bits
) len
);
266 /* Create a scheme string with space for LEN 32-bit UCS-4-encoded
267 characters. CHARSP, if not NULL, will be set to location of the
270 scm_i_make_wide_string (size_t len
, scm_t_wchar
**charsp
)
272 SCM buf
= make_wide_stringbuf (len
);
275 *charsp
= STRINGBUF_WIDE_CHARS (buf
);
276 res
= scm_double_cell (STRING_TAG
, SCM_UNPACK (buf
),
277 (scm_t_bits
) 0, (scm_t_bits
) len
);
282 validate_substring_args (SCM str
, size_t start
, size_t end
)
284 if (!IS_STRING (str
))
285 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
286 if (start
> STRING_LENGTH (str
))
287 scm_out_of_range (NULL
, scm_from_size_t (start
));
288 if (end
> STRING_LENGTH (str
) || end
< start
)
289 scm_out_of_range (NULL
, scm_from_size_t (end
));
293 get_str_buf_start (SCM
*str
, SCM
*buf
, size_t *start
)
295 *start
= STRING_START (*str
);
296 if (IS_SH_STRING (*str
))
298 *str
= SH_STRING_STRING (*str
);
299 *start
+= STRING_START (*str
);
301 *buf
= STRING_STRINGBUF (*str
);
305 scm_i_substring (SCM str
, size_t start
, size_t end
)
309 get_str_buf_start (&str
, &buf
, &str_start
);
310 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
311 SET_STRINGBUF_SHARED (buf
);
312 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
313 return scm_double_cell (STRING_TAG
, SCM_UNPACK(buf
),
314 (scm_t_bits
)str_start
+ start
,
315 (scm_t_bits
) end
- start
);
319 scm_i_substring_read_only (SCM str
, size_t start
, size_t end
)
323 get_str_buf_start (&str
, &buf
, &str_start
);
324 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
325 SET_STRINGBUF_SHARED (buf
);
326 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
327 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK(buf
),
328 (scm_t_bits
)str_start
+ start
,
329 (scm_t_bits
) end
- start
);
333 scm_i_substring_copy (SCM str
, size_t start
, size_t end
)
335 size_t len
= end
- start
;
338 get_str_buf_start (&str
, &buf
, &str_start
);
339 if (scm_i_is_narrow_string (str
))
341 my_buf
= make_stringbuf (len
);
342 memcpy (STRINGBUF_CHARS (my_buf
),
343 STRINGBUF_CHARS (buf
) + str_start
+ start
, len
);
347 my_buf
= make_wide_stringbuf (len
);
348 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (my_buf
),
349 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
) + str_start
351 /* Even though this string is wide, the substring may be narrow.
352 Consider adding code to narrow the string. */
354 scm_remember_upto_here_1 (buf
);
355 return scm_double_cell (STRING_TAG
, SCM_UNPACK (my_buf
),
356 (scm_t_bits
) 0, (scm_t_bits
) len
);
360 scm_i_substring_shared (SCM str
, size_t start
, size_t end
)
362 if (start
== 0 && end
== STRING_LENGTH (str
))
366 size_t len
= end
- start
;
367 if (IS_SH_STRING (str
))
369 start
+= STRING_START (str
);
370 str
= SH_STRING_STRING (str
);
372 return scm_double_cell (SH_STRING_TAG
, SCM_UNPACK(str
),
373 (scm_t_bits
)start
, (scm_t_bits
) len
);
378 scm_c_substring (SCM str
, size_t start
, size_t end
)
380 validate_substring_args (str
, start
, end
);
381 return scm_i_substring (str
, start
, end
);
385 scm_c_substring_read_only (SCM str
, size_t start
, size_t end
)
387 validate_substring_args (str
, start
, end
);
388 return scm_i_substring_read_only (str
, start
, end
);
392 scm_c_substring_copy (SCM str
, size_t start
, size_t end
)
394 validate_substring_args (str
, start
, end
);
395 return scm_i_substring_copy (str
, start
, end
);
399 scm_c_substring_shared (SCM str
, size_t start
, size_t end
)
401 validate_substring_args (str
, start
, end
);
402 return scm_i_substring_shared (str
, start
, end
);
406 /* Internal accessors
409 /* Returns the number of characters in STR. This may be different
410 than the memory size of the string storage. */
412 scm_i_string_length (SCM str
)
414 return STRING_LENGTH (str
);
417 /* True if the string is 'narrow', meaning it has a 8-bit Latin-1
418 encoding. False if it is 'wide', having a 32-bit UCS-4
421 scm_i_is_narrow_string (SCM str
)
423 return !STRINGBUF_WIDE (STRING_STRINGBUF (str
));
426 /* Returns a pointer to the 8-bit Latin-1 encoded character array of
429 scm_i_string_chars (SCM str
)
433 get_str_buf_start (&str
, &buf
, &start
);
434 if (scm_i_is_narrow_string (str
))
435 return STRINGBUF_CHARS (buf
) + start
;
437 scm_misc_error (NULL
, "Invalid read access of chars of wide string: ~s",
442 /* Returns a pointer to the 32-bit UCS-4 encoded character array of
445 scm_i_string_wide_chars (SCM str
)
450 get_str_buf_start (&str
, &buf
, &start
);
451 if (!scm_i_is_narrow_string (str
))
452 return STRINGBUF_WIDE_CHARS (buf
) + start
;
454 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
458 /* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
459 a new string buffer, so that it can be modified without modifying
460 other strings. Also, lock the string mutex. Later, one must call
461 scm_i_string_stop_writing to unlock the mutex. */
463 scm_i_string_start_writing (SCM orig_str
)
465 SCM buf
, str
= orig_str
;
468 get_str_buf_start (&str
, &buf
, &start
);
469 if (IS_RO_STRING (str
))
470 scm_misc_error (NULL
, "string is read-only: ~s", scm_list_1 (orig_str
));
472 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
473 if (STRINGBUF_SHARED (buf
))
475 /* Clone the stringbuf. */
476 size_t len
= STRING_LENGTH (str
);
479 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
481 if (scm_i_is_narrow_string (str
))
483 new_buf
= make_stringbuf (len
);
484 memcpy (STRINGBUF_CHARS (new_buf
),
485 STRINGBUF_CHARS (buf
) + STRING_START (str
), len
);
490 new_buf
= make_wide_stringbuf (len
);
491 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
492 (scm_t_uint32
*) (STRINGBUF_WIDE_CHARS (buf
)
493 + STRING_START (str
)), len
);
496 SET_STRING_STRINGBUF (str
, new_buf
);
497 start
-= STRING_START (str
);
499 /* FIXME: The following operations are not atomic, so other threads
500 looking at STR may see an inconsistent state. Nevertheless it can't
501 hurt much since (i) accessing STR while it is being mutated can't
502 yield a crash, and (ii) concurrent accesses to STR should be
503 protected by a mutex at the application level. The latter may not
504 apply when STR != ORIG_STR, though. */
505 SET_STRING_START (str
, 0);
506 SET_STRING_STRINGBUF (str
, new_buf
);
510 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
515 /* Return a pointer to the 8-bit Latin-1 chars of a string. */
517 scm_i_string_writable_chars (SCM str
)
522 get_str_buf_start (&str
, &buf
, &start
);
523 if (scm_i_is_narrow_string (str
))
524 return STRINGBUF_CHARS (buf
) + start
;
526 scm_misc_error (NULL
, "Invalid write access of chars of wide string: ~s",
531 /* Return a pointer to the UCS-4 codepoints of a string. */
533 scm_i_string_writable_wide_chars (SCM str
)
538 get_str_buf_start (&str
, &buf
, &start
);
539 if (!scm_i_is_narrow_string (str
))
540 return STRINGBUF_WIDE_CHARS (buf
) + start
;
542 scm_misc_error (NULL
, "Invalid read access of chars of narrow string: ~s",
546 /* Unlock the string mutex that was locked when
547 scm_i_string_start_writing was called. */
549 scm_i_string_stop_writing (void)
551 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
554 /* Return the Xth character of STR as a UCS-4 codepoint. */
556 scm_i_string_ref (SCM str
, size_t x
)
558 if (scm_i_is_narrow_string (str
))
559 return (scm_t_wchar
) (unsigned char) (scm_i_string_chars (str
)[x
]);
561 return scm_i_string_wide_chars (str
)[x
];
564 /* Set the Pth character of STR to UCS-4 codepoint CHR. */
566 scm_i_string_set_x (SCM str
, size_t p
, scm_t_wchar chr
)
568 if (chr
> 0xFF && scm_i_is_narrow_string (str
))
569 widen_stringbuf (STRING_STRINGBUF (str
));
571 if (scm_i_is_narrow_string (str
))
573 char *dst
= scm_i_string_writable_chars (str
);
574 dst
[p
] = (char) (unsigned char) chr
;
578 scm_t_wchar
*dst
= scm_i_string_writable_wide_chars (str
);
585 Basic symbol creation and accessing is done here, the rest is in
586 symbols.[hc]. This has been done to keep stringbufs and the
587 internals of strings and string-like objects confined to this file.
590 #define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
593 scm_i_make_symbol (SCM name
, scm_t_bits flags
,
594 unsigned long hash
, SCM props
)
597 size_t start
= STRING_START (name
);
598 size_t length
= STRING_LENGTH (name
);
600 if (IS_SH_STRING (name
))
602 name
= SH_STRING_STRING (name
);
603 start
+= STRING_START (name
);
605 buf
= SYMBOL_STRINGBUF (name
);
607 if (start
== 0 && length
== STRINGBUF_LENGTH (buf
))
610 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
611 SET_STRINGBUF_SHARED (buf
);
612 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
617 if (scm_i_is_narrow_string (name
))
619 SCM new_buf
= make_stringbuf (length
);
620 memcpy (STRINGBUF_CHARS (new_buf
),
621 STRINGBUF_CHARS (buf
) + start
, length
);
626 SCM new_buf
= make_wide_stringbuf (length
);
627 u32_cpy ((scm_t_uint32
*) STRINGBUF_WIDE_CHARS (new_buf
),
628 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
) + start
,
633 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
634 (scm_t_bits
) hash
, SCM_UNPACK (props
));
638 scm_i_c_make_symbol (const char *name
, size_t len
,
639 scm_t_bits flags
, unsigned long hash
, SCM props
)
641 SCM buf
= make_stringbuf (len
);
642 memcpy (STRINGBUF_CHARS (buf
), name
, len
);
644 return scm_immutable_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
645 (scm_t_bits
) hash
, SCM_UNPACK (props
));
648 /* Return a new symbol that uses the LEN bytes pointed to by NAME as its
649 underlying storage. */
651 scm_i_c_take_symbol (char *name
, size_t len
,
652 scm_t_bits flags
, unsigned long hash
, SCM props
)
654 SCM buf
= scm_i_take_stringbufn (name
, len
);
656 return scm_double_cell (scm_tc7_symbol
| flags
, SCM_UNPACK (buf
),
657 (scm_t_bits
) hash
, SCM_UNPACK (props
));
660 /* Returns the number of characters in SYM. This may be different
661 from the memory size of SYM. */
663 scm_i_symbol_length (SCM sym
)
665 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
669 scm_c_symbol_length (SCM sym
)
670 #define FUNC_NAME "scm_c_symbol_length"
672 SCM_VALIDATE_SYMBOL (1, sym
);
674 return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym
));
678 /* True if the name of SYM is stored as a Latin-1 encoded string.
679 False if it is stored as a 32-bit UCS-4-encoded string. */
681 scm_i_is_narrow_symbol (SCM sym
)
685 buf
= SYMBOL_STRINGBUF (sym
);
686 return !STRINGBUF_WIDE (buf
);
689 /* Returns a pointer to the 8-bit Latin-1 encoded character array that
690 contains the name of SYM. */
692 scm_i_symbol_chars (SCM sym
)
696 buf
= SYMBOL_STRINGBUF (sym
);
697 if (!STRINGBUF_WIDE (buf
))
698 return STRINGBUF_CHARS (buf
);
700 scm_misc_error (NULL
, "Invalid access of chars of a wide symbol ~S",
704 /* Return a pointer to the 32-bit UCS-4-encoded character array of a
707 scm_i_symbol_wide_chars (SCM sym
)
711 buf
= SYMBOL_STRINGBUF (sym
);
712 if (STRINGBUF_WIDE (buf
))
713 return STRINGBUF_WIDE_CHARS (buf
);
715 scm_misc_error (NULL
, "Invalid access of chars of a narrow symbol ~S",
720 scm_i_symbol_substring (SCM sym
, size_t start
, size_t end
)
722 SCM buf
= SYMBOL_STRINGBUF (sym
);
723 scm_i_pthread_mutex_lock (&stringbuf_write_mutex
);
724 SET_STRINGBUF_SHARED (buf
);
725 scm_i_pthread_mutex_unlock (&stringbuf_write_mutex
);
726 return scm_double_cell (RO_STRING_TAG
, SCM_UNPACK (buf
),
727 (scm_t_bits
)start
, (scm_t_bits
) end
- start
);
730 /* Returns the Xth character of symbol SYM as a UCS-4 codepoint. */
732 scm_i_symbol_ref (SCM sym
, size_t x
)
734 if (scm_i_is_narrow_symbol (sym
))
735 return (scm_t_wchar
) (unsigned char) (scm_i_symbol_chars (sym
)[x
]);
737 return scm_i_symbol_wide_chars (sym
)[x
];
743 SCM_DEFINE (scm_sys_string_dump
, "%string-dump", 1, 0, 0, (SCM str
),
744 "Returns an association list containing debugging information\n"
745 "for @var{str}. The association list has the following entries."
748 "The string itself.\n"
750 "The start index of the string into its stringbuf\n"
752 "The length of the string\n"
754 "If this string is a substring, it returns its parent string.\n"
755 "Otherwise, it returns @code{#f}\n"
757 "@code{#t} if the string is read-only\n"
758 "@item stringbuf-chars\n"
759 "A new string containing this string's stringbuf's characters\n"
760 "@item stringbuf-length\n"
761 "The number of characters in this stringbuf\n"
762 "@item stringbuf-shared\n"
763 "@code{#t} if this stringbuf is shared\n"
764 "@item stringbuf-inline\n"
765 "@code{#t} if this stringbuf's characters are stored in the\n"
766 "cell itself, or @code{#f} if they were allocated in memory\n"
767 "@item stringbuf-wide\n"
768 "@code{#t} if this stringbuf's characters are stored in a\n"
769 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
772 #define FUNC_NAME s_scm_sys_string_dump
774 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
;
776 SCM_VALIDATE_STRING (1, str
);
779 e1
= scm_cons (scm_from_locale_symbol ("string"),
781 e2
= scm_cons (scm_from_locale_symbol ("start"),
782 scm_from_size_t (STRING_START (str
)));
783 e3
= scm_cons (scm_from_locale_symbol ("length"),
784 scm_from_size_t (STRING_LENGTH (str
)));
786 if (IS_SH_STRING (str
))
788 e4
= scm_cons (scm_from_locale_symbol ("shared"),
789 SH_STRING_STRING (str
));
790 buf
= STRING_STRINGBUF (SH_STRING_STRING (str
));
794 e4
= scm_cons (scm_from_locale_symbol ("shared"),
796 buf
= STRING_STRINGBUF (str
);
799 if (IS_RO_STRING (str
))
800 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
803 e5
= scm_cons (scm_from_locale_symbol ("read-only"),
807 if (!STRINGBUF_WIDE (buf
))
809 size_t len
= STRINGBUF_LENGTH (buf
);
811 SCM sbc
= scm_i_make_string (len
, &cbuf
);
812 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
813 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
818 size_t len
= STRINGBUF_LENGTH (buf
);
820 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
821 u32_cpy ((scm_t_uint32
*) cbuf
,
822 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
823 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
826 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
827 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
828 if (STRINGBUF_SHARED (buf
))
829 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
832 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
834 if (STRINGBUF_INLINE (buf
))
835 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
838 e9
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
840 if (STRINGBUF_WIDE (buf
))
841 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
844 e10
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
847 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, e9
, e10
, SCM_UNDEFINED
);
851 SCM_DEFINE (scm_sys_symbol_dump
, "%symbol-dump", 1, 0, 0, (SCM sym
),
852 "Returns an association list containing debugging information\n"
853 "for @var{sym}. The association list has the following entries."
856 "The symbol itself\n"
860 "@code{#t} if it is an interned symbol\n"
861 "@item stringbuf-chars\n"
862 "A new string containing this symbols's stringbuf's characters\n"
863 "@item stringbuf-length\n"
864 "The number of characters in this stringbuf\n"
865 "@item stringbuf-shared\n"
866 "@code{#t} if this stringbuf is shared\n"
867 "@item stringbuf-inline\n"
868 "@code{#t} if this stringbuf's characters are stored in the\n"
869 "cell itself, or @code{#f} if they were allocated in memory\n"
870 "@item stringbuf-wide\n"
871 "@code{#t} if this stringbuf's characters are stored in a\n"
872 "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
875 #define FUNC_NAME s_scm_sys_symbol_dump
877 SCM e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
;
879 SCM_VALIDATE_SYMBOL (1, sym
);
880 e1
= scm_cons (scm_from_locale_symbol ("symbol"),
882 e2
= scm_cons (scm_from_locale_symbol ("hash"),
883 scm_from_ulong (scm_i_symbol_hash (sym
)));
884 e3
= scm_cons (scm_from_locale_symbol ("interned"),
885 scm_symbol_interned_p (sym
));
886 buf
= SYMBOL_STRINGBUF (sym
);
889 if (!STRINGBUF_WIDE (buf
))
891 size_t len
= STRINGBUF_LENGTH (buf
);
893 SCM sbc
= scm_i_make_string (len
, &cbuf
);
894 memcpy (cbuf
, STRINGBUF_CHARS (buf
), len
);
895 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
900 size_t len
= STRINGBUF_LENGTH (buf
);
902 SCM sbc
= scm_i_make_wide_string (len
, &cbuf
);
903 u32_cpy ((scm_t_uint32
*) cbuf
,
904 (scm_t_uint32
*) STRINGBUF_WIDE_CHARS (buf
), len
);
905 e4
= scm_cons (scm_from_locale_symbol ("stringbuf-chars"),
908 e5
= scm_cons (scm_from_locale_symbol ("stringbuf-length"),
909 scm_from_size_t (STRINGBUF_LENGTH (buf
)));
910 if (STRINGBUF_SHARED (buf
))
911 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
914 e6
= scm_cons (scm_from_locale_symbol ("stringbuf-shared"),
916 if (STRINGBUF_INLINE (buf
))
917 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
920 e7
= scm_cons (scm_from_locale_symbol ("stringbuf-inline"),
922 if (STRINGBUF_WIDE (buf
))
923 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
926 e8
= scm_cons (scm_from_locale_symbol ("stringbuf-wide"),
928 return scm_list_n (e1
, e2
, e3
, e4
, e5
, e6
, e7
, e8
, SCM_UNDEFINED
);
933 #if SCM_STRING_LENGTH_HISTOGRAM
935 SCM_DEFINE (scm_sys_stringbuf_hist
, "%stringbuf-hist", 0, 0, 0, (void), "")
936 #define FUNC_NAME s_scm_sys_stringbuf_hist
939 for (i
= 0; i
< 1000; i
++)
941 fprintf (stderr
, " %3d: %u\n", i
, lenhist
[i
]);
942 fprintf (stderr
, ">999: %u\n", lenhist
[1000]);
943 return SCM_UNSPECIFIED
;
951 SCM_DEFINE (scm_string_p
, "string?", 1, 0, 0,
953 "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
954 #define FUNC_NAME s_scm_string_p
956 return scm_from_bool (IS_STRING (obj
));
961 SCM_REGISTER_PROC (s_scm_list_to_string
, "list->string", 1, 0, 0, scm_string
);
963 SCM_DEFINE (scm_string
, "string", 0, 0, 1,
965 "@deffnx {Scheme Procedure} list->string chrs\n"
966 "Return a newly allocated string composed of the arguments,\n"
968 #define FUNC_NAME s_scm_string
976 /* Verify that this is a list of chars. */
977 i
= scm_ilength (chrs
);
978 SCM_ASSERT (i
>= 0, chrs
, SCM_ARG1
, FUNC_NAME
);
983 while (len
> 0 && scm_is_pair (rest
))
985 SCM elt
= SCM_CAR (rest
);
986 SCM_VALIDATE_CHAR (SCM_ARGn
, elt
);
987 rest
= SCM_CDR (rest
);
989 scm_remember_upto_here_1 (elt
);
992 /* Construct a string containing this list of chars. */
996 result
= scm_i_make_string (len
, NULL
);
997 result
= scm_i_string_start_writing (result
);
998 while (len
> 0 && scm_is_pair (rest
))
1000 SCM elt
= SCM_CAR (rest
);
1001 scm_i_string_set_x (result
, p
, SCM_CHAR (elt
));
1003 rest
= SCM_CDR (rest
);
1005 scm_remember_upto_here_1 (elt
);
1007 scm_i_string_stop_writing ();
1010 scm_misc_error (NULL
, "list changed while constructing string", SCM_EOL
);
1011 if (!scm_is_null (rest
))
1012 scm_wrong_type_arg_msg (NULL
, 0, chrs
, "proper list");
1018 SCM_DEFINE (scm_make_string
, "make-string", 1, 1, 0,
1020 "Return a newly allocated string of\n"
1021 "length @var{k}. If @var{chr} is given, then all elements of\n"
1022 "the string are initialized to @var{chr}, otherwise the contents\n"
1023 "of the @var{string} are unspecified.")
1024 #define FUNC_NAME s_scm_make_string
1026 return scm_c_make_string (scm_to_size_t (k
), chr
);
1031 scm_c_make_string (size_t len
, SCM chr
)
1032 #define FUNC_NAME NULL
1035 SCM res
= scm_i_make_string (len
, NULL
);
1037 if (!SCM_UNBNDP (chr
))
1039 SCM_VALIDATE_CHAR (0, chr
);
1040 res
= scm_i_string_start_writing (res
);
1041 for (p
= 0; p
< len
; p
++)
1042 scm_i_string_set_x (res
, p
, SCM_CHAR (chr
));
1043 scm_i_string_stop_writing ();
1050 SCM_DEFINE (scm_string_length
, "string-length", 1, 0, 0,
1052 "Return the number of characters in @var{string}.")
1053 #define FUNC_NAME s_scm_string_length
1055 SCM_VALIDATE_STRING (1, string
);
1056 return scm_from_size_t (STRING_LENGTH (string
));
1060 SCM_DEFINE (scm_string_width
, "string-width", 1, 0, 0,
1062 "Return the bytes used to represent a character in @var{string}."
1063 "This will return 1 or 4.")
1064 #define FUNC_NAME s_scm_string_width
1066 SCM_VALIDATE_STRING (1, string
);
1067 if (!scm_i_is_narrow_string (string
))
1068 return scm_from_int (4);
1070 return scm_from_int (1);
1075 scm_c_string_length (SCM string
)
1077 if (!IS_STRING (string
))
1078 scm_wrong_type_arg_msg (NULL
, 0, string
, "string");
1079 return STRING_LENGTH (string
);
1082 SCM_DEFINE (scm_string_ref
, "string-ref", 2, 0, 0,
1084 "Return character @var{k} of @var{str} using zero-origin\n"
1085 "indexing. @var{k} must be a valid index of @var{str}.")
1086 #define FUNC_NAME s_scm_string_ref
1091 SCM_VALIDATE_STRING (1, str
);
1093 len
= scm_i_string_length (str
);
1094 if (SCM_LIKELY (len
> 0))
1095 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1097 scm_out_of_range (NULL
, k
);
1099 if (scm_i_is_narrow_string (str
))
1100 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[idx
]);
1102 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[idx
]);
1107 scm_c_string_ref (SCM str
, size_t p
)
1109 if (p
>= scm_i_string_length (str
))
1110 scm_out_of_range (NULL
, scm_from_size_t (p
));
1111 if (scm_i_is_narrow_string (str
))
1112 return SCM_MAKE_CHAR (scm_i_string_chars (str
)[p
]);
1114 return SCM_MAKE_CHAR (scm_i_string_wide_chars (str
)[p
]);
1118 SCM_DEFINE (scm_string_set_x
, "string-set!", 3, 0, 0,
1119 (SCM str
, SCM k
, SCM chr
),
1120 "Store @var{chr} in element @var{k} of @var{str} and return\n"
1121 "an unspecified value. @var{k} must be a valid index of\n"
1123 #define FUNC_NAME s_scm_string_set_x
1128 SCM_VALIDATE_STRING (1, str
);
1130 len
= scm_i_string_length (str
);
1131 if (SCM_LIKELY (len
> 0))
1132 idx
= scm_to_unsigned_integer (k
, 0, len
- 1);
1134 scm_out_of_range (NULL
, k
);
1136 SCM_VALIDATE_CHAR (3, chr
);
1137 str
= scm_i_string_start_writing (str
);
1138 scm_i_string_set_x (str
, idx
, SCM_CHAR (chr
));
1139 scm_i_string_stop_writing ();
1141 return SCM_UNSPECIFIED
;
1146 scm_c_string_set_x (SCM str
, size_t p
, SCM chr
)
1148 if (p
>= scm_i_string_length (str
))
1149 scm_out_of_range (NULL
, scm_from_size_t (p
));
1150 str
= scm_i_string_start_writing (str
);
1151 scm_i_string_set_x (str
, p
, SCM_CHAR (chr
));
1152 scm_i_string_stop_writing ();
1155 SCM_DEFINE (scm_substring
, "substring", 2, 1, 0,
1156 (SCM str
, SCM start
, SCM end
),
1157 "Return a newly allocated string formed from the characters\n"
1158 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1159 "ending with index @var{end} (exclusive).\n"
1160 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1161 "exact integers satisfying:\n\n"
1162 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1163 #define FUNC_NAME s_scm_substring
1165 size_t len
, from
, to
;
1167 SCM_VALIDATE_STRING (1, str
);
1168 len
= scm_i_string_length (str
);
1169 from
= scm_to_unsigned_integer (start
, 0, len
);
1170 if (SCM_UNBNDP (end
))
1173 to
= scm_to_unsigned_integer (end
, from
, len
);
1174 return scm_i_substring (str
, from
, to
);
1178 SCM_DEFINE (scm_substring_read_only
, "substring/read-only", 2, 1, 0,
1179 (SCM str
, SCM start
, SCM end
),
1180 "Return a newly allocated string formed from the characters\n"
1181 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1182 "ending with index @var{end} (exclusive).\n"
1183 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1184 "exact integers satisfying:\n"
1186 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
1188 "The returned string is read-only.\n")
1189 #define FUNC_NAME s_scm_substring_read_only
1191 size_t len
, from
, to
;
1193 SCM_VALIDATE_STRING (1, str
);
1194 len
= scm_i_string_length (str
);
1195 from
= scm_to_unsigned_integer (start
, 0, len
);
1196 if (SCM_UNBNDP (end
))
1199 to
= scm_to_unsigned_integer (end
, from
, len
);
1200 return scm_i_substring_read_only (str
, from
, to
);
1204 SCM_DEFINE (scm_substring_copy
, "substring/copy", 2, 1, 0,
1205 (SCM str
, SCM start
, SCM end
),
1206 "Return a newly allocated string formed from the characters\n"
1207 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1208 "ending with index @var{end} (exclusive).\n"
1209 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1210 "exact integers satisfying:\n\n"
1211 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1212 #define FUNC_NAME s_scm_substring_copy
1214 /* For the Scheme version, START is mandatory, but for the C
1215 version, it is optional. See scm_string_copy in srfi-13.c for a
1221 SCM_VALIDATE_STRING (1, str
);
1222 scm_i_get_substring_spec (scm_i_string_length (str
),
1223 start
, &from
, end
, &to
);
1224 return scm_i_substring_copy (str
, from
, to
);
1228 SCM_DEFINE (scm_substring_shared
, "substring/shared", 2, 1, 0,
1229 (SCM str
, SCM start
, SCM end
),
1230 "Return string that indirectly refers to the characters\n"
1231 "of @var{str} beginning with index @var{start} (inclusive) and\n"
1232 "ending with index @var{end} (exclusive).\n"
1233 "@var{str} must be a string, @var{start} and @var{end} must be\n"
1234 "exact integers satisfying:\n\n"
1235 "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
1236 #define FUNC_NAME s_scm_substring_shared
1238 size_t len
, from
, to
;
1240 SCM_VALIDATE_STRING (1, str
);
1241 len
= scm_i_string_length (str
);
1242 from
= scm_to_unsigned_integer (start
, 0, len
);
1243 if (SCM_UNBNDP (end
))
1246 to
= scm_to_unsigned_integer (end
, from
, len
);
1247 return scm_i_substring_shared (str
, from
, to
);
1251 SCM_DEFINE (scm_string_append
, "string-append", 0, 0, 1,
1253 "Return a newly allocated string whose characters form the\n"
1254 "concatenation of the given strings, @var{args}.")
1255 #define FUNC_NAME s_scm_string_append
1268 SCM_VALIDATE_REST_ARGUMENT (args
);
1269 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1272 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1273 len
+= scm_i_string_length (s
);
1274 if (!scm_i_is_narrow_string (s
))
1279 res
= scm_i_make_string (len
, &data
.narrow
);
1281 res
= scm_i_make_wide_string (len
, &data
.wide
);
1283 for (l
= args
; !scm_is_null (l
); l
= SCM_CDR (l
))
1287 SCM_VALIDATE_STRING (SCM_ARGn
, s
);
1288 len
= scm_i_string_length (s
);
1291 memcpy (data
.narrow
, scm_i_string_chars (s
), len
);
1296 if (scm_i_is_narrow_string (s
))
1298 for (i
= 0; i
< scm_i_string_length (s
); i
++)
1299 data
.wide
[i
] = (unsigned char) scm_i_string_chars (s
)[i
];
1302 u32_cpy ((scm_t_uint32
*) data
.wide
,
1303 (scm_t_uint32
*) scm_i_string_wide_chars (s
), len
);
1306 scm_remember_upto_here_1 (s
);
1313 scm_is_string (SCM obj
)
1315 return IS_STRING (obj
);
1319 scm_from_locale_stringn (const char *str
, size_t len
)
1324 if (len
== (size_t) -1)
1329 res
= scm_i_make_string (len
, &dst
);
1330 memcpy (dst
, str
, len
);
1335 scm_from_locale_string (const char *str
)
1340 return scm_from_locale_stringn (str
, -1);
1343 /* Create a new scheme string from the C string STR. The memory of
1344 STR may be used directly as storage for the new string. */
1346 scm_take_locale_stringn (char *str
, size_t len
)
1350 if (len
== (size_t) -1)
1354 /* Ensure STR is null terminated. A realloc for 1 extra byte should
1355 often be satisfied from the alignment padding after the block, with
1356 no actual data movement. */
1357 str
= scm_realloc (str
, len
+ 1);
1361 buf
= scm_i_take_stringbufn (str
, len
);
1362 res
= scm_double_cell (STRING_TAG
,
1363 SCM_UNPACK (buf
), (scm_t_bits
) 0, (scm_t_bits
) len
);
1368 scm_take_locale_string (char *str
)
1370 return scm_take_locale_stringn (str
, -1);
1373 /* Change libunistring escapes (\uXXXX and \UXXXXXXXX) to \xXX \uXXXX
1376 unistring_escapes_to_guile_escapes (char **bufp
, size_t *lenp
)
1378 char *before
, *after
;
1387 if ((i
<= *lenp
- 6)
1388 && before
[i
] == '\\'
1389 && before
[i
+ 1] == 'u'
1390 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1392 /* Convert \u00NN to \xNN */
1395 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1396 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1400 else if ((i
<= *lenp
- 10)
1401 && before
[i
] == '\\'
1402 && before
[i
+ 1] == 'U'
1403 && before
[i
+ 2] == '0' && before
[i
+ 3] == '0')
1405 /* Convert \U00NNNNNN to \UNNNNNN */
1408 after
[j
+ 2] = tolower ((int) before
[i
+ 4]);
1409 after
[j
+ 3] = tolower ((int) before
[i
+ 5]);
1410 after
[j
+ 4] = tolower ((int) before
[i
+ 6]);
1411 after
[j
+ 5] = tolower ((int) before
[i
+ 7]);
1412 after
[j
+ 6] = tolower ((int) before
[i
+ 8]);
1413 after
[j
+ 7] = tolower ((int) before
[i
+ 9]);
1419 after
[j
] = before
[i
];
1425 after
= scm_realloc (after
, j
);
1429 scm_to_locale_stringn (SCM str
, size_t * lenp
)
1433 /* In the future, enc will hold the port's encoding. */
1436 return scm_to_stringn (str
, lenp
, enc
,
1437 SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
);
1440 /* Low-level scheme to C string conversion function. */
1442 scm_to_stringn (SCM str
, size_t * lenp
, const char *encoding
,
1443 scm_t_string_failed_conversion_handler handler
)
1445 static const char iso
[11] = "ISO-8859-1";
1447 size_t ilen
, len
, i
;
1449 if (!scm_is_string (str
))
1450 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1451 ilen
= scm_i_string_length (str
);
1455 buf
= scm_malloc (1);
1463 for (i
= 0; i
< ilen
; i
++)
1464 if (scm_i_string_ref (str
, i
) == '\0')
1465 scm_misc_error (NULL
,
1466 "string contains #\\nul character: ~S",
1469 if (scm_i_is_narrow_string (str
))
1473 buf
= scm_malloc (ilen
);
1474 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1480 buf
= scm_malloc (ilen
+ 1);
1481 memcpy (buf
, scm_i_string_chars (str
), ilen
);
1490 buf
= u32_conv_to_encoding (iso
,
1491 (enum iconv_ilseq_handler
) handler
,
1492 (scm_t_uint32
*) scm_i_string_wide_chars (str
),
1493 ilen
, NULL
, NULL
, &len
);
1495 scm_misc_error (NULL
, "cannot convert to output locale ~s: \"~s\"",
1496 scm_list_2 (scm_from_locale_string (iso
), str
));
1498 if (handler
== SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE
)
1499 unistring_escapes_to_guile_escapes (&buf
, &len
);
1505 buf
= scm_realloc (buf
, len
+ 1);
1509 scm_remember_upto_here_1 (str
);
1514 scm_to_locale_string (SCM str
)
1516 return scm_to_locale_stringn (str
, NULL
);
1520 scm_to_locale_stringbuf (SCM str
, char *buf
, size_t max_len
)
1523 char *result
= NULL
;
1524 if (!scm_is_string (str
))
1525 scm_wrong_type_arg_msg (NULL
, 0, str
, "string");
1526 result
= scm_to_locale_stringn (str
, &len
);
1528 memcpy (buf
, result
, (len
> max_len
) ? max_len
: len
);
1531 scm_remember_upto_here_1 (str
);
1535 /* converts C scm_array of strings to SCM scm_list of strings. */
1536 /* If argc < 0, a null terminated scm_array is assumed. */
1538 scm_makfromstrs (int argc
, char **argv
)
1543 for (i
= 0; argv
[i
]; i
++);
1545 lst
= scm_cons (scm_from_locale_string (argv
[i
]), lst
);
1549 /* Return a newly allocated array of char pointers to each of the strings
1550 in args, with a terminating NULL pointer. */
1553 scm_i_allocate_string_pointers (SCM list
)
1554 #define FUNC_NAME "scm_i_allocate_string_pointers"
1557 int len
= scm_ilength (list
);
1561 scm_wrong_type_arg_msg (NULL
, 0, list
, "proper list");
1563 result
= scm_gc_malloc ((len
+ 1) * sizeof (char *),
1567 /* The list might be have been modified in another thread, so
1568 we check LIST before each access.
1570 for (i
= 0; i
< len
&& scm_is_pair (list
); i
++)
1575 str
= SCM_CAR (list
);
1576 len
= scm_c_string_length (str
);
1578 result
[i
] = scm_gc_malloc_pointerless (len
+ 1, "string pointers");
1579 memcpy (result
[i
], scm_i_string_chars (str
), len
);
1580 result
[i
][len
] = '\0';
1582 list
= SCM_CDR (list
);
1590 scm_i_get_substring_spec (size_t len
,
1591 SCM start
, size_t *cstart
,
1592 SCM end
, size_t *cend
)
1594 if (SCM_UNBNDP (start
))
1597 *cstart
= scm_to_unsigned_integer (start
, 0, len
);
1599 if (SCM_UNBNDP (end
))
1602 *cend
= scm_to_unsigned_integer (end
, *cstart
, len
);
1605 #if SCM_ENABLE_DEPRECATED
1607 /* When these definitions are removed, it becomes reasonable to use
1608 read-only strings for string literals. For that, change the reader
1609 to create string literals with scm_c_substring_read_only instead of
1610 with scm_c_substring_copy.
1614 scm_i_deprecated_stringp (SCM str
)
1616 scm_c_issue_deprecation_warning
1617 ("SCM_STRINGP is deprecated. Use scm_is_string instead.");
1619 return scm_is_string (str
);
1623 scm_i_deprecated_string_chars (SCM str
)
1627 scm_c_issue_deprecation_warning
1628 ("SCM_STRING_CHARS is deprecated. See the manual for alternatives.");
1630 /* We don't accept shared substrings here since they are not
1633 if (IS_SH_STRING (str
))
1634 scm_misc_error (NULL
,
1635 "SCM_STRING_CHARS does not work with shared substrings.",
1638 /* We explicitly test for read-only strings to produce a better
1642 if (IS_RO_STRING (str
))
1643 scm_misc_error (NULL
,
1644 "SCM_STRING_CHARS does not work with read-only strings.",
1647 /* The following is still wrong, of course...
1649 str
= scm_i_string_start_writing (str
);
1650 chars
= scm_i_string_writable_chars (str
);
1651 scm_i_string_stop_writing ();
1656 scm_i_deprecated_string_length (SCM str
)
1658 scm_c_issue_deprecation_warning
1659 ("SCM_STRING_LENGTH is deprecated. Use scm_c_string_length instead.");
1660 return scm_c_string_length (str
);
1668 scm_nullstr
= scm_i_make_string (0, NULL
);
1670 #include "libguile/strings.x"